Add TypeScript/Go parsers and utils modules
Some checks failed
CI / test (push) Has been cancelled

This commit is contained in:
2026-01-29 16:53:37 +00:00
parent e8e939cff9
commit 7cd5545218

View File

@@ -0,0 +1,405 @@
"""TypeScript parser using regex patterns."""
import re
from typing import Optional, List
from .base import Parser, DocElement, ElementType, Parameter
class TypeScriptParser(Parser):
    """Heuristic, regex-based documentation extractor for TypeScript files.

    No syntax tree is built: declarations are located with regular
    expressions and brace matching, so unusual formatting can be missed.
    A doc comment (``/** ... */`` or a run of ``///`` lines) is attached to a
    declaration only when nothing but whitespace, modifiers and decorators
    separates the two.

    Known limitations: parameter lists are matched with at most one level of
    nested parentheses, regex literals are not recognised by the brace
    scanner, and object-literal return types are not captured.
    """

    EXTENSIONS = [".ts", ".tsx"]

    # ------------------------------------------------------------------
    # Shared regular expressions (compiled once per class).
    # ------------------------------------------------------------------

    # Keywords that may sit between a doc comment and a declaration.
    _MODIFIERS = (
        "export|default|declare|abstract|async|static|public|private|"
        "protected|readonly|override|get|set"
    )
    # One decorator, optionally called; group 1 is its (dotted) name.
    _DECORATOR = r"@([\w$.]+)(?:\((?:[^()]|\([^()]*\))*\))?"
    # Parameter list allowing one level of nested parentheses; group = params.
    _PARAMS = r"\(((?:[^()]|\([^()]*\))*)\)"

    # Text allowed between the end of a doc comment and its declaration.
    _DOC_GAP_RE = re.compile(
        r"\s*(?:(?:\b(?:" + _MODIFIERS + r")\b|" + _DECORATOR + r")\s*)*"
    )
    _DECORATOR_RE = re.compile(_DECORATOR)
    # A run of decorators that ends (modulo modifiers) at the end of input.
    _DECORATOR_RUN_RE = re.compile(
        r"(?:" + _DECORATOR + r"\s*)+(?=(?:\b(?:" + _MODIFIERS + r")\b\s*)*\Z)"
    )
    # Triple-slash compiler directives such as `/// <reference ... />`.
    _DIRECTIVE_RE = re.compile(r"///\s*<")
    # BOM, whitespace, shebang and non-doc comments at the top of a file.
    _LEADING_TRIVIA_RE = re.compile(
        r"\ufeff?(?:\s+|#![^\n]*|//(?!/)[^\n]*|/\*(?!\*)[\s\S]*?\*/)*"
    )
    # Comments, string/template literals and braces. Matching the literals
    # first keeps braces inside them from being counted.
    _CODE_TOKEN_RE = re.compile(
        r"//[^\n]*"
        r"|/\*[\s\S]*?\*/"
        r'|"(?:\\.|[^"\\\n])*"'
        r"|'(?:\\.|[^'\\\n])*'"
        r"|`(?:\\[\s\S]|[^`\\])*`"
        r"|[{}]"
    )
    _NON_NEWLINE_RE = re.compile(r"[^\n]")
    _COMMENT_MARKER_RE = re.compile(r"^\s*(?:///|\*+)? ?")
    _TAG_LINE_RE = re.compile(r"\s*@(\w+)\b[ \t]*(.*)")

    # Every function pattern exposes: 1 = name, 2 = params, 3 = return type.
    _FUNCTION_RES = (
        # [export] [default] [async] function name<T>(params): Ret {
        re.compile(
            r"\b(?:export\s+)?(?:default\s+)?(?:async\s+)?function(?:\s*\*\s*|\s+)"
            r"([\w$]+)\s*(?:<[^(]*>)?\s*" + _PARAMS + r"\s*(?::\s*([^{;]+))?\s*\{"
        ),
        # [export] const name[: T] = [async] (params)[: Ret] =>
        re.compile(
            r"\b(?:export\s+)?const\s+([\w$]+)\s*(?::[^=]+)?=\s*(?:async\s+)?"
            r"(?:<[^(]*>\s*)?" + _PARAMS + r"\s*(?::\s*([^=]+?))?\s*=>"
        ),
        # [export] const name[: T] = [async] function (params)[: Ret] {
        re.compile(
            r"\b(?:export\s+)?const\s+([\w$]+)\s*(?::[^=]+)?=\s*(?:async\s+)?"
            r"function\s*\*?\s*(?:<[^(]*>)?\s*" + _PARAMS
            + r"\s*(?::\s*([^{;]+))?\s*\{"
        ),
    )
    # Groups: 1 = class name, 2 = base class, 3 = implemented interfaces.
    _CLASS_RE = re.compile(
        r"\b(?:export\s+)?(?:default\s+)?(?:declare\s+)?(?:abstract\s+)?"
        r"class\s+([\w$]+)\s*"
        r"(?:<[^{]*?>(?=\s*(?:extends\b|implements\b|\{))\s*)?"
        r"(?:extends\s+([\w$.]+)\s*(?:<[^{]*?>(?=\s*(?:implements\b|\{))\s*)?)?"
        r"(?:implements\s+([^{]+?))?\s*\{"
    )
    _INTERFACE_RE = re.compile(
        r"\b(?:export\s+)?(?:default\s+)?(?:declare\s+)?interface\s+([\w$]+)\s*"
        r"(?:<[^{]*?>(?=\s*(?:extends\b|\{))\s*)?"
        r"(?:extends\s+[^{]+?)?\s*\{"
    )
    _METHOD_RE = re.compile(
        r"(?P<mods>(?:\b(?:public|private|protected|static|async|readonly|"
        r"override|abstract|get|set)\s+)*)"
        r"(?P<name>[#\w$]+)\s*(?:<[^(]*>)?\s*"
        r"\((?P<params>(?:[^()]|\([^()]*\))*)\)"
        r"\s*(?::\s*(?P<ret>[^{;]+?))?\s*\{"
    )
    # Names the method pattern can match that are never methods.
    _NOT_METHOD_NAMES = frozenset(
        {"if", "for", "while", "switch", "catch", "with", "function",
         "return", "typeof", "new", "await", "super"}
    )
    _PROPERTY_RE = re.compile(r"(?:readonly\s+)?([\w$]+)(\?)?\s*:\s*([^;]+);?")
    # Groups: 1 = name, 2 = initializer text up to `;` or end of line.
    _CONSTANT_RE = re.compile(
        r"\b(?:export\s+)?(?:declare\s+)?(?:const|let|var)\s+([\w$]+)\s*"
        r"(?::\s*(?:=>|[^=;\n])+?)?\s*=(?![=>])\s*([^;\n]*)"
    )
    _TYPE_ALIAS_RE = re.compile(
        r"\b(?:export\s+)?(?:declare\s+)?type\s+([\w$]+)\s*(?:<.*?>)?\s*=(?!=)"
    )
    # Initializers that are functions/classes and therefore not constants.
    _NON_CONSTANT_VALUE_RE = re.compile(
        r"\s*(?:(?:async\s+)?function\b|class\b|"
        r"(?:async\s+)?(?:<[^(]*>\s*)?\((?:[^()]|\([^()]*\))*\)"
        r"\s*(?::[^=]+?)?\s*=>)"
    )
    # Groups: 1 = rest marker, 2 = name, 3 = `?`, 4 = type, 5 = default.
    # `=>` is consumed as part of the type so arrow types are not mistaken
    # for a default value.
    _PARAM_RE = re.compile(
        r"(?:(?:public|private|protected|readonly|override)\s+)*"
        r"(\.\.\.)?([\w$]+)\s*(\?)?\s*"
        r"(?::\s*((?:=>|[^=])+?))?\s*"
        r"(?:=(?!>)\s*(.+))?",
        re.DOTALL,
    )

    def __init__(self, file_path: str):
        """Create a parser for the TypeScript file at ``file_path``."""
        super().__init__(file_path)

    def get_language_name(self) -> str:
        """Return the language identifier handled by this parser."""
        return "typescript"

    @classmethod
    def supports_file(cls, file_path: str) -> bool:
        """Return True when ``file_path`` has a TypeScript extension."""
        return cls._get_extension(file_path) in cls.EXTENSIONS

    @staticmethod
    def _get_extension(file_path: str) -> str:
        """Return the lower-cased extension of ``file_path`` (with the dot)."""
        import os

        return os.path.splitext(file_path)[1].lower()

    def parse(self) -> list[DocElement]:
        """Read the file and return every documentation element found."""
        self.content = self._read_content()
        self.elements = []
        self._parse_module_docstring()
        self._parse_functions()
        self._parse_classes()
        self._parse_interfaces()
        self._parse_constants()
        return self.elements

    # ------------------------------------------------------------------
    # Declaration scanners
    # ------------------------------------------------------------------

    def _parse_module_docstring(self) -> None:
        """Record the doc comment that opens the file as the module docs.

        Only a comment at the very top (after an optional BOM, shebang and
        plain comments) is considered; a JSDoc block further down belongs to
        whatever declaration follows it, not to the module.
        """
        content = self.content
        start = self._LEADING_TRIVIA_RE.match(content).end()
        raw = ""
        if content.startswith("/**", start):
            end = content.find("*/", start + 2)
            if end != -1:
                raw = content[start:end + 2]
        elif content.startswith("///", start):
            lines = []
            for line in content[start:].split("\n"):
                stripped = line.strip()
                if not stripped.startswith("///") or self._DIRECTIVE_RE.match(stripped):
                    break
                lines.append(stripped)
            raw = "\n".join(lines)
        if not raw:
            return

        docstring = self._clean_jsdoc(raw)
        if not docstring:
            # File headers are often written purely as `@fileoverview ...`.
            tag_text = self._strip_comment_markers(raw)
            for tag in ("fileoverview", "file", "overview"):
                docstring = self._extract_jsdoc_tag(tag_text, tag) or ""
                if docstring:
                    break
        if docstring.strip():
            self.elements.append(
                DocElement(
                    name=self._get_module_name(),
                    element_type=ElementType.MODULE,
                    description=docstring,
                    full_docstring=docstring,
                    source_file=self.file_path,
                )
            )

    def _parse_functions(self) -> None:
        """Collect function declarations and functions bound to a ``const``."""
        for pattern in self._FUNCTION_RES:
            for match in pattern.finditer(self.content):
                name, params_str, return_type = match.groups()
                raw_doc = self._doc_comment_before(self.content, match.start())
                docstring = self._clean_jsdoc(raw_doc)
                # Tags are read from the uncut text; `docstring` stops at the
                # first tag by design.
                tag_text = self._strip_comment_markers(raw_doc)
                self.elements.append(
                    DocElement(
                        name=name,
                        element_type=ElementType.FUNCTION,
                        description=self._extract_summary(docstring),
                        full_docstring=docstring,
                        parameters=self._parse_params(params_str),
                        return_type=return_type.strip() if return_type else None,
                        return_description=(
                            self._extract_jsdoc_tag(tag_text, "returns")
                            or self._extract_jsdoc_tag(tag_text, "return")
                        ),
                        raises=self._extract_jsdoc_raises(tag_text),
                        examples=self._extract_jsdoc_examples(tag_text),
                        source_file=self.file_path,
                        line_number=self._get_line_number(match.start()),
                        visibility=self._get_visibility(name),
                    )
                )

    def _parse_classes(self) -> None:
        """Collect class declarations followed by their methods."""
        for match in self._CLASS_RE.finditer(self.content):
            class_name, extends, implements = match.groups()
            docstring = self._find_jsdoc_before(match.start())
            elem = DocElement(
                name=class_name,
                element_type=ElementType.CLASS,
                description=self._extract_summary(docstring),
                full_docstring=docstring,
                source_file=self.file_path,
                line_number=self._get_line_number(match.start()),
                visibility=self._get_visibility(class_name),
                decorators=self._find_decorators_before(match.start()),
            )
            if extends:
                elem.parameters.append(
                    Parameter(name=extends, description=f"Extends: {extends}")
                )
            if implements:
                # Bracket-aware split so `A<X, Y>, B` yields two names.
                for impl in self._split_params(implements):
                    impl = impl.strip()
                    if impl:
                        elem.parameters.append(
                            Parameter(name=impl, description=f"Implements: {impl}")
                        )
            # The class goes first so that its methods follow it in the output.
            self.elements.append(elem)
            # The pattern ends with `{`, so the last matched char opens the body.
            self._parse_class_methods(elem, match.end() - 1)

    def _parse_class_methods(
        self, class_elem: DocElement, brace_pos: Optional[int] = None
    ) -> None:
        """Append one METHOD element per method declared in a class body.

        Args:
            class_elem: The element describing the owning class.
            brace_pos: Index in ``self.content`` of the ``{`` opening the
                class body. When omitted the class is located by name.
        """
        if brace_pos is None:
            header = re.search(
                r"\bclass\s+" + re.escape(class_elem.name) + r"(?![\w$])",
                self.content,
            )
            if header is None:
                return
            brace_pos = self.content.find("{", header.end())
            if brace_pos == -1:
                return
        end_pos = self._find_matching_brace(self.content, brace_pos)
        if end_pos == -1:
            return

        offset = brace_pos + 1
        class_body = self.content[offset:end_pos]
        # Blank out method bodies so statements such as `if (x) {` inside
        # them are not mistaken for member declarations. Offsets are kept.
        visible = self._mask_nested_blocks(class_body)

        for match in self._METHOD_RE.finditer(visible):
            method_name = match.group("name")
            if method_name in self._NOT_METHOD_NAMES:
                continue
            # Slice from the unmasked body: object types inside the
            # signature were blanked in `visible`.
            params_str = class_body[match.start("params"):match.end("params")]
            return_type = None
            if match.group("ret") is not None:
                return_type = class_body[match.start("ret"):match.end("ret")].strip() or None

            raw_doc = self._doc_comment_before(class_body, match.start())
            method_docstring = self._clean_jsdoc(raw_doc)
            tag_text = self._strip_comment_markers(raw_doc)
            params = [p for p in self._parse_params(params_str) if p.name != "this"]

            if "private" in match.group("mods").split() or method_name.startswith("#"):
                visibility = "private"
            else:
                visibility = self._get_visibility(method_name)

            self.elements.append(
                DocElement(
                    name=f"{class_elem.name}.{method_name}",
                    element_type=ElementType.METHOD,
                    description=self._extract_summary(method_docstring),
                    full_docstring=method_docstring,
                    parameters=params,
                    return_type=return_type,
                    return_description=(
                        self._extract_jsdoc_tag(tag_text, "returns")
                        or self._extract_jsdoc_tag(tag_text, "return")
                    ),
                    raises=self._extract_jsdoc_raises(tag_text),
                    examples=self._extract_jsdoc_examples(tag_text),
                    source_file=self.file_path,
                    # Absolute position of the name, so multi-line class
                    # headers do not skew the result.
                    line_number=self._get_line_number(offset + match.start("name")),
                    visibility=visibility,
                )
            )

    def _parse_interfaces(self) -> None:
        """Collect interface declarations and their top-level properties."""
        for match in self._INTERFACE_RE.finditer(self.content):
            interface_name = match.group(1)
            open_brace = match.end() - 1
            close_brace = self._find_matching_brace(self.content, open_brace)
            if close_brace == -1:
                continue
            # Hide nested object types so their members are not reported as
            # properties of the interface itself.
            body = self._mask_nested_blocks(self.content[open_brace + 1:close_brace])
            docstring = self._find_jsdoc_before(match.start())
            elem = DocElement(
                name=interface_name,
                element_type=ElementType.INTERFACE,
                description=self._extract_summary(docstring),
                full_docstring=docstring,
                source_file=self.file_path,
                line_number=self._get_line_number(match.start()),
                visibility=self._get_visibility(interface_name),
            )
            elem.attributes = self._parse_interface_properties(body)
            self.elements.append(elem)

    def _parse_interface_properties(
        self, body: str
    ) -> list[tuple[str, Optional[str], Optional[str]]]:
        """Parse ``name[?]: type`` lines from an interface body.

        Returns:
            ``(name, type, note)`` tuples where ``note`` is ``"Optional. "``
            for properties declared with ``?`` and ``""`` otherwise.
        """
        properties = []
        for line in body.split("\n"):
            line = line.strip()
            if not line or line.startswith(("//", "/*", "*")):
                continue
            match = self._PROPERTY_RE.match(line)
            if match:
                prop_name, optional_mark, prop_type = match.groups()
                # Members may be separated by `,` instead of `;`.
                prop_type = prop_type.strip().rstrip(",").rstrip()
                properties.append(
                    (prop_name, prop_type, "Optional. " if optional_mark else "")
                )
        return properties

    def _parse_constants(self) -> None:
        """Collect top-level variable declarations and type aliases.

        Declarations nested inside any block (function bodies, classes, ...)
        are ignored, and function- or class-valued initializers are skipped
        because those are not constants.
        """
        top_level = self._mask_nested_blocks(self.content)
        for pattern in (self._CONSTANT_RE, self._TYPE_ALIAS_RE):
            for match in pattern.finditer(top_level):
                name = match.group(1)
                value = match.group(2) if pattern.groups > 1 else ""
                if self._NON_CONSTANT_VALUE_RE.match(value):
                    continue
                docstring = self._find_jsdoc_before(match.start())
                self.elements.append(
                    DocElement(
                        name=name,
                        element_type=ElementType.CONSTANT,
                        description=self._extract_summary(docstring),
                        full_docstring=docstring,
                        source_file=self.file_path,
                        line_number=self._get_line_number(match.start()),
                        visibility=self._get_visibility(name),
                    )
                )

    # ------------------------------------------------------------------
    # Doc-comment helpers
    # ------------------------------------------------------------------

    def _doc_comment_before(self, text: str, position: int) -> str:
        """Return the raw doc comment directly preceding ``position``.

        The comment must be adjacent: only whitespace, modifier keywords and
        decorators may separate it from ``position``. Returns ``""`` when
        there is none, so a declaration never inherits an earlier one's docs.
        """
        end = text.rfind("*/", 0, position)
        if end != -1 and self._DOC_GAP_RE.fullmatch(text, end + 2, position):
            start = text.rfind("/**", 0, end)
            # A `*/` in between means the adjacent comment is a plain
            # `/* ... */` block, not the JSDoc found further back.
            if start != -1 and "*/" not in text[start + 3:end]:
                return text[start:end + 2]
            return ""

        # Fall back to a contiguous run of `///` lines.
        collected = []
        for line in reversed(text[:position].split("\n")):
            stripped = line.strip()
            if stripped.startswith("///") and not self._DIRECTIVE_RE.match(stripped):
                collected.append(stripped)
            elif collected or not self._DOC_GAP_RE.fullmatch(stripped):
                break
        return "\n".join(reversed(collected))

    def _find_jsdoc_before(self, position: int) -> str:
        """Return the cleaned description documenting ``position``, or ``""``."""
        return self._clean_jsdoc(self._doc_comment_before(self.content, position))

    def _find_jsdoc_in_body(self, body: str, position: int) -> str:
        """Like ``_find_jsdoc_before`` but relative to ``body``."""
        return self._clean_jsdoc(self._doc_comment_before(body, position))

    def _find_decorators_before(self, position: int) -> list[str]:
        """Return the names (``"@Name"``) of decorators applied at ``position``.

        Only the contiguous decorator run immediately before ``position`` is
        considered, so JSDoc tags and decorators of earlier declarations are
        not picked up.
        """
        run = self._DECORATOR_RUN_RE.search(self.content, 0, position)
        if run is None:
            return []
        return [f"@{dec.group(1)}" for dec in self._DECORATOR_RE.finditer(run.group(0))]

    def _strip_comment_markers(self, comment: str) -> str:
        """Remove ``/** */``, leading ``*`` and ``///`` decoration.

        Unlike ``_clean_jsdoc`` the tag section is kept, which makes the
        result suitable for the ``_extract_jsdoc_*`` helpers.
        """
        text = comment.strip()
        if text.startswith("/**"):
            text = text[3:-2] if text.endswith("*/") else text[3:]
        lines = [
            self._COMMENT_MARKER_RE.sub("", line, count=1).rstrip()
            for line in text.split("\n")
        ]
        return "\n".join(lines).strip("\n")

    def _clean_jsdoc(self, jsdoc: str) -> str:
        """Return the free-text description of a doc comment.

        Comment decoration and blank lines are dropped and the text stops at
        the first ``@tag`` line.
        """
        cleaned = []
        for line in self._strip_comment_markers(jsdoc).split("\n"):
            line = line.strip()
            if line.startswith("@"):
                break
            if line:
                cleaned.append(line)
        return "\n".join(cleaned).strip()

    def _extract_summary(self, docstring: str) -> str:
        """Return the first line of ``docstring`` (``""`` when empty)."""
        if not docstring:
            return ""
        return docstring.strip().split("\n")[0].strip()

    def _jsdoc_tags(self, docstring: str) -> list[tuple[str, str]]:
        """Split marker-free doc text into ``(tag, body)`` pairs.

        A tag starts on a line beginning with ``@name`` and runs until the
        next such line, so inline ``{@link ...}`` references stay in the body.
        """
        tags = []
        name = None
        body = []
        for line in docstring.split("\n"):
            tag_line = self._TAG_LINE_RE.match(line)
            if tag_line:
                if name is not None:
                    tags.append((name, "\n".join(body).strip("\n").rstrip()))
                name, body = tag_line.group(1), [tag_line.group(2)]
            elif name is not None:
                body.append(line)
        if name is not None:
            tags.append((name, "\n".join(body).strip("\n").rstrip()))
        return tags

    def _extract_jsdoc_tag(self, docstring: str, tag: str) -> Optional[str]:
        """Return the body of the first non-empty ``@tag``, or ``None``.

        ``docstring`` must still contain its tags (see
        ``_strip_comment_markers``); the tag name is matched case-insensitively.
        """
        wanted = tag.lower()
        for name, body in self._jsdoc_tags(docstring):
            if name.lower() == wanted and body.strip():
                return body.strip()
        return None

    def _extract_jsdoc_raises(self, docstring: str) -> list[tuple[str, str]]:
        """Return ``(type, description)`` for each ``@throws``/``@exception``.

        Both ``@throws {Type} text`` and the brace-less ``@throws Type text``
        are understood.
        """
        raises = []
        for name, body in self._jsdoc_tags(docstring):
            if name.lower() not in ("throws", "exception"):
                continue
            parsed = re.match(r"\{([^{}]*)\}\s*-?\s*(.*)", body, re.DOTALL)
            if parsed is None:
                parsed = re.match(r"(\w+)\s*(.*)", body, re.DOTALL)
            if parsed:
                raises.append((parsed.group(1).strip(), parsed.group(2).strip()))
        return raises

    def _extract_jsdoc_examples(self, docstring: str) -> list[str]:
        """Return the (possibly multi-line) body of every ``@example`` tag."""
        return [
            body
            for name, body in self._jsdoc_tags(docstring)
            if name.lower() == "example" and body.strip()
        ]

    # ------------------------------------------------------------------
    # Signature helpers
    # ------------------------------------------------------------------

    def _parse_params(self, params_str: str) -> list[Parameter]:
        """Parse a parameter list (text between the parentheses).

        Accessibility modifiers are ignored, a rest parameter keeps its
        ``...`` prefix in the name, and anything that does not look like
        ``name[?][: type][= default]`` (e.g. destructuring) is returned as a
        bare ``Parameter`` named after the raw text.
        """
        params = []
        if not params_str.strip():
            return params
        for part in self._split_params(params_str):
            part = part.strip()
            if not part:
                continue
            match = self._PARAM_RE.fullmatch(part)
            if match:
                rest, name, optional_mark, type_hint, default = match.groups()
                params.append(
                    Parameter(
                        name=(rest or "") + name,
                        type_hint=type_hint.strip() if type_hint else None,
                        default_value=default.strip() if default else None,
                        is_optional=optional_mark is not None,
                    )
                )
            else:
                params.append(Parameter(name=part))
        return params

    def _split_params(self, params_str: str) -> list[str]:
        """Split on top-level commas only.

        Commas inside ``<>``, ``()``, ``[]``, ``{}`` or string literals do
        not split, and the ``>`` of an arrow ``=>`` is not treated as a
        closing bracket. Pieces keep their surrounding whitespace.
        """
        parts = []
        current = []
        depth = 0
        quote = None
        previous = ""
        for char in params_str:
            if quote is not None:
                if char == quote and previous != "\\":
                    quote = None
            elif char in "\"'`":
                quote = char
            elif char in "<([{":
                depth += 1
            elif char in ")]}" or (char == ">" and previous != "="):
                depth = max(depth - 1, 0)
            elif char == "," and depth == 0:
                parts.append("".join(current))
                current = []
                previous = char
                continue
            current.append(char)
            previous = char
        tail = "".join(current)
        if tail.strip():
            parts.append(tail)
        return parts

    # ------------------------------------------------------------------
    # Generic helpers
    # ------------------------------------------------------------------

    def _get_visibility(self, name: str) -> str:
        """Classify ``name`` as ``"dunder"``, ``"private"`` or ``"public"``
        from its leading underscores."""
        if name.startswith("__"):
            return "dunder"
        if name.startswith("_"):
            return "private"
        return "public"

    def _get_module_name(self) -> str:
        """Return the file name without directory or extension."""
        import os

        return os.path.splitext(os.path.basename(self.file_path))[0]

    def _get_line_number(self, position: int) -> int:
        """Return the 1-based line number of ``position`` in the file."""
        return self.content.count("\n", 0, position) + 1

    def _count_lines(self, text: str) -> int:
        """Return the number of newline characters in ``text``."""
        return text.count("\n")

    def _mask_nested_blocks(self, text: str) -> str:
        """Return ``text`` with everything inside braces replaced by spaces.

        The result has the same length and the same line breaks as ``text``
        and keeps the outermost ``{``/``}`` characters, so offsets and line
        numbers computed on it are valid for ``text`` too.
        """
        pieces = []
        depth = 0
        cursor = 0
        for token in self._CODE_TOKEN_RE.finditer(text):
            symbol = token.group(0)
            if symbol != "{" and symbol != "}":
                continue  # comment or string literal: part of the next segment
            segment = text[cursor:token.start()]
            pieces.append(
                segment if depth == 0 else self._NON_NEWLINE_RE.sub(" ", segment)
            )
            if symbol == "{":
                pieces.append("{" if depth == 0 else " ")
                depth += 1
            else:
                depth = max(depth - 1, 0)
                pieces.append("}" if depth == 0 else " ")
            cursor = token.end()
        tail = text[cursor:]
        pieces.append(tail if depth == 0 else self._NON_NEWLINE_RE.sub(" ", tail))
        return "".join(pieces)

    def _find_matching_brace(self, text: str, start: int = 0) -> int:
        """Return the index of the ``}`` closing the first ``{`` at/after ``start``.

        Braces inside comments and string/template literals are ignored.
        Returns ``-1`` when the block is not closed or a ``}`` appears before
        any ``{``. The index is relative to the beginning of ``text``.
        """
        depth = 0
        for token in self._CODE_TOKEN_RE.finditer(text, start):
            symbol = token.group(0)
            if symbol == "{":
                depth += 1
            elif symbol == "}":
                depth -= 1
                if depth == 0:
                    return token.start()
                if depth < 0:
                    return -1
        return -1