This commit is contained in:
405
.code_doc_cli/parsers/typescript_parser.py
Normal file
405
.code_doc_cli/parsers/typescript_parser.py
Normal file
@@ -0,0 +1,405 @@
|
||||
"""TypeScript parser using regex patterns."""
|
||||
|
||||
import re
|
||||
from typing import Optional, List
|
||||
from .base import Parser, DocElement, ElementType, Parameter
|
||||
|
||||
|
||||
class TypeScriptParser(Parser):
|
||||
"""Parser for TypeScript source files."""
|
||||
|
||||
EXTENSIONS = [".ts", ".tsx"]
|
||||
|
||||
def __init__(self, file_path: str):
    """Create a parser for the TypeScript source located at ``file_path``.

    All set-up is delegated to the base ``Parser`` initializer.
    """
    super().__init__(file_path)
|
||||
|
||||
def get_language_name(self) -> str:
    """Return the identifier of the language this parser handles."""
    language = "typescript"
    return language
|
||||
|
||||
@classmethod
def supports_file(cls, file_path: str) -> bool:
    """Tell whether ``file_path`` carries one of the extensions in ``EXTENSIONS``.

    The extension is obtained through ``_get_extension`` before the
    membership test.
    """
    return cls._get_extension(file_path) in cls.EXTENSIONS
|
||||
|
||||
@staticmethod
|
||||
def _get_extension(file_path: str) -> str:
|
||||
import os
|
||||
return os.path.splitext(file_path)[1].lower()
|
||||
|
||||
def parse(self) -> list[DocElement]:
    """Read the source file and collect every documentation element in it.

    The file text is stored on ``self.content`` and the result list on
    ``self.elements``; both are reset on every call.  The extraction passes
    run in a fixed order: module docstring, functions, classes, interfaces
    and finally constants.

    Returns:
        The list held in ``self.elements`` after all passes have run.
    """
    self.content = self._read_content()
    self.elements = []

    extraction_passes = (
        self._parse_module_docstring,
        self._parse_functions,
        self._parse_classes,
        self._parse_interfaces,
        self._parse_constants,
    )
    for run_pass in extraction_passes:
        run_pass()

    return self.elements
|
||||
|
||||
def _parse_module_docstring(self) -> None:
    """Record the file-level doc comment, if any, as a MODULE element.

    Only a doc comment that opens the file is accepted.  It may be preceded
    by a BOM, a shebang line, blank lines and ordinary (non-doc) ``//`` or
    ``/* */`` comments such as a licence header.  A ``/** ... */`` block is
    preferred; otherwise the leading run of ``///`` lines is used.  Taking
    the first JSDoc block found anywhere in the file would promote the
    documentation of an arbitrary declaration to module level.
    """
    preamble = re.match(
        r"\ufeff?(?:#![^\n]*\n)?(?:\s+|//(?!/)[^\n]*|/\*(?!\*)[\s\S]*?\*/)*",
        self.content,
    )
    remainder = self.content[preamble.end():]

    block = re.match(r"/\*\*[\s\S]*?\*/", remainder)
    if block:
        docstring = self._clean_jsdoc(block.group(0))
    else:
        doc_lines = []
        for line in remainder.split("\n"):
            stripped = line.strip()
            if not stripped.startswith("///"):
                break
            # "/// <reference ... />" lines are compiler directives, not prose.
            if stripped[3:].lstrip().startswith("<"):
                continue
            doc_lines.append(stripped)
        docstring = self._clean_jsdoc("\n".join(doc_lines))

    if not docstring.strip():
        return

    self.elements.append(
        DocElement(
            name=self._get_module_name(),
            element_type=ElementType.MODULE,
            description=docstring,
            full_docstring=docstring,
            source_file=self.file_path,
        )
    )
|
||||
|
||||
def _parse_functions(self) -> None:
    """Collect function declarations, arrow functions and function expressions.

    Every pattern captures the same three groups -- name, raw parameter
    list, return type -- so arrow functions and function expressions get
    their parameters and return type recorded just like declarations.
    Arrow functions are recognised with or without a return annotation and
    with either a block or an expression body.

    Known limit: the parameter list is cut at the first ``)``, so a
    parameter whose type contains parentheses is truncated.

    One FUNCTION element per match is appended to ``self.elements``, grouped
    by pattern rather than by position in the file.
    """
    patterns = [
        # export default async function name<T>(a, b): R {
        r"(?:export\s+)?(?:default\s+)?(?:async\s+)?\bfunction\s+(\w+)\s*"
        r"(?:<[^>]*>)?\s*\(([^)]*)\)\s*(?::\s*([^{;]+))?\s*\{",
        # export const name: T = async <U>(a, b): R => ...
        r"(?:export\s+)?\bconst\s+(\w+)\s*(?::[^=;]+)?=\s*(?:async\s+)?"
        r"(?:<[^>]*>\s*)?\(([^)]*)\)\s*(?::\s*([^=;]+?))?\s*=>",
        # export const name = async function (a, b): R {
        r"(?:export\s+)?\bconst\s+(\w+)\s*(?::[^=;]+)?=\s*(?:async\s+)?"
        r"function\s*(?:<[^>]*>)?\s*\(([^)]*)\)\s*(?::\s*([^{;]+))?\s*\{",
    ]

    for pattern in patterns:
        for match in re.finditer(pattern, self.content):
            name, params_str, return_type = match.groups()
            docstring = self._find_jsdoc_before(match.start())

            self.elements.append(
                DocElement(
                    name=name,
                    element_type=ElementType.FUNCTION,
                    description=self._extract_summary(docstring),
                    full_docstring=docstring,
                    parameters=self._parse_params(params_str),
                    return_type=return_type.strip() if return_type else None,
                    return_description=self._extract_jsdoc_tag(docstring, "returns"),
                    raises=self._extract_jsdoc_raises(docstring),
                    examples=self._extract_jsdoc_examples(docstring),
                    source_file=self.file_path,
                    line_number=self._get_line_number(match.start()),
                    visibility=self._get_visibility(name),
                )
            )
|
||||
|
||||
def _parse_classes(self) -> None:
    """Collect class declarations and, through ``_parse_class_methods``, their methods.

    The header pattern accepts ``export``/``default``/``abstract`` prefixes,
    type parameters with one level of nesting, and a base class that is
    qualified (``ns.Base``) and/or generic (``Base<T>``).  Previously such
    classes did not match the pattern and were silently skipped.

    The base class and each implemented interface are stored as
    ``Parameter`` entries on the class element.  Methods are appended to
    ``self.elements`` before the class element itself.
    """
    type_params = r"(?:<(?:[^<>]|<[^<>]*>)*>)?"
    pattern = (
        r"(?:export\s+)?(?:default\s+)?(?:abstract\s+)?\bclass\s+(\w+)\s*"
        + type_params
        + r"\s*(?:extends\s+([\w$.]+)\s*"
        + type_params
        + r")?\s*(?:implements\s+([^{]+))?\s*\{"
    )

    for match in re.finditer(pattern, self.content):
        class_name, extends, implements = match.groups()
        docstring = self._find_jsdoc_before(match.start())

        elem = DocElement(
            name=class_name,
            element_type=ElementType.CLASS,
            description=self._extract_summary(docstring),
            full_docstring=docstring,
            source_file=self.file_path,
            line_number=self._get_line_number(match.start()),
            visibility=self._get_visibility(class_name),
            decorators=self._find_decorators_before(match.start()),
        )

        if extends:
            elem.parameters.append(Parameter(name=extends, description=f"Extends: {extends}"))
        if implements:
            # Split with the bracket-aware helper so "Foo<A, B>" stays in one piece.
            for impl in self._split_params(implements):
                impl = impl.strip()
                if impl:
                    elem.parameters.append(Parameter(name=impl, description=f"Implements: {impl}"))

        self._parse_class_methods(elem)

        self.elements.append(elem)
|
||||
|
||||
def _parse_class_methods(self, class_elem: DocElement) -> None:
    """Append one METHOD element per method found in the body of ``class_elem``.

    Fixes over the previous implementation:

    * The brace scan starts on the class's opening ``{``.  Starting after
      it made the scan stop at the end of the first nested block, so only
      the first method was ever inside the extracted body.
    * Control-flow statements (``if (...) {`` and the like) inside method
      bodies are no longer reported as methods.
    * A return type can no longer run across a ``;`` and swallow the next
      member; generic methods (``name<T>(...)``) are recognised.
    * Line numbers are computed from the absolute offset of the method name.
    * An explicit ``private`` modifier yields ``"private"`` visibility.

    Args:
        class_elem: The class element whose ``name`` and ``line_number``
            locate the class header in ``self.content``.
    """
    # Start on the line where the class was reported so that an earlier
    # mention of the same name (comment, longer identifier) is not picked up.
    search_from = 0
    for _ in range((class_elem.line_number or 1) - 1):
        newline = self.content.find("\n", search_from)
        if newline == -1:
            break
        search_from = newline + 1

    header = re.compile(rf"\bclass\s+{re.escape(class_elem.name)}\b").search(
        self.content, search_from
    )
    if header is None:
        return

    brace_pos = self.content.find("{", header.end())
    if brace_pos == -1:
        return

    close_offset = self._find_matching_brace(self.content[brace_pos:])
    if close_offset == -1:
        return

    body_start = brace_pos + 1
    class_body = self.content[body_start:brace_pos + close_offset]

    method_pattern = re.compile(
        r"(?P<access>public|private|protected|readonly)?\s*(?:static\s+)?(?:async\s+)?"
        r"(?P<name>\w+)\s*(?:<(?:[^<>]|<[^<>]*>)*>)?\s*"
        r"\((?P<params>[^)]*)\)\s*(?::\s*(?P<returns>[^{;]+))?\s*\{"
    )
    # Keywords that also have the shape "word (...) {" but are not methods.
    control_keywords = {"if", "for", "while", "switch", "catch", "with", "function"}

    for match in method_pattern.finditer(class_body):
        method_name = match.group("name")
        if method_name in control_keywords:
            continue

        method_docstring = self._find_jsdoc_in_body(class_body, match.start())
        params = [
            p for p in self._parse_params(match.group("params")) if p.name != "this"
        ]
        return_type = match.group("returns")

        if match.group("access") == "private":
            visibility = "private"
        else:
            visibility = self._get_visibility(method_name)

        self.elements.append(
            DocElement(
                name=f"{class_elem.name}.{method_name}",
                element_type=ElementType.METHOD,
                description=self._extract_summary(method_docstring),
                full_docstring=method_docstring,
                parameters=params,
                return_type=return_type.strip() if return_type else None,
                return_description=self._extract_jsdoc_tag(method_docstring, "returns"),
                source_file=self.file_path,
                line_number=self._get_line_number(body_start + match.start("name")),
                visibility=visibility,
            )
        )
|
||||
|
||||
def _parse_interfaces(self) -> None:
    """Collect interface declarations together with their properties.

    The header pattern accepts an ``extends`` clause; previously an
    interface that extended another one did not match and was skipped.
    The body is delimited with ``_find_matching_brace`` so that nested
    object types no longer end it at their first ``}``.  An interface whose
    braces do not balance is skipped.
    """
    header = re.compile(
        r"(?:export\s+)?(?:declare\s+)?\binterface\s+(\w+)\s*"
        r"(?:<(?:[^<>]|<[^<>]*>)*>)?\s*"
        r"(?:extends\s+[^{]+)?\{"
    )

    for match in header.finditer(self.content):
        interface_name = match.group(1)

        # The pattern ends on the opening brace; scan from that brace.
        open_idx = match.end() - 1
        close_offset = self._find_matching_brace(self.content[open_idx:])
        if close_offset == -1:
            continue
        body = self.content[open_idx + 1:open_idx + close_offset]

        docstring = self._find_jsdoc_before(match.start())

        elem = DocElement(
            name=interface_name,
            element_type=ElementType.INTERFACE,
            description=self._extract_summary(docstring),
            full_docstring=docstring,
            source_file=self.file_path,
            line_number=self._get_line_number(match.start()),
            visibility=self._get_visibility(interface_name),
        )
        elem.attributes = self._parse_interface_properties(body)

        self.elements.append(elem)
|
||||
|
||||
def _parse_interface_properties(self, body: str) -> list[tuple[str, Optional[str], Optional[str]]]:
|
||||
"""Parse properties from interface body."""
|
||||
properties = []
|
||||
lines = body.split("\n")
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
if not line or line.startswith("//") or line.startswith("/**"):
|
||||
continue
|
||||
|
||||
match = re.match(r"(\w+)(?:\?)?\s*:\s*([^;]+);?", line)
|
||||
if match:
|
||||
prop_name = match.group(1)
|
||||
prop_type = match.group(2).strip()
|
||||
is_optional = "?" in line
|
||||
properties.append((prop_name, prop_type, f"{'Optional. ' if is_optional else ''}"))
|
||||
return properties
|
||||
|
||||
def _parse_constants(self) -> None:
    """Collect ``const``/``let``/``var`` declarations and ``type`` aliases.

    The assignment part of the variable pattern used to be ``[=:]=``, which
    only matches the two-character sequences ``==`` and ``:=``; no valid
    declaration could match.  It now accepts an optional ``: Type``
    annotation followed by a single ``=``.

    Declarations whose value is a function, class or arrow function are
    skipped here; functions are the concern of ``_parse_functions``.
    Generic aliases (``type Foo<T> = ...``) are recognised too.

    One CONSTANT element per match is appended to ``self.elements``.
    """
    patterns = [
        r"(?:export\s+)?\b(?:const|let|var)\s+(?P<name>\w+)\s*(?::[^=;]+?)?\s*"
        r"=(?![=>])\s*(?P<value>[^;{]+)",
        r"(?:export\s+)?\btype\s+(?P<name>\w+)\s*(?:<(?:[^<>]|<[^<>]*>)*>)?\s*=",
    ]
    callable_value = re.compile(
        r"\s*(?:async\s+)?"
        r"(?:(?:function|class|interface)\b"
        r"|(?:<[^>]*>\s*)?\([^)]*\)\s*(?::[^=]+)?=>"
        r"|\w+\s*=>)"
    )

    for pattern in patterns:
        for match in re.finditer(pattern, self.content):
            name = match.group("name")
            # The alias pattern has no value group.
            value = match.groupdict().get("value") or ""

            if callable_value.match(value):
                continue

            docstring = self._find_jsdoc_before(match.start())

            self.elements.append(
                DocElement(
                    name=name,
                    element_type=ElementType.CONSTANT,
                    description=self._extract_summary(docstring),
                    full_docstring=docstring,
                    source_file=self.file_path,
                    line_number=self._get_line_number(match.start()),
                    visibility=self._get_visibility(name),
                )
            )
|
||||
|
||||
def _find_jsdoc_before(self, position: int) -> str:
    """Return the cleaned doc comment attached to the declaration at ``position``.

    A comment is attached only when nothing but whitespace, decorators and
    declaration modifiers (``export``, ``default``, ``declare``,
    ``abstract``, ``async``) lies between it and ``position``.  The earlier
    implementation returned the last doc comment found anywhere before
    ``position``, so an undocumented declaration inherited the
    documentation of whatever preceded it.

    A ``/** ... */`` block is preferred; otherwise the contiguous run of
    ``///`` lines directly above the declaration is used.

    Args:
        position: Offset in ``self.content`` where the declaration starts.

    Returns:
        The comment passed through ``_clean_jsdoc``, or ``""`` when the
        declaration has no attached doc comment.
    """
    preceding = self.content[:position]

    # Tokens that may legitimately sit between a doc comment and its target.
    gap_noise = re.compile(
        r"@[\w$.]+(?:\((?:[^()]|\([^()]*\))*\))?"  # decorator, one level of nested parens
        r"|\b(?:export|default|declare|abstract|async)\b"
    )

    block_end = preceding.rfind("*/")
    if block_end != -1:
        block_start = preceding.rfind("/**", 0, block_end)
        if (
            block_start != -1
            # A "*/" in between means the final "*/" closes a different comment.
            and "*/" not in preceding[block_start + 3:block_end]
            and not gap_noise.sub("", preceding[block_end + 2:]).strip()
        ):
            return self._clean_jsdoc(preceding[block_start:block_end + 2])

    doc_lines = []
    for line in reversed(preceding.split("\n")):
        stripped = line.strip()
        if stripped.startswith("///"):
            doc_lines.append(stripped)
        elif doc_lines or gap_noise.sub("", stripped).strip():
            # Either the "///" run has ended or real code separates us from it.
            break

    if doc_lines:
        return self._clean_jsdoc("\n".join(reversed(doc_lines)))
    return ""
|
||||
|
||||
def _find_jsdoc_in_body(self, body: str, position: int) -> str:
    """Return the cleaned ``/** ... */`` comment attached to the member at ``position``.

    The comment must be the last thing before ``position`` apart from
    whitespace, decorators and member modifiers.  The earlier
    implementation returned the last JSDoc block found anywhere in
    ``body[:position]``, so an undocumented member inherited the
    documentation of an earlier one.

    Args:
        body: Text of a class body.
        position: Offset in ``body`` where the member starts.

    Returns:
        The comment passed through ``_clean_jsdoc``, or ``""`` when the
        member has no attached JSDoc block.
    """
    preceding = body[:position]

    block_end = preceding.rfind("*/")
    if block_end == -1:
        return ""

    block_start = preceding.rfind("/**", 0, block_end)
    # A "*/" between the two markers means the final "*/" closes another comment.
    if block_start == -1 or "*/" in preceding[block_start + 3:block_end]:
        return ""

    gap_noise = re.compile(
        r"@[\w$.]+(?:\((?:[^()]|\([^()]*\))*\))?"  # decorator, one level of nested parens
        r"|\b(?:public|private|protected|readonly|static|async|abstract|override"
        r"|declare|get|set|export|default)\b"
    )
    if gap_noise.sub("", preceding[block_end + 2:]).strip():
        return ""

    return self._clean_jsdoc(preceding[block_start:block_end + 2])
|
||||
|
||||
def _find_decorators_before(self, position: int) -> list[str]:
|
||||
"""Find decorators before a position."""
|
||||
search_text = self.content[:position]
|
||||
decorators = []
|
||||
for match in re.finditer(r"@(\w+)", search_text):
|
||||
decorators.append(f"@{match.group(1)}")
|
||||
return decorators[-5:]
|
||||
|
||||
def _clean_jsdoc(self, jsdoc: str) -> str:
|
||||
"""Clean JSDoc comment to plain text."""
|
||||
lines = jsdoc.split("\n")
|
||||
cleaned = []
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
if line.startswith("/**") or line.startswith("*"):
|
||||
line = line.lstrip("/").lstrip("*").lstrip()
|
||||
if line.startswith("@"):
|
||||
break
|
||||
if line:
|
||||
cleaned.append(line)
|
||||
return "\n".join(cleaned).strip()
|
||||
|
||||
def _extract_summary(self, docstring: str) -> str:
|
||||
"""Extract first line as summary."""
|
||||
if not docstring:
|
||||
return ""
|
||||
lines = docstring.strip().split("\n")
|
||||
return lines[0].strip() if lines else ""
|
||||
|
||||
def _extract_jsdoc_tag(self, docstring: str, tag: str) -> Optional[str]:
|
||||
"""Extract value of a specific JSDoc tag."""
|
||||
pattern = rf"@{tag}\s*(.+?)(?:@|\Z)"
|
||||
match = re.search(pattern, docstring, re.DOTALL | re.IGNORECASE)
|
||||
if match:
|
||||
return match.group(1).strip()
|
||||
return None
|
||||
|
||||
def _extract_jsdoc_raises(self, docstring: str) -> list[tuple[str, str]]:
|
||||
"""Extract @throws tags from JSDoc."""
|
||||
raises = []
|
||||
pattern = r"@throws\s+(\w+)\s*(.+?)(?:@|\Z)"
|
||||
for match in re.finditer(pattern, docstring, re.DOTALL):
|
||||
exc_type = match.group(1)
|
||||
exc_desc = match.group(2).strip()
|
||||
raises.append((exc_type, exc_desc))
|
||||
return raises
|
||||
|
||||
def _extract_jsdoc_examples(self, docstring: str) -> list[str]:
|
||||
"""Extract @example tags from JSDoc."""
|
||||
examples = []
|
||||
pattern = r"@example\s*\n?\s*([^\n@]+)"
|
||||
for match in re.finditer(pattern, docstring, re.DOTALL):
|
||||
examples.append(match.group(1).strip())
|
||||
return examples
|
||||
|
||||
def _parse_params(self, params_str: str) -> list[Parameter]:
    """Turn the text between a signature's parentheses into ``Parameter`` objects.

    The text is split with ``_split_params`` and each piece is read as
    ``[modifiers] [...]name[?][: type][= default]``:

    * the type annotation is optional, so ``x = 5`` yields name ``x`` with
      a default instead of a parameter literally named ``"x = 5"``;
    * the type may contain commas and ``=>`` (``Map<K, V>``,
      ``() => void``) without being cut short;
    * constructor parameter-property modifiers (``private readonly x``) are
      skipped, and a rest parameter keeps its ``...`` prefix in the name;
    * a parameter is optional only when ``?`` follows its name, not when a
      ``?`` appears somewhere in its type or default.

    Pieces that do not fit (destructuring patterns, for instance) are kept
    as a ``Parameter`` whose name is the raw text.

    Args:
        params_str: Raw parameter list, without the surrounding parentheses.

    Returns:
        One ``Parameter`` per non-empty piece; empty when ``params_str``
        is blank.
    """
    params = []
    if not params_str.strip():
        return params

    param_re = re.compile(
        r"(?:(?:public|private|protected|readonly)\s+)*"  # parameter-property modifiers
        r"((?:\.\.\.)?\w+)\s*(\?)?\s*"                    # name and optional marker
        r"(?::\s*(.+?))?\s*"                              # type annotation
        r"(?:=(?!>)\s*(.+))?$",                           # default; "=>" belongs to a type
        re.DOTALL,
    )

    for part in self._split_params(params_str):
        part = part.strip()
        if not part:
            continue

        match = param_re.match(part)
        if match is None:
            params.append(Parameter(name=part))
            continue

        name, optional_marker, type_hint, default_value = match.groups()
        params.append(
            Parameter(
                name=name,
                type_hint=type_hint.strip() if type_hint else None,
                default_value=default_value.strip() if default_value else None,
                is_optional=optional_marker is not None,
            )
        )

    return params
|
||||
|
||||
def _split_params(self, params_str: str) -> list[str]:
|
||||
"""Split parameter string respecting nested types."""
|
||||
parts = []
|
||||
current = ""
|
||||
depth = 0
|
||||
for char in params_str:
|
||||
if char == "<":
|
||||
depth += 1
|
||||
current += char
|
||||
elif char == ">":
|
||||
depth -= 1
|
||||
current += char
|
||||
elif char == "," and depth == 0:
|
||||
parts.append(current)
|
||||
current = ""
|
||||
else:
|
||||
current += char
|
||||
if current.strip():
|
||||
parts.append(current)
|
||||
return parts
|
||||
|
||||
def _get_visibility(self, name: str) -> str:
|
||||
"""Determine visibility based on name."""
|
||||
if name.startswith("_"):
|
||||
if name.startswith("__"):
|
||||
return "dunder"
|
||||
return "private"
|
||||
return "public"
|
||||
|
||||
def _get_module_name(self) -> str:
|
||||
"""Extract module name from file path."""
|
||||
import os
|
||||
base = os.path.basename(self.file_path)
|
||||
return os.path.splitext(base)[0]
|
||||
|
||||
def _get_line_number(self, position: int) -> int:
|
||||
"""Get line number from position."""
|
||||
return self.content[:position].count("\n") + 1
|
||||
|
||||
def _count_lines(self, text: str) -> int:
|
||||
"""Count lines in text."""
|
||||
return text.count("\n")
|
||||
|
||||
def _find_matching_brace(self, text: str) -> int:
|
||||
"""Find matching closing brace."""
|
||||
depth = 0
|
||||
for i, char in enumerate(text):
|
||||
if char == "{":
|
||||
depth += 1
|
||||
elif char == "}":
|
||||
depth -= 1
|
||||
if depth == 0:
|
||||
return i
|
||||
return -1
|
||||
Reference in New Issue
Block a user