"""TypeScript parser using regex patterns."""

import bisect
import os
import re
from typing import Optional, List

from .base import Parser, DocElement, ElementType, Parameter

# How far back (in characters) to look for the modifiers and decorators that
# may sit between a doc comment and the declaration it documents.  Bounding
# the search keeps each lookup cheap on large files.
_LOOKBACK_WINDOW = 4096

_IDENT = r"[\w$]+"
# Prevents a pattern from starting in the middle of an identifier, a member
# access (``obj.name``) or a ``#private`` name.
_NOT_AFTER_IDENT = r"(?<![\w$.#])"
# Optional type-parameter list; lazy so it stops at the first ``>`` after
# which the rest of the pattern can match (copes with ``<T extends A<B>>``).
_GENERICS = r"(?:<[^{;]*?>)?"
_DECLARATOR = r"(?:export\s+)?(?:const|let|var)\s+(?P<name>" + _IDENT + r")"

_BRACE_RE = re.compile(r"[{}]")
_PAREN_RE = re.compile(r"[()]")
_TOKEN_START_RE = re.compile(r"[/'\"`]")

# What may follow the closing parenthesis of a parameter list.
_BLOCK_TAIL_RE = re.compile(r"\s*(?::\s*(?P<ret>[^{;]+?))?\s*\{")
_ARROW_TAIL_RE = re.compile(r"\s*(?::\s*(?P<ret>[^=;{]+?))?\s*=>")

# "Head" patterns stop at the opening parenthesis of the parameter list; the
# parameter list itself is read with a balanced-parenthesis scan so that
# function-typed parameters such as ``cb: (x: number) => void`` are handled.
_FUNCTION_DECL_RE = re.compile(
    _NOT_AFTER_IDENT
    + r"(?:export\s+)?(?:async\s+)?function(?:\s*\*\s*|\s+)(?P<name>"
    + _IDENT
    + r")\s*"
    + _GENERICS
    + r"\s*\("
)
_ARROW_FUNCTION_RE = re.compile(
    _NOT_AFTER_IDENT
    + _DECLARATOR
    + r"\s*(?::[^=;]+?)?\s*=\s*(?:async\s+)?(?:"
    + _GENERICS
    + r"\s*\(|(?P<single>"
    + _IDENT
    + r")\s*=>)"
)
_FUNCTION_EXPR_RE = re.compile(
    _NOT_AFTER_IDENT
    + _DECLARATOR
    + r"\s*(?::[^=;]+?)?\s*=\s*(?:async\s+)?function\s*\*?\s*\("
)
_FUNCTION_PATTERNS = (
    (_FUNCTION_DECL_RE, _BLOCK_TAIL_RE),
    (_ARROW_FUNCTION_RE, _ARROW_TAIL_RE),
    (_FUNCTION_EXPR_RE, _BLOCK_TAIL_RE),
)
_CONST_FUNCTION_PATTERNS = _FUNCTION_PATTERNS[1:]

_CLASS_RE = re.compile(
    _NOT_AFTER_IDENT
    + r"(?:export\s+)?class\s+(?P<name>"
    + _IDENT
    + r")\s*"
    + _GENERICS
    + r"\s*(?:extends\s+(?P<extends>[\w$.]+(?:<[^{;]*?>)?))?"
    + r"\s*(?:implements\s+(?P<implements>[^{]+?))?\s*\{"
)
_METHOD_RE = re.compile(
    _NOT_AFTER_IDENT
    + r"(?P<mods>(?:(?:public|private|protected|static|readonly|abstract"
    + r"|override|async)\s+)*)"
    + r"(?:(?:get|set)\s+(?=[#\w$]))?(?:\*\s*)?"
    + r"(?P<name>#?"
    + _IDENT
    + r")\s*"
    + _GENERICS
    + r"\s*\("
)
# ``name(...) {`` shapes that are statements rather than member declarations.
_NON_METHOD_KEYWORDS = frozenset(
    {"if", "for", "while", "switch", "catch", "with", "function", "return"}
)
_INTERFACE_RE = re.compile(
    _NOT_AFTER_IDENT
    + r"(?:export\s+)?(?:declare\s+)?interface\s+(?P<name>"
    + _IDENT
    + r")\s*"
    + _GENERICS
    + r"\s*(?:extends\s+[^{]+?)?\s*\{"
)
_PROPERTY_RE = re.compile(
    r"(?:readonly\s+)?(?P<name>[\w$]+)\s*(?P<optional>\?)?\s*:\s*(?P<type>[^;]+);?"
)
_CONSTANT_RE = re.compile(
    _NOT_AFTER_IDENT
    + r"(?:export\s+)?(?:declare\s+)?(?:const|let|var)\s+(?P<name>"
    + _IDENT
    + r")\s*(?::[^=;]+?)?\s*=(?![=>])\s*"
)
_TYPE_ALIAS_RE = re.compile(
    _NOT_AFTER_IDENT
    + r"(?:export\s+)?(?:declare\s+)?type\s+(?P<name>"
    + _IDENT
    + r")\s*(?:<[^;{]*?>)?\s*=(?!>)"
)
_CALLABLE_VALUE_RE = re.compile(r"(?:async\s+)?(?:function|class)\b")

_TRAILING_KEYWORD_RE = re.compile(
    r"(?<![\w$.#@])(?:export|default|declare|abstract|async|public|private"
    r"|protected|static|readonly|override)\s*\Z"
)
_TRAILING_DECORATOR_RE = re.compile(
    r"@([\w$]+)(?:\.[\w$]+)*(?:\((?:[^()]|\([^()]*\))*\))?\s*\Z"
)

_LEADING_JSDOC_RE = re.compile(
    r"\ufeff?(?:#![^\n]*\n)?\s*(/\*\*(?!/).*?\*/)", re.DOTALL
)
# ``/// <reference ... />`` and friends are compiler directives, not docs.
_DIRECTIVE_RE = re.compile(r"///\s*<")
_COMMENT_LEAD_RE = re.compile(r"^(?:/\*+|\*+)")
_TAG_LINE_RE = re.compile(r"@([\w-]+)[ \t]*(.*)")
_THROWS_RE = re.compile(
    r"(?:\{(?P<braced>[^}]*)\}|(?P<bare>[\w$.]+))\s*(?P<desc>.*)", re.DOTALL
)
_TAG_SYNONYMS = {
    "returns": ("returns", "return"),
    "return": ("returns", "return"),
    "throws": ("throws", "exception"),
    "exception": ("throws", "exception"),
}
_PARAM_RE = re.compile(
    r"(?:(?:public|private|protected|readonly|override)\s+)*"
    r"(?P<name>(?:\.\.\.)?[\w$]+)\s*(?P<optional>\?)?\s*"
    r"(?::\s*(?P<type>.+?))?\s*"
    r"(?:=(?!>)\s*(?P<default>.+))?",
    re.DOTALL,
)


def _mask_non_code(text: str) -> str:
    """Return ``text`` with comments and string contents replaced by spaces.

    The result has exactly the same length and line structure as ``text``, so
    offsets found in the masked text are valid in the original.  String
    delimiters are kept; only what is between them is blanked.  Quote
    characters inside regex literals or JSX text are not recognised and may
    blank the rest of that line.
    """
    chars = list(text)
    size = len(text)
    cursor = 0
    while True:
        found = _TOKEN_START_RE.search(text, cursor)
        if found is None:
            break
        start = found.start()
        opener = text[start]
        if opener == "/":
            if text.startswith("//", start):
                end = text.find("\n", start)
                end = size if end == -1 else end
            elif text.startswith("/*", start):
                end = text.find("*/", start + 2)
                end = size if end == -1 else end + 2
            else:
                # A lone slash (division or regex delimiter) is code.
                cursor = start + 1
                continue
            cursor = end
        else:
            start += 1
            end = start
            while end < size and text[end] != opener:
                # Only template literals may span lines; an unterminated
                # quote stops at the end of its line to limit the damage.
                if text[end] == "\n" and opener != "`":
                    break
                end += 2 if text[end] == "\\" else 1
            end = min(end, size)
            cursor = end + 1
        for index in range(start, end):
            if chars[index] != "\n":
                chars[index] = " "
    return "".join(chars)


def _brace_index(code: str) -> tuple[list[int], list[int]]:
    """Return brace offsets in ``code`` and the nesting depth after each."""
    positions: list[int] = []
    depths: list[int] = []
    depth = 0
    for match in _BRACE_RE.finditer(code):
        depth += 1 if match.group() == "{" else -1
        positions.append(match.start())
        depths.append(depth)
    return positions, depths


def _depth_at(index: tuple[list[int], list[int]], position: int) -> int:
    """Return the brace nesting depth just before ``position``."""
    positions, depths = index
    slot = bisect.bisect_left(positions, position)
    return depths[slot - 1] if slot else 0


def _find_closing(text: str, start: int, pattern: "re.Pattern[str]", opener: str) -> int:
    """Return the offset of the bracket that returns nesting to zero, or -1."""
    depth = 0
    for match in pattern.finditer(text, start):
        if match.group() == opener:
            depth += 1
        else:
            depth -= 1
            if depth == 0:
                return match.start()
    return -1


def _comment_ending_at(text: str, end: int) -> str:
    """Return the doc comment that ends right before ``end`` (raw), or ""."""
    while end > 0 and text[end - 1].isspace():
        end -= 1
    if text.endswith("*/", 0, end):
        start = text.rfind("/**", 0, end)
        # An earlier "*/" inside the span means the nearest "/**" belongs to
        # a different comment and the adjacent one is a plain "/* */".
        if start != -1 and text.find("*/", start + 3, end - 2) == -1:
            return text[start:end]
        return ""
    lines = []
    while end > 0:
        line_start = text.rfind("\n", 0, end) + 1
        line = text[line_start:end].strip()
        if not line.startswith("///") or _DIRECTIVE_RE.match(line):
            break
        lines.append(line)
        end = max(line_start - 1, 0)
    lines.reverse()
    return "\n".join(lines)


class TypeScriptParser(Parser):
    """Parser for TypeScript source files.

    Declarations are located with regular expressions applied to a copy of
    the source in which comments and string contents are blanked out, so
    commented-out code and braces inside strings do not confuse the scan.
    Text that ends up in the produced elements (parameters, types, docs) is
    always read from the original source.
    """

    EXTENSIONS = [".ts", ".tsx"]

    def __init__(self, file_path: str):
        super().__init__(file_path)

    def get_language_name(self) -> str:
        """Return the language identifier handled by this parser."""
        return "typescript"

    @classmethod
    def supports_file(cls, file_path: str) -> bool:
        """Return True when ``file_path`` has a TypeScript extension."""
        return cls._get_extension(file_path) in cls.EXTENSIONS

    @staticmethod
    def _get_extension(file_path: str) -> str:
        """Return the lower-cased extension of ``file_path`` (with the dot)."""
        return os.path.splitext(file_path)[1].lower()

    def parse(self) -> list[DocElement]:
        """Parse TypeScript file and extract documentation elements.

        Returns:
            The elements found, grouped by kind in the order: module,
            functions, classes (each preceded by its methods), interfaces,
            constants and type aliases.
        """
        self.content = self._read_content()
        self.elements = []

        self._parse_module_docstring()
        self._parse_functions()
        self._parse_classes()
        self._parse_interfaces()
        self._parse_constants()

        return self.elements

    # ------------------------------------------------------------------
    # Element extraction
    # ------------------------------------------------------------------

    def _parse_module_docstring(self) -> None:
        """Record the comment that opens the file as the module description.

        Only a JSDoc block or a run of ``///`` lines at the very top of the
        file (after an optional shebang) is considered, so the doc comment of
        some declaration further down is not mistaken for module docs.
        """
        candidates = []
        leading = _LEADING_JSDOC_RE.match(self.content)
        if leading:
            candidates.append(leading.group(1))
        candidates.append(self._leading_triple_slash_comment())

        for raw in candidates:
            docstring = self._clean_jsdoc(raw)
            if docstring.strip():
                self.elements.append(
                    DocElement(
                        name=self._get_module_name(),
                        element_type=ElementType.MODULE,
                        description=docstring,
                        full_docstring=docstring,
                        source_file=self.file_path,
                    )
                )
                break

    def _leading_triple_slash_comment(self) -> str:
        """Return the first run of ``///`` doc lines at the top of the file."""
        lines = []
        for line in self.content.split("\n"):
            stripped = line.strip()
            if stripped.startswith("///") and not _DIRECTIVE_RE.match(stripped):
                lines.append(stripped)
            elif lines or (stripped and not stripped.startswith("///")):
                # Stop at the end of the run, or at the first code line.
                break
        return "\n".join(lines)

    def _parse_functions(self) -> None:
        """Parse function declarations, arrow functions and function expressions."""
        code, _ = self._analysis()
        for head_re, tail_re in _FUNCTION_PATTERNS:
            for match in head_re.finditer(code):
                signature = self._read_signature(code, match, tail_re)
                if signature is None:
                    continue
                params_str, return_type = signature
                name = match.group("name")
                docstring, tagged = self._doc_before(match.start())

                elem = DocElement(
                    name=name,
                    element_type=ElementType.FUNCTION,
                    description=self._extract_summary(docstring),
                    full_docstring=docstring,
                    parameters=self._parse_params(params_str),
                    return_type=return_type,
                    return_description=self._extract_jsdoc_tag(tagged, "returns"),
                    raises=self._extract_jsdoc_raises(tagged),
                    examples=self._extract_jsdoc_examples(tagged),
                    source_file=self.file_path,
                    line_number=self._get_line_number(match.start()),
                    visibility=self._get_visibility(name),
                )
                self.elements.append(elem)

    def _parse_classes(self) -> None:
        """Parse class definitions and, for each, its methods."""
        code, _ = self._analysis()
        for match in _CLASS_RE.finditer(code):
            class_name = match.group("name")
            extends = self._raw_group(match, "extends")
            implements = self._raw_group(match, "implements")
            docstring, _ = self._doc_before(match.start())

            elem = DocElement(
                name=class_name,
                element_type=ElementType.CLASS,
                description=self._extract_summary(docstring),
                full_docstring=docstring,
                source_file=self.file_path,
                line_number=self._get_line_number(match.start()),
                visibility=self._get_visibility(class_name),
                decorators=self._find_decorators_before(match.start()),
            )

            if extends:
                elem.parameters.append(
                    Parameter(name=extends, description=f"Extends: {extends}")
                )
            if implements:
                # Bracket-aware split so ``Map<K, V>`` stays in one piece.
                for impl in self._split_params(implements):
                    impl = impl.strip()
                    if impl:
                        elem.parameters.append(
                            Parameter(name=impl, description=f"Implements: {impl}")
                        )

            # The pattern ends on the class's opening brace.
            self._parse_class_methods(elem, match.end() - 1)

            self.elements.append(elem)

    def _parse_class_methods(
        self, class_elem: DocElement, brace_pos: Optional[int] = None
    ) -> None:
        """Parse methods within a class.

        Args:
            class_elem: The class element whose methods are collected.
            brace_pos: Offset of the class's opening brace in the file.  When
                omitted, the class header is searched for by name.
        """
        code, index = self._analysis()
        if brace_pos is None:
            header = re.search(
                r"(?<![\w$.])class\s+" + re.escape(class_elem.name) + r"(?![\w$])",
                code,
            )
            if header is None:
                return
            brace_pos = code.find("{", header.end())
            if brace_pos == -1:
                return

        close_pos = self._find_matching_brace(code, brace_pos)
        if close_pos == -1:
            return

        # Only declarations directly inside the class body are members;
        # anything nested deeper is a statement inside some method.
        member_depth = _depth_at(index, brace_pos + 1)

        for match in _METHOD_RE.finditer(code, brace_pos + 1, close_pos):
            method_name = match.group("name")
            if method_name in _NON_METHOD_KEYWORDS:
                continue
            if _depth_at(index, match.start("name")) != member_depth:
                continue
            signature = self._read_signature(code, match, _BLOCK_TAIL_RE)
            if signature is None:
                continue
            params_str, return_type = signature

            docstring, tagged = self._doc_before(match.start())
            params = [p for p in self._parse_params(params_str) if p.name != "this"]

            if method_name.startswith("#") or "private" in match.group("mods").split():
                visibility = "private"
            else:
                visibility = self._get_visibility(method_name)

            method_elem = DocElement(
                name=f"{class_elem.name}.{method_name}",
                element_type=ElementType.METHOD,
                description=self._extract_summary(docstring),
                full_docstring=docstring,
                parameters=params,
                return_type=return_type,
                return_description=self._extract_jsdoc_tag(tagged, "returns"),
                raises=self._extract_jsdoc_raises(tagged),
                examples=self._extract_jsdoc_examples(tagged),
                source_file=self.file_path,
                line_number=self._get_line_number(match.start("name")),
                visibility=visibility,
            )
            self.elements.append(method_elem)

    def _parse_interfaces(self) -> None:
        """Parse interface definitions, including ``extends`` clauses."""
        code, _ = self._analysis()
        for match in _INTERFACE_RE.finditer(code):
            brace_pos = match.end() - 1
            close_pos = self._find_matching_brace(code, brace_pos)
            if close_pos == -1:
                continue
            interface_name = match.group("name")
            body = self.content[brace_pos + 1:close_pos]
            docstring, _ = self._doc_before(match.start())

            elem = DocElement(
                name=interface_name,
                element_type=ElementType.INTERFACE,
                description=self._extract_summary(docstring),
                full_docstring=docstring,
                source_file=self.file_path,
                line_number=self._get_line_number(match.start()),
                visibility=self._get_visibility(interface_name),
            )
            elem.attributes = self._parse_interface_properties(body)

            self.elements.append(elem)

    def _parse_interface_properties(
        self, body: str
    ) -> list[tuple[str, Optional[str], Optional[str]]]:
        """Parse properties from interface body.

        Args:
            body: Source text between the interface's braces.

        Returns:
            ``(name, type, note)`` tuples for the properties declared
            directly in the body; ``note`` is ``"Optional. "`` for ``name?``
            properties and ``""`` otherwise.
        """
        properties = []
        depth = 0
        code_lines = _mask_non_code(body).split("\n")
        for raw_line, code_line in zip(body.split("\n"), code_lines):
            top_level = depth == 0
            depth += code_line.count("{") - code_line.count("}")
            # Cut the raw line where the code ends to drop trailing comments.
            line = raw_line[:len(code_line.rstrip())].strip()
            if not top_level or not line:
                continue

            match = _PROPERTY_RE.match(line)
            if match:
                prop_type = match.group("type").strip().rstrip(",").strip()
                note = "Optional. " if match.group("optional") else ""
                properties.append((match.group("name"), prop_type, note))
        return properties

    def _parse_constants(self) -> None:
        """Parse top-level variable declarations and type aliases.

        Declarations nested inside any braces (function bodies, namespaces)
        are ignored, as are variables whose value is a function or class.
        """
        code, index = self._analysis()
        for pattern in (_CONSTANT_RE, _TYPE_ALIAS_RE):
            for match in pattern.finditer(code):
                if _depth_at(index, match.start("name")) != 0:
                    continue
                if pattern is _CONSTANT_RE and (
                    _CALLABLE_VALUE_RE.match(code, match.end())
                    or self._is_function_valued(code, match.start())
                ):
                    continue

                name = match.group("name")
                docstring, _ = self._doc_before(match.start())

                elem = DocElement(
                    name=name,
                    element_type=ElementType.CONSTANT,
                    description=self._extract_summary(docstring),
                    full_docstring=docstring,
                    source_file=self.file_path,
                    line_number=self._get_line_number(match.start()),
                    visibility=self._get_visibility(name),
                )
                self.elements.append(elem)

    # ------------------------------------------------------------------
    # Source scanning helpers
    # ------------------------------------------------------------------

    def _analysis(self) -> tuple[str, tuple[list[int], list[int]]]:
        """Return the masked source and its brace index, cached per content."""
        content = self.content
        cache = getattr(self, "_analysis_cache", None)
        if cache is None or cache[0] is not content:
            code = _mask_non_code(content)
            cache = (content, code, _brace_index(code))
            self._analysis_cache = cache
        return cache[1], cache[2]

    def _raw_group(self, match: "re.Match[str]", name: str) -> Optional[str]:
        """Return the original (unmasked) text of a named group, or None."""
        if name not in match.re.groupindex:
            return None
        start, end = match.span(name)
        return None if start < 0 else self.content[start:end]

    def _read_signature(
        self, code: str, match: "re.Match[str]", tail_re: "re.Pattern[str]"
    ) -> Optional[tuple[str, Optional[str]]]:
        """Read the parameter list and return type that follow a head match.

        Args:
            code: Masked version of ``self.content``.
            match: Head match ending on the opening parenthesis (or carrying
                a ``single`` group for ``x => ...`` arrows).
            tail_re: Pattern the text after the closing parenthesis must match.

        Returns:
            ``(params_text, return_type)`` or None when the text is not a
            declaration of the expected shape.
        """
        single = self._raw_group(match, "single")
        if single:
            return single, None
        open_pos = match.end() - 1
        close_pos = _find_closing(code, open_pos, _PAREN_RE, "(")
        if close_pos == -1:
            return None
        tail = tail_re.match(code, close_pos + 1)
        if tail is None:
            return None
        ret_start, ret_end = tail.span("ret")
        return_type = self.content[ret_start:ret_end].strip() if ret_start >= 0 else ""
        return self.content[open_pos + 1:close_pos], return_type or None

    def _is_function_valued(self, code: str, position: int) -> bool:
        """Return True when the declaration at ``position`` binds a function."""
        for head_re, tail_re in _CONST_FUNCTION_PATTERNS:
            head = head_re.match(code, position)
            if head and self._read_signature(code, head, tail_re) is not None:
                return True
        return False

    def _raw_doc_comment_before(self, text: str, code: str, position: int) -> str:
        """Return the raw doc comment attached to the declaration at ``position``.

        The comment must be adjacent: only whitespace, modifier keywords and
        decorators may separate it from the declaration.

        Args:
            text: Original source.
            code: ``text`` with comments and strings masked.
            position: Offset of the declaration.
        """
        end = position
        while True:
            comment = _comment_ending_at(text, end)
            if comment:
                return comment
            window = max(0, end - _LOOKBACK_WINDOW)
            step = _TRAILING_KEYWORD_RE.search(
                code, window, end
            ) or _TRAILING_DECORATOR_RE.search(code, window, end)
            if step is None:
                return ""
            end = step.start()

    def _doc_before(self, position: int) -> tuple[str, str]:
        """Return ``(description, description_with_tags)`` for a declaration."""
        code, _ = self._analysis()
        raw = self._raw_doc_comment_before(self.content, code, position)
        return self._clean_jsdoc(raw), self._clean_jsdoc(raw, keep_tags=True)

    def _find_jsdoc_before(self, position: int) -> str:
        """Find the doc comment directly before a position (tags removed)."""
        return self._doc_before(position)[0]

    def _find_jsdoc_in_body(self, body: str, position: int) -> str:
        """Find the doc comment directly before ``position`` within ``body``."""
        raw = self._raw_doc_comment_before(body, _mask_non_code(body), position)
        return self._clean_jsdoc(raw)

    def _find_decorators_before(self, position: int) -> list[str]:
        """Find the decorators applied to the declaration at ``position``.

        Returns:
            ``"@name"`` strings in source order; only decorators adjacent to
            the declaration are reported.
        """
        code, _ = self._analysis()
        decorators = []
        end = position
        while True:
            window = max(0, end - _LOOKBACK_WINDOW)
            keyword = _TRAILING_KEYWORD_RE.search(code, window, end)
            if keyword:
                end = keyword.start()
                continue
            decorator = _TRAILING_DECORATOR_RE.search(code, window, end)
            if decorator is None:
                break
            decorators.append(f"@{decorator.group(1)}")
            end = decorator.start()
        decorators.reverse()
        return decorators

    # ------------------------------------------------------------------
    # Doc comment handling
    # ------------------------------------------------------------------

    def _clean_jsdoc(self, jsdoc: str, keep_tags: bool = False) -> str:
        """Clean a ``/** */`` or ``///`` comment to plain text.

        Args:
            jsdoc: Raw comment text including its delimiters.
            keep_tags: When False (default) the text stops before the first
                ``@tag`` line.  When True the tag section is kept, with the
                indentation of continuation lines preserved.

        Returns:
            The comment text without delimiters or leading asterisks.
        """
        cleaned = []
        in_tags = False
        for raw_line in jsdoc.split("\n"):
            line = raw_line.strip()
            if line.startswith("///"):
                line = line[3:]
            else:
                if line.endswith("*/"):
                    line = line[:-2]
                line = _COMMENT_LEAD_RE.sub("", line, count=1)
            text = line.strip()
            if text.startswith("@"):
                if not keep_tags:
                    break
                in_tags = True
                cleaned.append(text)
            elif in_tags:
                # Drop only the single space after the asterisk so that the
                # relative indentation of example code survives.
                cleaned.append(line[1:].rstrip() if line[:1] == " " else line.rstrip())
            elif text:
                cleaned.append(text)
        return "\n".join(cleaned).strip()

    def _extract_summary(self, docstring: str) -> str:
        """Extract first line as summary."""
        if not docstring:
            return ""
        return docstring.strip().split("\n")[0].strip()

    def _split_jsdoc_tags(self, docstring: str) -> list[tuple[str, str]]:
        """Split cleaned doc text into ``(tag_name, tag_body)`` pairs."""
        tags = []
        for line in docstring.split("\n"):
            match = _TAG_LINE_RE.match(line.lstrip())
            if match:
                tags.append((match.group(1), [match.group(2)]))
            elif tags:
                tags[-1][1].append(line)
        return [(name, "\n".join(body).strip()) for name, body in tags]

    def _extract_jsdoc_tag(self, docstring: str, tag: str) -> Optional[str]:
        """Extract value of a specific JSDoc tag.

        Args:
            docstring: Doc text that still contains its tag section.
            tag: Tag name without ``@``; matched case-insensitively, and
                ``returns``/``return`` and ``throws``/``exception`` are
                treated as synonyms.

        Returns:
            The text of the first matching tag, or None if absent or empty.
        """
        wanted = _TAG_SYNONYMS.get(tag.lower(), (tag.lower(),))
        for name, body in self._split_jsdoc_tags(docstring):
            if name.lower() in wanted:
                return body or None
        return None

    def _extract_jsdoc_raises(self, docstring: str) -> list[tuple[str, str]]:
        """Extract ``@throws`` / ``@exception`` tags as ``(type, description)``."""
        raises = []
        for name, body in self._split_jsdoc_tags(docstring):
            if name.lower() not in _TAG_SYNONYMS["throws"]:
                continue
            match = _THROWS_RE.match(body)
            if match:
                exc_type = (match.group("braced") or match.group("bare") or "").strip()
                raises.append((exc_type, match.group("desc").strip()))
        return raises

    def _extract_jsdoc_examples(self, docstring: str) -> list[str]:
        """Extract the body of every non-empty ``@example`` tag."""
        return [
            body
            for name, body in self._split_jsdoc_tags(docstring)
            if name.lower() == "example" and body
        ]

    # ------------------------------------------------------------------
    # Parameter handling
    # ------------------------------------------------------------------

    def _parse_params(self, params_str: str) -> list[Parameter]:
        """Parse parameters from parameter string.

        Handles ``name``, ``name?``, ``name: Type``, ``name = default``,
        ``...rest`` and constructor parameter properties such as
        ``private readonly x: T``.  Anything else (for example destructuring
        patterns) is kept verbatim as the parameter name.
        """
        params = []
        if not params_str.strip():
            return params

        for part in self._split_params(params_str):
            part = part.strip()
            if not part:
                continue

            match = _PARAM_RE.fullmatch(part)
            if match:
                type_hint = match.group("type")
                default = match.group("default")
                params.append(
                    Parameter(
                        name=match.group("name"),
                        type_hint=type_hint.strip() if type_hint else None,
                        default_value=default.strip() if default else None,
                        is_optional=match.group("optional") is not None,
                    )
                )
            else:
                params.append(Parameter(name=part))

        return params

    def _split_params(self, params_str: str) -> list[str]:
        """Split a comma-separated list, respecting brackets and strings."""
        parts = []
        current = []
        depth = 0
        quote = ""
        previous = ""
        for char in params_str:
            if quote:
                if char == quote and previous != "\\":
                    quote = ""
            elif char in "'\"`":
                quote = char
            elif char in "<([{":
                depth += 1
            elif char in ")]}" or (char == ">" and previous != "="):
                # The ">" of "=>" is not a closing bracket.
                depth = max(depth - 1, 0)
            elif char == "," and depth == 0:
                parts.append("".join(current))
                current = []
                previous = char
                continue
            current.append(char)
            previous = char
        tail = "".join(current)
        if tail.strip():
            parts.append(tail)
        return parts

    # ------------------------------------------------------------------
    # Small utilities
    # ------------------------------------------------------------------

    def _get_visibility(self, name: str) -> str:
        """Determine visibility based on name."""
        if name.startswith("__"):
            return "dunder"
        if name.startswith("_"):
            return "private"
        return "public"

    def _get_module_name(self) -> str:
        """Extract module name from file path."""
        return os.path.splitext(os.path.basename(self.file_path))[0]

    def _get_line_number(self, position: int) -> int:
        """Get the 1-based line number of an offset in the file."""
        return self.content.count("\n", 0, position) + 1

    def _count_lines(self, text: str) -> int:
        """Count line breaks in text."""
        return text.count("\n")

    def _find_matching_brace(self, text: str, start: int = 0) -> int:
        """Find matching closing brace.

        Args:
            text: Text to scan; the block's opening brace must be the first
                brace at or after ``start``.
            start: Offset at which scanning begins.

        Returns:
            Offset in ``text`` of the brace that closes the block, or -1.
        """
        return _find_closing(text, start, _BRACE_RE, "{")