import os
from typing import List, Optional, Dict
from pathlib import Path

from ..models import Module, ClassSymbol, FunctionSymbol, MethodSymbol
from .symbol_extractor import SymbolExtractor
from .language_detector import LanguageDetector


class CodeParser:
    """Parse source files into ``Module`` records and summarise them.

    Symbol extraction is delegated to ``SymbolExtractor`` and the decision
    whether a file extension is handled at all to ``LanguageDetector``.
    """

    def __init__(self):
        """Create the extractor and language detector used by this parser."""
        self.extractor = SymbolExtractor()
        self.language_detector = LanguageDetector()

    def parse_file(self, file_path: str) -> Optional[Module]:
        """Parse a single file into a ``Module``.

        Args:
            file_path: Path of the file to read.

        Returns:
            A ``Module`` built from the extracted symbols, or ``None`` when
            the extension is not supported by the language detector or the
            file cannot be read/decoded as UTF-8.
        """
        path = Path(file_path)
        # Extension without the leading dot, e.g. "py" for "foo.py".
        ext = path.suffix.lstrip(".")
        if not self.language_detector.is_supported(ext):
            return None

        try:
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()
        except (IOError, UnicodeDecodeError):
            # Best effort: unreadable or non-UTF-8 files are skipped.
            return None

        symbols = self.extractor.extract(content, ext)
        # Number of newline characters plus one; a trailing newline therefore
        # contributes one extra line and an empty file counts as one line.
        line_count = content.count("\n") + 1

        return Module(
            name=path.stem,
            path=str(path),
            classes=symbols.get("classes", []),
            functions=symbols.get("functions", []),
            imports=symbols.get("imports", []),
            line_count=line_count,
        )

    def parse_directory(
        self,
        dir_path: str,
        extensions: Optional[List[str]] = None,
        exclude_patterns: Optional[List[str]] = None,
    ) -> List[Module]:
        """Walk ``dir_path`` recursively and parse every eligible file.

        Args:
            dir_path: Root directory to walk.
            extensions: If given and non-empty, only files whose extension
                (without the leading dot) is in this list are parsed.
            exclude_patterns: Substrings; any file or directory whose path
                contains one of them is skipped.

        Returns:
            The modules for all files that ``parse_file`` could parse.
        """
        modules = []
        exclude_patterns = exclude_patterns or []

        def is_excluded(candidate: str) -> bool:
            # One shared rule for files and directories: a pattern excludes
            # a path when the pattern occurs as a substring of that path.
            return any(p in candidate for p in exclude_patterns)

        for root, dirs, files in os.walk(dir_path):
            # Prune in place so os.walk does not descend into directories
            # whose names start with "." or whose path is excluded. The test
            # is "pattern in path" (the same as for files); testing
            # "name in pattern" would wrongly prune e.g. "test" for the
            # pattern "tests_old".
            dirs[:] = [
                d
                for d in dirs
                if not d.startswith(".")
                and not is_excluded(os.path.join(root, d))
            ]

            for file in files:
                file_path = os.path.join(root, file)
                if is_excluded(file_path):
                    continue

                ext = Path(file).suffix.lstrip(".")
                if extensions and ext not in extensions:
                    continue

                module = self.parse_file(file_path)
                if module:
                    modules.append(module)

        return modules

    def get_statistics(self, modules: List[Module]) -> Dict:
        """Return aggregate counts for ``modules``.

        Returns:
            A dict with the keys ``total_modules``, ``total_classes``,
            ``total_functions``, ``total_methods`` and ``languages`` (the
            per-extension module count).
        """
        return {
            "total_modules": len(modules),
            "total_classes": sum(len(m.classes) for m in modules),
            "total_functions": sum(len(m.functions) for m in modules),
            "total_methods": sum(
                sum(len(c.methods) for c in m.classes) for m in modules
            ),
            "languages": self._count_languages(modules),
        }

    def _count_languages(self, modules: List[Module]) -> Dict[str, int]:
        """Count modules per file extension (leading dot removed)."""
        languages = {}
        for module in modules:
            ext = Path(module.path).suffix.lstrip(".")
            languages[ext] = languages.get(ext, 0) + 1
        return languages