Initial upload: TermDiagram v0.1.0
This commit is contained in:
87
src/termdiagram/parser/code_parser.py
Normal file
87
src/termdiagram/parser/code_parser.py
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
import os
|
||||||
|
from typing import List, Optional, Dict
|
||||||
|
from pathlib import Path
|
||||||
|
from ..models import Module, ClassSymbol, FunctionSymbol, MethodSymbol
|
||||||
|
from .symbol_extractor import SymbolExtractor
|
||||||
|
from .language_detector import LanguageDetector
|
||||||
|
|
||||||
|
|
||||||
|
class CodeParser:
    """Parse source files into ``Module`` records and summarise them.

    Symbol extraction is delegated to ``SymbolExtractor``; the decision of
    which file extensions can be parsed is delegated to ``LanguageDetector``.
    """

    def __init__(self):
        """Create the symbol extractor and language detector used for parsing."""
        self.extractor = SymbolExtractor()
        self.language_detector = LanguageDetector()

    def parse_file(self, file_path: str) -> "Optional[Module]":
        """Parse a single file into a ``Module``.

        Args:
            file_path: Path of the file to read.

        Returns:
            A ``Module`` built from the extracted symbols, or ``None`` when
            the file's extension is rejected by
            ``language_detector.is_supported`` or the file cannot be read
            and decoded as UTF-8.
        """
        path = Path(file_path)
        # Extension without its leading dot, e.g. "py" for "foo.py".
        ext = path.suffix.lstrip(".")

        if not self.language_detector.is_supported(ext):
            return None

        try:
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()
        except (OSError, UnicodeDecodeError):
            # Best effort: unreadable or non-UTF-8 files are skipped.
            return None

        symbols = self.extractor.extract(content, ext)
        # Newline characters plus one; a trailing newline therefore adds
        # one to the count, and an empty file counts as one line.
        line_count = content.count("\n") + 1

        return Module(
            name=path.stem,
            path=str(path),
            classes=symbols.get("classes", []),
            functions=symbols.get("functions", []),
            imports=symbols.get("imports", []),
            line_count=line_count,
        )

    def parse_directory(
        self,
        dir_path: str,
        extensions: Optional[List[str]] = None,
        exclude_patterns: Optional[List[str]] = None,
    ) -> "List[Module]":
        """Walk ``dir_path`` recursively and parse every matching file.

        Args:
            dir_path: Root directory to walk.
            extensions: Optional extension filter. Entries may be written
                with or without a leading dot (``"py"`` or ``".py"``).
                ``None`` or an empty list disables the filter.
            exclude_patterns: Optional substrings. A file is skipped when
                any pattern occurs in its joined path (which includes
                ``dir_path`` itself); a directory is not descended into
                when any pattern occurs in its joined path.

        Returns:
            The ``Module`` objects for which ``parse_file`` returned a
            truthy result, in ``os.walk`` order.
        """
        modules = []
        exclude_patterns = exclude_patterns or []
        # Normalise once so ".py" and "py" are equivalent; parsed
        # extensions never carry the leading dot.
        wanted = {e.lstrip(".") for e in extensions} if extensions else None

        for root, dirs, files in os.walk(dir_path):
            # In-place assignment prunes the walk. Skip hidden directories
            # and directories whose path contains an exclude pattern; every
            # file beneath such a directory would be rejected by the
            # file-level check below anyway.
            dirs[:] = [
                d
                for d in dirs
                if not d.startswith(".")
                and not any(p in os.path.join(root, d) for p in exclude_patterns)
            ]

            for file in files:
                file_path = os.path.join(root, file)

                if any(p in file_path for p in exclude_patterns):
                    continue

                ext = Path(file).suffix.lstrip(".")
                if wanted is not None and ext not in wanted:
                    continue

                module = self.parse_file(file_path)
                if module:
                    modules.append(module)

        return modules

    def get_statistics(self, modules: "List[Module]") -> Dict:
        """Aggregate counts over ``modules``.

        Returns:
            A dict with the keys ``total_modules``, ``total_classes``,
            ``total_functions``, ``total_methods`` (sum of
            ``len(cls.methods)`` over every class of every module) and
            ``languages`` (file extension -> number of modules).
        """
        return {
            "total_modules": len(modules),
            "total_classes": sum(len(m.classes) for m in modules),
            "total_functions": sum(len(m.functions) for m in modules),
            "total_methods": sum(
                len(c.methods) for m in modules for c in m.classes
            ),
            "languages": self._count_languages(modules),
        }

    def _count_languages(self, modules: "List[Module]") -> Dict[str, int]:
        """Count modules per file extension (leading dot stripped) of ``module.path``."""
        languages = {}
        for module in modules:
            ext = Path(module.path).suffix.lstrip(".")
            languages[ext] = languages.get(ext, 0) + 1
        return languages
|
||||||
Reference in New Issue
Block a user