Add project and language analyzers
src/contextgen/analyzers/language_detector.py (new file, 102 lines)
@@ -0,0 +1,102 @@
"""Language detector using file extensions and patterns."""

from pathlib import Path
from typing import Any

from contextgen.analyzers.base import BaseAnalyzer

LANGUAGE_EXTENSIONS: dict[str, list[str]] = {
    "Python": [".py", ".pyi"],
    "JavaScript": [".js", ".mjs", ".cjs"],
    "TypeScript": [".ts", ".tsx"],
    "Go": [".go"],
    "Rust": [".rs"],
    "Java": [".java"],
    "C": [".c", ".h"],
    "C++": [".cpp", ".cc", ".cxx", ".hpp"],
    "C#": [".cs"],
    "Ruby": [".rb"],
    "PHP": [".php"],
    "Swift": [".swift"],
    "Kotlin": [".kt", ".kts"],
    "Scala": [".scala"],
    "HTML": [".html", ".htm"],
    "CSS": [".css", ".scss", ".sass", ".less"],
    "JSON": [".json"],
    "YAML": [".yaml", ".yml"],
    "XML": [".xml"],
    "Shell": [".sh", ".bash", ".zsh"],
    "Markdown": [".md", ".markdown"],
    "SQL": [".sql"],
    "Dockerfile": ["Dockerfile"],
    "Vue": [".vue"],
    "Svelte": [".svelte"],
    "Solid": [".tsx"],
}

# Heuristic substring pairs per language; surfaced as "common_patterns" in the
# per-language details below rather than used for detection itself.
LANGUAGE_PATTERNS: dict[str, list[tuple[str, str]]] = {
    "Python": [("import ", "from "), ("def ", "class "), ("if __name__ == ", "print(")],
    "JavaScript": [("function ", "const "), ("export ", "import "), ("=>", "console.log(")],
    "TypeScript": [("interface ", "type "), ("export ", "import "), (": string", ": number")],
    "Go": [("package ", "func "), ("import (", "type "), ("struct ", "func (")],
    "Rust": [("fn ", "struct "), ("impl ", "pub "), ("use ", "mod ")],
    "Java": [("public class", "private "), ("import ", "public "), ("System.out.", "void ")],
    "Ruby": [("def ", "class "), ("require ", "module "), ("puts ", "end")],
    "PHP": [("<?php", "function "), ("class ", "public "), ("require ", "namespace ")],
}


class LanguageDetector(BaseAnalyzer):
    """Detects programming languages used in the project."""

    def analyze(self) -> dict[str, Any]:
        """Detect languages and return statistics."""
        language_counts: dict[str, int] = {}
        language_files: dict[str, list[str]] = {}

        for ext, lang in self._extension_to_language().items():
            files = list(self.project_path.rglob(f"*{ext}"))
            filtered_files = [f for f in files if not self._is_ignored(f)]

            count = len(filtered_files)
            if count > 0:
                language_counts[lang] = language_counts.get(lang, 0) + count
                # Keep at most 10 example file paths per extension.
                language_files.setdefault(lang, []).extend(
                    [str(f.relative_to(self.project_path)) for f in filtered_files[:10]]
                )

        primary_language = max(language_counts, key=language_counts.get) if language_counts else None

        return {
            "languages": language_counts,
            "language_files": language_files,
            "primary_language": primary_language,
            "language_details": self._get_language_details(language_counts),
        }

    def _extension_to_language(self) -> dict[str, str]:
        """Create a mapping from extensions to languages."""
        mapping: dict[str, str] = {}
        for lang, exts in LANGUAGE_EXTENSIONS.items():
            for ext in exts:
                # setdefault keeps the first language listed when an extension is
                # shared (".tsx" appears under both TypeScript and Solid), rather
                # than letting the later entry silently overwrite the earlier one.
                mapping.setdefault(ext, lang)
        return mapping

    def _is_ignored(self, path: Path) -> bool:
        """Check if file should be ignored."""
        ignore_names = [".git", "__pycache__", "node_modules", ".venv", "venv", "dist", "build", ".tox", ".nox"]
        return any(name in path.parts for name in ignore_names)

    def _get_language_details(self, language_counts: dict[str, int]) -> dict[str, dict[str, Any]]:
        """Get detailed information about each detected language."""
        details: dict[str, dict[str, Any]] = {}

        for lang, count in language_counts.items():
            details[lang] = {
                "file_count": count,
                "extensions": LANGUAGE_EXTENSIONS.get(lang, []),
                "common_patterns": LANGUAGE_PATTERNS.get(lang, []),
            }

        return details
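
A minimal usage sketch of the new analyzer (it assumes BaseAnalyzer, which is not part of this diff, stores the constructor argument as self.project_path):

    from pathlib import Path

    from contextgen.analyzers.language_detector import LanguageDetector

    detector = LanguageDetector(Path("/path/to/project"))  # hypothetical project root
    report = detector.analyze()
    print(report["primary_language"])  # e.g. "Python"
    print(report["languages"])         # e.g. {"Python": 42, "Markdown": 3}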