# Recovered from a whitespace-collapsed copy of commit
# 6de9de909469608a0a1bd96338eccec34f2daf5f ("Add project and language
# analyzers", 7000pctAUTO, Thu, 29 Jan 2026 13:21:18 +0000), which created
# src/contextgen/analyzers/language_detector.py.
"""Language detector using file extensions and patterns."""

import os
from pathlib import Path
from typing import Any

from contextgen.analyzers.base import BaseAnalyzer


# Language name -> file-name endings that identify it. Entries are matched
# against the END of a file name, so "Dockerfile" also matches
# "prod.Dockerfile".
LANGUAGE_EXTENSIONS: dict[str, list[str]] = {
    "Python": [".py", ".pyi"],
    "JavaScript": [".js", ".mjs", ".cjs"],
    "TypeScript": [".ts", ".tsx"],
    "Go": [".go"],
    "Rust": [".rs"],
    "Java": [".java"],
    "C": [".c", ".h"],
    "C++": [".cpp", ".cc", ".cxx", ".hpp"],
    "C#": [".cs"],
    "Ruby": [".rb"],
    "PHP": [".php"],
    "Swift": [".swift"],
    "Kotlin": [".kt", ".kts"],
    "Scala": [".scala"],
    "HTML": [".html", ".htm"],
    "CSS": [".css", ".scss", ".sass", ".less"],
    "JSON": [".json"],
    "YAML": [".yaml", ".yml"],
    "XML": [".xml"],
    "Shell": [".sh", ".bash", ".zsh"],
    "Markdown": [".md", ".markdown"],
    "SQL": [".sql"],
    "Dockerfile": ["Dockerfile"],
    "Vue": [".vue"],
    "Svelte": [".svelte"],
    # ".tsx" is already claimed by TypeScript above. The first declaration
    # wins when this table is inverted, so this entry never claims a file.
    "Solid": [".tsx"],
}

# Language name -> pairs of source snippets commonly seen in that language.
# This module only echoes them back in the "language_details" report.
#
# NOTE(review): the copy this file was recovered from is truncated right
# after the opening of the "PHP" entry. Restore "PHP" and any later entries
# from the original commit; lookups fall back to [] in the meantime.
LANGUAGE_PATTERNS: dict[str, list[tuple[str, str]]] = {
    "Python": [("import ", "from "), ("def ", "class "), ("if __name__ == ", "print(")],
    "JavaScript": [("function ", "const "), ("export ", "import "), ("=>", "console.log(")],
    "TypeScript": [("interface ", "type "), ("export ", "import "), (": string", ": number")],
    "Go": [("package ", "func "), ("import (", "type "), ("struct ", "func (")],
    "Rust": [("fn ", "struct "), ("impl ", "pub "), ("use ", "mod ")],
    "Java": [("public class", "private "), ("import ", "public "), ("System.out.", "void ")],
    "Ruby": [("def ", "class "), ("require ", "module "), ("puts ", "end")],
}

# Path components that exclude a file from the scan.
_IGNORED_DIR_NAMES = frozenset(
    {".git", "__pycache__", "node_modules", ".venv", "venv", "dist", "build", ".tox", ".nox"}
)

# Upper bound on sample paths recorded per matching extension.
_MAX_SAMPLE_FILES = 10


# NOTE(review): the class statement and the first method's ``def`` line were
# lost in extraction; only ``-> dict[str, Any]:`` survived. The names
# ``LanguageDetector`` and ``analyze`` are reconstructed from the module path
# and the BaseAnalyzer import -- confirm against the repository.
class LanguageDetector(BaseAnalyzer):
    """Detect the languages used in a project from its file names.

    Reads ``self.project_path`` (used as a ``pathlib.Path``). That attribute
    is not assigned in this module; presumably BaseAnalyzer provides it.
    """

    def analyze(self) -> dict[str, Any]:
        """Detect languages and return statistics.

        The project tree is walked once. Each regular file whose name ends
        with a known extension, and that does not sit under an ignored
        directory, is counted for the owning language.

        Returns:
            A dict with four keys:

            * ``"languages"`` -- file count per detected language.
            * ``"language_files"`` -- sample paths relative to the project
              root, at most ten per matching extension.
            * ``"primary_language"`` -- the language with the most files, or
              ``None`` when nothing was detected.
            * ``"language_details"`` -- per-language file count, extensions
              and common patterns.
        """
        ext_to_lang = self._extension_to_language()
        # os.path.normcase lower-cases on Windows only, so matching is
        # case-insensitive exactly where file names are.
        tails = {os.path.normcase(ext): ext for ext in ext_to_lang}

        totals = dict.fromkeys(ext_to_lang, 0)
        samples: dict[str, list[str]] = {ext: [] for ext in ext_to_lang}

        for path in self.project_path.rglob("*"):
            name = os.path.normcase(path.name)
            ext = next((orig for tail, orig in tails.items() if name.endswith(tail)), None)
            # Cheapest checks first; is_file() costs a stat call and also
            # rejects directories whose names happen to match.
            if ext is None or self._is_ignored(path) or not path.is_file():
                continue
            totals[ext] += 1
            if len(samples[ext]) < _MAX_SAMPLE_FILES:
                samples[ext].append(str(path.relative_to(self.project_path)))

        # Aggregate in table order, not walk order, so the result layout and
        # the tie-break for the primary language do not depend on the
        # file system's listing order.
        language_counts: dict[str, int] = {}
        language_files: dict[str, list[str]] = {}
        for ext, lang in ext_to_lang.items():
            if not totals[ext]:
                continue
            language_counts[lang] = language_counts.get(lang, 0) + totals[ext]
            language_files.setdefault(lang, []).extend(samples[ext])

        primary_language = max(
            language_counts, key=language_counts.__getitem__, default=None
        )

        return {
            "languages": language_counts,
            "language_files": language_files,
            "primary_language": primary_language,
            "language_details": self._get_language_details(language_counts),
        }

    def _extension_to_language(self) -> dict[str, str]:
        """Create a mapping from extensions to languages.

        When an extension is listed under more than one language, the
        language declared first in ``LANGUAGE_EXTENSIONS`` keeps it. This is
        what stops ".tsx" files from being reported as "Solid".
        """
        mapping: dict[str, str] = {}
        for lang, exts in LANGUAGE_EXTENSIONS.items():
            for ext in exts:
                mapping.setdefault(ext, lang)
        return mapping

    def _is_ignored(self, path: Path) -> bool:
        """Check if file should be ignored.

        Only the components below the project root are inspected, so a
        project that itself lives under a directory named e.g. ``build`` is
        still scanned. A path outside the project root is checked in full.
        """
        try:
            parts = path.relative_to(self.project_path).parts
        except ValueError:
            parts = path.parts
        return not _IGNORED_DIR_NAMES.isdisjoint(parts)

    def _get_language_details(self, language_counts: dict[str, int]) -> dict[str, dict[str, Any]]:
        """Get detailed information about each detected language.

        Args:
            language_counts: File count per detected language.

        Returns:
            For every language in ``language_counts``, a dict with
            ``"file_count"``, ``"extensions"`` and ``"common_patterns"``.
            The two lists are copies, so callers cannot mutate the
            module-level tables through the result.
        """
        return {
            lang: {
                "file_count": count,
                "extensions": list(LANGUAGE_EXTENSIONS.get(lang, [])),
                "common_patterns": list(LANGUAGE_PATTERNS.get(lang, [])),
            }
            for lang, count in language_counts.items()
        }