Add project and language analyzers
Some checks failed
CI / test (push) Failing after 9s
CI / build (push) Has been skipped

This commit is contained in:
2026-01-29 13:21:18 +00:00
parent 022ccd5a5b
commit 6de9de9094

View File

@@ -0,0 +1,102 @@
"""Language detector using file extensions and patterns."""
from pathlib import Path
from typing import Any
from contextgen.analyzers.base import BaseAnalyzer
# Language name -> file-name endings that identify it.
#
# Most endings are dotted extensions; "Dockerfile" is a bare file name.
# Declaration order is significant to code that inverts this table.
# NOTE: ".tsx" is listed under both "TypeScript" and "Solid", so that ending
# alone cannot tell the two apart.
LANGUAGE_EXTENSIONS: dict[str, list[str]] = {
    # General-purpose programming languages
    "Python": [".py", ".pyi"],
    "JavaScript": [".js", ".mjs", ".cjs"],
    "TypeScript": [".ts", ".tsx"],
    "Go": [".go"],
    "Rust": [".rs"],
    "Java": [".java"],
    "C": [".c", ".h"],
    "C++": [".cpp", ".cc", ".cxx", ".hpp"],
    "C#": [".cs"],
    "Ruby": [".rb"],
    "PHP": [".php"],
    "Swift": [".swift"],
    "Kotlin": [".kt", ".kts"],
    "Scala": [".scala"],
    # Markup, styling and data formats
    "HTML": [".html", ".htm"],
    "CSS": [".css", ".scss", ".sass", ".less"],
    "JSON": [".json"],
    "YAML": [".yaml", ".yml"],
    "XML": [".xml"],
    # Scripting, documentation and tooling
    "Shell": [".sh", ".bash", ".zsh"],
    "Markdown": [".md", ".markdown"],
    "SQL": [".sql"],
    "Dockerfile": ["Dockerfile"],
    # Front-end component formats
    "Vue": [".vue"],
    "Svelte": [".svelte"],
    "Solid": [".tsx"],
}
# Language name -> pairs of source-text snippets commonly seen in that language.
# Only a subset of the languages in the extension table has an entry here.
LANGUAGE_PATTERNS: dict[str, list[tuple[str, str]]] = {
    "Python": [
        ("import ", "from "),
        ("def ", "class "),
        ("if __name__ == ", "print("),
    ],
    "JavaScript": [
        ("function ", "const "),
        ("export ", "import "),
        ("=>", "console.log("),
    ],
    "TypeScript": [
        ("interface ", "type "),
        ("export ", "import "),
        (": string", ": number"),
    ],
    "Go": [
        ("package ", "func "),
        ("import (", "type "),
        ("struct ", "func ("),
    ],
    "Rust": [
        ("fn ", "struct "),
        ("impl ", "pub "),
        ("use ", "mod "),
    ],
    "Java": [
        ("public class", "private "),
        ("import ", "public "),
        ("System.out.", "void "),
    ],
    "Ruby": [
        ("def ", "class "),
        ("require ", "module "),
        ("puts ", "end"),
    ],
    "PHP": [
        ("<?php", "function "),
        ("class ", "public "),
        ("require ", "namespace "),
    ],
}
class LanguageDetector(BaseAnalyzer):
    """Detects programming languages used in the project.

    Detection is based on file names only: every file under
    ``self.project_path`` whose name ends with one of the endings in
    ``LANGUAGE_EXTENSIONS`` is attributed to the corresponding language.
    """

    # Path components that exclude a file from the scan (VCS metadata,
    # caches, virtual environments, dependency and build output folders).
    _IGNORED_NAMES: frozenset[str] = frozenset(
        {".git", "__pycache__", "node_modules", ".venv", "venv", "dist", "build", ".tox", ".nox"}
    )

    def analyze(self) -> dict[str, Any]:
        """Detect languages and return statistics.

        Returns:
            A dict with the keys:

            * ``languages``: language name -> number of matching files.
            * ``language_files``: language name -> sample file paths relative
              to the project root (at most 10 per file-name ending).
            * ``primary_language``: the language with the most files, or
              ``None`` when nothing matched.
            * ``language_details``: see ``_get_language_details``.
        """
        language_counts: dict[str, int] = {}
        language_files: dict[str, list[str]] = {}
        for ext, lang in self._extension_to_language().items():
            matches = [
                path
                for path in self.project_path.rglob(f"*{ext}")
                # rglob also yields directories whose name matches the
                # pattern; only regular files count as source files.
                if not self._is_ignored(path) and path.is_file()
            ]
            if not matches:
                continue
            language_counts[lang] = language_counts.get(lang, 0) + len(matches)
            language_files.setdefault(lang, []).extend(
                str(path.relative_to(self.project_path)) for path in matches[:10]
            )
        primary_language = max(language_counts, key=language_counts.get, default=None)
        return {
            "languages": language_counts,
            "language_files": language_files,
            "primary_language": primary_language,
            "language_details": self._get_language_details(language_counts),
        }

    def _extension_to_language(self) -> dict[str, str]:
        """Create a mapping from extensions to languages.

        When an ending is declared for more than one language (``.tsx`` is
        listed for both TypeScript and Solid), the language declared first in
        ``LANGUAGE_EXTENSIONS`` wins, so ``.tsx`` files count as TypeScript.
        """
        mapping: dict[str, str] = {}
        for lang, exts in LANGUAGE_EXTENSIONS.items():
            for ext in exts:
                # setdefault keeps the first declaration instead of letting a
                # later duplicate silently take over the extension.
                mapping.setdefault(ext, lang)
        return mapping

    def _is_ignored(self, path: Path) -> bool:
        """Check if file should be ignored.

        Only the components below the project root are inspected, so a
        project that itself lives under a directory called e.g. ``build`` or
        ``venv`` is still scanned.
        """
        try:
            parts = path.relative_to(self.project_path).parts
        except ValueError:
            # Path is not located under the project root; fall back to
            # checking all of its components.
            parts = path.parts
        return not self._IGNORED_NAMES.isdisjoint(parts)

    def _get_language_details(self, language_counts: dict[str, int]) -> dict[str, dict[str, Any]]:
        """Get detailed information about each detected language.

        Args:
            language_counts: language name -> number of files detected.

        Returns:
            language name -> dict with ``file_count``, the language's
            declared ``extensions`` and its ``common_patterns`` (empty lists
            when the language has no entry in the respective table).
        """
        return {
            lang: {
                "file_count": count,
                "extensions": LANGUAGE_EXTENSIONS.get(lang, []),
                "common_patterns": LANGUAGE_PATTERNS.get(lang, []),
            }
            for lang, count in language_counts.items()
        }