From 41bfbbc3867cb2dbb725caaee7081ad0e346e579 Mon Sep 17 00:00:00 2001 From: 7000pctAUTO Date: Tue, 3 Feb 2026 10:30:15 +0000 Subject: [PATCH] Initial upload of ai-code-audit-cli project --- src/utils/language_detector.py | 155 +++++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 src/utils/language_detector.py diff --git a/src/utils/language_detector.py b/src/utils/language_detector.py new file mode 100644 index 0000000..2416b79 --- /dev/null +++ b/src/utils/language_detector.py @@ -0,0 +1,155 @@ +"""Language detection utilities for AI Code Audit CLI.""" + +from pathlib import Path +from typing import Optional + + +class LanguageDetector: + """Detect programming language of source files.""" + + LANGUAGE_MAP = { + ".py": "python", + ".pyw": "python", + ".pyx": "python", + ".js": "javascript", + ".mjs": "javascript", + ".jsx": "javascript", + ".ts": "typescript", + ".tsx": "typescript", + ".mts": "typescript", + } + + PYTHON_SHEBANG = "#!" + JS_SHEBANG = "#!" + + def __init__(self): + """Initialize the language detector.""" + self.language_map = self.LANGUAGE_MAP + + def detect(self, file_path: Path | str) -> "Language": + """Detect the language of a file.""" + path = Path(file_path) if isinstance(file_path, str) else file_path + + ext = path.suffix.lower() + if ext in self.language_map: + return Language(self.language_map[ext]) + + try: + content = path.read_text(encoding="utf-8", errors="replace") + return self.detect_from_content(content, path.name) + except Exception: + return Language("unknown") + + def detect_from_content( + self, content: str, filename: str = "" + ) -> "Language": + """Detect language from file content.""" + if not content: + return Language("unknown") + + first_line = content.split('\n')[0].strip() + + if first_line.startswith("#!"): + shebang = first_line[2:].strip().split()[0] if len(first_line) > 2 else "" + if "python" in shebang.lower(): + return Language("python") + elif "node" in shebang.lower() or "js" in shebang.lower(): + return Language("javascript") + + if filename.endswith(".py") or filename.endswith(".pyw"): + return Language("python") + + if filename.endswith((".ts", ".tsx")): + return Language("typescript") + + if filename.endswith((".js", ".jsx")): + return Language("javascript") + + keywords = self._detect_keywords(content) + if keywords: + return Language(keywords) + + return Language("unknown") + + def _detect_keywords(self, content: str) -> Optional[str]: + """Detect language from keywords in content.""" + content_lower = content.lower() + + python_indicators = [ + "import ", + "from ", + "def ", + "class ", + "if __name__", + "print(", + "return ", + "except ", + "try:", + "with open", + "lambda ", + ] + + js_indicators = [ + "const ", + "let ", + "=>", + "function ", + "require(", + "module.exports", + "export ", + "import ", + "console.log", + "async function", + "await ", + ] + + python_score = sum(1 for ind in python_indicators if ind in content_lower) + js_score = sum(1 for ind in js_indicators if ind in content_lower) + + if python_score > js_score: + return "python" + elif js_score > python_score: + return "javascript" + + return None + + def get_supported_languages(self) -> list[str]: + """Get list of supported languages.""" + return ["python", "javascript", "typescript"] + + def is_supported(self, language: str) -> bool: + """Check if a language is supported.""" + return language.lower() in self.get_supported_languages() + + +class Language: + """Represents a detected programming language.""" + + def __init__(self, value: str): + """Initialize the language.""" + self._value = value.lower() + + @property + def value(self) -> str: + """Get the language value.""" + return self._value + + def __eq__(self, other: object) -> bool: + """Check equality with another Language or string.""" + if isinstance(other, Language): + return self._value == other._value + if isinstance(other, str): + return self._value == other.lower() + return False + + def __hash__(self) -> int: + """Hash for use in sets and dicts.""" + return hash(self._value) + + def __str__(self) -> str: + """String representation.""" + return self._value + + def __repr__(self) -> str: + """Repr representation.""" + return f"Language('{self._value}')"