Initial upload of ai-code-audit-cli project
Some checks failed
Some checks failed
This commit is contained in:
155
src/utils/language_detector.py
Normal file
155
src/utils/language_detector.py
Normal file
@@ -0,0 +1,155 @@
|
||||
"""Language detection utilities for AI Code Audit CLI."""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class LanguageDetector:
|
||||
"""Detect programming language of source files."""
|
||||
|
||||
LANGUAGE_MAP = {
|
||||
".py": "python",
|
||||
".pyw": "python",
|
||||
".pyx": "python",
|
||||
".js": "javascript",
|
||||
".mjs": "javascript",
|
||||
".jsx": "javascript",
|
||||
".ts": "typescript",
|
||||
".tsx": "typescript",
|
||||
".mts": "typescript",
|
||||
}
|
||||
|
||||
PYTHON_SHEBANG = "#!"
|
||||
JS_SHEBANG = "#!"
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize the language detector."""
|
||||
self.language_map = self.LANGUAGE_MAP
|
||||
|
||||
def detect(self, file_path: Path | str) -> "Language":
|
||||
"""Detect the language of a file."""
|
||||
path = Path(file_path) if isinstance(file_path, str) else file_path
|
||||
|
||||
ext = path.suffix.lower()
|
||||
if ext in self.language_map:
|
||||
return Language(self.language_map[ext])
|
||||
|
||||
try:
|
||||
content = path.read_text(encoding="utf-8", errors="replace")
|
||||
return self.detect_from_content(content, path.name)
|
||||
except Exception:
|
||||
return Language("unknown")
|
||||
|
||||
def detect_from_content(
|
||||
self, content: str, filename: str = ""
|
||||
) -> "Language":
|
||||
"""Detect language from file content."""
|
||||
if not content:
|
||||
return Language("unknown")
|
||||
|
||||
first_line = content.split('\n')[0].strip()
|
||||
|
||||
if first_line.startswith("#!"):
|
||||
shebang = first_line[2:].strip().split()[0] if len(first_line) > 2 else ""
|
||||
if "python" in shebang.lower():
|
||||
return Language("python")
|
||||
elif "node" in shebang.lower() or "js" in shebang.lower():
|
||||
return Language("javascript")
|
||||
|
||||
if filename.endswith(".py") or filename.endswith(".pyw"):
|
||||
return Language("python")
|
||||
|
||||
if filename.endswith((".ts", ".tsx")):
|
||||
return Language("typescript")
|
||||
|
||||
if filename.endswith((".js", ".jsx")):
|
||||
return Language("javascript")
|
||||
|
||||
keywords = self._detect_keywords(content)
|
||||
if keywords:
|
||||
return Language(keywords)
|
||||
|
||||
return Language("unknown")
|
||||
|
||||
def _detect_keywords(self, content: str) -> Optional[str]:
|
||||
"""Detect language from keywords in content."""
|
||||
content_lower = content.lower()
|
||||
|
||||
python_indicators = [
|
||||
"import ",
|
||||
"from ",
|
||||
"def ",
|
||||
"class ",
|
||||
"if __name__",
|
||||
"print(",
|
||||
"return ",
|
||||
"except ",
|
||||
"try:",
|
||||
"with open",
|
||||
"lambda ",
|
||||
]
|
||||
|
||||
js_indicators = [
|
||||
"const ",
|
||||
"let ",
|
||||
"=>",
|
||||
"function ",
|
||||
"require(",
|
||||
"module.exports",
|
||||
"export ",
|
||||
"import ",
|
||||
"console.log",
|
||||
"async function",
|
||||
"await ",
|
||||
]
|
||||
|
||||
python_score = sum(1 for ind in python_indicators if ind in content_lower)
|
||||
js_score = sum(1 for ind in js_indicators if ind in content_lower)
|
||||
|
||||
if python_score > js_score:
|
||||
return "python"
|
||||
elif js_score > python_score:
|
||||
return "javascript"
|
||||
|
||||
return None
|
||||
|
||||
def get_supported_languages(self) -> list[str]:
|
||||
"""Get list of supported languages."""
|
||||
return ["python", "javascript", "typescript"]
|
||||
|
||||
def is_supported(self, language: str) -> bool:
|
||||
"""Check if a language is supported."""
|
||||
return language.lower() in self.get_supported_languages()
|
||||
|
||||
|
||||
class Language:
    """A detected programming language, stored as a lower-case name."""

    def __init__(self, value: str):
        """Store ``value`` normalised to lower case."""
        normalised = value.lower()
        self._value = normalised

    @property
    def value(self) -> str:
        """The lower-case language name."""
        return self._value

    def __eq__(self, other: object) -> bool:
        """Compare case-insensitively against a Language or a string.

        Any other type compares unequal.
        """
        if isinstance(other, Language):
            candidate = other._value
        elif isinstance(other, str):
            candidate = other.lower()
        else:
            return False
        return self._value == candidate

    def __hash__(self) -> int:
        """Hash of the lower-case name, so instances work in sets and dicts."""
        name = self._value
        return hash(name)

    def __str__(self) -> str:
        """Return the lower-case language name."""
        return self._value

    def __repr__(self) -> str:
        """Return a constructor-style representation."""
        return "Language('" + self._value + "')"
|
||||
Reference in New Issue
Block a user