Initial upload of ai-code-audit-cli project
Some checks failed
Some checks failed
This commit is contained in:
155
src/utils/language_detector.py
Normal file
155
src/utils/language_detector.py
Normal file
@@ -0,0 +1,155 @@
|
|||||||
|
"""Language detection utilities for AI Code Audit CLI."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
|
class LanguageDetector:
|
||||||
|
"""Detect programming language of source files."""
|
||||||
|
|
||||||
|
LANGUAGE_MAP = {
|
||||||
|
".py": "python",
|
||||||
|
".pyw": "python",
|
||||||
|
".pyx": "python",
|
||||||
|
".js": "javascript",
|
||||||
|
".mjs": "javascript",
|
||||||
|
".jsx": "javascript",
|
||||||
|
".ts": "typescript",
|
||||||
|
".tsx": "typescript",
|
||||||
|
".mts": "typescript",
|
||||||
|
}
|
||||||
|
|
||||||
|
PYTHON_SHEBANG = "#!"
|
||||||
|
JS_SHEBANG = "#!"
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
"""Initialize the language detector."""
|
||||||
|
self.language_map = self.LANGUAGE_MAP
|
||||||
|
|
||||||
|
def detect(self, file_path: Path | str) -> "Language":
|
||||||
|
"""Detect the language of a file."""
|
||||||
|
path = Path(file_path) if isinstance(file_path, str) else file_path
|
||||||
|
|
||||||
|
ext = path.suffix.lower()
|
||||||
|
if ext in self.language_map:
|
||||||
|
return Language(self.language_map[ext])
|
||||||
|
|
||||||
|
try:
|
||||||
|
content = path.read_text(encoding="utf-8", errors="replace")
|
||||||
|
return self.detect_from_content(content, path.name)
|
||||||
|
except Exception:
|
||||||
|
return Language("unknown")
|
||||||
|
|
||||||
|
def detect_from_content(
|
||||||
|
self, content: str, filename: str = ""
|
||||||
|
) -> "Language":
|
||||||
|
"""Detect language from file content."""
|
||||||
|
if not content:
|
||||||
|
return Language("unknown")
|
||||||
|
|
||||||
|
first_line = content.split('\n')[0].strip()
|
||||||
|
|
||||||
|
if first_line.startswith("#!"):
|
||||||
|
shebang = first_line[2:].strip().split()[0] if len(first_line) > 2 else ""
|
||||||
|
if "python" in shebang.lower():
|
||||||
|
return Language("python")
|
||||||
|
elif "node" in shebang.lower() or "js" in shebang.lower():
|
||||||
|
return Language("javascript")
|
||||||
|
|
||||||
|
if filename.endswith(".py") or filename.endswith(".pyw"):
|
||||||
|
return Language("python")
|
||||||
|
|
||||||
|
if filename.endswith((".ts", ".tsx")):
|
||||||
|
return Language("typescript")
|
||||||
|
|
||||||
|
if filename.endswith((".js", ".jsx")):
|
||||||
|
return Language("javascript")
|
||||||
|
|
||||||
|
keywords = self._detect_keywords(content)
|
||||||
|
if keywords:
|
||||||
|
return Language(keywords)
|
||||||
|
|
||||||
|
return Language("unknown")
|
||||||
|
|
||||||
|
def _detect_keywords(self, content: str) -> Optional[str]:
|
||||||
|
"""Detect language from keywords in content."""
|
||||||
|
content_lower = content.lower()
|
||||||
|
|
||||||
|
python_indicators = [
|
||||||
|
"import ",
|
||||||
|
"from ",
|
||||||
|
"def ",
|
||||||
|
"class ",
|
||||||
|
"if __name__",
|
||||||
|
"print(",
|
||||||
|
"return ",
|
||||||
|
"except ",
|
||||||
|
"try:",
|
||||||
|
"with open",
|
||||||
|
"lambda ",
|
||||||
|
]
|
||||||
|
|
||||||
|
js_indicators = [
|
||||||
|
"const ",
|
||||||
|
"let ",
|
||||||
|
"=>",
|
||||||
|
"function ",
|
||||||
|
"require(",
|
||||||
|
"module.exports",
|
||||||
|
"export ",
|
||||||
|
"import ",
|
||||||
|
"console.log",
|
||||||
|
"async function",
|
||||||
|
"await ",
|
||||||
|
]
|
||||||
|
|
||||||
|
python_score = sum(1 for ind in python_indicators if ind in content_lower)
|
||||||
|
js_score = sum(1 for ind in js_indicators if ind in content_lower)
|
||||||
|
|
||||||
|
if python_score > js_score:
|
||||||
|
return "python"
|
||||||
|
elif js_score > python_score:
|
||||||
|
return "javascript"
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
def get_supported_languages(self) -> list[str]:
|
||||||
|
"""Get list of supported languages."""
|
||||||
|
return ["python", "javascript", "typescript"]
|
||||||
|
|
||||||
|
def is_supported(self, language: str) -> bool:
|
||||||
|
"""Check if a language is supported."""
|
||||||
|
return language.lower() in self.get_supported_languages()
|
||||||
|
|
||||||
|
|
||||||
|
class Language:
    """Represents a detected programming language.

    The name is normalised to lower case on construction, so comparison
    with another ``Language`` or with a plain string is case-insensitive.
    """

    def __init__(self, value: str):
        """Initialize the language.

        Args:
            value: Language name; stored lower-cased.
        """
        self._value = value.lower()

    @property
    def value(self) -> str:
        """Get the language value (always lower case)."""
        return self._value

    def __eq__(self, other: object) -> bool:
        """Check equality with another Language or string.

        Returns:
            ``True`` or ``False`` for ``Language`` and ``str`` operands, and
            ``NotImplemented`` for any other type so Python can try the
            other operand's comparison before settling on "not equal".
        """
        if isinstance(other, Language):
            return self._value == other._value
        if isinstance(other, str):
            return self._value == other.lower()
        return NotImplemented

    def __hash__(self) -> int:
        """Hash for use in sets and dicts.

        Hashes the lower-cased name, so equal ``Language`` instances always
        share a hash.
        """
        return hash(self._value)

    def __str__(self) -> str:
        """String representation (the lower-cased language name)."""
        return self._value

    def __repr__(self) -> str:
        """Repr representation."""
        return f"Language('{self._value}')"
|
||||||
Reference in New Issue
Block a user