Initial upload with CI/CD workflow
This commit is contained in:
129
codesnap/core/language_detection.py
Normal file
129
codesnap/core/language_detection.py
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
"""Language detection module for CodeSnap."""
|
||||||
|
|
||||||
|
import re
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class LanguageInfo:
    """Describe one supported language: its display name and detection hints."""

    # Human-readable language name, e.g. "Python".
    name: str
    # File suffixes associated with the language, leading dot included.
    extensions: tuple[str, ...]
    # Interpreter command names looked for on a "#!" line; may be empty.
    shebangs: tuple[str, ...]
|
||||||
|
|
||||||
|
|
||||||
|
# Registry of supported languages, keyed by a lowercase language identifier.
# Each entry lists the file extensions (with leading dot) and the interpreter
# command names that identify the language on a "#!" line.
LANGUAGE_MAP: dict[str, LanguageInfo] = {
    "python": LanguageInfo(
        name="Python",
        extensions=(".py", ".pyw", ".pyi"),
        shebangs=("python", "python3", "pypy"),
    ),
    "javascript": LanguageInfo(
        name="JavaScript",
        # ".jsx" is listed for parity with ".tsx" in the TypeScript entry.
        extensions=(".js", ".mjs", ".cjs", ".jsx"),
        shebangs=("node", "nodejs"),
    ),
    "typescript": LanguageInfo(
        name="TypeScript",
        # ".mts" / ".cts" are the TypeScript counterparts of ".mjs" / ".cjs".
        extensions=(".ts", ".tsx", ".mts", ".cts"),
        shebangs=(),
    ),
    "go": LanguageInfo(
        name="Go",
        extensions=(".go",),
        shebangs=("go",),
    ),
    "rust": LanguageInfo(
        name="Rust",
        extensions=(".rs",),
        shebangs=("rust", "rustc"),
    ),
    "java": LanguageInfo(
        name="Java",
        extensions=(".java",),
        shebangs=(),
    ),
    "c": LanguageInfo(
        name="C",
        extensions=(".c", ".h"),
        shebangs=(),
    ),
    "cpp": LanguageInfo(
        name="C++",
        extensions=(".cpp", ".cc", ".cxx", ".hpp", ".hxx"),
        shebangs=(),
    ),
    "ruby": LanguageInfo(
        name="Ruby",
        extensions=(".rb", ".erb"),
        shebangs=("ruby",),
    ),
    "php": LanguageInfo(
        name="PHP",
        extensions=(".php",),
        shebangs=("php",),
    ),
}
|
||||||
|
|
||||||
|
|
||||||
|
class LanguageDetector:
    """Detect programming languages from file extensions and shebang lines.

    Extension lookups use a table built once from ``LANGUAGE_MAP`` when the
    detector is constructed; shebang lookups consult ``LANGUAGE_MAP`` on
    every call.
    """

    # Trailing version number on an interpreter name, e.g. the "3.11" in
    # "python3.11" or the "3" in "pypy3".
    _VERSION_SUFFIX = re.compile(r"\d+(?:\.\d+)*$")

    def __init__(self) -> None:
        """Build the extension -> language-key lookup table."""
        # If two languages ever declare the same extension, the one listed
        # later in LANGUAGE_MAP wins.
        self._ext_to_lang: dict[str, str] = {
            ext: lang_key
            for lang_key, lang_info in LANGUAGE_MAP.items()
            for ext in lang_info.extensions
        }

    def detect_from_path(self, path: Path) -> Optional[str]:
        """Return the language key for ``path``'s extension, or ``None``.

        The comparison is case-insensitive: the suffix is lowercased before
        the lookup.
        """
        suffix = path.suffix.lower()
        return self._ext_to_lang.get(suffix)

    @staticmethod
    def _interpreter_name(shebang: str) -> Optional[str]:
        """Return the interpreter's base name from the text after ``#!``.

        For ``/usr/bin/env`` style shebangs the interpreter is the first
        argument that is neither an option (``-S``, ``-i`` ...) nor a
        ``NAME=value`` assignment. Returns ``None`` when no interpreter can
        be identified.
        """
        tokens = shebang.split()
        if not tokens:
            return None

        name = tokens[0].rsplit("/", 1)[-1]
        if name != "env":
            return name

        for token in tokens[1:]:
            if token.startswith("-") or "=" in token:
                continue
            return token.rsplit("/", 1)[-1]
        return None

    def detect_from_content(self, content: str) -> Optional[str]:
        """Return the language key named by ``content``'s shebang line.

        Only the first line is inspected. The interpreter is resolved through
        ``env`` when present (``#!/usr/bin/env python3``) and compared by its
        exact base name, first as written and then with a trailing version
        number removed (``python3.11`` -> ``python``). Exact matching, rather
        than suffix matching, keeps commands such as ``cargo`` from being
        mistaken for ``go``.

        Returns ``None`` when there is no shebang or the interpreter is not
        listed in ``LANGUAGE_MAP``.
        """
        first_line = content.partition("\n")[0].strip()
        if not first_line.startswith("#!"):
            return None

        name = self._interpreter_name(first_line[2:])
        if not name:
            return None

        candidates = (name, self._VERSION_SUFFIX.sub("", name))
        for lang_key, lang_info in LANGUAGE_MAP.items():
            if any(candidate in lang_info.shebangs for candidate in candidates):
                return lang_key

        return None

    def detect(self, path: Path, content: Optional[str] = None) -> Optional[str]:
        """Detect the language of a file from its path, then its content.

        The extension takes precedence; the shebang in ``content`` is only
        consulted when the extension is unknown and ``content`` is given.
        """
        ext_result = self.detect_from_path(path)
        if ext_result:
            return ext_result

        if content is not None:
            return self.detect_from_content(content)

        return None

    def get_language_info(self, lang_key: str) -> Optional[LanguageInfo]:
        """Return the ``LanguageInfo`` registered under ``lang_key``, or ``None``."""
        return LANGUAGE_MAP.get(lang_key)

    def get_supported_extensions(self) -> list[str]:
        """Return every file extension known to this detector."""
        return list(self._ext_to_lang.keys())

    def get_supported_languages(self) -> list[str]:
        """Return the display names of all languages in ``LANGUAGE_MAP``."""
        return [info.name for info in LANGUAGE_MAP.values()]
|
||||||
|
|
||||||
|
|
||||||
|
def detect_language(path: Path, content: Optional[str] = None) -> Optional[str]:
    """Detect a file's language with a freshly constructed ``LanguageDetector``.

    Shorthand for ``LanguageDetector().detect(path, content)``; returns the
    language key, or ``None`` when the language cannot be determined.
    """
    return LanguageDetector().detect(path, content)
|
||||||
Reference in New Issue
Block a user