Initial upload with CI/CD workflow

This commit is contained in:
2026-01-30 22:12:50 +00:00
parent c6c8de6636
commit 99dd7d11b4

View File

@@ -0,0 +1,129 @@
"""Language detection module for CodeSnap."""
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
@dataclass(frozen=True)
class LanguageInfo:
    """Static description of one supported programming language.

    Instances are frozen (immutable and hashable) because the same objects
    are shared module-wide through ``LANGUAGE_MAP``; an accidental attribute
    assignment by one caller would otherwise change detection for everyone.

    Attributes:
        name: Human-readable display name, e.g. ``"Python"``.
        extensions: File suffixes, including the leading dot, that identify
            the language, e.g. ``(".py", ".pyw")``.
        shebangs: Interpreter names that identify the language when found
            on a ``#!`` line, e.g. ``("python", "python3")``. Empty when the
            language has no associated interpreter names.
    """

    name: str
    extensions: tuple[str, ...]
    shebangs: tuple[str, ...]
# Registry of supported languages, keyed by a short lowercase identifier.
# Each row below is (key, display name, file extensions, shebang interpreters);
# rows are inserted in the order listed, which is also the order in which
# shebang patterns are tried during content-based detection.
LANGUAGE_MAP: dict[str, LanguageInfo] = {
    key: LanguageInfo(name=display_name, extensions=suffixes, shebangs=interpreters)
    for key, display_name, suffixes, interpreters in (
        ("python", "Python", (".py", ".pyw", ".pyi"), ("python", "python3", "pypy")),
        ("javascript", "JavaScript", (".js", ".mjs", ".cjs"), ("node", "nodejs")),
        ("typescript", "TypeScript", (".ts", ".tsx"), ()),
        ("go", "Go", (".go",), ("go",)),
        ("rust", "Rust", (".rs",), ("rust", "rustc")),
        ("java", "Java", (".java",), ()),
        ("c", "C", (".c", ".h"), ()),
        ("cpp", "C++", (".cpp", ".cc", ".cxx", ".hpp", ".hxx"), ()),
        ("ruby", "Ruby", (".rb", ".erb"), ("ruby",)),
        ("php", "PHP", (".php",), ("php",)),
    )
}
class LanguageDetector:
    """Detect programming languages from file extensions and shebang lines.

    Detection methods return keys of ``LANGUAGE_MAP`` (for example
    ``"python"``), not display names; use ``get_language_info`` to obtain the
    full ``LanguageInfo`` record for a key.
    """

    # Trailing version suffix on an interpreter name, e.g. the "3.11" in
    # "python3.11" or the "-2.7" in "ruby-2.7".
    _VERSION_SUFFIX = re.compile(r"[-.\d]+$")

    def __init__(self) -> None:
        """Build the extension -> language-key lookup from ``LANGUAGE_MAP``."""
        # If two languages declared the same extension, the later entry in
        # LANGUAGE_MAP would win.
        self._ext_to_lang: dict[str, str] = {
            ext: lang_key
            for lang_key, lang_info in LANGUAGE_MAP.items()
            for ext in lang_info.extensions
        }

    def detect_from_path(self, path: Path) -> Optional[str]:
        """Return the language key for ``path``'s suffix, or ``None``.

        The suffix is compared case-insensitively, so ``Main.PY`` is treated
        like ``main.py``.
        """
        return self._ext_to_lang.get(path.suffix.lower())

    @classmethod
    def _interpreter_names(cls, shebang: str) -> tuple[str, ...]:
        """Return candidate interpreter names for the text following ``#!``.

        The result holds the interpreter's basename and that basename with
        any trailing version suffix removed (``python3.11`` -> ``python``).
        It is empty when no interpreter can be identified.
        """
        tokens = shebang.split()
        if not tokens:
            return ()
        command = tokens[0].rsplit("/", 1)[-1]
        if command == "env":
            # "#!/usr/bin/env [-S] [VAR=value ...] interpreter args": the real
            # interpreter is the first token that is neither an option nor a
            # variable assignment.
            command = next(
                (
                    token.rsplit("/", 1)[-1]
                    for token in tokens[1:]
                    if not token.startswith("-") and "=" not in token
                ),
                "",
            )
        if not command:
            return ()
        unversioned = cls._VERSION_SUFFIX.sub("", command)
        return (command, unversioned) if unversioned else (command,)

    def detect_from_content(self, content: str) -> Optional[str]:
        """Return the language key named by ``content``'s shebang, or ``None``.

        Only the first line is inspected. The interpreter is matched by its
        exact basename (optionally minus a version suffix) against each
        language's ``shebangs``, so ``#!/usr/bin/env python3`` and
        ``#!/usr/bin/python3.11`` resolve to ``"python"``, while unrelated
        commands that merely end with a known name (``cargo`` vs ``go``) do
        not match.
        """
        # partition() stops at the first newline instead of splitting the
        # whole file just to read one line.
        first_line = content.partition("\n")[0].strip()
        if not first_line.startswith("#!"):
            return None
        candidates = self._interpreter_names(first_line[2:])
        if not candidates:
            return None
        for lang_key, lang_info in LANGUAGE_MAP.items():
            if any(name in lang_info.shebangs for name in candidates):
                return lang_key
        return None

    def detect(self, path: Path, content: Optional[str] = None) -> Optional[str]:
        """Detect the language key from ``path``, falling back to ``content``.

        The file extension takes precedence; the shebang in ``content`` is
        consulted only when the extension is unknown and ``content`` is given.
        Returns ``None`` when neither source identifies a language.
        """
        lang_key = self.detect_from_path(path)
        if lang_key is None and content is not None:
            lang_key = self.detect_from_content(content)
        return lang_key

    def get_language_info(self, lang_key: str) -> Optional[LanguageInfo]:
        """Return the ``LanguageInfo`` for ``lang_key``, or ``None`` if unknown."""
        return LANGUAGE_MAP.get(lang_key)

    def get_supported_extensions(self) -> list[str]:
        """Return every file extension this detector recognises."""
        return list(self._ext_to_lang)

    def get_supported_languages(self) -> list[str]:
        """Return the display names of all languages in ``LANGUAGE_MAP``."""
        return [info.name for info in LANGUAGE_MAP.values()]
def detect_language(path: Path, content: Optional[str] = None) -> Optional[str]:
    """Detect a file's language key with a throwaway ``LanguageDetector``.

    Shorthand for ``LanguageDetector().detect(path, content)``; a new
    detector is created on every call.
    """
    return LanguageDetector().detect(path, content)