# Origin: commit 99dd7d11b418930691815cd427219e5936851a2e
# Author: 7000pctAUTO
# Date:   Fri, 30 Jan 2026 22:12:50 +0000
# Subject: [PATCH] Initial upload with CI/CD workflow
# File:   codesnap/core/language_detection.py
"""Language detection module for CodeSnap.

Languages are identified first by file extension and, when that fails and
file content is available, by the interpreter named on the shebang line.
"""

import re
from dataclasses import dataclass
from pathlib import Path
from typing import Optional


@dataclass
class LanguageInfo:
    """Information about a detected programming language."""

    # Human-readable display name, e.g. "C++".
    name: str
    # File suffixes (lower-case, leading dot) that map to this language.
    extensions: tuple[str, ...]
    # Interpreter base names that may appear on a shebang line.
    shebangs: tuple[str, ...]


# Registry of supported languages, keyed by the identifier that the
# detection functions return.
LANGUAGE_MAP: dict[str, LanguageInfo] = {
    "python": LanguageInfo(
        name="Python",
        extensions=(".py", ".pyw", ".pyi"),
        shebangs=("python", "python3", "pypy"),
    ),
    "javascript": LanguageInfo(
        name="JavaScript",
        extensions=(".js", ".mjs", ".cjs"),
        shebangs=("node", "nodejs"),
    ),
    "typescript": LanguageInfo(
        name="TypeScript",
        extensions=(".ts", ".tsx"),
        shebangs=(),
    ),
    "go": LanguageInfo(
        name="Go",
        extensions=(".go",),
        shebangs=("go",),
    ),
    "rust": LanguageInfo(
        name="Rust",
        extensions=(".rs",),
        shebangs=("rust", "rustc"),
    ),
    "java": LanguageInfo(
        name="Java",
        extensions=(".java",),
        shebangs=(),
    ),
    "c": LanguageInfo(
        name="C",
        extensions=(".c", ".h"),
        shebangs=(),
    ),
    "cpp": LanguageInfo(
        name="C++",
        extensions=(".cpp", ".cc", ".cxx", ".hpp", ".hxx"),
        shebangs=(),
    ),
    "ruby": LanguageInfo(
        name="Ruby",
        extensions=(".rb", ".erb"),
        shebangs=("ruby",),
    ),
    "php": LanguageInfo(
        name="PHP",
        extensions=(".php",),
        shebangs=("php",),
    ),
}

# Optional version tail on an interpreter name: "3", "3.11", "2.7", ...
_VERSION_SUFFIX = re.compile(r"[\d.]*")


def _shebang_interpreter(shebang_line: str) -> Optional[str]:
    """Return the interpreter base name from a line that starts with ``#!``.

    ``#!/usr/bin/env [options] [VAR=value ...] prog`` is resolved to ``prog``
    so that the ``env`` indirection does not hide the real interpreter.
    Returns ``None`` when no interpreter can be identified.
    """
    tokens = shebang_line[2:].split()
    if not tokens:
        return None

    command = tokens[0].rsplit("/", 1)[-1]
    if command != "env":
        return command

    for token in tokens[1:]:
        # Skip env's own options (e.g. -S, -i) and variable assignments.
        if token.startswith("-") or "=" in token:
            continue
        return token.rsplit("/", 1)[-1]
    return None


def _is_interpreter(command: str, interpreter: str) -> bool:
    """Tell whether *command* is *interpreter*, optionally version-suffixed."""
    if not command.startswith(interpreter):
        return False
    return _VERSION_SUFFIX.fullmatch(command[len(interpreter):]) is not None


class LanguageDetector:
    """Detects programming languages from file extensions and shebangs."""

    def __init__(self) -> None:
        """Build the extension -> language-key lookup from ``LANGUAGE_MAP``."""
        self._ext_to_lang: dict[str, str] = {}
        for lang_key, lang_info in LANGUAGE_MAP.items():
            for ext in lang_info.extensions:
                self._ext_to_lang[ext] = lang_key

    def detect_from_path(self, path: Path) -> Optional[str]:
        """Detect language from file path extension.

        The suffix is compared case-insensitively. Returns the language key,
        or ``None`` when the extension is unknown or absent.
        """
        suffix = path.suffix.lower()
        return self._ext_to_lang.get(suffix)

    def detect_from_content(self, content: str) -> Optional[str]:
        """Detect language from file content shebang.

        Only the first line is inspected. The interpreter's base name must
        equal one of a language's registered shebang names, optionally
        followed by a numeric version (``python3.11``). Plain suffix matches
        are rejected on purpose: ``mongo`` or ``cargo`` must not be reported
        as Go merely because they end in ``go``.

        Returns the language key, or ``None`` when there is no shebang or the
        interpreter is not recognised.
        """
        first_line = content.partition("\n")[0].strip()
        if not first_line.startswith("#!"):
            return None

        command = _shebang_interpreter(first_line)
        if not command:
            return None

        for lang_key, lang_info in LANGUAGE_MAP.items():
            for interpreter in lang_info.shebangs:
                if _is_interpreter(command, interpreter):
                    return lang_key

        return None

    def detect(self, path: Path, content: Optional[str] = None) -> Optional[str]:
        """Detect language using both path and content analysis.

        The extension takes precedence; the shebang is consulted only when
        the extension gives no answer and *content* is provided.
        """
        ext_result = self.detect_from_path(path)
        if ext_result:
            return ext_result

        if content is not None:
            return self.detect_from_content(content)

        return None

    def get_language_info(self, lang_key: str) -> Optional[LanguageInfo]:
        """Get language information by key, or ``None`` for an unknown key."""
        return LANGUAGE_MAP.get(lang_key)

    def get_supported_extensions(self) -> list[str]:
        """Get all supported file extensions."""
        return list(self._ext_to_lang)

    def get_supported_languages(self) -> list[str]:
        """Get all supported language names."""
        return [info.name for info in LANGUAGE_MAP.values()]


def detect_language(path: Path, content: Optional[str] = None) -> Optional[str]:
    """Convenience function to detect language of a file.

    Creates a fresh ``LanguageDetector`` and delegates to its ``detect``.
    """
    detector = LanguageDetector()
    return detector.detect(path, content)