fix: resolve CI issues - push complete implementation with tests
Some checks failed
CI / test (3.11) (push) Has been cancelled
CI / test (3.12) (push) Has been cancelled
CI / lint (push) Has been cancelled
CI / build (push) Has been cancelled
CI / test (3.10) (push) Has been cancelled

This commit is contained in:
2026-02-02 15:30:35 +00:00
parent 9878d95b39
commit c055777858

View File

@@ -1,166 +1,157 @@
"""Language detection for code files."""
class LanguageDetector: class LanguageDetector:
"""Detects programming language from file extensions and content."""
# Maps a file extension (without the leading dot) to a language name.
# NOTE(review): detect_from_filename lower-cases the extension before the
# lookup, so the mixed-case keys "R" and "Dockerfile" cannot be reached through
# that path; they are kept so the mapping itself is unchanged.
EXTENSION_MAP = {
    "py": "python",
    "pyw": "python",
    "pyx": "python",
    "js": "javascript",
    "mjs": "javascript",
    "cjs": "javascript",
    "jsx": "javascript",
    "ts": "typescript",
    "tsx": "typescript",
    "mts": "typescript",
    "cts": "typescript",
    "java": "java",
    "kt": "kotlin",
    "kts": "kotlin",
    "go": "go",
    "rs": "rust",
    "c": "c",
    "h": "c",
    "cpp": "cpp",
    "cc": "cpp",
    "cxx": "cpp",
    "hpp": "cpp",
    "hxx": "cpp",
    "cs": "csharp",
    "rb": "ruby",
    "erb": "ruby",
    "php": "php",
    "swift": "swift",
    "m": "objective-c",
    "mm": "objective-c",
    "scala": "scala",
    "sc": "scala",
    "jl": "julia",
    "r": "r",
    "R": "r",
    "lua": "lua",
    "pl": "perl",
    "pm": "perl",
    "sql": "sql",
    "sh": "bash",
    "bash": "bash",
    "zsh": "bash",
    "fish": "bash",
    "yaml": "yaml",
    "yml": "yaml",
    "json": "json",
    "xml": "xml",
    "html": "html",
    "htm": "html",
    "css": "css",
    "scss": "scss",
    "sass": "sass",
    "less": "less",
    "md": "markdown",
    "markdown": "markdown",
    "txt": "text",
    "dockerfile": "dockerfile",
    "Dockerfile": "dockerfile",
}
# Per-language regular expressions used to score file content.
# Every pattern is a raw string, so regex escapes take a SINGLE backslash
# (r"\s", r"\w", r"\("). A doubled backslash inside a raw string (r"\\s")
# would match a literal backslash followed by "s" and never match real code.
CONTENT_PATTERNS = {
    "python": [
        r"^import\s+\w+",
        r"^from\s+\w+\s+import",
        r"^def\s+\w+\s*\(",
        r"^class\s+\w+\s*[:\(]",
        r"^if\s+__name__\s*==\s*['\"]__main__['\"]",
    ],
    "javascript": [
        r"^const\s+\w+\s*=",
        r"^let\s+\w+\s*=",
        r"^var\s+\w+\s*=",
        r"^function\s+\w+\s*\(",
        r"=>\s*\{",
        r"import\s+.*\s+from",
        r"export\s+(default\s+)?",
    ],
    "typescript": [
        r"^interface\s+\w+\s*\{",
        r"^type\s+\w+\s*=",
        r":\s*(string|number|boolean|any|void|null|undefined)",
        r"<[A-Z]\w*>",
    ],
    "java": [
        r"^package\s+[\w.]+;",
        r"^import\s+[\w.]+;",
        r"^public\s+(class|interface|enum)\s+\w+",
        r"^private\s+(static\s+)?(final\s+)?\w+\s+\w+;",
    ],
    "go": [
        r"^package\s+\w+",
        r"^import\s*\(",
        r"func\s+\w+\s*\(",
        r":=",
        r"go\s+func",
    ],
    "rust": [
        r"^fn\s+\w+\s*\(",
        r"^impl\s+\w+",
        r"^struct\s+\w+",
        r"^enum\s+\w+",
        r"let\s+mut\s+\w+",
        r"->\s*\w+",
    ],
    "c": [
        r"#include\s*<",
        r"#include\s*\"",
        r"int\s+main\s*\(",
        r"struct\s+\w+\s*\{",
        r"void\s+\*?\s*\w+\s*\(",
    ],
    "cpp": [
        r"#include\s*<",
        r"#include\s*\"",
        r"class\s+\w+\s*(:\s*public)?",
        r"std::\w+",
        r"using\s+namespace\s+std",
    ],
    "ruby": [
        r"^require\s+['\"]",
        r"^class\s+\w+(\s*<\s*\w+)?",
        r"^module\s+\w+",
        r"def\s+\w+",
        r"puts\s+",
        r"puts!",
    ],
    "php": [
        r"<\?php",
        r"\$\w+\s*=",
        r"function\s+\w+\s*\(",
        r"class\s+\w+\s*\{",
    ],
}
def __init__(self): def __init__(self):
self._tree_sitter_languages = {} self._tree_sitter_languages = {}
def detect_from_filename(self, filename: str) -> str | None: def detect_from_filename(self, filename):
"""Detect language from file extension.""" if "." not in filename:
if '.' not in filename:
return None return None
ext = filename.rsplit('.', 1)[-1].lower() ext = filename.rsplit(".", 1)[-1].lower()
return self.EXTENSION_MAP.get(ext) return self.EXTENSION_MAP.get(ext)
def detect_from_content(self, content: str) -> str | None: def detect_from_content(self, content):
"""Detect language from file content patterns.""" first_lines = "\n".join(content.splitlines()[:50])
first_lines = '\\n'.join(content.splitlines()[:50])
scores: dict[str, int] = {} scores = {}
for lang, patterns in self.CONTENT_PATTERNS.items(): for lang, patterns in self.CONTENT_PATTERNS.items():
score = 0 score = 0
for pattern in patterns: for pattern in patterns:
import re
matches = len(re.findall(pattern, first_lines, re.MULTILINE)) matches = len(re.findall(pattern, first_lines, re.MULTILINE))
score += matches score += matches
@@ -173,12 +164,11 @@ class LanguageDetector:
return None return None
def detect(self, filename: str, content: str = "") -> str: def detect(self, filename, content=""):
"""Detect language from filename and optionally content."""
ext_lang = self.detect_from_filename(filename) ext_lang = self.detect_from_filename(filename)
if ext_lang and ext_lang not in [ if ext_lang and ext_lang not in [
'text', 'markdown', 'json', 'yaml', 'xml', 'html', 'css', 'dockerfile' "text", "markdown", "json", "yaml", "xml", "html", "css", "dockerfile"
]: ]:
if content: if content:
content_lang = self.detect_from_content(content) content_lang = self.detect_from_content(content)
@@ -193,16 +183,13 @@ class LanguageDetector:
return ext_lang or "text" return ext_lang or "text"
def get_supported_languages(self) -> list[str]:
    """Return the distinct language names in ``EXTENSION_MAP``, sorted."""
    # Several extensions map to the same language, so de-duplicate first.
    return sorted(set(self.EXTENSION_MAP.values()))
def is_language_supported(self, language: str) -> bool:
    """Return ``True`` if *language* is among ``get_supported_languages()``.

    The comparison is an exact, case-sensitive match against the names
    returned by ``get_supported_languages``.
    """
    return language in self.get_supported_languages()
def detect_language(filename: str, content: str = "") -> str:
    """Detect the programming language of a file.

    Convenience wrapper that builds a new ``LanguageDetector`` on each call
    and returns the result of its ``detect`` method.

    Args:
        filename: File name or path, passed through to ``detect``.
        content: Optional file content, passed through to ``detect``.

    Returns:
        Whatever ``LanguageDetector.detect`` returns for these arguments.
    """
    return LanguageDetector().detect(filename, content)