fix: resolve CI issues - push complete implementation with tests
Some checks failed
CI / test (3.11) (push) Has been cancelled
CI / test (3.12) (push) Has been cancelled
CI / lint (push) Has been cancelled
CI / build (push) Has been cancelled
CI / test (3.10) (push) Has been cancelled

This commit is contained in:
2026-02-02 15:30:35 +00:00
parent 9878d95b39
commit c055777858

View File

@@ -1,166 +1,157 @@
"""Language detection for code files."""
class LanguageDetector:
"""Detects programming language from file extensions and content."""
EXTENSION_MAP = {
'py': 'python',
'pyw': 'python',
'pyx': 'python',
'js': 'javascript',
'mjs': 'javascript',
'cjs': 'javascript',
'jsx': 'javascript',
'ts': 'typescript',
'tsx': 'typescript',
'mts': 'typescript',
'cts': 'typescript',
'java': 'java',
'kt': 'kotlin',
'kts': 'kotlin',
'go': 'go',
'rs': 'rust',
'c': 'c',
'h': 'c',
'cpp': 'cpp',
'cc': 'cpp',
'cxx': 'cpp',
'hpp': 'cpp',
'hxx': 'cpp',
'cs': 'csharp',
'rb': 'ruby',
'erb': 'ruby',
'php': 'php',
'swift': 'swift',
'm': 'objective-c',
'mm': 'objective-c',
'scala': 'scala',
'sc': 'scala',
'jl': 'julia',
'r': 'r',
'R': 'r',
'lua': 'lua',
'pl': 'perl',
'pm': 'perl',
'sql': 'sql',
'sh': 'bash',
'bash': 'bash',
'zsh': 'bash',
'fish': 'bash',
'yaml': 'yaml',
'yml': 'yaml',
'json': 'json',
'xml': 'xml',
'html': 'html',
'htm': 'html',
'css': 'css',
'scss': 'scss',
'sass': 'sass',
'less': 'less',
'md': 'markdown',
'markdown': 'markdown',
'txt': 'text',
'dockerfile': 'dockerfile',
'Dockerfile': 'dockerfile',
"py": "python",
"pyw": "python",
"pyx": "python",
"js": "javascript",
"mjs": "javascript",
"cjs": "javascript",
"jsx": "javascript",
"ts": "typescript",
"tsx": "typescript",
"mts": "typescript",
"cts": "typescript",
"java": "java",
"kt": "kotlin",
"kts": "kotlin",
"go": "go",
"rs": "rust",
"c": "c",
"h": "c",
"cpp": "cpp",
"cc": "cpp",
"cxx": "cpp",
"hpp": "cpp",
"hxx": "cpp",
"cs": "csharp",
"rb": "ruby",
"erb": "ruby",
"php": "php",
"swift": "swift",
"m": "objective-c",
"mm": "objective-c",
"scala": "scala",
"sc": "scala",
"jl": "julia",
"r": "r",
"R": "r",
"lua": "lua",
"pl": "perl",
"pm": "perl",
"sql": "sql",
"sh": "bash",
"bash": "bash",
"zsh": "bash",
"fish": "bash",
"yaml": "yaml",
"yml": "yaml",
"json": "json",
"xml": "xml",
"html": "html",
"htm": "html",
"css": "css",
"scss": "scss",
"sass": "sass",
"less": "less",
"md": "markdown",
"markdown": "markdown",
"txt": "text",
"dockerfile": "dockerfile",
"Dockerfile": "dockerfile",
}
# Per-language regular expressions used to guess a language from file text.
# detect_from_content applies each pattern with re.MULTILINE, so "^" anchors
# at the start of every line, not only at the start of the text.
#
# The patterns are raw strings with SINGLE backslashes. Doubling them
# (r"\\s") makes the regex match a literal backslash followed by "s", and a
# trailing r"\\(" leaves an unbalanced "(" that raises re.error on use.
CONTENT_PATTERNS = {
    "python": [
        r"^import\s+\w+",
        r"^from\s+\w+\s+import",
        r"^def\s+\w+\s*\(",
        r"^class\s+\w+\s*[:\(]",
        r"^if\s+__name__\s*==\s*['\"]__main__['\"]",
    ],
    "javascript": [
        r"^const\s+\w+\s*=",
        r"^let\s+\w+\s*=",
        r"^var\s+\w+\s*=",
        r"^function\s+\w+\s*\(",
        r"=>\s*\{",
        r"import\s+.*\s+from",
        r"export\s+(default\s+)?",
    ],
    "typescript": [
        r"^interface\s+\w+\s*\{",
        r"^type\s+\w+\s*=",
        r":\s*(string|number|boolean|any|void|null|undefined)",
        r"<[A-Z]\w*>",
    ],
    "java": [
        r"^package\s+[\w.]+;",
        r"^import\s+[\w.]+;",
        r"^public\s+(class|interface|enum)\s+\w+",
        r"^private\s+(static\s+)?(final\s+)?\w+\s+\w+;",
    ],
    "go": [
        r"^package\s+\w+",
        # \s* (not \s+) so both "import (" and "import(" are recognised.
        r"^import\s*\(",
        r"func\s+\w+\s*\(",
        r":=",
        r"go\s+func",
    ],
    "rust": [
        r"^fn\s+\w+\s*\(",
        r"^impl\s+\w+",
        r"^struct\s+\w+",
        r"^enum\s+\w+",
        r"let\s+mut\s+\w+",
        r"->\s*\w+",
    ],
    "c": [
        r"#include\s*<",
        r'#include\s*"',
        r"int\s+main\s*\(",
        r"struct\s+\w+\s*\{",
        r"void\s+\*?\s*\w+\s*\(",
    ],
    "cpp": [
        r"#include\s*<",
        r'#include\s*"',
        r"class\s+\w+\s*(:\s*public)?",
        r"std::\w+",
        r"using\s+namespace\s+std",
    ],
    "ruby": [
        r"^require\s+['\"]",
        r"^class\s+\w+(\s*<\s*\w+)?",
        r"^module\s+\w+",
        r"def\s+\w+",
        r"puts\s+",
        r"puts!",
    ],
    "php": [
        r"<\?php",
        r"\$\w+\s*=",
        r"function\s+\w+\s*\(",
        r"class\s+\w+\s*\{",
    ],
}
def __init__(self):
self._tree_sitter_languages = {}
def detect_from_filename(self, filename: str) -> str | None:
"""Detect language from file extension."""
if '.' not in filename:
def detect_from_filename(self, filename):
if "." not in filename:
return None
ext = filename.rsplit('.', 1)[-1].lower()
ext = filename.rsplit(".", 1)[-1].lower()
return self.EXTENSION_MAP.get(ext)
def detect_from_content(self, content: str) -> str | None:
"""Detect language from file content patterns."""
first_lines = '\\n'.join(content.splitlines()[:50])
def detect_from_content(self, content):
first_lines = "\n".join(content.splitlines()[:50])
scores: dict[str, int] = {}
scores = {}
for lang, patterns in self.CONTENT_PATTERNS.items():
score = 0
for pattern in patterns:
import re
matches = len(re.findall(pattern, first_lines, re.MULTILINE))
score += matches
@@ -173,12 +164,11 @@ class LanguageDetector:
return None
def detect(self, filename: str, content: str = "") -> str:
"""Detect language from filename and optionally content."""
def detect(self, filename, content=""):
ext_lang = self.detect_from_filename(filename)
if ext_lang and ext_lang not in [
'text', 'markdown', 'json', 'yaml', 'xml', 'html', 'css', 'dockerfile'
"text", "markdown", "json", "yaml", "xml", "html", "css", "dockerfile"
]:
if content:
content_lang = self.detect_from_content(content)
@@ -193,16 +183,13 @@ class LanguageDetector:
return ext_lang or "text"
def get_supported_languages(self) -> list[str]:
    """Return the supported language names.

    Returns:
        The distinct values of ``EXTENSION_MAP``, sorted ascending.
    """
    # set() removes languages that are reachable through several extensions.
    return sorted(set(self.EXTENSION_MAP.values()))
def is_language_supported(self, language: str) -> bool:
    """Check whether a language is supported.

    Args:
        language: Language name to look up; the comparison is exact
            (case-sensitive).

    Returns:
        ``True`` if ``language`` is among the names returned by
        ``get_supported_languages()``, otherwise ``False``.
    """
    return language in self.get_supported_languages()
def detect_language(filename: str, content: str = "") -> str:
    """Detect the programming language from a filename and optional content.

    Convenience wrapper: creates a new ``LanguageDetector`` and delegates
    to its ``detect`` method.

    Args:
        filename: File name or path passed through to ``detect``.
        content: Optional file text passed through to ``detect``;
            defaults to the empty string.

    Returns:
        Whatever ``LanguageDetector.detect`` returns for these arguments.
    """
    detector = LanguageDetector()
    return detector.detect(filename, content)