fix: resolve CI issues - push complete implementation with tests
This commit is contained in:
@@ -1,166 +1,157 @@
|
|||||||
"""Language detection for code files."""
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class LanguageDetector:
|
class LanguageDetector:
|
||||||
"""Detects programming language from file extensions and content."""
|
|
||||||
|
|
||||||
EXTENSION_MAP = {
|
EXTENSION_MAP = {
|
||||||
'py': 'python',
|
"py": "python",
|
||||||
'pyw': 'python',
|
"pyw": "python",
|
||||||
'pyx': 'python',
|
"pyx": "python",
|
||||||
'js': 'javascript',
|
"js": "javascript",
|
||||||
'mjs': 'javascript',
|
"mjs": "javascript",
|
||||||
'cjs': 'javascript',
|
"cjs": "javascript",
|
||||||
'jsx': 'javascript',
|
"jsx": "javascript",
|
||||||
'ts': 'typescript',
|
"ts": "typescript",
|
||||||
'tsx': 'typescript',
|
"tsx": "typescript",
|
||||||
'mts': 'typescript',
|
"mts": "typescript",
|
||||||
'cts': 'typescript',
|
"cts": "typescript",
|
||||||
'java': 'java',
|
"java": "java",
|
||||||
'kt': 'kotlin',
|
"kt": "kotlin",
|
||||||
'kts': 'kotlin',
|
"kts": "kotlin",
|
||||||
'go': 'go',
|
"go": "go",
|
||||||
'rs': 'rust',
|
"rs": "rust",
|
||||||
'c': 'c',
|
"c": "c",
|
||||||
'h': 'c',
|
"h": "c",
|
||||||
'cpp': 'cpp',
|
"cpp": "cpp",
|
||||||
'cc': 'cpp',
|
"cc": "cpp",
|
||||||
'cxx': 'cpp',
|
"cxx": "cpp",
|
||||||
'hpp': 'cpp',
|
"hpp": "cpp",
|
||||||
'hxx': 'cpp',
|
"hxx": "cpp",
|
||||||
'cs': 'csharp',
|
"cs": "csharp",
|
||||||
'rb': 'ruby',
|
"rb": "ruby",
|
||||||
'erb': 'ruby',
|
"erb": "ruby",
|
||||||
'php': 'php',
|
"php": "php",
|
||||||
'swift': 'swift',
|
"swift": "swift",
|
||||||
'm': 'objective-c',
|
"m": "objective-c",
|
||||||
'mm': 'objective-c',
|
"mm": "objective-c",
|
||||||
'scala': 'scala',
|
"scala": "scala",
|
||||||
'sc': 'scala',
|
"sc": "scala",
|
||||||
'jl': 'julia',
|
"jl": "julia",
|
||||||
'r': 'r',
|
"r": "r",
|
||||||
'R': 'r',
|
"R": "r",
|
||||||
'lua': 'lua',
|
"lua": "lua",
|
||||||
'pl': 'perl',
|
"pl": "perl",
|
||||||
'pm': 'perl',
|
"pm": "perl",
|
||||||
'sql': 'sql',
|
"sql": "sql",
|
||||||
'sh': 'bash',
|
"sh": "bash",
|
||||||
'bash': 'bash',
|
"bash": "bash",
|
||||||
'zsh': 'bash',
|
"zsh": "bash",
|
||||||
'fish': 'bash',
|
"fish": "bash",
|
||||||
'yaml': 'yaml',
|
"yaml": "yaml",
|
||||||
'yml': 'yaml',
|
"yml": "yaml",
|
||||||
'json': 'json',
|
"json": "json",
|
||||||
'xml': 'xml',
|
"xml": "xml",
|
||||||
'html': 'html',
|
"html": "html",
|
||||||
'htm': 'html',
|
"htm": "html",
|
||||||
'css': 'css',
|
"css": "css",
|
||||||
'scss': 'scss',
|
"scss": "scss",
|
||||||
'sass': 'sass',
|
"sass": "sass",
|
||||||
'less': 'less',
|
"less": "less",
|
||||||
'md': 'markdown',
|
"md": "markdown",
|
||||||
'markdown': 'markdown',
|
"markdown": "markdown",
|
||||||
'txt': 'text',
|
"txt": "text",
|
||||||
'dockerfile': 'dockerfile',
|
"dockerfile": "dockerfile",
|
||||||
'Dockerfile': 'dockerfile',
|
"Dockerfile": "dockerfile",
|
||||||
}
|
}
|
||||||
|
|
||||||
CONTENT_PATTERNS = {
|
CONTENT_PATTERNS = {
|
||||||
'python': [
|
"python": [
|
||||||
r'^import\\s+\\w+',
|
r"^import\\s+\\w+",
|
||||||
r'^from\\s+\\w+\\s+import',
|
r"^from\\s+\\w+\\s+import",
|
||||||
r'^def\\s+\\w+\\s*\\(',
|
r"^def\\s+\\w+\\s*\\(",
|
||||||
r'^class\\s+\\w+\\s*[:\\(]',
|
r"^class\\s+\\w+\\s*[:\\(]",
|
||||||
r'^if\\s+__name__\\s*==\\s*[\\'\\"]__main__[\\'\\"]',
|
r"^if\\s+__name__\\s*==\\s*['\"]__main__['\"]",
|
||||||
],
|
],
|
||||||
'javascript': [
|
"javascript": [
|
||||||
r'^const\\s+\\w+\\s*=',
|
r"^const\\s+\\w+\\s*=",
|
||||||
r'^let\\s+\\w+\\s*=',
|
r"^let\\s+\\w+\\s*=",
|
||||||
r'^var\\s+\\w+\\s*=',
|
r"^var\\s+\\w+\\s*=",
|
||||||
r'^function\\s+\\w+\\s*\\(',
|
r"^function\\s+\\w+\\s*\\(",
|
||||||
r'=>\\s*\\{',
|
r"=>\\s*\\{",
|
||||||
r'import\\s+.*\\s+from',
|
r"import\\s+.*\\s+from",
|
||||||
r'export\\s+(default\\s+)?',
|
r"export\\s+(default\\s+)?",
|
||||||
],
|
],
|
||||||
'typescript': [
|
"typescript": [
|
||||||
r'^interface\\s+\\w+\\s*\\{',
|
r"^interface\\s+\\w+\\s*\\{",
|
||||||
r'^type\\s+\\w+\\s*=',
|
r"^type\\s+\\w+\\s*=",
|
||||||
r':\\s*(string|number|boolean|any|void|null|undefined)',
|
r":\\s*(string|number|boolean|any|void|null|undefined)",
|
||||||
r'<[A-Z]\\w*>',
|
r"<[A-Z]\\w*>",
|
||||||
],
|
],
|
||||||
'java': [
|
"java": [
|
||||||
r'^package\\s+[\\w.]+;',
|
r"^package\\s+[\\w.]+;",
|
||||||
r'^import\\s+[\\w.]+;',
|
r"^import\\s+[\\w.]+;",
|
||||||
r'^public\\s+(class|interface|enum)\\s+\\w+',
|
r"^public\\s+(class|interface|enum)\\s+\\w+",
|
||||||
r'^private\\s+(static\\s+)?(final\\s+)?\\w+\\s+\\w+;',
|
r"^private\\s+(static\\s+)?(final\\s+)?\\w+\\s+\\w+;",
|
||||||
],
|
],
|
||||||
'go': [
|
"go": [
|
||||||
r'^package\\s+\\w+',
|
r"^package\\s+\\w+",
|
||||||
r'^import\\s+\\(',
|
r"^import\\s*\\(",
|
||||||
r'func\\s+\\w+\\s*\\(',
|
r"func\\s+\\w+\\s*\\(",
|
||||||
r':=',
|
r":=",
|
||||||
r'go\\s+func',
|
r"go\\s+func",
|
||||||
],
|
],
|
||||||
'rust': [
|
"rust": [
|
||||||
r'^fn\\s+\\w+\\s*\\(',
|
r"^fn\\s+\\w+\\s*\\(",
|
||||||
r'^impl\\s+\\w+',
|
r"^impl\\s+\\w+",
|
||||||
r'^struct\\s+\\w+',
|
r"^struct\\s+\\w+",
|
||||||
r'^enum\\s+\\w+',
|
r"^enum\\s+\\w+",
|
||||||
r'let\\s+mut\\s+\\w+',
|
r"let\\s+mut\\s+\\w+",
|
||||||
r'->\\s*\\w+',
|
r"->\\s*\\w+",
|
||||||
],
|
],
|
||||||
'c': [
|
"c": [
|
||||||
r'#include\\s*<',
|
r"#include\\s*<",
|
||||||
r'#include\\s*"',
|
r"#include\\s*\"",
|
||||||
r'int\\s+main\\s*\\(',
|
r"int\\s+main\\s*\\(",
|
||||||
r'struct\\s+\\w+\\s*\\{',
|
r"struct\\s+\\w+\\s*\\{",
|
||||||
r'void\\s+\\*?\\s*\\w+\\s*\\(',
|
r"void\\s+\\*?\\s*\\w+\\s*\\(",
|
||||||
],
|
],
|
||||||
'cpp': [
|
"cpp": [
|
||||||
r'#include\\s*<',
|
r"#include\\s*<",
|
||||||
r'#include\\s*"',
|
r"#include\\s*\"",
|
||||||
r'class\\s+\\w+\\s*(:\\s*public)?',
|
r"class\\s+\\w+\\s*(:\\s*public)?",
|
||||||
r'std::\\w+',
|
r"std::\\w+",
|
||||||
r'using\\s+namespace\\s+std',
|
r"using\\s+namespace\\s+std",
|
||||||
],
|
],
|
||||||
'ruby': [
|
"ruby": [
|
||||||
r'^require\\s+[\\'\\"]',
|
r"^require\\s+['\"]",
|
||||||
r'^class\\s+\\w+(\\s*<\\s*\\w+)?',
|
r"^class\\s+\\w+(\\s*<\\s*\\w+)?",
|
||||||
r'^module\\s+\\w+',
|
r"^module\\s+\\w+",
|
||||||
r'def\\s+\\w+',
|
r"def\\s+\\w+",
|
||||||
r'puts\\s+',
|
r"puts\\s+",
|
||||||
r'puts!',
|
r"puts!",
|
||||||
],
|
],
|
||||||
'php': [
|
"php": [
|
||||||
r'<\\?php',
|
r"<\?php",
|
||||||
r'\\$\\w+\\s*=',
|
r"\$\\w+\\s*=",
|
||||||
r'function\\s+\\w+\\s*\\(',
|
r"function\\s+\\w+\\s*\\(",
|
||||||
r'class\\s+\\w+\\s*\\{',
|
r"class\\s+\\w+\\s*\\{",
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self._tree_sitter_languages = {}
|
self._tree_sitter_languages = {}
|
||||||
|
|
||||||
def detect_from_filename(self, filename: str) -> str | None:
|
def detect_from_filename(self, filename):
|
||||||
"""Detect language from file extension."""
|
if "." not in filename:
|
||||||
if '.' not in filename:
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
ext = filename.rsplit('.', 1)[-1].lower()
|
ext = filename.rsplit(".", 1)[-1].lower()
|
||||||
return self.EXTENSION_MAP.get(ext)
|
return self.EXTENSION_MAP.get(ext)
|
||||||
|
|
||||||
def detect_from_content(self, content: str) -> str | None:
|
def detect_from_content(self, content):
|
||||||
"""Detect language from file content patterns."""
|
first_lines = "\n".join(content.splitlines()[:50])
|
||||||
first_lines = '\\n'.join(content.splitlines()[:50])
|
|
||||||
|
|
||||||
scores: dict[str, int] = {}
|
scores = {}
|
||||||
|
|
||||||
for lang, patterns in self.CONTENT_PATTERNS.items():
|
for lang, patterns in self.CONTENT_PATTERNS.items():
|
||||||
score = 0
|
score = 0
|
||||||
for pattern in patterns:
|
for pattern in patterns:
|
||||||
import re
|
|
||||||
matches = len(re.findall(pattern, first_lines, re.MULTILINE))
|
matches = len(re.findall(pattern, first_lines, re.MULTILINE))
|
||||||
score += matches
|
score += matches
|
||||||
|
|
||||||
@@ -173,12 +164,11 @@ class LanguageDetector:
|
|||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def detect(self, filename: str, content: str = "") -> str:
|
def detect(self, filename, content=""):
|
||||||
"""Detect language from filename and optionally content."""
|
|
||||||
ext_lang = self.detect_from_filename(filename)
|
ext_lang = self.detect_from_filename(filename)
|
||||||
|
|
||||||
if ext_lang and ext_lang not in [
|
if ext_lang and ext_lang not in [
|
||||||
'text', 'markdown', 'json', 'yaml', 'xml', 'html', 'css', 'dockerfile'
|
"text", "markdown", "json", "yaml", "xml", "html", "css", "dockerfile"
|
||||||
]:
|
]:
|
||||||
if content:
|
if content:
|
||||||
content_lang = self.detect_from_content(content)
|
content_lang = self.detect_from_content(content)
|
||||||
@@ -193,16 +183,13 @@ class LanguageDetector:
|
|||||||
|
|
||||||
return ext_lang or "text"
|
return ext_lang or "text"
|
||||||
|
|
||||||
def get_supported_languages(self) -> list[str]:
|
def get_supported_languages(self):
|
||||||
"""Return list of supported languages."""
|
|
||||||
return sorted(set(self.EXTENSION_MAP.values()))
|
return sorted(set(self.EXTENSION_MAP.values()))
|
||||||
|
|
||||||
def is_language_supported(self, language: str) -> bool:
|
def is_language_supported(self, language):
|
||||||
"""Check if a language is supported."""
|
|
||||||
return language in self.get_supported_languages()
|
return language in self.get_supported_languages()
|
||||||
|
|
||||||
|
|
||||||
def detect_language(filename: str, content: str = "") -> str:
|
def detect_language(filename, content=""):
|
||||||
"""Detect programming language from filename and content."""
|
|
||||||
detector = LanguageDetector()
|
detector = LanguageDetector()
|
||||||
return detector.detect(filename, content)
|
return detector.detect(filename, content)
|
||||||
|
|||||||
Reference in New Issue
Block a user