fix: resolve CI issues - push complete implementation with tests
This commit is contained in:
@@ -1,166 +1,157 @@
|
||||
"""Language detection for code files."""
|
||||
|
||||
|
||||
|
||||
class LanguageDetector:
|
||||
"""Detects programming language from file extensions and content."""
|
||||
|
||||
# Maps a file extension (text after the last ".", without the dot) to the
# language name reported for it.  Key order is kept as in the original table.
EXTENSION_MAP = {
    # Python
    "py": "python",
    "pyw": "python",
    "pyx": "python",
    # JavaScript / TypeScript
    "js": "javascript",
    "mjs": "javascript",
    "cjs": "javascript",
    "jsx": "javascript",
    "ts": "typescript",
    "tsx": "typescript",
    "mts": "typescript",
    "cts": "typescript",
    # JVM languages
    "java": "java",
    "kt": "kotlin",
    "kts": "kotlin",
    # Systems languages
    "go": "go",
    "rs": "rust",
    "c": "c",
    "h": "c",
    "cpp": "cpp",
    "cc": "cpp",
    "cxx": "cpp",
    "hpp": "cpp",
    "hxx": "cpp",
    "cs": "csharp",
    # Scripting and other general-purpose languages
    "rb": "ruby",
    "erb": "ruby",
    "php": "php",
    "swift": "swift",
    "m": "objective-c",
    "mm": "objective-c",
    "scala": "scala",
    "sc": "scala",
    "jl": "julia",
    "r": "r",
    # NOTE: detect_from_filename lower-cases the extension before the lookup,
    # so the capitalised keys ("R", "Dockerfile") are only reachable by code
    # that indexes this table directly.  They are kept for compatibility.
    "R": "r",
    "lua": "lua",
    "pl": "perl",
    "pm": "perl",
    "sql": "sql",
    # Shells
    "sh": "bash",
    "bash": "bash",
    "zsh": "bash",
    "fish": "bash",
    # Data, markup and styling
    "yaml": "yaml",
    "yml": "yaml",
    "json": "json",
    "xml": "xml",
    "html": "html",
    "htm": "html",
    "css": "css",
    "scss": "scss",
    "sass": "sass",
    "less": "less",
    "md": "markdown",
    "markdown": "markdown",
    "txt": "text",
    "dockerfile": "dockerfile",
    "Dockerfile": "dockerfile",
}
|
||||
|
||||
# Per-language regular expressions used to score file content.  Patterns that
# start with "^" rely on the caller passing re.MULTILINE so they anchor at the
# start of every line rather than only at the start of the text.
#
# Every pattern is a raw string with SINGLE backslashes: inside r"..." the
# sequence "\\s" is the regex "literal backslash followed by s", which never
# occurs in ordinary source code, so doubled backslashes silently disable the
# pattern.
CONTENT_PATTERNS = {
    "python": [
        r"^import\s+\w+",
        r"^from\s+\w+\s+import",
        r"^def\s+\w+\s*\(",
        r"^class\s+\w+\s*[:\(]",
        r"^if\s+__name__\s*==\s*['\"]__main__['\"]",
    ],
    "javascript": [
        r"^const\s+\w+\s*=",
        r"^let\s+\w+\s*=",
        r"^var\s+\w+\s*=",
        r"^function\s+\w+\s*\(",
        r"=>\s*\{",
        r"import\s+.*\s+from",
        r"export\s+(default\s+)?",
    ],
    "typescript": [
        r"^interface\s+\w+\s*\{",
        r"^type\s+\w+\s*=",
        r":\s*(string|number|boolean|any|void|null|undefined)",
        r"<[A-Z]\w*>",
    ],
    "java": [
        r"^package\s+[\w.]+;",
        r"^import\s+[\w.]+;",
        r"^public\s+(class|interface|enum)\s+\w+",
        r"^private\s+(static\s+)?(final\s+)?\w+\s+\w+;",
    ],
    "go": [
        r"^package\s+\w+",
        r"^import\s*\(",
        r"func\s+\w+\s*\(",
        r":=",
        r"go\s+func",
    ],
    "rust": [
        r"^fn\s+\w+\s*\(",
        r"^impl\s+\w+",
        r"^struct\s+\w+",
        r"^enum\s+\w+",
        r"let\s+mut\s+\w+",
        r"->\s*\w+",
    ],
    "c": [
        r"#include\s*<",
        r"#include\s*\"",
        r"int\s+main\s*\(",
        r"struct\s+\w+\s*\{",
        r"void\s+\*?\s*\w+\s*\(",
    ],
    "cpp": [
        r"#include\s*<",
        r"#include\s*\"",
        r"class\s+\w+\s*(:\s*public)?",
        r"std::\w+",
        r"using\s+namespace\s+std",
    ],
    "ruby": [
        r"^require\s+['\"]",
        r"^class\s+\w+(\s*<\s*\w+)?",
        r"^module\s+\w+",
        r"def\s+\w+",
        r"puts\s+",
        r"puts!",
    ],
    "php": [
        r"<\?php",
        r"\$\w+\s*=",
        r"function\s+\w+\s*\(",
        r"class\s+\w+\s*\{",
    ],
}
|
||||
|
||||
def __init__(self):
    """Create a detector with an empty ``_tree_sitter_languages`` dict."""
    # NOTE(review): nothing in the visible code reads this attribute;
    # presumably a per-instance cache for tree-sitter grammars -- confirm.
    self._tree_sitter_languages = {}
|
||||
|
||||
def detect_from_filename(self, filename):
    """Detect a language from the file name alone.

    The text after the last ``.`` is lower-cased and looked up in
    ``EXTENSION_MAP``.  A file whose final path component is exactly
    ``Dockerfile`` (in any letter case) has no extension, so it is looked
    up under the map's ``"dockerfile"`` key instead of being rejected by
    the "no dot" guard.

    Args:
        filename (str): File name or path.

    Returns:
        str or None: The mapped language name, or ``None`` when the name
        has no ``.`` or its extension is not in ``EXTENSION_MAP``.
    """
    # Final path component, accepting either "/" or "\\" as the separator.
    basename = filename.replace("\\", "/").rsplit("/", 1)[-1]
    if basename.lower() == "dockerfile":
        return self.EXTENSION_MAP.get("dockerfile")

    if "." not in filename:
        return None

    # Lower-casing makes the lookup case-insensitive (".PY", ".R", ...).
    ext = filename.rsplit(".", 1)[-1].lower()
    return self.EXTENSION_MAP.get(ext)
|
||||
|
||||
def detect_from_content(self, content: str) -> str | None:
|
||||
"""Detect language from file content patterns."""
|
||||
first_lines = '\\n'.join(content.splitlines()[:50])
|
||||
def detect_from_content(self, content):
|
||||
first_lines = "\n".join(content.splitlines()[:50])
|
||||
|
||||
scores: dict[str, int] = {}
|
||||
scores = {}
|
||||
|
||||
for lang, patterns in self.CONTENT_PATTERNS.items():
|
||||
score = 0
|
||||
for pattern in patterns:
|
||||
import re
|
||||
matches = len(re.findall(pattern, first_lines, re.MULTILINE))
|
||||
score += matches
|
||||
|
||||
@@ -173,12 +164,11 @@ class LanguageDetector:
|
||||
|
||||
return None
|
||||
|
||||
def detect(self, filename: str, content: str = "") -> str:
|
||||
"""Detect language from filename and optionally content."""
|
||||
def detect(self, filename, content=""):
|
||||
ext_lang = self.detect_from_filename(filename)
|
||||
|
||||
if ext_lang and ext_lang not in [
|
||||
'text', 'markdown', 'json', 'yaml', 'xml', 'html', 'css', 'dockerfile'
|
||||
"text", "markdown", "json", "yaml", "xml", "html", "css", "dockerfile"
|
||||
]:
|
||||
if content:
|
||||
content_lang = self.detect_from_content(content)
|
||||
@@ -193,16 +183,13 @@ class LanguageDetector:
|
||||
|
||||
return ext_lang or "text"
|
||||
|
||||
def get_supported_languages(self):
    """Return the distinct language names in ``EXTENSION_MAP``, sorted.

    Returns:
        list of str: Each language name once, in ascending order.
    """
    # Several extensions map to the same language, so de-duplicate first.
    unique_languages = set(self.EXTENSION_MAP.values())
    return sorted(unique_languages)
|
||||
|
||||
def is_language_supported(self, language):
    """Report whether ``language`` is one of the supported language names.

    Args:
        language (str): Language name to look for; compared exactly as
            given, with no case folding.

    Returns:
        bool: ``True`` if ``language`` appears in the list returned by
        ``get_supported_languages()``.
    """
    return language in self.get_supported_languages()
|
||||
|
||||
|
||||
def detect_language(filename, content=""):
    """Detect a file's language using a freshly created ``LanguageDetector``.

    Args:
        filename (str): File name or path, passed through to
            ``LanguageDetector.detect``.
        content (str): Optional file content, passed through unchanged.

    Returns:
        Whatever ``LanguageDetector.detect`` returns for these arguments.
    """
    detector = LanguageDetector()
    return detector.detect(filename, content)
|
||||
|
||||
Reference in New Issue
Block a user