Add language detector, code analyzer, and issue detector

This commit is contained in:
2026-02-02 13:59:07 +00:00
parent 0e4182c504
commit 057c91d31a

View File

@@ -0,0 +1,75 @@
"""Language detection for code files."""
from typing import Optional
class LanguageDetector:
    """Guess the language of a file from its name and, optionally, its content.

    Detection by name uses ``EXTENSION_MAP`` (and ``FILENAME_MAP`` for
    well-known files that have no extension).  Detection by content counts
    regex hits from ``CONTENT_PATTERNS`` in the first 50 lines of the text.
    """

    # File extension (without the dot) -> language name.  Lookups lower-case
    # the extension first, so the mixed-case keys ('R', 'Dockerfile') are
    # never hit by detect_from_filename; they are kept so direct users of the
    # mapping see the same keys as before.
    EXTENSION_MAP = {
        'py': 'python', 'pyw': 'python', 'pyx': 'python',
        'js': 'javascript', 'mjs': 'javascript', 'cjs': 'javascript', 'jsx': 'javascript',
        'ts': 'typescript', 'tsx': 'typescript', 'mts': 'typescript', 'cts': 'typescript',
        'java': 'java', 'kt': 'kotlin', 'kts': 'kotlin',
        'go': 'go', 'rs': 'rust', 'c': 'c', 'h': 'c',
        'cpp': 'cpp', 'cc': 'cpp', 'cxx': 'cpp', 'hpp': 'cpp', 'hxx': 'cpp',
        'cs': 'csharp', 'rb': 'ruby', 'erb': 'ruby', 'php': 'php',
        'swift': 'swift', 'm': 'objective-c', 'mm': 'objective-c',
        'scala': 'scala', 'sc': 'scala', 'jl': 'julia',
        'r': 'r', 'R': 'r', 'lua': 'lua',
        'pl': 'perl', 'pm': 'perl', 'sql': 'sql',
        'sh': 'bash', 'bash': 'bash', 'zsh': 'bash', 'fish': 'bash',
        'yaml': 'yaml', 'yml': 'yaml', 'json': 'json',
        'xml': 'xml', 'html': 'html', 'htm': 'html',
        'css': 'css', 'scss': 'scss', 'sass': 'sass', 'less': 'less',
        'md': 'markdown', 'markdown': 'markdown',
        'txt': 'text', 'dockerfile': 'dockerfile', 'Dockerfile': 'dockerfile',
    }

    # Lower-cased base name (text before the first dot) -> language name, for
    # files that are identified by name rather than by extension.
    FILENAME_MAP = {
        'dockerfile': 'dockerfile',
    }

    # Language -> regexes whose matches (with re.MULTILINE) count as evidence
    # for that language.  Order matters: on a tied score the language listed
    # first wins.
    CONTENT_PATTERNS = {
        'python': [r'^import\s+\w+', r'^from\s+\w+\s+import', r'^def\s+\w+\s*\(', r'^class\s+\w+'],
        'javascript': [r'^const\s+\w+', r'^let\s+\w+', r'^var\s+\w+', r'^function\s+\w+', r'=>\s*\{'],
        'typescript': [r'^interface\s+\w+', r'^type\s+\w+', r':\s*(string|number|boolean)'],
        'java': [r'^package\s+[\w.]+;', r'^import\s+[\w.]+;', r'^public\s+class\s+\w+'],
        'go': [r'^package\s+\w+', r'^import\s+\(', r'func\s+\w+'],
        'rust': [r'^fn\s+\w+', r'^impl\s+\w+', r'^struct\s+\w+', r'^enum\s+\w+'],
        'c': [r'#include\s*<', r'#include\s*"', r'int\s+main\s*\('],
        'cpp': [r'#include\s*<', r'#include\s*"', r'class\s+\w+', r'std::\w+'],
        'ruby': [r'^require\s+', r'^class\s+\w+', r'^module\s+\w+', r'def\s+\w+'],
        'php': [r'<\?php', r'\$\w+\s*=', r'function\s+\w+', r'class\s+\w+'],
    }

    # Name-derived results for which detect() still consults the content
    # before settling on the name-derived answer.
    _CONTENT_OVERRIDABLE = frozenset({
        'text', 'markdown', 'json', 'yaml', 'xml', 'html', 'css', 'dockerfile',
    })

    def detect_from_filename(self, filename: str) -> Optional[str]:
        """Return the language implied by *filename*, or None if unknown.

        The extension after the last dot is looked up case-insensitively in
        ``EXTENSION_MAP``.  If that finds nothing, the base name up to its
        first dot is looked up in ``FILENAME_MAP`` so that ``Dockerfile`` and
        ``Dockerfile.dev`` are recognised even without a known extension.
        """
        # Work on the last path component only; accept both separators.
        name = filename.replace('\\', '/').rsplit('/', 1)[-1]
        if '.' in name:
            ext = name.rsplit('.', 1)[-1].lower()
            lang = self.EXTENSION_MAP.get(ext)
            if lang is not None:
                return lang
        stem = name.split('.', 1)[0].lower()
        return self.FILENAME_MAP.get(stem)

    def detect_from_content(self, content: str) -> Optional[str]:
        """Return the best-scoring language for *content*, or None.

        Only the first 50 lines are inspected.  A language's score is the
        total number of matches of all its patterns; languages with no match
        are ignored.
        """
        # Function-local import: the class does not rely on a module-level
        # `re` import being present.
        import re

        head = '\n'.join(content.splitlines()[:50])
        scores = {}
        for lang, patterns in self.CONTENT_PATTERNS.items():
            hits = sum(len(re.findall(p, head, re.MULTILINE)) for p in patterns)
            if hits > 0:
                scores[lang] = hits
        if not scores:
            return None
        # max() keeps the first maximal key, i.e. CONTENT_PATTERNS order.
        return max(scores, key=scores.get)

    def detect(self, filename: str, content: str = "") -> str:
        """Return the detected language, falling back to ``"text"``.

        A name-derived language is returned immediately unless it is one of
        ``_CONTENT_OVERRIDABLE``; in that case (or when the name is unknown)
        a content-based match, if any, takes precedence.
        """
        ext_lang = self.detect_from_filename(filename)
        if ext_lang and ext_lang not in self._CONTENT_OVERRIDABLE:
            return ext_lang
        if content:
            content_lang = self.detect_from_content(content)
            if content_lang:
                return content_lang
        return ext_lang or "text"

    def get_supported_languages(self) -> list[str]:
        """Return the sorted, de-duplicated language names in ``EXTENSION_MAP``."""
        return sorted(set(self.EXTENSION_MAP.values()))

    def is_language_supported(self, language: str) -> bool:
        """Return True if *language* is one of the ``EXTENSION_MAP`` values."""
        # Same answer as checking get_supported_languages(), without building
        # and sorting a list on every call.
        return language in self.EXTENSION_MAP.values()
def detect_language(filename: str, content: str = "") -> str:
    """Module-level shortcut for ``LanguageDetector().detect(filename, content)``.

    Builds a fresh detector on each call and returns its result for the
    given file name and optional content.
    """
    return LanguageDetector().detect(filename, content)