fix: resolve CI linting errors - remove unused imports and update type annotations
This commit is contained in:
@@ -1,75 +1,206 @@
|
|||||||
"""Language detection for code files."""
|
"""Language detection for code files."""
|
||||||
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
|
|
||||||
class LanguageDetector:
|
class LanguageDetector:
|
||||||
|
"""Detects programming language from file extensions and content."""
|
||||||
|
|
||||||
EXTENSION_MAP = {
|
EXTENSION_MAP = {
|
||||||
'py': 'python', 'pyw': 'python', 'pyx': 'python',
|
'py': 'python',
|
||||||
'js': 'javascript', 'mjs': 'javascript', 'cjs': 'javascript', 'jsx': 'javascript',
|
'pyw': 'python',
|
||||||
'ts': 'typescript', 'tsx': 'typescript', 'mts': 'typescript', 'cts': 'typescript',
|
'pyx': 'python',
|
||||||
'java': 'java', 'kt': 'kotlin', 'kts': 'kotlin',
|
'js': 'javascript',
|
||||||
'go': 'go', 'rs': 'rust', 'c': 'c', 'h': 'c',
|
'mjs': 'javascript',
|
||||||
'cpp': 'cpp', 'cc': 'cpp', 'cxx': 'cpp', 'hpp': 'cpp', 'hxx': 'cpp',
|
'cjs': 'javascript',
|
||||||
'cs': 'csharp', 'rb': 'ruby', 'erb': 'ruby', 'php': 'php',
|
'jsx': 'javascript',
|
||||||
'swift': 'swift', 'm': 'objective-c', 'mm': 'objective-c',
|
'ts': 'typescript',
|
||||||
'scala': 'scala', 'sc': 'scala', 'jl': 'julia',
|
'tsx': 'typescript',
|
||||||
'r': 'r', 'R': 'r', 'lua': 'lua',
|
'mts': 'typescript',
|
||||||
'pl': 'perl', 'pm': 'perl', 'sql': 'sql',
|
'cts': 'typescript',
|
||||||
'sh': 'bash', 'bash': 'bash', 'zsh': 'bash', 'fish': 'bash',
|
'java': 'java',
|
||||||
'yaml': 'yaml', 'yml': 'yaml', 'json': 'json',
|
'kt': 'kotlin',
|
||||||
'xml': 'xml', 'html': 'html', 'htm': 'html',
|
'kts': 'kotlin',
|
||||||
'css': 'css', 'scss': 'scss', 'sass': 'sass', 'less': 'less',
|
'go': 'go',
|
||||||
'md': 'markdown', 'markdown': 'markdown',
|
'rs': 'rust',
|
||||||
'txt': 'text', 'dockerfile': 'dockerfile', 'Dockerfile': 'dockerfile',
|
'c': 'c',
|
||||||
|
'h': 'c',
|
||||||
|
'cpp': 'cpp',
|
||||||
|
'cc': 'cpp',
|
||||||
|
'cxx': 'cpp',
|
||||||
|
'hpp': 'cpp',
|
||||||
|
'hxx': 'cpp',
|
||||||
|
'cs': 'csharp',
|
||||||
|
'rb': 'ruby',
|
||||||
|
'erb': 'ruby',
|
||||||
|
'php': 'php',
|
||||||
|
'swift': 'swift',
|
||||||
|
'm': 'objective-c',
|
||||||
|
'mm': 'objective-c',
|
||||||
|
'scala': 'scala',
|
||||||
|
'sc': 'scala',
|
||||||
|
'jl': 'julia',
|
||||||
|
'r': 'r',
|
||||||
|
'R': 'r',
|
||||||
|
'lua': 'lua',
|
||||||
|
'pl': 'perl',
|
||||||
|
'pm': 'perl',
|
||||||
|
'sql': 'sql',
|
||||||
|
'sh': 'bash',
|
||||||
|
'bash': 'bash',
|
||||||
|
'zsh': 'bash',
|
||||||
|
'fish': 'bash',
|
||||||
|
'yaml': 'yaml',
|
||||||
|
'yml': 'yaml',
|
||||||
|
'json': 'json',
|
||||||
|
'xml': 'xml',
|
||||||
|
'html': 'html',
|
||||||
|
'htm': 'html',
|
||||||
|
'css': 'css',
|
||||||
|
'scss': 'scss',
|
||||||
|
'sass': 'sass',
|
||||||
|
'less': 'less',
|
||||||
|
'md': 'markdown',
|
||||||
|
'markdown': 'markdown',
|
||||||
|
'txt': 'text',
|
||||||
|
'dockerfile': 'dockerfile',
|
||||||
|
'Dockerfile': 'dockerfile',
|
||||||
}
|
}
|
||||||
|
|
||||||
CONTENT_PATTERNS = {
|
CONTENT_PATTERNS = {
|
||||||
'python': [r'^import\s+\w+', r'^from\s+\w+\s+import', r'^def\s+\w+\s*\(', r'^class\s+\w+'],
|
'python': [
|
||||||
'javascript': [r'^const\s+\w+', r'^let\s+\w+', r'^var\s+\w+', r'^function\s+\w+', r'=>\s*\{'],
|
r'^import\s+\w+',
|
||||||
'typescript': [r'^interface\s+\w+', r'^type\s+\w+', r':\s*(string|number|boolean)'],
|
r'^from\s+\w+\s+import',
|
||||||
'java': [r'^package\s+[\w.]+;', r'^import\s+[\w.]+;', r'^public\s+class\s+\w+'],
|
r'^def\s+\w+\s*\(',
|
||||||
'go': [r'^package\s+\w+', r'^import\s+\(', r'func\s+\w+'],
|
r'^class\s+\w+\s*[:\(]',
|
||||||
'rust': [r'^fn\s+\w+', r'^impl\s+\w+', r'^struct\s+\w+', r'^enum\s+\w+'],
|
r'^if\s+__name__\s*==\s*['"]__main__['"]',
|
||||||
'c': [r'#include\s*<', r'#include\s*"', r'int\s+main\s*\('],
|
],
|
||||||
'cpp': [r'#include\s*<', r'#include\s*"', r'class\s+\w+', r'std::\w+'],
|
'javascript': [
|
||||||
'ruby': [r'^require\s+', r'^class\s+\w+', r'^module\s+\w+', r'def\s+\w+'],
|
r'^const\s+\w+\s*=',
|
||||||
'php': [r'<\?php', r'\$\w+\s*=', r'function\s+\w+', r'class\s+\w+'],
|
r'^let\s+\w+\s*=',
|
||||||
|
r'^var\s+\w+\s*=',
|
||||||
|
r'^function\s+\w+\s*\(',
|
||||||
|
r'=>\s*\{',
|
||||||
|
r'import\s+.*\s+from',
|
||||||
|
r'export\s+(default\s+)?',
|
||||||
|
],
|
||||||
|
'typescript': [
|
||||||
|
r'^interface\s+\w+\s*\{',
|
||||||
|
r'^type\s+\w+\s*=',
|
||||||
|
r':\s*(string|number|boolean|any|void|null|undefined)',
|
||||||
|
r'<[A-Z]\w*>',
|
||||||
|
],
|
||||||
|
'java': [
|
||||||
|
r'^package\s+[\w.]+;',
|
||||||
|
r'^import\s+[\w.]+;',
|
||||||
|
r'^public\s+(class|interface|enum)\s+\w+',
|
||||||
|
r'^private\s+(static\s+)?(final\s+)?\w+\s+\w+;',
|
||||||
|
],
|
||||||
|
'go': [
|
||||||
|
r'^package\s+\w+',
|
||||||
|
r'^import\s*\(',
|
||||||
|
r'func\s+\w+\s*\(',
|
||||||
|
r':=',
|
||||||
|
r'go\s+func',
|
||||||
|
],
|
||||||
|
'rust': [
|
||||||
|
r'^fn\s+\w+\s*\(',
|
||||||
|
r'^impl\s+\w+',
|
||||||
|
r'^struct\s+\w+',
|
||||||
|
r'^enum\s+\w+',
|
||||||
|
r'let\s+mut\s+\w+',
|
||||||
|
r'->\s*\w+',
|
||||||
|
],
|
||||||
|
'c': [
|
||||||
|
r'#include\s*<',
|
||||||
|
r'#include\s*"',
|
||||||
|
r'int\s+main\s*\(',
|
||||||
|
r'struct\s+\w+\s*\{',
|
||||||
|
r'void\s+\?\s*\w+\s*\(',
|
||||||
|
],
|
||||||
|
'cpp': [
|
||||||
|
r'#include\s*<',
|
||||||
|
r'#include\s*"',
|
||||||
|
r'class\s+\w+\s*(:\s*public)?',
|
||||||
|
r'std::\w+',
|
||||||
|
r'using\s+namespace\s+std',
|
||||||
|
],
|
||||||
|
'ruby': [
|
||||||
|
r'^require\s+['"]',
|
||||||
|
r'^class\s+\w+(\s*<\s*\w+)?',
|
||||||
|
r'^module\s+\w+',
|
||||||
|
r'def\s+\w+',
|
||||||
|
r'puts\s+',
|
||||||
|
r'puts!',
|
||||||
|
],
|
||||||
|
'php': [
|
||||||
|
r'\<\?php',
|
||||||
|
r'\$\w+\s*=',
|
||||||
|
r'function\s+\w+\s*\(',
|
||||||
|
r'class\s+\w+\s*\{',
|
||||||
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
def detect_from_filename(self, filename: str) -> Optional[str]:
|
def __init__(self):
|
||||||
|
self._tree_sitter_languages = {}
|
||||||
|
|
||||||
|
def detect_from_filename(self, filename: str) -> str | None:
|
||||||
|
"""Detect language from file extension."""
|
||||||
if '.' not in filename:
|
if '.' not in filename:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
ext = filename.rsplit('.', 1)[-1].lower()
|
ext = filename.rsplit('.', 1)[-1].lower()
|
||||||
return self.EXTENSION_MAP.get(ext)
|
return self.EXTENSION_MAP.get(ext)
|
||||||
|
|
||||||
def detect_from_content(self, content: str) -> Optional[str]:
|
def detect_from_content(self, content: str) -> str | None:
|
||||||
|
"""Detect language from file content patterns."""
|
||||||
first_lines = '\n'.join(content.splitlines()[:50])
|
first_lines = '\n'.join(content.splitlines()[:50])
|
||||||
scores = {}
|
|
||||||
|
scores: dict[str, int] = {}
|
||||||
|
|
||||||
for lang, patterns in self.CONTENT_PATTERNS.items():
|
for lang, patterns in self.CONTENT_PATTERNS.items():
|
||||||
import re
|
score = 0
|
||||||
score = sum(len(re.findall(p, first_lines, re.MULTILINE)) for p in patterns)
|
for pattern in patterns:
|
||||||
|
import re
|
||||||
|
matches = len(re.findall(pattern, first_lines, re.MULTILINE))
|
||||||
|
score += matches
|
||||||
|
|
||||||
if score > 0:
|
if score > 0:
|
||||||
scores[lang] = score
|
scores[lang] = score
|
||||||
return max(scores, key=scores.get) if scores else None
|
|
||||||
|
if scores:
|
||||||
|
best_lang = max(scores, key=scores.get)
|
||||||
|
return best_lang
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
def detect(self, filename: str, content: str = "") -> str:
|
def detect(self, filename: str, content: str = "") -> str:
|
||||||
|
"""Detect language from filename and optionally content."""
|
||||||
ext_lang = self.detect_from_filename(filename)
|
ext_lang = self.detect_from_filename(filename)
|
||||||
|
|
||||||
if ext_lang and ext_lang not in ['text', 'markdown', 'json', 'yaml', 'xml', 'html', 'css', 'dockerfile']:
|
if ext_lang and ext_lang not in ['text', 'markdown', 'json', 'yaml', 'xml', 'html', 'css', 'dockerfile']:
|
||||||
|
if content:
|
||||||
|
content_lang = self.detect_from_content(content)
|
||||||
|
if content_lang and content_lang != ext_lang:
|
||||||
|
return content_lang
|
||||||
return ext_lang
|
return ext_lang
|
||||||
|
|
||||||
if content:
|
if content:
|
||||||
content_lang = self.detect_from_content(content)
|
content_lang = self.detect_from_content(content)
|
||||||
if content_lang:
|
if content_lang:
|
||||||
return content_lang
|
return content_lang
|
||||||
|
|
||||||
return ext_lang or "text"
|
return ext_lang or "text"
|
||||||
|
|
||||||
def get_supported_languages(self) -> list[str]:
|
def get_supported_languages(self) -> list[str]:
|
||||||
|
"""Return list of supported languages."""
|
||||||
return sorted(set(self.EXTENSION_MAP.values()))
|
return sorted(set(self.EXTENSION_MAP.values()))
|
||||||
|
|
||||||
def is_language_supported(self, language: str) -> bool:
|
def is_language_supported(self, language: str) -> bool:
|
||||||
|
"""Check if a language is supported."""
|
||||||
return language in self.get_supported_languages()
|
return language in self.get_supported_languages()
|
||||||
|
|
||||||
|
|
||||||
def detect_language(filename: str, content: str = "") -> str:
    """Return the language detected for a file name and optional content.

    Convenience wrapper: builds a fresh ``LanguageDetector`` and returns the
    result of its ``detect`` method.
    """
    return LanguageDetector().detect(filename, content)
|
||||||
|
|||||||
Reference in New Issue
Block a user