diff --git a/src/gdiffer/language_detector.py b/src/gdiffer/language_detector.py index 0d6ad3c..c50200f 100644 --- a/src/gdiffer/language_detector.py +++ b/src/gdiffer/language_detector.py @@ -68,75 +68,75 @@ class LanguageDetector: CONTENT_PATTERNS = { 'python': [ - r'^import\s+\w+', - r'^from\s+\w+\s+import', - r'^def\s+\w+\s*\(', - r'^class\s+\w+\s*[:\(]', - r'^if\s+__name__\s*==\s*['"]__main__['"]', + r'^import\\s+\\w+', + r'^from\\s+\\w+\\s+import', + r'^def\\s+\\w+\\s*\\(', + r'^class\\s+\\w+\\s*[:\\(]', + r'^if\\s+__name__\\s*==\\s*[\\'\\"]__main__[\\'\\"]', ], 'javascript': [ - r'^const\s+\w+\s*=', - r'^let\s+\w+\s*=', - r'^var\s+\w+\s*=', - r'^function\s+\w+\s*\(', - r'=>\s*\{', - r'import\s+.*\s+from', - r'export\s+(default\s+)?', + r'^const\\s+\\w+\\s*=', + r'^let\\s+\\w+\\s*=', + r'^var\\s+\\w+\\s*=', + r'^function\\s+\\w+\\s*\\(', + r'=>\\s*\\{', + r'import\\s+.*\\s+from', + r'export\\s+(default\\s+)?', ], 'typescript': [ - r'^interface\s+\w+\s*\{', - r'^type\s+\w+\s*=', - r':\s*(string|number|boolean|any|void|null|undefined)', - r'<[A-Z]\w*>', + r'^interface\\s+\\w+\\s*\\{', + r'^type\\s+\\w+\\s*=', + r':\\s*(string|number|boolean|any|void|null|undefined)', + r'<[A-Z]\\w*>', ], 'java': [ - r'^package\s+[\w.]+;', - r'^import\s+[\w.]+;', - r'^public\s+(class|interface|enum)\s+\w+', - r'^private\s+(static\s+)?(final\s+)?\w+\s+\w+;', + r'^package\\s+[\\w.]+;', + r'^import\\s+[\\w.]+;', + r'^public\\s+(class|interface|enum)\\s+\\w+', + r'^private\\s+(static\\s+)?(final\\s+)?\\w+\\s+\\w+;', ], 'go': [ - r'^package\s+\w+', - r'^import\s*\(', - r'func\s+\w+\s*\(', + r'^package\\s+\\w+', + r'^import\\s+\\(', + r'func\\s+\\w+\\s*\\(', r':=', - r'go\s+func', + r'go\\s+func', ], 'rust': [ - r'^fn\s+\w+\s*\(', - r'^impl\s+\w+', - r'^struct\s+\w+', - r'^enum\s+\w+', - r'let\s+mut\s+\w+', - r'->\s*\w+', + r'^fn\\s+\\w+\\s*\\(', + r'^impl\\s+\\w+', + r'^struct\\s+\\w+', + r'^enum\\s+\\w+', + r'let\\s+mut\\s+\\w+', + r'->\\s*\\w+', ], 'c': [ - r'#include\s*<', - r'#include\s*"', - r'int\s+main\s*\(', - r'struct\s+\w+\s*\{', - r'void\s+\?\s*\w+\s*\(', + r'#include\\s*<', + r'#include\\s*"', + r'int\\s+main\\s*\\(', + r'struct\\s+\\w+\\s*\\{', + r'void\\s+\\*?\\s*\\w+\\s*\\(', ], 'cpp': [ - r'#include\s*<', - r'#include\s*"', - r'class\s+\w+\s*(:\s*public)?', - r'std::\w+', - r'using\s+namespace\s+std', + r'#include\\s*<', + r'#include\\s*"', + r'class\\s+\\w+\\s*(:\\s*public)?', + r'std::\\w+', + r'using\\s+namespace\\s+std', ], 'ruby': [ - r'^require\s+['"]', - r'^class\s+\w+(\s*<\s*\w+)?', - r'^module\s+\w+', - r'def\s+\w+', - r'puts\s+', + r'^require\\s+[\\'\\"]', + r'^class\\s+\\w+(\\s*<\\s*\\w+)?', + r'^module\\s+\\w+', + r'def\\s+\\w+', + r'puts\\s+', r'puts!', ], 'php': [ - r'\<\?php', - r'\$\w+\s*=', - r'function\s+\w+\s*\(', - r'class\s+\w+\s*\{', + r'<\\?php', + r'\\$\\w+\\s*=', + r'function\\s+\\w+\\s*\\(', + r'class\\s+\\w+\\s*\\{', ], } @@ -153,7 +153,7 @@ class LanguageDetector: def detect_from_content(self, content: str) -> str | None: """Detect language from file content patterns.""" - first_lines = '\n'.join(content.splitlines()[:50]) + first_lines = '\\n'.join(content.splitlines()[:50]) scores: dict[str, int] = {} @@ -177,7 +177,9 @@ class LanguageDetector: """Detect language from filename and optionally content.""" ext_lang = self.detect_from_filename(filename) - if ext_lang and ext_lang not in ['text', 'markdown', 'json', 'yaml', 'xml', 'html', 'css', 'dockerfile']: + if ext_lang and ext_lang not in [ + 'text', 'markdown', 'json', 'yaml', 'xml', 'html', 'css', 'dockerfile' + ]: if content: content_lang = self.detect_from_content(content) if content_lang and content_lang != ext_lang: