"""Issue detector for common bugs, security vulnerabilities, and code smells.

Detection is purely line-based: each line of input is searched against a fixed
table of regular expressions.  Matches are heuristics, not proofs, so expect
both false positives and false negatives.
"""

import logging
import re
from dataclasses import dataclass, field
from typing import Optional

logger = logging.getLogger(__name__)


@dataclass
class Issue:
    """A single finding produced by :class:`IssueDetector`.

    Attributes:
        type: Machine-readable identifier of the rule (e.g. ``'bare_except'``).
        severity: One of ``'critical'``, ``'high'``, ``'medium'`` or ``'low'``
            for the built-in rules.
        title: Short human-readable name of the rule.
        description: One-line explanation of what was matched.
        line: 1-based index of the matching line within the scanned text.
        suggestion: Recommended remediation, or ``""`` if none.
        pattern: Source of the regular expression that matched.
    """

    type: str
    severity: str
    title: str
    description: str
    line: Optional[int] = None
    suggestion: str = ""
    pattern: str = ""


class IssueDetector:
    """Regex-driven scanner for security issues, likely bugs and code smells.

    The three ``*_PATTERNS`` tables hold the rules.  Each rule is a dict with
    the keys ``pattern``, ``type``, ``severity``, ``title``, ``description``
    and ``suggestion``.  Rules are compiled once per instance in ``__init__``.
    """

    SECURITY_PATTERNS = [
        {
            'pattern': r'(?i)(sql\s*\(|execute\s*\(|exec\s*\()',
            'type': 'sql_injection',
            'severity': 'critical',
            'title': 'Potential SQL Injection',
            'description': 'String concatenation in SQL query',
            'suggestion': 'Use parameterized queries',
        },
        {
            'pattern': r'(?i)(innerHTML\s*=|outerHTML\s*=)',
            'type': 'xss',
            'severity': 'critical',
            'title': 'Potential XSS Vulnerability',
            'description': 'Directly setting HTML content',
            'suggestion': 'Use textContent or sanitize HTML',
        },
        {
            'pattern': r'(?i)(eval\s*\()',
            'type': 'code_injection',
            'severity': 'critical',
            'title': 'Code Injection Risk',
            'description': 'eval() detected',
            'suggestion': 'Avoid eval()',
        },
        {
            'pattern': r'(?i)(os\.system\s*\(|subprocess\.|shell=True)',
            'type': 'command_injection',
            'severity': 'critical',
            'title': 'Command Injection Risk',
            'description': 'Shell command execution',
            'suggestion': 'Use subprocess with shell=False',
        },
        {
            'pattern': r'(?i)(password\s*=|passwd\s*=|secret\s*=|token\s*=)',
            'type': 'hardcoded_secret',
            'severity': 'high',
            'title': 'Hardcoded Secret',
            'description': 'Potential hardcoded credential',
            'suggestion': 'Use environment variables',
        },
        {
            'pattern': r'(?i)(http://)',
            'type': 'insecure_transport',
            'severity': 'medium',
            'title': 'Insecure HTTP',
            'description': 'Using HTTP instead of HTTPS',
            'suggestion': 'Use HTTPS',
        },
        {
            'pattern': r'(?i)(random\.randint\s*\()',
            'type': 'weak_crypto',
            'severity': 'medium',
            'title': 'Weak Random',
            'description': 'Using random module',
            'suggestion': 'Use secrets module',
        },
    ]

    BUG_PATTERNS = [
        {
            # A lone "=" inside a parenthesised condition.  The lookarounds
            # exclude "==", "!=", "<=", ">=" and the walrus ":=" so that
            # legitimate comparisons are not reported.
            'pattern': r'(?i)\b(?:el)?if\s*\([^)]*(?<![=!<>:])=(?!=)[^)]*\)\s*:',
            'type': 'assignment_in_condition',
            'severity': 'high',
            'title': 'Assignment in Condition',
            'description': 'Possible typo = instead of ==',
            'suggestion': 'Use == for comparison',
        },
        {
            # Either operand order: "x == None" or "None == x".
            'pattern': r'(?:==\s*None\b|\bNone\s*==)',
            'type': 'none_comparison',
            'severity': 'medium',
            'title': 'Direct None Comparison',
            'description': 'Using == None',
            'suggestion': 'Use is None',
        },
        {
            # No end-of-line anchor: "except: pass" and "except:  # why" are
            # bare excepts too.  "except Foo:" cannot match because only
            # whitespace is allowed between the keyword and the colon.
            'pattern': r'\bexcept\s*:',
            'type': 'bare_except',
            'severity': 'medium',
            'title': 'Bare Except Clause',
            'description': 'Catching all exceptions',
            'suggestion': 'Catch specific exceptions',
        },
    ]

    CODE_SMELL_PATTERNS = [
        {
            'pattern': r'(?i)(\bTODO\b|\bFIXME\b)',
            'type': 'code_tag',
            'severity': 'low',
            'title': 'Code Tag',
            'description': 'TODO/FIXME comment',
            'suggestion': 'Address or create ticket',
        },
        {
            'pattern': r'(?i)(\bprint\s*\()',
            'type': 'debug_statement',
            'severity': 'low',
            'title': 'Debug Statement',
            'description': 'print() detected',
            'suggestion': 'Remove debug statements',
        },
        {
            # "Exceeds 80" means 81 or more characters.
            'pattern': r'.{81,}',
            'type': 'long_line',
            'severity': 'low',
            'title': 'Long Line',
            'description': 'Line exceeds 80 characters',
            'suggestion': 'Split long lines',
        },
        {
            # Only "pass" in statement position (start of line or right after
            # a colon), so the word inside comments or strings is ignored.
            'pattern': r'(?:^|:)\s*pass\s*(?:#.*)?$',
            'type': 'empty_block',
            'severity': 'low',
            'title': 'Empty Code Block',
            'description': 'Empty pass statement',
            'suggestion': 'Add explanatory comment',
        },
    ]

    def __init__(self):
        """Create a detector and compile every rule in the pattern tables."""
        self._compiled_patterns = []
        self._compile_patterns()

    def _compile_patterns(self):
        """(Re)build ``self._compiled_patterns`` from the three rule tables.

        Each entry is a ``(compiled_regex, rule_dict)`` tuple.  A rule whose
        pattern fails to compile is skipped with a logged warning, so one bad
        rule does not disable the whole detector.
        """
        self._compiled_patterns = []
        all_rules = self.SECURITY_PATTERNS + self.BUG_PATTERNS + self.CODE_SMELL_PATTERNS
        for pattern_info in all_rules:
            try:
                compiled = re.compile(pattern_info['pattern'])
            except re.error as exc:
                logger.warning(
                    "Skipping rule %r: invalid pattern %r (%s)",
                    pattern_info.get('type'), pattern_info['pattern'], exc,
                )
                continue
            self._compiled_patterns.append((compiled, pattern_info))

    def _scan_line(self, text: str, line_num: int) -> list[Issue]:
        """Return one ``Issue`` per rule that matches ``text``.

        Args:
            text: A single line of code, without any diff marker.
            line_num: Line number to record on each produced issue.
        """
        return [
            Issue(
                type=pattern_info['type'],
                severity=pattern_info['severity'],
                title=pattern_info['title'],
                description=pattern_info['description'],
                line=line_num,
                suggestion=pattern_info['suggestion'],
                pattern=pattern_info['pattern'],
            )
            for compiled, pattern_info in self._compiled_patterns
            if compiled.search(text)
        ]

    def detect_issues(self, code: str, language: str = "text") -> list[Issue]:
        """Scan ``code`` line by line and return every rule match.

        Args:
            code: Source text to scan.  Empty or ``None`` yields ``[]``.
            language: Accepted for interface compatibility; currently unused,
                all rules are applied regardless of language.

        Returns:
            Issues in line order, and in rule-table order within a line.
            ``Issue.line`` is the 1-based line number within ``code``.
        """
        if not code:
            return []
        return [
            issue
            for line_num, line in enumerate(code.splitlines(), 1)
            for issue in self._scan_line(line, line_num)
        ]

    def detect_diff_issues(self, old_code: str, new_code: str, language: str = "text") -> list[Issue]:
        """Scan only the added lines of a diff-formatted text.

        Only lines of ``new_code`` that start with ``+`` (but not ``+++``,
        which is skipped as a file header) are examined; the leading ``+`` is
        stripped before matching.

        Args:
            old_code: Accepted for interface compatibility; currently unused.
            new_code: Text whose added lines are marked with a leading ``+``.
                Empty or ``None`` yields ``[]``.
            language: Accepted for interface compatibility; currently unused.

        Returns:
            Issues for added lines.  ``Issue.line`` is the 1-based position of
            the line within ``new_code`` itself, not a line number in the
            patched file.
        """
        if not new_code:
            return []
        issues = []
        for line_num, line in enumerate(new_code.splitlines(), 1):
            if not line.startswith('+') or line.startswith('+++'):
                continue
            issues.extend(self._scan_line(line[1:], line_num))
        return issues

    def suggest_improvements(self, code: str, language: str = "text") -> list[str]:
        """Return de-duplicated remediation hints for ``code``.

        Issues are ordered by severity (critical, high, medium, low, then any
        unknown severity) and then by title.  Only the first issue of each
        rule type contributes a ``"<title>: <suggestion>"`` entry.
        """
        suggestions = []
        issues = self.detect_issues(code, language)
        severity_order = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3}
        seen_types = set()
        for issue in sorted(issues, key=lambda x: (severity_order.get(x.severity, 4), x.title)):
            if issue.type not in seen_types and issue.suggestion:
                suggestions.append(f"{issue.title}: {issue.suggestion}")
                seen_types.add(issue.type)
        return suggestions


def detect_issues(code: str, language: str = "text") -> list[Issue]:
    """Convenience wrapper: run ``IssueDetector().detect_issues`` on ``code``."""
    detector = IssueDetector()
    return detector.detect_issues(code, language)


def suggest_improvements(code: str, language: str = "text") -> list[str]:
    """Convenience wrapper: run ``IssueDetector().suggest_improvements`` on ``code``."""
    detector = IssueDetector()
    return detector.suggest_improvements(code, language)