fix: resolve CI linting errors - remove unused imports and update type annotations

2026-02-02 14:39:07 +00:00
parent 24dda8f991
commit c6e77e610a
1 changed file with 227 additions and 31 deletions
--- a/src/gdiffer/issue_detector.py
+++ b/src/gdiffer/issue_detector.py
@@ -1,43 +1,159 @@
 """Issue detector for common bugs, security vulnerabilities, and code smells."""

 import re
-from dataclasses import dataclass, field
-from typing import Optional
+from dataclasses import dataclass


@dataclass
 class Issue:
+    """A single finding produced by matching one pattern against one line.
+
+    Attributes:
+        type: Machine-readable identifier copied from the pattern table
+            (for example ``'sql_injection'`` or ``'bare_except'``).
+        severity: Severity label copied from the pattern table; the tables
+            in this module use ``'critical'``, ``'high'``, ``'medium'`` and
+            ``'low'``.
+        title: Short human-readable name of the finding.
+        description: Longer explanation of why the match is flagged.
+        line: 1-based index of the scanned line that matched, or ``None``
+            when no line is associated with the issue.
+        suggestion: Recommended remediation; may be empty.
+        pattern: Source text of the regular expression that matched.
+    """
    type: str
    severity: str
    title: str
    description: str
-    line: Optional[int] = None
+    line: int | None = None
    suggestion: str = ""
    pattern: str = ""


 class IssueDetector:
+    """Detects issues in code changes."""
+
    SECURITY_PATTERNS = [
-        {'pattern': r'(?i)(sql\s*\(|execute\s*\(|exec\s*\()', 'type': 'sql_injection', 'severity': 'critical', 'title': 'Potential SQL Injection', 'description': 'String concatenation in SQL query', 'suggestion': 'Use parameterized queries'},
-        {'pattern': r'(?i)(innerHTML\s*=|outerHTML\s*=)', 'type': 'xss', 'severity': 'critical', 'title': 'Potential XSS Vulnerability', 'description': 'Directly setting HTML content', 'suggestion': 'Use textContent or sanitize HTML'},
-        {'pattern': r'(?i)(eval\s*\()', 'type': 'code_injection', 'severity': 'critical', 'title': 'Code Injection Risk', 'description': 'eval() detected', 'suggestion': 'Avoid eval()'},
-        {'pattern': r'(?i)(os\.system\s*\(|subprocess\.|shell=True)', 'type': 'command_injection', 'severity': 'critical', 'title': 'Command Injection Risk', 'description': 'Shell command execution', 'suggestion': 'Use subprocess with shell=False'},
-        {'pattern': r'(?i)(password\s*=|passwd\s*=|secret\s*=|token\s*=)', 'type': 'hardcoded_secret', 'severity': 'high', 'title': 'Hardcoded Secret', 'description': 'Potential hardcoded credential', 'suggestion': 'Use environment variables'},
-        {'pattern': r'(?i)(http://)', 'type': 'insecure_transport', 'severity': 'medium', 'title': 'Insecure HTTP', 'description': 'Using HTTP instead of HTTPS', 'suggestion': 'Use HTTPS'},
-        {'pattern': r'(?i)(random\.randint\s*\()', 'type': 'weak_crypto', 'severity': 'medium', 'title': 'Weak Random', 'description': 'Using random module', 'suggestion': 'Use secrets module'},
+        {
+            'pattern': r'(?i)(sql\s*\(|execute\s*\(|exec\s*\(|SELECT\s+|UPDATE\s+|INSERT\s+|DELETE\s+)',
+            'type': 'sql_injection',
+            'severity': 'critical',
+            'title': 'Potential SQL Injection',
+            'description': 'String concatenation or interpolation used in SQL query',
+            'suggestion': 'Use parameterized queries or ORM methods instead of string concatenation',
+        },
+        {
+            'pattern': r'(?i)(innerHTML\s*=|outerHTML\s*=|document\.write\s*\()',
+            'type': 'xss',
+            'severity': 'critical',
+            'title': 'Potential XSS Vulnerability',
+            'description': 'Directly setting HTML content can lead to XSS attacks',
+            'suggestion': 'Use textContent or sanitize HTML before insertion',
+        },
+        {
+            'pattern': r'(?i)(eval\s*\(|setTimeout\s*\(\s*['"]|setInterval\s*\(\s*['"])',
+            'type': 'code_injection',
+            'severity': 'critical',
+            'title': 'Code Injection Risk',
+            'description': 'eval() or dynamic code execution detected',
+            'suggestion': 'Avoid eval() and dynamic code execution when possible',
+        },
+        {
+            'pattern': r'(?i)(os\.system\s*\(|subprocess\.|shell=True|popen)',
+            'type': 'command_injection',
+            'severity': 'critical',
+            'title': 'Command Injection Risk',
+            'description': 'Shell command execution with user input',
+            'suggestion': 'Use subprocess with shell=False and validate/sanitize inputs',
+        },
+        {
+            'pattern': r'(?i)(password\s*=|passwd\s*=|secret\s*=|token\s*=|api_key\s*=)',
+            'type': 'hardcoded_secret',
+            'severity': 'high',
+            'title': 'Hardcoded Secret Detected',
+            'description': 'Potential hardcoded password, token, or API key',
+            'suggestion': 'Use environment variables or secure configuration management',
+        },
+        {
+            'pattern': r'(?i)(http://)',
+            'type': 'insecure_transport',
+            'severity': 'medium',
+            'title': 'Insecure HTTP Transport',
+            'description': 'Using HTTP instead of HTTPS for network requests',
+            'suggestion': 'Use HTTPS for all network communications',
+        },
+        {
+            'pattern': r'(?i)(random\.randint\s*\(|random\.random\s*\()',
+            'type': 'weak_crypto',
+            'severity': 'medium',
+            'title': 'Weak Random Number Generator',
+            'description': 'Using random module for cryptographic purposes',
+            'suggestion': 'Use secrets module for cryptographic randomness',
+        },
    ]

    BUG_PATTERNS = [
-        {'pattern': r'(?i)(if\s*\([^)]*==[^)]*\)\s*:)', 'type': 'assignment_in_condition', 'severity': 'high', 'title': 'Assignment in Condition', 'description': 'Possible typo = instead of ==', 'suggestion': 'Use == for comparison'},
-        {'pattern': r'(?i)(\bNone\b.*==)', 'type': 'none_comparison', 'severity': 'medium', 'title': 'Direct None Comparison', 'description': 'Using == None', 'suggestion': 'Use is None'},
-        {'pattern': r'\bexcept\s*:\s*$', 'type': 'bare_except', 'severity': 'medium', 'title': 'Bare Except Clause', 'description': 'Catching all exceptions', 'suggestion': 'Catch specific exceptions'},
+        {
+            'pattern': r'(?i)(if\s*\([^)]*==[^)]*\)\s*:|if\s*\([^)]*=\s*[^)]*\)\s*:)',
+            'type': 'assignment_in_condition',
+            'severity': 'high',
+            'title': 'Assignment in Condition',
+            'description': 'Assignment used inside if condition (possible typo)',
+            'suggestion': 'Use == for comparison, not =',
+        },
+        {
+            'pattern': r'(?i)(\bNone\b.*==|==.*\bNone\b)',
+            'type': 'none_comparison',
+            'severity': 'medium',
+            'title': 'Direct None Comparison',
+            'description': 'Using == None instead of "is None"',
+            'suggestion': 'Use "is None" for None comparisons in Python',
+        },
+        {
+            'pattern': r'\bexcept\s*:\s*$',
+            'type': 'bare_except',
+            'severity': 'medium',
+            'title': 'Bare Except Clause',
+            'description': 'Catching all exceptions without specifying type',
+            'suggestion': 'Catch specific exceptions or at least Exception',
+        },
+        {
+            'pattern': r'(?i)(\.get\s*\(\s*['"]?\s*['"]?\s*\))',
+            'type': 'unused_get',
+            'severity': 'low',
+            'title': 'Dictionary get() with no default',
+            'description': 'Using dict.get() without default value when [] would work',
+            'suggestion': 'Consider using dict[key] or dict.get(key, default)',
+        },
    ]

    CODE_SMELL_PATTERNS = [
-        {'pattern': r'(?i)(\bTODO\b|\bFIXME\b)', 'type': 'code_tag', 'severity': 'low', 'title': 'Code Tag', 'description': 'TODO/FIXME comment', 'suggestion': 'Address or create ticket'},
-        {'pattern': r'(?i)(\bprint\s*\()', 'type': 'debug_statement', 'severity': 'low', 'title': 'Debug Statement', 'description': 'print() detected', 'suggestion': 'Remove debug statements'},
-        {'pattern': r'.{80,}', 'type': 'long_line', 'severity': 'low', 'title': 'Long Line', 'description': 'Line exceeds 80 characters', 'suggestion': 'Split long lines'},
-        {'pattern': r'\bpass\b', 'type': 'empty_block', 'severity': 'low', 'title': 'Empty Code Block', 'description': 'Empty pass statement', 'suggestion': 'Add explanatory comment'},
+        {
+            'pattern': r'^\s*for\s+.*\s+in\s+.*:\s*$',
+            'type': 'long_loop',
+            'severity': 'low',
+            'title': 'Complex Loop',
+            'description': 'Nested loop detected - consider if it can be optimized',
+            'suggestion': 'Consider using list comprehensions or vectorized operations',
+        },
+        {
+            'pattern': r'(?i)(\bTODO\b|\bFIXME\b|\bHACK\b|\bXXX\b)',
+            'type': 'code_tag',
+            'severity': 'low',
+            'title': 'Code Tag Found',
+            'description': 'TODO/FIXME/HACK comments indicate technical debt',
+            'suggestion': 'Address the TODO or create a ticket to track it',
+        },
+        {
+            'pattern': r'(?i)(\bprint\s*\(|console\.log\s*\()',
+            'type': 'debug_statement',
+            'severity': 'low',
+            'title': 'Debug Statement',
+            'description': 'Print or console.log statement detected',
+            'suggestion': 'Remove debug statements before committing',
+        },
+        {
+            'pattern': r'.{80,}',
+            'type': 'long_line',
+            'severity': 'low',
+            'title': 'Long Line',
+            'description': 'Line exceeds 80 characters',
+            'suggestion': 'Split long lines for better readability',
+        },
+        {
+            'pattern': r'\bpass\b',
+            'type': 'empty_block',
+            'severity': 'low',
+            'title': 'Empty Code Block',
+            'description': 'Empty pass statement in code block',
+            'suggestion': 'Add a comment explaining why the block is empty',
+        },
    ]

    def __init__(self):
@@ -45,7 +161,9 @@ class IssueDetector:
        self._compile_patterns()

    def _compile_patterns(self):
+        """Compile all regex patterns for better performance."""
        self._compiled_patterns = []
+
        for pattern_info in self.SECURITY_PATTERNS + self.BUG_PATTERNS + self.CODE_SMELL_PATTERNS:
            try:
                compiled = re.compile(pattern_info['pattern'])
@@ -54,48 +172,126 @@ class IssueDetector:
                pass

    def detect_issues(self, code: str, language: str = "text") -> list[Issue]:
+        """Scan ``code`` line by line against every compiled pattern.
+
+        Args:
+            code: Text to scan; it is split with ``str.splitlines()``.
+            language: Not used by this method.
+
+        Returns:
+            One ``Issue`` per (line, pattern) pair that matches, ordered by
+            line and then by the order of ``self._compiled_patterns``.
+            ``Issue.line`` is the 1-based index of the matching line.
+        """
        issues = []
-        for line_num, line in enumerate(code.splitlines(), 1):
+        lines = code.splitlines()
+
+        for line_num, line in enumerate(lines, 1):
            for compiled, pattern_info in self._compiled_patterns:
                if compiled.search(line):
-                    issues.append(Issue(
-                        type=pattern_info['type'], severity=pattern_info['severity'],
-                        title=pattern_info['title'], description=pattern_info['description'],
-                        line=line_num, suggestion=pattern_info['suggestion'], pattern=pattern_info['pattern'],
-                    ))
+                    # Copy the pattern's metadata verbatim into the finding.
+                    issue = Issue(
+                        type=pattern_info['type'],
+                        severity=pattern_info['severity'],
+                        title=pattern_info['title'],
+                        description=pattern_info['description'],
+                        line=line_num,
+                        suggestion=pattern_info['suggestion'],
+                        pattern=pattern_info['pattern'],
+                    )
+                    issues.append(issue)
+
        return issues

    def detect_diff_issues(self, old_code: str, new_code: str, language: str = "text") -> list[Issue]:
+        """Scan only the added lines of a diff-formatted ``new_code``.
+
+        A line counts as added when it starts with ``'+'`` but not with
+        ``'+++'``; the leading ``'+'`` is stripped before matching.
+
+        Args:
+            old_code: Not used by this method.
+            new_code: Text whose ``'+'``-prefixed lines are scanned.
+            language: Not used by this method.
+
+        Returns:
+            One ``Issue`` per (added line, pattern) match. ``Issue.line`` is
+            the 1-based position of the line within ``new_code`` itself.
+        """
        issues = []
-        for i, line in enumerate(new_code.splitlines(), 1):
+        new_lines = new_code.splitlines()
+
+        # First pass: collect (position, text) for every added line.
+        added_lines = []
+        for i, line in enumerate(new_lines, 1):
            if line.startswith('+') and not line.startswith('+++'):
                clean_line = line[1:]
+                added_lines.append((i, clean_line))
+
+        # Second pass: run every compiled pattern over the added lines only.
+        for line_num, clean_line in added_lines:
            for compiled, pattern_info in self._compiled_patterns:
                if compiled.search(clean_line):
-                        issues.append(Issue(
-                            type=pattern_info['type'], severity=pattern_info['severity'],
-                            title=pattern_info['title'], description=pattern_info['description'],
-                            line=i, suggestion=pattern_info['suggestion'], pattern=pattern_info['pattern'],
-                        ))
+                    issue = Issue(
+                        type=pattern_info['type'],
+                        severity=pattern_info['severity'],
+                        title=pattern_info['title'],
+                        description=pattern_info['description'],
+                        line=line_num,
+                        suggestion=pattern_info['suggestion'],
+                        pattern=pattern_info['pattern'],
+                    )
+                    issues.append(issue)
+
+        return issues
+
+    def check_security_patterns(self, code: str) -> list[Issue]:
+        """Scan ``code`` against ``SECURITY_PATTERNS`` only.
+
+        Args:
+            code: Text to scan; it is split with ``str.splitlines()``.
+
+        Returns:
+            One ``Issue`` per (line, pattern) match, ordered by line and then
+            by pattern order. ``Issue.line`` is 1-based.
+        """
+        return self._scan_patterns(code, self.SECURITY_PATTERNS)
+
+    def check_code_quality(self, code: str) -> list[Issue]:
+        """Scan ``code`` against ``CODE_SMELL_PATTERNS`` only.
+
+        Args:
+            code: Text to scan; it is split with ``str.splitlines()``.
+
+        Returns:
+            One ``Issue`` per (line, pattern) match, ordered by line and then
+            by pattern order. ``Issue.line`` is 1-based.
+        """
+        return self._scan_patterns(code, self.CODE_SMELL_PATTERNS)
+
+    @staticmethod
+    def _scan_patterns(code: str, patterns: list[dict]) -> list[Issue]:
+        """Match every line of ``code`` against ``patterns``.
+
+        Each pattern is compiled once, before the line loop, instead of
+        being looked up again for every line.
+
+        Args:
+            code: Text to scan.
+            patterns: Pattern-table entries carrying the keys ``'pattern'``,
+                ``'type'``, ``'severity'``, ``'title'``, ``'description'``
+                and ``'suggestion'``.
+
+        Returns:
+            One ``Issue`` per (line, pattern) match, ordered by line and then
+            by the order of ``patterns``.
+        """
+        compiled_patterns = []
+        for pattern_info in patterns:
+            try:
+                compiled = re.compile(pattern_info['pattern'])
+            except re.error:
+                # Best-effort: a malformed pattern is skipped instead of
+                # aborting the whole scan.
+                continue
+            compiled_patterns.append((compiled, pattern_info))
+
+        issues = []
+        for line_num, line in enumerate(code.splitlines(), 1):
+            for compiled, pattern_info in compiled_patterns:
+                if compiled.search(line):
+                    issues.append(Issue(
+                        type=pattern_info['type'],
+                        severity=pattern_info['severity'],
+                        title=pattern_info['title'],
+                        description=pattern_info['description'],
+                        line=line_num,
+                        suggestion=pattern_info['suggestion'],
+                        pattern=pattern_info['pattern'],
+                    ))
+
        return issues

    def suggest_improvements(self, code: str, language: str = "text") -> list[str]:
+        """Return one remediation hint per distinct issue type found in ``code``.
+
+        Args:
+            code: Text to scan; passed to ``self.detect_issues``.
+            language: Forwarded unchanged to ``self.detect_issues``.
+
+        Returns:
+            Strings of the form ``"<title>: <suggestion>"``, ordered by
+            severity (critical, high, medium, low, then anything else) and
+            then by title. Only the first issue of each type contributes,
+            and issues with an empty suggestion are skipped.
+        """
        suggestions = []
        issues = self.detect_issues(code, language)
+
+        # Lower rank sorts first; severities missing from this map get rank 4.
        severity_order = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3}
        seen_types = set()
+
        for issue in sorted(issues, key=lambda x: (severity_order.get(x.severity, 4), x.title)):
            if issue.type not in seen_types and issue.suggestion:
                suggestions.append(f"{issue.title}: {issue.suggestion}")
                seen_types.add(issue.type)
+
        return suggestions


 def detect_issues(code: str, language: str = "text") -> list[Issue]:
+    """Create a fresh ``IssueDetector`` and return its ``detect_issues`` result.
+
+    Args:
+        code: Text to scan.
+        language: Forwarded unchanged to ``IssueDetector.detect_issues``.
+
+    Returns:
+        The list of ``Issue`` objects produced by the detector.
+    """
    detector = IssueDetector()
    return detector.detect_issues(code, language)


 def suggest_improvements(code: str, language: str = "text") -> list[str]:
+    """Create a fresh ``IssueDetector`` and return its ``suggest_improvements`` result.
+
+    Args:
+        code: Text to scan.
+        language: Forwarded unchanged to ``IssueDetector.suggest_improvements``.
+
+    Returns:
+        The list of suggestion strings produced by the detector.
+    """
    detector = IssueDetector()
    return detector.suggest_improvements(code, language)