Files
ai-code-audit-cli/src/core/scanner.py
7000pctAUTO f4bf1d6536
Some checks failed
CI / test (3.11) (push) Has been cancelled
CI / test (3.12) (push) Has been cancelled
CI / test (3.9) (push) Has been cancelled
CI / build (push) Has been cancelled
CI / release (push) Has been cancelled
CI / test (3.10) (push) Has been cancelled
Initial upload of ai-code-audit-cli project
2026-02-03 10:30:02 +00:00

145 lines
5.4 KiB
Python

"""Main scanner orchestrator for AI Code Audit CLI."""
import logging
from datetime import datetime
from pathlib import Path
from typing import Optional
from .config import AuditConfig
from .models import ScanResult, Issue, IssueCategory, SeverityLevel
from ..scanners import BanditScanner, RuffScanner, TreeSitterScanner
from ..utils import FileUtils, LanguageDetector
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
class Scanner:
    """Main scanner class that orchestrates all scanning components.

    The scanner walks the configured target (a single file or a directory),
    detects each file's language and dispatches to the matching analysers:
    Bandit, Ruff and tree-sitter for Python; Ruff and tree-sitter for
    JavaScript/TypeScript. Files in any other language are still counted in
    ``files_scanned`` but no analyser is run on them. Findings that pass the
    severity filter are added to ``self.result``.
    """

    def __init__(self, config: AuditConfig):
        """Initialize the scanner with configuration.

        Args:
            config: Audit settings. This class reads ``target_path``,
                ``verbose``, ``max_file_size``, ``excluded_patterns``,
                ``language_filter`` and ``severity_filter`` from it, and
                calls its ``validate()`` and ``should_scan_file()`` methods.
        """
        self.config = config
        self.file_utils = FileUtils()
        self.language_detector = LanguageDetector()
        self.result = ScanResult(files_scanned=0, target_path=config.target_path)
        self._setup_scanners()

    def _setup_scanners(self) -> None:
        """Initialize scanner components."""
        self.bandit_scanner = BanditScanner()
        self.ruff_scanner = RuffScanner()
        self.tree_sitter_scanner = TreeSitterScanner()

    def scan(self) -> ScanResult:
        """Execute the full scan operation.

        Validates the configuration, stamps the result with the current
        local time and scans the target as a file or as a directory.

        Returns:
            The ``ScanResult`` object created in ``__init__``. It is not
            reset here, so a second ``scan()`` on the same instance keeps
            counting ``files_scanned`` from its previous value and calls
            ``add_issue`` on the same object again.
        """
        self.config.validate()
        target_path = Path(self.config.target_path)
        self.result.scan_time = datetime.now()
        if target_path.is_file():
            self._scan_file(target_path)
        else:
            self._scan_directory(target_path)
        return self.result

    def _scan_directory(self, directory: Path) -> None:
        """Scan all files in a directory.

        Candidate files come from ``FileUtils.find_files``; when a language
        filter is configured, files whose detected language value differs
        from it are skipped before ``_scan_file`` is called.

        Args:
            directory: Root directory handed to ``find_files``.
        """
        if self.config.verbose:
            logger.info("Scanning directory: %s", directory)

        wanted_language = self.config.language_filter
        candidates = self.file_utils.find_files(
            directory,
            max_size=self.config.max_file_size,
            excluded_patterns=self.config.excluded_patterns,
        )
        for file_path in candidates:
            if wanted_language:
                detected = self.language_detector.detect(file_path)
                if detected.value != wanted_language:
                    continue
            self._scan_file(file_path)

    def _scan_file(self, file_path: Path) -> None:
        """Scan a single file.

        The file is read as UTF-8 with undecodable bytes replaced, counted
        in ``files_scanned`` and dispatched by detected language. Failures
        are recorded as warnings on the result instead of being raised, so
        one bad file does not abort the whole scan.

        Args:
            file_path: Path of the file to scan.
        """
        if not self.config.should_scan_file(file_path):
            if self.config.verbose:
                logger.info("Skipping file (excluded or too large): %s", file_path)
            return

        try:
            file_content = file_path.read_text(encoding="utf-8", errors="replace")
            file_str = str(file_path)
            self.result.files_scanned += 1
            language = self.language_detector.detect(file_path)
            if self.config.verbose:
                logger.info(
                    "Scanning: %s (language: %s)", file_path, language.value
                )
            if language.value == "python":
                self._scan_python_file(file_str, file_content)
            elif language.value in ("javascript", "typescript"):
                self._scan_js_ts_file(file_str, file_content, language.value)
        except PermissionError:
            self.result.add_warning(f"Permission denied: {file_path}")
        except UnicodeDecodeError:
            # read_text() above uses errors="replace", so a decode error
            # reaching this handler would have to come from a later call in
            # the try body (language detection or one of the scanners).
            self.result.add_warning(
                f"Could not decode file (encoding issue): {file_path}"
            )
        except Exception as exc:
            # Deliberate per-file boundary: report the failure and move on.
            self.result.add_warning(f"Error scanning {file_path}: {exc}")
            if self.config.verbose:
                logger.exception("Error scanning file: %s", file_path)

    def _scan_python_file(self, file_path: str, content: str) -> None:
        """Scan a Python file for issues.

        Runs Bandit, Ruff and tree-sitter (in that order) and records the
        findings that pass the severity filter.

        Args:
            file_path: Path of the file, as a string, passed to the scanners.
            content: Text of the file.
        """
        self._record_issues(
            self.bandit_scanner.scan_content(content, file_path),
            self.ruff_scanner.scan_content(content, file_path, "python"),
            self.tree_sitter_scanner.scan_content(content, file_path, "python"),
        )

    def _scan_js_ts_file(self, file_path: str, content: str, language: str) -> None:
        """Scan a JavaScript or TypeScript file for issues.

        Runs Ruff and tree-sitter (in that order) and records the findings
        that pass the severity filter.

        Args:
            file_path: Path of the file, as a string, passed to the scanners.
            content: Text of the file.
            language: Language name forwarded to both scanners.
        """
        self._record_issues(
            self.ruff_scanner.scan_content(content, file_path, language),
            self.tree_sitter_scanner.scan_content(content, file_path, language),
        )

    def _record_issues(self, *batches) -> None:
        """Add every issue from the given batches that passes the filter.

        Each batch is iterated on its own rather than concatenated with
        ``+``, so scanners may return any iterable (list, tuple, iterator)
        and the types need not match each other.

        Args:
            *batches: One iterable of issues per scanner, in report order.
        """
        for batch in batches:
            for issue in batch:
                if self._should_include_issue(issue):
                    self.result.add_issue(issue)

    def _should_include_issue(self, issue: Issue) -> bool:
        """Check if an issue should be included based on filters.

        Args:
            issue: Finding whose ``severity`` is compared to the filter.

        Returns:
            ``True`` when no severity filter is configured, or when the
            issue ranks at or above the configured minimum severity.
        """
        severity_filter = self.config.severity_filter
        if not severity_filter:
            return True

        severity_order = {
            SeverityLevel.LOW: 0,
            SeverityLevel.MEDIUM: 1,
            SeverityLevel.HIGH: 2,
            SeverityLevel.CRITICAL: 3,
        }
        # An unrecognised filter resolves to None and therefore to -1, which
        # lets every issue through; an issue with a severity missing from
        # the table is ranked like LOW (0).
        threshold = severity_order.get(
            self._get_severity_from_string(severity_filter), -1
        )
        return severity_order.get(issue.severity, 0) >= threshold

    def _get_severity_from_string(self, severity_str: str) -> Optional[SeverityLevel]:
        """Convert severity string to enum.

        Matching ignores case and surrounding whitespace. A value that is
        already a ``SeverityLevel`` is returned unchanged.

        Args:
            severity_str: Severity name such as ``"low"`` or ``"HIGH"``.

        Returns:
            The matching ``SeverityLevel``, or ``None`` if the name is not
            one of low/medium/high/critical.
        """
        if isinstance(severity_str, SeverityLevel):
            return severity_str
        mapping = {
            "low": SeverityLevel.LOW,
            "medium": SeverityLevel.MEDIUM,
            "high": SeverityLevel.HIGH,
            "critical": SeverityLevel.CRITICAL,
        }
        return mapping.get(severity_str.strip().lower())