"""File scanning utilities for the Auto README Generator.""" from pathlib import Path from typing import Iterator, Optional from dataclasses import dataclass from ..models import ProjectType, FileType, SourceFile from .path_utils import PathUtils @dataclass class ScanResult: """Result of a file scan operation.""" files: list[SourceFile] project_type: ProjectType root_path: Path error_messages: list[str] = None class FileScanner: """Scanner for discovering and categorizing project files.""" PROJECT_MARKERS = { ProjectType.PYTHON: ["pyproject.toml", "setup.py", "setup.cfg", "requirements.txt"], ProjectType.JAVASCRIPT: ["package.json"], ProjectType.TYPESCRIPT: ["package.json", "tsconfig.json"], ProjectType.GO: ["go.mod"], ProjectType.RUST: ["Cargo.toml"], } def __init__(self, root_path: Path, exclude_hidden: bool = True): self.root_path = PathUtils.normalize_path(root_path) self.exclude_hidden = exclude_hidden self.errors: list[str] = [] def scan(self) -> list[Path]: """Scan the directory and return all relevant file paths.""" files = [] for path in self.root_path.rglob("*"): if path.is_file(): if self.exclude_hidden and PathUtils.is_hidden(path): continue if PathUtils.is_ignored_file(path): continue files.append(path) return sorted(files) def categorize_file(self, file_path: Path) -> FileType: """Determine the type of a file.""" if PathUtils.is_test_file(file_path): return FileType.TEST elif PathUtils.is_config_file(file_path): return FileType.CONFIG elif PathUtils.is_documentation_file(file_path): return FileType.DOCUMENTATION elif PathUtils.is_source_file(file_path): return FileType.SOURCE return FileType.UNKNOWN def detect_language(self, file_path: Path) -> Optional[ProjectType]: """Detect the programming language of a file based on extension.""" ext = file_path.suffix.lower() language_map = { ".py": ProjectType.PYTHON, ".js": ProjectType.JAVASCRIPT, ".ts": ProjectType.TYPESCRIPT, ".jsx": ProjectType.JAVASCRIPT, ".tsx": ProjectType.TYPESCRIPT, ".go": ProjectType.GO, ".rs": ProjectType.RUST, ".java": ProjectType.UNKNOWN, ".c": ProjectType.UNKNOWN, ".cpp": ProjectType.UNKNOWN, ".rb": ProjectType.UNKNOWN, ".php": ProjectType.UNKNOWN, } return language_map.get(ext) def detect_project_type(self) -> ProjectType: """Detect the project type based on marker files.""" for project_type, markers in self.PROJECT_MARKERS.items(): for marker in markers: if (self.root_path / marker).exists(): return project_type return ProjectType.UNKNOWN def create_source_file(self, file_path: Path) -> Optional[SourceFile]: """Create a SourceFile object from a path.""" try: file_type = self.categorize_file(file_path) language = self.detect_language(file_path) relative_path = PathUtils.get_relative_path(file_path, self.root_path) line_count = PathUtils.count_lines(file_path) return SourceFile( path=relative_path, file_type=file_type, language=language, line_count=line_count, ) except Exception as e: self.errors.append(f"Error processing {file_path}: {e}") return None def scan_and_create(self) -> list[SourceFile]: """Scan files and create SourceFile objects.""" source_files = [] file_paths = self.scan() for file_path in file_paths: source_file = self.create_source_file(file_path) if source_file: source_files.append(source_file) return source_files def scan_project(root_path: Path) -> ScanResult: """Scan a project and return all discovered files and project type.""" scanner = FileScanner(root_path) files = scanner.scan_and_create() project_type = scanner.detect_project_type() return ScanResult( files=files, project_type=project_type, root_path=scanner.root_path, error_messages=scanner.errors, )