Files
auto-readme-cli/src/auto_readme/utils/file_scanner.py
7000pctAUTO 260032642e
Some checks failed
CI / test (push) Has been cancelled
CI / build (push) Has been cancelled
CI / release (push) Has been cancelled
Add utils, templates, config, interactive, and github modules
2026-02-05 08:45:40 +00:00

133 lines
4.5 KiB
Python

"""File scanning utilities for the Auto README Generator."""
from dataclasses import dataclass, field
from pathlib import Path
from typing import Iterator, Optional

from ..models import ProjectType, FileType, SourceFile
from .path_utils import PathUtils
@dataclass
class ScanResult:
    """Result of a file scan operation.

    Attributes:
        files: Source file records produced by the scan.
        project_type: Project type detected for the scanned root.
        root_path: Root directory the scan was run against.
        error_messages: Messages describing files that could not be
            processed. Defaults to a new empty list for each instance.
    """

    files: list[SourceFile]
    project_type: ProjectType
    root_path: Path
    # default_factory gives every instance its own list. The previous
    # ``= None`` default contradicted the ``list[str]`` annotation and made
    # iterating the default value raise TypeError.
    error_messages: list[str] = field(default_factory=list)
class FileScanner:
    """Scanner for discovering and categorizing project files.

    Attributes:
        root_path: Scan root, as returned by ``PathUtils.normalize_path``.
        exclude_hidden: When true, ``scan`` skips every path that
            ``PathUtils.is_hidden`` reports as hidden.
        errors: Messages recorded by ``create_source_file`` for files that
            could not be processed.
    """

    # Marker files looked up directly in the scan root, in priority order:
    # the first project type with an existing marker wins. TypeScript is
    # listed before JavaScript and keyed on tsconfig.json only, because
    # TypeScript projects commonly ship a package.json too; with JavaScript
    # first (and package.json in both lists) such projects were always
    # reported as JavaScript.
    PROJECT_MARKERS = {
        ProjectType.PYTHON: [
            "pyproject.toml",
            "setup.py",
            "setup.cfg",
            "requirements.txt",
        ],
        ProjectType.TYPESCRIPT: ["tsconfig.json"],
        ProjectType.JAVASCRIPT: ["package.json"],
        ProjectType.GO: ["go.mod"],
        ProjectType.RUST: ["Cargo.toml"],
    }

    # Lower-cased file suffix -> language. Suffixes mapped to UNKNOWN are
    # recognised as source code of a language without a dedicated project
    # type; suffixes absent from the table make ``detect_language`` return
    # None instead.
    _LANGUAGE_BY_SUFFIX = {
        ".py": ProjectType.PYTHON,
        ".js": ProjectType.JAVASCRIPT,
        ".ts": ProjectType.TYPESCRIPT,
        ".jsx": ProjectType.JAVASCRIPT,
        ".tsx": ProjectType.TYPESCRIPT,
        ".go": ProjectType.GO,
        ".rs": ProjectType.RUST,
        ".java": ProjectType.UNKNOWN,
        ".c": ProjectType.UNKNOWN,
        ".cpp": ProjectType.UNKNOWN,
        ".rb": ProjectType.UNKNOWN,
        ".php": ProjectType.UNKNOWN,
    }

    def __init__(self, root_path: Path, exclude_hidden: bool = True):
        """Prepare a scanner rooted at ``root_path``.

        Args:
            root_path: Directory to scan; normalized with
                ``PathUtils.normalize_path`` before being stored.
            exclude_hidden: Whether ``scan`` should skip hidden paths.
        """
        self.root_path = PathUtils.normalize_path(root_path)
        self.exclude_hidden = exclude_hidden
        self.errors: list[str] = []

    def scan(self) -> list[Path]:
        """Scan the directory and return all relevant file paths.

        Returns:
            The sorted paths of every regular file below ``root_path`` that
            is neither hidden (when ``exclude_hidden`` is set) nor ignored
            according to ``PathUtils.is_ignored_file``.
        """
        files = []
        for path in self.root_path.rglob("*"):
            if not path.is_file():
                continue
            if self.exclude_hidden and PathUtils.is_hidden(path):
                continue
            if PathUtils.is_ignored_file(path):
                continue
            files.append(path)
        return sorted(files)

    def categorize_file(self, file_path: Path) -> FileType:
        """Determine the type of a file.

        The checks run in a fixed order (test, config, documentation,
        source) and the first match wins, so a file matching several
        predicates is reported under the earliest category.
        """
        if PathUtils.is_test_file(file_path):
            return FileType.TEST
        if PathUtils.is_config_file(file_path):
            return FileType.CONFIG
        if PathUtils.is_documentation_file(file_path):
            return FileType.DOCUMENTATION
        if PathUtils.is_source_file(file_path):
            return FileType.SOURCE
        return FileType.UNKNOWN

    def detect_language(self, file_path: Path) -> Optional[ProjectType]:
        """Detect the programming language of a file based on extension.

        Returns:
            The language mapped to the file's lower-cased suffix, or None
            when the suffix is not in the lookup table.
        """
        return self._LANGUAGE_BY_SUFFIX.get(file_path.suffix.lower())

    def detect_project_type(self) -> ProjectType:
        """Detect the project type based on marker files.

        Returns:
            The first project type in ``PROJECT_MARKERS`` for which at least
            one marker exists directly under ``root_path``, or
            ``ProjectType.UNKNOWN`` when no marker is found.
        """
        for project_type, markers in self.PROJECT_MARKERS.items():
            if any((self.root_path / marker).exists() for marker in markers):
                return project_type
        return ProjectType.UNKNOWN

    def create_source_file(self, file_path: Path) -> Optional[SourceFile]:
        """Create a SourceFile object from a path.

        Returns:
            The populated ``SourceFile``, or None when processing failed; in
            that case a message is appended to ``self.errors``.
        """
        # Deliberately broad: one unreadable or odd file must not abort the
        # whole scan, so the failure is recorded and the file skipped.
        try:
            file_type = self.categorize_file(file_path)
            language = self.detect_language(file_path)
            relative_path = PathUtils.get_relative_path(file_path, self.root_path)
            line_count = PathUtils.count_lines(file_path)
            return SourceFile(
                path=relative_path,
                file_type=file_type,
                language=language,
                line_count=line_count,
            )
        except Exception as e:
            self.errors.append(f"Error processing {file_path}: {e}")
            return None

    def scan_and_create(self) -> list[SourceFile]:
        """Scan files and create SourceFile objects.

        Returns:
            One ``SourceFile`` per scanned path that could be processed, in
            the order returned by ``scan``. Failures are skipped here and
            reported through ``self.errors``.
        """
        created = (self.create_source_file(path) for path in self.scan())
        # Compare against None explicitly so a falsy-but-valid SourceFile
        # can never be dropped by accident.
        return [source_file for source_file in created if source_file is not None]
def scan_project(root_path: Path) -> ScanResult:
    """Scan a project and return all discovered files and project type.

    Args:
        root_path: Directory to scan. The scanner is created with its
            default settings, so hidden paths are excluded.

    Returns:
        A ``ScanResult`` carrying the created source files, the detected
        project type, the scanner's normalized root path, and the error
        messages the scanner recorded while processing files.
    """
    scanner = FileScanner(root_path)
    # Build the file list first: ``scanner.errors`` is filled in as a side
    # effect of ``scan_and_create`` and is read below.
    files = scanner.scan_and_create()
    project_type = scanner.detect_project_type()
    return ScanResult(
        files=files,
        project_type=project_type,
        root_path=scanner.root_path,
        error_messages=scanner.errors,
    )