"""File-system helpers for locating and inspecting source-code files."""

import os
from pathlib import Path
from typing import Generator, List, Optional


class FileUtils:
    """Namespace of stateless class-method helpers for working with files.

    All helpers take ``pathlib.Path`` arguments. The "safe" helpers
    (``read_file_safe``, ``get_file_size``, ``count_lines``, ``is_binary``)
    swallow OS-level errors and return a fallback value instead of raising.
    """

    # Suffixes (including the leading dot) that ``find_code_files`` accepts
    # when the caller does not supply its own set. Matching is case-sensitive.
    CODE_EXTENSIONS = {
        ".py", ".pyw",
        ".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs",
        ".go",
        ".rs",
        ".java", ".kt", ".scala",
        ".c", ".cpp", ".h", ".hpp",
        ".rb",
        ".php",
        ".swift",
    }

    @classmethod
    def find_code_files(
        cls,
        path: Path,
        extensions: Optional[set] = None,
    ) -> Generator[Path, None, None]:
        """Yield every file under *path* whose suffix is in *extensions*.

        Args:
            path: A single file or a directory to walk recursively.
            extensions: Suffixes to accept (e.g. ``{".py"}``). ``None`` or an
                empty set falls back to ``CODE_EXTENSIONS``.

        Yields:
            Paths of matching files. If *path* is a file it is yielded only
            when its own suffix matches. During a directory walk, files whose
            name starts with ``"."`` are skipped; directories are not filtered.
        """
        # NOTE: ``or`` means an explicitly empty set also selects the defaults.
        exts = extensions or cls.CODE_EXTENSIONS

        if path.is_file():
            if path.suffix in exts:
                yield path
            return

        for root, _dirs, files in os.walk(path):
            for file in files:
                if file.startswith("."):
                    continue
                file_path = Path(root) / file
                if file_path.suffix in exts:
                    yield file_path

    @classmethod
    def read_file_safe(cls, file_path: Path, encoding: str = "utf-8") -> Optional[str]:
        """Return the text of *file_path*, or ``None`` if it cannot be read.

        Args:
            file_path: File to read.
            encoding: Text encoding used to decode the file.

        Returns:
            The decoded contents, or ``None`` on an OS error or when the
            bytes are not valid in *encoding*.
        """
        try:
            return file_path.read_text(encoding=encoding)
        except (UnicodeDecodeError, OSError):
            return None

    @classmethod
    def get_file_size(cls, file_path: Path) -> int:
        """Return the size of *file_path* in bytes, or 0 if it cannot be stat'ed."""
        try:
            return file_path.stat().st_size
        except OSError:
            return 0

    @classmethod
    def count_lines(cls, file_path: Path) -> int:
        """Return the number of lines in *file_path*, or 0 if it cannot be opened.

        The file is decoded as UTF-8 with undecodable bytes replaced, so a
        file in another encoding (or a binary file) is still counted instead
        of raising ``UnicodeDecodeError``. Text mode keeps universal-newline
        handling, so ``\\n``, ``\\r\\n`` and ``\\r`` all terminate a line.
        """
        try:
            with open(file_path, encoding="utf-8", errors="replace") as f:
                return sum(1 for _ in f)
        except OSError:
            return 0

    @classmethod
    def is_binary(cls, file_path: Path) -> bool:
        """Heuristically report whether *file_path* is a binary file.

        A file counts as binary when a NUL byte appears in its first 8192
        bytes. A file that cannot be opened is reported as binary (``True``).
        """
        try:
            with open(file_path, "rb") as f:
                chunk = f.read(8192)
                return b"\x00" in chunk
        except OSError:
            return True

    @classmethod
    def get_relative_path(cls, file_path: Path, base_path: Path) -> Path:
        """Return *file_path* expressed relative to *base_path*.

        Raises:
            ValueError: If *file_path* is not located under *base_path*
                (propagated from ``Path.relative_to``).
        """
        return file_path.relative_to(base_path)

    @classmethod
    def list_directories(cls, path: Path) -> List[Path]:
        """Return the immediate sub-directories of *path*.

        Returns an empty list when *path* is not a directory. The order is
        whatever ``Path.iterdir`` produces.
        """
        if not path.is_dir():
            return []
        return [d for d in path.iterdir() if d.is_dir()]

    @classmethod
    def ensure_directory(cls, path: Path) -> None:
        """Create *path* and any missing parents; do nothing if it already exists."""
        path.mkdir(parents=True, exist_ok=True)