"""File-system helpers (``codechunk/utils/file_utils.py``).

Recovered from the patch "Add utils and tests" (commit e8ce0cc), which added
this module as a new file.
"""

import hashlib
import logging
import os
from fnmatch import fnmatch
from pathlib import Path
from typing import Iterable, List, Optional, Union

logger = logging.getLogger(__name__)

# Read size used while hashing so large files are never held in memory whole.
_HASH_CHUNK_SIZE = 1024 * 1024
# Digest used when the requested algorithm cannot be used.
_FALLBACK_HASH_ALGORITHM = "md5"


def read_file_safe(file_path: Path, encoding: str = "utf-8") -> Optional[str]:
    """Read a text file, returning ``None`` instead of raising on failure.

    Bytes that cannot be decoded with *encoding* are replaced with U+FFFD
    (``errors="replace"``), so a badly encoded file still yields text.

    Args:
        file_path: File to read.
        encoding: Text encoding used to decode the file.

    Returns:
        The decoded contents, or ``None`` if the file could not be read.
    """
    try:
        return file_path.read_text(encoding=encoding, errors="replace")
    except Exception:
        # Deliberate best-effort boundary: report the failure, never raise.
        logger.debug("Could not read %s", file_path, exc_info=True)
        return None


def write_file_safe(content: str, file_path: Path, encoding: str = "utf-8") -> bool:
    """Write text to a file, creating missing parent directories first.

    Args:
        content: Text to write.
        file_path: Destination file; an existing file is overwritten.
        encoding: Text encoding used to encode *content*.

    Returns:
        ``True`` if the file was written, ``False`` if any step failed.
    """
    try:
        ensure_directory(file_path.parent)
        file_path.write_text(content, encoding=encoding)
    except Exception:
        # Deliberate best-effort boundary: report the failure, never raise.
        logger.debug("Could not write %s", file_path, exc_info=True)
        return False
    return True


def ensure_directory(path: Path) -> None:
    """Create *path* and any missing parents; do nothing if it already exists.

    Raises:
        OSError: If the directory cannot be created, for example because
            *path* exists and is not a directory.
    """
    path.mkdir(parents=True, exist_ok=True)


def _new_hasher(algorithm: str):
    """Return a hash object for *algorithm*, or an MD5 one if it is unusable."""
    try:
        hasher = hashlib.new(algorithm)
        # Probe: variable-length digests (SHAKE) require a length argument and
        # raise TypeError here, so they are treated as unusable.
        hasher.hexdigest()
    except (TypeError, ValueError):
        logger.warning(
            "Unsupported hash algorithm %r; falling back to %s",
            algorithm,
            _FALLBACK_HASH_ALGORITHM,
        )
        hasher = hashlib.new(_FALLBACK_HASH_ALGORITHM)
    return hasher


def get_file_hash(file_path: Path, algorithm: str = "md5") -> Optional[str]:
    """Return the hex digest of a file's contents.

    Any fixed-length algorithm known to :mod:`hashlib` is accepted (``"md5"``,
    ``"sha1"``, ``"sha256"``, ``"sha512"``, ...). For backward compatibility an
    unrecognised name still produces an MD5 digest, but a warning is logged so
    the substitution is visible.

    The file is read in chunks, so memory use does not grow with file size.

    Args:
        file_path: File to hash.
        algorithm: Name of the hashlib algorithm to use.

    Returns:
        The lowercase hex digest, or ``None`` if the file could not be read.
    """
    try:
        hasher = _new_hasher(algorithm)
        with file_path.open("rb") as stream:
            while chunk := stream.read(_HASH_CHUNK_SIZE):
                hasher.update(chunk)
        return hasher.hexdigest()
    except Exception:
        # Deliberate best-effort boundary: report the failure, never raise.
        logger.debug("Could not hash %s", file_path, exc_info=True)
        return None


def find_files_pattern(
    directory: Path, patterns: Union[str, Iterable[str]]
) -> List[Path]:
    """Recursively find files under *directory* matching any fnmatch pattern.

    Each pattern is tested against both the bare file name and the path
    relative to *directory*; a file is returned once even if several patterns
    match it. Traversal is top-down and sorted by name, so the result order is
    deterministic. Symlinks to directories are not followed and are not
    reported as files. A missing *directory* yields an empty list.

    Args:
        directory: Root of the tree to search.
        patterns: One fnmatch pattern or an iterable of them.

    Returns:
        Paths of the matching files, each prefixed with *directory*.
    """
    # A bare string would otherwise be iterated character by character, and a
    # one-shot iterator would be exhausted after the first file.
    if isinstance(patterns, str):
        pattern_list = [patterns]
    else:
        pattern_list = list(patterns)

    directory = Path(directory)
    matches: List[Path] = []

    # os.walk is used rather than Path.walk, which only exists on Python 3.12+.
    for root, dirs, files in os.walk(directory):
        dirs.sort()  # in-place sort controls the order os.walk descends in
        for name in sorted(files):
            file_path = Path(root) / name
            rel_path = str(file_path.relative_to(directory))
            if any(
                fnmatch(name, pattern) or fnmatch(rel_path, pattern)
                for pattern in pattern_list
            ):
                matches.append(file_path)

    return matches