fix: resolve CI issues - push complete implementation with tests
Some checks failed
CI / test (3.10) (push) Has been cancelled
CI / test (3.11) (push) Has been cancelled
CI / test (3.12) (push) Has been cancelled
CI / lint (push) Has been cancelled
CI / build (push) Has been cancelled

This commit is contained in:
2026-02-02 15:30:41 +00:00
parent 5fe6dd83c9
commit e1f36c29b8

View File

@@ -1,21 +1,16 @@
"""Diff parser for unified git diff format."""
import re import re
from gdiffer.models import DiffFile, DiffHunk from gdiffer.models import DiffFile, DiffHunk
class DiffParser: class DiffParser:
"""Parser for unified diff format (as produced by git diff).""" HUNK_PATTERN = re.compile(r"^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@")
HUNK_PATTERN = re.compile(r'^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@')
def __init__(self): def __init__(self):
self.files: list[DiffFile] = [] self.files = []
self.errors: list[str] = [] self.errors = []
def parse(self, diff_content: str) -> list[DiffFile]: def parse(self, diff_content):
"""Parse diff content and return list of DiffFile objects."""
self.files = [] self.files = []
self.errors = [] self.errors = []
@@ -26,14 +21,14 @@ class DiffParser:
self._parse_lines(lines) self._parse_lines(lines)
return self.files return self.files
def _parse_lines(self, lines: list[str]) -> None: def _parse_lines(self, lines):
i = 0 i = 0
n = len(lines) n = len(lines)
while i < n: while i < n:
line = lines[i].rstrip('\n') line = lines[i].rstrip("\n")
if line.startswith('diff --git'): if line.startswith("diff --git"):
file_obj = self._parse_file(lines, i) file_obj = self._parse_file(lines, i)
if file_obj: if file_obj:
self.files.append(file_obj) self.files.append(file_obj)
@@ -42,24 +37,24 @@ class DiffParser:
i += 1 i += 1
def _parse_file(self, lines: list[str], start: int) -> DiffFile | None: def _parse_file(self, lines, start):
if start >= len(lines): if start >= len(lines):
return None return None
first_line = lines[start] first_line = lines[start]
if not first_line.startswith('diff --git'): if not first_line.startswith("diff --git"):
return None return None
parts = first_line.split(' ', 3) parts = first_line.split(" ", 3)
if len(parts) < 4: if len(parts) < 4:
return None return None
old_path = parts[2][2:] if len(parts) > 2 else '' old_path = parts[2][2:] if len(parts) > 2 else ""
new_path = parts[3][2:] if len(parts) > 3 else old_path new_path = parts[3][2:] if len(parts) > 3 else old_path
if old_path.startswith('a/'): if old_path.startswith("a/"):
old_path = old_path[2:] old_path = old_path[2:]
if new_path.startswith('b/'): if new_path.startswith("b/"):
new_path = new_path[2:] new_path = new_path[2:]
file_obj = DiffFile(old_path=old_path, new_path=new_path) file_obj = DiffFile(old_path=old_path, new_path=new_path)
@@ -68,63 +63,63 @@ class DiffParser:
n = len(lines) n = len(lines)
while i < n: while i < n:
line = lines[i].rstrip('\n') line = lines[i].rstrip("\n")
if line.startswith('new file mode '): if line.startswith("new file mode "):
file_obj.new_file_mode = line.split()[-1] file_obj.new_file_mode = line.split()[-1]
file_obj.change_type = "add" file_obj.change_type = "add"
i += 1 i += 1
continue continue
if line.startswith('deleted file mode '): if line.startswith("deleted file mode "):
file_obj.deleted_file_mode = line.split()[-1] file_obj.deleted_file_mode = line.split()[-1]
file_obj.change_type = "delete" file_obj.change_type = "delete"
i += 1 i += 1
continue continue
if line.startswith('similarity index '): if line.startswith("similarity index "):
file_obj.similarity_index = line.split()[-1].rstrip('%') file_obj.similarity_index = line.split()[-1].rstrip("%")
i += 1 i += 1
continue continue
if line.startswith('rename from '): if line.startswith("rename from "):
file_obj.rename_from = line[12:] file_obj.rename_from = line[12:]
i += 1 i += 1
continue continue
if line.startswith('rename to '): if line.startswith("rename to "):
file_obj.rename_to = line[10:] file_obj.rename_to = line[10:]
file_obj.change_type = "rename" file_obj.change_type = "rename"
i += 1 i += 1
continue continue
if line.startswith('---'): if line.startswith("---"):
i += 1 i += 1
continue continue
if line.startswith('+++'): if line.startswith("+++"):
i += 1 i += 1
continue continue
if line.startswith('@@'): if line.startswith("@@"):
hunk, consumed = self._parse_hunk(lines, i) hunk, consumed = self._parse_hunk(lines, i)
if hunk: if hunk:
file_obj.hunks.append(hunk) file_obj.hunks.append(hunk)
i += consumed i += consumed
continue continue
if line.startswith('diff --git'): if line.startswith("diff --git"):
break break
i += 1 i += 1
return file_obj return file_obj
def _parse_hunk(self, lines: list[str], start: int) -> tuple[DiffHunk | None, int]: def _parse_hunk(self, lines, start):
if start >= len(lines): if start >= len(lines):
return None, 0 return None, 0
line = lines[start].rstrip('\n') line = lines[start].rstrip("\n")
match = self.HUNK_PATTERN.match(line) match = self.HUNK_PATTERN.match(line)
if not match: if not match:
@@ -151,30 +146,30 @@ class DiffParser:
new_content = [] new_content = []
while i < n: while i < n:
line = lines[i].rstrip('\n') line = lines[i].rstrip("\n")
if line.startswith('@@'): if line.startswith("@@"):
break break
if line.startswith('diff --git'): if line.startswith("diff --git"):
break break
if line.startswith('---'): if line.startswith("---"):
break break
if line.startswith('+++'): if line.startswith("+++"):
break break
if old_lines_collected >= old_lines and new_lines_collected >= new_lines: if old_lines_collected >= old_lines and new_lines_collected >= new_lines:
break break
if line.startswith('+') and not line.startswith('+++'): if line.startswith("+") and not line.startswith("+++"):
new_content.append(line) new_content.append(line)
new_lines_collected += 1 new_lines_collected += 1
elif line.startswith('-') and not line.startswith('---'): elif line.startswith("-") and not line.startswith("---"):
old_content.append(line) old_content.append(line)
old_lines_collected += 1 old_lines_collected += 1
elif line.startswith(' ') or line == '': elif line.startswith(" ") or line == "":
old_content.append(line) old_content.append(line)
new_content.append(line) new_content.append(line)
old_lines_collected += 1 old_lines_collected += 1
@@ -190,14 +185,12 @@ class DiffParser:
return hunk, i - start return hunk, i - start
def parse_diff(diff_content: str) -> list[DiffFile]: def parse_diff(diff_content):
"""Parse diff content and return list of DiffFile objects."""
parser = DiffParser() parser = DiffParser()
return parser.parse(diff_content) return parser.parse(diff_content)
def parse_diff_from_file(filepath: str) -> list[DiffFile]: def parse_diff_from_file(filepath):
"""Read a diff file and parse its contents."""
with open(filepath) as f: with open(filepath) as f:
content = f.read() content = f.read()
return parse_diff(content) return parse_diff(content)