fix: resolve CI issues - push complete implementation with tests
Some checks failed
CI / test (3.10) (push) Has been cancelled
CI / test (3.11) (push) Has been cancelled
CI / test (3.12) (push) Has been cancelled
CI / lint (push) Has been cancelled
CI / build (push) Has been cancelled

This commit is contained in:
2026-02-02 15:30:33 +00:00
parent 293dbd6ad3
commit cc6022cdc7

View File

@@ -1,32 +1,27 @@
"""Code analyzer using tree-sitter for AST-based analysis."""
import re import re
from gdiffer.language_detector import LanguageDetector from gdiffer.language_detector import LanguageDetector
# Maps each supported language name to the name of the tree-sitter grammar
# used for it. Every language currently maps to a grammar of the same name.
LANGUAGE_GRAMMARS = {
    grammar: grammar
    for grammar in (
        "python",
        "javascript",
        "typescript",
        "java",
        "go",
        "rust",
        "c",
        "cpp",
        "ruby",
        "php",
    )
}
class CodeAnalyzer: class CodeAnalyzer:
"""Analyzes code using tree-sitter AST parsing."""
def __init__(self) -> None:
    """Create the language detector and an empty per-language parser cache."""
    self.language_detector = LanguageDetector()
    # Keyed by language name; a value of None records that no parser could
    # be obtained for that language.
    self._parsers: dict = {}
def _get_parser(self, language: str): def _get_parser(self, language):
"""Get or create a tree-sitter parser for a language."""
if language not in self._parsers: if language not in self._parsers:
try: try:
import tree_sitter import tree_sitter
@@ -36,16 +31,15 @@ class CodeAnalyzer:
self._parsers[language] = None self._parsers[language] = None
return self._parsers[language] return self._parsers[language]
def analyze_code(self, code: str, language: str = "text") -> dict: def analyze_code(self, code, language="text"):
"""Analyze code and return structured information."""
result = { result = {
'language': language, "language": language,
'functions': [], "functions": [],
'classes': [], "classes": [],
'imports': [], "imports": [],
'variables': [], "variables": [],
'function_calls': [], "function_calls": [],
'change_summary': "", "change_summary": "",
} }
if language == "text" or not code.strip(): if language == "text" or not code.strip():
@@ -53,26 +47,25 @@ class CodeAnalyzer:
parser = self._get_parser(language) parser = self._get_parser(language)
if parser is None: if parser is None:
result['change_summary'] = self._analyze_without_parser(code) result["change_summary"] = self._analyze_without_parser(code)
return result return result
try: try:
tree = parser.parse(code.encode() if isinstance(code, str) else code) tree = parser.parse(code.encode() if isinstance(code, str) else code)
result['ast_info'] = self._extract_ast_info(tree.root_node, language) result["ast_info"] = self._extract_ast_info(tree.root_node, language)
result['change_summary'] = self._generate_summary(result['ast_info']) result["change_summary"] = self._generate_summary(result["ast_info"])
except Exception: except Exception:
result['change_summary'] = self._analyze_without_parser(code) result["change_summary"] = self._analyze_without_parser(code)
return result return result
def _extract_ast_info(self, node, language: str) -> dict: def _extract_ast_info(self, node, language):
"""Extract information from AST node."""
info = { info = {
'functions': [], "functions": [],
'classes': [], "classes": [],
'imports': [], "imports": [],
'function_calls': [], "function_calls": [],
'nested_nodes': [], "nested_nodes": [],
} }
if node is None: if node is None:
@@ -82,90 +75,86 @@ class CodeAnalyzer:
node_text = node.text.decode() if isinstance(node.text, bytes) else node.text node_text = node.text.decode() if isinstance(node.text, bytes) else node.text
function_keywords = [ function_keywords = [
'function_definition', 'function_declaration', 'method_definition', 'func' "function_definition", "function_declaration", "method_definition", "func"
] ]
class_keywords = ['class_definition', 'class_declaration', 'struct', 'impl'] class_keywords = ["class_definition", "class_declaration", "struct", "impl"]
import_keywords = ['import_statement', 'import_from_statement', 'import', 'require'] import_keywords = ["import_statement", "import_from_statement", "import", "require"]
call_keywords = ['call_expression', 'function_call', 'method_call', 'expression_statement'] call_keywords = ["call_expression", "function_call", "method_call", "expression_statement"]
if node_type in function_keywords: if node_type in function_keywords:
info['functions'].append(self._extract_function_info(node, language)) info["functions"].append(self._extract_function_info(node, language))
if node_type in class_keywords: if node_type in class_keywords:
info['classes'].append(self._extract_class_info(node, language)) info["classes"].append(self._extract_class_info(node, language))
if node_type in import_keywords: if node_type in import_keywords:
info['imports'].append(node_text) info["imports"].append(node_text)
if node_type in call_keywords: if node_type in call_keywords:
info['function_calls'].append(node_text) info["function_calls"].append(node_text)
for child in node.children: for child in node.children:
child_info = self._extract_ast_info(child, language) child_info = self._extract_ast_info(child, language)
info['functions'].extend(child_info['functions']) info["functions"].extend(child_info["functions"])
info['classes'].extend(child_info['classes']) info["classes"].extend(child_info["classes"])
info['imports'].extend(child_info['imports']) info["imports"].extend(child_info["imports"])
info['function_calls'].extend(child_info['function_calls']) info["function_calls"].extend(child_info["function_calls"])
return info return info
def _extract_function_info(self, node, language: str) -> dict: def _extract_function_info(self, node, language):
"""Extract function name and details."""
name = "" name = ""
params = [] params = []
start_line = node.start_point[0] + 1 if node.start_point else 0 start_line = node.start_point[0] + 1 if node.start_point else 0
for child in node.children: for child in node.children:
if child.type in ['identifier', 'function_name', 'name']: if child.type in ["identifier", "function_name", "name"]:
name = child.text.decode() if isinstance(child.text, bytes) else child.text name = child.text.decode() if isinstance(child.text, bytes) else child.text
elif child.type in ['parameters', 'parameter_list', 'formal_parameters']: elif child.type in ["parameters", "parameter_list", "formal_parameters"]:
params = self._extract_parameters(child) params = self._extract_parameters(child)
return { return {
'name': name, "name": name,
'parameters': params, "parameters": params,
'start_line': start_line, "start_line": start_line,
} }
def _extract_class_info(self, node, language: str) -> dict: def _extract_class_info(self, node, language):
"""Extract class name and details."""
name = "" name = ""
methods = [] methods = []
start_line = node.start_point[0] + 1 if node.start_point else 0 start_line = node.start_point[0] + 1 if node.start_point else 0
for child in node.children: for child in node.children:
if child.type in ['identifier', 'name', 'type_identifier']: if child.type in ["identifier", "name", "type_identifier"]:
if not name: if not name:
name = child.text.decode() if isinstance(child.text, bytes) else child.text name = child.text.decode() if isinstance(child.text, bytes) else child.text
return { return {
'name': name, "name": name,
'start_line': start_line, "start_line": start_line,
'methods': methods, "methods": methods,
} }
def _extract_parameters(self, node) -> list[str]: def _extract_parameters(self, node):
"""Extract parameter names from parameter list."""
params = [] params = []
for child in node.children: for child in node.children:
if child.type in ['identifier', 'parameter', 'positional_argument']: if child.type in ["identifier", "parameter", "positional_argument"]:
param_name = child.text.decode() if isinstance(child.text, bytes) else child.text param_name = child.text.decode() if isinstance(child.text, bytes) else child.text
if param_name and param_name not in [',', '(', ')']: if param_name and param_name not in [",", "(", ")"]:
params.append(param_name) params.append(param_name)
return params return params
def _analyze_without_parser(self, code: str) -> str: def _analyze_without_parser(self, code):
"""Fallback analysis without tree-sitter parser."""
lines = code.splitlines() lines = code.splitlines()
summary_parts = [] summary_parts = []
added_lines = [ added_lines = [
line for line in lines line for line in lines
if line.strip().startswith('+') and not line.strip().startswith('+++') if line.strip().startswith("+") and not line.strip().startswith("+++")
] ]
removed_lines = [ removed_lines = [
line for line in lines line for line in lines
if line.strip().startswith('-') and not line.strip().startswith('---') if line.strip().startswith("-") and not line.strip().startswith("---")
] ]
if added_lines or removed_lines: if added_lines or removed_lines:
@@ -174,18 +163,18 @@ class CodeAnalyzer:
) )
func_patterns = { func_patterns = {
'python': r'^def\\s+(\\w+)', "python": r"^def\\s+(\\w+)",
'javascript': r'^function\\s+(\\w+)|const\\s+(\\w+)\\s*=\\s*function', "javascript": r"^function\\s+(\\w+)|const\\s+(\\w+)\\s*=\\s*function",
'java': r'^\\s*(public|private|protected)?\\s*(static\\s+)?\\s*\\w+\\s+(\\w+)\\s*\\(', "java": r"^\\s*(public|private|protected)?\\s*(static\\s+)?\\s*\\w+\\s+(\\w+)\\s*\\(",
'go': r'^func\\s+(\\w+)', "go": r"^func\\s+(\\w+)",
'rust': r'^fn\\s+(\\w+)', "rust": r"^fn\\s+(\\w+)",
} }
for lang, pattern in func_patterns.items(): for lang, pattern in func_patterns.items():
funcs = re.findall(pattern, code, re.MULTILINE) funcs = re.findall(pattern, code, re.MULTILINE)
if funcs: if funcs:
func_names = [ func_names = [
f if isinstance(f, str) else next((x for x in f if x), '') f if isinstance(f, str) else next((x for x in f if x), "")
for f in funcs for f in funcs
] ]
func_names = [n for n in func_names if n] func_names = [n for n in func_names if n]
@@ -194,10 +183,10 @@ class CodeAnalyzer:
break break
class_patterns = { class_patterns = {
'python': r'^class\\s+(\\w+)', "python": r"^class\\s+(\\w+)",
'javascript': r'^class\\s+(\\w+)', "javascript": r"^class\\s+(\\w+)",
'java': r'^\\s*class\\s+(\\w+)', "java": r"^\\s*class\\s+(\\w+)",
'rust': r'^struct\\s+(\\w+)', "rust": r"^struct\\s+(\\w+)",
} }
for lang, pattern in class_patterns.items(): for lang, pattern in class_patterns.items():
@@ -206,39 +195,37 @@ class CodeAnalyzer:
summary_parts.append(f"Classes/Structs: {', '.join(classes[:3])}") summary_parts.append(f"Classes/Structs: {', '.join(classes[:3])}")
break break
return '. '.join(summary_parts) if summary_parts else "Code changes detected" return ". ".join(summary_parts) if summary_parts else "Code changes detected"
def _generate_summary(self, ast_info: dict) -> str: def _generate_summary(self, ast_info):
"""Generate a human-readable summary from AST info."""
summary_parts = [] summary_parts = []
funcs = ast_info.get('functions', []) funcs = ast_info.get("functions", [])
if funcs: if funcs:
func_names = [f['name'] for f in funcs if f.get('name')] func_names = [f["name"] for f in funcs if f.get("name")]
if func_names: if func_names:
summary_parts.append(f"Functions: {', '.join(func_names[:5])}") summary_parts.append(f"Functions: {', '.join(func_names[:5])}")
classes = ast_info.get('classes', []) classes = ast_info.get("classes", [])
if classes: if classes:
class_names = [c['name'] for c in classes if c.get('name')] class_names = [c["name"] for c in classes if c.get("name")]
if class_names: if class_names:
summary_parts.append(f"Classes: {', '.join(class_names[:3])}") summary_parts.append(f"Classes: {', '.join(class_names[:3])}")
imports = ast_info.get('imports', []) imports = ast_info.get("imports", [])
if imports: if imports:
summary_parts.append(f"Imports/Requires: {len(imports)} statements") summary_parts.append(f"Imports/Requires: {len(imports)} statements")
return '. '.join(summary_parts) if summary_parts else "Code changes detected" return ". ".join(summary_parts) if summary_parts else "Code changes detected"
def summarize_change(self, old_code: str, new_code: str, language: str = "text") -> str: def summarize_change(self, old_code, new_code, language="text"):
"""Summarize what changed between old and new code."""
old_analysis = self.analyze_code(old_code, language) old_analysis = self.analyze_code(old_code, language)
new_analysis = self.analyze_code(new_code, language) new_analysis = self.analyze_code(new_code, language)
summary_parts = [] summary_parts = []
old_funcs = set(f['name'] for f in old_analysis.get('functions', []) if f.get('name')) old_funcs = set(f["name"] for f in old_analysis.get("functions", []) if f.get("name"))
new_funcs = set(f['name'] for f in new_analysis.get('functions', []) if f.get('name')) new_funcs = set(f["name"] for f in new_analysis.get("functions", []) if f.get("name"))
added_funcs = new_funcs - old_funcs added_funcs = new_funcs - old_funcs
removed_funcs = old_funcs - new_funcs removed_funcs = old_funcs - new_funcs
@@ -248,8 +235,8 @@ class CodeAnalyzer:
if removed_funcs: if removed_funcs:
summary_parts.append(f"Removed functions: {', '.join(sorted(removed_funcs))}") summary_parts.append(f"Removed functions: {', '.join(sorted(removed_funcs))}")
old_classes = set(c['name'] for c in old_analysis.get('classes', []) if c.get('name')) old_classes = set(c["name"] for c in old_analysis.get("classes", []) if c.get("name"))
new_classes = set(c['name'] for c in new_analysis.get('classes', []) if c.get('name')) new_classes = set(c["name"] for c in new_analysis.get("classes", []) if c.get("name"))
added_classes = new_classes - old_classes added_classes = new_classes - old_classes
removed_classes = old_classes - new_classes removed_classes = old_classes - new_classes
@@ -265,16 +252,14 @@ class CodeAnalyzer:
if line_diff != 0: if line_diff != 0:
summary_parts.append(f"Line count: {'+' if line_diff > 0 else ''}{line_diff}") summary_parts.append(f"Line count: {'+' if line_diff > 0 else ''}{line_diff}")
return '. '.join(summary_parts) if summary_parts else "Code modified" return ". ".join(summary_parts) if summary_parts else "Code modified"
def analyze_code(code: str, language: str = "text") -> dict:
    """Analyze ``code`` with a freshly created ``CodeAnalyzer``.

    Args:
        code: Source text to analyze.
        language: Language name of ``code``; defaults to ``"text"``.

    Returns:
        Whatever ``CodeAnalyzer.analyze_code`` returns for the same arguments.
    """
    return CodeAnalyzer().analyze_code(code, language)
def summarize_change(old_code: str, new_code: str, language: str = "text") -> str:
    """Summarize the change from ``old_code`` to ``new_code``.

    A new ``CodeAnalyzer`` is created for every call.

    Args:
        old_code: Source text before the change.
        new_code: Source text after the change.
        language: Language name of both snippets; defaults to ``"text"``.

    Returns:
        Whatever ``CodeAnalyzer.summarize_change`` returns for the same
        arguments.
    """
    return CodeAnalyzer().summarize_change(old_code, new_code, language)