fix: resolve CI issues - push complete implementation with tests
@@ -1,32 +1,27 @@
-"""Code analyzer using tree-sitter for AST-based analysis."""
-
 import re
 
 from gdiffer.language_detector import LanguageDetector
 
 LANGUAGE_GRAMMARS = {
-    'python': 'python',
-    'javascript': 'javascript',
-    'typescript': 'typescript',
-    'java': 'java',
-    'go': 'go',
-    'rust': 'rust',
-    'c': 'c',
-    'cpp': 'cpp',
-    'ruby': 'ruby',
-    'php': 'php',
+    "python": "python",
+    "javascript": "javascript",
+    "typescript": "typescript",
+    "java": "java",
+    "go": "go",
+    "rust": "rust",
+    "c": "c",
+    "cpp": "cpp",
+    "ruby": "ruby",
+    "php": "php",
 }
 
 
 class CodeAnalyzer:
-    """Analyzes code using tree-sitter AST parsing."""
-
     def __init__(self):
         self.language_detector = LanguageDetector()
         self._parsers = {}
 
-    def _get_parser(self, language: str):
-        """Get or create a tree-sitter parser for a language."""
+    def _get_parser(self, language):
         if language not in self._parsers:
             try:
                 import tree_sitter
@@ -36,16 +31,15 @@ class CodeAnalyzer:
                 self._parsers[language] = None
         return self._parsers[language]
 
-    def analyze_code(self, code: str, language: str = "text") -> dict:
-        """Analyze code and return structured information."""
+    def analyze_code(self, code, language="text"):
         result = {
-            'language': language,
-            'functions': [],
-            'classes': [],
-            'imports': [],
-            'variables': [],
-            'function_calls': [],
-            'change_summary': "",
+            "language": language,
+            "functions": [],
+            "classes": [],
+            "imports": [],
+            "variables": [],
+            "function_calls": [],
+            "change_summary": "",
         }
 
         if language == "text" or not code.strip():
@@ -53,26 +47,25 @@ class CodeAnalyzer:
 
         parser = self._get_parser(language)
         if parser is None:
-            result['change_summary'] = self._analyze_without_parser(code)
+            result["change_summary"] = self._analyze_without_parser(code)
             return result
 
         try:
             tree = parser.parse(code.encode() if isinstance(code, str) else code)
-            result['ast_info'] = self._extract_ast_info(tree.root_node, language)
-            result['change_summary'] = self._generate_summary(result['ast_info'])
+            result["ast_info"] = self._extract_ast_info(tree.root_node, language)
+            result["change_summary"] = self._generate_summary(result["ast_info"])
         except Exception:
-            result['change_summary'] = self._analyze_without_parser(code)
+            result["change_summary"] = self._analyze_without_parser(code)
 
         return result
 
-    def _extract_ast_info(self, node, language: str) -> dict:
-        """Extract information from AST node."""
+    def _extract_ast_info(self, node, language):
         info = {
-            'functions': [],
-            'classes': [],
-            'imports': [],
-            'function_calls': [],
-            'nested_nodes': [],
+            "functions": [],
+            "classes": [],
+            "imports": [],
+            "function_calls": [],
+            "nested_nodes": [],
         }
 
         if node is None:
@@ -82,90 +75,86 @@ class CodeAnalyzer:
         node_text = node.text.decode() if isinstance(node.text, bytes) else node.text
 
         function_keywords = [
-            'function_definition', 'function_declaration', 'method_definition', 'func'
+            "function_definition", "function_declaration", "method_definition", "func"
         ]
-        class_keywords = ['class_definition', 'class_declaration', 'struct', 'impl']
-        import_keywords = ['import_statement', 'import_from_statement', 'import', 'require']
-        call_keywords = ['call_expression', 'function_call', 'method_call', 'expression_statement']
+        class_keywords = ["class_definition", "class_declaration", "struct", "impl"]
+        import_keywords = ["import_statement", "import_from_statement", "import", "require"]
+        call_keywords = ["call_expression", "function_call", "method_call", "expression_statement"]
 
         if node_type in function_keywords:
-            info['functions'].append(self._extract_function_info(node, language))
+            info["functions"].append(self._extract_function_info(node, language))
 
         if node_type in class_keywords:
-            info['classes'].append(self._extract_class_info(node, language))
+            info["classes"].append(self._extract_class_info(node, language))
 
         if node_type in import_keywords:
-            info['imports'].append(node_text)
+            info["imports"].append(node_text)
 
         if node_type in call_keywords:
-            info['function_calls'].append(node_text)
+            info["function_calls"].append(node_text)
 
         for child in node.children:
             child_info = self._extract_ast_info(child, language)
-            info['functions'].extend(child_info['functions'])
-            info['classes'].extend(child_info['classes'])
-            info['imports'].extend(child_info['imports'])
-            info['function_calls'].extend(child_info['function_calls'])
+            info["functions"].extend(child_info["functions"])
+            info["classes"].extend(child_info["classes"])
+            info["imports"].extend(child_info["imports"])
+            info["function_calls"].extend(child_info["function_calls"])
 
         return info
 
-    def _extract_function_info(self, node, language: str) -> dict:
-        """Extract function name and details."""
+    def _extract_function_info(self, node, language):
         name = ""
         params = []
         start_line = node.start_point[0] + 1 if node.start_point else 0
 
         for child in node.children:
-            if child.type in ['identifier', 'function_name', 'name']:
+            if child.type in ["identifier", "function_name", "name"]:
                 name = child.text.decode() if isinstance(child.text, bytes) else child.text
-            elif child.type in ['parameters', 'parameter_list', 'formal_parameters']:
+            elif child.type in ["parameters", "parameter_list", "formal_parameters"]:
                 params = self._extract_parameters(child)
 
         return {
-            'name': name,
-            'parameters': params,
-            'start_line': start_line,
+            "name": name,
+            "parameters": params,
+            "start_line": start_line,
         }
 
-    def _extract_class_info(self, node, language: str) -> dict:
-        """Extract class name and details."""
+    def _extract_class_info(self, node, language):
         name = ""
         methods = []
         start_line = node.start_point[0] + 1 if node.start_point else 0
 
         for child in node.children:
-            if child.type in ['identifier', 'name', 'type_identifier']:
+            if child.type in ["identifier", "name", "type_identifier"]:
                 if not name:
                     name = child.text.decode() if isinstance(child.text, bytes) else child.text
 
         return {
-            'name': name,
-            'start_line': start_line,
-            'methods': methods,
+            "name": name,
+            "start_line": start_line,
+            "methods": methods,
         }
 
-    def _extract_parameters(self, node) -> list[str]:
-        """Extract parameter names from parameter list."""
+    def _extract_parameters(self, node):
         params = []
         for child in node.children:
-            if child.type in ['identifier', 'parameter', 'positional_argument']:
+            if child.type in ["identifier", "parameter", "positional_argument"]:
                 param_name = child.text.decode() if isinstance(child.text, bytes) else child.text
-                if param_name and param_name not in [',', '(', ')']:
+                if param_name and param_name not in [",", "(", ")"]:
                     params.append(param_name)
         return params
 
-    def _analyze_without_parser(self, code: str) -> str:
-        """Fallback analysis without tree-sitter parser."""
+    def _analyze_without_parser(self, code):
         lines = code.splitlines()
         summary_parts = []
 
         added_lines = [
             line for line in lines
-            if line.strip().startswith('+') and not line.strip().startswith('+++')
+            if line.strip().startswith("+") and not line.strip().startswith("+++")
         ]
         removed_lines = [
            line for line in lines
-            if line.strip().startswith('-') and not line.strip().startswith('---')
+            if line.strip().startswith("-") and not line.strip().startswith("---")
        ]
 
         if added_lines or removed_lines:
@@ -174,18 +163,18 @@ class CodeAnalyzer:
            )
 
         func_patterns = {
-            'python': r'^def\s+(\w+)',
-            'javascript': r'^function\s+(\w+)|const\s+(\w+)\s*=\s*function',
-            'java': r'^\s*(public|private|protected)?\s*(static\s+)?\s*\w+\s+(\w+)\s*\(',
-            'go': r'^func\s+(\w+)',
-            'rust': r'^fn\s+(\w+)',
+            "python": r"^def\s+(\w+)",
+            "javascript": r"^function\s+(\w+)|const\s+(\w+)\s*=\s*function",
+            "java": r"^\s*(public|private|protected)?\s*(static\s+)?\s*\w+\s+(\w+)\s*\(",
+            "go": r"^func\s+(\w+)",
+            "rust": r"^fn\s+(\w+)",
         }
 
         for lang, pattern in func_patterns.items():
             funcs = re.findall(pattern, code, re.MULTILINE)
             if funcs:
                 func_names = [
-                    f if isinstance(f, str) else next((x for x in f if x), '')
+                    f if isinstance(f, str) else next((x for x in f if x), "")
                     for f in funcs
                 ]
                 func_names = [n for n in func_names if n]
@@ -194,10 +183,10 @@ class CodeAnalyzer:
                 break
 
         class_patterns = {
-            'python': r'^class\s+(\w+)',
-            'javascript': r'^class\s+(\w+)',
-            'java': r'^\s*class\s+(\w+)',
-            'rust': r'^struct\s+(\w+)',
+            "python": r"^class\s+(\w+)",
+            "javascript": r"^class\s+(\w+)",
+            "java": r"^\s*class\s+(\w+)",
+            "rust": r"^struct\s+(\w+)",
         }
 
         for lang, pattern in class_patterns.items():
@@ -206,39 +195,37 @@ class CodeAnalyzer:
                 summary_parts.append(f"Classes/Structs: {', '.join(classes[:3])}")
                 break
 
-        return '. '.join(summary_parts) if summary_parts else "Code changes detected"
+        return ". ".join(summary_parts) if summary_parts else "Code changes detected"
 
-    def _generate_summary(self, ast_info: dict) -> str:
-        """Generate a human-readable summary from AST info."""
+    def _generate_summary(self, ast_info):
         summary_parts = []
 
-        funcs = ast_info.get('functions', [])
+        funcs = ast_info.get("functions", [])
         if funcs:
-            func_names = [f['name'] for f in funcs if f.get('name')]
+            func_names = [f["name"] for f in funcs if f.get("name")]
             if func_names:
                 summary_parts.append(f"Functions: {', '.join(func_names[:5])}")
 
-        classes = ast_info.get('classes', [])
+        classes = ast_info.get("classes", [])
         if classes:
-            class_names = [c['name'] for c in classes if c.get('name')]
+            class_names = [c["name"] for c in classes if c.get("name")]
             if class_names:
                 summary_parts.append(f"Classes: {', '.join(class_names[:3])}")
 
-        imports = ast_info.get('imports', [])
+        imports = ast_info.get("imports", [])
         if imports:
             summary_parts.append(f"Imports/Requires: {len(imports)} statements")
 
-        return '. '.join(summary_parts) if summary_parts else "Code changes detected"
+        return ". ".join(summary_parts) if summary_parts else "Code changes detected"
 
-    def summarize_change(self, old_code: str, new_code: str, language: str = "text") -> str:
-        """Summarize what changed between old and new code."""
+    def summarize_change(self, old_code, new_code, language="text"):
         old_analysis = self.analyze_code(old_code, language)
         new_analysis = self.analyze_code(new_code, language)
 
         summary_parts = []
 
-        old_funcs = set(f['name'] for f in old_analysis.get('functions', []) if f.get('name'))
-        new_funcs = set(f['name'] for f in new_analysis.get('functions', []) if f.get('name'))
+        old_funcs = set(f["name"] for f in old_analysis.get("functions", []) if f.get("name"))
+        new_funcs = set(f["name"] for f in new_analysis.get("functions", []) if f.get("name"))
 
         added_funcs = new_funcs - old_funcs
         removed_funcs = old_funcs - new_funcs
@@ -248,8 +235,8 @@ class CodeAnalyzer:
         if removed_funcs:
             summary_parts.append(f"Removed functions: {', '.join(sorted(removed_funcs))}")
 
-        old_classes = set(c['name'] for c in old_analysis.get('classes', []) if c.get('name'))
-        new_classes = set(c['name'] for c in new_analysis.get('classes', []) if c.get('name'))
+        old_classes = set(c["name"] for c in old_analysis.get("classes", []) if c.get("name"))
+        new_classes = set(c["name"] for c in new_analysis.get("classes", []) if c.get("name"))
 
         added_classes = new_classes - old_classes
         removed_classes = old_classes - new_classes
@@ -265,16 +252,14 @@ class CodeAnalyzer:
         if line_diff != 0:
             summary_parts.append(f"Line count: {'+' if line_diff > 0 else ''}{line_diff}")
 
-        return '. '.join(summary_parts) if summary_parts else "Code modified"
+        return ". ".join(summary_parts) if summary_parts else "Code modified"
 
 
-def analyze_code(code: str, language: str = "text") -> dict:
-    """Analyze code and return structured information."""
+def analyze_code(code, language="text"):
     analyzer = CodeAnalyzer()
     return analyzer.analyze_code(code, language)
 
 
-def summarize_change(old_code: str, new_code: str, language: str = "text") -> str:
-    """Summarize what changed between old and new code."""
+def summarize_change(old_code, new_code, language="text"):
     analyzer = CodeAnalyzer()
     return analyzer.summarize_change(old_code, new_code, language)
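For reviewers, a minimal usage sketch of the two module-level helpers touched by this diff. The gdiffer.code_analyzer import path and the sample snippets are assumptions for illustration; they are not part of the commit.

# Assumed import path; the diff does not show the file name.
from gdiffer.code_analyzer import analyze_code, summarize_change

old_code = "def greet(name):\n    return 'hi ' + name\n"
new_code = (
    "def greet(name):\n"
    "    return f'hello {name}'\n"
    "\n"
    "def farewell(name):\n"
    "    return f'bye {name}'\n"
)

# analyze_code builds the result dict seen above: "language", "functions",
# "classes", "imports", "function_calls", and a textual "change_summary".
info = analyze_code(new_code, language="python")
print(info["change_summary"])

# summarize_change compares two snippets and reports added/removed functions,
# added/removed classes, and the line-count delta as one string.
print(summarize_change(old_code, new_code, language="python"))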