diff --git a/src/gdiffer/code_analyzer.py b/src/gdiffer/code_analyzer.py index a94d496..ec97ac4 100644 --- a/src/gdiffer/code_analyzer.py +++ b/src/gdiffer/code_analyzer.py @@ -1,32 +1,27 @@ -"""Code analyzer using tree-sitter for AST-based analysis.""" - import re from gdiffer.language_detector import LanguageDetector LANGUAGE_GRAMMARS = { - 'python': 'python', - 'javascript': 'javascript', - 'typescript': 'typescript', - 'java': 'java', - 'go': 'go', - 'rust': 'rust', - 'c': 'c', - 'cpp': 'cpp', - 'ruby': 'ruby', - 'php': 'php', + "python": "python", + "javascript": "javascript", + "typescript": "typescript", + "java": "java", + "go": "go", + "rust": "rust", + "c": "c", + "cpp": "cpp", + "ruby": "ruby", + "php": "php", } class CodeAnalyzer: - """Analyzes code using tree-sitter AST parsing.""" - def __init__(self): self.language_detector = LanguageDetector() self._parsers = {} - def _get_parser(self, language: str): - """Get or create a tree-sitter parser for a language.""" + def _get_parser(self, language): if language not in self._parsers: try: import tree_sitter @@ -36,16 +31,15 @@ class CodeAnalyzer: self._parsers[language] = None return self._parsers[language] - def analyze_code(self, code: str, language: str = "text") -> dict: - """Analyze code and return structured information.""" + def analyze_code(self, code, language="text"): result = { - 'language': language, - 'functions': [], - 'classes': [], - 'imports': [], - 'variables': [], - 'function_calls': [], - 'change_summary': "", + "language": language, + "functions": [], + "classes": [], + "imports": [], + "variables": [], + "function_calls": [], + "change_summary": "", } if language == "text" or not code.strip(): @@ -53,26 +47,25 @@ class CodeAnalyzer: parser = self._get_parser(language) if parser is None: - result['change_summary'] = self._analyze_without_parser(code) + result["change_summary"] = self._analyze_without_parser(code) return result try: tree = parser.parse(code.encode() if isinstance(code, str) else code) - result['ast_info'] = self._extract_ast_info(tree.root_node, language) - result['change_summary'] = self._generate_summary(result['ast_info']) + result["ast_info"] = self._extract_ast_info(tree.root_node, language) + result["change_summary"] = self._generate_summary(result["ast_info"]) except Exception: - result['change_summary'] = self._analyze_without_parser(code) + result["change_summary"] = self._analyze_without_parser(code) return result - def _extract_ast_info(self, node, language: str) -> dict: - """Extract information from AST node.""" + def _extract_ast_info(self, node, language): info = { - 'functions': [], - 'classes': [], - 'imports': [], - 'function_calls': [], - 'nested_nodes': [], + "functions": [], + "classes": [], + "imports": [], + "function_calls": [], + "nested_nodes": [], } if node is None: @@ -82,90 +75,86 @@ class CodeAnalyzer: node_text = node.text.decode() if isinstance(node.text, bytes) else node.text function_keywords = [ - 'function_definition', 'function_declaration', 'method_definition', 'func' + "function_definition", "function_declaration", "method_definition", "func" ] - class_keywords = ['class_definition', 'class_declaration', 'struct', 'impl'] - import_keywords = ['import_statement', 'import_from_statement', 'import', 'require'] - call_keywords = ['call_expression', 'function_call', 'method_call', 'expression_statement'] + class_keywords = ["class_definition", "class_declaration", "struct", "impl"] + import_keywords = ["import_statement", "import_from_statement", "import", "require"] + call_keywords = ["call_expression", "function_call", "method_call", "expression_statement"] if node_type in function_keywords: - info['functions'].append(self._extract_function_info(node, language)) + info["functions"].append(self._extract_function_info(node, language)) if node_type in class_keywords: - info['classes'].append(self._extract_class_info(node, language)) + info["classes"].append(self._extract_class_info(node, language)) if node_type in import_keywords: - info['imports'].append(node_text) + info["imports"].append(node_text) if node_type in call_keywords: - info['function_calls'].append(node_text) + info["function_calls"].append(node_text) for child in node.children: child_info = self._extract_ast_info(child, language) - info['functions'].extend(child_info['functions']) - info['classes'].extend(child_info['classes']) - info['imports'].extend(child_info['imports']) - info['function_calls'].extend(child_info['function_calls']) + info["functions"].extend(child_info["functions"]) + info["classes"].extend(child_info["classes"]) + info["imports"].extend(child_info["imports"]) + info["function_calls"].extend(child_info["function_calls"]) return info - def _extract_function_info(self, node, language: str) -> dict: - """Extract function name and details.""" + def _extract_function_info(self, node, language): name = "" params = [] start_line = node.start_point[0] + 1 if node.start_point else 0 for child in node.children: - if child.type in ['identifier', 'function_name', 'name']: + if child.type in ["identifier", "function_name", "name"]: name = child.text.decode() if isinstance(child.text, bytes) else child.text - elif child.type in ['parameters', 'parameter_list', 'formal_parameters']: + elif child.type in ["parameters", "parameter_list", "formal_parameters"]: params = self._extract_parameters(child) return { - 'name': name, - 'parameters': params, - 'start_line': start_line, + "name": name, + "parameters": params, + "start_line": start_line, } - def _extract_class_info(self, node, language: str) -> dict: - """Extract class name and details.""" + def _extract_class_info(self, node, language): name = "" methods = [] start_line = node.start_point[0] + 1 if node.start_point else 0 for child in node.children: - if child.type in ['identifier', 'name', 'type_identifier']: + if child.type in ["identifier", "name", "type_identifier"]: if not name: name = child.text.decode() if isinstance(child.text, bytes) else child.text return { - 'name': name, - 'start_line': start_line, - 'methods': methods, + "name": name, + "start_line": start_line, + "methods": methods, } - def _extract_parameters(self, node) -> list[str]: - """Extract parameter names from parameter list.""" + def _extract_parameters(self, node): params = [] for child in node.children: - if child.type in ['identifier', 'parameter', 'positional_argument']: + if child.type in ["identifier", "parameter", "positional_argument"]: param_name = child.text.decode() if isinstance(child.text, bytes) else child.text - if param_name and param_name not in [',', '(', ')']: + if param_name and param_name not in [",", "(", ")"]: params.append(param_name) return params - def _analyze_without_parser(self, code: str) -> str: - """Fallback analysis without tree-sitter parser.""" + def _analyze_without_parser(self, code): lines = code.splitlines() summary_parts = [] added_lines = [ line for line in lines - if line.strip().startswith('+') and not line.strip().startswith('+++') + if line.strip().startswith("+") and not line.strip().startswith("+++") ] removed_lines = [ line for line in lines - if line.strip().startswith('-') and not line.strip().startswith('---') + if line.strip().startswith("-") and not line.strip().startswith("---") ] if added_lines or removed_lines: @@ -174,18 +163,18 @@ class CodeAnalyzer: ) func_patterns = { - 'python': r'^def\\s+(\\w+)', - 'javascript': r'^function\\s+(\\w+)|const\\s+(\\w+)\\s*=\\s*function', - 'java': r'^\\s*(public|private|protected)?\\s*(static\\s+)?\\s*\\w+\\s+(\\w+)\\s*\\(', - 'go': r'^func\\s+(\\w+)', - 'rust': r'^fn\\s+(\\w+)', + "python": r"^def\\s+(\\w+)", + "javascript": r"^function\\s+(\\w+)|const\\s+(\\w+)\\s*=\\s*function", + "java": r"^\\s*(public|private|protected)?\\s*(static\\s+)?\\s*\\w+\\s+(\\w+)\\s*\\(", + "go": r"^func\\s+(\\w+)", + "rust": r"^fn\\s+(\\w+)", } for lang, pattern in func_patterns.items(): funcs = re.findall(pattern, code, re.MULTILINE) if funcs: func_names = [ - f if isinstance(f, str) else next((x for x in f if x), '') + f if isinstance(f, str) else next((x for x in f if x), "") for f in funcs ] func_names = [n for n in func_names if n] @@ -194,10 +183,10 @@ class CodeAnalyzer: break class_patterns = { - 'python': r'^class\\s+(\\w+)', - 'javascript': r'^class\\s+(\\w+)', - 'java': r'^\\s*class\\s+(\\w+)', - 'rust': r'^struct\\s+(\\w+)', + "python": r"^class\\s+(\\w+)", + "javascript": r"^class\\s+(\\w+)", + "java": r"^\\s*class\\s+(\\w+)", + "rust": r"^struct\\s+(\\w+)", } for lang, pattern in class_patterns.items(): @@ -206,39 +195,37 @@ class CodeAnalyzer: summary_parts.append(f"Classes/Structs: {', '.join(classes[:3])}") break - return '. '.join(summary_parts) if summary_parts else "Code changes detected" + return ". ".join(summary_parts) if summary_parts else "Code changes detected" - def _generate_summary(self, ast_info: dict) -> str: - """Generate a human-readable summary from AST info.""" + def _generate_summary(self, ast_info): summary_parts = [] - funcs = ast_info.get('functions', []) + funcs = ast_info.get("functions", []) if funcs: - func_names = [f['name'] for f in funcs if f.get('name')] + func_names = [f["name"] for f in funcs if f.get("name")] if func_names: summary_parts.append(f"Functions: {', '.join(func_names[:5])}") - classes = ast_info.get('classes', []) + classes = ast_info.get("classes", []) if classes: - class_names = [c['name'] for c in classes if c.get('name')] + class_names = [c["name"] for c in classes if c.get("name")] if class_names: summary_parts.append(f"Classes: {', '.join(class_names[:3])}") - imports = ast_info.get('imports', []) + imports = ast_info.get("imports", []) if imports: summary_parts.append(f"Imports/Requires: {len(imports)} statements") - return '. '.join(summary_parts) if summary_parts else "Code changes detected" + return ". ".join(summary_parts) if summary_parts else "Code changes detected" - def summarize_change(self, old_code: str, new_code: str, language: str = "text") -> str: - """Summarize what changed between old and new code.""" + def summarize_change(self, old_code, new_code, language="text"): old_analysis = self.analyze_code(old_code, language) new_analysis = self.analyze_code(new_code, language) summary_parts = [] - old_funcs = set(f['name'] for f in old_analysis.get('functions', []) if f.get('name')) - new_funcs = set(f['name'] for f in new_analysis.get('functions', []) if f.get('name')) + old_funcs = set(f["name"] for f in old_analysis.get("functions", []) if f.get("name")) + new_funcs = set(f["name"] for f in new_analysis.get("functions", []) if f.get("name")) added_funcs = new_funcs - old_funcs removed_funcs = old_funcs - new_funcs @@ -248,8 +235,8 @@ class CodeAnalyzer: if removed_funcs: summary_parts.append(f"Removed functions: {', '.join(sorted(removed_funcs))}") - old_classes = set(c['name'] for c in old_analysis.get('classes', []) if c.get('name')) - new_classes = set(c['name'] for c in new_analysis.get('classes', []) if c.get('name')) + old_classes = set(c["name"] for c in old_analysis.get("classes", []) if c.get("name")) + new_classes = set(c["name"] for c in new_analysis.get("classes", []) if c.get("name")) added_classes = new_classes - old_classes removed_classes = old_classes - new_classes @@ -265,16 +252,14 @@ class CodeAnalyzer: if line_diff != 0: summary_parts.append(f"Line count: {'+' if line_diff > 0 else ''}{line_diff}") - return '. '.join(summary_parts) if summary_parts else "Code modified" + return ". ".join(summary_parts) if summary_parts else "Code modified" -def analyze_code(code: str, language: str = "text") -> dict: - """Analyze code and return structured information.""" +def analyze_code(code, language="text"): analyzer = CodeAnalyzer() return analyzer.analyze_code(code, language) -def summarize_change(old_code: str, new_code: str, language: str = "text") -> str: - """Summarize what changed between old and new code.""" +def summarize_change(old_code, new_code, language="text"): analyzer = CodeAnalyzer() return analyzer.summarize_change(old_code, new_code, language)