Compare commits
57 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 77a7e807db | |||
| 72e15f9b2b | |||
| 87536cec3d | |||
| 6ab5c50fcd | |||
| 2a344e3d82 | |||
| f4e02fb177 | |||
| 4213979b9f | |||
| 9d2ae8bc14 | |||
| e1f36c29b8 | |||
| 5fe6dd83c9 | |||
| 6577302aa4 | |||
| c055777858 | |||
| 9878d95b39 | |||
| cc6022cdc7 | |||
| 293dbd6ad3 | |||
| 124e0bbee3 | |||
| 8d07050a8e | |||
| 8bfeb95358 | |||
| b6f6549dc4 | |||
| bf55ea9294 | |||
| f9d071a586 | |||
| 74d01c6428 | |||
| bfaed70c17 | |||
| 54c46c759d | |||
| 07ec3fbb9e | |||
| 3a9a1b1c53 | |||
| 94818f5226 | |||
| 75e00a4aaa | |||
| a988dfdb39 | |||
| 4053bdfd11 | |||
| 201993c72a | |||
| 71a294886c | |||
| ef43479537 | |||
| 4f3a17e3a6 | |||
| a007304aa7 | |||
| 43f5271d7b | |||
| e52848b7dd | |||
| 7e4e1a68a8 | |||
| 4e81287aca | |||
| c6e77e610a | |||
| 24dda8f991 | |||
| 58dddd2d4b | |||
| 0a59041be1 | |||
| 8c22761f71 | |||
| ce5eb18ff5 | |||
| 8f15167e60 | |||
| 15edaf4587 | |||
| c31fe35c6d | |||
| 7b30556d2c | |||
| de13ca00bd | |||
| 7fa88e334f | |||
| 44ca0c1215 | |||
| e68a8b108f | |||
| d67666d2e2 | |||
| c850d70248 | |||
| f11ad90a9c | |||
| cbb76846da |
@@ -9,12 +9,17 @@ on:
|
|||||||
jobs:
|
jobs:
|
||||||
test:
|
test:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
python-version: ["3.10", "3.11", "3.12"]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
- uses: actions/setup-python@v5
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: '3.11'
|
python-version: ${{ matrix.python-version }}
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
@@ -22,22 +27,51 @@ jobs:
|
|||||||
pip install -e ".[dev]"
|
pip install -e ".[dev]"
|
||||||
|
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
run: pytest tests/ -v
|
run: |
|
||||||
|
python -m pytest tests/ -v --tb=short
|
||||||
- name: Run tests with coverage
|
|
||||||
run: pytest tests/ --cov=src/gdiffer --cov-report=term-missing --cov-report=html
|
|
||||||
|
|
||||||
lint:
|
lint:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
- uses: actions/setup-python@v5
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: '3.11'
|
python-version: "3.12"
|
||||||
|
|
||||||
- name: Install linting tools
|
- name: Install dependencies
|
||||||
run: pip install ruff
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install ruff
|
||||||
|
pip install -e .
|
||||||
|
|
||||||
- name: Run linter
|
- name: Run linter
|
||||||
run: ruff check src/gdiffer/ tests/
|
run: |
|
||||||
|
ruff check src/gdiffer/ tests/
|
||||||
|
|
||||||
|
build:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: "3.12"
|
||||||
|
|
||||||
|
- name: Install build dependencies
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install build
|
||||||
|
|
||||||
|
- name: Build package
|
||||||
|
run: |
|
||||||
|
python -m build
|
||||||
|
|
||||||
|
- name: Verify installation
|
||||||
|
run: |
|
||||||
|
pip install dist/*.whl
|
||||||
|
gdiffer --version
|
||||||
|
|||||||
46
.gitignore
vendored
46
.gitignore
vendored
@@ -1,12 +1,40 @@
|
|||||||
*.pyc
|
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.egg-info/
|
*.py[cod]
|
||||||
.dist-info/
|
*$py.class
|
||||||
|
*.so
|
||||||
|
.Python
|
||||||
build/
|
build/
|
||||||
.env
|
develop-eggs/
|
||||||
.venv/
|
dist/
|
||||||
env/
|
downloads/
|
||||||
venv/
|
eggs/
|
||||||
.pytest_cache/
|
.eggs/
|
||||||
.coverage
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
tox.ini
|
||||||
|
coverage/
|
||||||
htmlcov/
|
htmlcov/
|
||||||
|
.pytest_cache/
|
||||||
|
.ruff_cache/
|
||||||
|
venv/
|
||||||
|
env/
|
||||||
|
.venv/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
.vscode/
|
||||||
|
.idea/
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
*~
|
||||||
|
.DS_Store
|
||||||
|
|||||||
4
LICENSE
4
LICENSE
@@ -1,5 +1,7 @@
|
|||||||
MIT License
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2026
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
@@ -12,7 +14,7 @@ copies or substantial portions of the Software.
|
|||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFREMENTEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
|||||||
130
README.md
130
README.md
@@ -4,24 +4,22 @@ A CLI tool that parses git diffs and provides intelligent, contextual explanatio
|
|||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
- Parse git diffs/patches: Extract file names, hunks, and code changes from unified diff format
|
- **Parse git diffs/patches**: Extract file names, hunks, and code changes from unified diff format
|
||||||
- Identify programming language: Detect language from file extensions and code patterns
|
- **Identify programming language**: Detect language from file extensions and code patterns
|
||||||
- Summarize changes: Generate human-readable summaries of what each change does
|
- **Summarize changes**: Generate human-readable summaries of what each change does
|
||||||
- Flag potential issues: Detect bugs, security vulnerabilities, and code smells
|
- **Flag potential issues**: Detect bugs, security vulnerabilities, and code smells
|
||||||
- Suggest improvements: Provide specific refactoring suggestions
|
- **Suggest improvements**: Provide specific refactoring suggestions
|
||||||
- Local execution: Runs entirely offline using local libraries
|
- **Local execution**: Runs entirely offline using local libraries
|
||||||
- Color-coded output: Terminal output with ANSI colors for better readability
|
- **Color-coded output**: Terminal output with ANSI colors for better readability
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
# Using pip
|
||||||
pip install git-diff-explainer-cli
|
pip install git-diff-explainer-cli
|
||||||
```
|
|
||||||
|
|
||||||
Or from source:
|
# From source
|
||||||
|
git clone <repository>
|
||||||
```bash
|
|
||||||
git clone https://7000pct.gitea.bloupla.net/7000pctAUTO/git-diff-explainer-cli.git
|
|
||||||
cd git-diff-explainer-cli
|
cd git-diff-explainer-cli
|
||||||
pip install -e .
|
pip install -e .
|
||||||
```
|
```
|
||||||
@@ -31,32 +29,136 @@ pip install -e .
|
|||||||
### Basic Usage
|
### Basic Usage
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
# Explain a diff from stdin
|
||||||
git diff | gdiffer explain --stdin
|
git diff | gdiffer explain --stdin
|
||||||
|
|
||||||
|
# Explain a diff file
|
||||||
gdiffer explain --file changes.diff
|
gdiffer explain --file changes.diff
|
||||||
|
|
||||||
|
# Pass diff as argument
|
||||||
gdiffer explain "diff --git a/file.py b/file.py..."
|
gdiffer explain "diff --git a/file.py b/file.py..."
|
||||||
|
|
||||||
|
# Get just the summary
|
||||||
gdiffer summarize --file changes.diff
|
gdiffer summarize --file changes.diff
|
||||||
|
|
||||||
|
# Check for issues only
|
||||||
gdiffer issues --file changes.diff
|
gdiffer issues --file changes.diff
|
||||||
```
|
```
|
||||||
|
|
||||||
### Options
|
### Options
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
gdiffer explain --output json # terminal (default), json, plain
|
# Output format: terminal (default), json, plain
|
||||||
|
gdiffer explain --output json
|
||||||
|
|
||||||
|
# Verbose output
|
||||||
gdiffer explain --verbose
|
gdiffer explain --verbose
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
### Simple Diff Explanation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ git diff | gdiffer explain
|
||||||
|
=== Git Diff Analysis Summary ===
|
||||||
|
Total files changed: 1
|
||||||
|
Files added: 0
|
||||||
|
Files deleted: 0
|
||||||
|
Files modified: 1
|
||||||
|
Total changes: 3
|
||||||
|
|
||||||
|
Languages:
|
||||||
|
- python: 1 files
|
||||||
|
|
||||||
|
=== File Changes ===
|
||||||
|
|
||||||
|
1. src/main.py
|
||||||
|
Status: modify
|
||||||
|
Changes: 3 lines
|
||||||
|
Hunk 1:
|
||||||
|
+import os
|
||||||
|
+import sys
|
||||||
|
def main():
|
||||||
|
print("Hello, World!")
|
||||||
|
```
|
||||||
|
|
||||||
|
### JSON Output
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ gdiffer explain --output json "diff --git a/test.py..."
|
||||||
|
{
|
||||||
|
"summary": {
|
||||||
|
"total_files": 1,
|
||||||
|
"files_added": 0,
|
||||||
|
"files_deleted": 0,
|
||||||
|
"files_modified": 1,
|
||||||
|
"total_changes": 2
|
||||||
|
},
|
||||||
|
"files": [...]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Issue Detection
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ gdiffer issues --file sql_injection.diff
|
||||||
|
[CRITICAL] Potential SQL Injection
|
||||||
|
File: db.py:5
|
||||||
|
Description: String concatenation or interpolation used in SQL query
|
||||||
|
Suggestion: Use parameterized queries or ORM methods
|
||||||
|
```
|
||||||
|
|
||||||
## Supported Languages
|
## Supported Languages
|
||||||
|
|
||||||
Python, JavaScript/TypeScript, Java, Go, Rust, C/C++, Ruby, PHP, and more.
|
- Python
|
||||||
|
- JavaScript / TypeScript
|
||||||
|
- Java
|
||||||
|
- Go
|
||||||
|
- Rust
|
||||||
|
- C / C++
|
||||||
|
- Ruby
|
||||||
|
- PHP
|
||||||
|
- And more...
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
Environment variables:
|
||||||
|
|
||||||
|
| Variable | Description | Default |
|
||||||
|
|----------|-------------|---------|
|
||||||
|
| `GDIFF_OUTPUT` | Output format: terminal, json, plain | terminal |
|
||||||
|
| `GDIFF_VERBOSE` | Enable verbose output | false |
|
||||||
|
|
||||||
## Development
|
## Development
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
# Install development dependencies
|
||||||
pip install -e ".[dev]"
|
pip install -e ".[dev]"
|
||||||
|
|
||||||
|
# Run tests
|
||||||
pytest tests/ -v
|
pytest tests/ -v
|
||||||
|
|
||||||
|
# Run with coverage
|
||||||
pytest tests/ --cov=src/gdiffer
|
pytest tests/ --cov=src/gdiffer
|
||||||
|
|
||||||
|
# Type checking
|
||||||
|
mypy src/gdiffer/
|
||||||
|
|
||||||
|
# Linting
|
||||||
|
ruff check src/gdiffer/
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Error Handling
|
||||||
|
|
||||||
|
Common errors and solutions:
|
||||||
|
|
||||||
|
| Error | Solution |
|
||||||
|
|-------|----------|
|
||||||
|
| Invalid git diff format | Provide a valid unified diff format |
|
||||||
|
| No changes detected | Ensure the diff contains actual code changes |
|
||||||
|
| Unsupported language | Use a supported language or file extension |
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
MIT
|
MIT
|
||||||
|
|||||||
@@ -55,4 +55,4 @@ target-version = "py310"
|
|||||||
|
|
||||||
[tool.ruff.lint]
|
[tool.ruff.lint]
|
||||||
select = ["E", "F", "W", "C90", "I", "N", "UP"]
|
select = ["E", "F", "W", "C90", "I", "N", "UP"]
|
||||||
ignore = []
|
ignore = ["C901"]
|
||||||
|
|||||||
0
src/.gitkeep
Normal file
0
src/.gitkeep
Normal file
@@ -1,8 +1,4 @@
|
|||||||
"""CLI interface for git diff explainer."""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import sys
|
import sys
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import click
|
import click
|
||||||
|
|
||||||
@@ -11,11 +7,11 @@ from gdiffer.code_analyzer import CodeAnalyzer
|
|||||||
from gdiffer.issue_detector import IssueDetector
|
from gdiffer.issue_detector import IssueDetector
|
||||||
from gdiffer.language_detector import LanguageDetector
|
from gdiffer.language_detector import LanguageDetector
|
||||||
from gdiffer.models import DiffAnalysis, DiffFile
|
from gdiffer.models import DiffAnalysis, DiffFile
|
||||||
from gdiffer.output import OutputFormatter, OutputFormat
|
from gdiffer.output import OutputFormat, OutputFormatter
|
||||||
from gdiffer.parser import parse_diff
|
from gdiffer.parser import parse_diff
|
||||||
|
|
||||||
|
|
||||||
def create_analysis(files: list[DiffFile], verbose: bool = False) -> DiffAnalysis:
|
def create_analysis(files, verbose=False):
|
||||||
analysis = DiffAnalysis()
|
analysis = DiffAnalysis()
|
||||||
language_detector = LanguageDetector()
|
language_detector = LanguageDetector()
|
||||||
code_analyzer = CodeAnalyzer()
|
code_analyzer = CodeAnalyzer()
|
||||||
@@ -38,21 +34,21 @@ def create_analysis(files: list[DiffFile], verbose: bool = False) -> DiffAnalysi
|
|||||||
analysis.language_breakdown[lang] = analysis.language_breakdown.get(lang, 0) + 1
|
analysis.language_breakdown[lang] = analysis.language_breakdown.get(lang, 0) + 1
|
||||||
|
|
||||||
for hunk in file_obj.hunks:
|
for hunk in file_obj.hunks:
|
||||||
old_code = '\n'.join(hunk.old_lines_content)
|
old_code = "\n".join(hunk.old_lines_content)
|
||||||
new_code = '\n'.join(hunk.new_lines_content)
|
new_code = "\n".join(hunk.new_lines_content)
|
||||||
|
|
||||||
summary = code_analyzer.summarize_change(old_code, new_code, lang)
|
code_analyzer.summarize_change(old_code, new_code, lang)
|
||||||
|
|
||||||
issues = issue_detector.detect_diff_issues(old_code, new_code, lang)
|
issues = issue_detector.detect_diff_issues(old_code, new_code, lang)
|
||||||
for issue in issues:
|
for issue in issues:
|
||||||
issue_dict = {
|
issue_dict = {
|
||||||
'type': issue.type,
|
"type": issue.type,
|
||||||
'severity': issue.severity,
|
"severity": issue.severity,
|
||||||
'title': issue.title,
|
"title": issue.title,
|
||||||
'description': issue.description,
|
"description": issue.description,
|
||||||
'line': issue.line,
|
"line": issue.line,
|
||||||
'suggestion': issue.suggestion,
|
"suggestion": issue.suggestion,
|
||||||
'file': file_obj.filename,
|
"file": file_obj.filename,
|
||||||
}
|
}
|
||||||
analysis.all_issues.append(issue_dict)
|
analysis.all_issues.append(issue_dict)
|
||||||
|
|
||||||
@@ -62,42 +58,50 @@ def create_analysis(files: list[DiffFile], verbose: bool = False) -> DiffAnalysi
|
|||||||
analysis.total_changes += hunk.new_lines
|
analysis.total_changes += hunk.new_lines
|
||||||
|
|
||||||
analysis.total_files = len(files)
|
analysis.total_files = len(files)
|
||||||
|
|
||||||
return analysis
|
return analysis
|
||||||
|
|
||||||
|
|
||||||
@click.group()
|
@click.group()
|
||||||
@click.version_option(version=__version__)
|
@click.version_option(version=__version__)
|
||||||
@click.option('--verbose', '-v', is_flag=True, help='Enable verbose output')
|
@click.option("--verbose", "-v", is_flag=True, help="Enable verbose output")
|
||||||
@click.option('--output', '-o', type=click.Choice(['terminal', 'json', 'plain']),
|
@click.option(
|
||||||
default='terminal', help='Output format')
|
"--output",
|
||||||
|
"-o",
|
||||||
|
type=click.Choice(["terminal", "json", "plain"]),
|
||||||
|
default="terminal",
|
||||||
|
help="Output format",
|
||||||
|
)
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
def main(ctx: click.Context, verbose: bool, output: str):
|
def main(ctx, verbose, output):
|
||||||
ctx.ensure_object(dict)
|
ctx.ensure_object(dict)
|
||||||
ctx.obj['verbose'] = verbose
|
ctx.obj["verbose"] = verbose
|
||||||
ctx.obj['output'] = output
|
ctx.obj["output"] = output
|
||||||
|
|
||||||
|
|
||||||
@main.command()
|
@main.command()
|
||||||
@click.argument('diff_input', type=click.STRING, required=False)
|
@click.argument("diff_input", type=click.STRING, required=False)
|
||||||
@click.option('--file', '-f', type=click.Path(exists=True), help='Read diff from file')
|
@click.option("--file", "-f", type=click.Path(exists=True), help="Read diff from file")
|
||||||
@click.option('--stdin', '-s', is_flag=True, help='Read diff from stdin')
|
@click.option("--stdin", "-s", is_flag=True, help="Read diff from stdin")
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
def explain(ctx: click.Context, diff_input: Optional[str], file: Optional[str], stdin: bool):
|
def explain(ctx, diff_input, file, stdin):
|
||||||
verbose = ctx.obj.get('verbose', False)
|
verbose = ctx.obj.get("verbose", False)
|
||||||
output_format = ctx.obj.get('output', 'terminal')
|
output_format = ctx.obj.get("output", "terminal")
|
||||||
|
|
||||||
diff_content = ""
|
diff_content = ""
|
||||||
|
|
||||||
if stdin:
|
if stdin:
|
||||||
diff_content = sys.stdin.read()
|
diff_content = sys.stdin.read()
|
||||||
elif file:
|
elif file:
|
||||||
with open(file, 'r') as f:
|
with open(file) as f:
|
||||||
diff_content = f.read()
|
diff_content = f.read()
|
||||||
elif diff_input:
|
elif diff_input:
|
||||||
diff_content = diff_input
|
diff_content = diff_input
|
||||||
else:
|
else:
|
||||||
click.echo("No diff provided. Use --stdin, --file, or pass diff as argument.", err=True)
|
click.echo("No diff provided. Use --stdin, --file, or pass diff as argument.", err=True)
|
||||||
|
click.echo("\nUsage examples:", err=True)
|
||||||
|
click.echo(" gdiffer explain 'diff --git a/file.py...'", err=True)
|
||||||
|
click.echo(" git diff | gdiffer explain --stdin", err=True)
|
||||||
|
click.echo(" gdiffer explain --file changes.diff", err=True)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -109,7 +113,7 @@ def explain(ctx: click.Context, diff_input: Optional[str], file: Optional[str],
|
|||||||
|
|
||||||
analysis = create_analysis(files, verbose)
|
analysis = create_analysis(files, verbose)
|
||||||
|
|
||||||
if output_format == 'json':
|
if output_format == "json":
|
||||||
result = format_analysis_json(analysis)
|
result = format_analysis_json(analysis)
|
||||||
click.echo(result)
|
click.echo(result)
|
||||||
else:
|
else:
|
||||||
@@ -120,21 +124,22 @@ def explain(ctx: click.Context, diff_input: Optional[str], file: Optional[str],
|
|||||||
click.echo(f"Error analyzing diff: {e}", err=True)
|
click.echo(f"Error analyzing diff: {e}", err=True)
|
||||||
if verbose:
|
if verbose:
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
@main.command()
|
@main.command()
|
||||||
@click.option('--file', '-f', type=click.Path(exists=True), help='Read diff from file')
|
@click.option("--file", "-f", type=click.Path(exists=True), help="Read diff from file")
|
||||||
@click.option('--stdin', '-s', is_flag=True, help='Read diff from stdin')
|
@click.option("--stdin", "-s", is_flag=True, help="Read diff from stdin")
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
def issues(ctx: click.Context, file: Optional[str], stdin: bool):
|
def issues(ctx, file, stdin):
|
||||||
diff_content = ""
|
diff_content = ""
|
||||||
|
|
||||||
if stdin:
|
if stdin:
|
||||||
diff_content = sys.stdin.read()
|
diff_content = sys.stdin.read()
|
||||||
elif file:
|
elif file:
|
||||||
with open(file, 'r') as f:
|
with open(file) as f:
|
||||||
diff_content = f.read()
|
diff_content = f.read()
|
||||||
else:
|
else:
|
||||||
diff_content = sys.stdin.read()
|
diff_content = sys.stdin.read()
|
||||||
@@ -151,33 +156,40 @@ def issues(ctx: click.Context, file: Optional[str], stdin: bool):
|
|||||||
|
|
||||||
for file_obj in files:
|
for file_obj in files:
|
||||||
for hunk in file_obj.hunks:
|
for hunk in file_obj.hunks:
|
||||||
old_code = '\n'.join(hunk.old_lines_content)
|
old_code = "\n".join(hunk.old_lines_content)
|
||||||
new_code = '\n'.join(hunk.new_lines_content)
|
new_code = "\n".join(hunk.new_lines_content)
|
||||||
lang = LanguageDetector().detect(file_obj.filename)
|
lang = LanguageDetector().detect(file_obj.filename)
|
||||||
|
|
||||||
issues = issue_detector.detect_diff_issues(old_code, new_code, lang)
|
issues = issue_detector.detect_diff_issues(old_code, new_code, lang)
|
||||||
for issue in issues:
|
for issue in issues:
|
||||||
all_issues.append({
|
all_issues.append(
|
||||||
'file': file_obj.filename,
|
{
|
||||||
'line': issue.line,
|
"file": file_obj.filename,
|
||||||
'severity': issue.severity,
|
"line": issue.line,
|
||||||
'title': issue.title,
|
"severity": issue.severity,
|
||||||
'description': issue.description,
|
"title": issue.title,
|
||||||
'suggestion': issue.suggestion,
|
"description": issue.description,
|
||||||
})
|
"suggestion": issue.suggestion,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
if all_issues:
|
if all_issues:
|
||||||
severity_priority = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3}
|
severity_priority = {"critical": 0, "high": 1, "medium": 2, "low": 3}
|
||||||
all_issues.sort(key=lambda x: severity_priority.get(x.get('severity', ''), 4))
|
all_issues.sort(key=lambda x: severity_priority.get(x.get("severity", ""), 4))
|
||||||
|
|
||||||
if ctx.obj.get('output') == 'json':
|
if ctx.obj.get("output") == "json":
|
||||||
click.echo(json.dumps(all_issues, indent=2))
|
click.echo(__import__("json").dumps(all_issues, indent=2))
|
||||||
else:
|
else:
|
||||||
for issue in all_issues:
|
for issue in all_issues:
|
||||||
color = {'critical': 'red', 'high': 'orange3', 'medium': 'yellow', 'low': 'cyan'}.get(
|
severity = issue["severity"].upper()
|
||||||
issue['severity'], 'white'
|
color_map = {
|
||||||
)
|
"critical": "red",
|
||||||
click.echo(f"[{color}][{issue['severity'].upper()}][/] {issue['title']}")
|
"high": "orange3",
|
||||||
|
"medium": "yellow",
|
||||||
|
"low": "cyan",
|
||||||
|
}
|
||||||
|
color = color_map.get(issue["severity"], "white")
|
||||||
|
click.echo(f"[{color}][{severity}][/] {issue['title']}")
|
||||||
click.echo(f" File: {issue['file']}:{issue['line']}")
|
click.echo(f" File: {issue['file']}:{issue['line']}")
|
||||||
click.echo(f" {issue['description']}")
|
click.echo(f" {issue['description']}")
|
||||||
click.echo(f" Suggestion: {issue['suggestion']}")
|
click.echo(f" Suggestion: {issue['suggestion']}")
|
||||||
@@ -191,16 +203,16 @@ def issues(ctx: click.Context, file: Optional[str], stdin: bool):
|
|||||||
|
|
||||||
|
|
||||||
@main.command()
|
@main.command()
|
||||||
@click.option('--file', '-f', type=click.Path(exists=True), help='Read diff from file')
|
@click.option("--file", "-f", type=click.Path(exists=True), help="Read diff from file")
|
||||||
@click.option('--stdin', '-s', is_flag=True, help='Read diff from stdin')
|
@click.option("--stdin", "-s", is_flag=True, help="Read diff from stdin")
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
def summarize(ctx: click.Context, file: Optional[str], stdin: bool):
|
def summarize(ctx, file, stdin):
|
||||||
diff_content = ""
|
diff_content = ""
|
||||||
|
|
||||||
if stdin:
|
if stdin:
|
||||||
diff_content = sys.stdin.read()
|
diff_content = sys.stdin.read()
|
||||||
elif file:
|
elif file:
|
||||||
with open(file, 'r') as f:
|
with open(file) as f:
|
||||||
diff_content = f.read()
|
diff_content = f.read()
|
||||||
else:
|
else:
|
||||||
diff_content = sys.stdin.read()
|
diff_content = sys.stdin.read()
|
||||||
@@ -231,10 +243,10 @@ def summarize(ctx: click.Context, file: Optional[str], stdin: bool):
|
|||||||
click.echo(f" - {lang}: {count} files")
|
click.echo(f" - {lang}: {count} files")
|
||||||
|
|
||||||
if analysis.all_issues:
|
if analysis.all_issues:
|
||||||
critical = sum(1 for i in analysis.all_issues if i.get('severity') == 'critical')
|
critical = sum(1 for i in analysis.all_issues if i.get("severity") == "critical")
|
||||||
high = sum(1 for i in analysis.all_issues if i.get('severity') == 'high')
|
high = sum(1 for i in analysis.all_issues if i.get("severity") == "high")
|
||||||
medium = sum(1 for i in analysis.all_issues if i.get('severity') == 'medium')
|
medium = sum(1 for i in analysis.all_issues if i.get("severity") == "medium")
|
||||||
low = sum(1 for i in analysis.all_issues if i.get('severity') == 'low')
|
low = sum(1 for i in analysis.all_issues if i.get("severity") == "low")
|
||||||
|
|
||||||
click.echo(f"\nIssues found: {len(analysis.all_issues)}")
|
click.echo(f"\nIssues found: {len(analysis.all_issues)}")
|
||||||
if critical:
|
if critical:
|
||||||
@@ -251,41 +263,41 @@ def summarize(ctx: click.Context, file: Optional[str], stdin: bool):
|
|||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
def format_analysis_json(analysis: DiffAnalysis) -> str:
|
def format_analysis_json(analysis):
|
||||||
result = {
|
result = {
|
||||||
'summary': {
|
"summary": {
|
||||||
'total_files': analysis.total_files,
|
"total_files": analysis.total_files,
|
||||||
'files_added': analysis.files_added,
|
"files_added": analysis.files_added,
|
||||||
'files_deleted': analysis.files_deleted,
|
"files_deleted": analysis.files_deleted,
|
||||||
'files_modified': analysis.files_modified,
|
"files_modified": analysis.files_modified,
|
||||||
'files_renamed': analysis.files_renamed,
|
"files_renamed": analysis.files_renamed,
|
||||||
'total_changes': analysis.total_changes,
|
"total_changes": analysis.total_changes,
|
||||||
'language_breakdown': analysis.language_breakdown,
|
"language_breakdown": analysis.language_breakdown,
|
||||||
},
|
},
|
||||||
'files': [],
|
"files": [],
|
||||||
'issues': analysis.all_issues,
|
"issues": analysis.all_issues,
|
||||||
'suggestions': analysis.all_suggestions,
|
"suggestions": analysis.all_suggestions,
|
||||||
}
|
}
|
||||||
|
|
||||||
for file_obj in analysis.files:
|
for file_obj in analysis.files:
|
||||||
file_data = {
|
file_data = {
|
||||||
'filename': file_obj.filename,
|
"filename": file_obj.filename,
|
||||||
'change_type': file_obj.change_type,
|
"change_type": file_obj.change_type,
|
||||||
'language': file_obj.extension,
|
"language": file_obj.extension,
|
||||||
'hunks': [],
|
"hunks": [],
|
||||||
}
|
}
|
||||||
|
|
||||||
for hunk in file_obj.hunks:
|
for hunk in file_obj.hunks:
|
||||||
hunk_data = {
|
hunk_data = {
|
||||||
'old_start': hunk.old_start,
|
"old_start": hunk.old_start,
|
||||||
'new_start': hunk.new_start,
|
"new_start": hunk.new_start,
|
||||||
'changes': {
|
"changes": {
|
||||||
'added': hunk.get_added_lines(),
|
"added": hunk.get_added_lines(),
|
||||||
'removed': hunk.get_removed_lines(),
|
"removed": hunk.get_removed_lines(),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
file_data['hunks'].append(hunk_data)
|
file_data["hunks"].append(hunk_data)
|
||||||
|
|
||||||
result['files'].append(file_data)
|
result["files"].append(file_data)
|
||||||
|
|
||||||
return json.dumps(result, indent=2)
|
return __import__("json").dumps(result, indent=2)
|
||||||
|
|||||||
@@ -1,14 +1,18 @@
|
|||||||
"""Code analyzer using tree-sitter for AST-based analysis."""
|
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
from gdiffer.language_detector import LanguageDetector
|
from gdiffer.language_detector import LanguageDetector
|
||||||
|
|
||||||
|
|
||||||
LANGUAGE_GRAMMARS = {
|
LANGUAGE_GRAMMARS = {
|
||||||
'python': 'python', 'javascript': 'javascript', 'typescript': 'typescript',
|
"python": "python",
|
||||||
'java': 'java', 'go': 'go', 'rust': 'rust', 'c': 'c', 'cpp': 'cpp', 'ruby': 'ruby', 'php': 'php',
|
"javascript": "javascript",
|
||||||
|
"typescript": "typescript",
|
||||||
|
"java": "java",
|
||||||
|
"go": "go",
|
||||||
|
"rust": "rust",
|
||||||
|
"c": "c",
|
||||||
|
"cpp": "cpp",
|
||||||
|
"ruby": "ruby",
|
||||||
|
"php": "php",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -17,7 +21,7 @@ class CodeAnalyzer:
|
|||||||
self.language_detector = LanguageDetector()
|
self.language_detector = LanguageDetector()
|
||||||
self._parsers = {}
|
self._parsers = {}
|
||||||
|
|
||||||
def _get_parser(self, language: str):
|
def _get_parser(self, language):
|
||||||
if language not in self._parsers:
|
if language not in self._parsers:
|
||||||
try:
|
try:
|
||||||
import tree_sitter
|
import tree_sitter
|
||||||
@@ -27,147 +31,235 @@ class CodeAnalyzer:
|
|||||||
self._parsers[language] = None
|
self._parsers[language] = None
|
||||||
return self._parsers[language]
|
return self._parsers[language]
|
||||||
|
|
||||||
def analyze_code(self, code: str, language: str = "text") -> dict:
|
def analyze_code(self, code, language="text"):
|
||||||
result = {
|
result = {
|
||||||
'language': language, 'functions': [], 'classes': [],
|
"language": language,
|
||||||
'imports': [], 'function_calls': [], 'change_summary': "",
|
"functions": [],
|
||||||
|
"classes": [],
|
||||||
|
"imports": [],
|
||||||
|
"variables": [],
|
||||||
|
"function_calls": [],
|
||||||
|
"change_summary": "",
|
||||||
}
|
}
|
||||||
|
|
||||||
if language == "text" or not code.strip():
|
if language == "text" or not code.strip():
|
||||||
return result
|
return result
|
||||||
|
|
||||||
parser = self._get_parser(language)
|
parser = self._get_parser(language)
|
||||||
if parser is None:
|
if parser is None:
|
||||||
result['change_summary'] = self._analyze_without_parser(code)
|
result["change_summary"] = self._analyze_without_parser(code)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
try:
|
try:
|
||||||
tree = parser.parse(code.encode() if isinstance(code, str) else code)
|
tree = parser.parse(code.encode() if isinstance(code, str) else code)
|
||||||
result['ast_info'] = self._extract_ast_info(tree.root_node, language)
|
result["ast_info"] = self._extract_ast_info(tree.root_node, language)
|
||||||
result['change_summary'] = self._generate_summary(result['ast_info'])
|
result["change_summary"] = self._generate_summary(result["ast_info"])
|
||||||
except Exception:
|
except Exception:
|
||||||
result['change_summary'] = self._analyze_without_parser(code)
|
result["change_summary"] = self._analyze_without_parser(code)
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def _extract_ast_info(self, node, language: str) -> dict:
|
def _extract_ast_info(self, node, language):
|
||||||
info = {'functions': [], 'classes': [], 'imports': [], 'function_calls': [], 'nested_nodes': []}
|
info = {
|
||||||
|
"functions": [],
|
||||||
|
"classes": [],
|
||||||
|
"imports": [],
|
||||||
|
"function_calls": [],
|
||||||
|
"nested_nodes": [],
|
||||||
|
}
|
||||||
|
|
||||||
if node is None:
|
if node is None:
|
||||||
return info
|
return info
|
||||||
|
|
||||||
node_type = node.type
|
node_type = node.type
|
||||||
node_text = node.text.decode() if isinstance(node.text, bytes) else node.text
|
node_text = node.text.decode() if isinstance(node.text, bytes) else node.text
|
||||||
function_keywords = ['function_definition', 'function_declaration', 'method_definition', 'func']
|
|
||||||
class_keywords = ['class_definition', 'class_declaration', 'struct', 'impl']
|
function_keywords = [
|
||||||
import_keywords = ['import_statement', 'import_from_statement', 'import', 'require']
|
"function_definition", "function_declaration", "method_definition", "func"
|
||||||
|
]
|
||||||
|
class_keywords = ["class_definition", "class_declaration", "struct", "impl"]
|
||||||
|
import_keywords = ["import_statement", "import_from_statement", "import", "require"]
|
||||||
|
call_keywords = ["call_expression", "function_call", "method_call", "expression_statement"]
|
||||||
|
|
||||||
if node_type in function_keywords:
|
if node_type in function_keywords:
|
||||||
info['functions'].append(self._extract_function_info(node, language))
|
info["functions"].append(self._extract_function_info(node, language))
|
||||||
|
|
||||||
if node_type in class_keywords:
|
if node_type in class_keywords:
|
||||||
info['classes'].append(self._extract_class_info(node, language))
|
info["classes"].append(self._extract_class_info(node, language))
|
||||||
|
|
||||||
if node_type in import_keywords:
|
if node_type in import_keywords:
|
||||||
info['imports'].append(node_text)
|
info["imports"].append(node_text)
|
||||||
|
|
||||||
|
if node_type in call_keywords:
|
||||||
|
info["function_calls"].append(node_text)
|
||||||
|
|
||||||
for child in node.children:
|
for child in node.children:
|
||||||
child_info = self._extract_ast_info(child, language)
|
child_info = self._extract_ast_info(child, language)
|
||||||
info['functions'].extend(child_info['functions'])
|
info["functions"].extend(child_info["functions"])
|
||||||
info['classes'].extend(child_info['classes'])
|
info["classes"].extend(child_info["classes"])
|
||||||
info['imports'].extend(child_info['imports'])
|
info["imports"].extend(child_info["imports"])
|
||||||
info['function_calls'].extend(child_info['function_calls'])
|
info["function_calls"].extend(child_info["function_calls"])
|
||||||
|
|
||||||
return info
|
return info
|
||||||
|
|
||||||
def _extract_function_info(self, node, language: str) -> dict:
|
def _extract_function_info(self, node, language):
|
||||||
name = ""
|
name = ""
|
||||||
params = []
|
params = []
|
||||||
start_line = node.start_point[0] + 1 if node.start_point else 0
|
start_line = node.start_point[0] + 1 if node.start_point else 0
|
||||||
for child in node.children:
|
|
||||||
if child.type in ['identifier', 'function_name', 'name']:
|
|
||||||
name = child.text.decode() if isinstance(child.text, bytes) else child.text
|
|
||||||
elif child.type in ['parameters', 'parameter_list', 'formal_parameters']:
|
|
||||||
params = self._extract_parameters(child)
|
|
||||||
return {'name': name, 'parameters': params, 'start_line': start_line}
|
|
||||||
|
|
||||||
def _extract_class_info(self, node, language: str) -> dict:
|
|
||||||
name = ""
|
|
||||||
start_line = node.start_point[0] + 1 if node.start_point else 0
|
|
||||||
for child in node.children:
|
for child in node.children:
|
||||||
if child.type in ['identifier', 'name', 'type_identifier']:
|
if child.type in ["identifier", "function_name", "name"]:
|
||||||
|
name = child.text.decode() if isinstance(child.text, bytes) else child.text
|
||||||
|
elif child.type in ["parameters", "parameter_list", "formal_parameters"]:
|
||||||
|
params = self._extract_parameters(child)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"name": name,
|
||||||
|
"parameters": params,
|
||||||
|
"start_line": start_line,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _extract_class_info(self, node, language):
|
||||||
|
name = ""
|
||||||
|
methods = []
|
||||||
|
start_line = node.start_point[0] + 1 if node.start_point else 0
|
||||||
|
|
||||||
|
for child in node.children:
|
||||||
|
if child.type in ["identifier", "name", "type_identifier"]:
|
||||||
if not name:
|
if not name:
|
||||||
name = child.text.decode() if isinstance(child.text, bytes) else child.text
|
name = child.text.decode() if isinstance(child.text, bytes) else child.text
|
||||||
return {'name': name, 'start_line': start_line, 'methods': []}
|
|
||||||
|
|
||||||
def _extract_parameters(self, node) -> list[str]:
|
return {
|
||||||
|
"name": name,
|
||||||
|
"start_line": start_line,
|
||||||
|
"methods": methods,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _extract_parameters(self, node):
|
||||||
params = []
|
params = []
|
||||||
for child in node.children:
|
for child in node.children:
|
||||||
if child.type in ['identifier', 'parameter', 'positional_argument']:
|
if child.type in ["identifier", "parameter", "positional_argument"]:
|
||||||
param_name = child.text.decode() if isinstance(child.text, bytes) else child.text
|
param_name = child.text.decode() if isinstance(child.text, bytes) else child.text
|
||||||
if param_name and param_name not in [',', '(', ')']:
|
if param_name and param_name not in [",", "(", ")"]:
|
||||||
params.append(param_name)
|
params.append(param_name)
|
||||||
return params
|
return params
|
||||||
|
|
||||||
def _analyze_without_parser(self, code: str) -> str:
|
def _analyze_without_parser(self, code):
|
||||||
|
lines = code.splitlines()
|
||||||
summary_parts = []
|
summary_parts = []
|
||||||
added_lines = [l for l in code.splitlines() if l.strip().startswith('+') and not l.strip().startswith('+++')]
|
|
||||||
removed_lines = [l for l in code.splitlines() if l.strip().startswith('-') and not l.strip().startswith('---')]
|
added_lines = [
|
||||||
|
line for line in lines
|
||||||
|
if line.strip().startswith("+") and not line.strip().startswith("+++")
|
||||||
|
]
|
||||||
|
removed_lines = [
|
||||||
|
line for line in lines
|
||||||
|
if line.strip().startswith("-") and not line.strip().startswith("---")
|
||||||
|
]
|
||||||
|
|
||||||
if added_lines or removed_lines:
|
if added_lines or removed_lines:
|
||||||
summary_parts.append(f"Added {len(added_lines)} lines, removed {len(removed_lines)} lines")
|
summary_parts.append(
|
||||||
|
f"Added {len(added_lines)} lines, removed {len(removed_lines)} lines"
|
||||||
|
)
|
||||||
|
|
||||||
func_patterns = {
|
func_patterns = {
|
||||||
'python': r'^def\s+(\w+)', 'javascript': r'^function\s+(\w+)', 'java': r'\w+\s+\w+\s*\(',
|
"python": r"^def\\s+(\\w+)",
|
||||||
'go': r'^func\s+(\w+)', 'rust': r'^fn\s+(\w+)',
|
"javascript": r"^function\\s+(\\w+)|const\\s+(\\w+)\\s*=\\s*function",
|
||||||
|
"java": r"^\\s*(public|private|protected)?\\s*(static\\s+)?\\s*\\w+\\s+(\\w+)\\s*\\(",
|
||||||
|
"go": r"^func\\s+(\\w+)",
|
||||||
|
"rust": r"^fn\\s+(\\w+)",
|
||||||
}
|
}
|
||||||
|
|
||||||
for lang, pattern in func_patterns.items():
|
for lang, pattern in func_patterns.items():
|
||||||
funcs = re.findall(pattern, code, re.MULTILINE)
|
funcs = re.findall(pattern, code, re.MULTILINE)
|
||||||
if funcs:
|
if funcs:
|
||||||
func_names = [f if isinstance(f, str) else next((x for x in f if x), '') for f in funcs if f]
|
func_names = [
|
||||||
|
f if isinstance(f, str) else next((x for x in f if x), "")
|
||||||
|
for f in funcs
|
||||||
|
]
|
||||||
|
func_names = [n for n in func_names if n]
|
||||||
if func_names:
|
if func_names:
|
||||||
summary_parts.append(f"Functions: {', '.join(func_names[:5])}")
|
summary_parts.append(f"Functions: {', '.join(func_names[:5])}")
|
||||||
break
|
break
|
||||||
class_patterns = {'python': r'^class\s+(\w+)', 'javascript': r'^class\s+(\w+)', 'java': r'^\s*class\s+(\w+)'}
|
|
||||||
|
class_patterns = {
|
||||||
|
"python": r"^class\\s+(\\w+)",
|
||||||
|
"javascript": r"^class\\s+(\\w+)",
|
||||||
|
"java": r"^\\s*class\\s+(\\w+)",
|
||||||
|
"rust": r"^struct\\s+(\\w+)",
|
||||||
|
}
|
||||||
|
|
||||||
for lang, pattern in class_patterns.items():
|
for lang, pattern in class_patterns.items():
|
||||||
classes = re.findall(pattern, code, re.MULTILINE)
|
classes = re.findall(pattern, code, re.MULTILINE)
|
||||||
if classes:
|
if classes:
|
||||||
summary_parts.append(f"Classes/Structs: {', '.join(classes[:3])}")
|
summary_parts.append(f"Classes/Structs: {', '.join(classes[:3])}")
|
||||||
break
|
break
|
||||||
return '. '.join(summary_parts) if summary_parts else "Code changes detected"
|
|
||||||
|
|
||||||
def _generate_summary(self, ast_info: dict) -> str:
|
return ". ".join(summary_parts) if summary_parts else "Code changes detected"
|
||||||
|
|
||||||
|
def _generate_summary(self, ast_info):
|
||||||
summary_parts = []
|
summary_parts = []
|
||||||
funcs = ast_info.get('functions', [])
|
|
||||||
|
funcs = ast_info.get("functions", [])
|
||||||
if funcs:
|
if funcs:
|
||||||
func_names = [f['name'] for f in funcs if f.get('name')]
|
func_names = [f["name"] for f in funcs if f.get("name")]
|
||||||
if func_names:
|
if func_names:
|
||||||
summary_parts.append(f"Functions: {', '.join(func_names[:5])}")
|
summary_parts.append(f"Functions: {', '.join(func_names[:5])}")
|
||||||
classes = ast_info.get('classes', [])
|
|
||||||
|
classes = ast_info.get("classes", [])
|
||||||
if classes:
|
if classes:
|
||||||
class_names = [c['name'] for c in classes if c.get('name')]
|
class_names = [c["name"] for c in classes if c.get("name")]
|
||||||
if class_names:
|
if class_names:
|
||||||
summary_parts.append(f"Classes: {', '.join(class_names[:3])}")
|
summary_parts.append(f"Classes: {', '.join(class_names[:3])}")
|
||||||
return '. '.join(summary_parts) if summary_parts else "Code changes detected"
|
|
||||||
|
|
||||||
def summarize_change(self, old_code: str, new_code: str, language: str = "text") -> str:
|
imports = ast_info.get("imports", [])
|
||||||
|
if imports:
|
||||||
|
summary_parts.append(f"Imports/Requires: {len(imports)} statements")
|
||||||
|
|
||||||
|
return ". ".join(summary_parts) if summary_parts else "Code changes detected"
|
||||||
|
|
||||||
|
def summarize_change(self, old_code, new_code, language="text"):
|
||||||
old_analysis = self.analyze_code(old_code, language)
|
old_analysis = self.analyze_code(old_code, language)
|
||||||
new_analysis = self.analyze_code(new_code, language)
|
new_analysis = self.analyze_code(new_code, language)
|
||||||
|
|
||||||
summary_parts = []
|
summary_parts = []
|
||||||
old_funcs = set(f['name'] for f in old_analysis.get('functions', []) if f.get('name'))
|
|
||||||
new_funcs = set(f['name'] for f in new_analysis.get('functions', []) if f.get('name'))
|
old_funcs = set(f["name"] for f in old_analysis.get("functions", []) if f.get("name"))
|
||||||
|
new_funcs = set(f["name"] for f in new_analysis.get("functions", []) if f.get("name"))
|
||||||
|
|
||||||
added_funcs = new_funcs - old_funcs
|
added_funcs = new_funcs - old_funcs
|
||||||
removed_funcs = old_funcs - new_funcs
|
removed_funcs = old_funcs - new_funcs
|
||||||
|
|
||||||
if added_funcs:
|
if added_funcs:
|
||||||
summary_parts.append(f"Added functions: {', '.join(sorted(added_funcs))}")
|
summary_parts.append(f"Added functions: {', '.join(sorted(added_funcs))}")
|
||||||
if removed_funcs:
|
if removed_funcs:
|
||||||
summary_parts.append(f"Removed functions: {', '.join(sorted(removed_funcs))}")
|
summary_parts.append(f"Removed functions: {', '.join(sorted(removed_funcs))}")
|
||||||
old_classes = set(c['name'] for c in old_analysis.get('classes', []) if c.get('name'))
|
|
||||||
new_classes = set(c['name'] for c in new_analysis.get('classes', []) if c.get('name'))
|
old_classes = set(c["name"] for c in old_analysis.get("classes", []) if c.get("name"))
|
||||||
|
new_classes = set(c["name"] for c in new_analysis.get("classes", []) if c.get("name"))
|
||||||
|
|
||||||
added_classes = new_classes - old_classes
|
added_classes = new_classes - old_classes
|
||||||
removed_classes = old_classes - new_classes
|
removed_classes = old_classes - new_classes
|
||||||
|
|
||||||
if added_classes:
|
if added_classes:
|
||||||
summary_parts.append(f"Added classes: {', '.join(sorted(added_classes))}")
|
summary_parts.append(f"Added classes: {', '.join(sorted(added_classes))}")
|
||||||
if removed_classes:
|
if removed_classes:
|
||||||
summary_parts.append(f"Removed classes: {', '.join(sorted(removed_classes))}")
|
summary_parts.append(f"Removed classes: {', '.join(sorted(removed_classes))}")
|
||||||
line_diff = len(new_code.splitlines()) - len(old_code.splitlines())
|
|
||||||
|
old_lines = len(old_code.splitlines())
|
||||||
|
new_lines = len(new_code.splitlines())
|
||||||
|
line_diff = new_lines - old_lines
|
||||||
if line_diff != 0:
|
if line_diff != 0:
|
||||||
summary_parts.append(f"Line count: {'+' if line_diff > 0 else ''}{line_diff}")
|
summary_parts.append(f"Line count: {'+' if line_diff > 0 else ''}{line_diff}")
|
||||||
return '. '.join(summary_parts) if summary_parts else "Code modified"
|
|
||||||
|
return ". ".join(summary_parts) if summary_parts else "Code modified"
|
||||||
|
|
||||||
|
|
||||||
def analyze_code(code: str, language: str = "text") -> dict:
|
def analyze_code(code, language="text"):
|
||||||
analyzer = CodeAnalyzer()
|
analyzer = CodeAnalyzer()
|
||||||
return analyzer.analyze_code(code, language)
|
return analyzer.analyze_code(code, language)
|
||||||
|
|
||||||
|
|
||||||
def summarize_change(old_code: str, new_code: str, language: str = "text") -> str:
|
def summarize_change(old_code, new_code, language="text"):
|
||||||
analyzer = CodeAnalyzer()
|
analyzer = CodeAnalyzer()
|
||||||
return analyzer.summarize_change(old_code, new_code, language)
|
return analyzer.summarize_change(old_code, new_code, language)
|
||||||
|
|||||||
1
src/gdiffer/diff_parser.py
Normal file
1
src/gdiffer/diff_parser.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
# src/gdiffer/diff_parser.py
|
||||||
@@ -1,8 +1,5 @@
|
|||||||
"""Issue detector for common bugs, security vulnerabilities, and code smells."""
|
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -11,33 +8,154 @@ class Issue:
|
|||||||
severity: str
|
severity: str
|
||||||
title: str
|
title: str
|
||||||
description: str
|
description: str
|
||||||
line: Optional[int] = None
|
line: int = None
|
||||||
suggestion: str = ""
|
suggestion: str = ""
|
||||||
pattern: str = ""
|
pattern: str = ""
|
||||||
|
|
||||||
|
|
||||||
class IssueDetector:
|
class IssueDetector:
|
||||||
SECURITY_PATTERNS = [
|
SECURITY_PATTERNS = [
|
||||||
{'pattern': r'(?i)(sql\s*\(|execute\s*\(|exec\s*\()', 'type': 'sql_injection', 'severity': 'critical', 'title': 'Potential SQL Injection', 'description': 'String concatenation in SQL query', 'suggestion': 'Use parameterized queries'},
|
{
|
||||||
{'pattern': r'(?i)(innerHTML\s*=|outerHTML\s*=)', 'type': 'xss', 'severity': 'critical', 'title': 'Potential XSS Vulnerability', 'description': 'Directly setting HTML content', 'suggestion': 'Use textContent or sanitize HTML'},
|
"pattern": (
|
||||||
{'pattern': r'(?i)(eval\s*\()', 'type': 'code_injection', 'severity': 'critical', 'title': 'Code Injection Risk', 'description': 'eval() detected', 'suggestion': 'Avoid eval()'},
|
r"(?i)(sql\\s*\\(|execute\\s*\\(|exec\\s*\\(|SELECT\\s+|UPDATE\\s+|"
|
||||||
{'pattern': r'(?i)(os\.system\s*\(|subprocess\.|shell=True)', 'type': 'command_injection', 'severity': 'critical', 'title': 'Command Injection Risk', 'description': 'Shell command execution', 'suggestion': 'Use subprocess with shell=False'},
|
r"INSERT\\s+|DELETE\\s+)"
|
||||||
{'pattern': r'(?i)(password\s*=|passwd\s*=|secret\s*=|token\s*=)', 'type': 'hardcoded_secret', 'severity': 'high', 'title': 'Hardcoded Secret', 'description': 'Potential hardcoded credential', 'suggestion': 'Use environment variables'},
|
),
|
||||||
{'pattern': r'(?i)(http://)', 'type': 'insecure_transport', 'severity': 'medium', 'title': 'Insecure HTTP', 'description': 'Using HTTP instead of HTTPS', 'suggestion': 'Use HTTPS'},
|
"type": "sql_injection",
|
||||||
{'pattern': r'(?i)(random\.randint\s*\()', 'type': 'weak_crypto', 'severity': 'medium', 'title': 'Weak Random', 'description': 'Using random module', 'suggestion': 'Use secrets module'},
|
"severity": "critical",
|
||||||
|
"title": "Potential SQL Injection",
|
||||||
|
"description": (
|
||||||
|
"String concatenation or interpolation used in SQL query"
|
||||||
|
),
|
||||||
|
"suggestion": (
|
||||||
|
"Use parameterized queries or ORM methods instead of string concatenation"
|
||||||
|
),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"pattern": r"(?i)(innerHTML\\s*=|outerHTML\\s*=|document\\.write\\s*\\()",
|
||||||
|
"type": "xss",
|
||||||
|
"severity": "critical",
|
||||||
|
"title": "Potential XSS Vulnerability",
|
||||||
|
"description": "Directly setting HTML content can lead to XSS attacks",
|
||||||
|
"suggestion": "Use textContent or sanitize HTML before insertion",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"pattern": r"(?i)(eval\\s*\\(|setTimeout\\s*\\(\\s*['\"]|setInterval\\s*\\(\\s*['\"])",
|
||||||
|
"type": "code_injection",
|
||||||
|
"severity": "critical",
|
||||||
|
"title": "Code Injection Risk",
|
||||||
|
"description": "eval() or dynamic code execution detected",
|
||||||
|
"suggestion": "Avoid eval() and dynamic code execution when possible",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"pattern": r"(?i)(os\\.system\\s*\\(|subprocess\\.|shell=True|popen)",
|
||||||
|
"type": "command_injection",
|
||||||
|
"severity": "critical",
|
||||||
|
"title": "Command Injection Risk",
|
||||||
|
"description": "Shell command execution with user input",
|
||||||
|
"suggestion": "Use subprocess with shell=False and validate/sanitize inputs",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"pattern": r"(?i)(password\\s*=|passwd\\s*=|secret\\s*=|token\\s*=|api_key\\s*=)",
|
||||||
|
"type": "hardcoded_secret",
|
||||||
|
"severity": "high",
|
||||||
|
"title": "Hardcoded Secret Detected",
|
||||||
|
"description": "Potential hardcoded password, token, or API key",
|
||||||
|
"suggestion": "Use environment variables or secure configuration management",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"pattern": r"(?i)(http://)",
|
||||||
|
"type": "insecure_transport",
|
||||||
|
"severity": "medium",
|
||||||
|
"title": "Insecure HTTP Transport",
|
||||||
|
"description": "Using HTTP instead of HTTPS for network requests",
|
||||||
|
"suggestion": "Use HTTPS for all network communications",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"pattern": r"(?i)(random\\.randint\\s*\\(|random\\.random\\s*\\()",
|
||||||
|
"type": "weak_crypto",
|
||||||
|
"severity": "medium",
|
||||||
|
"title": "Weak Random Number Generator",
|
||||||
|
"description": "Using random module for cryptographic purposes",
|
||||||
|
"suggestion": "Use secrets module for cryptographic randomness",
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
BUG_PATTERNS = [
|
BUG_PATTERNS = [
|
||||||
{'pattern': r'(?i)(if\s*\([^)]*==[^)]*\)\s*:)', 'type': 'assignment_in_condition', 'severity': 'high', 'title': 'Assignment in Condition', 'description': 'Possible typo = instead of ==', 'suggestion': 'Use == for comparison'},
|
{
|
||||||
{'pattern': r'(?i)(\bNone\b.*==)', 'type': 'none_comparison', 'severity': 'medium', 'title': 'Direct None Comparison', 'description': 'Using == None', 'suggestion': 'Use is None'},
|
"pattern": r"(?i)(if\\s*\\([^)]*==[^)]*\\)\\s*:|if\\s*\\([^)]*=\\s*[^)]*\\)\\s*:)",
|
||||||
{'pattern': r'\bexcept\s*:\s*$', 'type': 'bare_except', 'severity': 'medium', 'title': 'Bare Except Clause', 'description': 'Catching all exceptions', 'suggestion': 'Catch specific exceptions'},
|
"type": "assignment_in_condition",
|
||||||
|
"severity": "high",
|
||||||
|
"title": "Assignment in Condition",
|
||||||
|
"description": "Assignment used inside if condition (possible typo)",
|
||||||
|
"suggestion": "Use == for comparison, not =",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"pattern": r"(?i)(\\bNone\\b.*==|==.*\\bNone\\b)",
|
||||||
|
"type": "none_comparison",
|
||||||
|
"severity": "medium",
|
||||||
|
"title": "Direct None Comparison",
|
||||||
|
"description": "Using == None instead of \"is None\"",
|
||||||
|
"suggestion": "Use \"is None\" for None comparisons in Python",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"pattern": r"\\bexcept\\s*:\\s*$",
|
||||||
|
"type": "bare_except",
|
||||||
|
"severity": "medium",
|
||||||
|
"title": "Bare Except Clause",
|
||||||
|
"description": "Catching all exceptions without specifying type",
|
||||||
|
"suggestion": "Catch specific exceptions or at least Exception",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"pattern": r"(?i)(\\.get\\s*\\(\\s*['\"]?\\s*['\"]?\\s*\\))",
|
||||||
|
"type": "unused_get",
|
||||||
|
"severity": "low",
|
||||||
|
"title": "Dictionary get() with no default",
|
||||||
|
"description": "Using dict.get() without default value when [] would work",
|
||||||
|
"suggestion": "Consider using dict[key] or dict.get(key, default)",
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
CODE_SMELL_PATTERNS = [
|
CODE_SMELL_PATTERNS = [
|
||||||
{'pattern': r'(?i)(\bTODO\b|\bFIXME\b)', 'type': 'code_tag', 'severity': 'low', 'title': 'Code Tag', 'description': 'TODO/FIXME comment', 'suggestion': 'Address or create ticket'},
|
{
|
||||||
{'pattern': r'(?i)(\bprint\s*\()', 'type': 'debug_statement', 'severity': 'low', 'title': 'Debug Statement', 'description': 'print() detected', 'suggestion': 'Remove debug statements'},
|
"pattern": r"^\\s*for\\s+.*\\s+in\\s+.*:\\s*$",
|
||||||
{'pattern': r'.{80,}', 'type': 'long_line', 'severity': 'low', 'title': 'Long Line', 'description': 'Line exceeds 80 characters', 'suggestion': 'Split long lines'},
|
"type": "long_loop",
|
||||||
{'pattern': r'\bpass\b', 'type': 'empty_block', 'severity': 'low', 'title': 'Empty Code Block', 'description': 'Empty pass statement', 'suggestion': 'Add explanatory comment'},
|
"severity": "low",
|
||||||
|
"title": "Complex Loop",
|
||||||
|
"description": "Nested loop detected - consider if it can be optimized",
|
||||||
|
"suggestion": "Consider using list comprehensions or vectorized operations",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"pattern": r"(?i)(\\bTODO\\b|\\bFIXME\\b|\\bHACK\\b|\\bXXX\\b)",
|
||||||
|
"type": "code_tag",
|
||||||
|
"severity": "low",
|
||||||
|
"title": "Code Tag Found",
|
||||||
|
"description": "TODO/FIXME/HACK comments indicate technical debt",
|
||||||
|
"suggestion": "Address the TODO or create a ticket to track it",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"pattern": r"(?i)(\\bprint\\s*\\(|console\\.log\\s*\\())",
|
||||||
|
"type": "debug_statement",
|
||||||
|
"severity": "low",
|
||||||
|
"title": "Debug Statement",
|
||||||
|
"description": "Print or console.log statement detected",
|
||||||
|
"suggestion": "Remove debug statements before committing",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"pattern": r".{80,}",
|
||||||
|
"type": "long_line",
|
||||||
|
"severity": "low",
|
||||||
|
"title": "Long Line",
|
||||||
|
"description": "Line exceeds 80 characters",
|
||||||
|
"suggestion": "Split long lines for better readability",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"pattern": r"\\bpass\\b",
|
||||||
|
"type": "empty_block",
|
||||||
|
"severity": "low",
|
||||||
|
"title": "Empty Code Block",
|
||||||
|
"description": "Empty pass statement in code block",
|
||||||
|
"suggestion": "Add a comment explaining why the block is empty",
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@@ -46,56 +164,126 @@ class IssueDetector:
|
|||||||
|
|
||||||
def _compile_patterns(self):
|
def _compile_patterns(self):
|
||||||
self._compiled_patterns = []
|
self._compiled_patterns = []
|
||||||
|
|
||||||
for pattern_info in self.SECURITY_PATTERNS + self.BUG_PATTERNS + self.CODE_SMELL_PATTERNS:
|
for pattern_info in self.SECURITY_PATTERNS + self.BUG_PATTERNS + self.CODE_SMELL_PATTERNS:
|
||||||
try:
|
try:
|
||||||
compiled = re.compile(pattern_info['pattern'])
|
compiled = re.compile(pattern_info["pattern"])
|
||||||
self._compiled_patterns.append((compiled, pattern_info))
|
self._compiled_patterns.append((compiled, pattern_info))
|
||||||
except re.error:
|
except re.error:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def detect_issues(self, code: str, language: str = "text") -> list[Issue]:
|
def detect_issues(self, code, language="text"):
|
||||||
issues = []
|
issues = []
|
||||||
for line_num, line in enumerate(code.splitlines(), 1):
|
lines = code.splitlines()
|
||||||
|
|
||||||
|
for line_num, line in enumerate(lines, 1):
|
||||||
for compiled, pattern_info in self._compiled_patterns:
|
for compiled, pattern_info in self._compiled_patterns:
|
||||||
if compiled.search(line):
|
if compiled.search(line):
|
||||||
issues.append(Issue(
|
issue = Issue(
|
||||||
type=pattern_info['type'], severity=pattern_info['severity'],
|
type=pattern_info["type"],
|
||||||
title=pattern_info['title'], description=pattern_info['description'],
|
severity=pattern_info["severity"],
|
||||||
line=line_num, suggestion=pattern_info['suggestion'], pattern=pattern_info['pattern'],
|
title=pattern_info["title"],
|
||||||
))
|
description=pattern_info["description"],
|
||||||
|
line=line_num,
|
||||||
|
suggestion=pattern_info["suggestion"],
|
||||||
|
pattern=pattern_info["pattern"],
|
||||||
|
)
|
||||||
|
issues.append(issue)
|
||||||
|
|
||||||
return issues
|
return issues
|
||||||
|
|
||||||
def detect_diff_issues(self, old_code: str, new_code: str, language: str = "text") -> list[Issue]:
|
def detect_diff_issues(self, old_code, new_code, language="text"):
|
||||||
issues = []
|
issues = []
|
||||||
for i, line in enumerate(new_code.splitlines(), 1):
|
new_lines = new_code.splitlines()
|
||||||
if line.startswith('+') and not line.startswith('+++'):
|
|
||||||
|
added_lines = []
|
||||||
|
for i, line in enumerate(new_lines, 1):
|
||||||
|
if line.startswith("+") and not line.startswith("+++"):
|
||||||
clean_line = line[1:]
|
clean_line = line[1:]
|
||||||
for compiled, pattern_info in self._compiled_patterns:
|
added_lines.append((i, clean_line))
|
||||||
if compiled.search(clean_line):
|
|
||||||
issues.append(Issue(
|
for line_num, clean_line in added_lines:
|
||||||
type=pattern_info['type'], severity=pattern_info['severity'],
|
for compiled, pattern_info in self._compiled_patterns:
|
||||||
title=pattern_info['title'], description=pattern_info['description'],
|
if compiled.search(clean_line):
|
||||||
line=i, suggestion=pattern_info['suggestion'], pattern=pattern_info['pattern'],
|
issue = Issue(
|
||||||
))
|
type=pattern_info["type"],
|
||||||
|
severity=pattern_info["severity"],
|
||||||
|
title=pattern_info["title"],
|
||||||
|
description=pattern_info["description"],
|
||||||
|
line=line_num,
|
||||||
|
suggestion=pattern_info["suggestion"],
|
||||||
|
pattern=pattern_info["pattern"],
|
||||||
|
)
|
||||||
|
issues.append(issue)
|
||||||
|
|
||||||
return issues
|
return issues
|
||||||
|
|
||||||
def suggest_improvements(self, code: str, language: str = "text") -> list[str]:
|
def check_security_patterns(self, code):
|
||||||
|
issues = []
|
||||||
|
lines = code.splitlines()
|
||||||
|
|
||||||
|
for line_num, line in enumerate(lines, 1):
|
||||||
|
for pattern_info in self.SECURITY_PATTERNS:
|
||||||
|
try:
|
||||||
|
if re.search(pattern_info["pattern"], line):
|
||||||
|
issue = Issue(
|
||||||
|
type=pattern_info["type"],
|
||||||
|
severity=pattern_info["severity"],
|
||||||
|
title=pattern_info["title"],
|
||||||
|
description=pattern_info["description"],
|
||||||
|
line=line_num,
|
||||||
|
suggestion=pattern_info["suggestion"],
|
||||||
|
pattern=pattern_info["pattern"],
|
||||||
|
)
|
||||||
|
issues.append(issue)
|
||||||
|
except re.error:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return issues
|
||||||
|
|
||||||
|
def check_code_quality(self, code):
|
||||||
|
issues = []
|
||||||
|
lines = code.splitlines()
|
||||||
|
|
||||||
|
for line_num, line in enumerate(lines, 1):
|
||||||
|
for pattern_info in self.CODE_SMELL_PATTERNS:
|
||||||
|
try:
|
||||||
|
if re.search(pattern_info["pattern"], line):
|
||||||
|
issue = Issue(
|
||||||
|
type=pattern_info["type"],
|
||||||
|
severity=pattern_info["severity"],
|
||||||
|
title=pattern_info["title"],
|
||||||
|
description=pattern_info["description"],
|
||||||
|
line=line_num,
|
||||||
|
suggestion=pattern_info["suggestion"],
|
||||||
|
pattern=pattern_info["pattern"],
|
||||||
|
)
|
||||||
|
issues.append(issue)
|
||||||
|
except re.error:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return issues
|
||||||
|
|
||||||
|
def suggest_improvements(self, code, language="text"):
|
||||||
suggestions = []
|
suggestions = []
|
||||||
issues = self.detect_issues(code, language)
|
issues = self.detect_issues(code, language)
|
||||||
severity_order = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3}
|
|
||||||
|
severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3}
|
||||||
seen_types = set()
|
seen_types = set()
|
||||||
|
|
||||||
for issue in sorted(issues, key=lambda x: (severity_order.get(x.severity, 4), x.title)):
|
for issue in sorted(issues, key=lambda x: (severity_order.get(x.severity, 4), x.title)):
|
||||||
if issue.type not in seen_types and issue.suggestion:
|
if issue.type not in seen_types and issue.suggestion:
|
||||||
suggestions.append(f"{issue.title}: {issue.suggestion}")
|
suggestions.append(f"{issue.title}: {issue.suggestion}")
|
||||||
seen_types.add(issue.type)
|
seen_types.add(issue.type)
|
||||||
|
|
||||||
return suggestions
|
return suggestions
|
||||||
|
|
||||||
|
|
||||||
def detect_issues(code: str, language: str = "text") -> list[Issue]:
|
def detect_issues(code, language="text"):
|
||||||
detector = IssueDetector()
|
detector = IssueDetector()
|
||||||
return detector.detect_issues(code, language)
|
return detector.detect_issues(code, language)
|
||||||
|
|
||||||
|
|
||||||
def suggest_improvements(code: str, language: str = "text") -> list[str]:
|
def suggest_improvements(code, language="text"):
|
||||||
detector = IssueDetector()
|
detector = IssueDetector()
|
||||||
return detector.suggest_improvements(code, language)
|
return detector.suggest_improvements(code, language)
|
||||||
|
|||||||
@@ -1,75 +1,195 @@
|
|||||||
"""Language detection for code files."""
|
|
||||||
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
|
|
||||||
class LanguageDetector:
|
class LanguageDetector:
|
||||||
EXTENSION_MAP = {
|
EXTENSION_MAP = {
|
||||||
'py': 'python', 'pyw': 'python', 'pyx': 'python',
|
"py": "python",
|
||||||
'js': 'javascript', 'mjs': 'javascript', 'cjs': 'javascript', 'jsx': 'javascript',
|
"pyw": "python",
|
||||||
'ts': 'typescript', 'tsx': 'typescript', 'mts': 'typescript', 'cts': 'typescript',
|
"pyx": "python",
|
||||||
'java': 'java', 'kt': 'kotlin', 'kts': 'kotlin',
|
"js": "javascript",
|
||||||
'go': 'go', 'rs': 'rust', 'c': 'c', 'h': 'c',
|
"mjs": "javascript",
|
||||||
'cpp': 'cpp', 'cc': 'cpp', 'cxx': 'cpp', 'hpp': 'cpp', 'hxx': 'cpp',
|
"cjs": "javascript",
|
||||||
'cs': 'csharp', 'rb': 'ruby', 'erb': 'ruby', 'php': 'php',
|
"jsx": "javascript",
|
||||||
'swift': 'swift', 'm': 'objective-c', 'mm': 'objective-c',
|
"ts": "typescript",
|
||||||
'scala': 'scala', 'sc': 'scala', 'jl': 'julia',
|
"tsx": "typescript",
|
||||||
'r': 'r', 'R': 'r', 'lua': 'lua',
|
"mts": "typescript",
|
||||||
'pl': 'perl', 'pm': 'perl', 'sql': 'sql',
|
"cts": "typescript",
|
||||||
'sh': 'bash', 'bash': 'bash', 'zsh': 'bash', 'fish': 'bash',
|
"java": "java",
|
||||||
'yaml': 'yaml', 'yml': 'yaml', 'json': 'json',
|
"kt": "kotlin",
|
||||||
'xml': 'xml', 'html': 'html', 'htm': 'html',
|
"kts": "kotlin",
|
||||||
'css': 'css', 'scss': 'scss', 'sass': 'sass', 'less': 'less',
|
"go": "go",
|
||||||
'md': 'markdown', 'markdown': 'markdown',
|
"rs": "rust",
|
||||||
'txt': 'text', 'dockerfile': 'dockerfile', 'Dockerfile': 'dockerfile',
|
"c": "c",
|
||||||
|
"h": "c",
|
||||||
|
"cpp": "cpp",
|
||||||
|
"cc": "cpp",
|
||||||
|
"cxx": "cpp",
|
||||||
|
"hpp": "cpp",
|
||||||
|
"hxx": "cpp",
|
||||||
|
"cs": "csharp",
|
||||||
|
"rb": "ruby",
|
||||||
|
"erb": "ruby",
|
||||||
|
"php": "php",
|
||||||
|
"swift": "swift",
|
||||||
|
"m": "objective-c",
|
||||||
|
"mm": "objective-c",
|
||||||
|
"scala": "scala",
|
||||||
|
"sc": "scala",
|
||||||
|
"jl": "julia",
|
||||||
|
"r": "r",
|
||||||
|
"R": "r",
|
||||||
|
"lua": "lua",
|
||||||
|
"pl": "perl",
|
||||||
|
"pm": "perl",
|
||||||
|
"sql": "sql",
|
||||||
|
"sh": "bash",
|
||||||
|
"bash": "bash",
|
||||||
|
"zsh": "bash",
|
||||||
|
"fish": "bash",
|
||||||
|
"yaml": "yaml",
|
||||||
|
"yml": "yaml",
|
||||||
|
"json": "json",
|
||||||
|
"xml": "xml",
|
||||||
|
"html": "html",
|
||||||
|
"htm": "html",
|
||||||
|
"css": "css",
|
||||||
|
"scss": "scss",
|
||||||
|
"sass": "sass",
|
||||||
|
"less": "less",
|
||||||
|
"md": "markdown",
|
||||||
|
"markdown": "markdown",
|
||||||
|
"txt": "text",
|
||||||
|
"dockerfile": "dockerfile",
|
||||||
|
"Dockerfile": "dockerfile",
|
||||||
}
|
}
|
||||||
|
|
||||||
CONTENT_PATTERNS = {
|
CONTENT_PATTERNS = {
|
||||||
'python': [r'^import\s+\w+', r'^from\s+\w+\s+import', r'^def\s+\w+\s*\(', r'^class\s+\w+'],
|
"python": [
|
||||||
'javascript': [r'^const\s+\w+', r'^let\s+\w+', r'^var\s+\w+', r'^function\s+\w+', r'=>\s*\{'],
|
r"^import\\s+\\w+",
|
||||||
'typescript': [r'^interface\s+\w+', r'^type\s+\w+', r':\s*(string|number|boolean)'],
|
r"^from\\s+\\w+\\s+import",
|
||||||
'java': [r'^package\s+[\w.]+;', r'^import\s+[\w.]+;', r'^public\s+class\s+\w+'],
|
r"^def\\s+\\w+\\s*\\(",
|
||||||
'go': [r'^package\s+\w+', r'^import\s+\(', r'func\s+\w+'],
|
r"^class\\s+\\w+\\s*[:\\(]",
|
||||||
'rust': [r'^fn\s+\w+', r'^impl\s+\w+', r'^struct\s+\w+', r'^enum\s+\w+'],
|
r"^if\\s+__name__\\s*==\\s*['\"]__main__['\"]",
|
||||||
'c': [r'#include\s*<', r'#include\s*"', r'int\s+main\s*\('],
|
],
|
||||||
'cpp': [r'#include\s*<', r'#include\s*"', r'class\s+\w+', r'std::\w+'],
|
"javascript": [
|
||||||
'ruby': [r'^require\s+', r'^class\s+\w+', r'^module\s+\w+', r'def\s+\w+'],
|
r"^const\\s+\\w+\\s*=",
|
||||||
'php': [r'<\?php', r'\$\w+\s*=', r'function\s+\w+', r'class\s+\w+'],
|
r"^let\\s+\\w+\\s*=",
|
||||||
|
r"^var\\s+\\w+\\s*=",
|
||||||
|
r"^function\\s+\\w+\\s*\\(",
|
||||||
|
r"=>\\s*\\{",
|
||||||
|
r"import\\s+.*\\s+from",
|
||||||
|
r"export\\s+(default\\s+)?",
|
||||||
|
],
|
||||||
|
"typescript": [
|
||||||
|
r"^interface\\s+\\w+\\s*\\{",
|
||||||
|
r"^type\\s+\\w+\\s*=",
|
||||||
|
r":\\s*(string|number|boolean|any|void|null|undefined)",
|
||||||
|
r"<[A-Z]\\w*>",
|
||||||
|
],
|
||||||
|
"java": [
|
||||||
|
r"^package\\s+[\\w.]+;",
|
||||||
|
r"^import\\s+[\\w.]+;",
|
||||||
|
r"^public\\s+(class|interface|enum)\\s+\\w+",
|
||||||
|
r"^private\\s+(static\\s+)?(final\\s+)?\\w+\\s+\\w+;",
|
||||||
|
],
|
||||||
|
"go": [
|
||||||
|
r"^package\\s+\\w+",
|
||||||
|
r"^import\\s*\\(",
|
||||||
|
r"func\\s+\\w+\\s*\\(",
|
||||||
|
r":=",
|
||||||
|
r"go\\s+func",
|
||||||
|
],
|
||||||
|
"rust": [
|
||||||
|
r"^fn\\s+\\w+\\s*\\(",
|
||||||
|
r"^impl\\s+\\w+",
|
||||||
|
r"^struct\\s+\\w+",
|
||||||
|
r"^enum\\s+\\w+",
|
||||||
|
r"let\\s+mut\\s+\\w+",
|
||||||
|
r"->\\s*\\w+",
|
||||||
|
],
|
||||||
|
"c": [
|
||||||
|
r"#include\\s*<",
|
||||||
|
r"#include\\s*\"",
|
||||||
|
r"int\\s+main\\s*\\(",
|
||||||
|
r"struct\\s+\\w+\\s*\\{",
|
||||||
|
r"void\\s+\\*?\\s*\\w+\\s*\\(",
|
||||||
|
],
|
||||||
|
"cpp": [
|
||||||
|
r"#include\\s*<",
|
||||||
|
r"#include\\s*\"",
|
||||||
|
r"class\\s+\\w+\\s*(:\\s*public)?",
|
||||||
|
r"std::\\w+",
|
||||||
|
r"using\\s+namespace\\s+std",
|
||||||
|
],
|
||||||
|
"ruby": [
|
||||||
|
r"^require\\s+['\"]",
|
||||||
|
r"^class\\s+\\w+(\\s*<\\s*\\w+)?",
|
||||||
|
r"^module\\s+\\w+",
|
||||||
|
r"def\\s+\\w+",
|
||||||
|
r"puts\\s+",
|
||||||
|
r"puts!",
|
||||||
|
],
|
||||||
|
"php": [
|
||||||
|
r"<\?php",
|
||||||
|
r"\$\\w+\\s*=",
|
||||||
|
r"function\\s+\\w+\\s*\\(",
|
||||||
|
r"class\\s+\\w+\\s*\\{",
|
||||||
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
def detect_from_filename(self, filename: str) -> Optional[str]:
|
def __init__(self):
|
||||||
if '.' not in filename:
|
self._tree_sitter_languages = {}
|
||||||
|
|
||||||
|
def detect_from_filename(self, filename):
|
||||||
|
if "." not in filename:
|
||||||
return None
|
return None
|
||||||
ext = filename.rsplit('.', 1)[-1].lower()
|
|
||||||
|
ext = filename.rsplit(".", 1)[-1].lower()
|
||||||
return self.EXTENSION_MAP.get(ext)
|
return self.EXTENSION_MAP.get(ext)
|
||||||
|
|
||||||
def detect_from_content(self, content: str) -> Optional[str]:
|
def detect_from_content(self, content):
|
||||||
first_lines = '\n'.join(content.splitlines()[:50])
|
first_lines = "\n".join(content.splitlines()[:50])
|
||||||
|
|
||||||
scores = {}
|
scores = {}
|
||||||
|
|
||||||
for lang, patterns in self.CONTENT_PATTERNS.items():
|
for lang, patterns in self.CONTENT_PATTERNS.items():
|
||||||
import re
|
score = 0
|
||||||
score = sum(len(re.findall(p, first_lines, re.MULTILINE)) for p in patterns)
|
for pattern in patterns:
|
||||||
|
matches = len(re.findall(pattern, first_lines, re.MULTILINE))
|
||||||
|
score += matches
|
||||||
|
|
||||||
if score > 0:
|
if score > 0:
|
||||||
scores[lang] = score
|
scores[lang] = score
|
||||||
return max(scores, key=scores.get) if scores else None
|
|
||||||
|
|
||||||
def detect(self, filename: str, content: str = "") -> str:
|
if scores:
|
||||||
|
best_lang = max(scores, key=scores.get)
|
||||||
|
return best_lang
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
def detect(self, filename, content=""):
|
||||||
ext_lang = self.detect_from_filename(filename)
|
ext_lang = self.detect_from_filename(filename)
|
||||||
if ext_lang and ext_lang not in ['text', 'markdown', 'json', 'yaml', 'xml', 'html', 'css', 'dockerfile']:
|
|
||||||
|
if ext_lang and ext_lang not in [
|
||||||
|
"text", "markdown", "json", "yaml", "xml", "html", "css", "dockerfile"
|
||||||
|
]:
|
||||||
|
if content:
|
||||||
|
content_lang = self.detect_from_content(content)
|
||||||
|
if content_lang and content_lang != ext_lang:
|
||||||
|
return content_lang
|
||||||
return ext_lang
|
return ext_lang
|
||||||
|
|
||||||
if content:
|
if content:
|
||||||
content_lang = self.detect_from_content(content)
|
content_lang = self.detect_from_content(content)
|
||||||
if content_lang:
|
if content_lang:
|
||||||
return content_lang
|
return content_lang
|
||||||
|
|
||||||
return ext_lang or "text"
|
return ext_lang or "text"
|
||||||
|
|
||||||
def get_supported_languages(self) -> list[str]:
|
def get_supported_languages(self):
|
||||||
return sorted(set(self.EXTENSION_MAP.values()))
|
return sorted(set(self.EXTENSION_MAP.values()))
|
||||||
|
|
||||||
def is_language_supported(self, language: str) -> bool:
|
def is_language_supported(self, language):
|
||||||
return language in self.get_supported_languages()
|
return language in self.get_supported_languages()
|
||||||
|
|
||||||
|
|
||||||
def detect_language(filename: str, content: str = "") -> str:
|
def detect_language(filename, content=""):
|
||||||
detector = LanguageDetector()
|
detector = LanguageDetector()
|
||||||
return detector.detect(filename, content)
|
return detector.detect(filename, content)
|
||||||
|
|||||||
1
src/gdiffer/llm.py
Normal file
1
src/gdiffer/llm.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
# src/gdiffer/llm.py
|
||||||
@@ -1,35 +1,31 @@
|
|||||||
"""Data models for git diff parsing and analysis."""
|
|
||||||
|
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class DiffHunk:
|
class DiffHunk:
|
||||||
"""Represents a single hunk (chunk) of changes in a diff."""
|
|
||||||
old_start: int
|
old_start: int
|
||||||
old_lines: int
|
old_lines: int
|
||||||
new_start: int
|
new_start: int
|
||||||
new_lines: int
|
new_lines: int
|
||||||
old_lines_content: list[str] = field(default_factory=list)
|
old_lines_content: list = field(default_factory=list)
|
||||||
new_lines_content: list[str] = field(default_factory=list)
|
new_lines_content: list = field(default_factory=list)
|
||||||
header: str = ""
|
header: str = ""
|
||||||
|
|
||||||
def get_added_lines(self) -> list[tuple[int, str]]:
|
def get_added_lines(self):
|
||||||
result = []
|
result = []
|
||||||
for i, line in enumerate(self.new_lines_content):
|
for i, line in enumerate(self.new_lines_content):
|
||||||
if line.startswith('+') and not line.startswith('+++'):
|
if line.startswith("+") and not line.startswith("+++"):
|
||||||
result.append((self.new_start + i, line[1:]))
|
result.append((self.new_start + i, line[1:]))
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def get_removed_lines(self) -> list[tuple[int, str]]:
|
def get_removed_lines(self):
|
||||||
result = []
|
result = []
|
||||||
for i, line in enumerate(self.old_lines_content):
|
for i, line in enumerate(self.old_lines_content):
|
||||||
if line.startswith('-') and not line.startswith('---'):
|
if line.startswith("-") and not line.startswith("---"):
|
||||||
result.append((self.old_start + i, line[1:]))
|
result.append((self.old_start + i, line[1:]))
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def get_modified_lines(self) -> list[tuple[int, str, str]]:
|
def get_modified_lines(self):
|
||||||
result = []
|
result = []
|
||||||
added = self.get_added_lines()
|
added = self.get_added_lines()
|
||||||
removed = self.get_removed_lines()
|
removed = self.get_removed_lines()
|
||||||
@@ -43,66 +39,63 @@ class DiffHunk:
|
|||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class DiffFile:
|
class DiffFile:
|
||||||
"""Represents a file in the diff with its changes."""
|
old_path: str
|
||||||
old_path: Optional[str]
|
new_path: str
|
||||||
new_path: Optional[str]
|
new_file_mode: str = None
|
||||||
new_file_mode: Optional[str] = None
|
deleted_file_mode: str = None
|
||||||
deleted_file_mode: Optional[str] = None
|
similarity_index: str = None
|
||||||
similarity_index: Optional[str] = None
|
rename_from: str = None
|
||||||
rename_from: Optional[str] = None
|
rename_to: str = None
|
||||||
rename_to: Optional[str] = None
|
hunks: list = field(default_factory=list)
|
||||||
hunks: list[DiffHunk] = field(default_factory=list)
|
|
||||||
change_type: str = "modify"
|
change_type: str = "modify"
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def filename(self) -> str:
|
def filename(self):
|
||||||
if self.new_path:
|
if self.new_path:
|
||||||
return self.new_path
|
return self.new_path
|
||||||
return self.old_path or ""
|
return self.old_path or ""
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_new(self) -> bool:
|
def is_new(self):
|
||||||
return self.new_file_mode is not None or self.old_path in [None, "/dev/null"]
|
return self.new_file_mode is not None or self.old_path in [None, "/dev/null"]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_deleted(self) -> bool:
|
def is_deleted(self):
|
||||||
return self.deleted_file_mode is not None
|
return self.deleted_file_mode is not None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_rename(self) -> bool:
|
def is_rename(self):
|
||||||
return self.rename_from is not None
|
return self.rename_from is not None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def extension(self) -> str:
|
def extension(self):
|
||||||
filename = self.filename
|
filename = self.filename
|
||||||
if '.' in filename:
|
if "." in filename:
|
||||||
return filename.rsplit('.', 1)[-1].lower()
|
return filename.rsplit(".", 1)[-1].lower()
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class CodeChange:
|
class CodeChange:
|
||||||
"""Represents a code change with context."""
|
|
||||||
file: DiffFile
|
file: DiffFile
|
||||||
hunk: Optional[DiffHunk]
|
hunk: DiffHunk
|
||||||
old_code: str
|
old_code: str
|
||||||
new_code: str
|
new_code: str
|
||||||
language: str = "unknown"
|
language: str = "unknown"
|
||||||
summary: str = ""
|
summary: str = ""
|
||||||
issues: list[dict] = field(default_factory=list)
|
issues: list = field(default_factory=list)
|
||||||
suggestions: list[str] = field(default_factory=list)
|
suggestions: list = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class DiffAnalysis:
|
class DiffAnalysis:
|
||||||
"""Complete analysis result for a diff."""
|
files: list = field(default_factory=list)
|
||||||
files: list[DiffFile] = field(default_factory=list)
|
|
||||||
total_files: int = 0
|
total_files: int = 0
|
||||||
files_added: int = 0
|
files_added: int = 0
|
||||||
files_deleted: int = 0
|
files_deleted: int = 0
|
||||||
files_modified: int = 0
|
files_modified: int = 0
|
||||||
files_renamed: int = 0
|
files_renamed: int = 0
|
||||||
total_changes: int = 0
|
total_changes: int = 0
|
||||||
language_breakdown: dict[str, int] = field(default_factory=dict)
|
language_breakdown: dict = field(default_factory=dict)
|
||||||
all_issues: list[dict] = field(default_factory=list)
|
all_issues: list = field(default_factory=list)
|
||||||
all_suggestions: list[str] = field(default_factory=list)
|
all_suggestions: list = field(default_factory=list)
|
||||||
|
|||||||
@@ -1,5 +1,3 @@
|
|||||||
"""Output formatter for color-coded terminal display."""
|
|
||||||
|
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
|
||||||
from rich.console import Console
|
from rich.console import Console
|
||||||
@@ -23,7 +21,7 @@ class SeverityColors:
|
|||||||
|
|
||||||
|
|
||||||
class OutputFormatter:
|
class OutputFormatter:
|
||||||
def __init__(self, output_format: OutputFormat = OutputFormat.TERMINAL):
|
def __init__(self, output_format=OutputFormat.TERMINAL):
|
||||||
self.output_format = output_format
|
self.output_format = output_format
|
||||||
self.console = Console(theme=Theme({
|
self.console = Console(theme=Theme({
|
||||||
"critical": "bold red",
|
"critical": "bold red",
|
||||||
@@ -37,7 +35,7 @@ class OutputFormatter:
|
|||||||
"filename": "bold blue",
|
"filename": "bold blue",
|
||||||
}))
|
}))
|
||||||
|
|
||||||
def format_analysis(self, analysis: DiffAnalysis) -> str:
|
def format_analysis(self, analysis):
|
||||||
if self.output_format == OutputFormat.JSON:
|
if self.output_format == OutputFormat.JSON:
|
||||||
return self._format_json(analysis)
|
return self._format_json(analysis)
|
||||||
elif self.output_format == OutputFormat.PLAIN:
|
elif self.output_format == OutputFormat.PLAIN:
|
||||||
@@ -45,7 +43,7 @@ class OutputFormatter:
|
|||||||
else:
|
else:
|
||||||
return self._format_terminal(analysis)
|
return self._format_terminal(analysis)
|
||||||
|
|
||||||
def _format_terminal(self, analysis: DiffAnalysis) -> str:
|
def _format_terminal(self, analysis):
|
||||||
output_parts = []
|
output_parts = []
|
||||||
|
|
||||||
output_parts.append(self._format_summary(analysis))
|
output_parts.append(self._format_summary(analysis))
|
||||||
@@ -57,9 +55,9 @@ class OutputFormatter:
|
|||||||
if analysis.all_suggestions:
|
if analysis.all_suggestions:
|
||||||
output_parts.append(self._format_suggestions(analysis.all_suggestions))
|
output_parts.append(self._format_suggestions(analysis.all_suggestions))
|
||||||
|
|
||||||
return '\n'.join(output_parts)
|
return "\n".join(output_parts)
|
||||||
|
|
||||||
def _format_summary(self, analysis: DiffAnalysis) -> str:
|
def _format_summary(self, analysis):
|
||||||
lines = []
|
lines = []
|
||||||
lines.append("[bold blue]=== Git Diff Analysis Summary ===[/bold blue]")
|
lines.append("[bold blue]=== Git Diff Analysis Summary ===[/bold blue]")
|
||||||
lines.append(f"[info]Total files changed:[/info] [bold]{analysis.total_files}[/bold]")
|
lines.append(f"[info]Total files changed:[/info] [bold]{analysis.total_files}[/bold]")
|
||||||
@@ -73,9 +71,9 @@ class OutputFormatter:
|
|||||||
for lang, count in sorted(analysis.language_breakdown.items()):
|
for lang, count in sorted(analysis.language_breakdown.items()):
|
||||||
lines.append(f" - {lang}: {count}")
|
lines.append(f" - {lang}: {count}")
|
||||||
|
|
||||||
return '\n'.join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
def _format_files(self, analysis: DiffAnalysis) -> str:
|
def _format_files(self, analysis):
|
||||||
lines = []
|
lines = []
|
||||||
lines.append("\n[bold blue]=== File Changes ===[/bold blue]")
|
lines.append("\n[bold blue]=== File Changes ===[/bold blue]")
|
||||||
|
|
||||||
@@ -83,10 +81,10 @@ class OutputFormatter:
|
|||||||
lines.append(f"\n[filename]{i}. {file_obj.filename}[/filename]")
|
lines.append(f"\n[filename]{i}. {file_obj.filename}[/filename]")
|
||||||
|
|
||||||
change_emoji = {
|
change_emoji = {
|
||||||
"add": "[added][✚][/added]",
|
"add": "[added]✚[/added]",
|
||||||
"delete": "[removed][✖][/removed]",
|
"delete": "[removed]✖[/removed]",
|
||||||
"rename": "[info][↪][/info]",
|
"rename": "[info]↪[/info]",
|
||||||
"modify": "[modified][✎][/modified]",
|
"modify": "[modified]✎[/modified]",
|
||||||
}
|
}
|
||||||
change_label = change_emoji.get(file_obj.change_type, "")
|
change_label = change_emoji.get(file_obj.change_type, "")
|
||||||
lines.append(f" Status: {change_label} {file_obj.change_type}")
|
lines.append(f" Status: {change_label} {file_obj.change_type}")
|
||||||
@@ -99,102 +97,105 @@ class OutputFormatter:
|
|||||||
lines.append(f" Changes: {total_changes} lines")
|
lines.append(f" Changes: {total_changes} lines")
|
||||||
|
|
||||||
for j, hunk in enumerate(file_obj.hunks, 1):
|
for j, hunk in enumerate(file_obj.hunks, 1):
|
||||||
lines.append(f" Hunk {j} (lines {hunk.old_start}-{hunk.old_start + hunk.old_lines}):")
|
hunk_range = f"{hunk.old_start}-{hunk.old_start + hunk.old_lines}"
|
||||||
|
lines.append(f" Hunk {j} (lines {hunk_range}):")
|
||||||
lines.append(self._format_hunk(hunk))
|
lines.append(self._format_hunk(hunk))
|
||||||
|
|
||||||
return '\n'.join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
def _format_hunk(self, hunk) -> str:
|
def _format_hunk(self, hunk):
|
||||||
lines = []
|
lines = []
|
||||||
for line in hunk.new_lines_content:
|
for line in hunk.new_lines_content:
|
||||||
if line.startswith('+++'):
|
if line.startswith("+++"):
|
||||||
continue
|
continue
|
||||||
if line.startswith('+'):
|
if line.startswith("+"):
|
||||||
lines.append(f" [added]{line}[/added]")
|
lines.append(f" [added]{line}[/added]")
|
||||||
elif line.startswith('-'):
|
elif line.startswith("-"):
|
||||||
lines.append(f" [removed]{line}[/removed]")
|
lines.append(f" [removed]{line}[/removed]")
|
||||||
elif line.startswith('@@'):
|
elif line.startswith("@@"):
|
||||||
lines.append(f" [info]{line}[/info]")
|
lines.append(f" [info]{line}[/info]")
|
||||||
else:
|
else:
|
||||||
lines.append(f" {line}")
|
lines.append(f" {line}")
|
||||||
return '\n'.join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
def _format_issues(self, issues: list[dict]) -> str:
|
def _format_issues(self, issues):
|
||||||
lines = []
|
lines = []
|
||||||
lines.append("\n[bold blue]=== Detected Issues ===[/bold blue]")
|
lines.append("\n[bold blue]=== Detected Issues ===[/bold blue]")
|
||||||
|
|
||||||
severity_priority = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3}
|
severity_priority = {"critical": 0, "high": 1, "medium": 2, "low": 3}
|
||||||
sorted_issues = sorted(issues, key=lambda x: severity_priority.get(x.get('severity', ''), 4))
|
sorted_issues = sorted(
|
||||||
|
issues, key=lambda x: severity_priority.get(x.get("severity", ""), 4)
|
||||||
|
)
|
||||||
|
|
||||||
for issue in sorted_issues:
|
for issue in sorted_issues:
|
||||||
severity = issue.get('severity', 'info').lower()
|
severity = issue.get("severity", "info").lower()
|
||||||
color = getattr(SeverityColors, severity.upper(), 'info')
|
color = getattr(SeverityColors, severity.upper(), "info")
|
||||||
lines.append(f"\n[{color}][✖] {issue.get('title', 'Issue')}[/]")
|
lines.append(f"\n[{color}]✖ {issue.get('title', 'Issue')}[/[{color}]]")
|
||||||
lines.append(f" Severity: [{color}]{severity.upper()}[/]")
|
lines.append(f" Severity: [{color}]{severity.upper()}[/[{color}]]")
|
||||||
lines.append(f" Description: {issue.get('description', '')}")
|
lines.append(f" Description: {issue.get('description', '')}")
|
||||||
if issue.get('line'):
|
if issue.get("line"):
|
||||||
lines.append(f" Line: {issue['line']}")
|
lines.append(f" Line: {issue['line']}")
|
||||||
if issue.get('suggestion'):
|
if issue.get("suggestion"):
|
||||||
lines.append(f" Suggestion: {issue['suggestion']}")
|
lines.append(f" Suggestion: {issue['suggestion']}")
|
||||||
|
|
||||||
return '\n'.join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
def _format_suggestions(self, suggestions: list[str]) -> str:
|
def _format_suggestions(self, suggestions):
|
||||||
lines = []
|
lines = []
|
||||||
lines.append("\n[bold blue]=== Suggestions ===[/bold blue]")
|
lines.append("\n[bold blue]=== Suggestions ===[/bold blue]")
|
||||||
|
|
||||||
for i, suggestion in enumerate(suggestions, 1):
|
for i, suggestion in enumerate(suggestions, 1):
|
||||||
lines.append(f"\n[info]{i}. {suggestion}[/info]")
|
lines.append(f"\n[info]{i}. {suggestion}[/info]")
|
||||||
|
|
||||||
return '\n'.join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
def _format_json(self, analysis: DiffAnalysis) -> str:
|
def _format_json(self, analysis):
|
||||||
import json
|
import json
|
||||||
|
|
||||||
result = {
|
result = {
|
||||||
'summary': {
|
"summary": {
|
||||||
'total_files': analysis.total_files,
|
"total_files": analysis.total_files,
|
||||||
'files_added': analysis.files_added,
|
"files_added": analysis.files_added,
|
||||||
'files_deleted': analysis.files_deleted,
|
"files_deleted": analysis.files_deleted,
|
||||||
'files_modified': analysis.files_modified,
|
"files_modified": analysis.files_modified,
|
||||||
'files_renamed': analysis.files_renamed,
|
"files_renamed": analysis.files_renamed,
|
||||||
'total_changes': analysis.total_changes,
|
"total_changes": analysis.total_changes,
|
||||||
'language_breakdown': analysis.language_breakdown,
|
"language_breakdown": analysis.language_breakdown,
|
||||||
},
|
},
|
||||||
'files': [],
|
"files": [],
|
||||||
'issues': analysis.all_issues,
|
"issues": analysis.all_issues,
|
||||||
'suggestions': analysis.all_suggestions,
|
"suggestions": analysis.all_suggestions,
|
||||||
}
|
}
|
||||||
|
|
||||||
for file_obj in analysis.files:
|
for file_obj in analysis.files:
|
||||||
file_data = {
|
file_data = {
|
||||||
'filename': file_obj.filename,
|
"filename": file_obj.filename,
|
||||||
'change_type': file_obj.change_type,
|
"change_type": file_obj.change_type,
|
||||||
'old_path': file_obj.old_path,
|
"old_path": file_obj.old_path,
|
||||||
'new_path': file_obj.new_path,
|
"new_path": file_obj.new_path,
|
||||||
'is_new': file_obj.is_new,
|
"is_new": file_obj.is_new,
|
||||||
'is_deleted': file_obj.is_deleted,
|
"is_deleted": file_obj.is_deleted,
|
||||||
'is_rename': file_obj.is_rename,
|
"is_rename": file_obj.is_rename,
|
||||||
'language': file_obj.extension,
|
"language": file_obj.extension,
|
||||||
'hunks': [],
|
"hunks": [],
|
||||||
}
|
}
|
||||||
|
|
||||||
for hunk in file_obj.hunks:
|
for hunk in file_obj.hunks:
|
||||||
hunk_data = {
|
hunk_data = {
|
||||||
'old_start': hunk.old_start,
|
"old_start": hunk.old_start,
|
||||||
'old_lines': hunk.old_lines,
|
"old_lines": hunk.old_lines,
|
||||||
'new_start': hunk.new_start,
|
"new_start": hunk.new_start,
|
||||||
'new_lines': hunk.new_lines,
|
"new_lines": hunk.new_lines,
|
||||||
'added_lines': hunk.get_added_lines(),
|
"added_lines": hunk.get_added_lines(),
|
||||||
'removed_lines': hunk.get_removed_lines(),
|
"removed_lines": hunk.get_removed_lines(),
|
||||||
}
|
}
|
||||||
file_data['hunks'].append(hunk_data)
|
file_data["hunks"].append(hunk_data)
|
||||||
|
|
||||||
result['files'].append(file_data)
|
result["files"].append(file_data)
|
||||||
|
|
||||||
return json.dumps(result, indent=2)
|
return json.dumps(result, indent=2)
|
||||||
|
|
||||||
def _format_plain(self, analysis: DiffAnalysis) -> str:
|
def _format_plain(self, analysis):
|
||||||
lines = []
|
lines = []
|
||||||
lines.append("=== Git Diff Analysis Summary ===")
|
lines.append("=== Git Diff Analysis Summary ===")
|
||||||
lines.append(f"Total files changed: {analysis.total_files}")
|
lines.append(f"Total files changed: {analysis.total_files}")
|
||||||
@@ -220,7 +221,7 @@ class OutputFormatter:
|
|||||||
for j, hunk in enumerate(file_obj.hunks, 1):
|
for j, hunk in enumerate(file_obj.hunks, 1):
|
||||||
lines.append(f" Hunk {j}:")
|
lines.append(f" Hunk {j}:")
|
||||||
for line in hunk.new_lines_content:
|
for line in hunk.new_lines_content:
|
||||||
if line.startswith('+++'):
|
if line.startswith("+++"):
|
||||||
continue
|
continue
|
||||||
lines.append(f" {line}")
|
lines.append(f" {line}")
|
||||||
|
|
||||||
@@ -236,21 +237,21 @@ class OutputFormatter:
|
|||||||
for i, suggestion in enumerate(analysis.all_suggestions, 1):
|
for i, suggestion in enumerate(analysis.all_suggestions, 1):
|
||||||
lines.append(f"{i}. {suggestion}")
|
lines.append(f"{i}. {suggestion}")
|
||||||
|
|
||||||
return '\n'.join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
def print(self, content: str) -> None:
|
def print(self, content):
|
||||||
self.console.print(content)
|
self.console.print(content)
|
||||||
|
|
||||||
def print_analysis(self, analysis: DiffAnalysis) -> None:
|
def print_analysis(self, analysis):
|
||||||
formatted = self.format_analysis(analysis)
|
formatted = self.format_analysis(analysis)
|
||||||
self.print(formatted)
|
self.print(formatted)
|
||||||
|
|
||||||
|
|
||||||
def format_analysis(analysis: DiffAnalysis, output_format: str = "terminal") -> str:
|
def format_analysis(analysis, output_format="terminal"):
|
||||||
fmt = OutputFormatter(OutputFormat(output_format))
|
fmt = OutputFormatter(OutputFormat(output_format))
|
||||||
return fmt.format_analysis(analysis)
|
return fmt.format_analysis(analysis)
|
||||||
|
|
||||||
|
|
||||||
def print_analysis(analysis: DiffAnalysis, output_format: str = "terminal") -> None:
|
def print_analysis(analysis, output_format="terminal"):
|
||||||
fmt = OutputFormatter(OutputFormat(output_format))
|
fmt = OutputFormatter(OutputFormat(output_format))
|
||||||
fmt.print_analysis(analysis)
|
fmt.print_analysis(analysis)
|
||||||
|
|||||||
@@ -1,21 +1,16 @@
|
|||||||
"""Diff parser for unified git diff format."""
|
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
from gdiffer.models import DiffFile, DiffHunk
|
from gdiffer.models import DiffFile, DiffHunk
|
||||||
|
|
||||||
|
|
||||||
class DiffParser:
|
class DiffParser:
|
||||||
"""Parser for unified diff format (as produced by git diff)."""
|
HUNK_PATTERN = re.compile(r"^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@")
|
||||||
|
|
||||||
HUNK_PATTERN = re.compile(r'^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@')
|
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.files: list[DiffFile] = []
|
self.files = []
|
||||||
self.errors: list[str] = []
|
self.errors = []
|
||||||
|
|
||||||
def parse(self, diff_content: str) -> list[DiffFile]:
|
def parse(self, diff_content):
|
||||||
self.files = []
|
self.files = []
|
||||||
self.errors = []
|
self.errors = []
|
||||||
|
|
||||||
@@ -26,14 +21,14 @@ class DiffParser:
|
|||||||
self._parse_lines(lines)
|
self._parse_lines(lines)
|
||||||
return self.files
|
return self.files
|
||||||
|
|
||||||
def _parse_lines(self, lines: list[str]) -> None:
|
def _parse_lines(self, lines):
|
||||||
i = 0
|
i = 0
|
||||||
n = len(lines)
|
n = len(lines)
|
||||||
|
|
||||||
while i < n:
|
while i < n:
|
||||||
line = lines[i].rstrip('\n')
|
line = lines[i].rstrip("\n")
|
||||||
|
|
||||||
if line.startswith('diff --git'):
|
if line.startswith("diff --git"):
|
||||||
file_obj = self._parse_file(lines, i)
|
file_obj = self._parse_file(lines, i)
|
||||||
if file_obj:
|
if file_obj:
|
||||||
self.files.append(file_obj)
|
self.files.append(file_obj)
|
||||||
@@ -42,24 +37,24 @@ class DiffParser:
|
|||||||
|
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
def _parse_file(self, lines: list[str], start: int) -> Optional[DiffFile]:
|
def _parse_file(self, lines, start):
|
||||||
if start >= len(lines):
|
if start >= len(lines):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
first_line = lines[start]
|
first_line = lines[start]
|
||||||
if not first_line.startswith('diff --git'):
|
if not first_line.startswith("diff --git"):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
parts = first_line.split(' ', 3)
|
parts = first_line.split(" ", 3)
|
||||||
if len(parts) < 4:
|
if len(parts) < 4:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
old_path = parts[2][2:] if len(parts) > 2 else ''
|
old_path = parts[2][2:] if len(parts) > 2 else ""
|
||||||
new_path = parts[3][2:] if len(parts) > 3 else old_path
|
new_path = parts[3][2:] if len(parts) > 3 else old_path
|
||||||
|
|
||||||
if old_path.startswith('a/'):
|
if old_path.startswith("a/"):
|
||||||
old_path = old_path[2:]
|
old_path = old_path[2:]
|
||||||
if new_path.startswith('b/'):
|
if new_path.startswith("b/"):
|
||||||
new_path = new_path[2:]
|
new_path = new_path[2:]
|
||||||
|
|
||||||
file_obj = DiffFile(old_path=old_path, new_path=new_path)
|
file_obj = DiffFile(old_path=old_path, new_path=new_path)
|
||||||
@@ -68,63 +63,63 @@ class DiffParser:
|
|||||||
n = len(lines)
|
n = len(lines)
|
||||||
|
|
||||||
while i < n:
|
while i < n:
|
||||||
line = lines[i].rstrip('\n')
|
line = lines[i].rstrip("\n")
|
||||||
|
|
||||||
if line.startswith('new file mode '):
|
if line.startswith("new file mode "):
|
||||||
file_obj.new_file_mode = line.split()[-1]
|
file_obj.new_file_mode = line.split()[-1]
|
||||||
file_obj.change_type = "add"
|
file_obj.change_type = "add"
|
||||||
i += 1
|
i += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if line.startswith('deleted file mode '):
|
if line.startswith("deleted file mode "):
|
||||||
file_obj.deleted_file_mode = line.split()[-1]
|
file_obj.deleted_file_mode = line.split()[-1]
|
||||||
file_obj.change_type = "delete"
|
file_obj.change_type = "delete"
|
||||||
i += 1
|
i += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if line.startswith('similarity index '):
|
if line.startswith("similarity index "):
|
||||||
file_obj.similarity_index = line.split()[-1].rstrip('%')
|
file_obj.similarity_index = line.split()[-1].rstrip("%")
|
||||||
i += 1
|
i += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if line.startswith('rename from '):
|
if line.startswith("rename from "):
|
||||||
file_obj.rename_from = line[12:]
|
file_obj.rename_from = line[12:]
|
||||||
i += 1
|
i += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if line.startswith('rename to '):
|
if line.startswith("rename to "):
|
||||||
file_obj.rename_to = line[10:]
|
file_obj.rename_to = line[10:]
|
||||||
file_obj.change_type = "rename"
|
file_obj.change_type = "rename"
|
||||||
i += 1
|
i += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if line.startswith('---'):
|
if line.startswith("---"):
|
||||||
i += 1
|
i += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if line.startswith('+++'):
|
if line.startswith("+++"):
|
||||||
i += 1
|
i += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if line.startswith('@@'):
|
if line.startswith("@@"):
|
||||||
hunk, consumed = self._parse_hunk(lines, i)
|
hunk, consumed = self._parse_hunk(lines, i)
|
||||||
if hunk:
|
if hunk:
|
||||||
file_obj.hunks.append(hunk)
|
file_obj.hunks.append(hunk)
|
||||||
i += consumed
|
i += consumed
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if line.startswith('diff --git'):
|
if line.startswith("diff --git"):
|
||||||
break
|
break
|
||||||
|
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
return file_obj
|
return file_obj
|
||||||
|
|
||||||
def _parse_hunk(self, lines: list[str], start: int) -> tuple[Optional[DiffHunk], int]:
|
def _parse_hunk(self, lines, start):
|
||||||
if start >= len(lines):
|
if start >= len(lines):
|
||||||
return None, 0
|
return None, 0
|
||||||
|
|
||||||
line = lines[start].rstrip('\n')
|
line = lines[start].rstrip("\n")
|
||||||
match = self.HUNK_PATTERN.match(line)
|
match = self.HUNK_PATTERN.match(line)
|
||||||
|
|
||||||
if not match:
|
if not match:
|
||||||
@@ -151,30 +146,30 @@ class DiffParser:
|
|||||||
new_content = []
|
new_content = []
|
||||||
|
|
||||||
while i < n:
|
while i < n:
|
||||||
line = lines[i].rstrip('\n')
|
line = lines[i].rstrip("\n")
|
||||||
|
|
||||||
if line.startswith('@@'):
|
if line.startswith("@@"):
|
||||||
break
|
break
|
||||||
|
|
||||||
if line.startswith('diff --git'):
|
if line.startswith("diff --git"):
|
||||||
break
|
break
|
||||||
|
|
||||||
if line.startswith('---'):
|
if line.startswith("---"):
|
||||||
break
|
break
|
||||||
|
|
||||||
if line.startswith('+++'):
|
if line.startswith("+++"):
|
||||||
break
|
break
|
||||||
|
|
||||||
if old_lines_collected >= old_lines and new_lines_collected >= new_lines:
|
if old_lines_collected >= old_lines and new_lines_collected >= new_lines:
|
||||||
break
|
break
|
||||||
|
|
||||||
if line.startswith('+') and not line.startswith('+++'):
|
if line.startswith("+") and not line.startswith("+++"):
|
||||||
new_content.append(line)
|
new_content.append(line)
|
||||||
new_lines_collected += 1
|
new_lines_collected += 1
|
||||||
elif line.startswith('-') and not line.startswith('---'):
|
elif line.startswith("-") and not line.startswith("---"):
|
||||||
old_content.append(line)
|
old_content.append(line)
|
||||||
old_lines_collected += 1
|
old_lines_collected += 1
|
||||||
elif line.startswith(' ') or line == '':
|
elif line.startswith(" ") or line == "":
|
||||||
old_content.append(line)
|
old_content.append(line)
|
||||||
new_content.append(line)
|
new_content.append(line)
|
||||||
old_lines_collected += 1
|
old_lines_collected += 1
|
||||||
@@ -190,14 +185,12 @@ class DiffParser:
|
|||||||
return hunk, i - start
|
return hunk, i - start
|
||||||
|
|
||||||
|
|
||||||
def parse_diff(diff_content: str) -> list[DiffFile]:
|
def parse_diff(diff_content):
|
||||||
"""Parse diff content and return list of DiffFile objects."""
|
|
||||||
parser = DiffParser()
|
parser = DiffParser()
|
||||||
return parser.parse(diff_content)
|
return parser.parse(diff_content)
|
||||||
|
|
||||||
|
|
||||||
def parse_diff_from_file(filepath: str) -> list[DiffFile]:
|
def parse_diff_from_file(filepath):
|
||||||
"""Read a diff file and parse its contents."""
|
with open(filepath) as f:
|
||||||
with open(filepath, 'r') as f:
|
|
||||||
content = f.read()
|
content = f.read()
|
||||||
return parse_diff(content)
|
return parse_diff(content)
|
||||||
|
|||||||
0
tests/.gitkeep
Normal file
0
tests/.gitkeep
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Test configuration and fixtures."""
|
||||||
|
|||||||
@@ -1,5 +1,3 @@
|
|||||||
"""Pytest configuration and fixtures for gdiffer tests."""
|
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
@@ -26,6 +24,21 @@ index 1234567..89abcde 100644
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def python_diff():
|
||||||
|
return """diff --git a/utils.py b/utils.py
|
||||||
|
index abc123..def456 100644
|
||||||
|
--- a/utils.py
|
||||||
|
+++ b/utils.py
|
||||||
|
@@ -5,8 +5,10 @@ def calculate(a, b):
|
||||||
|
result = a + b
|
||||||
|
return result
|
||||||
|
|
||||||
|
+def multiply(a, b):
|
||||||
|
+ return a * b
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def multi_file_diff():
|
def multi_file_diff():
|
||||||
return """diff --git a/app.py b/app.py
|
return """diff --git a/app.py b/app.py
|
||||||
@@ -56,7 +69,7 @@ index abc123..xyz789 100644
|
|||||||
return True
|
return True
|
||||||
+
|
+
|
||||||
+def new_func(): pass
|
+def new_func(): pass
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
@@ -66,7 +79,7 @@ def sql_injection_diff():
|
|||||||
query = "SELECT * FROM users WHERE name = '" + username + "'"
|
query = "SELECT * FROM users WHERE name = '" + username + "'"
|
||||||
return execute_query(query)
|
return execute_query(query)
|
||||||
+ query = "SELECT * FROM users WHERE id = " + user_id
|
+ query = "SELECT * FROM users WHERE id = " + user_id
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
|
|||||||
@@ -1,11 +1,10 @@
|
|||||||
"""Tests for the CLI module."""
|
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from click.testing import CliRunner
|
||||||
|
|
||||||
sys.path.insert(0, str(Path(__file__).parent.parent / 'src'))
|
sys.path.insert(0, str(Path(__file__).parent.parent / 'src'))
|
||||||
|
|
||||||
from click.testing import CliRunner
|
|
||||||
from gdiffer.cli import main
|
from gdiffer.cli import main
|
||||||
|
|
||||||
|
|
||||||
@@ -14,6 +13,7 @@ class TestCLIMain:
|
|||||||
runner = CliRunner()
|
runner = CliRunner()
|
||||||
result = runner.invoke(main, ["--help"])
|
result = runner.invoke(main, ["--help"])
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
|
assert "Git Diff Explainer" in result.output or "diff" in result.output.lower()
|
||||||
|
|
||||||
def test_main_version(self):
|
def test_main_version(self):
|
||||||
runner = CliRunner()
|
runner = CliRunner()
|
||||||
@@ -44,6 +44,11 @@ index 123..456 100644
|
|||||||
result = runner.invoke(main, ["explain"])
|
result = runner.invoke(main, ["explain"])
|
||||||
assert result.exit_code != 0
|
assert result.exit_code != 0
|
||||||
|
|
||||||
|
def test_explain_invalid_diff(self):
|
||||||
|
runner = CliRunner()
|
||||||
|
result = runner.invoke(main, ["explain", "not a valid diff"])
|
||||||
|
assert result.exit_code != 0
|
||||||
|
|
||||||
def test_explain_json_format(self):
|
def test_explain_json_format(self):
|
||||||
diff = """diff --git a/test.py b/test.py
|
diff = """diff --git a/test.py b/test.py
|
||||||
new file mode 100644
|
new file mode 100644
|
||||||
@@ -57,6 +62,18 @@ new file mode 100644
|
|||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
assert "{" in result.output
|
assert "{" in result.output
|
||||||
|
|
||||||
|
def test_explain_plain_format(self):
|
||||||
|
diff = """diff --git a/test.py b/test.py
|
||||||
|
--- a/test.py
|
||||||
|
+++ b/test.py
|
||||||
|
@@ -1 +1 @@
|
||||||
|
-old
|
||||||
|
+new
|
||||||
|
"""
|
||||||
|
runner = CliRunner()
|
||||||
|
result = runner.invoke(main, ["--output", "plain", "explain", diff])
|
||||||
|
assert result.exit_code == 0
|
||||||
|
|
||||||
|
|
||||||
class TestIssuesCommand:
|
class TestIssuesCommand:
|
||||||
def test_issues_with_security_issue(self):
|
def test_issues_with_security_issue(self):
|
||||||
|
|||||||
@@ -1,11 +1,9 @@
|
|||||||
"""Tests for the CodeAnalyzer module."""
|
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
sys.path.insert(0, str(Path(__file__).parent.parent / 'src'))
|
sys.path.insert(0, str(Path(__file__).parent.parent / 'src'))
|
||||||
|
|
||||||
from gdiffer.code_analyzer import CodeAnalyzer, analyze_code, summarize_change
|
from gdiffer.code_analyzer import analyze_code, summarize_change
|
||||||
|
|
||||||
|
|
||||||
class TestCodeAnalyzer:
|
class TestCodeAnalyzer:
|
||||||
@@ -23,32 +21,65 @@ class Greeter:
|
|||||||
return f"Hello, {name}"
|
return f"Hello, {name}"
|
||||||
"""
|
"""
|
||||||
result = code_analyzer.analyze_code(code, "python")
|
result = code_analyzer.analyze_code(code, "python")
|
||||||
|
|
||||||
assert result['language'] == "python"
|
assert result['language'] == "python"
|
||||||
|
assert 'functions' in result or 'ast_info' in result
|
||||||
|
|
||||||
def test_analyze_javascript_code(self, code_analyzer):
|
def test_analyze_javascript_code(self, code_analyzer):
|
||||||
code = """function add(a, b) {
|
code = """function add(a, b) {
|
||||||
return a + b;
|
return a + b;
|
||||||
}"""
|
}
|
||||||
|
|
||||||
|
const multiply = (x, y) => x * y;
|
||||||
|
"""
|
||||||
result = code_analyzer.analyze_code(code, "javascript")
|
result = code_analyzer.analyze_code(code, "javascript")
|
||||||
|
|
||||||
assert result['language'] == "javascript"
|
assert result['language'] == "javascript"
|
||||||
|
|
||||||
|
def test_analyze_rust_code(self, code_analyzer):
|
||||||
|
code = """fn main() {
|
||||||
|
println!("Hello");
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Point {
|
||||||
|
x: i32,
|
||||||
|
y: i32,
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
result = code_analyzer.analyze_code(code, "rust")
|
||||||
|
|
||||||
|
assert result['language'] == "rust"
|
||||||
|
|
||||||
def test_summarize_change_simple(self, code_analyzer):
|
def test_summarize_change_simple(self, code_analyzer):
|
||||||
old_code = "def hello():\n return 'Hello'"
|
old_code = "def hello():\n return 'Hello'"
|
||||||
new_code = "def hello():\n return 'Hello, World!'"
|
new_code = "def hello():\n return 'Hello, World!'"
|
||||||
|
|
||||||
summary = code_analyzer.summarize_change(old_code, new_code, "python")
|
summary = code_analyzer.summarize_change(old_code, new_code, "python")
|
||||||
|
|
||||||
assert isinstance(summary, str)
|
assert isinstance(summary, str)
|
||||||
assert len(summary) > 0
|
assert len(summary) > 0
|
||||||
|
|
||||||
def test_summarize_change_added_function(self, code_analyzer):
|
def test_summarize_change_added_function(self, code_analyzer):
|
||||||
old_code = ""
|
old_code = ""
|
||||||
new_code = "def new_func():\n pass"
|
new_code = "def new_func():\n pass"
|
||||||
|
|
||||||
summary = code_analyzer.summarize_change(old_code, new_code, "python")
|
summary = code_analyzer.summarize_change(old_code, new_code, "python")
|
||||||
|
|
||||||
assert isinstance(summary, str)
|
assert isinstance(summary, str)
|
||||||
assert len(summary) > 0
|
assert len(summary) > 0
|
||||||
|
|
||||||
|
def test_summarize_change_removed_function(self, code_analyzer):
|
||||||
|
old_code = "def old_func():\n pass"
|
||||||
|
new_code = ""
|
||||||
|
|
||||||
|
summary = code_analyzer.summarize_change(old_code, new_code, "python")
|
||||||
|
|
||||||
|
assert isinstance(summary, str)
|
||||||
|
|
||||||
def test_analyze_code_without_parser(self, code_analyzer):
|
def test_analyze_code_without_parser(self, code_analyzer):
|
||||||
code = "def test(): pass"
|
code = "def test(): pass"
|
||||||
result = code_analyzer.analyze_code(code, "unknown_language")
|
result = code_analyzer.analyze_code(code, "unknown_language")
|
||||||
|
|
||||||
assert 'change_summary' in result
|
assert 'change_summary' in result
|
||||||
|
|
||||||
def test_fallback_analysis_detects_functions(self, code_analyzer):
|
def test_fallback_analysis_detects_functions(self, code_analyzer):
|
||||||
@@ -59,7 +90,9 @@ def multiply(x, y):
|
|||||||
return x * y
|
return x * y
|
||||||
"""
|
"""
|
||||||
result = code_analyzer._analyze_without_parser(code)
|
result = code_analyzer._analyze_without_parser(code)
|
||||||
|
|
||||||
assert isinstance(result, str)
|
assert isinstance(result, str)
|
||||||
|
assert "calculate_sum" in result or "multiply" in result or "function" in result.lower()
|
||||||
|
|
||||||
def test_fallback_analysis_detects_classes(self, code_analyzer):
|
def test_fallback_analysis_detects_classes(self, code_analyzer):
|
||||||
code = """class Calculator:
|
code = """class Calculator:
|
||||||
@@ -67,8 +100,16 @@ def multiply(x, y):
|
|||||||
return a + b
|
return a + b
|
||||||
"""
|
"""
|
||||||
result = code_analyzer._analyze_without_parser(code)
|
result = code_analyzer._analyze_without_parser(code)
|
||||||
|
|
||||||
assert "Calculator" in result or "class" in result.lower()
|
assert "Calculator" in result or "class" in result.lower()
|
||||||
|
|
||||||
|
def test_fallback_analysis_line_count(self, code_analyzer):
|
||||||
|
new_code = "line1\nline2\nline3"
|
||||||
|
|
||||||
|
result = code_analyzer._analyze_without_parser(new_code)
|
||||||
|
|
||||||
|
assert isinstance(result, str)
|
||||||
|
|
||||||
|
|
||||||
class TestAnalyzeCodeFunction:
|
class TestAnalyzeCodeFunction:
|
||||||
def test_analyze_code_function(self):
|
def test_analyze_code_function(self):
|
||||||
|
|||||||
@@ -1,17 +1,16 @@
|
|||||||
"""Tests for the DiffParser module."""
|
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
sys.path.insert(0, str(Path(__file__).parent.parent / 'src'))
|
sys.path.insert(0, str(Path(__file__).parent.parent / 'src'))
|
||||||
|
|
||||||
from gdiffer.parser import DiffParser, parse_diff
|
|
||||||
from gdiffer.models import DiffFile, DiffHunk
|
from gdiffer.models import DiffFile, DiffHunk
|
||||||
|
from gdiffer.parser import parse_diff
|
||||||
|
|
||||||
|
|
||||||
class TestDiffParser:
|
class TestDiffParser:
|
||||||
def test_parse_simple_diff(self, diff_parser, sample_diff):
|
def test_parse_simple_diff(self, diff_parser, sample_diff):
|
||||||
files = diff_parser.parse(sample_diff)
|
files = diff_parser.parse(sample_diff)
|
||||||
|
|
||||||
assert len(files) == 1
|
assert len(files) == 1
|
||||||
assert files[0].filename == "src/main.py"
|
assert files[0].filename == "src/main.py"
|
||||||
assert len(files[0].hunks) == 1
|
assert len(files[0].hunks) == 1
|
||||||
@@ -27,9 +26,11 @@ index 0000000..1234567
|
|||||||
+ pass
|
+ pass
|
||||||
"""
|
"""
|
||||||
files = diff_parser.parse(diff)
|
files = diff_parser.parse(diff)
|
||||||
|
|
||||||
assert len(files) == 1
|
assert len(files) == 1
|
||||||
assert files[0].is_new
|
assert files[0].is_new
|
||||||
assert files[0].change_type == "add"
|
assert files[0].change_type == "add"
|
||||||
|
assert files[0].new_file_mode == "100644"
|
||||||
|
|
||||||
def test_parse_deleted_file(self, diff_parser):
|
def test_parse_deleted_file(self, diff_parser):
|
||||||
diff = """diff --git a/old_file.py b/old_file.py
|
diff = """diff --git a/old_file.py b/old_file.py
|
||||||
@@ -43,6 +44,7 @@ index 1234567..0000000
|
|||||||
-
|
-
|
||||||
"""
|
"""
|
||||||
files = diff_parser.parse(diff)
|
files = diff_parser.parse(diff)
|
||||||
|
|
||||||
assert len(files) == 1
|
assert len(files) == 1
|
||||||
assert files[0].is_deleted
|
assert files[0].is_deleted
|
||||||
assert files[0].change_type == "delete"
|
assert files[0].change_type == "delete"
|
||||||
@@ -58,21 +60,38 @@ index 1234567..89abcde 100644
|
|||||||
@@ -1,3 +1,3 @@
|
@@ -1,3 +1,3 @@
|
||||||
def renamed_function():
|
def renamed_function():
|
||||||
- return "old"
|
- return "old"
|
||||||
+ return "new""""
|
+ return "new"
|
||||||
|
"""
|
||||||
files = diff_parser.parse(diff)
|
files = diff_parser.parse(diff)
|
||||||
|
|
||||||
assert len(files) == 1
|
assert len(files) == 1
|
||||||
assert files[0].is_rename
|
assert files[0].is_rename
|
||||||
assert files[0].rename_from == "old_name.py"
|
assert files[0].rename_from == "old_name.py"
|
||||||
|
assert files[0].rename_to == "new_name.py"
|
||||||
assert files[0].change_type == "rename"
|
assert files[0].change_type == "rename"
|
||||||
|
|
||||||
def test_parse_multi_file(self, diff_parser, multi_file_diff):
|
def test_parse_multi_file(self, diff_parser, multi_file_diff):
|
||||||
files = diff_parser.parse(multi_file_diff)
|
files = diff_parser.parse(multi_file_diff)
|
||||||
|
|
||||||
assert len(files) == 3
|
assert len(files) == 3
|
||||||
|
|
||||||
file_types = [f.change_type for f in files]
|
file_types = [f.change_type for f in files]
|
||||||
assert "add" in file_types
|
assert "add" in file_types
|
||||||
assert "delete" in file_types
|
assert "delete" in file_types
|
||||||
assert "modify" in file_types
|
assert "modify" in file_types
|
||||||
|
|
||||||
|
def test_hunk_parsing(self, diff_parser, sample_diff):
|
||||||
|
files = diff_parser.parse(sample_diff)
|
||||||
|
file_obj = files[0]
|
||||||
|
|
||||||
|
assert len(file_obj.hunks) == 1
|
||||||
|
hunk = file_obj.hunks[0]
|
||||||
|
|
||||||
|
assert hunk.old_start > 0
|
||||||
|
assert hunk.new_start > 0
|
||||||
|
assert hunk.old_lines > 0
|
||||||
|
assert hunk.new_lines > 0
|
||||||
|
|
||||||
def test_get_added_lines(self, diff_parser):
|
def test_get_added_lines(self, diff_parser):
|
||||||
diff = """diff --git a/test.py b/test.py
|
diff = """diff --git a/test.py b/test.py
|
||||||
--- a/test.py
|
--- a/test.py
|
||||||
@@ -85,6 +104,7 @@ index 1234567..89abcde 100644
|
|||||||
"""
|
"""
|
||||||
files = diff_parser.parse(diff)
|
files = diff_parser.parse(diff)
|
||||||
hunk = files[0].hunks[0]
|
hunk = files[0].hunks[0]
|
||||||
|
|
||||||
added_lines = hunk.get_added_lines()
|
added_lines = hunk.get_added_lines()
|
||||||
assert len(added_lines) == 2
|
assert len(added_lines) == 2
|
||||||
|
|
||||||
@@ -99,9 +119,32 @@ index 1234567..89abcde 100644
|
|||||||
"""
|
"""
|
||||||
files = diff_parser.parse(diff)
|
files = diff_parser.parse(diff)
|
||||||
hunk = files[0].hunks[0]
|
hunk = files[0].hunks[0]
|
||||||
|
|
||||||
removed_lines = hunk.get_removed_lines()
|
removed_lines = hunk.get_removed_lines()
|
||||||
assert len(removed_lines) == 1
|
assert len(removed_lines) == 1
|
||||||
|
|
||||||
|
def test_file_extension(self, diff_parser):
|
||||||
|
diff_py = """diff --git a/test.py b/test.py
|
||||||
|
--- a/test.py
|
||||||
|
+++ b/test.py
|
||||||
|
@@ -1 +1 @@
|
||||||
|
-old
|
||||||
|
+new
|
||||||
|
"""
|
||||||
|
diff_js = """diff --git a/app.js b/app.js
|
||||||
|
--- a/app.js
|
||||||
|
+++ b/app.js
|
||||||
|
@@ -1 +1 @@
|
||||||
|
-old
|
||||||
|
+new
|
||||||
|
"""
|
||||||
|
|
||||||
|
files_py = diff_parser.parse(diff_py)
|
||||||
|
files_js = diff_parser.parse(diff_js)
|
||||||
|
|
||||||
|
assert files_py[0].extension == "py"
|
||||||
|
assert files_js[0].extension == "js"
|
||||||
|
|
||||||
def test_empty_diff(self, diff_parser):
|
def test_empty_diff(self, diff_parser):
|
||||||
files = diff_parser.parse("")
|
files = diff_parser.parse("")
|
||||||
assert len(files) == 0
|
assert len(files) == 0
|
||||||
@@ -132,19 +175,33 @@ class TestDiffFile:
|
|||||||
file_obj = DiffFile(old_path=None, new_path="new.py", new_file_mode="100644")
|
file_obj = DiffFile(old_path=None, new_path="new.py", new_file_mode="100644")
|
||||||
assert file_obj.is_new
|
assert file_obj.is_new
|
||||||
|
|
||||||
|
file_obj2 = DiffFile(old_path="old.py", new_path="new.py")
|
||||||
|
assert not file_obj2.is_new
|
||||||
|
|
||||||
def test_is_deleted_property(self):
|
def test_is_deleted_property(self):
|
||||||
file_obj = DiffFile(old_path="old.py", new_path=None, deleted_file_mode="100644")
|
file_obj = DiffFile(old_path="old.py", new_path=None, deleted_file_mode="100644")
|
||||||
assert file_obj.is_deleted
|
assert file_obj.is_deleted
|
||||||
|
|
||||||
def test_is_rename_property(self):
|
def test_is_rename_property(self):
|
||||||
file_obj = DiffFile(old_path="old.py", new_path="new.py", rename_from="old.py", rename_to="new.py")
|
file_obj = DiffFile(
|
||||||
|
old_path="old.py",
|
||||||
|
new_path="new.py",
|
||||||
|
rename_from="old.py",
|
||||||
|
rename_to="new.py"
|
||||||
|
)
|
||||||
assert file_obj.is_rename
|
assert file_obj.is_rename
|
||||||
|
|
||||||
|
|
||||||
class TestDiffHunk:
|
class TestDiffHunk:
|
||||||
def test_get_modified_lines(self):
|
def test_get_modified_lines(self):
|
||||||
hunk = DiffHunk(old_start=1, old_lines=3, new_start=1, new_lines=3,
|
hunk = DiffHunk(
|
||||||
old_lines_content=["-old1", "-old2", "-old3"],
|
old_start=1,
|
||||||
new_lines_content=["+new1", "+new2", "+new3"])
|
old_lines=3,
|
||||||
|
new_start=1,
|
||||||
|
new_lines=3,
|
||||||
|
old_lines_content=["-old1", "-old2", "-old3"],
|
||||||
|
new_lines_content=["+new1", "+new2", "+new3"]
|
||||||
|
)
|
||||||
|
|
||||||
modified = hunk.get_modified_lines()
|
modified = hunk.get_modified_lines()
|
||||||
assert len(modified) == 3
|
assert len(modified) == 3
|
||||||
|
|||||||
@@ -1,59 +1,86 @@
|
|||||||
"""Tests for the IssueDetector module."""
|
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
sys.path.insert(0, str(Path(__file__).parent.parent / 'src'))
|
sys.path.insert(0, str(Path(__file__).parent.parent / 'src'))
|
||||||
|
|
||||||
from gdiffer.issue_detector import IssueDetector, detect_issues, suggest_improvements
|
from gdiffer.issue_detector import detect_issues, suggest_improvements
|
||||||
|
|
||||||
|
|
||||||
class TestIssueDetector:
|
class TestIssueDetector:
|
||||||
def test_detect_sql_injection(self, issue_detector):
|
def test_detect_sql_injection(self, issue_detector):
|
||||||
code = 'query = "SELECT * FROM users WHERE name = \'" + username + "\'"'
|
code = 'query = "SELECT * FROM users WHERE name = \'" + username + "\'"'
|
||||||
issues = issue_detector.detect_issues(code, "python")
|
issues = issue_detector.detect_issues(code, "python")
|
||||||
|
|
||||||
sql_issues = [i for i in issues if i.type == "sql_injection"]
|
sql_issues = [i for i in issues if i.type == "sql_injection"]
|
||||||
assert len(sql_issues) > 0
|
assert len(sql_issues) > 0
|
||||||
|
|
||||||
|
issue = sql_issues[0]
|
||||||
|
assert issue.severity == "critical"
|
||||||
|
assert "SQL" in issue.title
|
||||||
|
|
||||||
def test_detect_xss(self, issue_detector):
|
def test_detect_xss(self, issue_detector):
|
||||||
code = "element.innerHTML = userInput"
|
code = "element.innerHTML = userInput"
|
||||||
issues = issue_detector.detect_issues(code, "javascript")
|
issues = issue_detector.detect_issues(code, "javascript")
|
||||||
|
|
||||||
xss_issues = [i for i in issues if i.type == "xss"]
|
xss_issues = [i for i in issues if i.type == "xss"]
|
||||||
assert len(xss_issues) > 0
|
assert len(xss_issues) > 0
|
||||||
|
|
||||||
def test_detect_command_injection(self, issue_detector):
|
def test_detect_command_injection(self, issue_detector):
|
||||||
code = "os.system('rm -rf /tmp/' + user_input)"
|
code = "os.system('rm -rf /tmp/' + user_input)"
|
||||||
issues = issue_detector.detect_issues(code, "python")
|
issues = issue_detector.detect_issues(code, "python")
|
||||||
|
|
||||||
cmd_issues = [i for i in issues if i.type == "command_injection"]
|
cmd_issues = [i for i in issues if i.type == "command_injection"]
|
||||||
assert len(cmd_issues) > 0
|
assert len(cmd_issues) > 0
|
||||||
|
|
||||||
def test_detect_eval_usage(self, issue_detector):
|
def test_detect_eval_usage(self, issue_detector):
|
||||||
code = "result = eval(user_code)"
|
code = "result = eval(user_code)"
|
||||||
issues = issue_detector.detect_issues(code, "python")
|
issues = issue_detector.detect_issues(code, "python")
|
||||||
|
|
||||||
eval_issues = [i for i in issues if i.type == "code_injection"]
|
eval_issues = [i for i in issues if i.type == "code_injection"]
|
||||||
assert len(eval_issues) > 0
|
assert len(eval_issues) > 0
|
||||||
|
|
||||||
def test_detect_hardcoded_secret(self, issue_detector):
|
def test_detect_hardcoded_secret(self, issue_detector):
|
||||||
code = 'api_key = "sk-1234567890abcdef"'
|
code = 'api_key = "sk-1234567890abcdef"'
|
||||||
issues = issue_detector.detect_issues(code, "python")
|
issues = issue_detector.detect_issues(code, "python")
|
||||||
|
|
||||||
secret_issues = [i for i in issues if i.type == "hardcoded_secret"]
|
secret_issues = [i for i in issues if i.type == "hardcoded_secret"]
|
||||||
assert len(secret_issues) > 0
|
assert len(secret_issues) > 0
|
||||||
|
|
||||||
def test_detect_insecure_http(self, issue_detector):
|
def test_detect_insecure_http(self, issue_detector):
|
||||||
code = 'response = requests.get("http://api.example.com")'
|
code = 'response = requests.get("http://api.example.com")'
|
||||||
issues = issue_detector.detect_issues(code, "python")
|
issues = issue_detector.detect_issues(code, "python")
|
||||||
|
|
||||||
http_issues = [i for i in issues if i.type == "insecure_transport"]
|
http_issues = [i for i in issues if i.type == "insecure_transport"]
|
||||||
assert len(http_issues) > 0
|
assert len(http_issues) > 0
|
||||||
|
|
||||||
|
def test_detect_weak_random(self, issue_detector):
|
||||||
|
code = "token = random.randint(0, 9999)"
|
||||||
|
issues = issue_detector.detect_issues(code, "python")
|
||||||
|
|
||||||
|
crypto_issues = [i for i in issues if i.type == "weak_crypto"]
|
||||||
|
assert len(crypto_issues) > 0
|
||||||
|
|
||||||
|
def test_detect_bare_except(self, issue_detector):
|
||||||
|
code = """try:
|
||||||
|
dangerous_operation()
|
||||||
|
except:
|
||||||
|
pass"""
|
||||||
|
issues = issue_detector.detect_issues(code, "python")
|
||||||
|
|
||||||
|
bare_except = [i for i in issues if i.type == "bare_except"]
|
||||||
|
assert len(bare_except) > 0
|
||||||
|
|
||||||
def test_detect_debug_statements(self, issue_detector):
|
def test_detect_debug_statements(self, issue_detector):
|
||||||
code = "print('Debug: value =', value)"
|
code = "print('Debug: value =', value)"
|
||||||
issues = issue_detector.detect_issues(code, "python")
|
issues = issue_detector.detect_issues(code, "python")
|
||||||
|
|
||||||
debug_issues = [i for i in issues if i.type == "debug_statement"]
|
debug_issues = [i for i in issues if i.type == "debug_statement"]
|
||||||
assert len(debug_issues) > 0
|
assert len(debug_issues) > 0
|
||||||
|
|
||||||
def test_detect_todo_comments(self, issue_detector):
|
def test_detect_todo_comments(self, issue_detector):
|
||||||
code = "# TODO: Fix this later"
|
code = "# TODO: Fix this later"
|
||||||
issues = issue_detector.detect_issues(code, "python")
|
issues = issue_detector.detect_issues(code, "python")
|
||||||
|
|
||||||
todo_issues = [i for i in issues if i.type == "code_tag"]
|
todo_issues = [i for i in issues if i.type == "code_tag"]
|
||||||
assert len(todo_issues) > 0
|
assert len(todo_issues) > 0
|
||||||
|
|
||||||
@@ -63,6 +90,7 @@ class TestIssueDetector:
|
|||||||
return result
|
return result
|
||||||
"""
|
"""
|
||||||
issues = issue_detector.detect_issues(code, "python")
|
issues = issue_detector.detect_issues(code, "python")
|
||||||
|
|
||||||
assert len(issues) == 0
|
assert len(issues) == 0
|
||||||
|
|
||||||
def test_issue_line_number(self, issue_detector):
|
def test_issue_line_number(self, issue_detector):
|
||||||
@@ -71,16 +99,49 @@ line2 = 2
|
|||||||
password = "secret"
|
password = "secret"
|
||||||
"""
|
"""
|
||||||
issues = issue_detector.detect_issues(code, "python")
|
issues = issue_detector.detect_issues(code, "python")
|
||||||
|
|
||||||
secret_issues = [i for i in issues if i.type == "hardcoded_secret"]
|
secret_issues = [i for i in issues if i.type == "hardcoded_secret"]
|
||||||
assert len(secret_issues) > 0
|
assert len(secret_issues) > 0
|
||||||
assert secret_issues[0].line == 3
|
assert secret_issues[0].line == 3
|
||||||
|
|
||||||
|
def test_detect_diff_issues(self, issue_detector, sql_injection_diff):
|
||||||
|
old_code = "x = 1"
|
||||||
|
new_code = "x = 1\nquery = 'SELECT * FROM users WHERE id = ' + user_id"
|
||||||
|
|
||||||
|
issues = issue_detector.detect_diff_issues(old_code, new_code, "python")
|
||||||
|
|
||||||
|
assert isinstance(issues, list)
|
||||||
|
|
||||||
def test_suggest_improvements(self, issue_detector):
|
def test_suggest_improvements(self, issue_detector):
|
||||||
code = 'password = "secret"'
|
code = 'query = "SELECT * FROM users WHERE id = " + user_id'
|
||||||
suggestions = issue_detector.suggest_improvements(code, "python")
|
suggestions = issue_detector.suggest_improvements(code, "python")
|
||||||
|
|
||||||
assert isinstance(suggestions, list)
|
assert isinstance(suggestions, list)
|
||||||
assert len(suggestions) > 0
|
assert len(suggestions) > 0
|
||||||
|
|
||||||
|
def test_check_security_patterns_only(self, issue_detector):
|
||||||
|
code = """password = "secret"
|
||||||
|
query = "SELECT * FROM users"
|
||||||
|
"""
|
||||||
|
issues = issue_detector.check_security_patterns(code)
|
||||||
|
|
||||||
|
assert all(i.severity in ['critical', 'high', 'medium'] for i in issues)
|
||||||
|
|
||||||
|
def test_check_code_quality_only(self, issue_detector):
|
||||||
|
code = """# TODO: fix later
|
||||||
|
print("debug")
|
||||||
|
"""
|
||||||
|
issues = issue_detector.check_code_quality(code)
|
||||||
|
|
||||||
|
assert all(i.severity == 'low' for i in issues)
|
||||||
|
|
||||||
|
def test_issue_has_suggestion(self, issue_detector):
|
||||||
|
code = 'password = "secret"'
|
||||||
|
issues = issue_detector.detect_issues(code, "python")
|
||||||
|
|
||||||
|
if issues:
|
||||||
|
assert issues[0].suggestion
|
||||||
|
|
||||||
|
|
||||||
class TestDetectIssuesFunction:
|
class TestDetectIssuesFunction:
|
||||||
def test_detect_issues_function(self):
|
def test_detect_issues_function(self):
|
||||||
@@ -91,6 +152,11 @@ class TestDetectIssuesFunction:
|
|||||||
issues = detect_issues("def test():\n return 1", "python")
|
issues = detect_issues("def test():\n return 1", "python")
|
||||||
assert issues == []
|
assert issues == []
|
||||||
|
|
||||||
|
def test_detect_issues_with_pass(self):
|
||||||
|
issues = detect_issues("def test(): pass", "python")
|
||||||
|
pass_issues = [i for i in issues if i.type == "empty_block"]
|
||||||
|
assert len(pass_issues) > 0
|
||||||
|
|
||||||
|
|
||||||
class TestSuggestImprovementsFunction:
|
class TestSuggestImprovementsFunction:
|
||||||
def test_suggest_improvements_function(self):
|
def test_suggest_improvements_function(self):
|
||||||
@@ -101,12 +167,26 @@ class TestSuggestImprovementsFunction:
|
|||||||
suggestions = suggest_improvements("def test():\n return 1", "python")
|
suggestions = suggest_improvements("def test():\n return 1", "python")
|
||||||
assert suggestions == []
|
assert suggestions == []
|
||||||
|
|
||||||
|
def test_suggest_improvements_with_pass(self):
|
||||||
|
suggestions = suggest_improvements("def test(): pass", "python")
|
||||||
|
assert len(suggestions) > 0
|
||||||
|
|
||||||
|
|
||||||
class TestIssueModel:
|
class TestIssueModel:
|
||||||
def test_issue_creation(self):
|
def test_issue_creation(self):
|
||||||
from gdiffer.issue_detector import Issue
|
from gdiffer.issue_detector import Issue
|
||||||
issue = Issue(type="test", severity="high", title="Test Issue",
|
|
||||||
description="Test description", line=10, suggestion="Fix this")
|
issue = Issue(
|
||||||
|
type="test",
|
||||||
|
severity="high",
|
||||||
|
title="Test Issue",
|
||||||
|
description="Test description",
|
||||||
|
line=10,
|
||||||
|
suggestion="Fix this"
|
||||||
|
)
|
||||||
|
|
||||||
assert issue.type == "test"
|
assert issue.type == "test"
|
||||||
assert issue.severity == "high"
|
assert issue.severity == "high"
|
||||||
|
assert issue.title == "Test Issue"
|
||||||
assert issue.line == 10
|
assert issue.line == 10
|
||||||
|
assert issue.suggestion == "Fix this"
|
||||||
|
|||||||
@@ -1,21 +1,27 @@
|
|||||||
"""Tests for the LanguageDetector module."""
|
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
sys.path.insert(0, str(Path(__file__).parent.parent / 'src'))
|
sys.path.insert(0, str(Path(__file__).parent.parent / 'src'))
|
||||||
|
|
||||||
from gdiffer.language_detector import LanguageDetector, detect_language
|
from gdiffer.language_detector import detect_language
|
||||||
|
|
||||||
|
|
||||||
class TestLanguageDetector:
|
class TestLanguageDetector:
|
||||||
def test_detect_python_extension(self, language_detector):
|
def test_detect_python_extension(self, language_detector):
|
||||||
assert language_detector.detect_from_filename("test.py") == "python"
|
assert language_detector.detect_from_filename("test.py") == "python"
|
||||||
assert language_detector.detect_from_filename("script.pyw") == "python"
|
assert language_detector.detect_from_filename("script.pyw") == "python"
|
||||||
|
assert language_detector.detect_from_filename("module.pyx") == "python"
|
||||||
|
|
||||||
def test_detect_javascript_extension(self, language_detector):
|
def test_detect_javascript_extension(self, language_detector):
|
||||||
assert language_detector.detect_from_filename("app.js") == "javascript"
|
assert language_detector.detect_from_filename("app.js") == "javascript"
|
||||||
|
assert language_detector.detect_from_filename("module.mjs") == "javascript"
|
||||||
|
assert language_detector.detect_from_filename("component.cjs") == "javascript"
|
||||||
|
assert language_detector.detect_from_filename("file.jsx") == "javascript"
|
||||||
|
|
||||||
|
def test_detect_typescript_extension(self, language_detector):
|
||||||
|
assert language_detector.detect_from_filename("app.ts") == "typescript"
|
||||||
assert language_detector.detect_from_filename("component.tsx") == "typescript"
|
assert language_detector.detect_from_filename("component.tsx") == "typescript"
|
||||||
|
assert language_detector.detect_from_filename("module.mts") == "typescript"
|
||||||
|
|
||||||
def test_detect_java_extension(self, language_detector):
|
def test_detect_java_extension(self, language_detector):
|
||||||
assert language_detector.detect_from_filename("Main.java") == "java"
|
assert language_detector.detect_from_filename("Main.java") == "java"
|
||||||
@@ -28,10 +34,14 @@ class TestLanguageDetector:
|
|||||||
|
|
||||||
def test_detect_c_extensions(self, language_detector):
|
def test_detect_c_extensions(self, language_detector):
|
||||||
assert language_detector.detect_from_filename("file.c") == "c"
|
assert language_detector.detect_from_filename("file.c") == "c"
|
||||||
|
assert language_detector.detect_from_filename("header.h") == "c"
|
||||||
assert language_detector.detect_from_filename("source.cpp") == "cpp"
|
assert language_detector.detect_from_filename("source.cpp") == "cpp"
|
||||||
|
assert language_detector.detect_from_filename("file.cc") == "cpp"
|
||||||
|
assert language_detector.detect_from_filename("header.hpp") == "cpp"
|
||||||
|
|
||||||
def test_detect_ruby_extension(self, language_detector):
|
def test_detect_ruby_extension(self, language_detector):
|
||||||
assert language_detector.detect_from_filename("script.rb") == "ruby"
|
assert language_detector.detect_from_filename("script.rb") == "ruby"
|
||||||
|
assert language_detector.detect_from_filename("template.erb") == "ruby"
|
||||||
|
|
||||||
def test_detect_php_extension(self, language_detector):
|
def test_detect_php_extension(self, language_detector):
|
||||||
assert language_detector.detect_from_filename("index.php") == "php"
|
assert language_detector.detect_from_filename("index.php") == "php"
|
||||||
@@ -42,28 +52,62 @@ class TestLanguageDetector:
|
|||||||
|
|
||||||
def test_detect_from_content_python(self, language_detector):
|
def test_detect_from_content_python(self, language_detector):
|
||||||
code = """def hello():
|
code = """def hello():
|
||||||
return "Hello"
|
return "Hello, World!"
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
hello()
|
||||||
"""
|
"""
|
||||||
assert language_detector.detect_from_content(code) == "python"
|
assert language_detector.detect_from_content(code) == "python"
|
||||||
|
|
||||||
def test_detect_from_content_javascript(self, language_detector):
|
def test_detect_from_content_javascript(self, language_detector):
|
||||||
code = """function greet(name) {
|
code = """function greet(name) {
|
||||||
return "Hello";
|
return "Hello, " + name;
|
||||||
}"""
|
}
|
||||||
|
|
||||||
|
const result = greet("World");
|
||||||
|
"""
|
||||||
assert language_detector.detect_from_content(code) == "javascript"
|
assert language_detector.detect_from_content(code) == "javascript"
|
||||||
|
|
||||||
|
def test_detect_from_content_rust(self, language_detector):
|
||||||
|
code = """fn main() {
|
||||||
|
println!("Hello, World!");
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add(a: i32, b: i32) -> i32 {
|
||||||
|
a + b
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
assert language_detector.detect_from_content(code) == "rust"
|
||||||
|
|
||||||
|
def test_detect_from_content_go(self, language_detector):
|
||||||
|
code = """package main
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
fmt.Println("Hello")
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
assert language_detector.detect_from_content(code) == "go"
|
||||||
|
|
||||||
def test_detect_combined_filename_content(self, language_detector):
|
def test_detect_combined_filename_content(self, language_detector):
|
||||||
result = language_detector.detect("test.py", "def hello(): pass")
|
result = language_detector.detect("test.py", "def hello(): pass")
|
||||||
assert result == "python"
|
assert result == "python"
|
||||||
|
|
||||||
|
def test_detect_script_without_extension(self, language_detector):
|
||||||
|
result = language_detector.detect("Makefile", "all:\n\techo hello")
|
||||||
|
assert result == "text"
|
||||||
|
|
||||||
def test_get_supported_languages(self, language_detector):
|
def test_get_supported_languages(self, language_detector):
|
||||||
languages = language_detector.get_supported_languages()
|
languages = language_detector.get_supported_languages()
|
||||||
|
assert isinstance(languages, list)
|
||||||
assert "python" in languages
|
assert "python" in languages
|
||||||
assert "javascript" in languages
|
assert "javascript" in languages
|
||||||
assert "java" in languages
|
assert "java" in languages
|
||||||
|
|
||||||
def test_is_language_supported(self, language_detector):
|
def test_is_language_supported(self, language_detector):
|
||||||
assert language_detector.is_language_supported("python")
|
assert language_detector.is_language_supported("python")
|
||||||
|
assert language_detector.is_language_supported("javascript")
|
||||||
assert not language_detector.is_language_supported("brainfuck")
|
assert not language_detector.is_language_supported("brainfuck")
|
||||||
|
|
||||||
|
|
||||||
@@ -75,3 +119,7 @@ class TestDetectLanguageFunction:
|
|||||||
def test_detect_language_unknown(self):
|
def test_detect_language_unknown(self):
|
||||||
result = detect_language("file.xyz", "")
|
result = detect_language("file.xyz", "")
|
||||||
assert result == "text" or result is None
|
assert result == "text" or result is None
|
||||||
|
|
||||||
|
def test_detect_language_from_filename_only(self):
|
||||||
|
result = detect_language("main.java")
|
||||||
|
assert result == "java"
|
||||||
|
|||||||
Reference in New Issue
Block a user