Compare commits
17 Commits
| SHA1 |
|---|
| b650ff0edf |
| bf578ebdb9 |
| e0aa1be2d6 |
| a36ff976ab |
| 7c90b04181 |
| 345e55626f |
| 4d560c024b |
| 1ec6ccbf1b |
| 58a16b9eef |
| 1d20812638 |
| 31c89214ce |
| b01fa6d6fb |
| 7151f75346 |
| 275119db73 |
| 8186d226f2 |
| 4efc8894d1 |
| 59f45ffefc |
.gitattributes (vendored): new file, 0 lines
CI workflow (pip replaced by Poetry in both the test and build jobs):

```diff
@@ -17,24 +17,36 @@ jobs:
       with:
         python-version: '3.11'
 
+      - name: Install Poetry
+        uses: snok/install-poetry@v1
+        with:
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+          installer-parallel: true
+
+      - name: Cache Poetry dependencies
+        uses: actions/cache@v4
+        with:
+          path: ~/.cache/pypoetry
+          key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-poetry-
+
       - name: Install dependencies
         run: |
-          python -m pip install --upgrade pip
-          pip install -e ".[dev]"
+          poetry install --with dev
 
       - name: Run tests
         run: |
-          pytest tests/ -v
+          poetry run pytest tests/ -v
 
       - name: Run linting
         run: |
-          pip install ruff
-          ruff check .
+          poetry run ruff check codechunk tests
 
       - name: Run type checking
         run: |
-          pip install mypy
-          mypy codechunk
+          poetry run mypy codechunk
 
   build:
     runs-on: ubuntu-latest
@@ -47,14 +59,25 @@ jobs:
       with:
         python-version: '3.11'
 
-      - name: Install build dependencies
-        run: |
-          pip install build
+      - name: Install Poetry
+        uses: snok/install-poetry@v1
+        with:
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+          installer-parallel: true
+
+      - name: Cache Poetry dependencies
+        uses: actions/cache@v4
+        with:
+          path: ~/.cache/pypoetry
+          key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-poetry-
 
       - name: Build package
         run: |
-          python -m build
+          poetry build
 
       - name: Verify build
         run: |
-          pip install dist/*.whl --dry-run
+          poetry run pip install dist/*.whl --dry-run
```
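The cache key ties the Poetry cache to the lockfile's content, so any dependency change invalidates the cache while the `restore-keys` prefix still allows partial reuse. As a rough Python analogue of what `${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}` computes (the real implementation lives in the Actions runner, not here):

```python
import hashlib
from pathlib import Path


def lockfile_cache_key(os_name: str, root: Path = Path(".")) -> str:
    """Sketch of an Actions-style cache key: OS + digest of every poetry.lock."""
    digest = hashlib.sha256()
    for lock in sorted(root.rglob("poetry.lock")):  # '**/poetry.lock'
        digest.update(lock.read_bytes())
    return f"{os_name}-poetry-{digest.hexdigest()}"


print(lockfile_cache_key("Linux"))
```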
.gitignore (vendored): 54 lines changed
The `.gitignore` rewrite shrinks the first hunk from 12 lines to 7 and the second from 56 to 19, consolidating duplicate entries (`.coverage`, `.pytest_cache/`, `.env`, and `.venv` each appear twice below) and adding a codechunk-specific section. Old and new lines are interleaved as captured:

```
@@ -1,12 +1,7 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
@@ -20,56 +15,19 @@ parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
.pytest_cache/
.coverage
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Environments
.env
.venv
.venv/
env/
venv/
ENV/
env.bak/
venv.bak/

# IDEs
.vscode/
.idea/
*.swp
*.swo
*~

# OS
.DS_Store
Thumbs.db

# codechunk specific
.output/
*.chunked.*
.codechunk.yaml
.env
.venv
codechunk.yaml
```
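The new `*.chunked.*` entry covers codechunk's generated artifacts regardless of extension. A quick check of what that pattern matches, using Python's `fnmatch` as a stand-in for git's matcher (their semantics differ slightly on `**`):

```python
from fnmatch import fnmatch

names = ["app.chunked.md", "app.chunked.json", "app.py", ".codechunk.yaml"]
ignored = [n for n in names if fnmatch(n, "*.chunked.*")]
print(ignored)  # ['app.chunked.md', 'app.chunked.json']
```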
.travis.yml: new file, 0 lines
CLI module (mypy-driven cleanup: `ctx.obj` accesses go through a single `cast(dict, ctx.obj)`, and the mixed-type `stats` dict is split so each dict stays homogeneous):

```diff
@@ -1,6 +1,6 @@
 import os
 from pathlib import Path
-from typing import Optional, List
+from typing import Optional, List, cast
 
 import click
 from rich.console import Console
@@ -24,8 +24,8 @@ logger = get_logger(__name__)
 @click.pass_context
 def main(ctx: click.Context, verbose: bool, config: Optional[str]) -> None:
     ctx.ensure_object(dict)
-    ctx.obj["verbose"] = verbose
-    ctx.obj["config_path"] = config
+    ctx.obj["verbose"] = verbose  # type: ignore[index]
+    ctx.obj["config_path"] = config  # type: ignore[index]
 
     if verbose:
         logger.setLevel("DEBUG")
@@ -43,8 +43,9 @@ def main(ctx: click.Context, verbose: bool, config: Optional[str]) -> None:
 def generate(ctx: click.Context, path: str, output: Optional[str], format: str,
              max_tokens: int, include: tuple, exclude: tuple) -> None:
     """Generate optimized context bundle for LLM."""
-    config_path = ctx.obj.get("config_path")
-    verbose = ctx.obj.get("verbose", False)
+    ctx_obj = cast(dict, ctx.obj)
+    config_path = ctx_obj.get("config_path")
+    verbose = ctx_obj.get("verbose", False)
 
     try:
         config = load_config(config_path) if config_path else Config()
@@ -98,7 +99,8 @@ def generate(ctx: click.Context, path: str, output: Optional[str], format: str,
 @click.pass_context
 def analyze(ctx: click.Context, path: str, json: bool) -> None:
     """Analyze codebase and report statistics."""
-    verbose = ctx.obj.get("verbose", False)
+    ctx_obj = cast(dict, ctx.obj)
+    verbose = ctx_obj.get("verbose", False)
 
     try:
         project_path = Path(path)
@@ -113,24 +115,34 @@ def analyze(ctx: click.Context, path: str, json: bool) -> None:
     chunker = CodeChunker(config.chunking)
     chunks = chunker.chunk_all(chunks)
 
-    stats = {
+    stats: dict[str, int] = {
         "total_files": len(parser.files),
         "total_chunks": len(chunks),
-        "files_by_language": {},
-        "chunks_by_type": {},
         "total_lines": sum(c.metadata.line_count for c in chunks),
         "total_functions": sum(1 for c in chunks if c.chunk_type == "function"),
         "total_classes": sum(1 for c in chunks if c.chunk_type == "class"),
     }
 
+    files_by_lang: dict[str, int] = {}
+    chunks_by_type: dict[str, int] = {}
+
     for chunk in chunks:
         lang = chunk.metadata.language
-        stats["files_by_language"][lang] = stats["files_by_language"].get(lang, 0) + 1
-        stats["chunks_by_type"][chunk.chunk_type] = stats["chunks_by_type"].get(chunk.chunk_type, 0) + 1
+        files_by_lang[lang] = files_by_lang.get(lang, 0) + 1
+        chunks_by_type[chunk.chunk_type] = chunks_by_type.get(chunk.chunk_type, 0) + 1
 
     if json:
         import json as json_module
-        console.print(json_module.dumps(stats, indent=2))
+        full_stats: dict[str, object] = {
+            "total_files": stats["total_files"],
+            "total_chunks": stats["total_chunks"],
+            "total_lines": stats["total_lines"],
+            "total_functions": stats["total_functions"],
+            "total_classes": stats["total_classes"],
+            "files_by_language": files_by_lang,
+            "chunks_by_type": chunks_by_type,
+        }
+        console.print(json_module.dumps(full_stats, indent=2))
     else:
         console.print(Panel(
             Text.from_markup(f"""
@@ -143,10 +155,10 @@ Total Functions: {stats['total_functions']}
Total Classes: {stats['total_classes']}

[b]Files by Language[/b]
-{chr(10).join(f' - {lang}: {count}' for lang, count in stats['files_by_language'].items())}
+{chr(10).join(f' - {lang}: {count}' for lang, count in files_by_lang.items())}

[b]Chunks by Type[/b]
-{chr(10).join(f' - {type_}: {count}' for type_, count in stats['chunks_by_type'].items())}
+{chr(10).join(f' - {type_}: {count}' for type_, count in chunks_by_type.items())}
 """),
             title="Analysis Results",
             expand=False
```
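Under this repo's mypy settings, indexing `ctx.obj` directly evidently trips `[index]` errors (hence the targeted ignores on the write side), while the read side gets a one-time `cast` so every later `.get` is checked against a concrete `dict`. A minimal sketch of the same pattern; the command names below are illustrative, not from the repo:

```python
from typing import cast

import click


@click.group()
@click.option("--verbose", is_flag=True)
@click.pass_context
def cli(ctx: click.Context, verbose: bool) -> None:
    # ensure_object(dict) guarantees ctx.obj is a dict at runtime,
    # but its static type is too loose for strict checking.
    ctx.ensure_object(dict)
    ctx.obj["verbose"] = verbose


@cli.command()
@click.pass_context
def show(ctx: click.Context) -> None:
    # One cast gives every subsequent access a checked dict type.
    ctx_obj = cast(dict, ctx.obj)
    verbose: bool = ctx_obj.get("verbose", False)
    click.echo(f"verbose={verbose}")


if __name__ == "__main__":
    cli()
```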
Config module (built-in generics replace `typing.List`/`Dict`, and the `load_config` branches are reworked so the `Optional[str]` parameter is never reassigned to a `Path`):

```diff
@@ -1,15 +1,15 @@
 from pathlib import Path
-from typing import List, Optional, Dict, Any
+from typing import Any, Optional
 from dataclasses import dataclass, field
 import yaml
 
 
 @dataclass
 class ChunkingConfig:
-    include_patterns: List[str] = field(default_factory=lambda: [
+    include_patterns: list[str] = field(default_factory=lambda: [
         "*.py", "*.js", "*.ts", "*.go", "*.rs", "*.java", "*.cpp", "*.c", "*.h"
     ])
-    exclude_patterns: List[str] = field(default_factory=lambda: [
+    exclude_patterns: list[str] = field(default_factory=lambda: [
         "**/test_*.py", "**/__pycache__/**", "**/node_modules/**",
         "**/.git/**", "**/venv/**", "**/.env/**"
     ])
@@ -17,7 +17,7 @@ class ChunkingConfig:
     min_chunk_size: int = 3
     preserve_docstrings: bool = True
     remove_comments: bool = False
-    boilerplate_patterns: List[str] = field(default_factory=lambda: [
+    boilerplate_patterns: list[str] = field(default_factory=lambda: [
         r"@property\s*\n\s*def\s+\w+\s*\(\s*\)\s*:",
         r"@abstractmethod",
         r"@staticmethod",
@@ -27,14 +27,14 @@ class ChunkingConfig:
 
 @dataclass
 class PrioritizationConfig:
-    keywords: List[str] = field(default_factory=lambda: [
+    keywords: list[str] = field(default_factory=lambda: [
         "main", "core", "handler", "controller", "service", "model"
     ])
     size_limit: int = 10000
-    exclude_patterns: List[str] = field(default_factory=lambda: [
+    exclude_patterns: list[str] = field(default_factory=lambda: [
         "**/test_*.py", "**/*_test.py", "**/conftest.py"
     ])
-    include_only: List[str] = field(default_factory=list)
+    include_only: list[str] = field(default_factory=list)
     weight_by_depth: bool = True
 
 
@@ -51,21 +51,21 @@ class Config:
     chunking: ChunkingConfig = field(default_factory=ChunkingConfig)
     prioritization: PrioritizationConfig = field(default_factory=PrioritizationConfig)
     output: OutputConfig = field(default_factory=OutputConfig)
-    env_overrides: Dict[str, str] = field(default_factory=dict)
+    env_overrides: dict[str, str] = field(default_factory=dict)
 
 
 def load_config(config_path: Optional[str] = None) -> Config:
     """Load configuration from YAML file."""
     if config_path is None:
-        config_path = Path.cwd() / ".codechunk.yaml"
-
-    config_file = Path(config_path)
+        config_file = Path.cwd() / ".codechunk.yaml"
+    else:
+        config_file = Path(config_path)
 
     if not config_file.exists():
         return Config()
 
     try:
-        with open(config_file, 'r') as f:
+        with open(config_file) as f:
             data = yaml.safe_load(f)
 
         if data is None:
```
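The reworked branch keeps `config_path` as `Optional[str]` throughout and always produces a `Path` in `config_file`, which is what the old reassignment broke under mypy. A standalone sketch of the same pattern; the `AppConfig` name is hypothetical:

```python
from dataclasses import dataclass
from pathlib import Path
from typing import Optional

import yaml  # PyPI package: PyYAML


@dataclass
class AppConfig:
    max_chunk_size: int = 200


def load_config(config_path: Optional[str] = None) -> AppConfig:
    # Branch instead of reassigning config_path: the Optional[str]
    # parameter keeps its type, and config_file is always a Path.
    if config_path is None:
        config_file = Path.cwd() / ".codechunk.yaml"
    else:
        config_file = Path(config_path)

    if not config_file.exists():
        return AppConfig()

    with open(config_file) as f:
        data = yaml.safe_load(f)

    # An empty YAML file parses to None; fall back to defaults.
    if data is None:
        return AppConfig()
    return AppConfig(**data)
```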
Chunking module (same `List` to `list` modernization, plus unused loop variables renamed with a leading underscore and an empty list annotated):

```diff
@@ -1,5 +1,5 @@
 from dataclasses import dataclass, field
-from typing import List, Optional, Dict, Any
+from typing import Optional, Any
 from pathlib import Path
 import re
 
@@ -13,9 +13,9 @@ class ChunkMetadata:
     end_line: int
     line_count: int
     docstring: Optional[str] = None
-    imports: List[str] = field(default_factory=list)
-    decorators: List[str] = field(default_factory=list)
-    parameters: List[str] = field(default_factory=list)
+    imports: list[str] = field(default_factory=list)
+    decorators: list[str] = field(default_factory=list)
+    parameters: list[str] = field(default_factory=list)
     return_type: Optional[str] = None
     complexity_score: int = 1
     original_content: str = ""
@@ -28,7 +28,7 @@ class ParsedChunk:
     content: str
     metadata: ChunkMetadata
     priority: int = 0
-    dependencies: List[str] = field(default_factory=list)
+    dependencies: list[str] = field(default_factory=list)
     summary: str = ""
     is_boilerplate: bool = False
 
@@ -52,7 +52,7 @@ class CodeChunker:
         (r'def\s+__ge__\s*\([^)]*\)\s*:', '__ge__'),
     ]
 
-    def chunk_all(self, chunks: List[ParsedChunk]) -> List[ParsedChunk]:
+    def chunk_all(self, chunks: list[ParsedChunk]) -> list[ParsedChunk]:
         """Process all chunks: remove boilerplate, add priorities."""
         result = []
         for chunk in chunks:
@@ -67,7 +67,7 @@ class CodeChunker:
             return chunk
 
         content = chunk.content
-        for pattern, pattern_type in self.boilerplate_patterns:
+        for pattern, _pattern_type in self.boilerplate_patterns:
             if re.search(pattern, content, re.MULTILINE):
                 chunk.is_boilerplate = True
                 break
@@ -116,17 +116,17 @@ class CodeChunker:
         chunk.priority = priority
         return chunk
 
-    def _sort_by_priority(self, chunks: List[ParsedChunk]) -> List[ParsedChunk]:
+    def _sort_by_priority(self, chunks: list[ParsedChunk]) -> list[ParsedChunk]:
         """Sort chunks by priority (highest first)."""
         return sorted(chunks, key=lambda c: c.priority, reverse=True)
 
-    def split_large_chunk(self, chunk: ParsedChunk) -> List[ParsedChunk]:
+    def split_large_chunk(self, chunk: ParsedChunk) -> list[ParsedChunk]:
         """Split a large chunk into smaller pieces."""
         if chunk.metadata.line_count <= self.config.max_chunk_size:
             return [chunk]
 
         lines = chunk.content.split('\n')
-        parts = []
+        parts: list[ParsedChunk] = []
         current_part = []
         current_lines = 0
```
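The boilerplate check scans each chunk against a table of `(regex, label)` pairs and flags the first match; the label goes unused during matching, which is why the diff renames it `_pattern_type`. A self-contained sketch of that idea, with patterns abbreviated from the diff:

```python
import re
from dataclasses import dataclass


@dataclass
class Chunk:
    content: str
    is_boilerplate: bool = False


# (pattern, label) pairs; the label is unused while matching,
# hence the leading-underscore convention flagged by the linter.
BOILERPLATE_PATTERNS = [
    (r"def\s+__eq__\s*\([^)]*\)\s*:", "__eq__"),
    (r"def\s+__ge__\s*\([^)]*\)\s*:", "__ge__"),
    (r"@staticmethod", "staticmethod"),
]


def mark_boilerplate(chunk: Chunk) -> Chunk:
    for pattern, _label in BOILERPLATE_PATTERNS:
        if re.search(pattern, chunk.content, re.MULTILINE):
            chunk.is_boilerplate = True
            break
    return chunk


print(mark_boilerplate(Chunk("def __eq__(self, other):\n    ...")).is_boilerplate)  # True
```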
Dependency analyzer (built-in generics, explicit `-> None` on procedures, one-line docstrings dropped, and unused bindings underscored):

```diff
@@ -1,4 +1,4 @@
-from typing import List, Dict, Set, Optional
+from typing import Optional
 from dataclasses import dataclass, field
 from pathlib import Path
 from codechunk.core.chunking import ParsedChunk
@@ -9,19 +9,18 @@ class DependencyNode:
     chunk_name: str
     file_path: Path
     module_name: str
-    dependencies: Set[str] = field(default_factory=set)
-    dependents: Set[str] = field(default_factory=set)
+    dependencies: set[str] = field(default_factory=set)
+    dependents: set[str] = field(default_factory=set)
     is_circular: bool = False
 
 
 class DependencyAnalyzer:
     def __init__(self):
-        self.dependency_graph: Dict[str, DependencyNode] = {}
-        self.module_to_chunks: Dict[str, List[str]] = {}
+        self.dependency_graph: dict[str, DependencyNode] = {}
+        self.module_to_chunks: dict[str, list[str]] = {}
 
-    def analyze_dependencies(self, chunks: List[ParsedChunk],
-                             project_files: List[Path]) -> Dict[str, DependencyNode]:
-        """Analyze dependencies between chunks."""
+    def analyze_dependencies(self, chunks: list[ParsedChunk],
+                             project_files: list[Path]) -> dict[str, DependencyNode]:
         self.dependency_graph = {}
         self.module_to_chunks = {}
@@ -53,16 +52,14 @@ class DependencyAnalyzer:
 
         return self.dependency_graph
 
-    def _build_module_cache(self, project_files: List[Path]) -> Dict[Path, str]:
-        """Build cache of file to module name mappings."""
-        cache = {}
+    def _build_module_cache(self, project_files: list[Path]) -> dict[Path, str]:
+        cache: dict[Path, str] = {}
         for file_path in project_files:
             module_name = self._get_module_name(file_path, set(project_files))
             cache[file_path] = module_name
         return cache
 
-    def _get_module_name(self, file_path: Path, project_root: Set[Path]) -> str:
-        """Get module name from file path."""
+    def _get_module_name(self, file_path: Path, project_root: set[Path]) -> str:
         try:
             if project_root:
                 root = min(project_root, key=lambda p: len(p.parts))
@@ -84,8 +81,7 @@ class DependencyAnalyzer:
         return file_path.stem
 
     def _resolve_import(self, import_str: str, current_file: Path,
-                        project_root: Set[Path], module_cache: Dict[Path, str]) -> Optional[str]:
-        """Resolve import string to module name."""
+                        project_root: set[Path], module_cache: dict[Path, str]) -> Optional[str]:
         clean_import = import_str.strip()
 
         parts = clean_import.split('.')
@@ -102,7 +98,7 @@ class DependencyAnalyzer:
                            'torch', 'tensorflow', 'matplotlib', 'scipy', 'sklearn']:
             return None
 
-        for file_path, module_name in module_cache.items():
+        for _file_path, module_name in module_cache.items():
             if module_name.endswith(base_module) or module_name == base_module:
                 return module_name
 
@@ -113,19 +109,17 @@ class DependencyAnalyzer:
 
         return clean_import
 
-    def _build_dependency_links(self):
-        """Build reverse dependency links (dependents)."""
+    def _build_dependency_links(self) -> None:
         for node in self.dependency_graph.values():
             for dep in node.dependencies:
                 if dep in self.dependency_graph:
                     self.dependency_graph[dep].dependents.add(node.chunk_name)
 
-    def _detect_circular_dependencies(self):
-        """Detect circular dependencies in the graph."""
+    def _detect_circular_dependencies(self) -> None:
        visited = set()
        rec_stack = set()
 
-        def detect_cycle(node_name: str, path: List[str]) -> bool:
+        def detect_cycle(node_name: str, path: list[str]) -> bool:
            visited.add(node_name)
            rec_stack.add(node_name)
            path.append(node_name)
@@ -150,8 +144,7 @@ class DependencyAnalyzer:
             if node_name not in visited:
                 detect_cycle(node_name, [])
 
-    def get_essential_chunks(self, selected_chunks: List[str]) -> List[str]:
-        """Get all chunks needed including transitive dependencies."""
+    def get_essential_chunks(self, selected_chunks: list[str]) -> list[str]:
         essential = set(selected_chunks)
         to_process = list(selected_chunks)
@@ -166,8 +159,7 @@ class DependencyAnalyzer:
 
         return list(essential)
 
-    def get_impacted_chunks(self, modified_chunks: List[str]) -> List[str]:
-        """Get all chunks that depend on the modified chunks."""
+    def get_impacted_chunks(self, modified_chunks: list[str]) -> list[str]:
         impacted = set(modified_chunks)
         to_process = list(modified_chunks)
@@ -182,8 +174,7 @@ class DependencyAnalyzer:
 
         return list(impacted)
 
-    def get_dependency_stats(self) -> Dict[str, int]:
-        """Get statistics about dependencies."""
+    def get_dependency_stats(self) -> dict[str, int]:
         stats = {
             "total_nodes": len(self.dependency_graph),
             "nodes_with_deps": 0,
@@ -199,16 +190,12 @@ class DependencyAnalyzer:
             depent_count = len(node.dependents)
 
             stats["total_edges"] += dep_count
-
             if dep_count > 0:
                 stats["nodes_with_deps"] += 1
-
             if depent_count > 0:
                 stats["nodes_with_dependents"] += 1
-
             if node.is_circular:
                 stats["circular_deps"] += 1
-
             stats["max_dependencies"] = max(stats["max_dependencies"], dep_count)
             stats["max_dependents"] = max(stats["max_dependents"], depent_count)
```
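`_detect_circular_dependencies` is a standard depth-first search with a recursion stack: a neighbor reached while it is still on the stack closes a cycle. A minimal, self-contained version of that algorithm (the graph literal is made up for illustration):

```python
def find_cyclic_nodes(graph: dict[str, set[str]]) -> set[str]:
    """Return every node that participates in a cycle, via DFS."""
    visited: set[str] = set()
    rec_stack: set[str] = set()
    cyclic: set[str] = set()

    def detect_cycle(node: str, path: list[str]) -> None:
        visited.add(node)
        rec_stack.add(node)
        path.append(node)
        for dep in graph.get(node, set()):
            if dep not in visited:
                detect_cycle(dep, path)
            elif dep in rec_stack:
                # Everything from dep to the end of path closes the cycle.
                cyclic.update(path[path.index(dep):])
        rec_stack.discard(node)
        path.pop()

    for node in graph:
        if node not in visited:
            detect_cycle(node, [])
    return cyclic


print(find_cyclic_nodes({"a": {"b"}, "b": {"c"}, "c": {"a"}, "d": {"a"}}))
# {'a', 'b', 'c'}
```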
Output formatter (`List` to `list` throughout; the `typing` import becomes unnecessary and is dropped):

```diff
@@ -1,4 +1,3 @@
-from typing import List, Optional
 from codechunk.core.chunking import ParsedChunk
 
 
@@ -10,7 +9,7 @@ class OutputFormatter:
         self.max_tokens = max_tokens
         self.token_warning_thresholds = [0.7, 0.9, 1.0]
 
-    def format(self, chunks: List[ParsedChunk]) -> str:
+    def format(self, chunks: list[ParsedChunk]) -> str:
         """Format chunks for output."""
         if self.format_type == "ollama":
             return self._format_ollama(chunks)
@@ -19,7 +18,7 @@ class OutputFormatter:
         else:
             return self._format_markdown(chunks)
 
-    def _format_ollama(self, chunks: List[ParsedChunk]) -> str:
+    def _format_ollama(self, chunks: list[ParsedChunk]) -> str:
         """Format for Ollama."""
         lines = []
         lines.append("### System")
@@ -56,7 +55,7 @@ class OutputFormatter:
 
         return "\n".join(lines)
 
-    def _format_lmstudio(self, chunks: List[ParsedChunk]) -> str:
+    def _format_lmstudio(self, chunks: list[ParsedChunk]) -> str:
         """Format for LM Studio."""
         import json
 
@@ -99,7 +98,7 @@ Provide clear, accurate code analysis and assistance."""
 
         return json.dumps(messages, indent=2)
 
-    def _format_markdown(self, chunks: List[ParsedChunk]) -> str:
+    def _format_markdown(self, chunks: list[ParsedChunk]) -> str:
         """Format as markdown."""
         lines = []
         lines.append("# Code Context")
@@ -183,7 +182,7 @@ Provide clear, accurate code analysis and assistance."""
         else:
             return True, ratio, "OK"
 
-    def prune_for_limit(self, chunks: List[ParsedChunk], max_tokens: int) -> list[ParsedChunk]:
+    def prune_for_limit(self, chunks: list[ParsedChunk], max_tokens: int) -> list[ParsedChunk]:
         """Prune chunks to fit within token limit."""
         result = []
         current_tokens = 0
```
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Dict, Any
|
||||
from dataclasses import dataclass, field
|
||||
import re
|
||||
from typing import Optional
|
||||
import os
|
||||
import re
|
||||
from codechunk.core.chunking import ParsedChunk, ChunkMetadata
|
||||
|
||||
|
||||
@@ -41,23 +40,23 @@ LANGUAGE_EXTENSIONS = {
|
||||
|
||||
class CodeParser:
|
||||
def __init__(self):
|
||||
self.files: List[Path] = []
|
||||
self.file_contents: Dict[Path, str] = {}
|
||||
self.files: list[Path] = []
|
||||
self.file_contents: dict[Path, str] = {}
|
||||
|
||||
def detect_language(self, file_path: Path) -> Optional[str]:
|
||||
"""Detect programming language from file extension."""
|
||||
ext = file_path.suffix.lower()
|
||||
return LANGUAGE_EXTENSIONS.get(ext)
|
||||
|
||||
def discover_files(self, project_path: Path, include_patterns: List[str],
|
||||
exclude_patterns: List[str]) -> None:
|
||||
def discover_files(self, project_path: Path, include_patterns: list[str],
|
||||
exclude_patterns: list[str]) -> None:
|
||||
"""Discover source files in project directory."""
|
||||
from fnmatch import fnmatch
|
||||
|
||||
self.files = []
|
||||
project_path = Path(project_path)
|
||||
|
||||
for root, dirs, files in os.walk(project_path):
|
||||
for root, _dirs, files in os.walk(project_path):
|
||||
root_path = Path(root)
|
||||
|
||||
for file_name in files:
|
||||
@@ -96,7 +95,7 @@ class CodeParser:
|
||||
self.file_contents[file_path] = content
|
||||
return content
|
||||
|
||||
def parse_all(self) -> List[ParsedChunk]:
|
||||
def parse_all(self) -> list[ParsedChunk]:
|
||||
"""Parse all discovered files."""
|
||||
chunks = []
|
||||
for file_path in self.files:
|
||||
@@ -104,7 +103,7 @@ class CodeParser:
|
||||
chunks.extend(file_chunks)
|
||||
return chunks
|
||||
|
||||
def parse_file(self, file_path: Path) -> List[ParsedChunk]:
|
||||
def parse_file(self, file_path: Path) -> list[ParsedChunk]:
|
||||
"""Parse a single file and extract chunks."""
|
||||
language = self.detect_language(file_path)
|
||||
if not language:
|
||||
@@ -124,7 +123,7 @@ class CodeParser:
|
||||
else:
|
||||
return self._parse_generic(file_path, content, lines, language)
|
||||
|
||||
def _parse_python(self, file_path: Path, content: str, lines: List[str]) -> List[ParsedChunk]:
|
||||
def _parse_python(self, file_path: Path, content: str, lines: list[str]) -> list[ParsedChunk]:
|
||||
"""Parse Python file for classes and functions."""
|
||||
chunks = []
|
||||
current_class = None
|
||||
@@ -284,8 +283,8 @@ class CodeParser:
|
||||
|
||||
return chunks
|
||||
|
||||
def _parse_js_like(self, file_path: Path, content: str, lines: List[str],
|
||||
language: str) -> List[ParsedChunk]:
|
||||
def _parse_js_like(self, file_path: Path, content: str, lines: list[str],
|
||||
language: str) -> list[ParsedChunk]:
|
||||
"""Parse JavaScript/TypeScript file."""
|
||||
chunks = []
|
||||
imports = self._extract_imports(content, language)
|
||||
@@ -392,7 +391,7 @@ class CodeParser:
|
||||
|
||||
return chunks
|
||||
|
||||
def _parse_go(self, file_path: Path, content: str, lines: List[str]) -> List[ParsedChunk]:
|
||||
def _parse_go(self, file_path: Path, content: str, lines: list[str]) -> list[ParsedChunk]:
|
||||
"""Parse Go file."""
|
||||
chunks = []
|
||||
imports = self._extract_imports(content, "go")
|
||||
@@ -460,7 +459,7 @@ class CodeParser:
|
||||
|
||||
return chunks
|
||||
|
||||
def _parse_rust(self, file_path: Path, content: str, lines: List[str]) -> List[ParsedChunk]:
|
||||
def _parse_rust(self, file_path: Path, content: str, lines: list[str]) -> list[ParsedChunk]:
|
||||
"""Parse Rust file."""
|
||||
chunks = []
|
||||
imports = self._extract_imports(content, "rust")
|
||||
@@ -528,8 +527,8 @@ class CodeParser:
|
||||
|
||||
return chunks
|
||||
|
||||
def _parse_generic(self, file_path: Path, content: str, lines: List[str],
|
||||
language: str) -> List[ParsedChunk]:
|
||||
def _parse_generic(self, file_path: Path, content: str, lines: list[str],
|
||||
language: str) -> list[ParsedChunk]:
|
||||
"""Generic parser for unknown languages."""
|
||||
chunks = []
|
||||
imports = self._extract_imports(content, language)
|
||||
@@ -554,7 +553,7 @@ class CodeParser:
|
||||
|
||||
return chunks
|
||||
|
||||
def _extract_imports(self, content: str, language: str) -> List[str]:
|
||||
def _extract_imports(self, content: str, language: str) -> list[str]:
|
||||
"""Extract import statements from content."""
|
||||
imports = []
|
||||
|
||||
@@ -588,7 +587,7 @@ class CodeParser:
|
||||
|
||||
return list(set(imports))
|
||||
|
||||
def _extract_docstring(self, lines: List[str]) -> Optional[str]:
|
||||
def _extract_docstring(self, lines: list[str]) -> Optional[str]:
|
||||
"""Extract docstring from lines."""
|
||||
if not lines:
|
||||
return None
|
||||
@@ -618,7 +617,7 @@ class CodeParser:
|
||||
|
||||
return None
|
||||
|
||||
def _parse_params(self, params_str: str) -> List[str]:
|
||||
def _parse_params(self, params_str: str) -> list[str]:
|
||||
"""Parse function parameters."""
|
||||
if not params_str.strip():
|
||||
return []
|
||||
|
||||
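`discover_files` walks the tree with `os.walk` and filters each candidate through shell-style patterns, the same `fnmatch` approach the diff imports. A compact standalone version under those include/exclude semantics, with pattern lists borrowed from the config defaults:

```python
import os
from fnmatch import fnmatch
from pathlib import Path


def discover_files(project_path: Path,
                   include_patterns: list[str],
                   exclude_patterns: list[str]) -> list[Path]:
    files: list[Path] = []
    for root, _dirs, names in os.walk(project_path):
        for name in names:
            file_path = Path(root) / name
            rel = str(file_path.relative_to(project_path))
            # Include by file-name pattern, exclude by relative-path pattern.
            # Note: fnmatch's '*' also crosses path separators, so '**'
            # behaves like '*' here, unlike git's matcher.
            if not any(fnmatch(name, pat) for pat in include_patterns):
                continue
            if any(fnmatch(rel, pat) for pat in exclude_patterns):
                continue
            files.append(file_path)
    return files


# Example: Python sources, skipping tests and virtualenvs.
print(discover_files(Path("."), ["*.py"], ["**/test_*.py", "**/venv/**"]))
```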
Summarizer (an unused `class_name` binding removed, counters annotated):

```diff
@@ -1,4 +1,4 @@
-from typing import Optional, List
+from typing import Optional
 from codechunk.core.chunking import ParsedChunk
 
 
@@ -62,8 +62,6 @@ class CodeSummarizer:
 
     def _summarize_method(self, chunk: ParsedChunk) -> str:
         """Summarize a method."""
-        class_name = chunk.name.split(".")[0] if "." in chunk.name else "Unknown"
-
         parts = []
         parts.append(f"Method: {chunk.name}")
 
@@ -115,18 +113,18 @@
         """Summarize a generic chunk."""
         return f"{chunk.chunk_type.capitalize()}: {chunk.name} ({chunk.metadata.line_count} lines)"
 
-    def batch_summarize(self, chunks: List[ParsedChunk]) -> List[str]:
+    def batch_summarize(self, chunks: list[ParsedChunk]) -> list[str]:
         """Generate summaries for multiple chunks."""
         return [self.summarize(chunk) for chunk in chunks]
 
-    def generate_overview(self, chunks: List[ParsedChunk], project_name: str = "Project") -> str:
+    def generate_overview(self, chunks: list[ParsedChunk], project_name: str = "Project") -> str:
         """Generate an overview of the project structure."""
         lines = []
         lines.append(f"# {project_name} Overview")
         lines.append("")
 
-        type_counts = {}
-        lang_counts = {}
+        type_counts: dict[str, int] = {}
+        lang_counts: dict[str, int] = {}
 
         for chunk in chunks:
             type_counts[chunk.chunk_type] = type_counts.get(chunk.chunk_type, 0) + 1
```
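The annotated `dict.get` counters work, though `collections.Counter` expresses the same tally more directly; a possible alternative, not what the repo chose:

```python
from collections import Counter

# Hypothetical stand-ins for the ParsedChunk fields generate_overview reads.
chunks = [
    {"chunk_type": "function", "language": "python"},
    {"chunk_type": "class", "language": "python"},
    {"chunk_type": "function", "language": "go"},
]

type_counts = Counter(c["chunk_type"] for c in chunks)
lang_counts = Counter(c["language"] for c in chunks)

print(type_counts)  # Counter({'function': 2, 'class': 1})
print(lang_counts)  # Counter({'python': 2, 'go': 1})
```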
Utils (bug fix: `Path.walk()` only exists on Python 3.12+, so the walk goes through `os.walk` instead, with `import os` added at the top):

```diff
@@ -1,3 +1,4 @@
+import os
 from pathlib import Path
 from typing import Optional
 import hashlib
@@ -47,7 +48,7 @@ def find_files_pattern(directory: Path, patterns: list) -> list:
     from fnmatch import fnmatch
     matches = []
 
-    for root, dirs, files in directory.walk():
+    for root, dirs, files in os.walk(directory):
         for file in files:
             file_path = Path(root) / file
             rel_path = str(file_path.relative_to(directory))
```
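A version-guarded variant makes the compatibility constraint explicit; the `sys.version_info` gate below is shown only to illustrate why `os.walk` is the safer default, not how the repo resolved it:

```python
import os
import sys
from fnmatch import fnmatch
from pathlib import Path


def find_files_pattern(directory: Path, patterns: list[str]) -> list[Path]:
    matches: list[Path] = []
    if sys.version_info >= (3, 12):
        walker = directory.walk()    # Path.walk(): new in Python 3.12
    else:
        walker = os.walk(directory)  # portable on every supported version
    for root, _dirs, files in walker:
        for file in files:
            file_path = Path(root) / file
            rel_path = str(file_path.relative_to(directory))
            if any(fnmatch(rel_path, pat) for pat in patterns):
                matches.append(file_path)
    return matches


print(find_files_pattern(Path("."), ["*.py"]))
```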
Test fixtures (an unused `tempfile` import dropped; new IDE-project fixtures added; the capture ends inside `mixed_project`):

```diff
@@ -1,5 +1,3 @@
 """Pytest configuration and fixtures for codechunk tests."""
-
-import tempfile
 from collections.abc import Generator
 from pathlib import Path
@@ -458,6 +456,63 @@ export default App;
     return temp_project_dir
 
 
+@pytest.fixture
+def vscode_project(temp_project_dir: Path) -> Path:
+    """Create a mock VSCode project structure."""
+    vscode_dir = temp_project_dir / ".vscode"
+    vscode_dir.mkdir(exist_ok=True)
+
+    (vscode_dir / "extensions.json").write_text('''
+{
+    "recommendations": [
+        "ms-python.python",
+        "ms-vscode.vscode-typescript-next"
+    ]
+}
+''')
+
+    (vscode_dir / "settings.json").write_text('''
+{
+    "python.defaultInterpreterPath": "/usr/bin/python3",
+    "editor.formatOnSave": true,
+    "files.exclude": {
+        "**/__pycache__": true
+    }
+}
+''')
+
+    return temp_project_dir
+
+
+@pytest.fixture
+def jetbrains_project(temp_project_dir: Path) -> Path:
+    """Create a mock JetBrains project structure."""
+    idea_dir = temp_project_dir / ".idea"
+    idea_dir.mkdir(exist_ok=True)
+
+    (idea_dir / "misc.xml").write_text('''
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+    <component name="ProjectRootManager">
+        <output url="file://$PROJECT_DIR$/out" />
+    </component>
+</project>
+''')
+
+    (idea_dir / "modules.xml").write_text('''
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+    <component name="ProjectModuleManager">
+        <modules>
+            <module fileurl="file://$PROJECT_DIR$/test_project.iml" />
+        </modules>
+    </component>
+</project>
+''')
+
+    return temp_project_dir
+
+
+@pytest.fixture
+def mixed_project(temp_project_dir: Path) -> Path:
+    """Create a mixed project with Python and Node.js."""
```
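These fixtures compose: each builds on `temp_project_dir` and returns the project root, so a test requests only the shape it needs. A hedged sketch of how a test might consume `vscode_project`; the test name and assertions are illustrative, not from the repo:

```python
from pathlib import Path


def test_vscode_project_layout(vscode_project: Path) -> None:
    # The fixture returns the project root with .vscode/ populated.
    settings = vscode_project / ".vscode" / "settings.json"
    extensions = vscode_project / ".vscode" / "extensions.json"
    assert settings.exists()
    assert extensions.exists()
    # Editor configuration should be data, not discoverable source code.
    assert "formatOnSave" in settings.read_text()
```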