Add codechunk package files
This commit is contained in:
143
codechunk/config.py
Normal file
143
codechunk/config.py
Normal file
@@ -0,0 +1,143 @@
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Dict, Any
|
||||
from dataclasses import dataclass, field
|
||||
import yaml
|
||||
|
||||
|
||||
@dataclass
class ChunkingConfig:
    """Options for the chunking stage.

    Every list default is produced by a ``default_factory`` so each instance
    owns its own list. How the fields are consumed is not visible in this
    module; the per-field notes below are inferred from names and defaults
    and should be confirmed against the code that reads them.
    """

    # Glob-style patterns; presumably files matching these are chunked.
    include_patterns: List[str] = field(default_factory=lambda: [
        "*.py",
        "*.js",
        "*.ts",
        "*.go",
        "*.rs",
        "*.java",
        "*.cpp",
        "*.c",
        "*.h",
    ])
    # Glob-style patterns; presumably matching paths are skipped.
    exclude_patterns: List[str] = field(default_factory=lambda: [
        "**/test_*.py",
        "**/__pycache__/**",
        "**/node_modules/**",
        "**/.git/**",
        "**/venv/**",
        "**/.env/**",
    ])
    # Chunk size bounds (unit is not stated here -- TODO confirm with the chunker).
    max_chunk_size: int = 500
    min_chunk_size: int = 3
    preserve_docstrings: bool = True
    remove_comments: bool = False
    # Regular expressions identifying boilerplate constructs.
    boilerplate_patterns: List[str] = field(default_factory=lambda: [
        # A property getter always takes ``self``; the parameter list therefore
        # accepts an optional ``self`` (the empty form is still matched, so
        # anything the previous pattern matched continues to match).
        r"@property\s*\n\s*def\s+\w+\s*\(\s*(?:self\s*)?\)\s*:",
        r"@abstractmethod",
        r"@staticmethod",
        r"@classmethod",
    ])
|
||||
|
||||
|
||||
@dataclass
class PrioritizationConfig:
    """Options for the prioritization stage.

    List defaults come from factories, so instances never share a list.
    The consumers of these fields are outside this module; the field notes
    are inferred from names and defaults -- confirm against the callers.
    """

    # Keywords that presumably raise the priority of matching items.
    keywords: List[str] = field(
        default_factory=lambda: [
            "main",
            "core",
            "handler",
            "controller",
            "service",
            "model",
        ]
    )
    # Upper size bound (unit not stated here -- TODO confirm).
    size_limit: int = 10000
    # Glob-style patterns; the defaults target Python test files.
    exclude_patterns: List[str] = field(
        default_factory=lambda: [
            "**/test_*.py",
            "**/*_test.py",
            "**/conftest.py",
        ]
    )
    # Empty by default.
    include_only: List[str] = field(default_factory=list)
    weight_by_depth: bool = True
|
||||
|
||||
|
||||
@dataclass
class OutputConfig:
    """Options describing how results are rendered.

    The renderer is not part of this module, so only the defaults are
    documented here.
    """

    # Output format name; defaults to "markdown".
    format: str = "markdown"
    # Token budget; defaults to 8192.
    max_tokens: int = 8192
    # Both switches are enabled by default.
    include_metadata: bool = True
    syntax_highlighting: bool = True
|
||||
|
||||
|
||||
@dataclass
class Config:
    """Top-level configuration: one sub-config per section.

    Each section defaults to a freshly constructed instance of its
    dataclass, so two ``Config()`` objects never share section objects.
    """

    chunking: ChunkingConfig = field(default_factory=ChunkingConfig)
    prioritization: PrioritizationConfig = field(
        default_factory=PrioritizationConfig
    )
    output: OutputConfig = field(default_factory=OutputConfig)
    # String-to-string mapping, empty by default. It is not read or written
    # by the load/save helpers in this module.
    env_overrides: Dict[str, str] = field(default_factory=dict)
|
||||
|
||||
|
||||
def _section_from_mapping(section_cls, data, name):
    """Build the ``section_cls`` dataclass from the ``name`` entry of ``data``.

    A missing or null section yields the dataclass defaults. Keys that are
    not fields of ``section_cls`` are ignored; fields that are absent keep
    their defaults.

    Raises:
        ValueError: if the section is present but is not a mapping.
    """
    # Local import: the module header only imports ``dataclass`` and ``field``.
    from dataclasses import fields

    raw = data.get(name)
    if raw is None:
        # Covers both "section absent" and a bare ``name:`` key with no body.
        return section_cls()
    if not isinstance(raw, dict):
        raise ValueError(
            f"Error loading config file: section '{name}' must be a mapping, "
            f"got {type(raw).__name__}"
        )

    known = {f.name for f in fields(section_cls)}
    return section_cls(
        **{key: value for key, value in raw.items() if key in known}
    )


def load_config(config_path: Optional[str] = None) -> Config:
    """Load configuration from a YAML file.

    Args:
        config_path: Path of the YAML file. When ``None``, the file
            ``.codechunk.yaml`` in the current working directory is used.

    Returns:
        A ``Config`` built from the ``chunking``, ``prioritization`` and
        ``output`` sections of the file. Missing sections and missing keys
        fall back to the dataclass defaults. If the file does not exist or
        is empty, a default ``Config()`` is returned.

    Raises:
        ValueError: if the file cannot be read, is not valid YAML, or its
            top level (or one of its sections) is not a mapping.
    """
    if config_path is None:
        config_path = Path.cwd() / ".codechunk.yaml"

    config_file = Path(config_path)
    if not config_file.exists():
        return Config()

    # Only the read/parse step is guarded, so unrelated programming errors
    # are not disguised as configuration errors.
    try:
        # Explicit encoding: do not depend on the platform default.
        with open(config_file, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except yaml.YAMLError as e:
        raise ValueError(f"Invalid YAML in config file: {e}") from e
    except (OSError, UnicodeDecodeError) as e:
        raise ValueError(f"Error loading config file: {e}") from e

    if data is None:
        # Empty document.
        return Config()
    if not isinstance(data, dict):
        raise ValueError(
            "Error loading config file: top-level YAML value must be a "
            f"mapping, got {type(data).__name__}"
        )

    return Config(
        chunking=_section_from_mapping(ChunkingConfig, data, "chunking"),
        prioritization=_section_from_mapping(
            PrioritizationConfig, data, "prioritization"
        ),
        output=_section_from_mapping(OutputConfig, data, "output"),
    )
|
||||
|
||||
|
||||
def save_config(config: Config, config_path: str = ".codechunk.yaml") -> None:
    """Write ``config`` to ``config_path`` as YAML.

    The ``chunking``, ``prioritization`` and ``output`` sections are
    written as block-style mappings with a two-space indent.
    ``config.env_overrides`` is not written.
    """
    # (section name, source object, attribute names to persist)
    layout = (
        ("chunking", config.chunking, (
            "include_patterns",
            "exclude_patterns",
            "max_chunk_size",
            "min_chunk_size",
            "preserve_docstrings",
            "remove_comments",
            "boilerplate_patterns",
        )),
        ("prioritization", config.prioritization, (
            "keywords",
            "size_limit",
            "exclude_patterns",
            "include_only",
            "weight_by_depth",
        )),
        ("output", config.output, (
            "format",
            "max_tokens",
            "include_metadata",
            "syntax_highlighting",
        )),
    )

    payload = {}
    for section_name, source, attr_names in layout:
        payload[section_name] = {
            attr: getattr(source, attr) for attr in attr_names
        }

    with open(config_path, 'w') as stream:
        yaml.dump(payload, stream, default_flow_style=False, indent=2)
|
||||
Reference in New Issue
Block a user