Add codechunk package files
Some checks failed
CI / test (push) Failing after 9s
CI / build (push) Has been skipped

This commit is contained in:
2026-02-01 23:42:00 +00:00
parent fce6fb3fec
commit 160970f039

143
codechunk/config.py Normal file
View File

@@ -0,0 +1,143 @@
from pathlib import Path
from typing import List, Optional, Dict, Any
from dataclasses import dataclass, field
import yaml
def _chunking_include_defaults() -> List[str]:
    """Return a fresh list of the default include globs."""
    return ["*.py", "*.js", "*.ts", "*.go", "*.rs", "*.java", "*.cpp", "*.c", "*.h"]


def _chunking_exclude_defaults() -> List[str]:
    """Return a fresh list of the default exclude globs."""
    return [
        "**/test_*.py",
        "**/__pycache__/**",
        "**/node_modules/**",
        "**/.git/**",
        "**/venv/**",
        "**/.env/**",
    ]


def _chunking_boilerplate_defaults() -> List[str]:
    """Return a fresh list of the default boilerplate regex sources."""
    return [
        r"@property\s*\n\s*def\s+\w+\s*\(\s*\)\s*:",
        r"@abstractmethod",
        r"@staticmethod",
        r"@classmethod",
    ]


@dataclass
class ChunkingConfig:
    """Options stored under the ``chunking`` section of the configuration.

    Holds two glob-pattern lists, a pair of chunk-size bounds, two boolean
    flags and a list of regular-expression sources. Every list default is
    produced by a factory, so instances never share list objects.
    """

    # Glob patterns; the defaults name common source-file extensions.
    include_patterns: List[str] = field(default_factory=_chunking_include_defaults)
    # Glob patterns; the defaults name test files, caches and vendored dirs.
    exclude_patterns: List[str] = field(default_factory=_chunking_exclude_defaults)
    # Size bounds for a chunk (the unit is not stated in this module).
    max_chunk_size: int = 500
    min_chunk_size: int = 3
    preserve_docstrings: bool = True
    remove_comments: bool = False
    # Regex sources kept as plain strings; nothing here compiles them.
    boilerplate_patterns: List[str] = field(
        default_factory=_chunking_boilerplate_defaults
    )
def _prioritization_keyword_defaults() -> List[str]:
    """Return a fresh list of the default keywords."""
    return ["main", "core", "handler", "controller", "service", "model"]


def _prioritization_exclude_defaults() -> List[str]:
    """Return a fresh list of the default exclude globs."""
    return ["**/test_*.py", "**/*_test.py", "**/conftest.py"]


@dataclass
class PrioritizationConfig:
    """Options stored under the ``prioritization`` section of the configuration.

    Holds a keyword list, a numeric size limit, two glob-pattern lists and a
    boolean flag. List defaults come from factories, so each instance owns
    its own lists.
    """

    keywords: List[str] = field(default_factory=_prioritization_keyword_defaults)
    # Numeric limit (the unit is not stated in this module).
    size_limit: int = 10000
    # Glob patterns; the defaults name pytest-style test files.
    exclude_patterns: List[str] = field(
        default_factory=_prioritization_exclude_defaults
    )
    # Empty by default.
    include_only: List[str] = field(default_factory=list)
    weight_by_depth: bool = True
@dataclass
class OutputConfig:
    """Options stored under the ``output`` section of the configuration.

    Four scalar settings: a format name, a token limit and two boolean
    switches. All of them have defaults, so ``OutputConfig()`` is valid.
    """

    # Name of the output format; "markdown" unless overridden.
    format: str = field(default="markdown")
    max_tokens: int = field(default=8192)
    include_metadata: bool = field(default=True)
    syntax_highlighting: bool = field(default=True)
@dataclass
class Config:
    """Top-level configuration object grouping the three option sections.

    Each section defaults to a freshly constructed instance of its own
    dataclass, so ``Config()`` yields a complete, all-default configuration.
    """

    chunking: ChunkingConfig = field(
        default_factory=ChunkingConfig,
    )
    prioritization: PrioritizationConfig = field(
        default_factory=PrioritizationConfig,
    )
    output: OutputConfig = field(
        default_factory=OutputConfig,
    )
    # String-to-string mapping, empty by default.
    env_overrides: Dict[str, str] = field(
        default_factory=dict,
    )
def _load_section(section_cls: Any, data: Dict[str, Any], name: str) -> Any:
    """Build one section dataclass from the mapping stored under *name*.

    Keys missing from the mapping keep the dataclass default; keys that do
    not correspond to a field of *section_cls* are ignored.
    """
    # Local import: the module header only imports ``dataclass`` and ``field``.
    from dataclasses import fields

    raw = data.get(name)
    if raw is None:
        # Section absent, or written as ``name:`` with no body (parses to None).
        return section_cls()
    if not isinstance(raw, dict):
        raise ValueError(
            f"Error loading config file: section '{name}' must be a mapping, "
            f"got {type(raw).__name__}"
        )
    overrides = {
        spec.name: raw[spec.name]
        for spec in fields(section_cls)
        if spec.init and spec.name in raw
    }
    return section_cls(**overrides)


def load_config(config_path: Optional[str] = None) -> Config:
    """Load configuration from a YAML file.

    Args:
        config_path: Location of the YAML file. When ``None``, the file
            ``.codechunk.yaml`` in the current working directory is used.

    Returns:
        A ``Config`` whose ``chunking``, ``prioritization`` and ``output``
        sections take their values from the file. Settings that are not
        present keep their defaults. A missing file, an empty file, or an
        empty section yields defaults as well.

    Raises:
        ValueError: If the file cannot be read or decoded, contains invalid
            YAML, or its top level / one of its sections is not a mapping.
    """
    if config_path is None:
        config_path = Path.cwd() / ".codechunk.yaml"
    config_file = Path(config_path)
    if not config_file.exists():
        return Config()

    try:
        # Explicit encoding so the result does not depend on the locale.
        with open(config_file, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except yaml.YAMLError as e:
        raise ValueError(f"Invalid YAML in config file: {e}") from e
    except Exception as e:
        # Deliberately broad: callers only have to handle ValueError for any
        # failure while reading the file.
        raise ValueError(f"Error loading config file: {e}") from e

    if data is None:
        # Empty document.
        return Config()
    if not isinstance(data, dict):
        raise ValueError(
            "Error loading config file: top-level YAML value must be a "
            f"mapping, got {type(data).__name__}"
        )

    return Config(
        chunking=_load_section(ChunkingConfig, data, "chunking"),
        prioritization=_load_section(PrioritizationConfig, data, "prioritization"),
        output=_load_section(OutputConfig, data, "output"),
    )
def save_config(config: Config, config_path: str = ".codechunk.yaml") -> None:
    """Save configuration to a YAML file.

    Writes the ``chunking``, ``prioritization`` and ``output`` sections in
    block style with a two-space indent. ``config.env_overrides`` is not
    written. An existing file at *config_path* is overwritten.

    Args:
        config: Configuration to serialize.
        config_path: Destination path; defaults to ``.codechunk.yaml``
            relative to the current working directory.
    """
    # Section name -> attribute names persisted for that section.
    layout = {
        "chunking": (
            "include_patterns",
            "exclude_patterns",
            "max_chunk_size",
            "min_chunk_size",
            "preserve_docstrings",
            "remove_comments",
            "boilerplate_patterns",
        ),
        "prioritization": (
            "keywords",
            "size_limit",
            "exclude_patterns",
            "include_only",
            "weight_by_depth",
        ),
        "output": (
            "format",
            "max_tokens",
            "include_metadata",
            "syntax_highlighting",
        ),
    }
    data = {
        section: {key: getattr(getattr(config, section), key) for key in keys}
        for section, keys in layout.items()
    }
    # Explicit encoding so the file is written the same way on every platform.
    with open(config_path, "w", encoding="utf-8") as f:
        yaml.dump(data, f, default_flow_style=False, indent=2)