diff --git a/codechunk/core/chunking.py b/codechunk/core/chunking.py index c57e4f1..c4913f2 100644 --- a/codechunk/core/chunking.py +++ b/codechunk/core/chunking.py @@ -1,5 +1,5 @@ from dataclasses import dataclass, field -from typing import List, Optional, Dict, Any +from typing import Optional, Any from pathlib import Path import re @@ -13,9 +13,9 @@ class ChunkMetadata: end_line: int line_count: int docstring: Optional[str] = None - imports: List[str] = field(default_factory=list) - decorators: List[str] = field(default_factory=list) - parameters: List[str] = field(default_factory=list) + imports: list[str] = field(default_factory=list) + decorators: list[str] = field(default_factory=list) + parameters: list[str] = field(default_factory=list) return_type: Optional[str] = None complexity_score: int = 1 original_content: str = "" @@ -28,7 +28,7 @@ class ParsedChunk: content: str metadata: ChunkMetadata priority: int = 0 - dependencies: List[str] = field(default_factory=list) + dependencies: list[str] = field(default_factory=list) summary: str = "" is_boilerplate: bool = False @@ -52,7 +52,7 @@ class CodeChunker: (r'def\s+__ge__\s*\([^)]*\)\s*:', '__ge__'), ] - def chunk_all(self, chunks: List[ParsedChunk]) -> List[ParsedChunk]: + def chunk_all(self, chunks: list[ParsedChunk]) -> list[ParsedChunk]: """Process all chunks: remove boilerplate, add priorities.""" result = [] for chunk in chunks: @@ -67,7 +67,7 @@ class CodeChunker: return chunk content = chunk.content - for pattern, pattern_type in self.boilerplate_patterns: + for pattern, _pattern_type in self.boilerplate_patterns: if re.search(pattern, content, re.MULTILINE): chunk.is_boilerplate = True break @@ -116,17 +116,17 @@ class CodeChunker: chunk.priority = priority return chunk - def _sort_by_priority(self, chunks: List[ParsedChunk]) -> List[ParsedChunk]: + def _sort_by_priority(self, chunks: list[ParsedChunk]) -> list[ParsedChunk]: """Sort chunks by priority (highest first).""" return sorted(chunks, key=lambda c: c.priority, reverse=True) - def split_large_chunk(self, chunk: ParsedChunk) -> List[ParsedChunk]: + def split_large_chunk(self, chunk: ParsedChunk) -> list[ParsedChunk]: """Split a large chunk into smaller pieces.""" if chunk.metadata.line_count <= self.config.max_chunk_size: return [chunk] lines = chunk.content.split('\n') - parts = [] + parts: list[ParsedChunk] = [] current_part = [] current_lines = 0