"""Unit tests for ``CodeChunker`` and the ``ChunkPriority`` constants."""

from pathlib import Path
from typing import Any, Optional

from codechunk.config import ChunkingConfig
from codechunk.core.chunking import ChunkMetadata, ChunkPriority, CodeChunker, ParsedChunk


def _make_chunk(
    name: str,
    content: str,
    *,
    chunk_type: str = "function",
    start_line: int = 1,
    end_line: int = 2,
    priority: Optional[int] = None,
    **metadata_fields: Any,
) -> ParsedChunk:
    """Build a ``ParsedChunk`` located in a fictitious ``test.py`` file.

    Args:
        name: Chunk name.
        content: Source text of the chunk.
        chunk_type: Chunk kind passed to ``ParsedChunk`` (e.g. ``"function"``).
        start_line: First line of the chunk.
        end_line: Last line of the chunk (inclusive).
        priority: Forwarded to ``ParsedChunk`` only when given, so that the
            class default applies otherwise.
        **metadata_fields: Extra ``ChunkMetadata`` keyword arguments, such as
            ``complexity_score`` or ``decorators``.

    Returns:
        The constructed chunk; ``line_count`` is ``end_line - start_line + 1``.
    """
    metadata = ChunkMetadata(
        file_path=Path("test.py"),
        file_name="test.py",
        language="python",
        start_line=start_line,
        end_line=end_line,
        line_count=end_line - start_line + 1,
        **metadata_fields,
    )
    # Only pass ``priority`` when requested; never override the default with None.
    chunk_fields = {} if priority is None else {"priority": priority}
    return ParsedChunk(
        name=name,
        chunk_type=chunk_type,
        content=content,
        metadata=metadata,
        **chunk_fields,
    )


def _default_chunker() -> CodeChunker:
    """Return a ``CodeChunker`` built from an unmodified ``ChunkingConfig``."""
    return CodeChunker(ChunkingConfig())


class TestCodeChunker:
    """Tests for CodeChunker."""

    def test_calculate_priority_high_for_main_function(self):
        """A function named 'main' gets a priority of at least 50."""
        chunker = _default_chunker()
        chunk = _make_chunk("main", "def main():\n pass")

        result = chunker._calculate_priority(chunk)

        assert result.priority >= 50

    def test_calculate_priority_high_for_run_function(self):
        """A function named 'run_app' gets a priority of at least 50."""
        chunker = _default_chunker()
        chunk = _make_chunk("run_app", "def run_app():\n pass")

        result = chunker._calculate_priority(chunk)

        assert result.priority >= 50

    def test_calculate_priority_class_higher_than_function(self):
        """A class chunk outranks an otherwise comparable function chunk."""
        chunker = _default_chunker()
        func_chunk = _make_chunk("helper", "def helper():\n pass")
        class_chunk = _make_chunk(
            "MyClass",
            "class MyClass:\n pass",
            chunk_type="class",
        )

        func_priority = chunker._calculate_priority(func_chunk)
        class_priority = chunker._calculate_priority(class_chunk)

        assert class_priority.priority > func_priority.priority

    def test_calculate_priority_line_count_factor(self):
        """A 52-line function outranks a 2-line function."""
        chunker = _default_chunker()
        small_chunk = _make_chunk("small", "def small():\n pass")
        large_chunk = _make_chunk(
            "large",
            "def large():\n " + "\n ".join(["x = 1"] * 50),
            end_line=52,
        )

        small_priority = chunker._calculate_priority(small_chunk)
        large_priority = chunker._calculate_priority(large_chunk)

        assert large_priority.priority > small_priority.priority

    def test_calculate_priority_complexity_factor(self):
        """A chunk with complexity score 10 outranks one with score 1."""
        chunker = _default_chunker()
        simple_chunk = _make_chunk(
            "simple",
            "def simple():\n return 1",
            complexity_score=1,
        )
        complex_chunk = _make_chunk(
            "complex",
            "def complex():\n if True:\n for i in range(10):\n if i > 5:\n return i\n return 0",
            end_line=6,
            complexity_score=10,
        )

        simple_priority = chunker._calculate_priority(simple_chunk)
        complex_priority = chunker._calculate_priority(complex_chunk)

        assert complex_priority.priority > simple_priority.priority

    def test_calculate_priority_decorators_factor(self):
        """A decorated function is never ranked below an undecorated one."""
        chunker = _default_chunker()
        no_decorator_chunk = _make_chunk(
            "no_decorator",
            "def no_decorator():\n pass",
            decorators=[],
        )
        with_decorator_chunk = _make_chunk(
            "with_decorator",
            "@property\ndef with_decorator():\n pass",
            end_line=3,
            decorators=["@property"],
        )

        no_dec_priority = chunker._calculate_priority(no_decorator_chunk)
        with_dec_priority = chunker._calculate_priority(with_decorator_chunk)

        # Deliberately ``>=``: the test tolerates decorators having no effect.
        assert with_dec_priority.priority >= no_dec_priority.priority

    def test_remove_boilerplate_property(self):
        """A plain accessor method is not flagged as boilerplate."""
        chunker = _default_chunker()
        chunk = _make_chunk(
            "MyClass.value",
            "def value(self):\n return self._value",
        )

        result = chunker._remove_boilerplate(chunk)

        assert result.is_boilerplate is False

    def test_remove_boilerplate_dunder_methods(self):
        """A dunder method such as ``__str__`` is flagged as boilerplate."""
        chunker = _default_chunker()
        chunk = _make_chunk(
            "MyClass.__str__",
            "def __str__(self):\n return 'MyClass'",
        )

        result = chunker._remove_boilerplate(chunk)

        assert result.is_boilerplate is True

    def test_remove_boilerplate_regular_function(self):
        """A regular function is not flagged as boilerplate."""
        chunker = _default_chunker()
        chunk = _make_chunk(
            "process_data",
            "def process_data(data):\n return [x for x in data if x > 0]",
        )

        result = chunker._remove_boilerplate(chunk)

        assert result.is_boilerplate is False

    def test_sort_by_priority(self):
        """Chunks come back ordered from highest to lowest priority."""
        chunker = _default_chunker()
        low_chunk = _make_chunk("helper", "def helper():\n pass", priority=10)
        high_chunk = _make_chunk("main", "def main():\n pass", priority=100)

        # Input is deliberately in ascending order so a no-op sort would fail.
        sorted_chunks = chunker._sort_by_priority([low_chunk, high_chunk])

        assert sorted_chunks[0].name == "main"
        assert sorted_chunks[1].name == "helper"

    def test_chunk_all_processes_all_chunks(self):
        """chunk_all keeps both chunks and returns the higher priority first."""
        chunker = _default_chunker()
        chunks = [
            _make_chunk("helper", "def helper():\n pass"),
            _make_chunk("main", "def main():\n pass", start_line=4, end_line=5),
        ]

        result = chunker.chunk_all(chunks)

        assert len(result) == 2
        assert result[0].priority > result[1].priority

    def test_split_large_chunk(self, tmp_path):
        """A 30-line chunk is split into parts no longer than max_chunk_size."""
        config = ChunkingConfig()
        config.max_chunk_size = 10
        chunker = CodeChunker(config)
        large_content = "\n".join(f"line {i}" for i in range(30))
        chunk = _make_chunk("large_function", large_content, end_line=30)

        parts = chunker.split_large_chunk(chunk)

        assert len(parts) > 1
        for part in parts:
            assert part.metadata.line_count <= config.max_chunk_size

    def test_split_small_chunk(self, tmp_path):
        """A 2-line chunk is returned as a single, unmodified part."""
        chunker = _default_chunker()
        small_content = "def small():\n pass"
        chunk = _make_chunk("small", small_content)

        parts = chunker.split_large_chunk(chunk)

        assert len(parts) == 1
        assert parts[0].content == small_content


class TestChunkPriority:
    """Tests for ChunkPriority constants."""

    def test_priority_values(self):
        """Each priority constant equals its expected numeric value."""
        assert ChunkPriority.CRITICAL == 100
        assert ChunkPriority.HIGH == 75
        assert ChunkPriority.MEDIUM == 50
        assert ChunkPriority.LOW == 25
        assert ChunkPriority.MINIMAL == 10