# tests/test_embeddings.py -- reconstructed from a whitespace-collapsed git patch.
#   Commit:  b7cc2028a20b71d544e9a913d5bfccb646397820
#   From:    7000pctAUTO
#   Date:    Sun, 22 Mar 2026 18:15:41 +0000
#   Subject: [PATCH] Initial upload: shell-history-semantic-search v0.1.0
"""Tests for ``EmbeddingService`` from ``shell_history_search.core``."""

import pytest
import numpy as np

from shell_history_search.core import EmbeddingService

# Model name the tests expect a default-constructed service to report.
_MODEL_NAME = "all-MiniLM-L6-v2"

# Vector length the tests expect for every embedding.
_DIM = 384


def _service_for(cache_dir):
    """Build an ``EmbeddingService`` that only overrides ``cache_dir``."""
    return EmbeddingService(cache_dir=cache_dir)


class TestEmbeddingService:
    """Exercise construction, encoding, blob round-tripping and similarity.

    Most tests request the ``temp_cache_dir`` fixture, which is defined
    outside this module, and pass it to the service as its cache directory.
    """

    def test_init_default(self):
        """A service built without arguments reports the expected defaults."""
        default_service = EmbeddingService()

        assert default_service.model_name == _MODEL_NAME
        assert default_service.device == "cpu"

    def test_init_custom_model(self, temp_cache_dir):
        """Explicit ``model_name`` and ``cache_dir`` are exposed as attributes."""
        configured = EmbeddingService(
            model_name=_MODEL_NAME, cache_dir=temp_cache_dir
        )

        assert configured.model_name == _MODEL_NAME
        assert configured.cache_dir == temp_cache_dir

    def test_embedding_dim(self, temp_cache_dir):
        """``embedding_dim`` matches the expected vector length."""
        assert _service_for(temp_cache_dir).embedding_dim == _DIM

    def test_encode_single(self, temp_cache_dir):
        """Encoding one string yields a 1-D float32 array of the expected length."""
        vector = _service_for(temp_cache_dir).encode_single("git commit")

        assert isinstance(vector, np.ndarray)
        assert vector.shape == (_DIM,)
        assert vector.dtype == np.float32

    def test_encode_batch(self, temp_cache_dir):
        """Encoding a list yields one float32 row per input string."""
        commands = ["git add .", "git commit", "git push"]
        matrix = _service_for(temp_cache_dir).encode(commands)

        assert isinstance(matrix, np.ndarray)
        assert matrix.shape == (len(commands), _DIM)
        assert matrix.dtype == np.float32

    def test_encode_empty_list(self, temp_cache_dir):
        """Encoding an empty list yields an empty array of shape ``(0,)``."""
        nothing = _service_for(temp_cache_dir).encode([])

        assert isinstance(nothing, np.ndarray)
        assert nothing.shape == (0,)

    def test_encode_returns_normalized(self, temp_cache_dir):
        """The Euclidean norm of an embedding lies in ``(0.99, 1.01]``."""
        vector = _service_for(temp_cache_dir).encode_single("test command")
        length = np.linalg.norm(vector)

        assert 0.99 < length
        assert length <= 1.01

    def test_embedding_to_blob(self, temp_cache_dir):
        """Serialising an embedding yields ``bytes`` of 4 bytes per component."""
        vector = _service_for(temp_cache_dir).encode_single("test")
        payload = EmbeddingService.embedding_to_blob(vector)

        assert isinstance(payload, bytes)
        assert len(payload) == _DIM * 4

    def test_blob_to_embedding(self, temp_cache_dir):
        """An embedding survives a blob round trip within ``allclose`` tolerance."""
        original = _service_for(temp_cache_dir).encode_single("test")
        payload = EmbeddingService.embedding_to_blob(original)
        round_tripped = EmbeddingService.blob_to_embedding(payload, _DIM)

        assert np.allclose(original, round_tripped)

    def test_cosine_similarity(self, temp_cache_dir):
        """Two git commands score higher than a git/docker pair; both in [-1, 1]."""
        service = _service_for(temp_cache_dir)
        # The generator is consumed in order, so the encode calls run in the
        # same sequence as three separate statements would.
        commit_vec, add_vec, docker_vec = (
            service.encode_single(command)
            for command in ("git commit", "git add .", "docker run")
        )

        related = EmbeddingService.cosine_similarity(commit_vec, add_vec)
        unrelated = EmbeddingService.cosine_similarity(commit_vec, docker_vec)

        for score in (related, unrelated):
            assert -1 <= score <= 1
        assert related > unrelated

    def test_cosine_similarity_perfect_match(self, temp_cache_dir):
        """An embedding compared with itself scores within 1e-4 of 1."""
        vector = _service_for(temp_cache_dir).encode_single("same command")
        score = EmbeddingService.cosine_similarity(vector, vector)

        assert 0.9999 < score <= 1.0001