Initial upload: Local AI Commit Reviewer CLI with CI/CD workflow
This commit is contained in:
143
src/llm/ollama.py
Normal file
143
src/llm/ollama.py
Normal file
@@ -0,0 +1,143 @@
|
||||
import asyncio
from collections.abc import AsyncIterator, Iterator
from datetime import datetime

import ollama

from .provider import LLMProvider, LLMResponse, ModelInfo
|
||||
|
||||
|
||||
class OllamaProvider(LLMProvider):
    """LLM provider backed by a local Ollama server via the ``ollama`` client.

    The underlying client is created lazily on first use, so constructing the
    provider performs no network I/O.
    """

    # One shared system prompt: the sync, async, and streaming entry points
    # must all send byte-identical instructions to the model.
    _SYSTEM_PROMPT = "You are a helpful code review assistant. Provide concise, constructive feedback on code changes."
    # Generation defaults; overridable per call via **kwargs.
    _DEFAULT_MAX_TOKENS = 2048
    _DEFAULT_TEMPERATURE = 0.3

    def __init__(
        self,
        endpoint: str = "http://localhost:11434",
        model: str = "codellama",
        timeout: int = 120
    ):
        """
        Args:
            endpoint: Base URL of the Ollama server.
            model: Name of the model used for all generation calls.
            timeout: Request timeout in seconds.
                NOTE(review): stored but never forwarded to ``ollama.Client`` —
                confirm whether it should be passed through.
        """
        self.endpoint = endpoint
        self.model = model
        self.timeout = timeout
        # Created on first access of `client`.
        self._client: ollama.Client | None = None

    @property
    def client(self) -> ollama.Client:
        """Lazily created ``ollama.Client`` bound to ``self.endpoint``."""
        if self._client is None:
            self._client = ollama.Client(host=self.endpoint)
        return self._client

    def is_available(self) -> bool:
        """Return True if the Ollama server answers a health check."""
        try:
            self.health_check()
            return True
        except Exception:
            # Any failure (connection refused, timeout, ...) means "not available".
            return False

    def health_check(self) -> bool:
        """Ping the server; return True when it responds.

        Raises:
            ConnectionError: if the server cannot be reached. The original
                exception is chained (``from e``) so the root cause stays in
                the traceback instead of being discarded.
        """
        try:
            response = self.client.ps()
            return response is not None
        except Exception as e:
            raise ConnectionError(f"Ollama health check failed: {e}") from e

    def _chat_kwargs(self, prompt: str, stream: bool, **kwargs) -> dict:
        """Build the keyword arguments shared by every chat call.

        Centralizing this guarantees the sync, async, and streaming paths
        send identical requests (previously the prompt and defaults were
        triplicated and could silently drift apart).
        """
        return {
            "model": self.model,
            "messages": [
                {"role": "system", "content": self._SYSTEM_PROMPT},
                {"role": "user", "content": prompt},
            ],
            "options": {
                "num_predict": kwargs.get("max_tokens", self._DEFAULT_MAX_TOKENS),
                "temperature": kwargs.get("temperature", self._DEFAULT_TEMPERATURE),
            },
            "stream": stream,
        }

    def _to_response(self, response) -> LLMResponse:
        """Convert a raw (non-streaming) chat response into an ``LLMResponse``."""
        return LLMResponse(
            text=response["message"]["content"],
            model=self.model,
            tokens_used=response.get("eval_count", 0),
            finish_reason=response.get("done_reason", "stop")
        )

    def generate(self, prompt: str, **kwargs) -> LLMResponse:
        """Generate a single completion synchronously.

        Args:
            prompt: User prompt, sent alongside the fixed system prompt.
            **kwargs: ``max_tokens`` (default 2048) and ``temperature``
                (default 0.3); other keys are ignored.

        Raises:
            RuntimeError: wrapping any failure from the Ollama client,
                with the cause chained.
        """
        try:
            response = self.client.chat(**self._chat_kwargs(prompt, stream=False, **kwargs))
            return self._to_response(response)
        except Exception as e:
            raise RuntimeError(f"Ollama generation failed: {e}") from e

    async def agenerate(self, prompt: str, **kwargs) -> LLMResponse:
        """Async variant of :meth:`generate`.

        Runs the blocking client call in a worker thread via
        ``asyncio.to_thread`` so the event loop is not blocked.

        Raises:
            RuntimeError: wrapping any failure from the Ollama client.
        """
        try:
            response = await asyncio.to_thread(
                self.client.chat,
                **self._chat_kwargs(prompt, stream=False, **kwargs)
            )
            return self._to_response(response)
        except Exception as e:
            raise RuntimeError(f"Ollama async generation failed: {e}") from e

    def stream_generate(self, prompt: str, **kwargs) -> Iterator[str]:
        """Yield completion text chunks as they arrive from the server.

        This is a plain synchronous generator (``yield`` in a non-async
        ``def``), so the correct return type is ``Iterator[str]`` — the
        previous ``AsyncIterator[str]`` annotation was wrong.

        Raises:
            RuntimeError: wrapping any failure, including errors raised
                mid-stream while iterating the response.
        """
        try:
            response = self.client.chat(**self._chat_kwargs(prompt, stream=True, **kwargs))
            for chunk in response:
                # Guard against keep-alive/terminal chunks without content.
                if "message" in chunk and "content" in chunk["message"]:
                    yield chunk["message"]["content"]
        except Exception as e:
            raise RuntimeError(f"Ollama streaming failed: {e}") from e

    def list_models(self) -> list[ModelInfo]:
        """Return models reported by the server; empty list on any failure.

        NOTE(review): ``client.ps()`` reports only *running* models. If the
        intent is "all installed models", ``client.list()`` is the matching
        call — confirm before changing.
        """
        try:
            response = self.client.ps()
            models = []
            if response and "models" in response:
                for entry in response["models"]:
                    models.append(ModelInfo(
                        name=entry.get("name", "unknown"),
                        size=entry.get("size", "unknown"),
                        # Fall back to "now" when the server omits the field.
                        # NOTE(review): recent Ollama responses use
                        # "modified_at", not "modified" — verify the key.
                        modified=entry.get("modified", datetime.now().isoformat()),
                        digest=entry.get("digest", "")
                    ))
            return models
        except Exception:
            # Best-effort by design: listing failures degrade to "no models".
            return []

    def pull_model(self, model_name: str) -> bool:
        """Pull ``model_name`` from the registry.

        Streams the pull and discards progress events so the whole download
        status is never buffered in memory.

        Returns:
            True on success, False on any error (best-effort, no raise).
        """
        try:
            for _ in self.client.pull(model_name, stream=True):
                pass
            return True
        except Exception:
            return False
|
||||
Reference in New Issue
Block a user