"""Ollama service for Local Code Assistant."""
import json
from collections.abc import Generator
from typing import Any, Optional
import requests
from requests.exceptions import ConnectionError, Timeout
from local_code_assistant.services.config import ConfigService
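
# Configuration values this module reads from ConfigService:
#   ollama_base_url  - base URL of the Ollama server (used for all requests)
#   ollama_timeout   - per-request timeout in seconds
#   ollama_model     - default model name for generate() and chat()
#   streaming        - default for whether responses are streamed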

class OllamaServiceError(Exception):
    """Base exception for Ollama service errors."""


class OllamaConnectionError(OllamaServiceError):
    """Exception raised when connection to Ollama fails."""


class OllamaModelError(OllamaServiceError):
    """Exception raised when a model operation fails."""

class OllamaService:
    """Service for interacting with the Ollama API."""

    def __init__(self, config: ConfigService):
        """Initialize Ollama service.

        Args:
            config: Configuration service instance.
        """
        self.config = config
        self.base_url = config.ollama_base_url
        self.timeout = config.ollama_timeout

    def _make_request(
        self,
        endpoint: str,
        method: str = "GET",
        data: Optional[dict[str, Any]] = None
    ) -> dict[str, Any]:
        """Make HTTP request to Ollama API.

        Args:
            endpoint: API endpoint.
            method: HTTP method.
            data: Request data.

        Returns:
            Response data as dictionary.

        Raises:
            OllamaConnectionError: If connection fails.
            OllamaModelError: If API returns error.
        """
        url = f"{self.base_url}/{endpoint}"
        try:
            if method == "GET":
                response = requests.get(url, timeout=self.timeout)
            elif method == "POST":
                response = requests.post(url, json=data, timeout=self.timeout)
            else:
                raise ValueError(f"Unsupported HTTP method: {method}")
            response.raise_for_status()
            return response.json()
        except ConnectionError as e:
            raise OllamaConnectionError(
                f"Failed to connect to Ollama at {self.base_url}. "
                "Make sure Ollama is running."
            ) from e
        except Timeout as e:
            raise OllamaServiceError(f"Request timed out after {self.timeout}s") from e
        except requests.exceptions.HTTPError as e:
            error_msg = f"API request failed: {e.response.text}"
            try:
                error_data = e.response.json()
                if "error" in error_data:
                    error_msg = f"Ollama error: {error_data['error']}"
            except Exception:
                pass
            raise OllamaModelError(error_msg) from e
        except Exception as e:
            raise OllamaServiceError(f"Unexpected error: {str(e)}") from e
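
    # Note: _make_request maps low-level failures onto this module's exception
    # hierarchy: requests ConnectionError -> OllamaConnectionError, HTTP error
    # responses -> OllamaModelError (preferring the "error" field of the JSON
    # body when present), and timeouts or anything else -> OllamaServiceError.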

    def check_connection(self) -> bool:
        """Check if Ollama is running and accessible.

        Returns:
            True if connection successful, False otherwise.
        """
        try:
            self._make_request("api/tags")
            return True
        except Exception:
            return False

    def list_models(self) -> list[str]:
        """List available models.

        Returns:
            List of model names.
        """
        try:
            response = self._make_request("api/tags")
            models = response.get("models", [])
            return [model["name"] for model in models]
        except Exception:
            return []

    def generate(
        self,
        prompt: str,
        model: Optional[str] = None,
        stream: Optional[bool] = None,
        system: Optional[str] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None
    ) -> str:
        """Generate response from model.

        Args:
            prompt: User prompt.
            model: Model to use. Defaults to config default.
            stream: Whether to stream response. Defaults to config setting.
            system: System prompt.
            temperature: Temperature for generation.
            max_tokens: Maximum tokens to generate.

        Returns:
            Generated response text.
        """
        model = model or self.config.ollama_model
        stream = stream if stream is not None else self.config.streaming
        data: dict[str, Any] = {
            "model": model,
            "prompt": prompt,
            "stream": stream,
            "options": {}
        }
        if system:
            data["system"] = system
        if temperature is not None:
            data["options"]["temperature"] = temperature
        if max_tokens is not None:
            data["options"]["num_predict"] = max_tokens
        if stream:
            response_text = ""
            for chunk in self._stream_generate(data):
                if "response" in chunk:
                    response_text += chunk["response"]
            return response_text
        else:
            response = self._make_request("api/generate", method="POST", data=data)
            return response.get("response", "")
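
    # Illustrative call (example values, not from the original module):
    #     service.generate("Explain this traceback", system="You are a code assistant",
    #                      temperature=0.2, max_tokens=256)
    # temperature and max_tokens are forwarded via the Ollama "options" payload
    # (max_tokens as "num_predict").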

    def _stream_generate(self, data: dict[str, Any]) -> Generator[dict[str, Any], None, None]:
        """Stream response from model.

        Args:
            data: Request data.

        Yields:
            Response chunks.
        """
        url = f"{self.base_url}/api/generate"
        try:
            response = requests.post(
                url,
                json=data,
                timeout=self.timeout,
                stream=True
            )
            response.raise_for_status()
            for line in response.iter_lines():
                if line:
                    chunk = json.loads(line.decode('utf-8'))
                    yield chunk
                    if chunk.get("done", False):
                        break
        except ConnectionError as e:
            raise OllamaConnectionError(
                f"Failed to connect to Ollama at {self.base_url}."
            ) from e
        except Timeout as e:
            raise OllamaServiceError(f"Streaming timed out after {self.timeout}s") from e
        except Exception as e:
            raise OllamaServiceError(f"Streaming error: {str(e)}") from e
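
    # chat() expects Ollama-style message dicts, for example:
    #     [{"role": "system", "content": "You are a local code assistant."},
    #      {"role": "user", "content": "Refactor this function."}]
    # (the prompts shown are illustrative; roles follow the usual
    # system/user/assistant convention of the Ollama chat API).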
    def chat(
        self,
        messages: list[dict[str, str]],
        model: Optional[str] = None,
        stream: Optional[bool] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None
    ) -> str:
        """Chat with model using message history.

        Args:
            messages: List of messages with 'role' and 'content'.
            model: Model to use. Defaults to config default.
            stream: Whether to stream response.
            temperature: Temperature for generation.
            max_tokens: Maximum tokens to generate.

        Returns:
            Generated response text.
        """
        model = model or self.config.ollama_model
        stream = stream if stream is not None else self.config.streaming
        data: dict[str, Any] = {
            "model": model,
            "messages": messages,
            "stream": stream,
            "options": {}
        }
        if temperature is not None:
            data["options"]["temperature"] = temperature
        if max_tokens is not None:
            data["options"]["num_predict"] = max_tokens
        if stream:
            response_text = ""
            for chunk in self._stream_chat(data):
                if "message" in chunk and "content" in chunk["message"]:
                    response_text += chunk["message"]["content"]
            return response_text
        else:
            response = self._make_request("api/chat", method="POST", data=data)
            return response.get("message", {}).get("content", "")

    def _stream_chat(self, data: dict[str, Any]) -> Generator[dict[str, Any], None, None]:
        """Stream chat response from model.

        Args:
            data: Request data.

        Yields:
            Response chunks.
        """
        url = f"{self.base_url}/api/chat"
        try:
            response = requests.post(
                url,
                json=data,
                timeout=self.timeout,
                stream=True
            )
            response.raise_for_status()
            for line in response.iter_lines():
                if line:
                    chunk = json.loads(line.decode('utf-8'))
                    yield chunk
                    if chunk.get("done", False):
                        break
        except ConnectionError as e:
            raise OllamaConnectionError(
                f"Failed to connect to Ollama at {self.base_url}."
            ) from e
        except Timeout as e:
            raise OllamaServiceError(f"Streaming timed out after {self.timeout}s") from e
        except Exception as e:
            raise OllamaServiceError(f"Streaming error: {str(e)}") from e

    def get_model_info(self, model: str) -> dict[str, Any]:
        """Get information about a specific model.

        Args:
            model: Model name.

        Returns:
            Model information.
        """
        try:
            response = self._make_request("api/show", method="POST", data={"name": model})
            return response
        except Exception:
            return {}
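
# Illustrative usage sketch (not part of the original module; assumes a
# ConfigService instance is constructed elsewhere with the attributes listed
# at the top of this file):
#
#     config = ConfigService()          # hypothetical no-arg construction
#     service = OllamaService(config)
#     if service.check_connection():
#         print(service.list_models())
#         print(service.generate("Write a docstring for this function"))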