shell-command-generator-cli/app/shellgen/backends/llama_cpp.py

"""Llama.cpp backend implementation."""
import os
from typing import Optional, TYPE_CHECKING

if TYPE_CHECKING:
    from llama_cpp import Llama

from .base import LLMBackend


class LlamaCppBackend(LLMBackend):
    """Llama.cpp Python bindings backend."""

    def __init__(
        self,
        model_path: str = "~/.cache/llama-cpp/models/",
        n_ctx: int = 2048,
        n_threads: int = 4,
        temperature: float = 0.1,
        max_tokens: int = 500,
    ):
        """Initialize the Llama.cpp backend.

        Args:
            model_path: Path to the model file.
            n_ctx: Context window size.
            n_threads: Number of threads to use.
            temperature: Generation temperature.
            max_tokens: Maximum tokens to generate.
        """
        self.model_path = model_path
        self.n_ctx = n_ctx
        self.n_threads = n_threads
        self.temperature = temperature
        self.max_tokens = max_tokens
        self._llm: Optional["Llama"] = None

    def _load_model(self) -> "Llama":
        """Load the model if not already loaded.

        Returns:
            Loaded Llama instance.
        """
        from llama_cpp import Llama as LlamaClass

        if self._llm is None:
            # Expand "~" via the OS; a plain string replace with "$HOME" is a
            # shell expansion and would not be resolved when opening the file.
            expanded_path = os.path.expanduser(self.model_path)
            # Temperature is a sampling parameter; it is passed per call in
            # generate(), not to the Llama constructor.
            self._llm = LlamaClass(
                model_path=expanded_path,
                n_ctx=self.n_ctx,
                n_threads=self.n_threads,
            )
        return self._llm

    def generate(self, prompt: str) -> str:
        """Generate a response using llama-cpp-python.

        Args:
            prompt: The prompt to send.

        Returns:
            Generated response text.

        Raises:
            ConnectionError: If loading the model or generation fails.
        """
        try:
            llm = self._load_model()
            response = llm(
                prompt,
                max_tokens=self.max_tokens,
                temperature=self.temperature,
                stop=["</s>", "###"],
            )
            return response["choices"][0]["text"]
        except Exception as e:
            raise ConnectionError(f"Llama.cpp error: {e}") from e

    def is_available(self) -> bool:
        """Check if model can be loaded.

        Returns:
            True if backend is available.
        """
        try:
            self._load_model()
            return True
        except Exception:
            return False

    def get_model_name(self) -> str:
        """Get the model name from path.

        Returns:
            Model name string.
        """
        return os.path.basename(self.model_path)

    def set_model(self, model: str) -> None:
        """Set the model path.

        Args:
            model: Path to the model.
        """
        self.model_path = model
        self._llm = None

    def close(self) -> None:
        """Clean up model resources."""
        if self._llm is not None:
            del self._llm
            self._llm = None
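

# ---------------------------------------------------------------------------
# Usage sketch (illustrative; not part of the original module). The model
# filename is a placeholder, and the "### Instruction / ### Response" prompt
# style is only an assumption that matches the "###" stop token above.
# Because of the relative import of LLMBackend, run this as a module from the
# project root (e.g. `python -m app.shellgen.backends.llama_cpp`, depending on
# how the package is laid out).
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    backend = LlamaCppBackend(
        model_path="~/.cache/llama-cpp/models/model.gguf",  # hypothetical file
        n_threads=4,
    )
    if backend.is_available():
        print(f"Using model: {backend.get_model_name()}")
        print(backend.generate("### Instruction: list files by size\n### Response:"))
    else:
        print("llama.cpp backend unavailable (missing package or model file)")
    backend.close()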