Add shellgen core and backends modules

2026-01-29 12:42:43 +00:00
parent 73172fe0b3
commit 1f37e9538f
1 changed files with 111 additions and 0 deletions
--- a/app/shellgen/backends/llama_cpp.py
+++ b/app/shellgen/backends/llama_cpp.py
@@ -0,0 +1,111 @@
 """Llama.cpp backend implementation."""
 from typing import Optional, TYPE_CHECKING
 if TYPE_CHECKING:
    from llama_cpp import Llama
 from .base import LLMBackend
 class LlamaCppBackend(LLMBackend):
    """Llama.cpp Python bindings backend.

    The underlying ``Llama`` instance is created lazily on first use and
    cached on the backend until ``set_model`` or ``close`` discards it.
    """

    def __init__(
        self,
        model_path: str = "~/.cache/llama-cpp/models/",
        n_ctx: int = 2048,
        n_threads: int = 4,
        temperature: float = 0.1,
        max_tokens: int = 500,
    ):
        """Initialize the Llama.cpp backend.

        No model is loaded here; loading is deferred to ``_load_model``.

        Args:
            model_path: Path to the model file. A leading ``~`` is expanded
                to the user's home directory when the model is loaded.
            n_ctx: Context window size.
            n_threads: Number of threads to use.
            temperature: Generation temperature.
            max_tokens: Maximum tokens to generate.
        """
        self.model_path = model_path
        self.n_ctx = n_ctx
        self.n_threads = n_threads
        self.temperature = temperature
        self.max_tokens = max_tokens
        self._llm: Optional["Llama"] = None

    def _load_model(self) -> "Llama":
        """Load the model if not already loaded.

        Returns:
            Loaded Llama instance.

        Raises:
            ImportError: If llama-cpp-python is not installed.
            Exception: Whatever the ``Llama`` constructor raises for a
                missing or invalid model file.
        """
        if self._llm is None:
            # Imported lazily so this module stays importable when
            # llama-cpp-python is not installed.
            import os
            from llama_cpp import Llama as LlamaClass

            # expanduser resolves "~" to a real directory; substituting the
            # literal text "$HOME" would never be expanded by anything.
            expanded_path = os.path.expanduser(self.model_path)
            # Temperature is a sampling option and is supplied per call in
            # generate(), so it is not passed to the constructor.
            self._llm = LlamaClass(
                model_path=expanded_path,
                n_ctx=self.n_ctx,
                n_threads=self.n_threads,
            )
        return self._llm

    def generate(self, prompt: str) -> str:
        """Generate response using llama-cpp-python.

        Args:
            prompt: The prompt to send.

        Returns:
            Generated response text (the first completion choice).

        Raises:
            ConnectionError: If loading the model or running the completion
                fails for any reason; the original exception is chained as
                the cause.
        """
        try:
            llm = self._load_model()
            response = llm(
                prompt,
                max_tokens=self.max_tokens,
                temperature=self.temperature,
                stop=["</s>", "###"],
            )
            return response["choices"][0]["text"]
        except Exception as e:
            # Keep the exception type callers already catch, but preserve
            # the underlying traceback for debugging.
            raise ConnectionError(f"Llama.cpp error: {e}") from e

    def is_available(self) -> bool:
        """Check if model can be loaded.

        Note that this actually loads the model (and keeps it cached), so
        the first call can be slow.

        Returns:
            True if backend is available.
        """
        try:
            self._load_model()
            return True
        except Exception:
            # Best-effort probe: any failure simply means "not available".
            return False

    def get_model_name(self) -> str:
        """Get the model name from path.

        Returns:
            The final component of ``model_path``; trailing separators are
            ignored, and an empty path yields an empty string.
        """
        from pathlib import PurePath

        return PurePath(self.model_path).name

    def set_model(self, model: str) -> None:
        """Set the model path.

        Any model loaded from the previous path is released so that the
        next use loads the new one.

        Args:
            model: Path to the model.
        """
        self.close()
        self.model_path = model

    def close(self) -> None:
        """Clean up model resources.

        Safe to call repeatedly and when no model has been loaded.
        """
        llm = self._llm
        # Clear the cache first so the backend is left in a clean state
        # even if releasing the model raises.
        self._llm = None
        if llm is not None:
            # Release native memory explicitly when the bindings support it;
            # otherwise dropping the last reference is all we can do.
            release = getattr(llm, "close", None)
            if callable(release):
                release()