shell-command-generator-cli/app/shellgen/backends/llama_cpp.py

"""Llama.cpp backend implementation."""
import os
from typing import Optional, TYPE_CHECKING

if TYPE_CHECKING:
    from llama_cpp import Llama

from .base import LLMBackend


class LlamaCppBackend(LLMBackend):
    """Llama.cpp Python bindings backend."""

    def __init__(
        self,
        model_path: str = "~/.cache/llama-cpp/models/",
        n_ctx: int = 2048,
        n_threads: int = 4,
        temperature: float = 0.1,
        max_tokens: int = 500,
    ):
        """Initialize the Llama.cpp backend.

        Args:
            model_path: Path to the model file.
            n_ctx: Context window size.
            n_threads: Number of threads to use.
            temperature: Generation temperature.
            max_tokens: Maximum tokens to generate.
        """
        self.model_path = model_path
        self.n_ctx = n_ctx
        self.n_threads = n_threads
        self.temperature = temperature
        self.max_tokens = max_tokens
        self._llm: Optional["Llama"] = None

    def _load_model(self) -> "Llama":
        """Load the model if not already loaded.

        Returns:
            Loaded Llama instance.
        """
        from llama_cpp import Llama as LlamaClass

        if self._llm is None:
            # Expand "~" via the OS; a plain string replace with "$HOME" is a
            # shell expansion and would not be resolved when opening the file.
            expanded_path = os.path.expanduser(self.model_path)
            # Temperature is a sampling parameter; it is passed per call in
            # generate(), not to the Llama constructor.
            self._llm = LlamaClass(
                model_path=expanded_path,
                n_ctx=self.n_ctx,
                n_threads=self.n_threads,
            )
        return self._llm

    def generate(self, prompt: str) -> str:
        """Generate a response using llama-cpp-python.

        Args:
            prompt: The prompt to send.

        Returns:
            Generated response text.

        Raises:
            ConnectionError: If loading the model or generation fails.
        """
        try:
            llm = self._load_model()
            response = llm(
                prompt,
                max_tokens=self.max_tokens,
                temperature=self.temperature,
                stop=["</s>", "###"],
            )
            return response["choices"][0]["text"]
        except Exception as e:
            raise ConnectionError(f"Llama.cpp error: {e}") from e

    def is_available(self) -> bool:
        """Check if model can be loaded.

        Returns:
            True if backend is available.
        """
        try:
            self._load_model()
            return True
        except Exception:
            return False

    def get_model_name(self) -> str:
        """Get the model name from path.

        Returns:
            Model name string.
        """
        return os.path.basename(self.model_path)

    def set_model(self, model: str) -> None:
        """Set the model path.

        Args:
            model: Path to the model.
        """
        self.model_path = model
        self._llm = None

    def close(self) -> None:
        """Clean up model resources."""
        if self._llm is not None:
            del self._llm
            self._llm = None
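

# ---------------------------------------------------------------------------
# Usage sketch (illustrative; not part of the original module). The model
# filename is a placeholder, and the "### Instruction / ### Response" prompt
# style is only an assumption that matches the "###" stop token above.
# Because of the relative import of LLMBackend, run this as a module from the
# project root (e.g. `python -m app.shellgen.backends.llama_cpp`, depending on
# how the package is laid out).
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    backend = LlamaCppBackend(
        model_path="~/.cache/llama-cpp/models/model.gguf",  # hypothetical file
        n_threads=4,
    )
    if backend.is_available():
        print(f"Using model: {backend.get_model_name()}")
        print(backend.generate("### Instruction: list files by size\n### Response:"))
    else:
        print("llama.cpp backend unavailable (missing package or model file)")
    backend.close()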