"""Base indexer interface for documentation parsing.""" from abc import ABC, abstractmethod from pathlib import Path from typing import Generator, List from local_api_docs_search.models.document import Document, SourceType class BaseIndexer(ABC): """Abstract base class for document indexers.""" source_type: SourceType @abstractmethod def index(self, path: Path, recursive: bool = False) -> List[Document]: """Index documents from the given path. Args: path: Path to file or directory to index recursive: Whether to search directories recursively Returns: List of indexed Document objects """ pass @abstractmethod def get_documents(self) -> List[Document]: """Get all indexed documents. Returns: List of Document objects """ pass def _find_files(self, path: Path, recursive: bool = False) -> Generator[Path, None, None]: """Find files to index in the given path. Args: path: Path to file or directory recursive: Whether to search recursively Yields: Path objects for each file found """ if path.is_file(): if self._is_supported_file(path): yield path elif path.is_dir(): pattern = "**/*" if recursive else "*" for file_path in path.glob(pattern): if file_path.is_file() and self._is_supported_file(file_path): yield file_path @abstractmethod def _is_supported_file(self, path: Path) -> bool: """Check if the file is supported by this indexer. Args: path: Path to the file Returns: True if the file is supported """ pass def _generate_id(self, file_path: Path, suffix: str = "") -> str: """Generate a unique document ID. Args: file_path: Path to the source file suffix: Optional suffix to add to the ID Returns: Unique document ID string """ stem = file_path.stem.replace(" ", "_").lower() if suffix: return f"{stem}_{suffix}" return stem