Source code for lmcache.storage_backend.abstract_backend
import abc
from typing import Iterable, Optional, Tuple

import torch

from lmcache.logging import init_logger
from lmcache.utils import CacheEngineKey

logger = init_logger(__name__)
class LMCBackendInterface(metaclass=abc.ABCMeta):
    def __init__(
        self,
        dst_device: str = "cuda",
    ):
        """Initialize the storage backend.

        :param dst_device: the device where the retrieved KV cache will be
            stored; can be "cpu", "cuda", or a specific device such as
            "cuda:0", "cuda:1", etc.

        :raise: RuntimeError if the device is not valid
        """
        # Validate the device string early; torch.device raises a
        # RuntimeError for invalid device specifications.
        try:
            torch.device(dst_device)
        except RuntimeError:
            raise

        self.dst_device = dst_device
    @abc.abstractmethod
    def put(
        self,
        key: CacheEngineKey,
        kv_chunk: torch.Tensor,
        blocking=True,
    ) -> None:
        """
        Store the KV cache of the tokens into the cache engine.

        :param key: the key of the token chunk, in the format of
            CacheEngineKey
        :param kv_chunk: the KV cache of the token chunk, as a big tensor
        :param blocking: whether to block the call until the operation is
            completed

        :return: None

        Note:
            The KV cache should NOT have the "batch" dimension.
        """
        raise NotImplementedError
    @abc.abstractmethod
    def contains(
        self,
        key: CacheEngineKey,
    ) -> bool:
        """
        Query whether a key is in the cache or not.

        :return: True if the key is in the cache, False otherwise
        """
        raise NotImplementedError
    @abc.abstractmethod
    def get(
        self,
        key: CacheEngineKey,
    ) -> Optional[torch.Tensor]:
        """
        Retrieve the KV cache chunk by the given key.

        :param key: the key of the token chunk, including the
            prefix hash and format

        :return: the KV cache of the token chunk as a big tensor,
            or None if the key is not found
        """
        raise NotImplementedError
    def batched_put(
        self,
        keys_and_chunks: Iterable[Tuple[CacheEngineKey, torch.Tensor]],
        blocking=True,
    ) -> int:
        """
        Store multiple keys and KV cache chunks into the cache engine in a
        batched manner.

        :param keys_and_chunks: an iterable of (key, KV chunk) pairs, where
            each key is a CacheEngineKey and each KV chunk is a big tensor
        :param blocking: whether to block the call until the operation is
            completed

        :return: the number of chunks stored
        """
        logger.info("Using default batched implementation of the put() method")
        nchunks = 0
        for key, kv_chunk in keys_and_chunks:
            self.put(key, kv_chunk, blocking=blocking)
            nchunks += 1
        return nchunks
    def batched_get(
        self,
        keys: Iterable[CacheEngineKey],
    ) -> Iterable[Optional[torch.Tensor]]:
        """
        Retrieve the KV cache chunks by the given keys in a batched manner.

        :param keys: an iterable of keys of the token chunks, including the
            prefix hash and format

        :return: an iterator over the KV cache of the token chunks, each a
            big tensor, or None if the corresponding key is not found
        """
        logger.info("Using default batched implementation of the get() method")
        for key in keys:
            if self.contains(key):  # Jiayi: This seems to be redundant?
                yield self.get(key)
            else:
                yield None
    @abc.abstractmethod
    def close(self):
        """
        Do the cleanup, if any.

        Subclasses should override this method if necessary.
        """
        pass
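
# ---------------------------------------------------------------------------
# The sketch below is NOT part of lmcache; it is a minimal illustration of
# how a concrete backend might implement LMCBackendInterface. The class name
# (InMemoryBackend) and its dict-based storage are hypothetical, shown only
# to clarify the expected put/contains/get/close contract.
# ---------------------------------------------------------------------------
class InMemoryBackend(LMCBackendInterface):
    """Toy backend that keeps KV chunks in a Python dict on dst_device."""

    def __init__(self, dst_device: str = "cuda"):
        super().__init__(dst_device)
        self._store = {}

    def put(self, key, kv_chunk, blocking=True):
        # Move the chunk to the destination device before storing it.
        self._store[key] = kv_chunk.to(self.dst_device)

    def contains(self, key):
        return key in self._store

    def get(self, key):
        # Returns the stored tensor, or None if the key is not found,
        # matching the get() contract above.
        return self._store.get(key, None)

    def close(self):
        self._store.clear()

# Hypothetical usage, relying on the default batched_put()/batched_get():
#   backend = InMemoryBackend(dst_device="cpu")
#   backend.batched_put([(some_key, some_kv_tensor)])
#   chunk = backend.get(some_key)  # the tensor, or None if missing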