Source code for lmcache.storage_backend.evictor.lru_evictor

from collections import OrderedDict
from typing import Union

import torch

from lmcache.logging import init_logger
from lmcache.storage_backend.evictor.base_evictor import BaseEvictor, PutStatus
from lmcache.utils import CacheEngineKey

logger = init_logger(__name__)


class LRUEvictor(BaseEvictor):
    """
    LRU cache evictor
    """

    def __init__(self, max_cache_size: float = 10.0):
        # TODO(Jiayi): need to be configured
        # the storage size limit (in GB)
        self.MAX_CACHE_SIZE = max_cache_size

        # TODO(Jiayi): need a way to avoid fragmentation
        # current storage size (in GB)
        self.current_cache_size = 0.0

    def update_on_get(self, key: Union[CacheEngineKey, str],
                      cache_dict: OrderedDict) -> None:
        """
        Refresh recency on a cache hit by moving the entry to the
        most-recently-used end of the OrderedDict.

        Input:
            key: a CacheEngineKey or a str
            cache_dict: a dict consisting of the current cache
        """
        cache_dict.move_to_end(key)

    # FIXME(Jiayi): comment out return type to bypass type checks
    # Need to align CacheEngineKey & str
    def update_on_put(
        self, cache_dict: OrderedDict, kv_obj: Union[torch.Tensor, bytes]
    ):  # -> Tuple[List[Union[CacheEngineKey, str]], PutStatus]:
        """
        Evict cache entries when a new cache comes and the storage is full.

        Input:
            cache_dict: a dict consisting of the current cache
            kv_obj: the new kv cache to be injected

        Return:
            evict_keys: a list of keys to be evicted
            status: PutStatus.LEGAL if the put can proceed,
                PutStatus.ILLEGAL if kv_obj alone exceeds the size limit
        """
        evict_keys = []
        cache_size = self.get_size(kv_obj)
        iter_cache_dict = iter(cache_dict)

        # reject objects that can never fit, even in an empty cache
        if cache_size > self.MAX_CACHE_SIZE:
            logger.info("Put failed due to limited cache storage")
            return [], PutStatus.ILLEGAL

        # evict the least-recently-used entries until there's enough space
        while cache_size + self.current_cache_size > \
                self.MAX_CACHE_SIZE:
            evict_key = next(iter_cache_dict)
            evict_cache_size = self.get_size(cache_dict[evict_key])
            self.current_cache_size -= evict_cache_size
            evict_keys.append(evict_key)

        # account for the size of the newly injected object
        self.current_cache_size += cache_size

        logger.debug(f"Evicting {len(evict_keys)} chunks")
        return evict_keys, PutStatus.LEGAL
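

# ---------------------------------------------------------------------------
# Usage sketch (illustrative; not part of the original module). It shows how
# a cache engine might drive this evictor: update_on_put decides what to
# evict before an insert, and update_on_get refreshes recency on a hit. The
# helper names below (_demo_put, _demo_get) are hypothetical, and the sketch
# assumes BaseEvictor.get_size() measures kv objects in GB, the same unit as
# MAX_CACHE_SIZE.
# ---------------------------------------------------------------------------


def _demo_put(evictor: LRUEvictor, cache_dict: OrderedDict, key: str,
              kv_obj: torch.Tensor) -> bool:
    # hypothetical helper; assumes `key` is not already in cache_dict
    evict_keys, status = evictor.update_on_put(cache_dict, kv_obj)
    if status == PutStatus.ILLEGAL:
        # the object alone exceeds MAX_CACHE_SIZE; drop the put
        return False
    for evict_key in evict_keys:
        # remove the least-recently-used entries chosen by the evictor
        cache_dict.pop(evict_key)
    # insert at the most-recently-used end of the OrderedDict
    cache_dict[key] = kv_obj
    return True


def _demo_get(evictor: LRUEvictor, cache_dict: OrderedDict, key: str):
    # hypothetical helper; returns None on a miss
    if key not in cache_dict:
        return None
    # a hit moves the entry to the most-recently-used end
    evictor.update_on_get(key, cache_dict)
    return cache_dict[key]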