Configuring LMCache#

class lmcache.config.GlobalConfig[source]#
enable_debug: bool = True#
classmethod is_debug() → bool[source]#
classmethod set_debug(enable: bool)[source]#
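A minimal usage sketch of the two classmethods above; the printed message is illustrative:

    from lmcache.config import GlobalConfig

    # Turn on LMCache's global debug flag.
    GlobalConfig.set_debug(True)

    # Downstream code can branch on the current setting.
    if GlobalConfig.is_debug():
        print("LMCache debug mode is enabled")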
class lmcache.config.LMCacheEngineConfig(chunk_size: int, local_device: str | None, remote_url: str | None, remote_serde: str | None, pipelined_backend: bool, save_decode_cache: bool, enable_blending: bool)[source]#
chunk_size: int#
enable_blending: bool#
static from_defaults(chunk_size: int = 256, local_device: str = 'cuda', remote_url: str = 'redis://localhost:6379', remote_serde: str = 'torch', pipelined_backend: bool = False, save_decode_cache: bool = False, enable_blending: bool = False) → LMCacheEngineConfig[source]#
static from_file(file_path: str) → LMCacheEngineConfig[source]#

Load the config from a YAML file.

static from_legacy(chunk_size: int = 256, backend: str = 'cuda', persist_path: str | None = None, remote_serde: str | None = 'torch', pipelined_backend: bool = False, save_decode_cache: bool = False) → LMCacheEngineConfig[source]#
local_device: str | None#
pipelined_backend: bool#
remote_serde: str | None#
remote_url: str | None#
save_decode_cache: bool#
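A sketch of building a config either from defaults or from a YAML file. The keyword overrides mirror the from_defaults signature documented above; the file name and its keys are illustrative assumptions:

    from lmcache.config import LMCacheEngineConfig

    # Start from the documented defaults and override a few fields.
    config = LMCacheEngineConfig.from_defaults(
        chunk_size=256,
        remote_url="redis://localhost:6379",
        save_decode_cache=True,
    )

    # Or load the settings from a YAML file whose keys follow the
    # dataclass fields above (path and key names assumed here).
    config = LMCacheEngineConfig.from_file("lmcache_config.yaml")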
class lmcache.config.LMCacheEngineMetadata(model_name: str, world_size: int, worker_id: int, fmt: str, dtype: str)[source]#
dtype: str#

the data type of kv tensors

fmt: str#

the format of kv tensors

model_name: str#

name of the LLM model

worker_id: int#

worker id when running under a distributed setting

world_size: int#

world size when running under a distributed setting
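A hypothetical metadata object for a single-GPU deployment; the model name and the fmt/dtype strings below are placeholders, not values mandated by LMCache:

    from lmcache.config import LMCacheEngineMetadata

    # All field values are illustrative; substitute whatever your
    # serving stack actually reports for format and dtype.
    metadata = LMCacheEngineMetadata(
        model_name="mistralai/Mistral-7B-Instruct-v0.2",
        world_size=1,   # single-process setting
        worker_id=0,    # this worker's rank
        fmt="vllm",     # assumed kv tensor layout label
        dtype="half",   # assumed dtype string
    )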