Source code for lmcache.experimental.config

import os
import re
from dataclasses import dataclass
from typing import Any, Optional

import yaml

import lmcache.config as orig_config



[docs]
@dataclass
class LMCacheEngineConfig:
    chunk_size: int
    local_cpu: bool
    max_local_cpu_size: float  # in GB
    # need to be assigned a non-zero
    # value even if local_cpu is disabled
    local_disk: Optional[str]
    max_local_disk_size: float  # in GB

    remote_url: Optional[str]
    remote_serde: Optional[str]  # Can be "naive" or "cachegen"

    save_decode_cache: bool  # whether to store decode kv cache

    # Blending related configurations
    enable_blending: bool  # whether to enable blending
    blend_recompute_ratio: float  # the ratio of blending recompute
    blend_min_tokens: int  # the minimum number of tokens for blending

    # P2P related configurations
    enable_p2p: bool  # whether to enable peer-to-peer sharing
    lookup_url: Optional[str]  # the url of the lookup server
    distributed_url: Optional[str]  # the url of the distributed server

    # Error handling related configurations
    error_handling: bool  # whether to enable error handling with
    # full recompute


[docs]
    @staticmethod
    def from_defaults(
        chunk_size: int = 256,
        local_cpu: bool = True,
        max_local_cpu_size: float = 5.0,
        local_disk: Optional[str] = None,
        max_local_disk_size: int = 0,
        remote_url: Optional[str] = "lm://localhost:65432",
        remote_serde: Optional[str] = "naive",
        save_decode_cache: bool = False,
        enable_blending: bool = False,
        blend_recompute_ratio: float = 0.15,
        blend_min_tokens: int = 256,
        enable_p2p: bool = False,
        lookup_url: Optional[str] = None,
        distributed_url: Optional[str] = None,
        error_handling: bool = False,
    ) -> "LMCacheEngineConfig":
        return LMCacheEngineConfig(chunk_size, local_cpu, max_local_cpu_size,
                                   local_disk, max_local_disk_size, remote_url,
                                   remote_serde, save_decode_cache,
                                   enable_blending, blend_recompute_ratio,
                                   blend_min_tokens, enable_p2p, lookup_url,
                                   distributed_url, error_handling)



[docs]
    @staticmethod
    def from_legacy(
        chunk_size: int = 256,
        backend: str = "cpu",
        remote_url: Optional[str] = "lm://localhost:65432",
        remote_serde: str = "naive",
        save_decode_cache: bool = False,
        enable_blending: bool = False,
        blend_recompute_ratio: float = 0.15,
        blend_min_tokens: int = 256,
        max_local_disk_size: float = 0.0,
        enable_p2p: bool = False,
        lookup_url: Optional[str] = None,
        distributed_url: Optional[str] = None,
        error_handling: bool = False,
    ) -> "LMCacheEngineConfig":
        if backend == "cpu":
            local_cpu = True
            max_local_cpu_size = 5
            local_disk = None
            max_local_disk_size = 0
            remote_url = None
        elif backend == "local_disk":
            local_cpu = False
            max_local_cpu_size = 5
            local_disk = "/local/disk_test/local_disk/"
            max_local_disk_size = 5
            remote_url = None
        elif backend == "local_cpu_disk":
            local_cpu = True
            max_local_cpu_size = 5
            local_disk = "/local/disk_test/local_disk/"
            max_local_disk_size = 5
            remote_url = None
        elif backend == "remote":
            local_cpu = False
            max_local_cpu_size = 5
            local_disk = None
        elif backend == "local_cpu_remote":
            local_cpu = True
            max_local_cpu_size = 5
            local_disk = None
        elif backend == "local_disk_remote":
            local_cpu = False
            max_local_cpu_size = 5
            local_disk = "/local/disk_test/local_disk/"
            max_local_disk_size = 5
        elif backend == "local_cpu_disk_remote":
            local_cpu = True
            max_local_cpu_size = 5
            local_disk = "/local/disk_test/local_disk/"
            max_local_disk_size = 5
        else:
            raise ValueError(f"Invalid backend: {backend}")
        return LMCacheEngineConfig(chunk_size, local_cpu, max_local_cpu_size,
                                   local_disk, max_local_disk_size, remote_url,
                                   remote_serde, save_decode_cache,
                                   enable_blending, blend_recompute_ratio,
                                   blend_min_tokens, enable_p2p, lookup_url,
                                   distributed_url, error_handling)



[docs]
    @staticmethod
    def from_file(file_path: str) -> "LMCacheEngineConfig":
        """
        Load the config from a yaml file
        """
        with open(file_path, "r") as fin:
            config = yaml.safe_load(fin)

        chunk_size = config.get("chunk_size", 256)

        local_cpu = config.get("local_cpu", True)
        max_local_cpu_size = config.get("max_local_cpu_size", 5)

        local_disk = config.get("local_disk", None)
        max_local_disk_size = config.get("max_local_disk_size", 5)

        remote_url = config.get("remote_url", None)
        remote_serde = config.get("remote_serde", "naive")

        save_decode_cache = config.get("save_decode_cache", False)
        enable_blending = config.get("enable_blending", False)
        blend_recompute_ratio = config.get("blend_recompute_ratio", 0.15)
        blend_min_tokens = config.get("blend_min_tokens", 256)

        enable_p2p = config.get("enable_p2p", False)
        lookup_url = config.get("lookup_url", None)
        distributed_url = config.get("distributed_url", None)

        error_handling = config.get("error_handling", False)
        if enable_p2p:
            assert lookup_url is not None
            assert distributed_url is not None

        match local_disk:
            case None:
                local_disk_path = None
            case path if re.match(r"file://(.*)/",
                                  path):  # local disk directory
                local_disk_path = path[7:]

        match remote_url:
            case None:
                pass
            case url if re.match(r"(.*)://(.*):(\d+)", url):
                pass
            case _:
                raise ValueError(f"Invalid remote storage url: {remote_url}")

        return LMCacheEngineConfig(
            chunk_size,
            local_cpu,
            max_local_cpu_size,
            local_disk_path,
            max_local_disk_size,
            remote_url,
            remote_serde,
            save_decode_cache,
            enable_blending,
            blend_recompute_ratio,
            blend_min_tokens,
            enable_p2p,
            lookup_url,
            distributed_url,
            error_handling,
        )



[docs]
    @staticmethod
    def from_env() -> "LMCacheEngineConfig":
        """Load the config from the environment variables
        It will first create a config by `from_defaults` and overwrite
        the configuration values from the environment variables.
        The environment variables should starts with LMCACHE and be in
        uppercase. For example, `LMCACHE_CHUNK_SIZE`.
        :note: the default configuration only uses cpu
        """

        def get_env_name(attr_name: str) -> str:
            return f"LMCACHE_{attr_name.upper()}"

        def parse_env(name: str, default: Optional[Any]) -> Optional[str]:
            if default is not None:
                return os.getenv(name, str(default))
            else:
                return os.getenv(name)

        def to_bool(value: Optional[str]) -> bool:
            if value is None:
                return False
            return value.lower() in ["true", "1"]

        def to_int(value: Optional[str]) -> int:
            if value is None:
                return 0
            return int(value)

        def to_float(value: Optional[str]) -> float:
            if value is None:
                return 0.0
            return float(value)

        config = LMCacheEngineConfig.from_defaults(remote_url=None,
                                                   remote_serde=None)
        config.chunk_size = to_int(
            parse_env(get_env_name("chunk_size"), config.chunk_size))
        config.local_cpu = to_bool(
            parse_env(get_env_name("local_cpu"), config.local_cpu))
        config.max_local_cpu_size = to_float(
            parse_env(get_env_name("max_local_cpu_size"),
                      config.max_local_cpu_size))
        config.local_disk = parse_env(get_env_name("local_disk"),
                                      config.local_disk)
        config.max_local_disk_size = to_float(
            parse_env(get_env_name("max_local_disk_size"),
                      config.max_local_disk_size))
        config.remote_url = parse_env(get_env_name("remote_url"),
                                      config.remote_url)
        config.remote_serde = parse_env(get_env_name("remote_serde"),
                                        config.remote_serde)
        config.save_decode_cache = to_bool(
            parse_env(get_env_name("save_decode_cache"),
                      config.save_decode_cache))
        config.enable_blending = to_bool(
            parse_env(get_env_name("enable_blending"), config.enable_blending))
        config.blend_recompute_ratio = to_float(
            parse_env(get_env_name("blend_recompute_ratio"),
                      config.blend_recompute_ratio))
        config.blend_min_tokens = to_int(
            parse_env(get_env_name("blend_min_tokens"),
                      config.blend_min_tokens))

        config.enable_p2p = to_bool(
            parse_env(get_env_name("enable_p2p"), config.enable_p2p))
        config.lookup_url = parse_env(get_env_name("lookup_url"),
                                      config.lookup_url)
        config.distributed_url = parse_env(get_env_name("distributed_url"),
                                           config.distributed_url)

        config.error_handling = to_bool(
            parse_env(get_env_name("error_handling"), config.error_handling))

        return config



[docs]
    def to_original_config(self) -> orig_config.LMCacheEngineConfig:
        # NOTE: This function is purely for UsageContext compatibility
        return orig_config.LMCacheEngineConfig(
            chunk_size=self.chunk_size,
            local_device="cpu" if self.local_cpu else "cuda",
            max_local_cache_size=int(self.max_local_cpu_size),
            remote_url=None,
            remote_serde=None,
            pipelined_backend=False,
            save_decode_cache=self.save_decode_cache,
            enable_blending=self.enable_blending,
            blend_recompute_ratio=self.blend_recompute_ratio,
            blend_min_tokens=self.blend_min_tokens,
            blend_separator="[BLEND_SEP]",
            blend_add_special_in_precomp=False,
        )