Add kvcache-centric profiling and admission controls

This commit is contained in:
2026-04-25 16:00:52 +00:00
parent 08b13d22bc
commit 13bb31a446
9 changed files with 1044 additions and 34 deletions

View File

@@ -176,7 +176,7 @@ NSA_CHOICES = [
"trtllm",
]
RADIX_EVICTION_POLICY_CHOICES = ["lru", "lfu", "slru"]
RADIX_EVICTION_POLICY_CHOICES = ["lru", "lfu", "slru", "priority"]
RL_ON_POLICY_TARGET_CHOICES = ["fsdp"]
@@ -4049,7 +4049,7 @@ class ServerArgs:
type=str,
choices=RADIX_EVICTION_POLICY_CHOICES,
default=ServerArgs.radix_eviction_policy,
help="The eviction policy of radix trees. 'lru' stands for Least Recently Used, 'lfu' stands for Least Frequently Used, and 'slru' stands for Segmented Least Recently Used.",
help="The eviction policy of radix trees. 'lru' stands for Least Recently Used, 'lfu' stands for Least Frequently Used, 'slru' stands for Segmented Least Recently Used, and 'priority' evicts lower request priority values first.",
)
parser.add_argument(
"--enable-prefill-delayer",