Add kvcache-centric profiling and admission controls
This commit is contained in:
@@ -176,7 +176,7 @@ NSA_CHOICES = [
|
||||
"trtllm",
|
||||
]
|
||||
|
||||
RADIX_EVICTION_POLICY_CHOICES = ["lru", "lfu", "slru"]
|
||||
RADIX_EVICTION_POLICY_CHOICES = ["lru", "lfu", "slru", "priority"]
|
||||
|
||||
RL_ON_POLICY_TARGET_CHOICES = ["fsdp"]
|
||||
|
||||
@@ -4049,7 +4049,7 @@ class ServerArgs:
|
||||
type=str,
|
||||
choices=RADIX_EVICTION_POLICY_CHOICES,
|
||||
default=ServerArgs.radix_eviction_policy,
|
||||
help="The eviction policy of radix trees. 'lru' stands for Least Recently Used, 'lfu' stands for Least Frequently Used, and 'slru' stands for Segmented Least Recently Used.",
|
||||
help="The eviction policy of radix trees. 'lru' stands for Least Recently Used, 'lfu' stands for Least Frequently Used, 'slru' stands for Segmented Least Recently Used, and 'priority' evicts lower request priority values first.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enable-prefill-delayer",
|
||||
|
||||
Reference in New Issue
Block a user