Initial AITuner study orchestrator
This commit is contained in:
59
infra/gpu_fleet/config/fleet.example.toml
Normal file
59
infra/gpu_fleet/config/fleet.example.toml
Normal file
@@ -0,0 +1,59 @@
|
||||
# Version number of this config file.
# NOTE(review): how the consumer interprets or validates this value is not visible here - confirm.
version = 1
|
||||
|
||||
# State and artifacts directories; both sit under .aituner/gpu_fleet/.
# NOTE(review): these are relative paths - presumably resolved against the
# directory the tool is run from; confirm against the loader.
[paths]
|
||||
state_dir = ".aituner/gpu_fleet/state"
|
||||
artifacts_dir = ".aituner/gpu_fleet/artifacts"
|
||||
|
||||
# SSH settings. The timeout is expressed in seconds (per the _sec key suffix).
# NOTE(review): presumably forwarded as the SSH connect timeout - confirm.
[ssh]
|
||||
connect_timeout_sec = 10
|
||||
|
||||
# Scheduler settings. Units follow the key suffixes: _mb is megabytes,
# _pct is percent.
# NOTE(review): the exact comparison that classifies a GPU as "free", and what
# prefer_pack changes about placement, are defined by the consumer and are not
# visible here - confirm before tuning these values.
[scheduler]
|
||||
gpu_free_memory_mb = 1024
|
||||
gpu_free_utilization_pct = 10
|
||||
prefer_pack = true
|
||||
|
||||
# Sync settings: mode "rsync", local source path ".", and a list of exclude
# patterns. The ".aituner/" entry covers the state_dir and artifacts_dir
# locations configured under [paths].
# NOTE(review): the patterns are presumably handed to rsync as exclude rules -
# confirm how the consumer passes them.
[sync]
|
||||
mode = "rsync"
|
||||
local_path = "."
|
||||
exclude = [
|
||||
".git/",
|
||||
".venv/",
|
||||
".aituner/",
|
||||
"__pycache__/",
|
||||
"*.pyc",
|
||||
]
|
||||
|
||||
# Host entry "dash0" (one element of the hosts array of tables).
# NOTE(review): ssh_alias presumably names a Host entry in the user's SSH
# config, and "~" in the two paths is presumably expanded on the remote side -
# confirm both against the consumer.
[[hosts]]
|
||||
name = "dash0"
|
||||
ssh_alias = "dash0"
|
||||
enabled = true
|
||||
sync_remote_path = "~/workspace/aituner"
|
||||
fleet_root = "~/.aituner_gpu_fleet"
|
||||
|
||||
# Host entry "dash1"; name and ssh_alias carry the same value.
# NOTE(review): "~" in the paths is presumably expanded on the remote host - confirm.
[[hosts]]
|
||||
name = "dash1"
|
||||
ssh_alias = "dash1"
|
||||
enabled = true
|
||||
sync_remote_path = "~/workspace/aituner"
|
||||
fleet_root = "~/.aituner_gpu_fleet"
|
||||
|
||||
# Host entry "dash2"; name and ssh_alias carry the same value.
# NOTE(review): "~" in the paths is presumably expanded on the remote host - confirm.
[[hosts]]
|
||||
name = "dash2"
|
||||
ssh_alias = "dash2"
|
||||
enabled = true
|
||||
sync_remote_path = "~/workspace/aituner"
|
||||
fleet_root = "~/.aituner_gpu_fleet"
|
||||
|
||||
# Host entry "dash3"; name and ssh_alias carry the same value.
# NOTE(review): sync_remote_path here is "~/aituner", whereas the other host
# entries in this file use "~/workspace/aituner" - confirm this difference is
# intentional and not a typo.
[[hosts]]
|
||||
name = "dash3"
|
||||
ssh_alias = "dash3"
|
||||
enabled = true
|
||||
sync_remote_path = "~/aituner"
|
||||
fleet_root = "~/.aituner_gpu_fleet"
|
||||
|
||||
# Host entry "dash5"; name and ssh_alias carry the same value.
# NOTE(review): the host names in this file go dash0-dash3 and then dash5; there
# is no dash4 entry - confirm the gap is intentional.
[[hosts]]
|
||||
name = "dash5"
|
||||
ssh_alias = "dash5"
|
||||
enabled = true
|
||||
sync_remote_path = "~/workspace/aituner"
|
||||
fleet_root = "~/.aituner_gpu_fleet"
|
||||
Reference in New Issue
Block a user