tenseleyflow/sway / 5ab3e61

Browse files

Wire sway serve CLI command

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
5ab3e6121f402e3b0bad377af8571b8fb30cda3c
Parents
5190589
Tree
900463b

2 changed files

Status | File | + | -
M src/dlm_sway/cli/app.py 1 0
M src/dlm_sway/cli/commands.py 92 0
src/dlm_sway/cli/app.py (modified)
@@ -55,6 +55,7 @@ app.command("list-probes")(commands.list_probes_cmd)
5555
 app.command("convert-adapter")(commands.convert_adapter_cmd)
5656
 app.command("pack")(commands.pack_cmd)
5757
 app.command("unpack")(commands.unpack_cmd)
58
+app.command("serve")(commands.serve_cmd)
5859
 
5960
 
6061
 def main() -> None:
src/dlm_sway/cli/commands.py (modified)
@@ -1387,3 +1387,95 @@ def unpack_cmd(
13871387
         typer.echo(f"  SWAY_NULL_CACHE_DIR={report.null_stats_dir} sway run {report.spec_path}")
13881388
     else:
13891389
         typer.echo(f"  sway run {report.spec_path}")
1390
+
1391
+
def _is_loopback_host(host: str) -> bool:
    """Return True when *host* names a loopback interface.

    Accepts the literal name ``localhost`` (case-insensitive) and any IP
    literal inside a loopback range (all of 127.0.0.0/8, and ``::1``).
    Every other hostname is treated as non-loopback: resolving it here
    would not guarantee which interface the server later binds.
    """
    import ipaddress  # function-scope import keeps this block self-contained

    if host.lower() == "localhost":
        return True
    try:
        return ipaddress.ip_address(host).is_loopback
    except ValueError:
        # Not an IP literal (e.g. a DNS name) — err on the side of
        # demanding an API key.
        return False


# NOTE: the docstring of ``serve_cmd`` is rendered by Typer as the command's
# ``--help`` text, so maintainer-facing documentation lives in comments.
#
# Flow:
#   1. Verify the optional ``uvicorn`` dependency is importable; exit 2 with
#      an install hint otherwise.
#   2. Refuse (exit 2) to bind a non-loopback host unless a non-empty
#      ``--api-key`` was supplied.
#   3. Validate host/port via ``parse_host_port`` and ``--max-loaded-models``.
#   4. Build the backend cache and the ASGI app, print a banner, and hand
#      control to ``uvicorn.run`` (blocks until the server stops).
def serve_cmd(
    host: Annotated[
        str,
        typer.Option(
            "--host",
            help=(
                "Interface to bind. Default 127.0.0.1 (localhost only). "
                "Binding to 0.0.0.0 requires --api-key."
            ),
        ),
    ] = "127.0.0.1",
    port: Annotated[
        int,
        typer.Option("--port", help="TCP port to bind."),
    ] = 8787,
    max_loaded_models: Annotated[
        int,
        typer.Option(
            "--max-loaded-models",
            help=(
                "How many backends to keep warm in memory. Each loaded "
                "model holds its own VRAM/RAM; default 2 fits a 16 GB GPU "
                "with two ~1.5B fp16 adapters."
            ),
        ),
    ] = 2,
    api_key: Annotated[
        str | None,
        typer.Option(
            "--api-key",
            help=(
                "Bearer token required on every non-/health request. "
                "Required when --host is not loopback."
            ),
        ),
    ] = None,
    log_level: Annotated[
        str,
        typer.Option("--log-level", help="uvicorn log level."),
    ] = "info",
) -> None:
    """Run the warm-backend HTTP daemon (S36).

    First call loads the backend (~15s); subsequent calls reuse it
    (~2s). See ``sway run`` for the equivalent one-shot CLI.
    """
    # uvicorn ships in the optional [serve] extra; fail with an actionable
    # message instead of a raw ImportError traceback.
    try:
        import uvicorn
    except ImportError as exc:
        typer.secho(
            "sway serve requires the [serve] extra: pip install 'dlm-sway[serve]'",
            fg=typer.colors.RED,
            err=True,
        )
        raise typer.Exit(code=2) from exc

    # Imported lazily so the rest of the CLI works without the serve extra.
    from dlm_sway.serve.app import create_app, parse_host_port
    from dlm_sway.serve.cache import BackendCache

    # Public-bind safety — refuse before any uvicorn startup work.
    # An empty key (``--api-key ""``) is treated as "no key": the banner
    # below would report auth=no for it, so it must not satisfy the guard.
    loopback = _is_loopback_host(host)
    if not loopback and not api_key:
        typer.secho(
            f"refusing to bind {host}:{port} without --api-key. "
            "Either pass --api-key <key> or use --host 127.0.0.1.",
            fg=typer.colors.RED,
            err=True,
        )
        raise typer.Exit(code=2)

    # Return value intentionally unused; presumably this raises on an
    # invalid host/port pair — confirm against dlm_sway.serve.app.
    parse_host_port(host, port)
    if max_loaded_models < 1:
        typer.secho("--max-loaded-models must be >= 1", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=2)

    cache = BackendCache(max_size=max_loaded_models)
    app = create_app(cache=cache, api_key=api_key)

    typer.echo(f"sway serve {__version__} listening on http://{host}:{port}")
    typer.echo(f"  max_loaded_models={max_loaded_models}  auth={'yes' if api_key else 'no'}")
    if not loopback:
        typer.secho(
            "  WARNING: bound to a non-loopback interface — anyone on "
            "this network with the API key can drive your GPU.",
            fg=typer.colors.YELLOW,
        )

    # Blocks until the server is shut down.
    uvicorn.run(app, host=host, port=port, log_level=log_level)