From 90cb71319b5b12533f7afe663082c3bea3f69ee9 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 16 Jun 2026 16:32:48 +0200 Subject: [PATCH 1/2] fix(docker): honor configured supervisor image Signed-off-by: Evan Lezar --- crates/openshell-driver-docker/README.md | 9 ++- crates/openshell-driver-docker/src/lib.rs | 28 ++++--- docs/reference/gateway-config.mdx | 1 + e2e/with-docker-gateway.sh | 91 +++++------------------ 4 files changed, 41 insertions(+), 88 deletions(-) diff --git a/crates/openshell-driver-docker/README.md b/crates/openshell-driver-docker/README.md index 71159fe66..754802cbe 100644 --- a/crates/openshell-driver-docker/README.md +++ b/crates/openshell-driver-docker/README.md @@ -79,10 +79,11 @@ The Docker driver bind-mounts a host-side Linux `openshell-sandbox` binary into each sandbox container. Resolution order is: 1. `supervisor_bin` in `[openshell.drivers.docker]`. -2. A sibling `openshell-sandbox` next to the running `openshell-gateway` binary. -3. A local Linux cargo target build for the Docker daemon architecture. -4. `supervisor_image` in `[openshell.drivers.docker]`, or the - release-matched default supervisor image, extracting `/openshell-sandbox`. +2. `supervisor_image` in `[openshell.drivers.docker]`, extracting + `/openshell-sandbox` from that image. +3. A sibling `openshell-sandbox` next to the running `openshell-gateway` binary. +4. A local Linux cargo target build for the Docker daemon architecture. +5. The release-matched default supervisor image, extracting `/openshell-sandbox`. Release and Docker-image gateway builds bake the matching supervisor image tag into the binary at compile time. 
The default Docker supervisor image is not diff --git a/crates/openshell-driver-docker/src/lib.rs b/crates/openshell-driver-docker/src/lib.rs index 963e7a0f7..800f0b68c 100644 --- a/crates/openshell-driver-docker/src/lib.rs +++ b/crates/openshell-driver-docker/src/lib.rs @@ -79,7 +79,8 @@ const DOCKER_NETWORK_DRIVER: &str = "bridge"; /// Default image holding the Linux `openshell-sandbox` binary. The gateway /// pulls this image and extracts the binary to a host-side cache when no -/// explicit `supervisor_bin` override or local build is available. +/// explicit `supervisor_bin`, configured `supervisor_image`, sibling binary, +/// or local build is available. const DEFAULT_DOCKER_SUPERVISOR_IMAGE_REPO: &str = "ghcr.io/nvidia/openshell/supervisor"; /// Return the default `ghcr.io/nvidia/openshell/supervisor:` reference @@ -2960,7 +2961,14 @@ pub(crate) async fn resolve_supervisor_bin( return Ok(path); } - // Tier 2: sibling `openshell-sandbox` next to the running gateway + // Tier 2: explicit supervisor_image in [openshell.drivers.docker]. + // A configured image should be the source of truth even when a local + // developer build is present under target/. + if let Some(image) = docker_config.supervisor_image.clone() { + return extract_supervisor_bin_from_image(docker, &image).await; + } + + // Tier 3: sibling `openshell-sandbox` next to the running gateway // (release artifact layout). Linux-only because the sibling must be a // Linux ELF to bind-mount into a Linux container. if cfg!(target_os = "linux") { @@ -2977,9 +2985,9 @@ pub(crate) async fn resolve_supervisor_bin( } } - // Tier 3: local cargo target build (developer workflow). Preferred - // over a registry pull when available because it matches whatever the - // developer just built. + // Tier 4: local cargo target build (developer workflow). Preferred + // over the default registry image when available because it matches + // whatever the developer just built. 
let target_candidates = linux_supervisor_candidates(daemon_arch); for candidate in &target_candidates { if candidate.is_file() { @@ -2990,13 +2998,9 @@ pub(crate) async fn resolve_supervisor_bin( } } - // Tier 4: pull the supervisor image from a registry and extract the - // binary to a host-side cache keyed by image content digest. This is - // the default path for released gateway binaries. - let image = docker_config - .supervisor_image - .clone() - .unwrap_or_else(default_docker_supervisor_image); + // Tier 5: pull the release-matched default supervisor image and extract + // the binary to a host-side cache keyed by image content digest. + let image = default_docker_supervisor_image(); extract_supervisor_bin_from_image(docker, &image).await } diff --git a/docs/reference/gateway-config.mdx b/docs/reference/gateway-config.mdx index ff4542136..d820a131d 100644 --- a/docs/reference/gateway-config.mdx +++ b/docs/reference/gateway-config.mdx @@ -218,6 +218,7 @@ sandbox_namespace = "docker-dev" grpc_endpoint = "https://host.openshell.internal:17670" # Skip the image-pull-and-extract step by pointing at a locally built binary. supervisor_bin = "/usr/local/libexec/openshell/openshell-sandbox" +# When supervisor_bin is omitted, Docker extracts /openshell-sandbox from this image. supervisor_image = "ghcr.io/nvidia/openshell/supervisor:latest" guest_tls_ca = "/etc/openshell/certs/ca.pem" guest_tls_cert = "/etc/openshell/certs/client.pem" diff --git a/e2e/with-docker-gateway.sh b/e2e/with-docker-gateway.sh index f8e17661d..d2d809c18 100755 --- a/e2e/with-docker-gateway.sh +++ b/e2e/with-docker-gateway.sh @@ -81,7 +81,6 @@ DOCKER_NETWORK_NAME="" DOCKER_NETWORK_CONNECTED_CONTAINER="" DOCKER_NETWORK_MANAGED=0 GPU_MODE="${OPENSHELL_E2E_DOCKER_GPU:-0}" -DOCKER_SUPERVISOR_ARGS=() # Isolate CLI/SDK gateway metadata from the developer's real config. 
export XDG_CONFIG_HOME="${WORKDIR}/config" @@ -263,25 +262,6 @@ if [ "${GPU_MODE}" = "1" ]; then fi fi -normalize_arch() { - case "$1" in - x86_64|amd64) echo "amd64" ;; - aarch64|arm64) echo "arm64" ;; - *) echo "$1" ;; - esac -} - -linux_target_triple() { - case "$1" in - amd64) echo "x86_64-unknown-linux-gnu" ;; - arm64) echo "aarch64-unknown-linux-gnu" ;; - *) - echo "ERROR: unsupported Docker daemon architecture '$1'" >&2 - exit 2 - ;; - esac -} - resolve_docker_supervisor_image() { if [ -n "${OPENSHELL_DOCKER_SUPERVISOR_IMAGE:-}" ]; then printf '%s\n' "${OPENSHELL_DOCKER_SUPERVISOR_IMAGE}" @@ -304,7 +284,7 @@ resolve_docker_supervisor_image() { return 0 fi - printf '%s\n' "" + printf '%s\n' "openshell/supervisor:dev" } docker_pull_with_retry() { @@ -336,6 +316,21 @@ docker_pull_with_retry() { ensure_docker_supervisor_image() { local image=$1 + if [ "${image}" = "openshell/supervisor:dev" ] \ + && [ -z "${OPENSHELL_DOCKER_SUPERVISOR_IMAGE:-}" ] \ + && [ -z "${OPENSHELL_SUPERVISOR_IMAGE:-}" ] \ + && [ -z "${CI:-}" ]; then + echo "Building local Docker supervisor image ${image}..." + CONTAINER_ENGINE=docker IMAGE_TAG=dev \ + bash "${ROOT}/tasks/scripts/docker-build-image.sh" supervisor + if docker image inspect "${image}" >/dev/null 2>&1; then + return 0 + fi + + echo "ERROR: expected supervisor image '${image}' after local build." 
>&2 + exit 2 + fi + if docker image inspect "${image}" >/dev/null 2>&1; then return 0 fi @@ -385,47 +380,11 @@ ensure_sandbox_image_available() { docker_pull_with_retry "${image}" } -DAEMON_ARCH="$(normalize_arch "$(docker info --format '{{.Architecture}}' 2>/dev/null || true)")" -SUPERVISOR_TARGET="$(linux_target_triple "${DAEMON_ARCH}")" -HOST_OS="$(uname -s)" -HOST_ARCH="$(normalize_arch "$(uname -m)")" -SUPERVISOR_OUT_DIR="${WORKDIR}/supervisor/${DAEMON_ARCH}" -SUPERVISOR_BIN="${SUPERVISOR_OUT_DIR}/openshell-sandbox" - -CARGO_BUILD_JOBS_ARG=() -if [ -n "${CARGO_BUILD_JOBS:-}" ]; then - CARGO_BUILD_JOBS_ARG=(-j "${CARGO_BUILD_JOBS}") -fi - e2e_build_gateway_binaries "${ROOT}" TARGET_DIR GATEWAY_BIN CLI_BIN SUPERVISOR_IMAGE="$(resolve_docker_supervisor_image)" -if [ -n "${SUPERVISOR_IMAGE}" ]; then - ensure_docker_supervisor_image "${SUPERVISOR_IMAGE}" - echo "Using Docker supervisor image: ${SUPERVISOR_IMAGE}" - DOCKER_SUPERVISOR_ARGS=(--docker-supervisor-image "${SUPERVISOR_IMAGE}") -else - echo "Building openshell-sandbox for ${SUPERVISOR_TARGET}..." - mkdir -p "${SUPERVISOR_OUT_DIR}" - if [ "${HOST_OS}" = "Linux" ] && [ "${HOST_ARCH}" = "${DAEMON_ARCH}" ]; then - rustup target add "${SUPERVISOR_TARGET}" >/dev/null 2>&1 || true - cargo build ${CARGO_BUILD_JOBS_ARG[@]+"${CARGO_BUILD_JOBS_ARG[@]}"} \ - --release -p openshell-sandbox --target "${SUPERVISOR_TARGET}" - cp "${TARGET_DIR}/${SUPERVISOR_TARGET}/release/openshell-sandbox" "${SUPERVISOR_BIN}" - else - CONTAINER_ENGINE=docker \ - DOCKER_PLATFORM="linux/${DAEMON_ARCH}" \ - DOCKER_OUTPUT="type=local,dest=${SUPERVISOR_OUT_DIR}" \ - bash "${ROOT}/tasks/scripts/docker-build-image.sh" supervisor-output - fi - - if [ ! 
-f "${SUPERVISOR_BIN}" ]; then - echo "ERROR: expected supervisor binary at ${SUPERVISOR_BIN}" >&2 - exit 1 - fi - chmod +x "${SUPERVISOR_BIN}" - DOCKER_SUPERVISOR_ARGS=(--docker-supervisor-bin "${SUPERVISOR_BIN}") -fi +ensure_docker_supervisor_image "${SUPERVISOR_IMAGE}" +echo "Using Docker supervisor image: ${SUPERVISOR_IMAGE}" DEFAULT_SANDBOX_IMAGE="ghcr.io/nvidia/openshell-community/sandboxes/base:latest" SANDBOX_IMAGE="${OPENSHELL_E2E_DOCKER_SANDBOX_IMAGE:-${OPENSHELL_SANDBOX_IMAGE:-${DEFAULT_SANDBOX_IMAGE}}}" @@ -493,19 +452,7 @@ GATEWAY_CONFIG="${STATE_DIR}/gateway.toml" printf 'guest_tls_cert = %s\n' "$(toml_string "${PKI_DIR}/client/tls.crt")" printf 'guest_tls_key = %s\n' "$(toml_string "${PKI_DIR}/client/tls.key")" printf 'enable_bind_mounts = true\n' - # DOCKER_SUPERVISOR_ARGS holds either ("--docker-supervisor-bin" "") - # or ("--docker-supervisor-image" ""); both map to TOML keys on - # the docker driver config. - for ((i=0; i<${#DOCKER_SUPERVISOR_ARGS[@]}; i+=2)); do - case "${DOCKER_SUPERVISOR_ARGS[$i]}" in - --docker-supervisor-bin) - printf 'supervisor_bin = %s\n' "$(toml_string "${DOCKER_SUPERVISOR_ARGS[$((i+1))]}")" - ;; - --docker-supervisor-image) - printf 'supervisor_image = %s\n' "$(toml_string "${DOCKER_SUPERVISOR_ARGS[$((i+1))]}")" - ;; - esac - done + printf 'supervisor_image = %s\n' "$(toml_string "${SUPERVISOR_IMAGE}")" if [ -n "${GATEWAY_HOST_ALIAS_IP}" ]; then printf 'host_gateway_ip = %s\n' "$(toml_string "${GATEWAY_HOST_ALIAS_IP}")" fi From 242ace258f3cb2eda8dd8d4150db6ba600da10e7 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Tue, 16 Jun 2026 20:22:05 +0200 Subject: [PATCH 2/2] fix(cli): isolate ssh from host linker environment Signed-off-by: Evan Lezar --- crates/openshell-cli/src/ssh.rs | 123 ++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) diff --git a/crates/openshell-cli/src/ssh.rs b/crates/openshell-cli/src/ssh.rs index f5986a1d8..ab9bca2b1 100644 --- a/crates/openshell-cli/src/ssh.rs +++ 
b/crates/openshell-cli/src/ssh.rs @@ -29,6 +29,16 @@ use tokio::process::Command as TokioCommand; use tokio_stream::wrappers::ReceiverStream; const FOREGROUND_FORWARD_STARTUP_GRACE_PERIOD: Duration = Duration::from_secs(2); +const HOST_TOOL_LINKER_ENV: &[&str] = &[ + "DYLD_FALLBACK_LIBRARY_PATH", + "DYLD_INSERT_LIBRARIES", + "DYLD_LIBRARY_PATH", + "LD_AUDIT", + "LD_LIBRARY_PATH", + "LD_PRELOAD", + "LIBRARY_PATH", + "NIX_LD_LIBRARY_PATH", +]; #[derive(Clone, Copy, Debug)] pub enum Editor { @@ -121,6 +131,7 @@ async fn ssh_session_config( &session.token, gateway_name, ); + let proxy_command = proxy_command_with_preserved_environment(proxy_command); Ok(SshSessionConfig { proxy_command, @@ -137,6 +148,7 @@ fn ssh_base_command(proxy_command: &str) -> Command { std::env::var("OPENSHELL_SSH_LOG_LEVEL").unwrap_or_else(|_| "ERROR".to_string()); let mut command = Command::new("ssh"); + sanitize_host_tool_environment(&mut command); command .arg("-o") .arg(format!("ProxyCommand={proxy_command}")) @@ -159,6 +171,30 @@ fn ssh_base_command(proxy_command: &str) -> Command { command } +fn sanitize_host_tool_environment(command: &mut Command) { + for key in HOST_TOOL_LINKER_ENV { + command.env_remove(key); + } +} + +fn proxy_command_with_preserved_environment(proxy_command: String) -> String { + let assignments = HOST_TOOL_LINKER_ENV + .iter() + .filter_map(|key| { + std::env::var_os(key).map(|value| { + let value = value.to_string_lossy(); + format!("{key}={}", shell_escape(&value)) + }) + }) + .collect::<Vec<_>>(); + + if assignments.is_empty() { + proxy_command + } else { + format!("env {} {proxy_command}", assignments.join(" ")) + } +} + #[cfg(unix)] const TRANSIENT_TTY_SIGNALS: &[Signal] = &[Signal::SIGINT, Signal::SIGQUIT, Signal::SIGTERM]; @@ -1508,6 +1544,93 @@ mod tests { use super::*; use crate::TEST_ENV_LOCK; + #[test] + fn ssh_base_command_removes_host_linker_environment() { + let command = ssh_base_command("openshell ssh-proxy"); + let removed_keys = command + .get_envs() + 
.filter(|(_, value)| value.is_none()) + .map(|(key, _)| key.to_string_lossy().into_owned()) + .collect::<Vec<_>>(); + + for key in HOST_TOOL_LINKER_ENV { + assert!( + removed_keys.iter().any(|removed| removed == key), + "expected ssh command to remove {key}" + ); + } + } + + #[test] + #[allow(unsafe_code)] // Test-only: env vars require unsafe in Rust 2024. + fn proxy_command_preserves_linker_environment_for_proxy_child() { + let _guard = TEST_ENV_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let old_env = HOST_TOOL_LINKER_ENV + .iter() + .map(|key| (*key, std::env::var_os(key))) + .collect::<Vec<_>>(); + + unsafe { + for key in HOST_TOOL_LINKER_ENV { + std::env::remove_var(key); + } + std::env::set_var("LD_LIBRARY_PATH", "/nix/store/z3 lib:/opt/lib"); + } + + let proxy_command = + proxy_command_with_preserved_environment("openshell ssh-proxy".to_string()); + let has_assignment = proxy_command.contains("LD_LIBRARY_PATH='/nix/store/z3 lib:/opt/lib'"); + let has_env_prefix = proxy_command.starts_with("env "); + let has_command = proxy_command.ends_with(" openshell ssh-proxy"); + + unsafe { + for (key, value) in old_env { + match value { + Some(value) => std::env::set_var(key, value), + None => std::env::remove_var(key), + } + } + } + + assert!(has_assignment, "unexpected proxy command: {proxy_command}"); + assert!(has_env_prefix, "unexpected proxy command: {proxy_command}"); + assert!(has_command, "unexpected proxy command: {proxy_command}"); + } + + #[test] + #[allow(unsafe_code)] // Test-only: env vars require unsafe in Rust 2024. 
+ fn proxy_command_is_unchanged_without_linker_environment() { + let _guard = TEST_ENV_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let old_env = HOST_TOOL_LINKER_ENV + .iter() + .map(|key| (*key, std::env::var_os(key))) + .collect::<Vec<_>>(); + + unsafe { + for key in HOST_TOOL_LINKER_ENV { + std::env::remove_var(key); + } + } + + let proxy_command = + proxy_command_with_preserved_environment("openshell ssh-proxy".to_string()); + + unsafe { + for (key, value) in old_env { + match value { + Some(value) => std::env::set_var(key, value), + None => std::env::remove_var(key), + } + } + } + + assert_eq!(proxy_command, "openshell ssh-proxy"); + } + #[test] fn upsert_host_block_appends_when_missing() { let input = "Host existing\n HostName example.com\n";