Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 23 additions & 12 deletions bot/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -261,13 +261,6 @@ declare -a BUILD_STEP_ARGS=()
BUILD_STEP_ARGS+=("--save" "${TARBALL_TMP_BUILD_STEP_DIR}")
BUILD_STEP_ARGS+=("--storage" "${STORAGE}")

# add options required to handle NVIDIA support
if nvidia_gpu_available; then
BUILD_STEP_ARGS+=("--nvidia" "all")
else
BUILD_STEP_ARGS+=("--nvidia" "install")
fi

# Retain location for host injections so we don't reinstall CUDA
# (Always need to run the driver installation as available driver may change)
if [[ ! -z ${SHARED_FS_PATH} ]]; then
Expand All @@ -294,19 +287,37 @@ else
# prepend accel/ to all array elements
EESSI_ACCELERATOR_TARGET_OVERRIDES=("${ACCEL_OVERRIDES_ARRAY[@]/#/accel/}")
fi
RESUME_DIR=""

# Run the container build step once for every entry in EESSI_ACCELERATOR_TARGET_OVERRIDES.
for ACCEL_OVERRIDE in "${EESSI_ACCELERATOR_TARGET_OVERRIDES[@]}"; do
# copy the common build step arguments to a new array used for this accelerator only
BUILD_STEP_ARGS_ACCEL=("${BUILD_STEP_ARGS[@]}")
if [[ "${ACCEL_OVERRIDE}" == "accel/nvidia/"* ]]; then
# strip everything up to and including the last "/cc" (e.g. accel/nvidia/cc80 -> 80)
nvidia_cc=${ACCEL_OVERRIDE##*/cc}
# add options required to handle NVIDIA support
# only make the GPU available in the container if the host has a GPU and it has the correct compute capability
if nvidia_gpu_available && nvidia_gpu_has_compute_capability "${nvidia_cc}" ; then
BUILD_STEP_ARGS_ACCEL+=("--nvidia" "all")
else
BUILD_STEP_ARGS_ACCEL+=("--nvidia" "install")
fi
fi
# resume from the previous accelerator's build directory
# as we want to combine all accelerator builds into a single tarball in the end
# (RESUME_DIR is only non-empty once an earlier iteration has set it at the end of this loop)
if [[ ! -z "${RESUME_DIR}" ]]; then
BUILD_STEP_ARGS_ACCEL+=("--resume" "${RESUME_DIR}")
fi

# expose the accelerator target of this iteration to child processes via the environment
export EESSI_ACCELERATOR_TARGET_OVERRIDE="${ACCEL_OVERRIDE}"
echo "bot/build.sh: EESSI_ACCELERATOR_TARGET_OVERRIDE='${ACCEL_OVERRIDE}'"
echo "Executing command to build software:"
# NOTE(review): the next two echo lines differ only in BUILD_STEP_ARGS vs BUILD_STEP_ARGS_ACCEL
# and look like the before/after pair of a diff view -- confirm which one is meant to remain
echo "$software_layer_dir/eessi_container.sh ${COMMON_ARGS[@]} ${BUILD_STEP_ARGS[@]}"
echo "$software_layer_dir/eessi_container.sh ${COMMON_ARGS[@]} ${BUILD_STEP_ARGS_ACCEL[@]}"
echo " -- $software_layer_dir/install_software_layer.sh \"${INSTALL_SCRIPT_ARGS[@]}\" \"$@\" 2>&1 | tee -a ${build_outerr}"
# NOTE(review): the first invocation line below ends in a line continuation, so as written the
# second eessi_container.sh line is passed as arguments to the first; this also looks like a
# before/after pair from a diff view -- confirm which one is meant to remain.
# Both stdout and stderr of the build are appended to ${build_outerr} via tee.
$software_layer_dir/eessi_container.sh "${COMMON_ARGS[@]}" "${BUILD_STEP_ARGS[@]}" \
$software_layer_dir/eessi_container.sh "${COMMON_ARGS[@]}" "${BUILD_STEP_ARGS_ACCEL[@]}" \
-- $software_layer_dir/install_software_layer.sh "${INSTALL_SCRIPT_ARGS[@]}" "$@" 2>&1 | tee -a ${build_outerr}

# determine temporary directory to resume from for the next accelerator,
# as we want to combine all accelerator builds into a single tarball in the end
# (second space-separated field of the line(s) in ${build_outerr} containing ' as tmp directory ')
# NOTE(review): BUILD_TMPDIR (appended to the shared BUILD_STEP_ARGS) and RESUME_DIR are computed
# by the same pipeline and look like the before/after variants of a diff view -- confirm which is live
BUILD_TMPDIR=$(grep ' as tmp directory ' ${build_outerr} | cut -d ' ' -f 2)
BUILD_STEP_ARGS+=("--resume" "${BUILD_TMPDIR}")
RESUME_DIR=$(grep ' as tmp directory ' ${build_outerr} | cut -d ' ' -f 2)
done

# prepare directory to store tarball of tmp for tarball step
Expand Down
31 changes: 31 additions & 0 deletions scripts/utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -166,3 +166,34 @@ function nvidia_gpu_available {
return 2
fi
}

# Check whether the (first) NVIDIA GPU reported by nvidia-smi has the requested
# compute capability.
#
# Arguments:
#   $1 - requested compute capability, with or without a period (8.0 or 80)
# Outputs:
#   status messages via echo_green / echo_yellow / echo_red
#   (the argument-count error goes to stderr)
# Returns:
#   0 - the first GPU's compute capability equals the requested one
#   1 - nvidia-smi did not report any compute capability
#   2 - the first GPU's compute capability differs from the requested one
#   ${ANY_ERROR_EXITCODE} (1 if unset/empty) - not called with exactly one argument
function nvidia_gpu_has_compute_capability {
    # Ensure we are given a single compute capability argument
    if [ $# -ne 1 ]; then
        echo_red "Function requires a single compute capability argument" >&2
        # Default to 1: with an unset/empty ANY_ERROR_EXITCODE a bare "return" would
        # hand back the status of the echo above, i.e. report success.
        return "${ANY_ERROR_EXITCODE:-1}"
    fi

    # Keep all working variables out of the caller's scope
    local requested_cc raw_cc cleaned_cc
    local -a raw_ccs=()
    local -a gpu_ccs=()

    # Remove period (if present) from the given compute capability, i.e. 8.0 -> 80
    requested_cc=${1//./}

    # We are careful here in case we are running in a container and LD_LIBRARY_PATH has been wiped.
    # The separator is only added when LD_LIBRARY_PATH is non-empty, so that no empty
    # (= current directory) entry ends up in the library search path.
    # "csv" is a mandatory part of nvidia-smi's --format option.
    mapfile -t raw_ccs < <(LD_LIBRARY_PATH="/.singularity.d/libs${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" nvidia-smi --query-gpu=compute_cap --format=csv,noheader)

    # Remove the periods (and any stray whitespace) from all compute capabilities,
    # dropping lines that end up empty
    for raw_cc in "${raw_ccs[@]}"; do
        cleaned_cc=${raw_cc//[.[:space:]]/}
        if [[ -n "${cleaned_cc}" ]]; then
            gpu_ccs+=("${cleaned_cc}")
        fi
    done

    if [ ${#gpu_ccs[@]} -eq 0 ]; then
        echo_red "Error: querying for the GPU's compute capability did not return anything."
        return 1
    fi

    # On a multi-GPU system we may get the compute capabilities of all GPUs, one per line.
    # In that case we print a warning and check the first GPU.
    if [ ${#gpu_ccs[@]} -gt 1 ]; then
        echo_yellow "Warning: multiple GPUs detected, checking the compute capability of the first GPU."
    fi

    if [ "${requested_cc}" == "${gpu_ccs[0]}" ]; then
        echo_green "Requested compute capability matches the one from the GPU."
        return 0
    fi

    echo_red "Error: the compute capability of the GPU (${gpu_ccs[0]}) does not match the requested compute capability ($requested_cc)."
    return 2
}
Loading