From c4524db945b28b71c4dc8720caee6590769f1edc Mon Sep 17 00:00:00 2001 From: eshulman2 Date: Mon, 22 Jun 2026 14:59:18 +0300 Subject: [PATCH 1/7] Add .claude to .gitignore Change-Id: I7c112db92016ddd7bd2c93b5c404455c479bb722 --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 2f17b83e..6889243a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ *.log .vscode .venv +.claude \ No newline at end of file From ff52b9198a2a890bc72c311bc6d91af3192babcc Mon Sep 17 00:00:00 2001 From: eshulman2 Date: Mon, 22 Jun 2026 14:59:28 +0300 Subject: [PATCH 2/7] Add IPI bootstrap flavor workaround and additionalTrustBundle support Allow overriding the bootstrap VM flavor (needed because CAPI mode uses the control-plane flavor for bootstrap). Also add additionalTrustBundle to the install-config so bootstrap VMs trust self-signed OSP TLS certs (e.g. Glance endpoint). Change-Id: I9b8c15b5797cbc2066cd1984ee64eab3d4e91a19 --- .../ipi_install_config.yml | 27 +++++++ .../tasks/ipi_bootstrap_flavor_workaround.yml | 73 +++++++++++++++++++ .../stages/roles/install/tasks/ipi_tenant.yml | 9 ++- .../templates/install-config-ipi.yaml.j2 | 21 ++++++ 4 files changed, 129 insertions(+), 1 deletion(-) create mode 100644 collection/stages/roles/install/tasks/ipi_bootstrap_flavor_workaround.yml diff --git a/collection/stages/roles/install/tasks/install_config_generation/ipi_install_config.yml b/collection/stages/roles/install/tasks/install_config_generation/ipi_install_config.yml index e376e0ee..156aeee8 100644 --- a/collection/stages/roles/install/tasks/install_config_generation/ipi_install_config.yml +++ b/collection/stages/roles/install/tasks/install_config_generation/ipi_install_config.yml @@ -1,4 +1,27 @@ --- +- name: Discover machines subnet ID by name + when: ocp_deployment_topology.machines_subnet is defined + openstack.cloud.subnets_info: + cloud: "{{ user_cloud }}" + name: "{{ ocp_deployment_topology.machines_subnet }}" + register: 
machines_subnet_info + +- name: Set machines subnet ID fact + when: ocp_deployment_topology.machines_subnet is defined + ansible.builtin.set_fact: + machines_subnet_id: "{{ machines_subnet_info.subnets[0].id }}" + +- name: Check if CA cert file exists + ansible.builtin.stat: + path: "{{ cacert }}" + register: _cacert_stat + +- name: Read CA cert content + when: _cacert_stat.stat.exists + ansible.builtin.command: "cat {{ cacert }}" + changed_when: false + register: _cacert_content + - name: Generate install-config.yaml from install-config-ipi.yaml.j2 template ansible.builtin.template: src: install-config-ipi.yaml.j2 @@ -22,5 +45,9 @@ installcfg_api_vips: "{{ ocp_deployment_topology.primary_ip_protocol == 'ipv6' }}" installcfg_api_floating_ip: "{{ precreated_api_fip }}" installcfg_ingress_floating_ip: "{{ precreated_ingress_fip }}" + installcfg_machines_subnet: "{{ machines_subnet_id | default(omit) }}" installcfg_cluster_network: "{{ ocp_deployment_topology[ocp_deployment_topology.primary_ip_protocol].cluster_network }}" installcfg_service_network: "{{ ocp_deployment_topology[ocp_deployment_topology.primary_ip_protocol].service_network }}" + installcfg_default_machine_platform: "{{ ocp_deployment_topology.defaultMachinePlatform | default({}) }}" + installcfg_cluster_os_image_properties: "{{ ocp_deployment_topology.platform.openstack.clusterOSImageProperties | default({}) }}" + installcfg_additional_trust_bundle: "{{ _cacert_content.stdout_lines | default(omit) }}" diff --git a/collection/stages/roles/install/tasks/ipi_bootstrap_flavor_workaround.yml b/collection/stages/roles/install/tasks/ipi_bootstrap_flavor_workaround.yml new file mode 100644 index 00000000..7806d54e --- /dev/null +++ b/collection/stages/roles/install/tasks/ipi_bootstrap_flavor_workaround.yml @@ -0,0 +1,73 @@ +--- +# Workaround for bootstrap flavor configuration (CAPI mode) +# This allows overriding the bootstrap machine flavor by: +# 1. 
Creating manifests (generates cluster-api/machines/ directory) +# 2. Modifying the bootstrap OpenStackMachine manifest in cluster-api/machines/ +# 3. Running cluster creation with modified manifests +# +# Note: In CAPI-based OpenStack installs, bootstrap uses the same flavor as masters +# (from controlPlane.platform.openstack.type), NOT defaultMachinePlatform.type. +# This workaround allows using a different (typically smaller) flavor for bootstrap. + +- name: Create OpenShift manifests + ansible.builtin.shell: | + openshift-install create manifests --log-level debug --dir {{ ocp_installation_dir }} + environment: + OS_CLOUD: "{{ user_cloud }}" + changed_when: true + +- name: Find bootstrap OpenStackMachine manifest (CAPI mode) + ansible.builtin.find: + paths: "{{ ocp_installation_dir }}/cluster-api/machines" + patterns: "10_inframachine_*-bootstrap.yaml" + contains: "kind: OpenStackMachine" + register: bootstrap_manifest + +- name: Fail if bootstrap manifest not found + ansible.builtin.fail: + msg: "Bootstrap OpenStackMachine manifest not found in {{ ocp_installation_dir }}/cluster-api/machines" + when: bootstrap_manifest.matched == 0 + +- name: Override bootstrap flavor in manifest + ansible.builtin.replace: + path: "{{ bootstrap_manifest.files[0].path }}" + regexp: '^(\s+flavor:\s+).*$' + replace: '\1{{ bootstrap_flavor_override }}' + when: bootstrap_flavor_override is defined and bootstrap_flavor_override | length > 0 + +- name: Display modified bootstrap manifest path + ansible.builtin.debug: + msg: "Modified bootstrap flavor in {{ bootstrap_manifest.files[0].path }} to {{ bootstrap_flavor_override }}" + when: bootstrap_flavor_override is defined and bootstrap_flavor_override | length > 0 + +- name: Install OpenShift cluster with modified manifests + block: + - name: Install Openshift + ansible.builtin.shell: | + openshift-install create cluster --log-level debug --dir {{ ocp_installation_dir }} + environment: + OS_CLOUD: "{{ user_cloud }}" + changed_when: 
true + rescue: + - name: Use an openshift-install flag to wait until the cluster is ready + ansible.builtin.shell: | + openshift-install wait-for install-complete --log-level debug --dir {{ ocp_installation_dir }} + environment: + OS_CLOUD: "{{ user_cloud }}" + changed_when: true + + - name: Remove the bootstrap resources after the OCP installation succeeded + ansible.builtin.shell: | + openshift-install destroy bootstrap --log-level debug --dir {{ ocp_installation_dir }} + environment: + OS_CLOUD: "{{ user_cloud }}" + changed_when: true + + - name: Mark the openshift tests as UNSTABLE + ansible.builtin.include_role: + name: tools_stage_results + tasks_from: mark_stage_unstable.yml + vars: + unstable_msg: >- + The openshift installation passed but unexpectedly needed the wait-for flag. + More info in Jira KURYRQE-1002. diff --git a/collection/stages/roles/install/tasks/ipi_tenant.yml b/collection/stages/roles/install/tasks/ipi_tenant.yml index a0233b2e..3717bc23 100644 --- a/collection/stages/roles/install/tasks/ipi_tenant.yml +++ b/collection/stages/roles/install/tasks/ipi_tenant.yml @@ -1,5 +1,12 @@ --- -- name: Install OpenShift cluster using openshift-install +- name: Install OpenShift cluster with bootstrap flavor workaround + ansible.builtin.include_tasks: ipi_bootstrap_flavor_workaround.yml + when: + - bootstrap_flavor_override is defined + - bootstrap_flavor_override | length > 0 + +- name: Install OpenShift cluster using openshift-install (standard) + when: bootstrap_flavor_override is not defined or bootstrap_flavor_override | length == 0 block: - name: Install Openshift ansible.builtin.shell: | diff --git a/collection/stages/roles/install/templates/install-config-ipi.yaml.j2 b/collection/stages/roles/install/templates/install-config-ipi.yaml.j2 index e8b5223f..58d96458 100644 --- a/collection/stages/roles/install/templates/install-config-ipi.yaml.j2 +++ b/collection/stages/roles/install/templates/install-config-ipi.yaml.j2 @@ -35,6 +35,19 @@ platform: 
openstack: cloud: "{{ user_cloud }}" region: "{{ installcfg_region }}" + {%- if installcfg_default_machine_platform.type is defined +%} + defaultMachinePlatform: + type: "{{ installcfg_default_machine_platform.type }}" + {%- endif +%} + {%- if installcfg_cluster_os_image_properties != {} +%} + clusterOSImageProperties: + {%- for key, value in installcfg_cluster_os_image_properties.items() +%} + {{ key }}: {{ value }} + {%- endfor +%} + {%- endif +%} + {%- if installcfg_machines_subnet is defined +%} + machinesSubnet: {{ installcfg_machines_subnet }} + {%- endif +%} {%- if installcfg_api_vips +%} apiVIPs: ["{{ installcfg_api_floating_ip }}"] ingressVIPs: ["{{ installcfg_ingress_floating_ip }}"] @@ -46,9 +59,17 @@ platform: externalNetwork: "{{ installcfg_external_network }}" apiFloatingIP: "{{ installcfg_api_floating_ip }}" ingressFloatingIP: "{{ installcfg_ingress_floating_ip }}" + {%- if installcfg_machines_subnet is not defined +%} externalDNS: {{ installcfg_dns_servers }} {%- endif +%} + {%- endif +%} pullSecret: | {{ ocp_pull_secret }} sshKey: | {{ ocp_public_key }} +{%- if installcfg_additional_trust_bundle is defined +%} +additionalTrustBundle: | +{% for line in installcfg_additional_trust_bundle %} + {{ line }} +{% endfor %} +{%- endif +%} From 09e3482c52d48461b5376d740fc8e1d7fd0b6568 Mon Sep 17 00:00:00 2001 From: eshulman2 Date: Mon, 22 Jun 2026 15:07:23 +0300 Subject: [PATCH 3/7] prepare: skip flavors that only define a name Telco flavors pre-exist on the cloud and only have a 'name' field in the topology definition. Guard the flavor-creation task so it only runs when 'ram' is defined, avoiding failures when iterating over these entries. 
Change-Id: I88c1c19a168615306fc182741be6b6bd2e94343e --- collection/stages/roles/prepare/tasks/project.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/collection/stages/roles/prepare/tasks/project.yml b/collection/stages/roles/prepare/tasks/project.yml index b6b9c89b..2f5b7de2 100644 --- a/collection/stages/roles/prepare/tasks/project.yml +++ b/collection/stages/roles/prepare/tasks/project.yml @@ -61,4 +61,6 @@ ephemeral: "{{ item.value.ephemeral | default(omit) }}" verify: "{{ admin_verify_cacert }}" register: flavors - loop: "{{ lookup('ansible.builtin.dict', ocp_deployment_topology.flavors) }}" + loop: "{{ lookup('ansible.builtin.dict', ocp_deployment_topology.flavors, wantlist=True) }}" + # Skip flavors that only define 'name' (e.g., telco flavors that pre-exist on the cloud) + when: item.value.ram is defined From b20914781c35b1356bd2f1327bb78d9e13757195 Mon Sep 17 00:00:00 2001 From: eshulman2 Date: Mon, 22 Jun 2026 15:07:56 +0300 Subject: [PATCH 4/7] day2ops: add run_procedure_no_verify task Add a variant of the procedure runner that skips result verification, needed for setup steps that do not produce a JUnit report. 
Change-Id: Ib5dc76d9804c1a6b97e01fc0764392b70d226e76 --- collection/stages/roles/day2ops/tasks/main.yml | 2 +- .../day2ops/tasks/run_procedure_no_verify.yml | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 collection/stages/roles/day2ops/tasks/run_procedure_no_verify.yml diff --git a/collection/stages/roles/day2ops/tasks/main.yml b/collection/stages/roles/day2ops/tasks/main.yml index a1e10d05..aeeb820c 100644 --- a/collection/stages/roles/day2ops/tasks/main.yml +++ b/collection/stages/roles/day2ops/tasks/main.yml @@ -40,7 +40,7 @@ mode: u=rw,g=rw,o=r - name: Run day2ops procedures sequentially - ansible.builtin.include_tasks: run_procedure.yml + ansible.builtin.include_tasks: "{{ day2ops_run_procedure_task | default('run_procedure.yml') }}" vars: procedure_task_file: "{{ item }}.yml" loop: "{{ day2ops_steps }}" diff --git a/collection/stages/roles/day2ops/tasks/run_procedure_no_verify.yml b/collection/stages/roles/day2ops/tasks/run_procedure_no_verify.yml new file mode 100644 index 00000000..71f07cd4 --- /dev/null +++ b/collection/stages/roles/day2ops/tasks/run_procedure_no_verify.yml @@ -0,0 +1,15 @@ +--- +- name: Assert that the procedure task file exists + ansible.builtin.assert: + that: + - procedure_task_file in day2ops_available_procedures + fail_msg: | + The {{ procedure_task_file }} is not available inside this role. 
+ Please use one of the following by setting it inside the list var + 'day2ops_step' removing the extension '.yml': + {{ day2ops_available_procedures | to_nice_json }} + success_msg: | + Running task file {{ procedure_task_file }} + +- name: Run procedure {{ procedure_task_file }} + ansible.builtin.include_tasks: "procedures/{{ procedure_task_file }}" From 38f07c43dae6e063122b159e6e040bc066e9778b Mon Sep 17 00:00:00 2001 From: eshulman2 Date: Mon, 22 Jun 2026 15:08:10 +0300 Subject: [PATCH 5/7] day2ops: add telco SR-IOV/DPDK MachineSet procedures Add procedures and templates for creating telco worker MachineSets with SR-IOV/DPDK networking, including SR-IOV operator static manifests and defaults for the machineset configuration variables. Change-Id: I223bbcf83d552cc63ff65cc8df6f8c818b6358b3 --- .../stages/roles/day2ops/defaults/main.yml | 79 +++++++++ .../sriov-dpdk-machine-config-pools.yaml | 37 ++++ .../sriov-network-operator-namespace.yaml | 19 ++ .../day2ops/files/sriov-operator-config.yaml | 12 ++ .../procedures/apply-telco-machineset.yml | 78 +++++++++ .../procedures/create-telco-machinesets.yml | 164 ++++++++++++++++++ .../templates/telco-machineset.yaml.j2 | 89 ++++++++++ 7 files changed, 478 insertions(+) create mode 100644 collection/stages/roles/day2ops/files/sriov-dpdk-machine-config-pools.yaml create mode 100644 collection/stages/roles/day2ops/files/sriov-network-operator-namespace.yaml create mode 100644 collection/stages/roles/day2ops/files/sriov-operator-config.yaml create mode 100644 collection/stages/roles/day2ops/tasks/procedures/apply-telco-machineset.yml create mode 100644 collection/stages/roles/day2ops/tasks/procedures/create-telco-machinesets.yml create mode 100644 collection/stages/roles/day2ops/templates/telco-machineset.yaml.j2 diff --git a/collection/stages/roles/day2ops/defaults/main.yml b/collection/stages/roles/day2ops/defaults/main.yml index ce63b11a..c775828e 100644 --- a/collection/stages/roles/day2ops/defaults/main.yml +++ 
b/collection/stages/roles/day2ops/defaults/main.yml @@ -2,3 +2,82 @@ # defaults file for day2ops day2ops_steps: [] day2ops_report_filename: shiftstack-qa-day2ops-results.xml + +# Telco MachineSet configuration for SRIOV/DPDK workers +# Used by the create-telco-machinesets procedure +telco_machinesets: + delete_default_workers: true # Whether to delete the default worker machineset + disable_masters_schedulable: true # Set mastersSchedulable: false after adding workers + machinesets: [] # List of machinesets to create (configure in job definition) + # Example machineset configuration: + # machinesets: + # - name: sriov + # role: worker + # type: sriov + # replicas: 1 + # flavor: worker_0_numa_0 + # networks: + # - name: sriov_net_nic0 + # port_name_suffix: sriov_net_nic0_direct_worker_port + # - name: sriov_net_nic1 + # port_name_suffix: sriov_net_nic1_direct_worker_port + # - name: dpdk + # role: worker + # type: dpdk + # replicas: 1 + # flavor: worker_1_numa_1 + # networks: + # - name: dpdk_net_nic0 + # port_name_suffix: dpdk_net_nic0_normal_worker_port + # - name: dpdk_net_nic1 + # port_name_suffix: dpdk_net_nic1_normal_worker_port + +# Telco tuning configuration for SRIOV/DPDK workloads +# Used by the configure-telco-tuning procedure +telco_tuning: + sriov: + performance_profile: + cpu_isolated: "4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20" + cpu_reserved: "0,1,2,3" + hugepages_size: "1G" + hugepages_count: 7 + numa_node: 0 + numa_topology_policy: "best-effort" + realtime_kernel: false + additional_kernel_args: + - nosmt + - tsc=reliable + networks: [] # List of SRIOV networks for SriovNetworkNodePolicy (configure in job definition) + # Example network configuration: + # networks: + # - name: sriov_net_nic0_9 + # resource_name: sriov9 + # device_type: vfio-pci + # num_vfs: 1 + # - name: sriov_net_nic1_10 + # resource_name: sriov10 + # device_type: vfio-pci + # num_vfs: 1 + dpdk: + performance_profile: + cpu_isolated: 
"4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20" + cpu_reserved: "0,1,2,3" + hugepages_size: "1G" + hugepages_count: 7 + numa_node: 0 + numa_topology_policy: "best-effort" + realtime_kernel: false + additional_kernel_args: + - nosmt + - tsc=reliable + networks: [] # List of DPDK networks for SriovNetworkNodePolicy (configure in job definition) + # Example network configuration: + # networks: + # - name: dpdk_net_nic0_9 + # resource_name: dpdk9 + # device_type: vfio-pci + # num_vfs: 1 + # - name: dpdk_net_nic1_10 + # resource_name: dpdk10 + # device_type: vfio-pci + # num_vfs: 1 diff --git a/collection/stages/roles/day2ops/files/sriov-dpdk-machine-config-pools.yaml b/collection/stages/roles/day2ops/files/sriov-dpdk-machine-config-pools.yaml new file mode 100644 index 00000000..e1633d22 --- /dev/null +++ b/collection/stages/roles/day2ops/files/sriov-dpdk-machine-config-pools.yaml @@ -0,0 +1,37 @@ +--- +# MachineConfigPool for SRIOV worker nodes +apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfigPool +metadata: + name: sriov + labels: + machineconfiguration.openshift.io/role: sriov +spec: + machineConfigSelector: + matchExpressions: + - key: machineconfiguration.openshift.io/role + operator: In + values: [sriov, worker] + paused: false + nodeSelector: + matchLabels: + node-role.kubernetes.io/sriov: "" + +--- +# MachineConfigPool for DPDK worker nodes +apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfigPool +metadata: + name: dpdk + labels: + machineconfiguration.openshift.io/role: dpdk +spec: + machineConfigSelector: + matchExpressions: + - key: machineconfiguration.openshift.io/role + operator: In + values: [dpdk, worker] + paused: false + nodeSelector: + matchLabels: + node-role.kubernetes.io/dpdk: "" diff --git a/collection/stages/roles/day2ops/files/sriov-network-operator-namespace.yaml b/collection/stages/roles/day2ops/files/sriov-network-operator-namespace.yaml new file mode 100644 index 00000000..44997dc7 --- /dev/null +++ 
b/collection/stages/roles/day2ops/files/sriov-network-operator-namespace.yaml @@ -0,0 +1,19 @@ +--- +# Namespace for SRIOV Network Operator +apiVersion: v1 +kind: Namespace +metadata: + name: openshift-sriov-network-operator + annotations: + workload.openshift.io/allowed: management + +--- +# OperatorGroup for SRIOV Network Operator +apiVersion: operators.coreos.com/v1 +kind: OperatorGroup +metadata: + name: sriov-network-operators + namespace: openshift-sriov-network-operator +spec: + targetNamespaces: + - openshift-sriov-network-operator diff --git a/collection/stages/roles/day2ops/files/sriov-operator-config.yaml b/collection/stages/roles/day2ops/files/sriov-operator-config.yaml new file mode 100644 index 00000000..4711e09e --- /dev/null +++ b/collection/stages/roles/day2ops/files/sriov-operator-config.yaml @@ -0,0 +1,12 @@ +--- +# SriovOperatorConfig for SRIOV Network Operator +apiVersion: sriovnetwork.openshift.io/v1 +kind: SriovOperatorConfig +metadata: + name: default + namespace: openshift-sriov-network-operator +spec: + enableInjector: true + enableOperatorWebhook: true + logLevel: 2 + disableDrain: false diff --git a/collection/stages/roles/day2ops/tasks/procedures/apply-telco-machineset.yml b/collection/stages/roles/day2ops/tasks/procedures/apply-telco-machineset.yml new file mode 100644 index 00000000..901b7817 --- /dev/null +++ b/collection/stages/roles/day2ops/tasks/procedures/apply-telco-machineset.yml @@ -0,0 +1,78 @@ +--- +# Helper task to apply a single telco MachineSet +# This task is included by create-telco-machinesets.yml for each machineset + +- name: "Get network details for {{ telco_machineset.networks[0].name }}" + openstack.cloud.networks_info: + cloud: "{{ user_cloud }}" + name: "{{ telco_machineset.networks[0].name }}" + register: network_0 + +- name: "Get network details for {{ telco_machineset.networks[1].name }}" + openstack.cloud.networks_info: + cloud: "{{ user_cloud }}" + name: "{{ telco_machineset.networks[1].name }}" + register: 
network_1 + +- name: Set the telco network and subnet IDs + ansible.builtin.set_fact: + network_id_0: "{{ network_0.networks[0].id }}" + network_subnet_id_0: "{{ network_0.networks[0].subnet_ids[0] }}" + network_id_1: "{{ network_1.networks[0].id }}" + network_subnet_id_1: "{{ network_1.networks[0].subnet_ids[0] }}" + +- name: "Generate telco MachineSet manifest for {{ telco_machineset.name }}" + ansible.builtin.template: + src: telco-machineset.yaml.j2 + dest: "{{ ocp_installation_dir }}/{{ telco_machineset.name }}-machineset.yaml" + mode: u=rw,g=rw,o=r + vars: + _infrastructure_id: "{{ infrastructure_id }}" + _machine_role: "{{ telco_machineset.role | default('worker') }}" + _machineset_type: "{{ telco_machineset.type }}" + _machineset_replicas: "{{ telco_machineset.replicas }}" + _osp_flavor: "{{ telco_machineset.flavor }}" + _api_vip_port_ip: "{{ api_ip }}" + _ingress_vip_port_ip: "{{ apps_ip }}" + _machines_subnet: "{{ machines_subnet_id }}" + _machines_network: "{{ machines_subnet_net_id }}" + _security_group: "{{ machines_security_group }}" + _subnet_id_0: "{{ network_subnet_id_0 }}" + _name_suffix_0: "{{ telco_machineset.networks[0].port_name_suffix }}" + _network_id_0: "{{ network_id_0 }}" + _subnet_id_1: "{{ network_subnet_id_1 }}" + _name_suffix_1: "{{ telco_machineset.networks[1].port_name_suffix }}" + _network_id_1: "{{ network_id_1 }}" + +- name: "Apply telco MachineSet manifest for {{ telco_machineset.name }}" + kubernetes.core.k8s: + kubeconfig: "{{ kubeconfig }}" + state: present + src: "{{ ocp_installation_dir }}/{{ telco_machineset.name }}-machineset.yaml" + wait: true + wait_timeout: "{{ manifests_wait_timeout }}" + +- name: "Wait for cluster health after applying {{ telco_machineset.name }} machineset" + block: + - name: Wait for MCP updates + ansible.builtin.include_role: + name: tools_cluster_checks + tasks_from: wait_mcp_updated.yml + vars: + wait_retries: 60 + wait_delay: 60 + + - name: Wait until cluster is healthy + 
ansible.builtin.include_role: + name: tools_cluster_checks + tasks_from: wait_until_cluster_is_healthy.yml + + - name: Wait until nodes are ready + ansible.builtin.include_role: + name: tools_cluster_checks + tasks_from: wait_until_nodes_ready.yml + + - name: Wait until ClusterOperators are ready + ansible.builtin.include_role: + name: tools_cluster_checks + tasks_from: wait_until_cluster_operators_ready.yml diff --git a/collection/stages/roles/day2ops/tasks/procedures/create-telco-machinesets.yml b/collection/stages/roles/day2ops/tasks/procedures/create-telco-machinesets.yml new file mode 100644 index 00000000..90c15669 --- /dev/null +++ b/collection/stages/roles/day2ops/tasks/procedures/create-telco-machinesets.yml @@ -0,0 +1,164 @@ +--- +# Day2ops procedure to create SRIOV/DPDK telco MachineSets +# This procedure assumes telco networks already exist in OpenStack + +- name: Discover OCP version for apiVersion selection + ansible.builtin.include_role: + name: tools_get_deploy_info + tasks_from: discover_ocp_version.yml + +- name: Get the OCP Cluster infrastructure ID + kubernetes.core.k8s_info: + kubeconfig: "{{ kubeconfig }}" + api_version: config.openshift.io/v1 + kind: Infrastructure + name: cluster + register: cluster_infrastructure + +- name: Set infrastructure_id fact + ansible.builtin.set_fact: + infrastructure_id: "{{ cluster_infrastructure.resources[0].status.infrastructureName }}" + +- name: Delete the default worker MachineSet + when: telco_machinesets.delete_default_workers | default(false) | bool + block: + - name: Get the current worker MachineSets + kubernetes.core.k8s_info: + kubeconfig: "{{ kubeconfig }}" + api_version: machine.openshift.io/v1beta1 + kind: MachineSet + namespace: openshift-machine-api + label_selectors: + - machine.openshift.io/cluster-api-machine-role=worker + register: worker_machinesets + + - name: Delete default worker MachineSets + kubernetes.core.k8s: + kubeconfig: "{{ kubeconfig }}" + api_version: 
machine.openshift.io/v1beta1 + kind: MachineSet + name: "{{ item.metadata.name }}" + namespace: openshift-machine-api + state: absent + wait: true + wait_timeout: "{{ manifests_wait_timeout }}" + loop: "{{ worker_machinesets.resources }}" + loop_control: + label: "{{ item.metadata.name }}" + when: + - "'sriov' not in item.metadata.name" + - "'dpdk' not in item.metadata.name" + + - name: Wait for cluster health after deleting default workers + ansible.builtin.include_role: + name: tools_cluster_checks + tasks_from: wait_until_cluster_is_healthy.yml + +- name: Get cluster install-config content for VIP IPs and subnet info + ansible.builtin.include_role: + name: tools_get_deploy_info + tasks_from: get_ic_content.yml + +- name: Set the API-IP, APPs-IP, and machinesSubnet from install-config + # NOTE: This assumes IPv4 mode (apiFloatingIP/ingressFloatingIP). + # IPv6 deployments use apiVIPs/ingressVIPs instead. + ansible.builtin.set_fact: + api_ip: "{{ ic_content.platform.openstack.apiFloatingIP }}" + apps_ip: "{{ ic_content.platform.openstack.ingressFloatingIP }}" + machines_subnet_name: "{{ ic_content.platform.openstack.machinesSubnet | default('') }}" + +- name: Get the machinesSubnet details from OpenStack + openstack.cloud.subnets_info: + cloud: "{{ user_cloud }}" + name: "{{ machines_subnet_name }}" + register: os_subnets + +- name: Get the security group details from OpenStack + openstack.cloud.security_group_info: + cloud: "{{ user_cloud }}" + name: "{{ infrastructure_id }}-worker" + register: os_security_group + +- name: Set the machinesSubnet network ID and security group ID + ansible.builtin.set_fact: + machines_subnet_id: "{{ os_subnets.subnets[0].id }}" + machines_subnet_net_id: "{{ os_subnets.subnets[0].network_id }}" + machines_security_group: "{{ os_security_group.security_groups[0].id }}" + +- name: Create telco MachineSets + ansible.builtin.include_tasks: apply-telco-machineset.yml + loop: "{{ telco_machinesets.machinesets }}" + loop_control: + 
loop_var: telco_machineset + label: "{{ telco_machineset.name }}" + +- name: Disable mastersSchedulable since we now have dedicated worker nodes + when: telco_machinesets.disable_masters_schedulable | default(true) | bool + kubernetes.core.k8s: + kubeconfig: "{{ kubeconfig }}" + api_version: config.openshift.io/v1 + kind: Scheduler + name: cluster + state: present + merge_type: merge + definition: + spec: + mastersSchedulable: false + +- name: Final cluster health check after all telco machinesets + block: + - name: Wait for MCP to finish updates + ansible.builtin.include_role: + name: tools_cluster_checks + tasks_from: wait_mcp_updated.yml + vars: + wait_retries: 60 + wait_delay: 60 + + - name: Wait until cluster is healthy + ansible.builtin.include_role: + name: tools_cluster_checks + tasks_from: wait_until_cluster_is_healthy.yml + + - name: Wait until all nodes are ready + ansible.builtin.include_role: + name: tools_cluster_checks + tasks_from: wait_until_nodes_ready.yml + + - name: Wait until all MachineSets report availableReplicas + kubernetes.core.k8s_info: + kubeconfig: "{{ kubeconfig }}" + api_version: machine.openshift.io/v1beta1 + kind: MachineSet + register: _ms_status + until: >- + _ms_status.resources + | selectattr('spec.replicas', '>', 0) + | rejectattr('status.availableReplicas', 'defined') + | list | length == 0 + retries: 60 + delay: 30 + + - name: Wait until all MachineSets reach desired available replicas + kubernetes.core.k8s_info: + kubeconfig: "{{ kubeconfig }}" + api_version: machine.openshift.io/v1beta1 + kind: MachineSet + register: _ms_status + until: >- + _ms_status.resources + | selectattr('spec.replicas', '>', 0) + | json_query('[?spec.replicas != status.availableReplicas]') + | list | length == 0 + retries: 60 + delay: 30 + + - name: Check MachineSets health + ansible.builtin.include_role: + name: tools_cluster_checks + tasks_from: check_machinesets.yml + + - name: Wait until ClusterOperators are ready + ansible.builtin.include_role: 
+ name: tools_cluster_checks + tasks_from: wait_until_cluster_operators_ready.yml diff --git a/collection/stages/roles/day2ops/templates/telco-machineset.yaml.j2 b/collection/stages/roles/day2ops/templates/telco-machineset.yaml.j2 new file mode 100644 index 00000000..61907cc4 --- /dev/null +++ b/collection/stages/roles/day2ops/templates/telco-machineset.yaml.j2 @@ -0,0 +1,89 @@ +# Telco MachineSet template for SRIOV/DPDK workers +# Ref: https://docs.openshift.com/container-platform/4.15/machine_management/creating_machinesets/creating-machineset-osp.html#machineset-yaml-osp-sr-iov-port-security_creating-machineset-osp +apiVersion: machine.openshift.io/v1beta1 +kind: MachineSet +metadata: + labels: + machine.openshift.io/cluster-api-cluster: {{ _infrastructure_id }} + machine.openshift.io/cluster-api-machine-role: {{ _machine_role }} + machine.openshift.io/cluster-api-machine-type: {{ _machine_role }} + name: {{ _infrastructure_id }}-{{ _machine_role }}-{{ _machineset_type }} + namespace: openshift-machine-api +spec: + replicas: {{ _machineset_replicas }} + selector: + matchLabels: + machine.openshift.io/cluster-api-cluster: {{ _infrastructure_id }} + machine.openshift.io/cluster-api-machineset: {{ _infrastructure_id }}-{{ _machine_role }}-{{ _machineset_type }} + template: + metadata: + labels: + machine.openshift.io/cluster-api-cluster: {{ _infrastructure_id }} + machine.openshift.io/cluster-api-machine-role: {{ _machine_role }} + machine.openshift.io/cluster-api-machine-type: {{ _machine_role }} + machine.openshift.io/cluster-api-machineset: {{ _infrastructure_id }}-{{ _machine_role }}-{{ _machineset_type }} + spec: + metadata: + labels: +{% if _machineset_type == "sriov" %} + node-role.kubernetes.io/sriov: "" +{% elif _machineset_type == "dpdk" %} + node-role.kubernetes.io/dpdk: "" +{% endif %} + providerSpec: + value: +{% if discovered_openshift_release is version('4.16', '>=') %} + apiVersion: machine.openshift.io/v1alpha1 +{% else %} + apiVersion: 
openstackproviderconfig.openshift.io/v1alpha1 +{% endif %} + cloudName: openstack + cloudsSecret: + name: openstack-cloud-credentials + namespace: openshift-machine-api + configDrive: true + flavor: {{ _osp_flavor }} + image: {{ _infrastructure_id }}-rhcos + kind: OpenstackProviderSpec + ports: + - allowedAddressPairs: + - ipAddress: {{ _api_vip_port_ip }} + - ipAddress: {{ _ingress_vip_port_ip }} + fixedIPs: + - subnetID: {{ _machines_subnet }} + nameSuffix: nodes + networkID: {{ _machines_network }} + securityGroups: + - {{ _security_group }} + - fixedIPs: + - subnetID: {{ _subnet_id_0 }} + nameSuffix: {{ _name_suffix_0 }} + networkID: {{ _network_id_0 }} + portSecurity: false + tags: + - openshiftClusterID={{ _infrastructure_id }} + trunk: false +{% if _machineset_type == "sriov" %} + vnicType: direct +{% endif %} + - fixedIPs: + - subnetID: {{ _subnet_id_1 }} + nameSuffix: {{ _name_suffix_1 }} + networkID: {{ _network_id_1 }} + portSecurity: false + tags: + - openshiftClusterID={{ _infrastructure_id }} + trunk: false +{% if _machineset_type == "sriov" %} + vnicType: direct +{% endif %} + primarySubnet: {{ _machines_subnet }} + serverGroupName: {{ _infrastructure_id }}-{{ _machine_role }} + serverMetadata: + Name: {{ _infrastructure_id }}-{{ _machine_role }} + openshiftClusterID: {{ _infrastructure_id }} + tags: + - openshiftClusterID={{ _infrastructure_id }} + trunk: true + userDataSecret: + name: {{ _machine_role }}-user-data From 52ff9677ebc08ad0dc4611f95e713f994e755cf7 Mon Sep 17 00:00:00 2001 From: eshulman2 Date: Mon, 22 Jun 2026 15:09:05 +0300 Subject: [PATCH 6/7] day2ops: add telco performance tuning and DPDK test procedures Add procedures to apply performance profiles and SR-IOV network node policies, and to run testpmd-based DPDK throughput tests via ansible-performance-test. 
Change-Id: I58f563ed8bf5513a8729f198ecc9281c74add5d5 --- .../procedures/configure-telco-tuning.yml | 352 ++++++++++++++++++ .../tasks/procedures/run-performance-test.yml | 274 ++++++++++++++ .../day2ops/templates/perf-extra-vars.yaml.j2 | 21 ++ .../day2ops/templates/perf-inventory.ini.j2 | 8 + .../templates/performance-profile.yaml.j2 | 28 ++ .../sriov-network-node-policy.yaml.j2 | 20 + .../day2ops/templates/testpmd-dut-pod.yaml.j2 | 59 +++ 7 files changed, 762 insertions(+) create mode 100644 collection/stages/roles/day2ops/tasks/procedures/configure-telco-tuning.yml create mode 100644 collection/stages/roles/day2ops/tasks/procedures/run-performance-test.yml create mode 100644 collection/stages/roles/day2ops/templates/perf-extra-vars.yaml.j2 create mode 100644 collection/stages/roles/day2ops/templates/perf-inventory.ini.j2 create mode 100644 collection/stages/roles/day2ops/templates/performance-profile.yaml.j2 create mode 100644 collection/stages/roles/day2ops/templates/sriov-network-node-policy.yaml.j2 create mode 100644 collection/stages/roles/day2ops/templates/testpmd-dut-pod.yaml.j2 diff --git a/collection/stages/roles/day2ops/tasks/procedures/configure-telco-tuning.yml b/collection/stages/roles/day2ops/tasks/procedures/configure-telco-tuning.yml new file mode 100644 index 00000000..0c8a5259 --- /dev/null +++ b/collection/stages/roles/day2ops/tasks/procedures/configure-telco-tuning.yml @@ -0,0 +1,352 @@ +--- +# Day2ops procedure to configure OCP telco tuning for SRIOV/DPDK workloads +# This procedure configures MachineConfigPools, PerformanceProfiles, and SRIOV Network Operator + +- name: Discover OCP version + ansible.builtin.include_role: + name: tools_get_deploy_info + tasks_from: discover_ocp_version.yml + +- name: Get the OCP Cluster infrastructure ID + kubernetes.core.k8s_info: + kubeconfig: "{{ kubeconfig }}" + api_version: config.openshift.io/v1 + kind: Infrastructure + name: cluster + register: cluster_infrastructure + +- name: Set infrastructure_id 
fact + ansible.builtin.set_fact: + infrastructure_id: "{{ cluster_infrastructure.resources[0].status.infrastructureName }}" + +# Step 1: Create MachineConfigPools for SRIOV and DPDK nodes +- name: Create MachineConfigPools for SRIOV and DPDK worker nodes + kubernetes.core.k8s: + kubeconfig: "{{ kubeconfig }}" + state: present + src: "{{ role_path }}/files/sriov-dpdk-machine-config-pools.yaml" + +- name: Pause to allow MachineConfigPools to synchronize + ansible.builtin.pause: + seconds: 15 + +# Step 2: Apply PerformanceProfiles for SRIOV nodes +- name: Generate SRIOV PerformanceProfile manifest + ansible.builtin.template: + src: performance-profile.yaml.j2 + dest: "{{ ocp_installation_dir }}/sriov-performance-profile.yaml" + mode: u=rw,g=rw,o=r + vars: + _profile_type: sriov + _cpu_isolated: "{{ telco_tuning.sriov.performance_profile.cpu_isolated }}" + _cpu_reserved: "{{ telco_tuning.sriov.performance_profile.cpu_reserved }}" + _hugepages_size: "{{ telco_tuning.sriov.performance_profile.hugepages_size | default('1G') }}" + _hugepages_count: "{{ telco_tuning.sriov.performance_profile.hugepages_count | default(7) }}" + _numa_node: "{{ telco_tuning.sriov.performance_profile.numa_node | default(0) }}" + _numa_topology_policy: "{{ telco_tuning.sriov.performance_profile.numa_topology_policy | default('best-effort') }}" + _realtime_kernel: "{{ telco_tuning.sriov.performance_profile.realtime_kernel | default(false) }}" + _additional_kernel_args: "{{ telco_tuning.sriov.performance_profile.additional_kernel_args | default(['nosmt', 'tsc=reliable']) }}" + +- name: Apply SRIOV PerformanceProfile + kubernetes.core.k8s: + kubeconfig: "{{ kubeconfig }}" + state: present + src: "{{ ocp_installation_dir }}/sriov-performance-profile.yaml" + +- name: Pause to allow MachineConfigPools to start update + ansible.builtin.pause: + seconds: 15 + +- name: Wait for MCP updates after SRIOV PerformanceProfile + ansible.builtin.include_role: + name: tools_cluster_checks + tasks_from: 
wait_mcp_updated.yml + vars: + wait_retries: 60 + wait_delay: 60 + +# Step 3: Apply PerformanceProfiles for DPDK nodes +- name: Generate DPDK PerformanceProfile manifest + ansible.builtin.template: + src: performance-profile.yaml.j2 + dest: "{{ ocp_installation_dir }}/dpdk-performance-profile.yaml" + mode: u=rw,g=rw,o=r + vars: + _profile_type: dpdk + _cpu_isolated: "{{ telco_tuning.dpdk.performance_profile.cpu_isolated }}" + _cpu_reserved: "{{ telco_tuning.dpdk.performance_profile.cpu_reserved }}" + _hugepages_size: "{{ telco_tuning.dpdk.performance_profile.hugepages_size | default('1G') }}" + _hugepages_count: "{{ telco_tuning.dpdk.performance_profile.hugepages_count | default(7) }}" + _numa_node: "{{ telco_tuning.dpdk.performance_profile.numa_node | default(0) }}" + _numa_topology_policy: "{{ telco_tuning.dpdk.performance_profile.numa_topology_policy | default('best-effort') }}" + _realtime_kernel: "{{ telco_tuning.dpdk.performance_profile.realtime_kernel | default(false) }}" + _additional_kernel_args: "{{ telco_tuning.dpdk.performance_profile.additional_kernel_args | default(['nosmt', 'tsc=reliable']) }}" + +- name: Apply DPDK PerformanceProfile + kubernetes.core.k8s: + kubeconfig: "{{ kubeconfig }}" + state: present + src: "{{ ocp_installation_dir }}/dpdk-performance-profile.yaml" + +- name: Pause to allow MachineConfigPools to start update + ansible.builtin.pause: + seconds: 15 + +- name: Wait for MCP updates after DPDK PerformanceProfile + ansible.builtin.include_role: + name: tools_cluster_checks + tasks_from: wait_mcp_updated.yml + vars: + wait_retries: 60 + wait_delay: 60 + +# Step 4: Install SRIOV Network Operator +- name: Create namespace and OperatorGroup for SRIOV Network Operator + kubernetes.core.k8s: + kubeconfig: "{{ kubeconfig }}" + state: present + src: "{{ role_path }}/files/sriov-network-operator-namespace.yaml" + +- name: Get SRIOV Network Operator subscription channel version + ansible.builtin.command: + cmd: > + oc get packagemanifest 
sriov-network-operator + -n openshift-marketplace + -o jsonpath='{.status.defaultChannel}' + environment: + KUBECONFIG: "{{ kubeconfig }}" + register: sno_channel + changed_when: false + failed_when: false + +- name: Create SRIOV Network Operator Subscription + kubernetes.core.k8s: + kubeconfig: "{{ kubeconfig }}" + state: present + definition: + apiVersion: operators.coreos.com/v1alpha1 + kind: Subscription + metadata: + name: sriov-network-operator-subscription + namespace: openshift-sriov-network-operator + spec: + channel: "{{ sno_channel.stdout | default('stable', true) }}" + name: sriov-network-operator + source: redhat-operators + sourceNamespace: openshift-marketplace + +- name: Wait for SRIOV Network Operator deployment + kubernetes.core.k8s_info: + kubeconfig: "{{ kubeconfig }}" + api_version: apps/v1 + kind: Deployment + namespace: openshift-sriov-network-operator + name: sriov-network-operator + register: sno_deployment + until: + - sno_deployment.resources | length > 0 + - sno_deployment.resources[0].status.readyReplicas is defined + - sno_deployment.resources[0].status.readyReplicas == sno_deployment.resources[0].status.replicas + retries: 30 + delay: 30 + +# Step 5: Apply SriovOperatorConfig (OCP 4.15+) +- name: Apply SriovOperatorConfig + kubernetes.core.k8s: + kubeconfig: "{{ kubeconfig }}" + state: present + src: "{{ role_path }}/files/sriov-operator-config.yaml" + when: discovered_openshift_release is version('4.15', '>=') + +- name: Wait for SRIOV operator pods to be ready + kubernetes.core.k8s_info: + kubeconfig: "{{ kubeconfig }}" + api_version: v1 + kind: Pod + namespace: openshift-sriov-network-operator + field_selectors: + - status.phase=Running + register: sno_pods + until: sno_pods.resources | length >= 1 + retries: 10 + delay: 30 + +- name: Wait for SRIOV operator webhook service to have endpoints + kubernetes.core.k8s_info: + kubeconfig: "{{ kubeconfig }}" + api_version: v1 + kind: Endpoints + namespace: openshift-sriov-network-operator +
name: operator-webhook-service + register: sno_webhook_endpoints + until: + - sno_webhook_endpoints.resources | length > 0 + - sno_webhook_endpoints.resources[0].subsets is defined + - sno_webhook_endpoints.resources[0].subsets | length > 0 + retries: 20 + delay: 30 + +# Step 6: Label SRIOV nodes as SR-IOV capable +- name: Get SRIOV worker nodes + kubernetes.core.k8s_info: + kubeconfig: "{{ kubeconfig }}" + api_version: v1 + kind: Node + label_selectors: + - node-role.kubernetes.io/sriov + register: sriov_nodes + +- name: Label SRIOV nodes as SR-IOV capable + kubernetes.core.k8s: + kubeconfig: "{{ kubeconfig }}" + api_version: v1 + kind: Node + name: "{{ item.metadata.name }}" + state: present + merge_type: merge + definition: + metadata: + labels: + feature.node.kubernetes.io/network-sriov.capable: "true" + loop: "{{ sriov_nodes.resources }}" + loop_control: + label: "{{ item.metadata.name }}" + +# Step 6b: Wait for SRIOV operator to inventory SR-IOV worker nodes +- name: Wait for SriovNetworkNodeState to be created for SRIOV worker nodes + kubernetes.core.k8s_info: + kubeconfig: "{{ kubeconfig }}" + api_version: sriovnetwork.openshift.io/v1 + kind: SriovNetworkNodeState + namespace: openshift-sriov-network-operator + register: sriov_node_states + until: + - sriov_node_states.resources | length > 0 + - sriov_node_states.resources | selectattr('status.syncStatus', 'defined') | selectattr('status.syncStatus', 'equalto', 'Succeeded') | list | length > 0 + retries: 20 + delay: 30 + +# Step 7: Create SriovNetworkNodePolicies for SRIOV networks +- name: Get SRIOV network IDs from OpenStack + openstack.cloud.networks_info: + cloud: "{{ user_cloud }}" + name: "{{ item.name }}" + loop: "{{ telco_tuning.sriov.networks }}" + register: sriov_networks + +- name: Create SriovNetworkNodePolicies for SRIOV networks + ansible.builtin.template: + src: sriov-network-node-policy.yaml.j2 + dest: "{{ ocp_installation_dir }}/sriov-policy-{{ item.item.resource_name }}.yaml" + mode: 
u=rw,g=rw,o=r + vars: + _policy_name: "{{ item.item.resource_name }}" + _network_id: "{{ item.networks[0].id }}" + _resource_name: "{{ item.item.resource_name }}" + _node_selector_label: "feature.node.kubernetes.io/network-sriov.capable" + _node_selector_value: "true" + _device_type: "{{ item.item.device_type | default('vfio-pci') }}" + _num_vfs: "{{ item.item.num_vfs | default(1) }}" + loop: "{{ sriov_networks.results }}" + loop_control: + label: "{{ item.item.name }}" + +- name: Apply SriovNetworkNodePolicies for SRIOV networks + kubernetes.core.k8s: + kubeconfig: "{{ kubeconfig }}" + state: present + src: "{{ ocp_installation_dir }}/sriov-policy-{{ item.item.resource_name }}.yaml" + loop: "{{ sriov_networks.results }}" + loop_control: + label: "{{ item.item.name }}" + +# Step 8: Create SriovNetworkNodePolicies for DPDK networks +- name: Get DPDK network IDs from OpenStack + openstack.cloud.networks_info: + cloud: "{{ user_cloud }}" + name: "{{ item.name }}" + loop: "{{ telco_tuning.dpdk.networks }}" + register: dpdk_networks + +- name: Create SriovNetworkNodePolicies for DPDK networks + ansible.builtin.template: + src: sriov-network-node-policy.yaml.j2 + dest: "{{ ocp_installation_dir }}/dpdk-policy-{{ item.item.resource_name }}.yaml" + mode: u=rw,g=rw,o=r + vars: + _policy_name: "{{ item.item.resource_name }}" + _network_id: "{{ item.networks[0].id }}" + _resource_name: "{{ item.item.resource_name }}" + _node_selector_label: "node-role.kubernetes.io/dpdk" + _node_selector_value: "" + _device_type: "{{ item.item.device_type | default('vfio-pci') }}" + _num_vfs: "{{ item.item.num_vfs | default(1) }}" + loop: "{{ dpdk_networks.results }}" + loop_control: + label: "{{ item.item.name }}" + +- name: Apply SriovNetworkNodePolicies for DPDK networks + kubernetes.core.k8s: + kubeconfig: "{{ kubeconfig }}" + state: present + src: "{{ ocp_installation_dir }}/dpdk-policy-{{ item.item.resource_name }}.yaml" + loop: "{{ dpdk_networks.results }}" + loop_control: + label: "{{ 
item.item.name }}" + +# Step 9: Wait for SRIOV operator to apply policies +- name: Wait for SRIOV operator pods to be ready after policies + kubernetes.core.k8s_info: + kubeconfig: "{{ kubeconfig }}" + api_version: v1 + kind: Pod + namespace: openshift-sriov-network-operator + field_selectors: + - status.phase=Running + register: sno_pods_final + until: sno_pods_final.resources | length >= 1 + retries: 10 + delay: 30 + +# Step 10: Enforce Pod Security Standards (OCP 4.11+) +- name: Enforce Pod Security Standards on default namespace + kubernetes.core.k8s: + kubeconfig: "{{ kubeconfig }}" + api_version: v1 + kind: Namespace + name: default + state: present + merge_type: merge + definition: + metadata: + labels: + pod-security.kubernetes.io/audit: privileged + pod-security.kubernetes.io/enforce: privileged + pod-security.kubernetes.io/warn: privileged + when: discovered_openshift_release is version('4.11', '>=') + +# Final cluster health check +- name: Final cluster health check after telco tuning + block: + - name: Wait for MCP to finish updates + ansible.builtin.include_role: + name: tools_cluster_checks + tasks_from: wait_mcp_updated.yml + vars: + wait_retries: 60 + wait_delay: 60 + + - name: Wait until cluster is healthy + ansible.builtin.include_role: + name: tools_cluster_checks + tasks_from: wait_until_cluster_is_healthy.yml + + - name: Wait until all nodes are ready + ansible.builtin.include_role: + name: tools_cluster_checks + tasks_from: wait_until_nodes_ready.yml + + - name: Wait until ClusterOperators are ready + ansible.builtin.include_role: + name: tools_cluster_checks + tasks_from: wait_until_cluster_operators_ready.yml diff --git a/collection/stages/roles/day2ops/tasks/procedures/run-performance-test.yml b/collection/stages/roles/day2ops/tasks/procedures/run-performance-test.yml new file mode 100644 index 00000000..219b5ff8 --- /dev/null +++ b/collection/stages/roles/day2ops/tasks/procedures/run-performance-test.yml @@ -0,0 +1,274 @@ +--- +# Run NFV 
performance tests using TRex traffic generator against testpmd DUT pods. +# Supports SRIOV and DPDK test types. +# +# Required variables: +# perf_test: dict with sriov/dpdk config (see telco_verification.yaml) + +- name: Clone ansible-nfv repository + ansible.builtin.git: + repo: "{{ perf_test.ansible_nfv_repo }}" + dest: "{{ home_dir }}/ansible-nfv" + version: "{{ perf_test.ansible_nfv_branch | default('main') }}" + force: true + +- name: Install ansible-nfv collection requirements + ansible.builtin.command: + cmd: ansible-galaxy collection install openvswitch.openvswitch -p {{ home_dir }}/.ansible/collections + changed_when: true + +- name: Create performance results directory + ansible.builtin.file: + path: "{{ artifacts_dir }}/performance" + state: directory + mode: u=rwx,g=rw,o=r + +- name: Get TRex server floating IP + ansible.builtin.shell: | + set -o pipefail + openstack floating ip list --port \ + "$(openstack port list --server {{ perf_test.trex_server }} \ + --network {{ perf_test.trex_management_network | default('management_net_management') }} \ + -c ID -f value | head -1)" \ + -c "Floating IP Address" -f value + environment: + OS_CLOUD: "{{ user_cloud }}" + register: _trex_fip + changed_when: false + +- name: Save TRex SSH key path + ansible.builtin.set_fact: + perf_trex_ssh_key: "{{ perf_test.trex_ssh_key }}" + +- name: Generate performance inventory + ansible.builtin.template: + src: perf-inventory.ini.j2 + dest: "{{ home_dir }}/perf-inventory.ini" + mode: u=rw,g=rw,o=r + +- name: Run SRIOV performance test + when: perf_test.sriov.enabled | default(true) | bool + block: + - name: Reboot TRex VM for SRIOV test + ansible.builtin.shell: | + openstack server stop {{ perf_test.trex_server }} && sleep 5 && openstack server start {{ perf_test.trex_server }} + environment: + OS_CLOUD: "{{ user_cloud }}" + changed_when: true + + - name: Wait for TRex VM to become active + ansible.builtin.shell: | + openstack server show {{ perf_test.trex_server }} -c status 
-f value + environment: + OS_CLOUD: "{{ user_cloud }}" + register: _trex_status + until: _trex_status.stdout == 'ACTIVE' + retries: 30 + delay: 10 + changed_when: false + + - name: Wait for TRex SSH connectivity + ansible.builtin.wait_for: + host: "{{ _trex_fip.stdout }}" + port: 22 + timeout: 300 + + - name: Create SRIOV testpmd DUT pod + kubernetes.core.k8s: + kubeconfig: "{{ kubeconfig }}" + state: present + definition: "{{ lookup('template', 'testpmd-dut-pod.yaml.j2') }}" + wait: true + wait_sleep: 10 + wait_timeout: 120 + vars: + dut: "{{ perf_test.sriov }}" + + - name: Wait for SRIOV testpmd to start + ansible.builtin.pause: + seconds: 15 + + - name: Collect SRIOV DUT port MACs + ansible.builtin.shell: | + oc --kubeconfig {{ kubeconfig }} logs {{ perf_test.sriov.pod_name }} -n default | grep -E "^Port [0-9]" | grep -v "link state change" + register: _sriov_macs + retries: 10 + delay: 10 + until: _sriov_macs.stdout | length > 0 + changed_when: false + + - name: Save SRIOV DUT MACs + ansible.builtin.copy: + content: "{{ _sriov_macs.stdout }}" + dest: "{{ artifacts_dir }}/performance/sriov_dut_port_macs.conf" + mode: u=rw,g=rw,o=r + + - name: Parse SRIOV DUT MACs into comma-separated format for ansible-nfv + ansible.builtin.set_fact: + _sriov_dut_macs: >- + {{ _sriov_macs.stdout_lines + | map('regex_search', '([0-9A-Fa-f:]{17})') + | select('string') + | list + | join(',') }} + + - name: Display SRIOV DUT MACs + ansible.builtin.debug: + msg: "DUT MACs for TRex: {{ _sriov_dut_macs }}" + + - name: Generate SRIOV performance extra vars + ansible.builtin.template: + src: perf-extra-vars.yaml.j2 + dest: "{{ home_dir }}/perf-sriov-vars.yaml" + mode: u=rw,g=rw,o=r + + - name: Run SRIOV performance scenario + ansible.builtin.shell: | + cd {{ home_dir }}/ansible-nfv && ansible-playbook \ + -i {{ home_dir }}/perf-inventory.ini \ + -e @{{ home_dir }}/perf-sriov-vars.yaml \ + -e manual_run=false \ + -e binary_perf_log={{ artifacts_dir }}/performance/sriov_performance.log \ 
+ -e dut_group=sriov_dut \ + -e dut_type=sriov \ + -e report_junitxml_junitxml_output_path={{ artifacts_dir }}/performance/sriov_perf_results.xml \ + -e testpmd_lcores={{ perf_test.sriov.testpmd_lcores | default('1,2,3') }} \ + -e trex_rate={{ perf_test.sriov.trex_rate }} \ + -e clone_traffic_gen_repo=false \ + -e launch_testpmd=false \ + -e discover_dut_macs=false \ + "-e dut_macs={{ _sriov_dut_macs }}" \ + -e private_key_fetch_location={{ perf_trex_ssh_key | dirname }}/ \ + -e python_interperter=/usr/bin/python3 \ + playbooks/packet_gen/trex/performance_scenario.yml + register: _sriov_perf_result + changed_when: true + + - name: Display SRIOV performance result + ansible.builtin.debug: + msg: "SRIOV performance test completed with rc={{ _sriov_perf_result.rc }}" + +- name: Run DPDK performance test + when: perf_test.dpdk.enabled | default(true) | bool + block: + - name: Delete SRIOV DUT pod before DPDK test + kubernetes.core.k8s: + kubeconfig: "{{ kubeconfig }}" + state: absent + kind: Pod + namespace: default + name: "{{ perf_test.sriov.pod_name }}" + wait: true + when: perf_test.sriov.enabled | default(true) | bool + + - name: Reboot TRex VM for DPDK test + ansible.builtin.shell: | + openstack server stop {{ perf_test.trex_server }} && sleep 5 && openstack server start {{ perf_test.trex_server }} + environment: + OS_CLOUD: "{{ user_cloud }}" + changed_when: true + + - name: Wait for TRex VM to become active + ansible.builtin.shell: | + openstack server show {{ perf_test.trex_server }} -c status -f value + environment: + OS_CLOUD: "{{ user_cloud }}" + register: _trex_status + until: _trex_status.stdout == 'ACTIVE' + retries: 30 + delay: 10 + changed_when: false + + - name: Wait for TRex SSH connectivity + ansible.builtin.wait_for: + host: "{{ _trex_fip.stdout }}" + port: 22 + timeout: 300 + + - name: Create DPDK testpmd DUT pod + kubernetes.core.k8s: + kubeconfig: "{{ kubeconfig }}" + state: present + definition: "{{ lookup('template', 'testpmd-dut-pod.yaml.j2') 
}}" + wait: true + wait_sleep: 10 + wait_timeout: 120 + vars: + dut: "{{ perf_test.dpdk }}" + + - name: Wait for DPDK testpmd to start + ansible.builtin.pause: + seconds: 15 + + - name: Collect DPDK DUT port MACs + ansible.builtin.shell: | + oc --kubeconfig {{ kubeconfig }} logs {{ perf_test.dpdk.pod_name }} -n default | grep -E "^Port [0-9]" | grep -v "link state change" + register: _dpdk_macs + retries: 10 + delay: 10 + until: _dpdk_macs.stdout | length > 0 + changed_when: false + + - name: Save DPDK DUT MACs + ansible.builtin.copy: + content: "{{ _dpdk_macs.stdout }}" + dest: "{{ artifacts_dir }}/performance/dpdk_dut_port_macs.conf" + mode: u=rw,g=rw,o=r + + - name: Parse DPDK DUT MACs into comma-separated format for ansible-nfv + ansible.builtin.set_fact: + _dpdk_dut_macs: >- + {{ _dpdk_macs.stdout_lines + | map('regex_search', '([0-9A-Fa-f:]{17})') + | select('string') + | list + | join(',') }} + + - name: Display DPDK DUT MACs + ansible.builtin.debug: + msg: "DUT MACs for TRex: {{ _dpdk_dut_macs }}" + + - name: Generate DPDK performance extra vars + ansible.builtin.template: + src: perf-extra-vars.yaml.j2 + dest: "{{ home_dir }}/perf-dpdk-vars.yaml" + mode: u=rw,g=rw,o=r + + - name: Run DPDK performance scenario + ansible.builtin.shell: | + cd {{ home_dir }}/ansible-nfv && ansible-playbook \ + -i {{ home_dir }}/perf-inventory.ini \ + -e @{{ home_dir }}/perf-dpdk-vars.yaml \ + -e manual_run=false \ + -e binary_perf_log={{ artifacts_dir }}/performance/dpdk_performance.log \ + -e dut_group=dpdk_dut \ + -e dut_type=dpdk \ + -e report_junitxml_junitxml_output_path={{ artifacts_dir }}/performance/dpdk_perf_results.xml \ + -e testpmd_lcores={{ perf_test.dpdk.testpmd_lcores | default('3,4,5') }} \ + -e trex_rate={{ perf_test.dpdk.trex_rate }} \ + -e clone_traffic_gen_repo=false \ + -e launch_testpmd=false \ + -e discover_dut_macs=false \ + "-e dut_macs={{ _dpdk_dut_macs }}" \ + -e private_key_fetch_location={{ perf_trex_ssh_key | dirname }}/ \ + -e 
python_interperter=/usr/bin/python3 \ + playbooks/packet_gen/trex/performance_scenario.yml + register: _dpdk_perf_result + changed_when: true + + - name: Display DPDK performance result + ansible.builtin.debug: + msg: "DPDK performance test completed with rc={{ _dpdk_perf_result.rc }}" + +- name: Cleanup DUT pods + kubernetes.core.k8s: + kubeconfig: "{{ kubeconfig }}" + state: absent + kind: Pod + namespace: default + name: "{{ item }}" + wait: true + loop: + - "{{ perf_test.sriov.pod_name }}" + - "{{ perf_test.dpdk.pod_name }}" + ignore_errors: true diff --git a/collection/stages/roles/day2ops/templates/perf-extra-vars.yaml.j2 b/collection/stages/roles/day2ops/templates/perf-extra-vars.yaml.j2 new file mode 100644 index 00000000..53cfd963 --- /dev/null +++ b/collection/stages/roles/day2ops/templates/perf-extra-vars.yaml.j2 @@ -0,0 +1,21 @@ +--- +cloud_resources: external +cloud_name: "{{ user_cloud }}" +query_cloud: "{{ user_cloud }}" +trex_lcores: "{{ perf_test.trex_lcores | default(perf_trex_lcores | default('2-10')) }}" +# master_thread_id=2, latency_thread_id=3, data threads start at 4 +# TRex VM has 11 vCPUs (0-10), so max data thread is 10 -> 7 data threads +trex_process_threads: "{{ perf_test.trex_process_threads | default(7) }}" +binary_search_disable_upward_search: true +binary_search_warmup_trial_runtime: {{ perf_warmup_runtime | default(120) }} +discover_instance_external_ip: true +ssh_key: {{ perf_trex_ssh_key | basename }} +# Use system Python instead of the ansible-nfv venv (not created when manual_run=false) +# Note: ansible-nfv uses 'python_interperter' (typo) as the override variable +python_interperter: /usr/bin/python3 +venv_path: /usr +dut_compute: "" +dynamic_instances: + - name: {{ perf_test.trex_server }} + group: trex + user: {{ perf_trex_user | default('cloud-user') }} diff --git a/collection/stages/roles/day2ops/templates/perf-inventory.ini.j2 b/collection/stages/roles/day2ops/templates/perf-inventory.ini.j2 new file mode 100644 index 
00000000..a562436c --- /dev/null +++ b/collection/stages/roles/day2ops/templates/perf-inventory.ini.j2 @@ -0,0 +1,8 @@ +localhost ansible_connection=local ansible_python_interpreter=python3 +undercloud-0 ansible_connection=local ansible_python_interpreter=python3 + +[local] +localhost + +[undercloud] +undercloud-0 diff --git a/collection/stages/roles/day2ops/templates/performance-profile.yaml.j2 b/collection/stages/roles/day2ops/templates/performance-profile.yaml.j2 new file mode 100644 index 00000000..21cd57dd --- /dev/null +++ b/collection/stages/roles/day2ops/templates/performance-profile.yaml.j2 @@ -0,0 +1,28 @@ +# PerformanceProfile for {{ _profile_type }} worker nodes +# Configures CPU pinning, hugepages, and kernel args for telco workloads +apiVersion: performance.openshift.io/v2 +kind: PerformanceProfile +metadata: + name: {{ _profile_type }}-performanceprofile +spec: + cpu: + isolated: "{{ _cpu_isolated }}" + reserved: "{{ _cpu_reserved }}" + hugepages: + defaultHugepagesSize: "{{ _hugepages_size }}" + pages: + - size: "{{ _hugepages_size }}" + count: {{ _hugepages_count }} + node: {{ _numa_node }} + realTimeKernel: + enabled: {{ _realtime_kernel | default(false) | lower }} + numa: + topologyPolicy: "{{ _numa_topology_policy | default('best-effort') }}" + nodeSelector: + node-role.kubernetes.io/{{ _profile_type }}: "" +{% if _additional_kernel_args is defined and _additional_kernel_args | length > 0 %} + additionalKernelArgs: +{% for arg in _additional_kernel_args %} + - {{ arg }} +{% endfor %} +{% endif %} diff --git a/collection/stages/roles/day2ops/templates/sriov-network-node-policy.yaml.j2 b/collection/stages/roles/day2ops/templates/sriov-network-node-policy.yaml.j2 new file mode 100644 index 00000000..d6557c6e --- /dev/null +++ b/collection/stages/roles/day2ops/templates/sriov-network-node-policy.yaml.j2 @@ -0,0 +1,20 @@ +# SriovNetworkNodePolicy for {{ _policy_name }} +# Attaches OpenStack network to SRIOV-capable nodes +apiVersion: 
sriovnetwork.openshift.io/v1 +kind: SriovNetworkNodePolicy +metadata: + name: {{ _policy_name }} + namespace: openshift-sriov-network-operator +spec: + deviceType: {{ _device_type | default('vfio-pci') }} + nicSelector: + netFilter: openstack/NetworkID:{{ _network_id }} + nodeSelector: +{% if _node_selector_label is defined %} + {{ _node_selector_label }}: "{{ _node_selector_value | default('') }}" +{% else %} + feature.node.kubernetes.io/network-sriov.capable: "true" +{% endif %} + numVfs: {{ _num_vfs | default(1) }} + priority: {{ _priority | default(99) }} + resourceName: {{ _resource_name }} diff --git a/collection/stages/roles/day2ops/templates/testpmd-dut-pod.yaml.j2 b/collection/stages/roles/day2ops/templates/testpmd-dut-pod.yaml.j2 new file mode 100644 index 00000000..b645e9be --- /dev/null +++ b/collection/stages/roles/day2ops/templates/testpmd-dut-pod.yaml.j2 @@ -0,0 +1,59 @@ +apiVersion: v1 +kind: Pod +metadata: + name: {{ dut.pod_name }} + namespace: default + annotations: + irq-load-balancing.crio.io: disable + cpu-load-balancing.crio.io: disable + cpu-quota.crio.io: disable +spec: + runtimeClassName: {{ dut.runtime_class }} + containers: + - name: {{ dut.pod_name }} + command: ["/bin/sh"] + args: + - "-c" + - >- + testpmd + -l $(taskset -pc 1 | cut -d: -f2) + --in-memory + --socket-mem {{ dut.socket_mem | default('1024') }} + -n 4 + -- + --nb-cores={{ dut.testpmd_nb_cores }} + --auto-start + --stats-period 10 +{% if dut.testpmd_rxd is defined %} + --rxd={{ dut.testpmd_rxd }} + --txd={{ dut.testpmd_txd | default(dut.testpmd_rxd) }} +{% endif %} + image: {{ dut.image | default('registry.redhat.io/openshift4/dpdk-base-rhel8:v4.10.0-5') }} + securityContext: + privileged: true + runAsUser: 0 + resources: + requests: +{% for res in dut.resources %} + openshift.io/{{ res.name }}: "{{ res.count }}" +{% endfor %} + memory: {{ dut.memory }} + hugepages-{{ dut.hugepages_size | default('1Gi') }}: {{ dut.hugepages }} + cpu: '{{ dut.cpu }}' + limits: +{% for res in dut.resources %} +
openshift.io/{{ res.name }}: "{{ res.count }}" +{% endfor %} + memory: {{ dut.memory }} + hugepages-{{ dut.hugepages_size | default('1Gi') }}: {{ dut.hugepages }} + cpu: '{{ dut.cpu }}' + volumeMounts: + - mountPath: /dev/hugepages + name: hugepage + readOnly: false + nodeSelector: + {{ dut.node_selector }}: "" + volumes: + - name: hugepage + emptyDir: + medium: HugePages From 688a84ef33792cda6457200ed9a46c14005fb2a3 Mon Sep 17 00:00:00 2001 From: eshulman2 Date: Mon, 22 Jun 2026 15:09:30 +0300 Subject: [PATCH 7/7] Add telco verification job definition and NFV setup playbook Add a job definition for the telco (SR-IOV/DPDK) verification pipeline and the nfv_setup playbook that orchestrates the day2ops procedures for machineset creation, performance tuning, and test execution. Change-Id: I01690ab8206ad8ef616f7a0c19c984045f5c548d --- jobs_definitions/telco_verification.yaml | 169 +++++++++++++++++++++++ playbooks/ocp_testing.yaml | 4 + playbooks/plays/nfv_setup.yaml | 31 +++++ 3 files changed, 204 insertions(+) create mode 100644 jobs_definitions/telco_verification.yaml create mode 100644 playbooks/plays/nfv_setup.yaml diff --git a/jobs_definitions/telco_verification.yaml b/jobs_definitions/telco_verification.yaml new file mode 100644 index 00000000..7ee1c9c8 --- /dev/null +++ b/jobs_definitions/telco_verification.yaml @@ -0,0 +1,169 @@ +--- +# +# * TELCO VERIFICATION * +# +# - This job will be run in integration pipeline and will be monitored by CI team. +# - It includes the stages that will exercise the OSP integration in order to +# validate the OSP candidate release with Telco (SRIOV/DPDK) workloads. +# - It will use latest stable version of OCP. +# - It is prepared to run with a full virtualized setup, so the flavors are reduced +# from the official OCP documentation to fit on the virtualized resources. +# - It is prepared to run on top of RHOSP18 OSASINFRA Validated Architecture. 
+# +# Environment-specific values (networks, flavors, CIDRs) must be provided via +# telco_env in the ci-framework-jobs shiftstack_test.yaml for each environment. + +# openshift_release: 4-stable # It can be "4.10", "4.11", etc. or "4-stable" +openshift_release: 4-stable # It can be "4.10", "4.11", etc. or "4-stable" +openshift_build_name: "" # It can be an empty value "" (latest nightly), a given build +installation_type: ipi +stages: + - prepare + - install + - post + - nfv_setup + +# NFV setup procedures to run before verification +nfv_setup_steps: + - create-telco-machinesets + - configure-telco-tuning + - run-performance-test + +openstack_infra: + external_network: "{{ telco_env.external_network }}" + region: regionOne + network_config: + ipv4: + dns_servers: "{{ telco_env.dns_servers }}" + +custom_network: "{{ telco_env.management_network }}" +custom_subnet: "{{ telco_env.management_network }}" + +project: + name: "{{ user_cloud }}" + user: user + password: redhat + role: member + swift_role: swiftoperator + domain_name: Default + region: 'regionOne' + os_quota: + ram: 262144 + cores: 200 + +ocp_deployment_topology: + network_type: OVNKubernetes + machines_subnet: "{{ telco_env.management_network }}" + primary_ip_protocol: ipv4 + secondary_ip_protocol: "" + defaultMachinePlatform: + type: "{{ telco_env.default_machine_platform_flavor }}" + platform: + openstack: + clusterOSImageProperties: + hw_cpu_policy: dedicated + ipv4: + ip_version: 4 + tenant_subnet: + cidr: "{{ telco_env.subnet_cidr }}" + cluster_network: + cidr: 10.128.0.0/14 + host_prefix: 23 + service_network: + - 172.30.0.0/16 + flavors: + master: + name: "{{ telco_env.master_flavor }}" + worker: + name: "worker" + replicas: + master: 3 + worker: 0 + servergroups: + master: "soft-anti-affinity" + worker: "soft-anti-affinity" + +bootstrap_flavor_override: "{{ telco_env.bootstrap_flavor }}" + +# Telco MachineSet configuration for SRIOV/DPDK workers +telco_machinesets: + delete_default_workers: true + 
disable_masters_schedulable: true + machinesets: + - name: sriov + role: worker + type: sriov + replicas: 1 + flavor: "{{ telco_env.sriov_worker_flavor }}" + networks: "{{ telco_env.sriov_networks }}" + - name: dpdk + role: worker + type: dpdk + replicas: 1 + flavor: "{{ telco_env.dpdk_worker_flavor }}" + networks: "{{ telco_env.dpdk_networks }}" + +# Telco tuning configuration for SRIOV/DPDK workloads +telco_tuning: + sriov: + performance_profile: + cpu_isolated: "{{ telco_env.sriov_cpu_isolated | default('4,5,6,7,8,9,10,11,12,13') }}" + cpu_reserved: "{{ telco_env.sriov_cpu_reserved | default('0,1,2,3') }}" + hugepages_size: "1G" + hugepages_count: "{{ telco_env.sriov_hugepages_count | default(7) }}" + numa_node: "{{ telco_env.sriov_numa_node | default(0) }}" + numa_topology_policy: "best-effort" + realtime_kernel: false + additional_kernel_args: + - nosmt + - tsc=reliable + networks: "{{ telco_env.sriov_tuning_networks }}" + dpdk: + performance_profile: + cpu_isolated: "{{ telco_env.dpdk_cpu_isolated | default('4,5,6,7,8,9,10,11,12,13') }}" + cpu_reserved: "{{ telco_env.dpdk_cpu_reserved | default('0,1,2,3') }}" + hugepages_size: "1G" + hugepages_count: "{{ telco_env.dpdk_hugepages_count | default(7) }}" + numa_node: "{{ telco_env.dpdk_numa_node | default(1) }}" + numa_topology_policy: "best-effort" + realtime_kernel: false + additional_kernel_args: + - nosmt + - tsc=reliable + networks: "{{ telco_env.dpdk_tuning_networks }}" + +# Performance test configuration +perf_test: + ansible_nfv_repo: "https://github.com/NFV18/ansible-nfv.git" + ansible_nfv_branch: "main" + trex_server: "{{ telco_env.trex_server | default('trex') }}" + trex_management_network: "{{ telco_env.management_network }}" + trex_ssh_key: "{{ telco_env.trex_ssh_key }}" + trex_lcores: "{{ telco_env.trex_lcores | default('2-10') }}" + trex_process_threads: "{{ telco_env.trex_process_threads | default(7) }}" + sriov: + enabled: true + pod_name: "sriov-testpmd" + node_selector: 
"node-role.kubernetes.io/sriov" + runtime_class: "performance-sriov-performanceprofile" + cpu: 3 + hugepages: "7Gi" + memory: "1000Mi" + testpmd_nb_cores: 2 + testpmd_rxd: 1024 + testpmd_txd: 1024 + testpmd_lcores: "1,2,3" + resources: "{{ telco_env.sriov_perf_resources }}" + trex_rate: 15 # mpps + dpdk: + enabled: true + pod_name: "dpdk-testpmd" + node_selector: "node-role.kubernetes.io/dpdk" + runtime_class: "performance-dpdk-performanceprofile" + cpu: 3 + hugepages: "7Gi" + memory: "1000Mi" + testpmd_nb_cores: 1 + testpmd_lcores: "3,4,5" + resources: "{{ telco_env.dpdk_perf_resources }}" + trex_rate: 2 # mpps diff --git a/playbooks/ocp_testing.yaml b/playbooks/ocp_testing.yaml index ac386333..cf2dcbef 100644 --- a/playbooks/ocp_testing.yaml +++ b/playbooks/ocp_testing.yaml @@ -76,6 +76,10 @@ ansible.builtin.import_playbook: plays/post_install.yaml when: "'post' in stages" +- name: NFV setup stage + ansible.builtin.import_playbook: plays/nfv_setup.yaml + when: "'nfv_setup' in stages" + - name: Run OpenShift Verification stage ansible.builtin.import_playbook: plays/verification.yaml when: "'verification' in stages" diff --git a/playbooks/plays/nfv_setup.yaml b/playbooks/plays/nfv_setup.yaml new file mode 100644 index 00000000..9ec1bae1 --- /dev/null +++ b/playbooks/plays/nfv_setup.yaml @@ -0,0 +1,31 @@ +--- +- name: NFV setup stage + hosts: installer + gather_facts: true + vars_files: + - "../../configs/global.yml" + tasks: + - name: Main block + block: + - name: Run NFV setup procedures on OCP + ansible.builtin.include_role: + name: shiftstack.stages.day2ops + vars: + day2ops_steps: "{{ nfv_setup_steps }}" + day2ops_run_procedure_task: run_procedure_no_verify.yml + rescue: + - name: Set NFV setup failure info + ansible.builtin.set_fact: + nfv_setup_failed: true + nfv_setup_failed_task: "{{ ansible_failed_task.name | default('UNKNOWN') }}" + + - name: Fail the playbook + ansible.builtin.fail: + msg: > + NFV setup failed at task '{{ nfv_setup_failed_task }}'.
+ See ansible logs for more information. + always: + - name: Synchronize artifacts from the Ansible Managed Node to Ansible Controller + ansible.builtin.include_role: + name: shiftstack.tools.tools_ansible_inventory + tasks_from: sync_artifacts.yml