From 8a6d41cbc7d1a7f59d15f73e8a47204aad302c67 Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Tue, 2 Jun 2020 18:39:02 +0530 Subject: [PATCH 01/34] initialised touchstone_workload --- workloads/nodevertical.yml | 5 +++++ workloads/roles/prometheus_metric_aggregation/tasks/main.yml | 0 workloads/templates/workload-env.yml.j2 | 1 + 3 files changed, 6 insertions(+) create mode 100644 workloads/roles/prometheus_metric_aggregation/tasks/main.yml diff --git a/workloads/nodevertical.yml b/workloads/nodevertical.yml index e2184c35..6ae696a6 100644 --- a/workloads/nodevertical.yml +++ b/workloads/nodevertical.yml @@ -85,6 +85,11 @@ name: pprof-collection when: pprof_collect and pprof_collect != "" + - name: Get Prometheus data aggregations + include_role: + name: prometheus_metirc_aggregations + when: prom_metric_aggregates and prom_metric_aggregates != "" + - name: Set NodeVertical template set_fact: nodevertical_template: "{% if nodevertical_heavy|bool %}workload-nodevertical-heavy-script-cm.yml.j2{% else %}workload-nodevertical-script-cm.yml.j2{% endif %}" diff --git a/workloads/roles/prometheus_metric_aggregation/tasks/main.yml b/workloads/roles/prometheus_metric_aggregation/tasks/main.yml new file mode 100644 index 00000000..e69de29b diff --git a/workloads/templates/workload-env.yml.j2 b/workloads/templates/workload-env.yml.j2 index 994f16bc..eef9e242 100644 --- a/workloads/templates/workload-env.yml.j2 +++ b/workloads/templates/workload-env.yml.j2 @@ -5,6 +5,7 @@ metadata: data: ENABLE_PBENCH_AGENTS: "{{enable_pbench_agents|bool|lower}}" PPROF_COLLECT: "{{ ((pprof_collect == None) | ternary(false, pprof_collect)) if pprof_collect is defined else false}}" + PROM_AGGREGATE_COLLECT: "{{ ((prom_aggregate_collect == None) | ternary(false, prom_aggregate_collect)) if prom_aggregate_collect is defined else false}}" {% if workload_job == "http" %} {% for v in http_env_vars %} {{ v }}: "{{ lookup('env', v) }}" From a7d5b1c409a8cb8f6731d906029ae378334b46ef Mon Sep 17 00:00:00 
2001 From: Amit Sagtani Date: Tue, 2 Jun 2020 19:58:34 +0530 Subject: [PATCH 02/34] added variable --- workloads/nodevertical.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workloads/nodevertical.yml b/workloads/nodevertical.yml index 6ae696a6..31d192d7 100644 --- a/workloads/nodevertical.yml +++ b/workloads/nodevertical.yml @@ -88,7 +88,7 @@ - name: Get Prometheus data aggregations include_role: name: prometheus_metirc_aggregations - when: prom_metric_aggregates and prom_metric_aggregates != "" + when: prom_aggregate_collect and prom_aggregate_collect != "" - name: Set NodeVertical template set_fact: From 3b91209467800e087dfdff1ae78471a5832d5a1a Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Thu, 11 Jun 2020 19:49:22 +0530 Subject: [PATCH 03/34] added touchstone --- docs/nodevertical.md | 5 +++ .../workload-nodevertical-script-cm.yml.j2 | 34 ++++++++++++++++++- workloads/vars/nodevertical.yml | 4 +++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/docs/nodevertical.md b/docs/nodevertical.md index ea2ae65b..68c7ff6c 100644 --- a/docs/nodevertical.md +++ b/docs/nodevertical.md @@ -175,6 +175,11 @@ Default: `false` If you'd like to enable pprof profile data collection of kubeapiserver and prometheus through conprof(https://github.com/conprof/conprof). Enabling this will create a few services to collect profiles from the apiserver pods and then create a conprof tarball in the pbench tarball +### PROM_AGGREGATE_COLLECT +Default: `false` +If you'd like to enable collection of prometheus data aggregation of kubeapiserver through touchstone(https://github.com/cloud-bulldozer/touchstone). +Enabling this will create a few services to produce prometheus data aggregations from the apiserver pods. + ### NODEVERTICAL_HEAVY_PROBE_PERIOD Default: `30` Readiness probe period for the application deployed by the heavy nodevertical. 
diff --git a/workloads/templates/workload-nodevertical-script-cm.yml.j2 b/workloads/templates/workload-nodevertical-script-cm.yml.j2 index 2039e0a2..db9f2ad0 100644 --- a/workloads/templates/workload-nodevertical-script-cm.yml.j2 +++ b/workloads/templates/workload-nodevertical-script-cm.yml.j2 @@ -51,6 +51,12 @@ data: envsubst < /root/workload/conprof_stop.sh > /tmp/conprof_stop.sh workload_log "Done configuring conprof" fi + if [ "${PROM_AGGREGATE_COLLECT}" = "true" ]; then + workload_log "Configuring touchstone" + envsubst < /root/workload/prom_config.yaml.template > /tmp/prom_config.yaml + envsubst < /root/workload/prom_aggregation_start.sh > /tmp/prom_aggregation_start.sh + workload_log "Done configuring touchstone" + fi workload_log "Running NodeVertical workload" if [ "${PBENCH_INSTRUMENTATION}" = "true" ]; then pbench-user-benchmark -- sh /root/workload/workload.sh @@ -90,15 +96,22 @@ data: #!/bin/sh set -o pipefail pkill conprof + prom_aggregation_start.sh: | + #!/bin/sh + set -o pipefail + touchstone_compare -database prometheus -v -prom_config /tmp/prom_config.yaml workload.sh: | #!/bin/sh set -o pipefail - result_dir=/tmp if [ "${PPROF_COLLECT}" = "true" ]; then workload_log "Starting conprof" bash /tmp/conprof_start.sh fi + if [ "${PROM_AGGREGATE_COLLECT}" = "true" ]; then + workload_log "Starting touchstone" + bash prom_aggregation_start.sh + fi if [ "${PBENCH_INSTRUMENTATION}" = "true" ]; then result_dir=${benchmark_results_dir} fi @@ -173,6 +186,25 @@ data: privileged: false nodeSelector: nodevertical: 'true' + + + prom_config.yaml.template: | + query: + - sum(container_memory_rss{namespace=~"openshift-kube-apiserver",name!="",container=~"kube-apiserver.*"}) by (container) + - up + headers: + - {{bearer_token.stdout}} + disable_ssl: + - True + start_time_list: + - 1588791810 + - 1588791710 + end_time_list: + - 1588795118 + - 1588795018 + url: + - 'prometheus-k8s-openshift-monitoring.apps.{{clustername}}.{{base_domain}}' + conprof.yaml.template: | 
scrape_configs: - job_name: 'apiserver0' diff --git a/workloads/vars/nodevertical.yml b/workloads/vars/nodevertical.yml index d826d7c2..8054629b 100644 --- a/workloads/vars/nodevertical.yml +++ b/workloads/vars/nodevertical.yml @@ -35,6 +35,10 @@ pbench_server: "{{ lookup('env', 'PBENCH_SERVER')|default('', true) }}" # pporf variables pprof_collect: "{{ lookup('env', 'PPROF_COLLECT')|default(false, true)|bool|lower }}" + +# touchstone variables +prom_aggregate_collect: "{{ lookup('env', 'PPROF_COLLECT')|default(false, true)|bool|lower }}" + # Azure auth vars to set for ocp on azure azure_auth: "{{ lookup('env', 'AZURE_AUTH')|default(false, true)|bool|lower }}" azure_auth_file: "{{ lookup('env', 'AZURE_AUTH_FILE')|default('', true) }}" From 699c2550cef5b440f020601330487a6817e74e26 Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Mon, 15 Jun 2020 18:00:31 +0530 Subject: [PATCH 04/34] added role --- .../tasks/main.yml | 16 +++++++++ .../workload-nodevertical-script-cm.yml.j2 | 33 +++++++------------ 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/workloads/roles/prometheus_metric_aggregation/tasks/main.yml b/workloads/roles/prometheus_metric_aggregation/tasks/main.yml index e69de29b..a3e606dd 100644 --- a/workloads/roles/prometheus_metric_aggregation/tasks/main.yml +++ b/workloads/roles/prometheus_metric_aggregation/tasks/main.yml @@ -0,0 +1,16 @@ +- name: check sa for prom_server + shell: "oc get sa -n openshift-kube-apiserver | grep prom_server | wc -l" + register: apiserver_pprof_sa + +- name: create sa to access pprof profiles of apiserver + block: + - name: create sa + shell: "oc -n openshift-kube-apiserver create sa prom_server" + + - name: add cluster-admin clusterrrole + shell: "oc create clusterrolebinding pprof-admin --clusterrole cluster-admin --serviceaccount=openshift-kube-apiserver:prom_server" + when: apiserver_pprof_sa.stdout | int == 0 + +- name: get the bearer token + shell: "oc -n openshift-kube-apiserver sa get-token prom_server" + 
register: prom_bearer_token diff --git a/workloads/templates/workload-nodevertical-script-cm.yml.j2 b/workloads/templates/workload-nodevertical-script-cm.yml.j2 index db9f2ad0..26662ca5 100644 --- a/workloads/templates/workload-nodevertical-script-cm.yml.j2 +++ b/workloads/templates/workload-nodevertical-script-cm.yml.j2 @@ -51,12 +51,6 @@ data: envsubst < /root/workload/conprof_stop.sh > /tmp/conprof_stop.sh workload_log "Done configuring conprof" fi - if [ "${PROM_AGGREGATE_COLLECT}" = "true" ]; then - workload_log "Configuring touchstone" - envsubst < /root/workload/prom_config.yaml.template > /tmp/prom_config.yaml - envsubst < /root/workload/prom_aggregation_start.sh > /tmp/prom_aggregation_start.sh - workload_log "Done configuring touchstone" - fi workload_log "Running NodeVertical workload" if [ "${PBENCH_INSTRUMENTATION}" = "true" ]; then pbench-user-benchmark -- sh /root/workload/workload.sh @@ -108,10 +102,6 @@ data: workload_log "Starting conprof" bash /tmp/conprof_start.sh fi - if [ "${PROM_AGGREGATE_COLLECT}" = "true" ]; then - workload_log "Starting touchstone" - bash prom_aggregation_start.sh - fi if [ "${PBENCH_INSTRUMENTATION}" = "true" ]; then result_dir=${benchmark_results_dir} fi @@ -128,7 +118,13 @@ data: exit_code=$? 
end_time=$(date +%s) duration=$((end_time-start_time)) - + if [ "${PROM_AGGREGATE_COLLECT}" = "true" ]; then + workload_log "Configuring touchstone" + envsubst < /root/workload/prom_config.yaml.template > /tmp/prom_config.yaml + envsubst < /root/workload/prom_aggregation_start.sh > /tmp/prom_aggregation_start.sh + workload_log "Starting touchstone" + bash prom_aggregation_start.sh + fi if [ "${PPROF_COLLECT}" = "true" ]; then workload_log "Stopping conprof" bash /tmp/conprof_stop.sh @@ -186,25 +182,20 @@ data: privileged: false nodeSelector: nodevertical: 'true' - - prom_config.yaml.template: | + --- query: - sum(container_memory_rss{namespace=~"openshift-kube-apiserver",name!="",container=~"kube-apiserver.*"}) by (container) - - up headers: - - {{bearer_token.stdout}} + - {{prom_bearer_token.stdout}} disable_ssl: - True start_time_list: - - 1588791810 - - 1588791710 + - $start_time end_time_list: - - 1588795118 - - 1588795018 + - $end_time url: - - 'prometheus-k8s-openshift-monitoring.apps.{{clustername}}.{{base_domain}}' - + - prometheus-k8s-openshift-monitoring.apps.{{clustername}}.{{base_domain}} conprof.yaml.template: | scrape_configs: - job_name: 'apiserver0' From 29034a13051d5cab0c9d78b760cf8b2f681a1b45 Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Mon, 15 Jun 2020 20:56:56 +0530 Subject: [PATCH 05/34] store output and create tarball --- workloads/templates/workload-nodevertical-script-cm.yml.j2 | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/workloads/templates/workload-nodevertical-script-cm.yml.j2 b/workloads/templates/workload-nodevertical-script-cm.yml.j2 index 26662ca5..c806c58d 100644 --- a/workloads/templates/workload-nodevertical-script-cm.yml.j2 +++ b/workloads/templates/workload-nodevertical-script-cm.yml.j2 @@ -93,7 +93,7 @@ data: prom_aggregation_start.sh: | #!/bin/sh set -o pipefail - touchstone_compare -database prometheus -v -prom_config /tmp/prom_config.yaml + touchstone_compare -database prometheus -v -prom_config 
/tmp/prom_config.yaml > /tmp/prom_aggregations/aggregations.json workload.sh: | #!/bin/sh set -o pipefail @@ -124,6 +124,8 @@ data: envsubst < /root/workload/prom_aggregation_start.sh > /tmp/prom_aggregation_start.sh workload_log "Starting touchstone" bash prom_aggregation_start.sh + tar -czvf ${result_dir}/prom_aggregation.tar.gz /tmp/prom_aggregations/ + workload_log "Completed prometheus data aggregations and stored tarballs" fi if [ "${PPROF_COLLECT}" = "true" ]; then workload_log "Stopping conprof" From 41fba1675bcaa0807bb64f31a2b3218f64e6c8a4 Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Tue, 16 Jun 2020 19:32:48 +0530 Subject: [PATCH 06/34] minor changes --- workloads/nodevertical.yml | 2 +- workloads/templates/workload-nodevertical-script-cm.yml.j2 | 7 +++---- workloads/vars/nodevertical.yml | 3 +-- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/workloads/nodevertical.yml b/workloads/nodevertical.yml index 31d192d7..c413b262 100644 --- a/workloads/nodevertical.yml +++ b/workloads/nodevertical.yml @@ -85,7 +85,7 @@ name: pprof-collection when: pprof_collect and pprof_collect != "" - - name: Get Prometheus data aggregations + - name: Get Prometheus authorizations include_role: name: prometheus_metirc_aggregations when: prom_aggregate_collect and prom_aggregate_collect != "" diff --git a/workloads/templates/workload-nodevertical-script-cm.yml.j2 b/workloads/templates/workload-nodevertical-script-cm.yml.j2 index c806c58d..f2834b8d 100644 --- a/workloads/templates/workload-nodevertical-script-cm.yml.j2 +++ b/workloads/templates/workload-nodevertical-script-cm.yml.j2 @@ -93,7 +93,7 @@ data: prom_aggregation_start.sh: | #!/bin/sh set -o pipefail - touchstone_compare -database prometheus -v -prom_config /tmp/prom_config.yaml > /tmp/prom_aggregations/aggregations.json + touchstone_compare -database prometheus -v -prom_config /tmp/prom_config.yaml > ${result_dir}/aggregations.json workload.sh: | #!/bin/sh set -o pipefail @@ -123,9 +123,8 @@ data: 
envsubst < /root/workload/prom_config.yaml.template > /tmp/prom_config.yaml envsubst < /root/workload/prom_aggregation_start.sh > /tmp/prom_aggregation_start.sh workload_log "Starting touchstone" - bash prom_aggregation_start.sh - tar -czvf ${result_dir}/prom_aggregation.tar.gz /tmp/prom_aggregations/ - workload_log "Completed prometheus data aggregations and stored tarballs" + bash /tmp/prom_aggregation_start.sh + workload_log "Completed prometheus data aggregations" fi if [ "${PPROF_COLLECT}" = "true" ]; then workload_log "Stopping conprof" diff --git a/workloads/vars/nodevertical.yml b/workloads/vars/nodevertical.yml index 8054629b..07714cd6 100644 --- a/workloads/vars/nodevertical.yml +++ b/workloads/vars/nodevertical.yml @@ -35,9 +35,8 @@ pbench_server: "{{ lookup('env', 'PBENCH_SERVER')|default('', true) }}" # pporf variables pprof_collect: "{{ lookup('env', 'PPROF_COLLECT')|default(false, true)|bool|lower }}" - # touchstone variables -prom_aggregate_collect: "{{ lookup('env', 'PPROF_COLLECT')|default(false, true)|bool|lower }}" +prom_aggregate_collect: "{{ lookup('env', 'PROM_AGGREGATE_COLLECT')|default(false, true)|bool|lower }}" # Azure auth vars to set for ocp on azure azure_auth: "{{ lookup('env', 'AZURE_AUTH')|default(false, true)|bool|lower }}" From 1d47a5dc0795501be783cfb69b376b3946554f98 Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Thu, 18 Jun 2020 17:38:17 +0530 Subject: [PATCH 07/34] fixed typo --- workloads/nodevertical.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workloads/nodevertical.yml b/workloads/nodevertical.yml index c413b262..093b96bb 100644 --- a/workloads/nodevertical.yml +++ b/workloads/nodevertical.yml @@ -87,7 +87,7 @@ - name: Get Prometheus authorizations include_role: - name: prometheus_metirc_aggregations + name: prometheus_metric_aggregation when: prom_aggregate_collect and prom_aggregate_collect != "" - name: Set NodeVertical template From 524d04befa5f072401a92b6704027c4beeafafd6 Mon Sep 17 00:00:00 
2001 From: Amit Sagtani Date: Thu, 18 Jun 2020 19:34:30 +0530 Subject: [PATCH 08/34] fixed server name --- .../roles/prometheus_metric_aggregation/tasks/main.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/workloads/roles/prometheus_metric_aggregation/tasks/main.yml b/workloads/roles/prometheus_metric_aggregation/tasks/main.yml index a3e606dd..40bea9d2 100644 --- a/workloads/roles/prometheus_metric_aggregation/tasks/main.yml +++ b/workloads/roles/prometheus_metric_aggregation/tasks/main.yml @@ -1,16 +1,16 @@ -- name: check sa for prom_server - shell: "oc get sa -n openshift-kube-apiserver | grep prom_server | wc -l" +- name: check sa for prom-server + shell: "oc get sa -n openshift-kube-apiserver | grep prom-server | wc -l" register: apiserver_pprof_sa - name: create sa to access pprof profiles of apiserver block: - name: create sa - shell: "oc -n openshift-kube-apiserver create sa prom_server" + shell: "oc -n openshift-kube-apiserver create sa prom-server" - name: add cluster-admin clusterrrole - shell: "oc create clusterrolebinding pprof-admin --clusterrole cluster-admin --serviceaccount=openshift-kube-apiserver:prom_server" + shell: "oc create clusterrolebinding prom-admin --clusterrole cluster-admin --serviceaccount=openshift-kube-apiserver:prom-server" when: apiserver_pprof_sa.stdout | int == 0 - name: get the bearer token - shell: "oc -n openshift-kube-apiserver sa get-token prom_server" + shell: "oc -n openshift-kube-apiserver sa get-token prom-server" register: prom_bearer_token From 532f02e02ceaec63f16ac7d01d418d889f419894 Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Tue, 23 Jun 2020 19:40:28 +0530 Subject: [PATCH 09/34] fixed variables name --- .../roles/prometheus_metric_aggregation/tasks/main.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/workloads/roles/prometheus_metric_aggregation/tasks/main.yml b/workloads/roles/prometheus_metric_aggregation/tasks/main.yml index 40bea9d2..26c9831f 
100644 --- a/workloads/roles/prometheus_metric_aggregation/tasks/main.yml +++ b/workloads/roles/prometheus_metric_aggregation/tasks/main.yml @@ -1,15 +1,15 @@ - name: check sa for prom-server shell: "oc get sa -n openshift-kube-apiserver | grep prom-server | wc -l" - register: apiserver_pprof_sa + register: prom_server_sa -- name: create sa to access pprof profiles of apiserver +- name: create sa to access prom_server profiles block: - name: create sa shell: "oc -n openshift-kube-apiserver create sa prom-server" - name: add cluster-admin clusterrrole shell: "oc create clusterrolebinding prom-admin --clusterrole cluster-admin --serviceaccount=openshift-kube-apiserver:prom-server" - when: apiserver_pprof_sa.stdout | int == 0 + when: prom_server_sa.stdout | int == 0 - name: get the bearer token shell: "oc -n openshift-kube-apiserver sa get-token prom-server" From c6c7bfd53f16cc36f43541f839f65cf5580fcd2f Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Tue, 23 Jun 2020 20:08:24 +0530 Subject: [PATCH 10/34] minor fixes --- workloads/templates/workload-nodevertical-script-cm.yml.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workloads/templates/workload-nodevertical-script-cm.yml.j2 b/workloads/templates/workload-nodevertical-script-cm.yml.j2 index f2834b8d..bd8d924f 100644 --- a/workloads/templates/workload-nodevertical-script-cm.yml.j2 +++ b/workloads/templates/workload-nodevertical-script-cm.yml.j2 @@ -93,7 +93,7 @@ data: prom_aggregation_start.sh: | #!/bin/sh set -o pipefail - touchstone_compare -database prometheus -v -prom_config /tmp/prom_config.yaml > ${result_dir}/aggregations.json + touchstone_compare -database prometheus -v -prom_config /tmp/prom_config.yaml workload.sh: | #!/bin/sh set -o pipefail @@ -196,7 +196,7 @@ data: end_time_list: - $end_time url: - - prometheus-k8s-openshift-monitoring.apps.{{clustername}}.{{base_domain}} + - https://prometheus-k8s-openshift-monitoring.apps.{{clustername}}.{{base_domain}} 
conprof.yaml.template: | scrape_configs: - job_name: 'apiserver0' From dd3bfeae72415cd9300f78578ce42b50a756731a Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Tue, 23 Jun 2020 21:34:41 +0530 Subject: [PATCH 11/34] added queries and fixed start,end time --- workloads/templates/workload-nodevertical-script-cm.yml.j2 | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/workloads/templates/workload-nodevertical-script-cm.yml.j2 b/workloads/templates/workload-nodevertical-script-cm.yml.j2 index bd8d924f..2ccbd2ec 100644 --- a/workloads/templates/workload-nodevertical-script-cm.yml.j2 +++ b/workloads/templates/workload-nodevertical-script-cm.yml.j2 @@ -105,7 +105,7 @@ data: if [ "${PBENCH_INSTRUMENTATION}" = "true" ]; then result_dir=${benchmark_results_dir} fi - start_time=$(date +%s) + export start_time=$(date +%s) if [[ "${AZURE_AUTH}" == "true" ]]; then export AZURE_AUTH_LOCATION=/tmp/azure_auth fi @@ -116,7 +116,7 @@ data: export es_index={{ snafu_es_index_prefix }} VIPERCONFIG=/tmp/nodevertical.yaml python3 /tmp/snafu/run_snafu.py -t cl scale-ci --cl-output True --dir "${result_dir}" -p openshift-tests | tee "${result_dir}/clusterloader.txt" exit_code=$? 
- end_time=$(date +%s) + export end_time=$(date +%s) duration=$((end_time-start_time)) if [ "${PROM_AGGREGATE_COLLECT}" = "true" ]; then workload_log "Configuring touchstone" @@ -187,6 +187,9 @@ data: --- query: - sum(container_memory_rss{namespace=~"openshift-kube-apiserver",name!="",container=~"kube-apiserver.*"}) by (container) + - sum(container_memory_rss{namespace!="",name!="",namespace=~"openshift-etcd",container=~"etcd"}) by (container) + - sum(rate(container_cpu_usage_seconds_total{name!="",clustername=~"$clustername",namespace!="",namespace=~"openshift-kube-apiserver",container=~"kube-apiserver"}[5m])) by (container) + - sum(rate(container_cpu_usage_seconds_total{name!="",clustername=~"$clustername",namespace!="",namespace=~"openshift-etcd",container=~"etcd"}[5m])) by (container) headers: - {{prom_bearer_token.stdout}} disable_ssl: From b1a8792248a6d310a93ec79cf6b6ea0e6d157eab Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Tue, 23 Jun 2020 22:02:29 +0530 Subject: [PATCH 12/34] removed invalid query --- workloads/templates/workload-nodevertical-script-cm.yml.j2 | 3 --- 1 file changed, 3 deletions(-) diff --git a/workloads/templates/workload-nodevertical-script-cm.yml.j2 b/workloads/templates/workload-nodevertical-script-cm.yml.j2 index 2ccbd2ec..f6820575 100644 --- a/workloads/templates/workload-nodevertical-script-cm.yml.j2 +++ b/workloads/templates/workload-nodevertical-script-cm.yml.j2 @@ -187,9 +187,6 @@ data: --- query: - sum(container_memory_rss{namespace=~"openshift-kube-apiserver",name!="",container=~"kube-apiserver.*"}) by (container) - - sum(container_memory_rss{namespace!="",name!="",namespace=~"openshift-etcd",container=~"etcd"}) by (container) - - sum(rate(container_cpu_usage_seconds_total{name!="",clustername=~"$clustername",namespace!="",namespace=~"openshift-kube-apiserver",container=~"kube-apiserver"}[5m])) by (container) - - 
sum(rate(container_cpu_usage_seconds_total{name!="",clustername=~"$clustername",namespace!="",namespace=~"openshift-etcd",container=~"etcd"}[5m])) by (container) headers: - {{prom_bearer_token.stdout}} disable_ssl: From 6ac11d16e9f690ab74ef302f683ca485d568ce5f Mon Sep 17 00:00:00 2001 From: Naga Ravi Chaitanya Elluri Date: Mon, 29 Jun 2020 21:31:58 -0400 Subject: [PATCH 13/34] Add support to mastervertical to collect conprof data This PR is highly inspired from https://github.com/openshift-scale/workloads/pull/131. It enables the users to collect pprof data through conprof. --- docs/mastervertical.md | 5 + workloads/mastervertical.yml | 18 +-- .../workload-mastervertical-script-cm.yml.j2 | 142 ++++++++++++++++++ .../workload-nodevertical-script-cm.yml.j2 | 8 - workloads/vars/mastervertical.yml | 3 + 5 files changed, 157 insertions(+), 19 deletions(-) diff --git a/docs/mastervertical.md b/docs/mastervertical.md index 2c3c855c..cba2733a 100644 --- a/docs/mastervertical.md +++ b/docs/mastervertical.md @@ -127,6 +127,11 @@ Basename used by cluster loader for the project(s) it creates. Default: `1000` Maximum number of projects that will be created by the mastervertical workload. Typically much higher values are used than the default for large scale tests. +### PPROF_COLLECT +Default: `false` +If you'd like to enable pprof profile data collection of kubeapiserver and prometheus through conprof(https://github.com/conprof/conprof). +Enabling this will create a few services to collect profiles from the apiserver pods and then create a conprof tarball in the pbench tarball + ### EXPECTED_MASTERVERTICAL_DURATION Default: `600` Pass/fail criteria. Value to determine if MasterVertical workload executed in duration expected. 
diff --git a/workloads/mastervertical.yml b/workloads/mastervertical.yml index d124031f..6ce07225 100644 --- a/workloads/mastervertical.yml +++ b/workloads/mastervertical.yml @@ -40,18 +40,14 @@ src: "{{pbench_ssh_public_key_file}}" register: pbench_ssh_public_key_file_slurp - - name: Block to set clustername - block: - - name: Get cluster name - shell: | - {%raw%}oc get machineset -n openshift-machine-api -o=go-template='{{index (index .items 0).metadata.labels "machine.openshift.io/cluster-api-cluster"}}'{%endraw%} - register: cluster_name + - name: Set cluster details + include_role: + name: cluster_details - - name: Create tooling service account - set_fact: - snafu_cluster_name: cluster_name.stdout - when: cluster_name is succeeded - when: snafu_cluster_name == "" + - name: Collect pprof + include_role: + name: pprof-collection + when: pprof_collect and pprof_collect != "" - name: Template workload templates template: diff --git a/workloads/templates/workload-mastervertical-script-cm.yml.j2 b/workloads/templates/workload-mastervertical-script-cm.yml.j2 index 87a6a678..97134f9c 100644 --- a/workloads/templates/workload-mastervertical-script-cm.yml.j2 +++ b/workloads/templates/workload-mastervertical-script-cm.yml.j2 @@ -40,6 +40,14 @@ data: fi workload_log "Done configuring pbench for MasterVertical" + if [ "${PPROF_COLLECT}" = "true" ]; then + workload_log "Configuring conprof" + envsubst < /root/workload/conprof.yaml.template > /tmp/conprof.yaml + envsubst < /root/workload/conprof_start.sh > /tmp/conprof_start.sh + envsubst < /root/workload/conprof_stop.sh > /tmp/conprof_stop.sh + workload_log "Done configuring conprof" + fi + workload_log "Configuring MasterVertical test" envsubst < /root/workload/mastervertical.yaml.template > /tmp/mastervertical.yaml workload_log "Done configuring MasterVertical test" @@ -75,11 +83,26 @@ data: # TODO: Check pbench-agent collected metrics for Pass/Fail # TODO: Check prometheus collected metrics for Pass/Fail workload_log 
"Test Analysis: Passed" + + conprof_start.sh: | + #!/bin/sh + set -o pipefail + nohup /usr/bin/conprof all --config.file /tmp/conprof.yaml --log.level=debug --storage.tsdb.path=/tmp/data &>/tmp/conprof.log & + conprof_stop.sh: | + #!/bin/sh + set -o pipefail + pkill conprof + workload.sh: | #!/bin/sh set -o pipefail result_dir=/tmp + if [ "${PPROF_COLLECT}" = "true" ]; then + workload_log "Starting conprof" + bash /tmp/conprof_start.sh + fi + if [ "${PBENCH_INSTRUMENTATION}" = "true" ]; then result_dir=${benchmark_results_dir} fi @@ -97,6 +120,15 @@ data: end_time=$(date +%s) duration=$((end_time-start_time)) + if [ "${PPROF_COLLECT}" = "true" ]; then + workload_log "Stopping conprof" + bash /tmp/conprof_stop.sh + cp /tmp/conprof.log ${result_dir}/conprof.log + cp /tmp/conprof.yaml ${result_dir}/conprof.yaml + tar -czvf ${result_dir}/conprof.tar.gz /tmp/data/ + workload_log "copied conprof tarballs and log" + fi + workload_log "Writing Cluster Loader Exit Code" jq -n '. | ."exit_code"='${exit_code}' | ."duration"='${duration}'' > "${result_dir}/exit.json" workload_log "Writing Cluster Loader Metrics to clusterloader.json" @@ -560,3 +592,113 @@ data: required: true labels: template: routeTemplate + conprof.yaml.template: | + scrape_configs: + - job_name: 'apiserver0' + scrape_interval: 30s + scrape_timeout: 10m + scheme: https + tls_config: + insecure_skip_verify: true + static_configs: + - targets: ['apiserver0-openshift-kube-apiserver.apps.{{clustername}}.{{base_domain}}'] + bearer_token: {{bearer_token.stdout}} + profiling_config: + pprof_config: + heap: + enabled: true + profile: + enabled: true + goroutine: + enabled: false + threadcreate: + enabled: false + allocs: + enabled: false + block: + enabled: false + mutex: + enabled: false + trace: + enabled: false + - job_name: 'apiserver1' + scrape_interval: 30s + scrape_timeout: 10m + scheme: https + tls_config: + insecure_skip_verify: true + static_configs: + - targets: 
['apiserver1-openshift-kube-apiserver.apps.{{clustername}}.{{base_domain}}'] + bearer_token: {{bearer_token.stdout}} + profiling_config: + pprof_config: + heap: + enabled: true + profile: + enabled: true + goroutine: + enabled: false + threadcreate: + enabled: false + allocs: + enabled: false + block: + enabled: false + mutex: + enabled: false + trace: + enabled: false + - job_name: 'apiserver2' + scrape_interval: 30s + scrape_timeout: 10m + scheme: https + tls_config: + insecure_skip_verify: true + static_configs: + - targets: ['apiserver2-openshift-kube-apiserver.apps.{{clustername}}.{{base_domain}}'] + bearer_token: {{bearer_token.stdout}} + profiling_config: + pprof_config: + heap: + enabled: true + profile: + enabled: true + goroutine: + enabled: false + threadcreate: + enabled: false + allocs: + enabled: false + block: + enabled: false + mutex: + enabled: false + trace: + enabled: false + - job_name: 'prometheus' + scrape_interval: 30s + scrape_timeout: 10m + scheme: https + tls_config: + insecure_skip_verify: true + static_configs: + - targets: ['prometheus-k8s-openshift-monitoring.apps.{{clustername}}.{{base_domain}}'] + bearer_token: {{bearer_token.stdout}} + profiling_config: + pprof_config: + heap: + enabled: true + profile: + enabled: false + goroutine: + enabled: false + threadcreate: + enabled: false + allocs: + enabled: false + block: + enabled: false + mutex: + enabled: false + trace: + enabled: false diff --git a/workloads/templates/workload-nodevertical-script-cm.yml.j2 b/workloads/templates/workload-nodevertical-script-cm.yml.j2 index 2039e0a2..ba94b5ec 100644 --- a/workloads/templates/workload-nodevertical-script-cm.yml.j2 +++ b/workloads/templates/workload-nodevertical-script-cm.yml.j2 @@ -198,8 +198,6 @@ data: enabled: false block: enabled: false - cmdline: - enabled: false mutex: enabled: false trace: @@ -227,8 +225,6 @@ data: enabled: false block: enabled: false - cmdline: - enabled: false mutex: enabled: false trace: @@ -256,8 +252,6 @@ 
data: enabled: false block: enabled: false - cmdline: - enabled: false mutex: enabled: false trace: @@ -285,8 +279,6 @@ data: enabled: false block: enabled: false - cmdline: - enabled: false mutex: enabled: false trace: diff --git a/workloads/vars/mastervertical.yml b/workloads/vars/mastervertical.yml index 197d9953..86400465 100644 --- a/workloads/vars/mastervertical.yml +++ b/workloads/vars/mastervertical.yml @@ -32,6 +32,9 @@ pbench_ssh_private_key_file: "{{ lookup('env', 'PBENCH_SSH_PRIVATE_KEY_FILE')|de pbench_ssh_public_key_file: "{{ lookup('env', 'PBENCH_SSH_PUBLIC_KEY_FILE')|default('~/.ssh/id_rsa.pub', true) }}" pbench_server: "{{ lookup('env', 'PBENCH_SERVER')|default('', true) }}" +# pporf variables +pprof_collect: "{{ lookup('env', 'PPROF_COLLECT')|default(false, true)|bool|lower }}" + # Azure auth vars to set for ocp on azure azure_auth: "{{ lookup('env', 'AZURE_AUTH')|default(false, true)|bool|lower }}" azure_auth_file: "{{ lookup('env', 'AZURE_AUTH_FILE')|default('', true) }}" From 238eba7ef31bf12475697ecda88ecc225505f57a Mon Sep 17 00:00:00 2001 From: Mohit Sheth Date: Thu, 2 Jul 2020 16:33:42 +0000 Subject: [PATCH 14/34] Add support to promethues-scale to collect conprof data --- docs/prometheus-scale.md | 6 + workloads/prometheus.yml | 13 +- .../workload-prometheus-script-cm.yml.j2} | 144 +++++++++++++++++- workloads/vars/prometheus.yml | 3 + 4 files changed, 163 insertions(+), 3 deletions(-) rename workloads/{files/workload-prometheus-script-cm.yml => templates/workload-prometheus-script-cm.yml.j2} (81%) diff --git a/docs/prometheus-scale.md b/docs/prometheus-scale.md index 958dd9cf..427a46d0 100644 --- a/docs/prometheus-scale.md +++ b/docs/prometheus-scale.md @@ -98,3 +98,9 @@ Sleep interval for each block iteration in seconds. ### PROMETHEUS_SCALE_TEST_PREFIX Default: `prometheus-scale` Sets the pbench result test prefix. 
+ +### PPROF_COLLECT +Default: `false` +If you'd like to enable pprof profile data collection of kubeapiserver and prometheus through conprof(https://github.com/conprof/conprof). +Enabling this will create a few services to collect profiles from the apiserver pods and then create a conprof tarball in the pbench tarball + diff --git a/workloads/prometheus.yml b/workloads/prometheus.yml index f5a8f17f..f79eaffa 100644 --- a/workloads/prometheus.yml +++ b/workloads/prometheus.yml @@ -24,8 +24,6 @@ with_items: - src: scale-ci-tooling-ns.yml dest: "{{ansible_user_dir}}/scale-ci-tooling/scale-ci-tooling-ns.yml" - - src: workload-prometheus-script-cm.yml - dest: "{{ansible_user_dir}}/scale-ci-tooling/workload-prometheus-script-cm.yml" - name: Slurp kubeconfig file slurp: @@ -42,6 +40,15 @@ src: "{{pbench_ssh_public_key_file}}" register: pbench_ssh_public_key_file_slurp + - name: Set cluster details + include_role: + name: cluster_details + + - name: Collect pprof + include_role: + name: pprof-collection + when: pprof_collect and pprof_collect != "" + - name: Template workload templates template: src: "{{item.src}}" @@ -58,6 +65,8 @@ dest: "{{ansible_user_dir}}/scale-ci-tooling/workload-job.yml" - src: workload-env.yml.j2 dest: "{{ansible_user_dir}}/scale-ci-tooling/workload-prometheus-env.yml" + - src: workload-prometheus-script-cm.yml.j2 + dest: "{{ansible_user_dir}}/scale-ci-tooling/workload-prometheus-script-cm.yml" - name: Check if scale-ci-tooling namespace exists shell: | diff --git a/workloads/files/workload-prometheus-script-cm.yml b/workloads/templates/workload-prometheus-script-cm.yml.j2 similarity index 81% rename from workloads/files/workload-prometheus-script-cm.yml rename to workloads/templates/workload-prometheus-script-cm.yml.j2 index 03ce3e1d..4b6b301b 100644 --- a/workloads/files/workload-prometheus-script-cm.yml +++ b/workloads/templates/workload-prometheus-script-cm.yml.j2 @@ -38,6 +38,14 @@ data: fi workload_log "Done configuring pbench for Prometheus 
scale" + if [ "${PPROF_COLLECT}" = "true" ]; then + workload_log "Configuring conprof" + envsubst < /root/workload/conprof.yaml.template > /tmp/conprof.yaml + envsubst < /root/workload/conprof_start.sh > /tmp/conprof_start.sh + envsubst < /root/workload/conprof_stop.sh > /tmp/conprof_stop.sh + workload_log "Done configuring conprof" + fi + workload_log "Running Prometheus scale workload" if [ "${PBENCH_INSTRUMENTATION}" = "true" ]; then pbench-user-benchmark --pbench-post='sh /root/workload/post-run.sh' -- sh /root/workload/workload.sh @@ -53,10 +61,25 @@ data: RESULT_DIR=/tmp fi workload_log "Completed Prometheus scale workload run" + + conprof_start.sh: | + #!/bin/sh + set -o pipefail + nohup /usr/bin/conprof all --config.file /tmp/conprof.yaml --log.level=debug --storage.tsdb.path=/tmp/data &>/tmp/conprof.log & + conprof_stop.sh: | + #!/bin/sh + set -o pipefail + pkill conprof + workload.sh: | #!/bin/sh set -ox pipefail + if [ "${PPROF_COLLECT}" = "true" ]; then + workload_log "Starting conprof" + bash /tmp/conprof_start.sh + fi + db_aging() { while true; do echo "$(date +'%m-%d-%y-%H:%M:%S') $(oc exec prometheus-k8s-0 -n openshift-monitoring -c prometheus -- df |grep /prometheus$)" >> /tmp/pvc_monitor_0.log @@ -80,13 +103,22 @@ data: # stop the prometheus load kill -9 ${loader_pid} ${db_aging_pid} + if [ "${PPROF_COLLECT}" = "true" ]; then + workload_log "Stopping conprof" + bash /tmp/conprof_stop.sh + cp /tmp/conprof.log ${benchmark_results_dir}/conprof.log + cp /tmp/conprof.yaml ${benchmark_results_dir}/conprof.yaml + tar -czvf ${benchmark_results_dir}/conprof.tar.gz /tmp/data/ + workload_log "copied conprof tarballs and log" + fi + # test idle sleep 300 post-run.sh: | #!/bin/sh set -ox pipefail - RESULT_DIR="/var/lib/pbench-agent/$(ls -t /var/lib/pbench-agent/ | grep "pbench-user" | head -1)"/1/sample1 + RESULT_DIR="/var/lib/pbench-agent/$(ls -t /var/lib/pbench-agent/ | grep "pbench-user" | head -1)"/1-default/sample1 echo "Using RESULT_DIR of: 
\"${RESULT_DIR}\"" oc logs -n openshift-monitoring prometheus-k8s-0 -c prometheus --since=${PROMETHEUS_DURATION}s > ${RESULT_DIR}/oc_logs_1.log oc logs -n openshift-monitoring prometheus-k8s-1 -c prometheus --since=${PROMETHEUS_DURATION}s > ${RESULT_DIR}/oc_logs_2.log @@ -463,3 +495,113 @@ data: def get_dashboards(self): return self.dashboards + conprof.yaml.template: | + scrape_configs: + - job_name: 'apiserver0' + scrape_interval: 30s + scrape_timeout: 10m + scheme: https + tls_config: + insecure_skip_verify: true + static_configs: + - targets: ['apiserver0-openshift-kube-apiserver.apps.{{clustername}}.{{base_domain}}'] + bearer_token: {{bearer_token.stdout}} + profiling_config: + pprof_config: + heap: + enabled: true + profile: + enabled: true + goroutine: + enabled: false + threadcreate: + enabled: false + allocs: + enabled: false + block: + enabled: false + mutex: + enabled: false + trace: + enabled: false + - job_name: 'apiserver1' + scrape_interval: 30s + scrape_timeout: 10m + scheme: https + tls_config: + insecure_skip_verify: true + static_configs: + - targets: ['apiserver1-openshift-kube-apiserver.apps.{{clustername}}.{{base_domain}}'] + bearer_token: {{bearer_token.stdout}} + profiling_config: + pprof_config: + heap: + enabled: true + profile: + enabled: true + goroutine: + enabled: false + threadcreate: + enabled: false + allocs: + enabled: false + block: + enabled: false + mutex: + enabled: false + trace: + enabled: false + - job_name: 'apiserver2' + scrape_interval: 30s + scrape_timeout: 10m + scheme: https + tls_config: + insecure_skip_verify: true + static_configs: + - targets: ['apiserver2-openshift-kube-apiserver.apps.{{clustername}}.{{base_domain}}'] + bearer_token: {{bearer_token.stdout}} + profiling_config: + pprof_config: + heap: + enabled: true + profile: + enabled: true + goroutine: + enabled: false + threadcreate: + enabled: false + allocs: + enabled: false + block: + enabled: false + mutex: + enabled: false + trace: + enabled: false + - 
job_name: 'prometheus' + scrape_interval: 30s + scrape_timeout: 10m + scheme: https + tls_config: + insecure_skip_verify: true + static_configs: + - targets: ['prometheus-k8s-openshift-monitoring.apps.{{clustername}}.{{base_domain}}'] + bearer_token: {{bearer_token.stdout}} + profiling_config: + pprof_config: + heap: + enabled: true + profile: + enabled: false + goroutine: + enabled: false + threadcreate: + enabled: false + allocs: + enabled: false + block: + enabled: false + mutex: + enabled: false + trace: + enabled: false diff --git a/workloads/vars/prometheus.yml b/workloads/vars/prometheus.yml index 59b81951..0aa0b596 100644 --- a/workloads/vars/prometheus.yml +++ b/workloads/vars/prometheus.yml @@ -29,6 +29,9 @@ pbench_server: "{{ lookup('env', 'PBENCH_SERVER')|default('', true) }}" scale_ci_results_token: "{{ lookup('env', 'SCALE_CI_RESULTS_TOKEN')|default('', true) }}" job_completion_poll_attempts: "{{ lookup('env', 'JOB_COMPLETION_POLL_ATTEMPTS')|default(360, true)|int }}" +# pporf variables +pprof_collect: "{{ lookup('env', 'PPROF_COLLECT')|default(false, true)|bool|lower }}" + # Prometheus scale workload specific parameters: prometheus_scale_test_prefix: "{{ lookup('env', 'PROMETHEUS_SCALE_TEST_PREFIX')|default('prometheus-scale', true) }}" prometheus_concurrency: "{{ lookup('env', 'PROMETHEUS_CONCURRENCY')|default(10, true)|int }}" From b800a7d72714180393639a83565c658e76e3742f Mon Sep 17 00:00:00 2001 From: Mohit Sheth Date: Tue, 28 Jul 2020 18:42:21 +0000 Subject: [PATCH 15/34] use snafu as a package --- .../templates/workload-deployments-per-ns-script-cm.yml.j2 | 2 +- workloads/templates/workload-fio-script-cm.yml.j2 | 2 +- workloads/templates/workload-mastervertical-script-cm.yml.j2 | 2 +- .../templates/workload-namespaces-per-cluster-script-cm.yml.j2 | 2 +- workloads/templates/workload-network-script-cm.yml.j2 | 2 +- .../templates/workload-nodevertical-heavy-script-cm.yml.j2 | 2 +- workloads/templates/workload-nodevertical-script-cm.yml.j2 | 2 +- 
workloads/templates/workload-podvertical-script-cm.yml.j2 | 2 +- workloads/templates/workload-pvcscale-script-cm.yml.j2 | 2 +- .../templates/workload-services-per-namespace-script-cm.yml.j2 | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/workloads/templates/workload-deployments-per-ns-script-cm.yml.j2 b/workloads/templates/workload-deployments-per-ns-script-cm.yml.j2 index dca9a86c..622a8173 100644 --- a/workloads/templates/workload-deployments-per-ns-script-cm.yml.j2 +++ b/workloads/templates/workload-deployments-per-ns-script-cm.yml.j2 @@ -51,7 +51,7 @@ data: export es={{ snafu_es_host }} export es_port={{ snafu_es_port }} export es_index={{ snafu_es_index_prefix }} - pbench-user-benchmark -- 'VIPERCONFIG=/root/workload/cluster-limits-deployments-per-namespace.yaml python3 /tmp/snafu/run_snafu.py -t cl scale-ci --cl-output True --dir /tmp/snafu_results -p openshift-tests' + pbench-user-benchmark -- 'VIPERCONFIG=/root/workload/cluster-limits-deployments-per-namespace.yaml run_snafu -t cl scale-ci --cl-output True --dir /tmp/snafu_results -p openshift-tests' pbench-copy-results --prefix {{deployments_per_ns_test_prefix}} echo "$(date -u) Completed running Deployments per ns cluster limits test" # End of Test Code diff --git a/workloads/templates/workload-fio-script-cm.yml.j2 b/workloads/templates/workload-fio-script-cm.yml.j2 index d44c1afa..b61b816c 100644 --- a/workloads/templates/workload-fio-script-cm.yml.j2 +++ b/workloads/templates/workload-fio-script-cm.yml.j2 @@ -56,7 +56,7 @@ data: export es={{ snafu_es_host }} export es_port={{ snafu_es_port }} export es_index={{ snafu_es_index_prefix }} - pbench-user-benchmark --config="{{ fiotest_prefix }}-pods-{{ fiotest_maxpods }}-sc-{{ fiotest_storageclass }}-create_pods-{{ fiotest_description }}" -- 'VIPERCONFIG=/root/workload/fiotest.yml python3 /tmp/snafu/run_snafu.py -t cl scale-ci --cl-output True --dir /tmp/snafu_results -p openshift-tests' + pbench-user-benchmark --config="{{ 
fiotest_prefix }}-pods-{{ fiotest_maxpods }}-sc-{{ fiotest_storageclass }}-create_pods-{{ fiotest_description }}" -- 'VIPERCONFIG=/root/workload/fiotest.yml run_snafu -t cl scale-ci --cl-output True --dir /tmp/snafu_results -p openshift-tests' echo "$(date -u) Pods for FIO I/O test created." # wait until all pods are started and then collect data diff --git a/workloads/templates/workload-mastervertical-script-cm.yml.j2 b/workloads/templates/workload-mastervertical-script-cm.yml.j2 index 97134f9c..c57a8be2 100644 --- a/workloads/templates/workload-mastervertical-script-cm.yml.j2 +++ b/workloads/templates/workload-mastervertical-script-cm.yml.j2 @@ -115,7 +115,7 @@ data: export es={{ snafu_es_host }} export es_port={{ snafu_es_port }} export es_index={{ snafu_es_index_prefix }} - VIPERCONFIG=/tmp/mastervertical.yaml python3 /tmp/snafu/run_snafu.py -t cl scale-ci --cl-output True --dir "${result_dir}" -p openshift-tests | tee "${result_dir}/clusterloader.txt" + VIPERCONFIG=/tmp/mastervertical.yaml run_snafu -t cl scale-ci --cl-output True --dir "${result_dir}" -p openshift-tests | tee "${result_dir}/clusterloader.txt" exit_code=$? 
end_time=$(date +%s) duration=$((end_time-start_time)) diff --git a/workloads/templates/workload-namespaces-per-cluster-script-cm.yml.j2 b/workloads/templates/workload-namespaces-per-cluster-script-cm.yml.j2 index 537f7248..57647299 100644 --- a/workloads/templates/workload-namespaces-per-cluster-script-cm.yml.j2 +++ b/workloads/templates/workload-namespaces-per-cluster-script-cm.yml.j2 @@ -92,7 +92,7 @@ data: export es={{ snafu_es_host }} export es_port={{ snafu_es_port }} export es_index={{ snafu_es_index_prefix }} - VIPERCONFIG=/tmp/namespaces_per_cluster.yaml python3 /tmp/snafu/run_snafu.py -t cl scale-ci --cl-output True --dir "${result_dir}" -p openshift-tests | tee "${result_dir}/clusterloader.txt" + VIPERCONFIG=/tmp/namespaces_per_cluster.yaml run_snafu -t cl scale-ci --cl-output True --dir "${result_dir}" -p openshift-tests | tee "${result_dir}/clusterloader.txt" exit_code=$? end_time=$(date +%s) duration=$((end_time-start_time)) diff --git a/workloads/templates/workload-network-script-cm.yml.j2 b/workloads/templates/workload-network-script-cm.yml.j2 index d7edf7aa..8bd62337 100644 --- a/workloads/templates/workload-network-script-cm.yml.j2 +++ b/workloads/templates/workload-network-script-cm.yml.j2 @@ -77,7 +77,7 @@ data: export es={{ snafu_es_host }} export es_port={{ snafu_es_port }} export es_index={{ snafu_es_index_prefix }} - VIPERCONFIG=/tmp/network.yaml python3 /tmp/snafu/run_snafu.py -t cl scale-ci --cl-output True --dir "${result_dir}" -p openshift-tests + VIPERCONFIG=/tmp/network.yaml run_snafu -t cl scale-ci --cl-output True --dir "${result_dir}" -p openshift-tests # Check if clients and servers are ready for rc_num in `seq 0 $((${pair_count} - 1))` diff --git a/workloads/templates/workload-nodevertical-heavy-script-cm.yml.j2 b/workloads/templates/workload-nodevertical-heavy-script-cm.yml.j2 index a91ccce0..ec1817b9 100644 --- a/workloads/templates/workload-nodevertical-heavy-script-cm.yml.j2 +++
b/workloads/templates/workload-nodevertical-heavy-script-cm.yml.j2 @@ -90,7 +90,7 @@ data: export es={{ snafu_es_host }} export es_port={{ snafu_es_port }} export es_index={{ snafu_es_index_prefix }} - VIPERCONFIG=/tmp/nodevertical-heavy.yaml python3 /tmp/snafu/run_snafu.py -t cl scale-ci --cl-output True --dir "${result_dir}" -p openshift-tests | tee "${result_dir}/clusterloader.txt" + VIPERCONFIG=/tmp/nodevertical-heavy.yaml run_snafu -t cl scale-ci --cl-output True --dir "${result_dir}" -p openshift-tests | tee "${result_dir}/clusterloader.txt" exit_code=$? end_time=$(date +%s) duration=$((end_time-start_time)) diff --git a/workloads/templates/workload-nodevertical-script-cm.yml.j2 b/workloads/templates/workload-nodevertical-script-cm.yml.j2 index ba94b5ec..e68d6ec1 100644 --- a/workloads/templates/workload-nodevertical-script-cm.yml.j2 +++ b/workloads/templates/workload-nodevertical-script-cm.yml.j2 @@ -111,7 +111,7 @@ data: export es={{ snafu_es_host }} export es_port={{ snafu_es_port }} export es_index={{ snafu_es_index_prefix }} - VIPERCONFIG=/tmp/nodevertical.yaml python3 /tmp/snafu/run_snafu.py -t cl scale-ci --cl-output True --dir "${result_dir}" -p openshift-tests | tee "${result_dir}/clusterloader.txt" + VIPERCONFIG=/tmp/nodevertical.yaml run_snafu -t cl scale-ci --cl-output True --dir "${result_dir}" -p openshift-tests | tee "${result_dir}/clusterloader.txt" exit_code=$? 
end_time=$(date +%s) duration=$((end_time-start_time)) diff --git a/workloads/templates/workload-podvertical-script-cm.yml.j2 b/workloads/templates/workload-podvertical-script-cm.yml.j2 index 0f7da5a0..05fef457 100644 --- a/workloads/templates/workload-podvertical-script-cm.yml.j2 +++ b/workloads/templates/workload-podvertical-script-cm.yml.j2 @@ -93,7 +93,7 @@ data: export es={{ snafu_es_host }} export es_port={{ snafu_es_port }} export es_index={{ snafu_es_index_prefix }} - VIPERCONFIG=/tmp/podvertical.yaml python3 /tmp/snafu/run_snafu.py -t cl scale-ci --cl-output True --dir "${result_dir}" -p openshift-tests | tee "${result_dir}/clusterloader.txt" + VIPERCONFIG=/tmp/podvertical.yaml run_snafu -t cl scale-ci --cl-output True --dir "${result_dir}" -p openshift-tests | tee "${result_dir}/clusterloader.txt" exit_code=$? end_time=$(date +%s) duration=$((end_time-start_time)) diff --git a/workloads/templates/workload-pvcscale-script-cm.yml.j2 b/workloads/templates/workload-pvcscale-script-cm.yml.j2 index 7da1c797..f2ca1920 100644 --- a/workloads/templates/workload-pvcscale-script-cm.yml.j2 +++ b/workloads/templates/workload-pvcscale-script-cm.yml.j2 @@ -51,7 +51,7 @@ data: export es={{ snafu_es_host }} export es_port={{ snafu_es_port }} export es_index={{ snafu_es_index_prefix }} - pbench-user-benchmark --config="{{ pvcscale_test_prefix }}-pods-{{ pvcscale_maxpods }}-sc-{{ pvcscale_storageclass }}-create_pods" -- 'VIPERCONFIG=/root/workload/pvcscale.yml python3 /tmp/snafu/run_snafu.py -t cl scale-ci --cl-output True --dir /tmp/snafu_results -p openshift-tests' + pbench-user-benchmark --config="{{ pvcscale_test_prefix }}-pods-{{ pvcscale_maxpods }}-sc-{{ pvcscale_storageclass }}-create_pods" -- 'VIPERCONFIG=/root/workload/pvcscale.yml run_snafu -t cl scale-ci --cl-output True --dir /tmp/snafu_results -p openshift-tests' echo "$(date -u) Pods/PVC are crated ..." 
# End Test Configuration diff --git a/workloads/templates/workload-services-per-namespace-script-cm.yml.j2 b/workloads/templates/workload-services-per-namespace-script-cm.yml.j2 index 5b5df720..4446a3da 100644 --- a/workloads/templates/workload-services-per-namespace-script-cm.yml.j2 +++ b/workloads/templates/workload-services-per-namespace-script-cm.yml.j2 @@ -92,7 +92,7 @@ data: export es={{ snafu_es_host }} export es_port={{ snafu_es_port }} export es_index={{ snafu_es_index_prefix }} - VIPERCONFIG=/tmp/services_per_namespace.yaml python3 /tmp/snafu/run_snafu.py -t cl scale-ci --cl-output True --dir "${result_dir}" -p openshift-tests | tee "${result_dir}/clusterloader.txt" + VIPERCONFIG=/tmp/services_per_namespace.yaml run_snafu -t cl scale-ci --cl-output True --dir "${result_dir}" -p openshift-tests | tee "${result_dir}/clusterloader.txt" exit_code=$? end_time=$(date +%s) duration=$((end_time-start_time)) From eda2ecfb7dedd93aa69037e4d54fc4f98aa70ca5 Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Wed, 12 Aug 2020 16:45:18 +0530 Subject: [PATCH 16/34] stored the output and updated touchstone config file --- .../workload-nodevertical-script-cm.yml.j2 | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/workloads/templates/workload-nodevertical-script-cm.yml.j2 b/workloads/templates/workload-nodevertical-script-cm.yml.j2 index f6820575..1e6bdf29 100644 --- a/workloads/templates/workload-nodevertical-script-cm.yml.j2 +++ b/workloads/templates/workload-nodevertical-script-cm.yml.j2 @@ -123,8 +123,9 @@ data: envsubst < /root/workload/prom_config.yaml.template > /tmp/prom_config.yaml envsubst < /root/workload/prom_aggregation_start.sh > /tmp/prom_aggregation_start.sh workload_log "Starting touchstone" - bash /tmp/prom_aggregation_start.sh - workload_log "Completed prometheus data aggregations" + bash /tmp/prom_aggregation_start.sh > /tmp/prom_aggregation.log + cp /tmp/prom_aggregation.log ${result_dir}/prom_aggregation.log + workload_log 
"Completed prometheus data aggregations and stored logs" fi if [ "${PPROF_COLLECT}" = "true" ]; then workload_log "Stopping conprof" @@ -185,9 +186,11 @@ data: nodevertical: 'true' prom_config.yaml.template: | --- - query: + url: + - https://prometheus-k8s-openshift-monitoring.apps.{{clustername}}.{{base_domain}} + query_list: - sum(container_memory_rss{namespace=~"openshift-kube-apiserver",name!="",container=~"kube-apiserver.*"}) by (container) - headers: + bearer_token: - {{prom_bearer_token.stdout}} disable_ssl: - True @@ -195,8 +198,6 @@ data: - $start_time end_time_list: - $end_time - url: - - https://prometheus-k8s-openshift-monitoring.apps.{{clustername}}.{{base_domain}} conprof.yaml.template: | scrape_configs: - job_name: 'apiserver0' From d2078a5d0206cfb95a1b0391cfefa44e0d299700 Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Tue, 2 Jun 2020 18:39:02 +0530 Subject: [PATCH 17/34] initialised touchstone_workload --- workloads/nodevertical.yml | 5 +++++ workloads/roles/prometheus_metric_aggregation/tasks/main.yml | 0 workloads/templates/workload-env.yml.j2 | 1 + 3 files changed, 6 insertions(+) create mode 100644 workloads/roles/prometheus_metric_aggregation/tasks/main.yml diff --git a/workloads/nodevertical.yml b/workloads/nodevertical.yml index e2184c35..6ae696a6 100644 --- a/workloads/nodevertical.yml +++ b/workloads/nodevertical.yml @@ -85,6 +85,11 @@ name: pprof-collection when: pprof_collect and pprof_collect != "" + - name: Get Prometheus data aggregations + include_role: + name: prometheus_metirc_aggregations + when: prom_metric_aggregates and prom_metric_aggregates != "" + - name: Set NodeVertical template set_fact: nodevertical_template: "{% if nodevertical_heavy|bool %}workload-nodevertical-heavy-script-cm.yml.j2{% else %}workload-nodevertical-script-cm.yml.j2{% endif %}" diff --git a/workloads/roles/prometheus_metric_aggregation/tasks/main.yml b/workloads/roles/prometheus_metric_aggregation/tasks/main.yml new file mode 100644 index 
00000000..e69de29b diff --git a/workloads/templates/workload-env.yml.j2 b/workloads/templates/workload-env.yml.j2 index 994f16bc..eef9e242 100644 --- a/workloads/templates/workload-env.yml.j2 +++ b/workloads/templates/workload-env.yml.j2 @@ -5,6 +5,7 @@ metadata: data: ENABLE_PBENCH_AGENTS: "{{enable_pbench_agents|bool|lower}}" PPROF_COLLECT: "{{ ((pprof_collect == None) | ternary(false, pprof_collect)) if pprof_collect is defined else false}}" + PROM_AGGREGATE_COLLECT: "{{ ((prom_aggregate_collect == None) | ternary(false, prom_aggregate_collect)) if prom_aggregate_collect is defined else false}}" {% if workload_job == "http" %} {% for v in http_env_vars %} {{ v }}: "{{ lookup('env', v) }}" From 5e550387d71595905088122b2e382865291d0cb5 Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Tue, 2 Jun 2020 19:58:34 +0530 Subject: [PATCH 18/34] added variable --- workloads/nodevertical.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workloads/nodevertical.yml b/workloads/nodevertical.yml index 6ae696a6..31d192d7 100644 --- a/workloads/nodevertical.yml +++ b/workloads/nodevertical.yml @@ -88,7 +88,7 @@ - name: Get Prometheus data aggregations include_role: name: prometheus_metirc_aggregations - when: prom_metric_aggregates and prom_metric_aggregates != "" + when: prom_aggregate_collect and prom_aggregate_collect != "" - name: Set NodeVertical template set_fact: From c60f2096921dcf4626815665e0417df71b5779ff Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Thu, 11 Jun 2020 19:49:22 +0530 Subject: [PATCH 19/34] added touchstone --- docs/nodevertical.md | 5 +++ .../workload-nodevertical-script-cm.yml.j2 | 34 ++++++++++++++++++- workloads/vars/nodevertical.yml | 4 +++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/docs/nodevertical.md b/docs/nodevertical.md index ea2ae65b..68c7ff6c 100644 --- a/docs/nodevertical.md +++ b/docs/nodevertical.md @@ -175,6 +175,11 @@ Default: `false` If you'd like to enable pprof profile data collection of 
kubeapiserver and prometheus through conprof(https://github.com/conprof/conprof). Enabling this will create a few services to collect profiles from the apiserver pods and then create a conprof tarball in the pbench tarball +### PROM_AGGREGATE_COLLECT +Default: `false` +If you'd like to enable collection of prometheus data aggregation of kubeapiserver through touchstone(https://github.com/cloud-bulldozer/touchstone). +Enabling this will create a few services to produce prometheus data aggregations from the apiserver pods. + ### NODEVERTICAL_HEAVY_PROBE_PERIOD Default: `30` Readiness probe period for the application deployed by the heavy nodevertical. diff --git a/workloads/templates/workload-nodevertical-script-cm.yml.j2 b/workloads/templates/workload-nodevertical-script-cm.yml.j2 index e68d6ec1..9f6ebce2 100644 --- a/workloads/templates/workload-nodevertical-script-cm.yml.j2 +++ b/workloads/templates/workload-nodevertical-script-cm.yml.j2 @@ -51,6 +51,12 @@ data: envsubst < /root/workload/conprof_stop.sh > /tmp/conprof_stop.sh workload_log "Done configuring conprof" fi + if [ "${PROM_AGGREGATE_COLLECT}" = "true" ]; then + workload_log "Configuring touchstone" + envsubst < /root/workload/prom_config.yaml.template > /tmp/prom_config.yaml + envsubst < /root/workload/prom_aggregation_start.sh > /tmp/prom_aggregation_start.sh + workload_log "Done configuring touchstone" + fi workload_log "Running NodeVertical workload" if [ "${PBENCH_INSTRUMENTATION}" = "true" ]; then pbench-user-benchmark -- sh /root/workload/workload.sh @@ -90,15 +96,22 @@ data: #!/bin/sh set -o pipefail pkill conprof + prom_aggregation_start.sh: | + #!/bin/sh + set -o pipefail + touchstone_compare -database prometheus -v -prom_config /tmp/prom_config.yaml workload.sh: | #!/bin/sh set -o pipefail - result_dir=/tmp if [ "${PPROF_COLLECT}" = "true" ]; then workload_log "Starting conprof" bash /tmp/conprof_start.sh fi + if [ "${PROM_AGGREGATE_COLLECT}" = "true" ]; then + workload_log "Starting 
touchstone" + bash prom_aggregation_start.sh + fi if [ "${PBENCH_INSTRUMENTATION}" = "true" ]; then result_dir=${benchmark_results_dir} fi @@ -173,6 +186,25 @@ data: privileged: false nodeSelector: nodevertical: 'true' + + + prom_config.yaml.template: | + query: + - sum(container_memory_rss{namespace=~"openshift-kube-apiserver",name!="",container=~"kube-apiserver.*"}) by (container) + - up + headers: + - {{bearer_token.stdout}} + disable_ssl: + - True + start_time_list: + - 1588791810 + - 1588791710 + end_time_list: + - 1588795118 + - 1588795018 + url: + - 'prometheus-k8s-openshift-monitoring.apps.{{clustername}}.{{base_domain}}' + conprof.yaml.template: | scrape_configs: - job_name: 'apiserver0' diff --git a/workloads/vars/nodevertical.yml b/workloads/vars/nodevertical.yml index d826d7c2..8054629b 100644 --- a/workloads/vars/nodevertical.yml +++ b/workloads/vars/nodevertical.yml @@ -35,6 +35,10 @@ pbench_server: "{{ lookup('env', 'PBENCH_SERVER')|default('', true) }}" # pporf variables pprof_collect: "{{ lookup('env', 'PPROF_COLLECT')|default(false, true)|bool|lower }}" + +# touchstone variables +prom_aggregate_collect: "{{ lookup('env', 'PPROF_COLLECT')|default(false, true)|bool|lower }}" + # Azure auth vars to set for ocp on azure azure_auth: "{{ lookup('env', 'AZURE_AUTH')|default(false, true)|bool|lower }}" azure_auth_file: "{{ lookup('env', 'AZURE_AUTH_FILE')|default('', true) }}" From aba8f78912bb40ea927b471df845d9cff3e05525 Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Mon, 15 Jun 2020 18:00:31 +0530 Subject: [PATCH 20/34] added role --- .../tasks/main.yml | 16 +++++++++ .../workload-nodevertical-script-cm.yml.j2 | 33 +++++++------------ 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/workloads/roles/prometheus_metric_aggregation/tasks/main.yml b/workloads/roles/prometheus_metric_aggregation/tasks/main.yml index e69de29b..a3e606dd 100644 --- a/workloads/roles/prometheus_metric_aggregation/tasks/main.yml +++ 
b/workloads/roles/prometheus_metric_aggregation/tasks/main.yml @@ -0,0 +1,16 @@ +- name: check sa for prom_server + shell: "oc get sa -n openshift-kube-apiserver | grep prom_server | wc -l" + register: apiserver_pprof_sa + +- name: create sa to access pprof profiles of apiserver + block: + - name: create sa + shell: "oc -n openshift-kube-apiserver create sa prom_server" + + - name: add cluster-admin clusterrrole + shell: "oc create clusterrolebinding pprof-admin --clusterrole cluster-admin --serviceaccount=openshift-kube-apiserver:prom_server" + when: apiserver_pprof_sa.stdout | int == 0 + +- name: get the bearer token + shell: "oc -n openshift-kube-apiserver sa get-token prom_server" + register: prom_bearer_token diff --git a/workloads/templates/workload-nodevertical-script-cm.yml.j2 b/workloads/templates/workload-nodevertical-script-cm.yml.j2 index 9f6ebce2..6bc865ec 100644 --- a/workloads/templates/workload-nodevertical-script-cm.yml.j2 +++ b/workloads/templates/workload-nodevertical-script-cm.yml.j2 @@ -51,12 +51,6 @@ data: envsubst < /root/workload/conprof_stop.sh > /tmp/conprof_stop.sh workload_log "Done configuring conprof" fi - if [ "${PROM_AGGREGATE_COLLECT}" = "true" ]; then - workload_log "Configuring touchstone" - envsubst < /root/workload/prom_config.yaml.template > /tmp/prom_config.yaml - envsubst < /root/workload/prom_aggregation_start.sh > /tmp/prom_aggregation_start.sh - workload_log "Done configuring touchstone" - fi workload_log "Running NodeVertical workload" if [ "${PBENCH_INSTRUMENTATION}" = "true" ]; then pbench-user-benchmark -- sh /root/workload/workload.sh @@ -108,10 +102,6 @@ data: workload_log "Starting conprof" bash /tmp/conprof_start.sh fi - if [ "${PROM_AGGREGATE_COLLECT}" = "true" ]; then - workload_log "Starting touchstone" - bash prom_aggregation_start.sh - fi if [ "${PBENCH_INSTRUMENTATION}" = "true" ]; then result_dir=${benchmark_results_dir} fi @@ -128,7 +118,13 @@ data: exit_code=$? 
end_time=$(date +%s) duration=$((end_time-start_time)) - + if [ "${PROM_AGGREGATE_COLLECT}" = "true" ]; then + workload_log "Configuring touchstone" + envsubst < /root/workload/prom_config.yaml.template > /tmp/prom_config.yaml + envsubst < /root/workload/prom_aggregation_start.sh > /tmp/prom_aggregation_start.sh + workload_log "Starting touchstone" + bash prom_aggregation_start.sh + fi if [ "${PPROF_COLLECT}" = "true" ]; then workload_log "Stopping conprof" bash /tmp/conprof_stop.sh @@ -186,25 +182,20 @@ data: privileged: false nodeSelector: nodevertical: 'true' - - prom_config.yaml.template: | + --- query: - sum(container_memory_rss{namespace=~"openshift-kube-apiserver",name!="",container=~"kube-apiserver.*"}) by (container) - - up headers: - - {{bearer_token.stdout}} + - {{prom_bearer_token.stdout}} disable_ssl: - True start_time_list: - - 1588791810 - - 1588791710 + - $start_time end_time_list: - - 1588795118 - - 1588795018 + - $end_time url: - - 'prometheus-k8s-openshift-monitoring.apps.{{clustername}}.{{base_domain}}' - + - prometheus-k8s-openshift-monitoring.apps.{{clustername}}.{{base_domain}} conprof.yaml.template: | scrape_configs: - job_name: 'apiserver0' From 111f08eef64f645d800fda20cf5fa87e65919e4d Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Mon, 15 Jun 2020 20:56:56 +0530 Subject: [PATCH 21/34] store output and create tarball --- workloads/templates/workload-nodevertical-script-cm.yml.j2 | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/workloads/templates/workload-nodevertical-script-cm.yml.j2 b/workloads/templates/workload-nodevertical-script-cm.yml.j2 index 6bc865ec..a1f551ad 100644 --- a/workloads/templates/workload-nodevertical-script-cm.yml.j2 +++ b/workloads/templates/workload-nodevertical-script-cm.yml.j2 @@ -93,7 +93,7 @@ data: prom_aggregation_start.sh: | #!/bin/sh set -o pipefail - touchstone_compare -database prometheus -v -prom_config /tmp/prom_config.yaml + touchstone_compare -database prometheus -v -prom_config 
/tmp/prom_config.yaml > /tmp/prom_aggregations/aggregations.json workload.sh: | #!/bin/sh set -o pipefail @@ -124,6 +124,8 @@ data: envsubst < /root/workload/prom_aggregation_start.sh > /tmp/prom_aggregation_start.sh workload_log "Starting touchstone" bash prom_aggregation_start.sh + tar -czvf ${result_dir}/prom_aggregation.tar.gz /tmp/prom_aggregations/ + workload_log "Completed prometheus data aggregations and stored tarballs" fi if [ "${PPROF_COLLECT}" = "true" ]; then workload_log "Stopping conprof" From b60220a4bd676951bb098be5ed908acea1906f2e Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Tue, 16 Jun 2020 19:32:48 +0530 Subject: [PATCH 22/34] minor changes --- workloads/nodevertical.yml | 2 +- workloads/templates/workload-nodevertical-script-cm.yml.j2 | 7 +++---- workloads/vars/nodevertical.yml | 3 +-- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/workloads/nodevertical.yml b/workloads/nodevertical.yml index 31d192d7..c413b262 100644 --- a/workloads/nodevertical.yml +++ b/workloads/nodevertical.yml @@ -85,7 +85,7 @@ name: pprof-collection when: pprof_collect and pprof_collect != "" - - name: Get Prometheus data aggregations + - name: Get Prometheus authorizations include_role: name: prometheus_metirc_aggregations when: prom_aggregate_collect and prom_aggregate_collect != "" diff --git a/workloads/templates/workload-nodevertical-script-cm.yml.j2 b/workloads/templates/workload-nodevertical-script-cm.yml.j2 index a1f551ad..b203afd9 100644 --- a/workloads/templates/workload-nodevertical-script-cm.yml.j2 +++ b/workloads/templates/workload-nodevertical-script-cm.yml.j2 @@ -93,7 +93,7 @@ data: prom_aggregation_start.sh: | #!/bin/sh set -o pipefail - touchstone_compare -database prometheus -v -prom_config /tmp/prom_config.yaml > /tmp/prom_aggregations/aggregations.json + touchstone_compare -database prometheus -v -prom_config /tmp/prom_config.yaml > ${result_dir}/aggregations.json workload.sh: | #!/bin/sh set -o pipefail @@ -123,9 +123,8 @@ data: 
envsubst < /root/workload/prom_config.yaml.template > /tmp/prom_config.yaml envsubst < /root/workload/prom_aggregation_start.sh > /tmp/prom_aggregation_start.sh workload_log "Starting touchstone" - bash prom_aggregation_start.sh - tar -czvf ${result_dir}/prom_aggregation.tar.gz /tmp/prom_aggregations/ - workload_log "Completed prometheus data aggregations and stored tarballs" + bash /tmp/prom_aggregation_start.sh + workload_log "Completed prometheus data aggregations" fi if [ "${PPROF_COLLECT}" = "true" ]; then workload_log "Stopping conprof" diff --git a/workloads/vars/nodevertical.yml b/workloads/vars/nodevertical.yml index 8054629b..07714cd6 100644 --- a/workloads/vars/nodevertical.yml +++ b/workloads/vars/nodevertical.yml @@ -35,9 +35,8 @@ pbench_server: "{{ lookup('env', 'PBENCH_SERVER')|default('', true) }}" # pporf variables pprof_collect: "{{ lookup('env', 'PPROF_COLLECT')|default(false, true)|bool|lower }}" - # touchstone variables -prom_aggregate_collect: "{{ lookup('env', 'PPROF_COLLECT')|default(false, true)|bool|lower }}" +prom_aggregate_collect: "{{ lookup('env', 'PROM_AGGREGATE_COLLECT')|default(false, true)|bool|lower }}" # Azure auth vars to set for ocp on azure azure_auth: "{{ lookup('env', 'AZURE_AUTH')|default(false, true)|bool|lower }}" From b2f344b47805e7f23fecee9aae67de5c2832e405 Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Thu, 18 Jun 2020 17:38:17 +0530 Subject: [PATCH 23/34] fixed typo --- workloads/nodevertical.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workloads/nodevertical.yml b/workloads/nodevertical.yml index c413b262..093b96bb 100644 --- a/workloads/nodevertical.yml +++ b/workloads/nodevertical.yml @@ -87,7 +87,7 @@ - name: Get Prometheus authorizations include_role: - name: prometheus_metirc_aggregations + name: prometheus_metric_aggregation when: prom_aggregate_collect and prom_aggregate_collect != "" - name: Set NodeVertical template From 81b67449b36ed33ce00fc886288a63182cf05124 Mon Sep 17 00:00:00 
2001 From: Amit Sagtani Date: Thu, 18 Jun 2020 19:34:30 +0530 Subject: [PATCH 24/34] fixed server name --- .../roles/prometheus_metric_aggregation/tasks/main.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/workloads/roles/prometheus_metric_aggregation/tasks/main.yml b/workloads/roles/prometheus_metric_aggregation/tasks/main.yml index a3e606dd..40bea9d2 100644 --- a/workloads/roles/prometheus_metric_aggregation/tasks/main.yml +++ b/workloads/roles/prometheus_metric_aggregation/tasks/main.yml @@ -1,16 +1,16 @@ -- name: check sa for prom_server - shell: "oc get sa -n openshift-kube-apiserver | grep prom_server | wc -l" +- name: check sa for prom-server + shell: "oc get sa -n openshift-kube-apiserver | grep prom-server | wc -l" register: apiserver_pprof_sa - name: create sa to access pprof profiles of apiserver block: - name: create sa - shell: "oc -n openshift-kube-apiserver create sa prom_server" + shell: "oc -n openshift-kube-apiserver create sa prom-server" - name: add cluster-admin clusterrrole - shell: "oc create clusterrolebinding pprof-admin --clusterrole cluster-admin --serviceaccount=openshift-kube-apiserver:prom_server" + shell: "oc create clusterrolebinding prom-admin --clusterrole cluster-admin --serviceaccount=openshift-kube-apiserver:prom-server" when: apiserver_pprof_sa.stdout | int == 0 - name: get the bearer token - shell: "oc -n openshift-kube-apiserver sa get-token prom_server" + shell: "oc -n openshift-kube-apiserver sa get-token prom-server" register: prom_bearer_token From d8296dd071756522ced241c33c586ee9bcb38e93 Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Tue, 23 Jun 2020 19:40:28 +0530 Subject: [PATCH 25/34] fixed variables name --- .../roles/prometheus_metric_aggregation/tasks/main.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/workloads/roles/prometheus_metric_aggregation/tasks/main.yml b/workloads/roles/prometheus_metric_aggregation/tasks/main.yml index 40bea9d2..26c9831f 
100644 --- a/workloads/roles/prometheus_metric_aggregation/tasks/main.yml +++ b/workloads/roles/prometheus_metric_aggregation/tasks/main.yml @@ -1,15 +1,15 @@ - name: check sa for prom-server shell: "oc get sa -n openshift-kube-apiserver | grep prom-server | wc -l" - register: apiserver_pprof_sa + register: prom_server_sa -- name: create sa to access pprof profiles of apiserver +- name: create sa to access prom_server profiles block: - name: create sa shell: "oc -n openshift-kube-apiserver create sa prom-server" - name: add cluster-admin clusterrrole shell: "oc create clusterrolebinding prom-admin --clusterrole cluster-admin --serviceaccount=openshift-kube-apiserver:prom-server" - when: apiserver_pprof_sa.stdout | int == 0 + when: prom_server_sa.stdout | int == 0 - name: get the bearer token shell: "oc -n openshift-kube-apiserver sa get-token prom-server" From 5e234c1793e35a4e3c4000e25e10bdde7a9aeaa7 Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Tue, 23 Jun 2020 20:08:24 +0530 Subject: [PATCH 26/34] minor fixes --- workloads/templates/workload-nodevertical-script-cm.yml.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workloads/templates/workload-nodevertical-script-cm.yml.j2 b/workloads/templates/workload-nodevertical-script-cm.yml.j2 index b203afd9..249bb4fb 100644 --- a/workloads/templates/workload-nodevertical-script-cm.yml.j2 +++ b/workloads/templates/workload-nodevertical-script-cm.yml.j2 @@ -93,7 +93,7 @@ data: prom_aggregation_start.sh: | #!/bin/sh set -o pipefail - touchstone_compare -database prometheus -v -prom_config /tmp/prom_config.yaml > ${result_dir}/aggregations.json + touchstone_compare -database prometheus -v -prom_config /tmp/prom_config.yaml workload.sh: | #!/bin/sh set -o pipefail @@ -196,7 +196,7 @@ data: end_time_list: - $end_time url: - - prometheus-k8s-openshift-monitoring.apps.{{clustername}}.{{base_domain}} + - https://prometheus-k8s-openshift-monitoring.apps.{{clustername}}.{{base_domain}} 
conprof.yaml.template: | scrape_configs: - job_name: 'apiserver0' From 98f6d4eed327eba6584bce09e5cb2b317e56d50e Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Tue, 23 Jun 2020 21:34:41 +0530 Subject: [PATCH 27/34] added queries and fixed start,end time --- workloads/templates/workload-nodevertical-script-cm.yml.j2 | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/workloads/templates/workload-nodevertical-script-cm.yml.j2 b/workloads/templates/workload-nodevertical-script-cm.yml.j2 index 249bb4fb..2ab63539 100644 --- a/workloads/templates/workload-nodevertical-script-cm.yml.j2 +++ b/workloads/templates/workload-nodevertical-script-cm.yml.j2 @@ -105,7 +105,7 @@ data: if [ "${PBENCH_INSTRUMENTATION}" = "true" ]; then result_dir=${benchmark_results_dir} fi - start_time=$(date +%s) + export start_time=$(date +%s) if [[ "${AZURE_AUTH}" == "true" ]]; then export AZURE_AUTH_LOCATION=/tmp/azure_auth fi @@ -116,7 +116,7 @@ data: export es_index={{ snafu_es_index_prefix }} VIPERCONFIG=/tmp/nodevertical.yaml run_snafu -t cl scale-ci --cl-output True --dir "${result_dir}" -p openshift-tests | tee "${result_dir}/clusterloader.txt" exit_code=$? 
- end_time=$(date +%s) + export end_time=$(date +%s) duration=$((end_time-start_time)) if [ "${PROM_AGGREGATE_COLLECT}" = "true" ]; then workload_log "Configuring touchstone" @@ -187,6 +187,9 @@ data: --- query: - sum(container_memory_rss{namespace=~"openshift-kube-apiserver",name!="",container=~"kube-apiserver.*"}) by (container) + - sum(container_memory_rss{namespace!="",name!="",namespace=~"openshift-etcd",container=~"etcd"}) by (container) + - sum(rate(container_cpu_usage_seconds_total{name!="",clustername=~"$clustername",namespace!="",namespace=~"openshift-kube-apiserver",container=~"kube-apiserver"}[5m])) by (container) + - sum(rate(container_cpu_usage_seconds_total{name!="",clustername=~"$clustername",namespace!="",namespace=~"openshift-etcd",container=~"etcd"}[5m])) by (container) headers: - {{prom_bearer_token.stdout}} disable_ssl: From 64c55c45d0b5b42794b2ecc73cc182ed0b550dbf Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Tue, 23 Jun 2020 22:02:29 +0530 Subject: [PATCH 28/34] removed invalid query --- workloads/templates/workload-nodevertical-script-cm.yml.j2 | 3 --- 1 file changed, 3 deletions(-) diff --git a/workloads/templates/workload-nodevertical-script-cm.yml.j2 b/workloads/templates/workload-nodevertical-script-cm.yml.j2 index 2ab63539..ffb19a52 100644 --- a/workloads/templates/workload-nodevertical-script-cm.yml.j2 +++ b/workloads/templates/workload-nodevertical-script-cm.yml.j2 @@ -187,9 +187,6 @@ data: --- query: - sum(container_memory_rss{namespace=~"openshift-kube-apiserver",name!="",container=~"kube-apiserver.*"}) by (container) - - sum(container_memory_rss{namespace!="",name!="",namespace=~"openshift-etcd",container=~"etcd"}) by (container) - - sum(rate(container_cpu_usage_seconds_total{name!="",clustername=~"$clustername",namespace!="",namespace=~"openshift-kube-apiserver",container=~"kube-apiserver"}[5m])) by (container) - - 
sum(rate(container_cpu_usage_seconds_total{name!="",clustername=~"$clustername",namespace!="",namespace=~"openshift-etcd",container=~"etcd"}[5m])) by (container) headers: - {{prom_bearer_token.stdout}} disable_ssl: From 26ea9e7586f9408f48c774633bd659ae2814a3b3 Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Wed, 12 Aug 2020 16:45:18 +0530 Subject: [PATCH 29/34] stored the output and updated touchstone config file --- .../workload-nodevertical-script-cm.yml.j2 | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/workloads/templates/workload-nodevertical-script-cm.yml.j2 b/workloads/templates/workload-nodevertical-script-cm.yml.j2 index ffb19a52..6bbb1c9d 100644 --- a/workloads/templates/workload-nodevertical-script-cm.yml.j2 +++ b/workloads/templates/workload-nodevertical-script-cm.yml.j2 @@ -123,8 +123,9 @@ data: envsubst < /root/workload/prom_config.yaml.template > /tmp/prom_config.yaml envsubst < /root/workload/prom_aggregation_start.sh > /tmp/prom_aggregation_start.sh workload_log "Starting touchstone" - bash /tmp/prom_aggregation_start.sh - workload_log "Completed prometheus data aggregations" + bash /tmp/prom_aggregation_start.sh > /tmp/prom_aggregation.log + cp /tmp/prom_aggregation.log ${result_dir}/prom_aggregation.log + workload_log "Completed prometheus data aggregations and stored logs" fi if [ "${PPROF_COLLECT}" = "true" ]; then workload_log "Stopping conprof" @@ -185,9 +186,11 @@ data: nodevertical: 'true' prom_config.yaml.template: | --- - query: + url: + - https://prometheus-k8s-openshift-monitoring.apps.{{clustername}}.{{base_domain}} + query_list: - sum(container_memory_rss{namespace=~"openshift-kube-apiserver",name!="",container=~"kube-apiserver.*"}) by (container) - headers: + bearer_token: - {{prom_bearer_token.stdout}} disable_ssl: - True @@ -195,8 +198,6 @@ data: - $start_time end_time_list: - $end_time - url: - - https://prometheus-k8s-openshift-monitoring.apps.{{clustername}}.{{base_domain}} 
conprof.yaml.template: | scrape_configs: - job_name: 'apiserver0' From 84074f5a3912f1dae5314a9bd4ab7d058716bc3d Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Mon, 17 Aug 2020 17:53:12 +0530 Subject: [PATCH 30/34] added touchstone to mastervertical --- docs/mastervertical.md | 5 +++ workloads/mastervertical.yml | 5 +++ .../workload-mastervertical-script-cm.yml.j2 | 32 +++++++++++++++++-- workloads/vars/mastervertical.yml | 3 ++ 4 files changed, 42 insertions(+), 3 deletions(-) diff --git a/docs/mastervertical.md b/docs/mastervertical.md index cba2733a..2ab316fd 100644 --- a/docs/mastervertical.md +++ b/docs/mastervertical.md @@ -132,6 +132,11 @@ Default: `false` If you'd like to enable pprof profile data collection of kubeapiserver and prometheus through conprof(https://github.com/conprof/conprof). Enabling this will create a few services to collect profiles from the apiserver pods and then create a conprof tarball in the pbench tarball +### PROM_AGGREGATE_COLLECT +Default: `false` +If you'd like to enable collection of prometheus data aggregation of kubeapiserver through touchstone(https://github.com/cloud-bulldozer/touchstone). +Enabling this will create a few services to produce prometheus data aggregations from the apiserver pods + ### EXPECTED_MASTERVERTICAL_DURATION Default: `600` Pass/fail criteria. Value to determine if MasterVertical workload executed in duration expected. 
diff --git a/workloads/mastervertical.yml b/workloads/mastervertical.yml index 6ce07225..4a92f7f4 100644 --- a/workloads/mastervertical.yml +++ b/workloads/mastervertical.yml @@ -49,6 +49,11 @@ name: pprof-collection when: pprof_collect and pprof_collect != "" + - name: Get Prometheus authorizations + include_role: + name: prometheus_metric_aggregation + when: prom_aggregate_collect and prom_aggregate_collect != "" + - name: Template workload templates template: src: "{{item.src}}" diff --git a/workloads/templates/workload-mastervertical-script-cm.yml.j2 b/workloads/templates/workload-mastervertical-script-cm.yml.j2 index c57a8be2..2b2845a9 100644 --- a/workloads/templates/workload-mastervertical-script-cm.yml.j2 +++ b/workloads/templates/workload-mastervertical-script-cm.yml.j2 @@ -92,7 +92,10 @@ data: #!/bin/sh set -o pipefail pkill conprof - + prom_aggregation_start.sh: | + #!/bin/sh + set -o pipefail + touchstone_compare -database prometheus -v -prom_config /tmp/prom_config.yaml workload.sh: | #!/bin/sh set -o pipefail @@ -109,7 +112,7 @@ data: if [[ "${AZURE_AUTH}" == "true" ]]; then export AZURE_AUTH_LOCATION=/tmp/azure_auth fi - start_time=$(date +%s) + export start_time=$(date +%s) export cluster_name={{ snafu_cluster_name }} export test_user={{ snafu_user }} export es={{ snafu_es_host }} @@ -117,9 +120,18 @@ data: export es_index={{ snafu_es_index_prefix }} VIPERCONFIG=/tmp/mastervertical.yaml run_snafu -t cl scale-ci --cl-output True --dir "${result_dir}" -p openshift-tests | tee "${result_dir}/clusterloader.txt" exit_code=$? 
- end_time=$(date +%s) + export end_time=$(date +%s) duration=$((end_time-start_time)) + if [ "${PROM_AGGREGATE_COLLECT}" = "true" ]; then + workload_log "Configuring touchstone" + envsubst < /root/workload/prom_config.yaml.template > /tmp/prom_config.yaml + envsubst < /root/workload/prom_aggregation_start.sh > /tmp/prom_aggregation_start.sh + workload_log "Starting touchstone" + bash /tmp/prom_aggregation_start.sh > /tmp/prom_aggregation.log + cp /tmp/prom_aggregation.log ${result_dir}/prom_aggregation.log + workload_log "Completed prometheus data aggregations and stored logs" + fi if [ "${PPROF_COLLECT}" = "true" ]; then workload_log "Stopping conprof" bash /tmp/conprof_stop.sh @@ -592,6 +604,20 @@ data: required: true labels: template: routeTemplate + prom_config.yaml.template: | + --- + url: + - https://prometheus-k8s-openshift-monitoring.apps.{{clustername}}.{{base_domain}} + query_list: + - sum(container_memory_rss{namespace=~"openshift-kube-apiserver",name!="",container=~"kube-apiserver.*"}) by (container) + bearer_token: + - {{prom_bearer_token.stdout}} + disable_ssl: + - True + start_time_list: + - $start_time + end_time_list: + - $end_time conprof.yaml.template: | scrape_configs: - job_name: 'apiserver0' diff --git a/workloads/vars/mastervertical.yml b/workloads/vars/mastervertical.yml index 86400465..518b76b6 100644 --- a/workloads/vars/mastervertical.yml +++ b/workloads/vars/mastervertical.yml @@ -35,6 +35,9 @@ pbench_server: "{{ lookup('env', 'PBENCH_SERVER')|default('', true) }}" # pporf variables pprof_collect: "{{ lookup('env', 'PPROF_COLLECT')|default(false, true)|bool|lower }}" +# touchstone variables +prom_aggregate_collect: "{{ lookup('env', 'PROM_AGGREGATE_COLLECT')|default(false, true)|bool|lower }}" + # Azure auth vars to set for ocp on azure azure_auth: "{{ lookup('env', 'AZURE_AUTH')|default(false, true)|bool|lower }}" azure_auth_file: "{{ lookup('env', 'AZURE_AUTH_FILE')|default('', true) }}" From 44bb84fb42b4ba2e4be8804fc753a539d26f68c6 
Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Thu, 20 Aug 2020 18:30:41 +0530 Subject: [PATCH 31/34] updated the prom_config to index data to elastic search --- workloads/templates/workload-mastervertical-script-cm.yml.j2 | 1 + workloads/templates/workload-nodevertical-script-cm.yml.j2 | 1 + 2 files changed, 2 insertions(+) diff --git a/workloads/templates/workload-mastervertical-script-cm.yml.j2 b/workloads/templates/workload-mastervertical-script-cm.yml.j2 index 2b2845a9..10a9c29c 100644 --- a/workloads/templates/workload-mastervertical-script-cm.yml.j2 +++ b/workloads/templates/workload-mastervertical-script-cm.yml.j2 @@ -618,6 +618,7 @@ data: - $start_time end_time_list: - $end_time + index_result_to_es: True conprof.yaml.template: | scrape_configs: - job_name: 'apiserver0' diff --git a/workloads/templates/workload-nodevertical-script-cm.yml.j2 b/workloads/templates/workload-nodevertical-script-cm.yml.j2 index 6bbb1c9d..8d78ea2e 100644 --- a/workloads/templates/workload-nodevertical-script-cm.yml.j2 +++ b/workloads/templates/workload-nodevertical-script-cm.yml.j2 @@ -198,6 +198,7 @@ data: - $start_time end_time_list: - $end_time + index_result_to_es: True conprof.yaml.template: | scrape_configs: - job_name: 'apiserver0' From 4f4fb85f4feab6e27ee3abb6a0622731ba737d94 Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Sat, 22 Aug 2020 00:03:31 +0530 Subject: [PATCH 32/34] updated prom_config file structure --- .../workload-mastervertical-script-cm.yml.j2 | 23 ++++++++----------- .../workload-nodevertical-script-cm.yml.j2 | 23 ++++++++----------- 2 files changed, 20 insertions(+), 26 deletions(-) diff --git a/workloads/templates/workload-mastervertical-script-cm.yml.j2 b/workloads/templates/workload-mastervertical-script-cm.yml.j2 index 10a9c29c..2ffd4ee5 100644 --- a/workloads/templates/workload-mastervertical-script-cm.yml.j2 +++ b/workloads/templates/workload-mastervertical-script-cm.yml.j2 @@ -606,19 +606,16 @@ data: template: routeTemplate 
prom_config.yaml.template: | --- - url: - - https://prometheus-k8s-openshift-monitoring.apps.{{clustername}}.{{base_domain}} - query_list: - - sum(container_memory_rss{namespace=~"openshift-kube-apiserver",name!="",container=~"kube-apiserver.*"}) by (container) - bearer_token: - - {{prom_bearer_token.stdout}} - disable_ssl: - - True - start_time_list: - - $start_time - end_time_list: - - $end_time - index_result_to_es: True + - url: https://prometheus-k8s-openshift-monitoring.apps.{{clustername}}.{{base_domain}} + query_list: + - sum(container_memory_rss{namespace=~"openshift-kube-apiserver",name!="",container=~"kube-apiserver.*"}) by (container) + bearer_token: {{prom_bearer_token.stdout}} + disable_ssl: True + start_time_list: + - $start_time + end_time_list: + - $end_time + index_result_to_es: True conprof.yaml.template: | scrape_configs: - job_name: 'apiserver0' diff --git a/workloads/templates/workload-nodevertical-script-cm.yml.j2 b/workloads/templates/workload-nodevertical-script-cm.yml.j2 index 8d78ea2e..ca67e1a8 100644 --- a/workloads/templates/workload-nodevertical-script-cm.yml.j2 +++ b/workloads/templates/workload-nodevertical-script-cm.yml.j2 @@ -186,19 +186,16 @@ data: nodevertical: 'true' prom_config.yaml.template: | --- - url: - - https://prometheus-k8s-openshift-monitoring.apps.{{clustername}}.{{base_domain}} - query_list: - - sum(container_memory_rss{namespace=~"openshift-kube-apiserver",name!="",container=~"kube-apiserver.*"}) by (container) - bearer_token: - - {{prom_bearer_token.stdout}} - disable_ssl: - - True - start_time_list: - - $start_time - end_time_list: - - $end_time - index_result_to_es: True + - url: https://prometheus-k8s-openshift-monitoring.apps.{{clustername}}.{{base_domain}} + query_list: + - sum(container_memory_rss{namespace=~"openshift-kube-apiserver",name!="",container=~"kube-apiserver.*"}) by (container) + bearer_token: {{prom_bearer_token.stdout}} + disable_ssl: True + start_time_list: + - $start_time + end_time_list: + - 
$end_time + index_result_to_es: True conprof.yaml.template: | scrape_configs: - job_name: 'apiserver0' From 2670519b9e711398e4a49292c8a4cb49f72d3bb5 Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Wed, 2 Sep 2020 16:12:10 +0530 Subject: [PATCH 33/34] sending results on the stdout --- workloads/templates/workload-mastervertical-script-cm.yml.j2 | 3 +-- workloads/templates/workload-nodevertical-script-cm.yml.j2 | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/workloads/templates/workload-mastervertical-script-cm.yml.j2 b/workloads/templates/workload-mastervertical-script-cm.yml.j2 index 2ffd4ee5..6283e3e6 100644 --- a/workloads/templates/workload-mastervertical-script-cm.yml.j2 +++ b/workloads/templates/workload-mastervertical-script-cm.yml.j2 @@ -128,8 +128,7 @@ data: envsubst < /root/workload/prom_config.yaml.template > /tmp/prom_config.yaml envsubst < /root/workload/prom_aggregation_start.sh > /tmp/prom_aggregation_start.sh workload_log "Starting touchstone" - bash /tmp/prom_aggregation_start.sh > /tmp/prom_aggregation.log - cp /tmp/prom_aggregation.log ${result_dir}/prom_aggregation.log + bash /tmp/prom_aggregation_start.sh workload_log "Completed prometheus data aggregations and stored logs" fi if [ "${PPROF_COLLECT}" = "true" ]; then diff --git a/workloads/templates/workload-nodevertical-script-cm.yml.j2 b/workloads/templates/workload-nodevertical-script-cm.yml.j2 index ca67e1a8..f22b31be 100644 --- a/workloads/templates/workload-nodevertical-script-cm.yml.j2 +++ b/workloads/templates/workload-nodevertical-script-cm.yml.j2 @@ -123,8 +123,7 @@ data: envsubst < /root/workload/prom_config.yaml.template > /tmp/prom_config.yaml envsubst < /root/workload/prom_aggregation_start.sh > /tmp/prom_aggregation_start.sh workload_log "Starting touchstone" - bash /tmp/prom_aggregation_start.sh > /tmp/prom_aggregation.log - cp /tmp/prom_aggregation.log ${result_dir}/prom_aggregation.log + bash /tmp/prom_aggregation_start.sh workload_log "Completed 
prometheus data aggregations and stored logs" fi if [ "${PPROF_COLLECT}" = "true" ]; then From e120ecb13defc6fc87d4772e8f931cdd12745ad5 Mon Sep 17 00:00:00 2001 From: Amit Sagtani Date: Wed, 2 Sep 2020 16:12:49 +0530 Subject: [PATCH 34/34] added touchstone es_port and es_host variables --- docs/mastervertical.md | 8 ++++++++ docs/nodevertical.md | 8 ++++++++ workloads/templates/workload-env.yml.j2 | 4 ++++ workloads/vars/mastervertical.yml | 2 ++ workloads/vars/nodevertical.yml | 2 ++ 5 files changed, 24 insertions(+) diff --git a/docs/mastervertical.md b/docs/mastervertical.md index 2ab316fd..24c3d783 100644 --- a/docs/mastervertical.md +++ b/docs/mastervertical.md @@ -137,6 +137,14 @@ Default: `false` If you'd like to enable collection of prometheus data aggregation of kubeapiserver through touchstone(https://github.com/cloud-bulldozer/touchstone). Enabling this will create a few services to produce prometheus data aggregations from the apiserver pods +### TOUCHSTONE_ES_HOST +Default: `` +Elasticsearch server host, set to index results from touchstone(prometheus metric data aggregations). + +### TOUCHSTONE_ES_PORT +Default: `` +Elasticsearch server port, set to index results from touchstone(prometheus metric data aggregations). + ### EXPECTED_MASTERVERTICAL_DURATION Default: `600` Pass/fail criteria. Value to determine if MasterVertical workload executed in duration expected. diff --git a/docs/nodevertical.md b/docs/nodevertical.md index 68c7ff6c..d0d13418 100644 --- a/docs/nodevertical.md +++ b/docs/nodevertical.md @@ -180,6 +180,14 @@ Default: `false` If you'd like to enable collection of prometheus data aggregation of kubeapiserver through touchstone(https://github.com/cloud-bulldozer/touchstone). Enabling this will create a few services to produce prometheus data aggregations from the apiserver pods. +### TOUCHSTONE_ES_HOST +Default: `` +Elasticsearch server host, set to index results from touchstone(prometheus metric data aggregations). 
+ +### TOUCHSTONE_ES_PORT +Default: `` +Elasticsearch server port, set to index results from touchstone(prometheus metric data aggregations). + ### NODEVERTICAL_HEAVY_PROBE_PERIOD Default: `30` Readiness probe period for the application deployed by the heavy nodevertical. diff --git a/workloads/templates/workload-env.yml.j2 b/workloads/templates/workload-env.yml.j2 index eef9e242..f055720e 100644 --- a/workloads/templates/workload-env.yml.j2 +++ b/workloads/templates/workload-env.yml.j2 @@ -19,6 +19,8 @@ data: MASTERVERTICAL_PROJECTS: "{{mastervertical_projects}}" EXPECTED_MASTERVERTICAL_DURATION: "{{expected_mastervertical_duration}}" AZURE_AUTH: "{{azure_auth|bool|lower}}" + TOUCHSTONE_ES_HOST: "{{ touchstone_es_host }}" + TOUCHSTONE_ES_PORT: "{{ touchstone_es_port }}" {% elif workload_job == "network" %} NETWORK_TEST_UPERF_IMAGE: "{{network_test_uperf_image}}" NETWORK_TEST_UPERF_SSHD_PORT: "{{network_test_uperf_sshd_port}}" @@ -55,6 +57,8 @@ data: AZURE_AUTH: "{{azure_auth|bool|lower}}" NODEVERTICAL_HEAVY_PROBE_ENDPOINT: "{{ nodevertical_heavy_probe_endpoint }}" NODEVERTICAL_HEAVY_PROBE_PERIOD: "{{ nodevertical_heavy_probe_period }}" + TOUCHSTONE_ES_HOST: "{{ touchstone_es_host }}" + TOUCHSTONE_ES_PORT: "{{ touchstone_es_port }}" {% elif workload_job == "podvertical" %} PBENCH_INSTRUMENTATION: "{{pbench_instrumentation|bool|lower}}" ENABLE_PBENCH_COPY: "{{enable_pbench_copy|bool|lower}}" diff --git a/workloads/vars/mastervertical.yml b/workloads/vars/mastervertical.yml index 518b76b6..a269465a 100644 --- a/workloads/vars/mastervertical.yml +++ b/workloads/vars/mastervertical.yml @@ -37,6 +37,8 @@ pprof_collect: "{{ lookup('env', 'PPROF_COLLECT')|default(false, true)|bool|lowe # touchstone variables prom_aggregate_collect: "{{ lookup('env', 'PROM_AGGREGATE_COLLECT')|default(false, true)|bool|lower }}" +touchstone_es_host: "{{ lookup('env', 'TOUCHSTONE_ES_HOST')|default('', true) }}" +touchstone_es_port: "{{ lookup('env', 'TOUCHSTONE_ES_PORT')|default('', true) 
}}" # Azure auth vars to set for ocp on azure azure_auth: "{{ lookup('env', 'AZURE_AUTH')|default(false, true)|bool|lower }}" diff --git a/workloads/vars/nodevertical.yml b/workloads/vars/nodevertical.yml index 07714cd6..48ec7f7c 100644 --- a/workloads/vars/nodevertical.yml +++ b/workloads/vars/nodevertical.yml @@ -37,6 +37,8 @@ pprof_collect: "{{ lookup('env', 'PPROF_COLLECT')|default(false, true)|bool|lowe # touchstone variables prom_aggregate_collect: "{{ lookup('env', 'PROM_AGGREGATE_COLLECT')|default(false, true)|bool|lower }}" +touchstone_es_host: "{{ lookup('env', 'TOUCHSTONE_ES_HOST')|default('', true) }}" +touchstone_es_port: "{{ lookup('env', 'TOUCHSTONE_ES_PORT')|default('', true) }}" # Azure auth vars to set for ocp on azure azure_auth: "{{ lookup('env', 'AZURE_AUTH')|default(false, true)|bool|lower }}"