[WIP] Add touchstone to nodevertical/mastervertical for getting the prometheus data aggregates #141

Open
wants to merge 35 commits into base: master
Commits (35)
8a6d41c  initialised touchstone_workload (amitsagtani97, Jun 2, 2020)
a7d5b1c  added variable (amitsagtani97, Jun 2, 2020)
3b91209  added touchstone (amitsagtani97, Jun 11, 2020)
699c255  added role (amitsagtani97, Jun 15, 2020)
29034a1  store output and create tarball (amitsagtani97, Jun 15, 2020)
41fba16  minor changes (amitsagtani97, Jun 16, 2020)
1d47a5d  fixed typo (amitsagtani97, Jun 18, 2020)
524d04b  fixed server name (amitsagtani97, Jun 18, 2020)
532f02e  fixed variables name (amitsagtani97, Jun 23, 2020)
c6c7bfd  minor fixes (amitsagtani97, Jun 23, 2020)
dd3bfea  added queries and fixed start,end time (amitsagtani97, Jun 23, 2020)
b1a8792  removed invalid query (amitsagtani97, Jun 23, 2020)
6ac11d1  Add support to mastervertical to collect conprof data (chaitanyaenr, Jun 30, 2020)
238eba7  Add support to promethues-scale to collect conprof data (mohit-sheth, Jul 2, 2020)
b800a7d  use snafu as a package (mohit-sheth, Jul 28, 2020)
eda2ecf  stored the output and updated touchstone config file (amitsagtani97, Aug 12, 2020)
d2078a5  initialised touchstone_workload (amitsagtani97, Jun 2, 2020)
5e55038  added variable (amitsagtani97, Jun 2, 2020)
c60f209  added touchstone (amitsagtani97, Jun 11, 2020)
aba8f78  added role (amitsagtani97, Jun 15, 2020)
111f08e  store output and create tarball (amitsagtani97, Jun 15, 2020)
b60220a  minor changes (amitsagtani97, Jun 16, 2020)
b2f344b  fixed typo (amitsagtani97, Jun 18, 2020)
81b6744  fixed server name (amitsagtani97, Jun 18, 2020)
d8296dd  fixed variables name (amitsagtani97, Jun 23, 2020)
5e234c1  minor fixes (amitsagtani97, Jun 23, 2020)
98f6d4e  added queries and fixed start,end time (amitsagtani97, Jun 23, 2020)
64c55c4  removed invalid query (amitsagtani97, Jun 23, 2020)
26ea9e7  stored the output and updated touchstone config file (amitsagtani97, Aug 12, 2020)
84074f5  added touchstone to mastervertical (amitsagtani97, Aug 17, 2020)
49b6c7c  Merge branch 'add_touchstone_work' of https://github.com/amitsagtani9… (amitsagtani97, Aug 17, 2020)
44bb84f  updated the prom_config to index data to elastic search (amitsagtani97, Aug 20, 2020)
4f4fb85  updated prom_config file structure (amitsagtani97, Aug 21, 2020)
2670519  sending results on the stdout (amitsagtani97, Sep 2, 2020)
e120ecb  added touchstone es_port and es_host variables (amitsagtani97, Sep 2, 2020)
18 changes: 18 additions & 0 deletions docs/mastervertical.md
@@ -127,6 +127,24 @@ Basename used by cluster loader for the project(s) it creates.
Default: `1000`
Maximum number of projects that will be created by the mastervertical workload. Typically much higher values are used than the default for large scale tests.

### PPROF_COLLECT
Default: `false`
Set to `true` to enable pprof profile data collection from the kube-apiserver and Prometheus through conprof (https://github.com/conprof/conprof).
Enabling this will create a few services to collect profiles from the apiserver pods and then create a conprof tarball inside the pbench tarball.

### PROM_AGGREGATE_COLLECT
Default: `false`
Set to `true` to enable collection of Prometheus data aggregations for the kube-apiserver through touchstone (https://github.com/cloud-bulldozer/touchstone).
Enabling this will create a few services to produce Prometheus data aggregations from the apiserver pods.

### TOUCHSTONE_ES_HOST
Default: ``
Elasticsearch server host; set this to index the touchstone results (Prometheus metric data aggregations).

### TOUCHSTONE_ES_PORT
Default: ``
Elasticsearch server port; set this to index the touchstone results (Prometheus metric data aggregations).
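
For example, a minimal sketch of driving these variables from the shell before launching the workload; the inventory name and playbook invocation are assumptions that may differ in your environment:

```sh
# Hypothetical example: enable touchstone aggregation and index the results
# into an Elasticsearch instance; the host and port values are placeholders.
export PROM_AGGREGATE_COLLECT=true
export TOUCHSTONE_ES_HOST=elasticsearch.example.com
export TOUCHSTONE_ES_PORT=9200
ansible-playbook -vv -i hosts workloads/mastervertical.yml
```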

### EXPECTED_MASTERVERTICAL_DURATION
Default: `600`
Pass/fail criteria. Determines whether the MasterVertical workload completed within the expected duration.
13 changes: 13 additions & 0 deletions docs/nodevertical.md
@@ -175,6 +175,19 @@ Default: `false`
Set to `true` to enable pprof profile data collection from the kube-apiserver and Prometheus through conprof (https://github.com/conprof/conprof).
Enabling this will create a few services to collect profiles from the apiserver pods and then create a conprof tarball inside the pbench tarball.

### PROM_AGGREGATE_COLLECT
Default: `false`
Set to `true` to enable collection of Prometheus data aggregations for the kube-apiserver through touchstone (https://github.com/cloud-bulldozer/touchstone).
Enabling this will create a few services to produce Prometheus data aggregations from the apiserver pods.

### TOUCHSTONE_ES_HOST
Default: ``
Elasticsearch server host; set this to index the touchstone results (Prometheus metric data aggregations).

### TOUCHSTONE_ES_PORT
Default: ``
Elasticsearch server port; set this to index the touchstone results (Prometheus metric data aggregations).
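
A minimal sketch for the nodevertical case, assuming the same inventory-driven playbook invocation; when the Elasticsearch variables are left empty, the aggregated results are only reported on stdout (an assumption based on the "sending results on the stdout" change in this PR):

```sh
# Hypothetical example: collect touchstone aggregations without indexing to Elasticsearch.
export PROM_AGGREGATE_COLLECT=true
ansible-playbook -vv -i hosts workloads/nodevertical.yml
```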

### NODEVERTICAL_HEAVY_PROBE_PERIOD
Default: `30`
Readiness probe period for the application deployed by the heavy nodevertical.
6 changes: 6 additions & 0 deletions docs/prometheus-scale.md
@@ -98,3 +98,9 @@ Sleep interval for each block iteration in seconds.
### PROMETHEUS_SCALE_TEST_PREFIX
Default: `prometheus-scale`
Sets the pbench result test prefix.

### PPROF_COLLECT
Default: `false`
Set to `true` to enable pprof profile data collection from the kube-apiserver and Prometheus through conprof (https://github.com/conprof/conprof).
Enabling this will create a few services to collect profiles from the apiserver pods and then create a conprof tarball inside the pbench tarball.
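
A minimal sketch of enabling this flag from the shell, assuming the usual inventory-driven invocation of the prometheus workload playbook (the inventory name is an assumption):

```sh
# Hypothetical example: turn on conprof-based pprof collection for this run.
export PPROF_COLLECT=true
ansible-playbook -vv -i hosts workloads/prometheus.yml
```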
23 changes: 12 additions & 11 deletions workloads/mastervertical.yml
@@ -40,18 +40,19 @@
src: "{{pbench_ssh_public_key_file}}"
register: pbench_ssh_public_key_file_slurp

- name: Block to set clustername
block:
- name: Get cluster name
shell: |
{%raw%}oc get machineset -n openshift-machine-api -o=go-template='{{index (index .items 0).metadata.labels "machine.openshift.io/cluster-api-cluster"}}'{%endraw%}
register: cluster_name
- name: Set cluster details
include_role:
name: cluster_details

- name: Create tooling service account
set_fact:
snafu_cluster_name: cluster_name.stdout
when: cluster_name is succeeded
when: snafu_cluster_name == ""
- name: Collect pprof
include_role:
name: pprof-collection
when: pprof_collect and pprof_collect != ""

- name: Get Prometheus authorizations
include_role:
name: prometheus_metric_aggregation
when: prom_aggregate_collect and prom_aggregate_collect != ""

- name: Template workload templates
template:
5 changes: 5 additions & 0 deletions workloads/nodevertical.yml
@@ -85,6 +85,11 @@
name: pprof-collection
when: pprof_collect and pprof_collect != ""

- name: Get Prometheus authorizations
include_role:
name: prometheus_metric_aggregation
when: prom_aggregate_collect and prom_aggregate_collect != ""

- name: Set NodeVertical template
set_fact:
nodevertical_template: "{% if nodevertical_heavy|bool %}workload-nodevertical-heavy-script-cm.yml.j2{% else %}workload-nodevertical-script-cm.yml.j2{% endif %}"
13 changes: 11 additions & 2 deletions workloads/prometheus.yml
@@ -24,8 +24,6 @@
with_items:
- src: scale-ci-tooling-ns.yml
dest: "{{ansible_user_dir}}/scale-ci-tooling/scale-ci-tooling-ns.yml"
- src: workload-prometheus-script-cm.yml
dest: "{{ansible_user_dir}}/scale-ci-tooling/workload-prometheus-script-cm.yml"

- name: Slurp kubeconfig file
slurp:
@@ -42,6 +40,15 @@
src: "{{pbench_ssh_public_key_file}}"
register: pbench_ssh_public_key_file_slurp

- name: Set cluster details
include_role:
name: cluster_details

- name: Collect pprof
include_role:
name: pprof-collection
when: pprof_collect and pprof_collect != ""

- name: Template workload templates
template:
src: "{{item.src}}"
@@ -58,6 +65,8 @@
dest: "{{ansible_user_dir}}/scale-ci-tooling/workload-job.yml"
- src: workload-env.yml.j2
dest: "{{ansible_user_dir}}/scale-ci-tooling/workload-prometheus-env.yml"
- src: workload-prometheus-script-cm.yml.j2
dest: "{{ansible_user_dir}}/scale-ci-tooling/workload-prometheus-script-cm.yml"

- name: Check if scale-ci-tooling namespace exists
shell: |
16 changes: 16 additions & 0 deletions workloads/roles/prometheus_metric_aggregation/tasks/main.yml
@@ -0,0 +1,16 @@
- name: check sa for prom-server
  shell: "oc get sa -n openshift-kube-apiserver | grep prom-server | wc -l"
  register: prom_server_sa

- name: create sa to access prom_server profiles
  block:
    - name: create sa
      shell: "oc -n openshift-kube-apiserver create sa prom-server"

    - name: add cluster-admin clusterrole
      shell: "oc create clusterrolebinding prom-admin --clusterrole cluster-admin --serviceaccount=openshift-kube-apiserver:prom-server"
  when: prom_server_sa.stdout | int == 0

- name: get the bearer token
  shell: "oc -n openshift-kube-apiserver sa get-token prom-server"
  register: prom_bearer_token
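
As a rough illustration (not part of this change), the bearer token gathered above could be used to query the cluster Prometheus API directly; the route name and namespace below are assumptions based on a default OpenShift monitoring stack:

```sh
# Hypothetical usage of the prom-server service account token.
TOKEN=$(oc -n openshift-kube-apiserver sa get-token prom-server)
PROM_HOST=$(oc -n openshift-monitoring get route prometheus-k8s -o jsonpath='{.spec.host}')
curl -sk -H "Authorization: Bearer ${TOKEN}" \
  "https://${PROM_HOST}/api/v1/query?query=up"
```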
@@ -51,7 +51,7 @@ data:
export es={{ snafu_es_host }}
export es_port={{ snafu_es_port }}
export es_index={{ snafu_es_index_prefix }}
pbench-user-benchmark -- 'VIPERCONFIG=/root/workload/cluster-limits-deployments-per-namespace.yaml python3 /tmp/snafu/run_snafu.py -t cl scale-ci --cl-output True --dir /tmp/snafu_results -p openshift-tests'
pbench-user-benchmark -- 'VIPERCONFIG=/root/workload/cluster-limits-deployments-per-namespace.yaml run_snafu -t cl scale-ci --cl-output True --dir /tmp/snafu_results -p openshift-tests'
pbench-copy-results --prefix {{deployments_per_ns_test_prefix}}
echo "$(date -u) Completed running Deployments per ns cluster limits test"
# End of Test Code
5 changes: 5 additions & 0 deletions workloads/templates/workload-env.yml.j2
@@ -5,6 +5,7 @@ metadata:
data:
ENABLE_PBENCH_AGENTS: "{{enable_pbench_agents|bool|lower}}"
PPROF_COLLECT: "{{ ((pprof_collect == None) | ternary(false, pprof_collect)) if pprof_collect is defined else false}}"
PROM_AGGREGATE_COLLECT: "{{ ((prom_aggregate_collect == None) | ternary(false, prom_aggregate_collect)) if prom_aggregate_collect is defined else false}}"
{% if workload_job == "http" %}
{% for v in http_env_vars %}
{{ v }}: "{{ lookup('env', v) }}"
@@ -18,6 +19,8 @@ data:
MASTERVERTICAL_PROJECTS: "{{mastervertical_projects}}"
EXPECTED_MASTERVERTICAL_DURATION: "{{expected_mastervertical_duration}}"
AZURE_AUTH: "{{azure_auth|bool|lower}}"
TOUCHSTONE_ES_HOST: "{{ touchstone_es_host }}"
TOUCHSTONE_ES_PORT: "{{ touchstone_es_port }}"
{% elif workload_job == "network" %}
NETWORK_TEST_UPERF_IMAGE: "{{network_test_uperf_image}}"
NETWORK_TEST_UPERF_SSHD_PORT: "{{network_test_uperf_sshd_port}}"
@@ -54,6 +57,8 @@ data:
AZURE_AUTH: "{{azure_auth|bool|lower}}"
NODEVERTICAL_HEAVY_PROBE_ENDPOINT: "{{ nodevertical_heavy_probe_endpoint }}"
NODEVERTICAL_HEAVY_PROBE_PERIOD: "{{ nodevertical_heavy_probe_period }}"
TOUCHSTONE_ES_HOST: "{{ touchstone_es_host }}"
TOUCHSTONE_ES_PORT: "{{ touchstone_es_port }}"
{% elif workload_job == "podvertical" %}
PBENCH_INSTRUMENTATION: "{{pbench_instrumentation|bool|lower}}"
ENABLE_PBENCH_COPY: "{{enable_pbench_copy|bool|lower}}"
2 changes: 1 addition & 1 deletion workloads/templates/workload-fio-script-cm.yml.j2
@@ -56,7 +56,7 @@ data:
export es={{ snafu_es_host }}
export es_port={{ snafu_es_port }}
export es_index={{ snafu_es_index_prefix }}
pbench-user-benchmark --config="{{ fiotest_prefix }}-pods-{{ fiotest_maxpods }}-sc-{{ fiotest_storageclass }}-create_pods-{{ fiotest_description }}" -- 'VIPERCONFIG=/root/workload/fiotest.yml python3 /tmp/snafu/run_snafu.py -t cl scale-ci --cl-output True --dir /tmp/snafu_results -p openshift-tests'
pbench-user-benchmark --config="{{ fiotest_prefix }}-pods-{{ fiotest_maxpods }}-sc-{{ fiotest_storageclass }}-create_pods-{{ fiotest_description }}" -- 'VIPERCONFIG=/root/workload/fiotest.yml run_snafu -t cl scale-ci --cl-output True --dir /tmp/snafu_results -p openshift-tests'
echo "$(date -u) Pods for FIO I/O test created."

# wait until all pods are started and then collect data