Skip to content

Commit

Permalink
feat: (alerts) CHK-3231 Modify the query to make availability traffic-dependent (#2446)
Browse files Browse the repository at this point in the history

* edit the query so that the availability threshold depends on the number of requests

* set low traffic availability to 90%

* remove the toint() conversion applied to availability

* set availability high traffic to 99 percent

* fix availability params

---------

Co-authored-by: Gianluca Ciuffa <gianlucaciuffa@MBP-di-Gianluca.homenet.telecomitalia.it>
Co-authored-by: Simone infante <52280205+infantesimone@users.noreply.github.com>
  • Loading branch information
3 people authored Oct 14, 2024
1 parent 5400690 commit 3f515b0
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 8 deletions.
22 changes: 20 additions & 2 deletions src/domains/ecommerce-common/00_alerts.tf
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,23 @@ resource "azurerm_monitor_scheduled_query_rules_alert" "ecommerce_for_checkout_a
description = "eCommerce Availability less than or equal 99%"
enabled = true
query = (<<-QUERY
let thresholdTrafficMin = 150;
let thresholdTrafficLinear = 400;
let lowTrafficAvailability = 96;
let highTrafficAvailability = 99;
let thresholdDelta = thresholdTrafficLinear - thresholdTrafficMin;
let availabilityDelta = highTrafficAvailability - lowTrafficAvailability;
AzureDiagnostics
| where url_s startswith 'https://api.platform.pagopa.it/ecommerce/checkout/'
| summarize
Total=count(),
Success=countif(responseCode_d < 500 or url_s startswith "https://api.platform.pagopa.it/ecommerce/checkout/v1/payment-requests" and ( responseCode_d == 502 or responseCode_d == 504))
by Time = bin(TimeGenerated, 15m)
| extend trafficUp = Total-thresholdTrafficMin
| extend deltaRatio = todouble(todouble(trafficUp)/todouble(thresholdDelta))
| extend expectedAvailability = iff(Total >= thresholdTrafficLinear, toreal(highTrafficAvailability), iff(Total <= thresholdTrafficMin, toreal(lowTrafficAvailability), (deltaRatio*(availabilityDelta))+lowTrafficAvailability))
| extend Availability=((Success * 1.0) / Total) * 100
| where toint(Availability) < 90
| where Availability < expectedAvailability
QUERY
)
severity = 1
Expand Down Expand Up @@ -219,14 +228,23 @@ resource "azurerm_monitor_scheduled_query_rules_alert" "ecommerce_for_app_io_v2_
description = "eCommerce api for app IO V2 availability less than 99% in the last 30 minutes detected"
enabled = true
query = (<<-QUERY
let thresholdTrafficMin = 200;
let thresholdTrafficLinear = 500;
let lowTrafficAvailability = 94;
let highTrafficAvailability = 98;
let thresholdDelta = thresholdTrafficLinear - thresholdTrafficMin;
let availabilityDelta = highTrafficAvailability - lowTrafficAvailability;
AzureDiagnostics
| where url_s startswith 'https://api.platform.pagopa.it/ecommerce/io/v2'
| summarize
Total=count(),
Success=countif(responseCode_d < 500 and DurationMs < 10000)
by Time = bin(TimeGenerated, 15m)
| extend trafficUp = Total-thresholdTrafficMin
| extend deltaRatio = todouble(todouble(trafficUp)/todouble(thresholdDelta))
| extend expectedAvailability = iff(Total >= thresholdTrafficLinear, toreal(highTrafficAvailability), iff(Total <= thresholdTrafficMin, toreal(lowTrafficAvailability), (deltaRatio*(availabilityDelta))+lowTrafficAvailability))
| extend Availability=((Success * 1.0) / Total) * 100
| where toint(Availability) < 99
| where Availability < expectedAvailability
QUERY
)
severity = 1
Expand Down
57 changes: 51 additions & 6 deletions src/domains/pay-wallet-common/00_alerts.tf
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,23 @@ resource "azurerm_monitor_scheduled_query_rules_alert" "payment_wallet_for_io_av
description = "Payment Wallet for IO - Availability less than 99% in the last 30 minutes"
enabled = true
query = (<<-QUERY
let thresholdTrafficMin = 50;
let thresholdTrafficLinear = 100;
let lowTrafficAvailability = 94;
let highTrafficAvailability = 98;
let thresholdDelta = thresholdTrafficLinear - thresholdTrafficMin;
let availabilityDelta = highTrafficAvailability - lowTrafficAvailability;
AzureDiagnostics
| where url_s startswith 'https://api.platform.pagopa.it/io-payment-wallet/v1'
| summarize
Total=count(),
Success=countif(responseCode_d < 500 and DurationMs < 250)
by Time = bin(TimeGenerated, 15m)
| extend trafficUp = Total-thresholdTrafficMin
| extend deltaRatio = todouble(todouble(trafficUp)/todouble(thresholdDelta))
| extend expectedAvailability = iff(Total >= thresholdTrafficLinear, toreal(highTrafficAvailability), iff(Total <= thresholdTrafficMin, toreal(lowTrafficAvailability), (deltaRatio*(availabilityDelta))+lowTrafficAvailability))
| extend Availability=((Success * 1.0) / Total) * 100
| where toint(Availability) < 99
| where Availability < expectedAvailability
QUERY
)
severity = 1
Expand Down Expand Up @@ -85,14 +94,23 @@ resource "azurerm_monitor_scheduled_query_rules_alert" "payment_wallet_for_webvi
description = "Payment Wallet for Webview - Availability less than 99% in the last 30 minutes"
enabled = true
query = (<<-QUERY
let thresholdTrafficMin = 50;
let thresholdTrafficLinear = 150;
let lowTrafficAvailability = 90;
let highTrafficAvailability = 99;
let thresholdDelta = thresholdTrafficLinear - thresholdTrafficMin;
let availabilityDelta = highTrafficAvailability - lowTrafficAvailability;
AzureDiagnostics
| where url_s startswith 'https://api.platform.pagopa.it/webview-payment-wallet/v1'
| summarize
Total=count(),
Success=countif(responseCode_d < 500 and DurationMs < 2000)
by Time = bin(TimeGenerated, 15m)
| extend trafficUp = Total-thresholdTrafficMin
| extend deltaRatio = todouble(todouble(trafficUp)/todouble(thresholdDelta))
| extend expectedAvailability = iff(Total >= thresholdTrafficLinear, toreal(highTrafficAvailability), iff(Total <= thresholdTrafficMin, toreal(lowTrafficAvailability), (deltaRatio*(availabilityDelta))+lowTrafficAvailability))
| extend Availability=((Success * 1.0) / Total) * 100
| where toint(Availability) < 99
| where Availability < expectedAvailability
QUERY
)
severity = 1
Expand Down Expand Up @@ -125,14 +143,23 @@ resource "azurerm_monitor_scheduled_query_rules_alert" "payment_wallet_for_ecomm
description = "Payment Wallet for eCommerce V1 - Availability less than 99% in the last 30 minutes"
enabled = true
query = (<<-QUERY
let thresholdTrafficMin = 20;
let thresholdTrafficLinear = 80;
let lowTrafficAvailability = 90;
let highTrafficAvailability = 99;
let thresholdDelta = thresholdTrafficLinear - thresholdTrafficMin;
let availabilityDelta = highTrafficAvailability - lowTrafficAvailability;
AzureDiagnostics
| where url_s startswith 'https://api.platform.pagopa.it/payment-wallet-for-ecommerce/v1'
| summarize
Total=count(),
Success=countif(responseCode_d < 500 and DurationMs < 250)
by Time = bin(TimeGenerated, 15m)
| extend trafficUp = Total-thresholdTrafficMin
| extend deltaRatio = todouble(todouble(trafficUp)/todouble(thresholdDelta))
| extend expectedAvailability = iff(Total >= thresholdTrafficLinear, toreal(highTrafficAvailability), iff(Total <= thresholdTrafficMin, toreal(lowTrafficAvailability), (deltaRatio*(availabilityDelta))+lowTrafficAvailability))
| extend Availability=((Success * 1.0) / Total) * 100
| where toint(Availability) < 99
| where Availability < expectedAvailability
QUERY
)
severity = 1
Expand Down Expand Up @@ -161,14 +188,23 @@ resource "azurerm_monitor_scheduled_query_rules_alert" "payment_wallet_npg_notif
description = "Payment Wallet NPG Notifications - Availability less than 99% in the last 30 minutes"
enabled = true
query = (<<-QUERY
let thresholdTrafficMin = 5;
let thresholdTrafficLinear = 20;
let lowTrafficAvailability = 80;
let highTrafficAvailability = 99;
let thresholdDelta = thresholdTrafficLinear - thresholdTrafficMin;
let availabilityDelta = highTrafficAvailability - lowTrafficAvailability;
AzureDiagnostics
| where url_s startswith 'https://api.platform.pagopa.it/payment-wallet-notifications/v1'
| summarize
Total=count(),
Success=countif(responseCode_d < 500 and DurationMs < 250)
Success=countif(responseCode_d < 500 and DurationMs < 350)
by Time = bin(TimeGenerated, 15m)
| extend trafficUp = Total-thresholdTrafficMin
| extend deltaRatio = todouble(todouble(trafficUp)/todouble(thresholdDelta))
| extend expectedAvailability = iff(Total >= thresholdTrafficLinear, toreal(highTrafficAvailability), iff(Total <= thresholdTrafficMin, toreal(lowTrafficAvailability), (deltaRatio*(availabilityDelta))+lowTrafficAvailability))
| extend Availability=((Success * 1.0) / Total) * 100
| where toint(Availability) < 99
| where Availability < expectedAvailability
QUERY
)
severity = 1
Expand Down Expand Up @@ -197,14 +233,23 @@ resource "azurerm_monitor_scheduled_query_rules_alert" "payment_wallet_outcomes_
description = "Payment Wallet redirection outcomes - Availability less than 99% in the last 30 minutes"
enabled = true
query = (<<-QUERY
let thresholdTrafficMin = 50;
let thresholdTrafficLinear = 100;
let lowTrafficAvailability = 90;
let highTrafficAvailability = 99;
let thresholdDelta = thresholdTrafficLinear - thresholdTrafficMin;
let availabilityDelta = highTrafficAvailability - lowTrafficAvailability;
AzureDiagnostics
| where url_s startswith 'https://api.platform.pagopa.it/payment-wallet-outcomes/v1'
| summarize
Total=count(),
Success=countif(responseCode_d < 500 and DurationMs < 100)
by Time = bin(TimeGenerated, 15m)
| extend trafficUp = Total-thresholdTrafficMin
| extend deltaRatio = todouble(todouble(trafficUp)/todouble(thresholdDelta))
| extend expectedAvailability = iff(Total >= thresholdTrafficLinear, toreal(highTrafficAvailability), iff(Total <= thresholdTrafficMin, toreal(lowTrafficAvailability), (deltaRatio*(availabilityDelta))+lowTrafficAvailability))
| extend Availability=((Success * 1.0) / Total) * 100
| where toint(Availability) < 99
| where Availability < expectedAvailability
QUERY
)
severity = 1
Expand Down

0 comments on commit 3f515b0

Please sign in to comment.