Skip to content

Commit

Permalink
feat: Prometheus managed installation (#81)
Browse files Browse the repository at this point in the history
  • Loading branch information
diegolagospagopa authored Aug 23, 2023
1 parent c47abb5 commit ac51417
Show file tree
Hide file tree
Showing 8 changed files with 217 additions and 80 deletions.
23 changes: 23 additions & 0 deletions src/aks-platform/.terraform.lock.hcl

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

146 changes: 73 additions & 73 deletions src/aks-platform/03_monitoring.tf
Original file line number Diff line number Diff line change
@@ -1,75 +1,75 @@
resource "kubernetes_namespace" "monitoring" {
metadata {
name = "monitoring"
}
depends_on = [module.aks]
}
# resource "kubernetes_namespace" "monitoring" {
# metadata {
# name = "monitoring"
# }
# depends_on = [module.aks]
# }

resource "helm_release" "prometheus" {
name = "prometheus"
repository = "https://prometheus-community.github.io/helm-charts"
chart = "prometheus"
version = var.prometheus_helm.chart_version
namespace = kubernetes_namespace.monitoring.metadata[0].name
# resource "helm_release" "prometheus" {
# name = "prometheus"
# repository = "https://prometheus-community.github.io/helm-charts"
# chart = "prometheus"
# version = var.prometheus_helm.chart_version
# namespace = kubernetes_namespace.monitoring.metadata[0].name

set {
name = "server.global.scrape_interval"
value = "30s"
}
set {
name = "alertmanager.image.repository"
value = var.prometheus_helm.alertmanager.image_name
}
set {
name = "alertmanager.image.tag"
value = var.prometheus_helm.alertmanager.image_tag
}
set {
name = "alertmanager.configmapReload.prometheus.image.repository"
value = var.prometheus_helm.configmap_reload_prometheus.image_name
}
set {
name = "alertmanager.configmapReload.prometheus.image.tag"
value = var.prometheus_helm.configmap_reload_prometheus.image_tag
}
set {
name = "alertmanager.configmapReload.alertmanager.image.repository"
value = var.prometheus_helm.configmap_reload_alertmanager.image_name
}
set {
name = "alertmanager.configmapReload.alertmanager.image.tag"
value = var.prometheus_helm.configmap_reload_alertmanager.image_tag
}
set {
name = "alertmanager.nodeExporter.image.repository"
value = var.prometheus_helm.node_exporter.image_name
}
set {
name = "alertmanager.nodeExporter.image.tag"
value = var.prometheus_helm.node_exporter.image_tag
}
set {
name = "alertmanager.nodeExporter.image.repository"
value = var.prometheus_helm.node_exporter.image_name
}
set {
name = "alertmanager.nodeExporter.image.tag"
value = var.prometheus_helm.node_exporter.image_tag
}
set {
name = "alertmanager.server.image.repository"
value = var.prometheus_helm.server.image_name
}
set {
name = "alertmanager.server.image.tag"
value = var.prometheus_helm.server.image_tag
}
set {
name = "alertmanager.pushgateway.image.repository"
value = var.prometheus_helm.pushgateway.image_name
}
set {
name = "alertmanager.pushgateway.image.tag"
value = var.prometheus_helm.pushgateway.image_tag
}
}
# set {
# name = "server.global.scrape_interval"
# value = "30s"
# }
# set {
# name = "alertmanager.image.repository"
# value = var.prometheus_helm.alertmanager.image_name
# }
# set {
# name = "alertmanager.image.tag"
# value = var.prometheus_helm.alertmanager.image_tag
# }
# set {
# name = "alertmanager.configmapReload.prometheus.image.repository"
# value = var.prometheus_helm.configmap_reload_prometheus.image_name
# }
# set {
# name = "alertmanager.configmapReload.prometheus.image.tag"
# value = var.prometheus_helm.configmap_reload_prometheus.image_tag
# }
# set {
# name = "alertmanager.configmapReload.alertmanager.image.repository"
# value = var.prometheus_helm.configmap_reload_alertmanager.image_name
# }
# set {
# name = "alertmanager.configmapReload.alertmanager.image.tag"
# value = var.prometheus_helm.configmap_reload_alertmanager.image_tag
# }
# set {
# name = "alertmanager.nodeExporter.image.repository"
# value = var.prometheus_helm.node_exporter.image_name
# }
# set {
# name = "alertmanager.nodeExporter.image.tag"
# value = var.prometheus_helm.node_exporter.image_tag
# }
# set {
# name = "alertmanager.nodeExporter.image.repository"
# value = var.prometheus_helm.node_exporter.image_name
# }
# set {
# name = "alertmanager.nodeExporter.image.tag"
# value = var.prometheus_helm.node_exporter.image_tag
# }
# set {
# name = "alertmanager.server.image.repository"
# value = var.prometheus_helm.server.image_name
# }
# set {
# name = "alertmanager.server.image.tag"
# value = var.prometheus_helm.server.image_tag
# }
# set {
# name = "alertmanager.pushgateway.image.repository"
# value = var.prometheus_helm.pushgateway.image_name
# }
# set {
# name = "alertmanager.pushgateway.image.tag"
# value = var.prometheus_helm.pushgateway.image_tag
# }
# }
80 changes: 80 additions & 0 deletions src/aks-platform/03_prometheus_managed.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Azure Monitor workspace that stores the Prometheus metrics collected from
# the AKS cluster. It lives in the AKS resource group and shares its region.
resource "azurerm_monitor_workspace" "prometheus_amw" {
  name                = local.monitor_log_analytics_workspace_prometheus_name
  resource_group_name = azurerm_resource_group.rg_aks.name
  location            = azurerm_resource_group.rg_aks.location

  tags = var.tags
}

# Data collection endpoint (DCE) referenced by the Prometheus data collection
# rule. Created in the AKS resource group and region.
resource "azurerm_monitor_data_collection_endpoint" "prometheus_dce" {
  name                = "${local.project}-prometheus-dce"
  resource_group_name = azurerm_resource_group.rg_aks.name
  location            = azurerm_resource_group.rg_aks.location
  kind                = "Linux"

  # Tagged like the Azure Monitor workspace in this file so every resource of
  # the managed Prometheus stack carries the same ownership/cost tags.
  tags = var.tags
}

# Data collection rule (DCR) that forwards the "Microsoft-PrometheusMetrics"
# stream to the Azure Monitor workspace defined in this file.
resource "azurerm_monitor_data_collection_rule" "prometheus_dcr" {
  name                = "${local.project}-prometheus-dcr"
  description         = "DCR for Azure Monitor Metrics Profile (Managed Prometheus)"
  resource_group_name = azurerm_resource_group.rg_aks.name
  location            = azurerm_resource_group.rg_aks.location
  kind                = "Linux"

  # Reading the endpoint id makes Terraform create the DCE first; an explicit
  # depends_on on the same resource would only repeat that dependency.
  data_collection_endpoint_id = azurerm_monitor_data_collection_endpoint.prometheus_dce.id

  # Where the metrics come from.
  data_sources {
    prometheus_forwarder {
      name    = "PrometheusDataSource"
      streams = ["Microsoft-PrometheusMetrics"]
    }
  }

  # Where the metrics go: the Azure Monitor workspace, registered under a
  # name that data_flow refers to below.
  destinations {
    monitor_account {
      name               = "monitoring_account_prometheus"
      monitor_account_id = azurerm_monitor_workspace.prometheus_amw.id
    }
  }

  # Routes the Prometheus stream to the monitor_account destination by name.
  data_flow {
    streams      = ["Microsoft-PrometheusMetrics"]
    destinations = ["monitoring_account_prometheus"]
  }

  # Tagged like the Azure Monitor workspace for consistent ownership/cost tags.
  tags = var.tags
}

# Associates the Prometheus data collection rule with the AKS cluster.
# module.aks is addressed with [0], i.e. it is declared with count elsewhere.
# NOTE(review): this fails to evaluate if that count is 0 - confirm the module
# is always enabled wherever this file is applied.
resource "azurerm_monitor_data_collection_rule_association" "dcra" {
  name               = "${local.project}-prometheus-dcra"
  description        = "Association of data collection rule. Deleting this association will break the data collection for this AKS Cluster."
  target_resource_id = module.aks[0].id

  # Reading the rule id orders this association after the DCR, so no explicit
  # depends_on is needed.
  data_collection_rule_id = azurerm_monitor_data_collection_rule.prometheus_dcr.id
}

#
# Enable prometheus monitoring for AKS cluster
#
# Runs the Azure CLI on the machine executing Terraform to enable Azure Monitor
# metrics on the cluster and bind it to the Prometheus Azure Monitor workspace.
# The command runs when this resource is created; a change to any value in
# `triggers` replaces the resource and therefore runs the command again.
# Requires an `az` CLI that is installed and authenticated where Terraform runs.
resource "null_resource" "enable_prometheus_monitoring" {
  triggers = {
    AKS_NAME           = module.aks[0].name
    AKS_RESOURCE_GROUP = azurerm_resource_group.rg_aks.name
    AMW_ID             = azurerm_monitor_workspace.prometheus_amw.id
  }

  # The triggers only read module outputs; this keeps the command ordered
  # after the AKS module itself.
  depends_on = [
    module.aks[0]
  ]

  provisioner "local-exec" {
    # Each interpolated value is double-quoted so the shell always passes it as
    # exactly one argument. "<<-" strips the common leading indentation, which
    # lets the command be indented with the surrounding block.
    command = <<-EOF
      az aks update --enable-azure-monitor-metrics \
        -n "${self.triggers.AKS_NAME}" \
        -g "${self.triggers.AKS_RESOURCE_GROUP}" \
        --azure-monitor-workspace-resource-id "${self.triggers.AMW_ID}"
    EOF
  }
}
9 changes: 5 additions & 4 deletions src/aks-platform/99_locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@ locals {
docker_registry_name = "dvopladneuacr"

# monitor
monitor_rg_name = "${local.product}-monitor-rg"
monitor_log_analytics_workspace_name = "${local.product}-law"
monitor_appinsights_name = "${local.product}-appinsights"
monitor_security_storage_name = replace("${local.product}-sec-monitor-st", "-", "")
monitor_rg_name = "${local.product}-monitor-rg"
monitor_log_analytics_workspace_name = "${local.product}-law"
monitor_log_analytics_workspace_prometheus_name = "${local.product}-prometheus-law"
monitor_appinsights_name = "${local.product}-appinsights"
monitor_security_storage_name = replace("${local.product}-sec-monitor-st", "-", "")

monitor_action_group_slack_name = "SlackPagoPA"
monitor_action_group_email_name = "PagoPA"
Expand Down
4 changes: 4 additions & 0 deletions src/aks-platform/99_main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ terraform {
source = "hashicorp/azuread"
version = "> 2.10.0"
}
local = {
source = "hashicorp/local"
version = ">= 2.4.0"
}
}

backend "azurerm" {}
Expand Down
18 changes: 18 additions & 0 deletions src/aks-platform/99_variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -559,3 +559,21 @@ variable "prometheus_helm" {
})
description = "prometheus helm chart configuration"
}

#
# Monitor
#
# SKU name for the Prometheus workspace (required, no default).
# NOTE(review): no resource visible in this change reads this variable -
# confirm it is consumed elsewhere.
variable "law_prometheus_sku" {
  description = "Sku of the Log Analytics Workspace"
  type        = string
}

# Retention period, in days, for the Prometheus workspace (required, no default).
# NOTE(review): no resource visible in this change reads this variable -
# confirm it is consumed elsewhere.
variable "law_prometheus_retention_in_days" {
  description = "The workspace data retention in days"
  type        = number
}

# Daily ingestion quota, in GB, for the Prometheus workspace (required, no
# default). Typed as number, so fractional values are accepted.
# NOTE(review): no resource visible in this change reads this variable -
# confirm it is consumed elsewhere.
variable "law_prometheus_daily_quota_gb" {
  description = "The workspace daily quota for ingestion in GB."
  type        = number
}
11 changes: 9 additions & 2 deletions src/aks-platform/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Re-enable all the resource, commented before to complete the procedure
| <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >=1.3.0 |
| <a name="requirement_azuread"></a> [azuread](#requirement\_azuread) | > 2.10.0 |
| <a name="requirement_azurerm"></a> [azurerm](#requirement\_azurerm) | >= 3.64.0 |
| <a name="requirement_local"></a> [local](#requirement\_local) | >= 2.4.0 |

## Modules

Expand All @@ -45,12 +46,15 @@ Re-enable all the resource, commented before to complete the procedure

| Name | Type |
|------|------|
| [azurerm_monitor_data_collection_endpoint.prometheus_dce](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/monitor_data_collection_endpoint) | resource |
| [azurerm_monitor_data_collection_rule.prometheus_dcr](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/monitor_data_collection_rule) | resource |
| [azurerm_monitor_data_collection_rule_association.dcra](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/monitor_data_collection_rule_association) | resource |
| [azurerm_monitor_workspace.prometheus_amw](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/monitor_workspace) | resource |
| [azurerm_resource_group.rg_aks](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/resource_group) | resource |
| [azurerm_role_assignment.aks_to_acr](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/role_assignment) | resource |
| [azurerm_role_assignment.keda_monitoring_reader](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/role_assignment) | resource |
| [azurerm_role_assignment.managed_identity_operator_vs_aks_managed_identity](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/role_assignment) | resource |
| [helm_release.keda](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
| [helm_release.prometheus](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
| [kubernetes_cluster_role.cluster_deployer](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/cluster_role) | resource |
| [kubernetes_cluster_role.edit_extra](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/cluster_role) | resource |
| [kubernetes_cluster_role.system_cluster_deployer](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/cluster_role) | resource |
Expand All @@ -61,8 +65,8 @@ Re-enable all the resource, commented before to complete the procedure
| [kubernetes_cluster_role_binding.view_extra_binding](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/cluster_role_binding) | resource |
| [kubernetes_namespace.ingress](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace) | resource |
| [kubernetes_namespace.keda](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace) | resource |
| [kubernetes_namespace.monitoring](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace) | resource |
| [null_resource.create_vnet_core_aks_link](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource |
| [null_resource.enable_prometheus_monitoring](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource |
| [azuread_group.adgroup_admin](https://registry.terraform.io/providers/hashicorp/azuread/latest/docs/data-sources/group) | data source |
| [azuread_group.adgroup_developers](https://registry.terraform.io/providers/hashicorp/azuread/latest/docs/data-sources/group) | data source |
| [azuread_group.adgroup_externals](https://registry.terraform.io/providers/hashicorp/azuread/latest/docs/data-sources/group) | data source |
Expand Down Expand Up @@ -117,6 +121,9 @@ Re-enable all the resource, commented before to complete the procedure
| <a name="input_keda_helm_version"></a> [keda\_helm\_version](#input\_keda\_helm\_version) | n/a | `string` | n/a | yes |
| <a name="input_key_vault_name"></a> [key\_vault\_name](#input\_key\_vault\_name) | Key Vault name | `string` | `""` | no |
| <a name="input_key_vault_rg_name"></a> [key\_vault\_rg\_name](#input\_key\_vault\_rg\_name) | Key Vault - rg name | `string` | `""` | no |
| <a name="input_law_prometheus_daily_quota_gb"></a> [law\_prometheus\_daily\_quota\_gb](#input\_law\_prometheus\_daily\_quota\_gb) | The workspace daily quota for ingestion in GB. | `number` | n/a | yes |
| <a name="input_law_prometheus_retention_in_days"></a> [law\_prometheus\_retention\_in\_days](#input\_law\_prometheus\_retention\_in\_days) | The workspace data retention in days | `number` | n/a | yes |
| <a name="input_law_prometheus_sku"></a> [law\_prometheus\_sku](#input\_law\_prometheus\_sku) | Sku of the Log Analytics Workspace | `string` | n/a | yes |
| <a name="input_location"></a> [location](#input\_location) | n/a | `string` | `"westeurope"` | no |
| <a name="input_location_short"></a> [location\_short](#input\_location\_short) | Short location code, e.g. weu. | `string` | n/a | yes |
| <a name="input_lock_enable"></a> [lock\_enable](#input\_lock\_enable) | Apply locks to block accidental deletions. | `bool` | `false` | no |
Expand Down
6 changes: 5 additions & 1 deletion src/aks-platform/env/dev01/terraform.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public_ip_aksoutbound_name = "dvopla-d-dev01-aksoutbound-pip-1"
aks_enabled = true
aks_private_cluster_enabled = false
aks_alerts_enabled = false
aks_kubernetes_version = "1.26.3"
aks_kubernetes_version = "1.27.3"
aks_system_node_pool = {
name = "dvldev01sys",
vm_size = "Standard_B4ms",
Expand Down Expand Up @@ -157,3 +157,7 @@ tls_cert_check_helm = {
}

tls_checker_https_endpoints_to_check = []

# Values for the law_prometheus_* input variables (dev01 environment).
law_prometheus_sku               = "PerGB2018"
law_prometheus_retention_in_days = 30
law_prometheus_daily_quota_gb    = 0.1

0 comments on commit ac51417

Please sign in to comment.