add smoke test for crash logs [APMON-1544] #3243

Draft
wants to merge 13 commits into base: main
2 changes: 1 addition & 1 deletion .gitlab-ci.yml
@@ -189,7 +189,7 @@ onboarding_python:
test-app-python-container-3.11-alpine,
test-app-python-container-3.12-alpine,
]
SCENARIO: [INSTALLER_AUTO_INJECTION]
SCENARIO: [INSTALLER_AUTO_INJECTION, CONTAINER_AUTO_INJECTION_INSTALL_SCRIPT_PROFILING, CONTAINER_AUTO_INJECTION_INSTALL_SCRIPT_CRASHTRACKING]
- ONBOARDING_FILTER_ENV: [dev, prod]
ONBOARDING_FILTER_WEBLOG: [test-app-python]
SCENARIO: [INSTALLER_AUTO_INJECTION_LD_PRELOAD]
@@ -24,6 +24,13 @@ def index(request):
return HttpResponse("test")


def crashme(request):
import ctypes

ctypes.string_at(0)


urlpatterns = [
path("", index),
path("crashme", crashme),
]
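The new crashme endpoint relies on ctypes.string_at(0) dereferencing a NULL pointer in native code, so the worker process dies with SIGSEGV instead of raising a Python exception, which is exactly the kind of failure the crash tracker should report. A minimal standalone sketch (not part of this PR) that demonstrates the effect without killing the test runner:

import signal
import subprocess
import sys

# Run the crashing call in a child interpreter so the parent process survives.
proc = subprocess.run(
    [sys.executable, "-c", "import ctypes; ctypes.string_at(0)"],
    check=False,
)
# On POSIX systems a signal-terminated child reports the negated signal number.
assert proc.returncode == -signal.SIGSEGV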
8 changes: 7 additions & 1 deletion tests/auto_inject/test_auto_inject_install.py
@@ -1,6 +1,5 @@
from utils import scenarios, features, flaky
from utils.tools import logger
from utils import scenarios, features
import tests.auto_inject.utils as base
from utils.virtual_machine.utils import parametrize_virtual_machines

@@ -83,6 +82,13 @@ def test_install(self, virtual_machine):
self._test_install(virtual_machine, profile=True)


@scenarios.container_auto_injection_install_script_crashtracking
class TestContainerAutoInjectInstallScriptCrashTracking(base.AutoInjectBaseTest):
@parametrize_virtual_machines()
def test_install(self, virtual_machine):
self._test_install(virtual_machine, crashlog=True)


@features.installer_auto_instrumentation
@scenarios.installer_auto_injection
class TestInstallerAutoInjectManual(base.AutoInjectBaseTest):
27 changes: 8 additions & 19 deletions tests/auto_inject/utils.py
@@ -1,17 +1,15 @@
import os
import pytest
import paramiko
from utils.tools import logger
from utils.onboarding.weblog_interface import make_get_request, warmup_weblog, make_internal_get_request
from utils.onboarding.weblog_interface import make_get_request, warmup_weblog, request_weblog
from utils.onboarding.backend_interface import wait_backend_trace_id
from utils.onboarding.backend_interface import cause_and_verify_crash
from utils.onboarding.wait_for_tcp_port import wait_for_port
from utils.virtual_machine.vm_logger import vm_logger
from utils import context
from threading import Timer


class AutoInjectBaseTest:
def _test_install(self, virtual_machine, profile: bool = False):
def _test_install(self, virtual_machine, profile: bool = False, crashlog: bool = False):
""" We can easily install agent and lib injection software from agent installation script. Given a sample application we can enable tracing using local environment variables.
After starting application we can see application HTTP requests traces in the backend.
Using the agent installation script we can install different versions of the software (release or beta) in different OS."""
@@ -21,22 +19,13 @@ def _test_install(self, virtual_machine, profile: bool = False):
vm_logger(context.scenario.name, virtual_machine.name).info(
f"{header} \n {header} \n Launching the uninstall for VM: {virtual_machine.name} \n {header} \n {header}"
)
request_uuid = None
if virtual_machine.krunvm_config is not None and virtual_machine.krunvm_config.stdin is not None:
logger.info(
f"We are testing on krunvm. The request to the weblog will be done using the stdin (inside the microvm)"
)
request_uuid = make_internal_get_request(virtual_machine.krunvm_config.stdin, vm_port)
else:
logger.info(f"Waiting for weblog available [{vm_ip}:{vm_port}]")
wait_for_port(vm_port, vm_ip, 80.0)
logger.info(f"[{vm_ip}]: Weblog app is ready!")
warmup_weblog(f"http://{vm_ip}:{vm_port}/")
logger.info(f"Making a request to weblog [{vm_ip}:{vm_port}]")
request_uuid = make_get_request(f"http://{vm_ip}:{vm_port}/")
vm_name = virtual_machine.name
request_uuid = request_weblog(virtual_machine, vm_ip, vm_port)

logger.info(f"Http request done with uuid: [{request_uuid}] for ip [{vm_ip}]")
wait_backend_trace_id(request_uuid, 120.0, profile=profile)
runtime_id = wait_backend_trace_id(request_uuid, 120.0, profile=profile)
if crashlog:
cause_and_verify_crash(runtime_id, vm_ip, vm_port)

def close_channel(self, channel):
try:
9 changes: 8 additions & 1 deletion utils/_context/_scenarios/__init__.py
@@ -1,4 +1,3 @@
import os
import json

import pytest
@@ -618,6 +617,14 @@ def all_endtoend_scenarios(test_object):
github_workflow="libinjection",
)

container_auto_injection_install_script_crashtracking = InstallerAutoInjectionScenario(
"CONTAINER_AUTO_INJECTION_INSTALL_SCRIPT_CRASHTRACKING",
"Onboarding Container Single Step Instrumentation crashtracking scenario using agent auto install script",
vm_provision="container-auto-inject-install-script",
scenario_groups=[ScenarioGroup.ONBOARDING],
github_workflow="libinjection",
)

host_auto_injection_install_script = InstallerAutoInjectionScenario(
"HOST_AUTO_INJECTION_INSTALL_SCRIPT",
"Onboarding Host Single Step Instrumentation scenario using agent auto install script",
90 changes: 73 additions & 17 deletions utils/onboarding/backend_interface.py
@@ -1,8 +1,12 @@
import functools
import os
import time
from typing import Callable
from typing import Optional
from datetime import datetime, timedelta, timezone
import requests
from utils.tools import logger
from utils.onboarding.weblog_interface import make_get_request


def _headers():
@@ -69,31 +73,83 @@ def _query_for_profile(runtime_id):
data = r.json()["data"]
# Check if we got any profile events
if isinstance(data, list) and len(data) > 0:
return r.status_code
return -1
return (r.status_code,)
return (-1,)
return (r.status_code,)
except Exception as e:
logger.error(f"Error received connecting to host: [{host}] {e} ")
return -1
return (-1,)


def wait_backend_trace_id(trace_id, timeout: float = 5.0, profile: bool = False, validator=None):
def _query_for_crash_log(runtime_id):
path = "/api/v2/logs/events/search"
host = "https://api.datadoghq.com"
try:
time_to = datetime.now(timezone.utc)
time_from = time_to - timedelta(minutes=10)

queryJson = {
"filter": {
"from": time_from.isoformat(timespec="seconds"),
"to": time_to.isoformat(timespec="seconds"),
"query": f'service:instrumentation-telemetry-data (@tags.severity:crash OR severity:crash OR signum:*) @metadata.tags:"runtime-id:{runtime_id}"',
},
}
logger.debug(f"Posting to {host}{path} with query: {queryJson}")
headers = _headers()
headers["Content-Type"] = "application/json"
r = requests.post(f"{host}{path}", headers=headers, timeout=10, json=queryJson)
logger.debug(f" Backend response status for crash events for runtime [{runtime_id}]: [{r.status_code}]")
if r.status_code == 200:
logger.debug(f" Backend response for crash events for runtime [{runtime_id}]: [{r.text}]")
data = r.json()["data"]
if isinstance(data, list) and len(data) > 0:
return (r.status_code,)
return (-1,)
return (r.status_code,)
except Exception as e:
logger.error(f"Error received connecting to host: [{host}] {e} ")
return (-1,)


def _retry_request_until_timeout(request_fn: Callable, timeout: float = 5.0):
start_time = time.perf_counter()
while True:
status, runtime_id = _query_for_trace_id(trace_id, validator=validator)
if status != 200:
return_value = request_fn()
if return_value[0] != 200:
time.sleep(2)
else:
logger.info(f"trace [{trace_id}] found in the backend!")
if profile:
while True:
if _query_for_profile(runtime_id) != 200:
time.sleep(2)
else:
logger.info(f"profile for trace [{trace_id}] (runtime [{runtime_id}]) found in the backend!")
break
if time.perf_counter() - start_time >= timeout:
raise TimeoutError("Backend timeout waiting for profile")
break
if time.perf_counter() - start_time >= timeout:
raise TimeoutError("Backend timeout waiting for trace")
raise TimeoutError("Backend timeout")
return return_value


def wait_backend_data(
trace_id=None,
timeout: float = 5.0,
profile: bool = False,
appsec: bool = False,
crashlog: bool = False,
validator=None,
) -> Optional[str]:
runtime_id = None
if trace_id is not None:
status, runtime_id = _retry_request_until_timeout(
functools.partial(_query_for_trace_id, trace_id, validator=validator), timeout=10.0
)
logger.info(f"trace [{trace_id}] found in the backend!")
if profile and runtime_id is not None:
(status,) = _retry_request_until_timeout(functools.partial(_query_for_profile, runtime_id))
logger.info(f"profile for trace [{trace_id}] (runtime [{runtime_id}]) found in the backend!")
return runtime_id


wait_backend_trace_id = wait_backend_data


def cause_and_verify_crash(runtime_id: str, vm_ip: str, vm_port: str):
logger.info(f"Making a crash-inducing request to weblog [{vm_ip}:{vm_port}]")
make_get_request(f"http://{vm_ip}:{vm_port}/crashme", swallow=True)
(status,) = _retry_request_until_timeout(functools.partial(_query_for_crash_log, runtime_id), timeout=600.0)
logger.info(f"crash from runtime {runtime_id} found in the backend!")
48 changes: 36 additions & 12 deletions utils/onboarding/weblog_interface.py
@@ -2,19 +2,27 @@
from random import randint
import os
import requests
from utils.onboarding.wait_for_tcp_port import wait_for_port
from utils.tools import logger


def make_get_request(app_url):
def make_get_request(app_url, swallow: bool = False) -> str:
generated_uuid = str(randint(1, 100000000000000000))
requests.get(
app_url,
headers={
"x-datadog-trace-id": generated_uuid,
"x-datadog-parent-id": generated_uuid,
"x-datadog-sampling-priority": "2",
},
timeout=10,
)
try:
requests.get(
app_url,
headers={
"x-datadog-trace-id": generated_uuid,
"x-datadog-parent-id": generated_uuid,
"x-datadog-sampling-priority": "2",
},
timeout=10,
)
except Exception as e:
if not swallow:
raise
else:
logger.warning(e)
return generated_uuid


@@ -28,13 +36,13 @@ def warmup_weblog(app_url):


def make_internal_get_request(stdin_file, vm_port):
""" This method is exclusively for testing through KrunVm microVM.
""" This method is exclusively for testing through KrunVm microVM.
It is used to make a request to the weblog application inside the VM, using stdin file"""

generated_uuid = str(randint(1, 100000000000000000))
timeout = 80
script_to_run = f"""#!/bin/bash
echo "Requesting weblog..."
echo "Requesting weblog..."
URL="http://localhost:{vm_port}/"
TIMEOUT={timeout}
TRACE_ID={generated_uuid}
@@ -74,3 +82,19 @@ def make_internal_get_request(stdin_file, vm_port):
raise TimeoutError("Timed out waiting for weblog ready")

return generated_uuid


def request_weblog(virtual_machine, vm_ip, vm_port) -> str:
if virtual_machine.krunvm_config is not None and virtual_machine.krunvm_config.stdin is not None:
logger.info(
"We are testing on krunvm. The request to the weblog will be done using the stdin (inside the microvm)"
)
request_uuid = make_internal_get_request(virtual_machine.krunvm_config.stdin, vm_port)
else:
logger.info(f"Waiting for weblog available [{vm_ip}:{vm_port}]")
wait_for_port(vm_port, vm_ip, 80.0)
logger.info(f"[{vm_ip}]: Weblog app is ready!")
warmup_weblog(f"http://{vm_ip}:{vm_port}/")
logger.info(f"Making a request to weblog [{vm_ip}:{vm_port}]")
request_uuid = make_get_request(f"http://{vm_ip}:{vm_port}/")
return request_uuid