Skip to content

feat: initial support for ASM inside the tracer #621

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jun 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ RUN pip install --no-cache-dir . -t ./python/lib/$runtime/site-packages
RUN rm -rf ./python/lib/$runtime/site-packages/botocore*
RUN rm -rf ./python/lib/$runtime/site-packages/setuptools
RUN rm -rf ./python/lib/$runtime/site-packages/jsonschema/tests
RUN find . -name 'libddwaf.so' -delete
RUN rm -f ./python/lib/$runtime/site-packages/ddtrace/appsec/_iast/_taint_tracking/*.so
RUN rm -f ./python/lib/$runtime/site-packages/ddtrace/appsec/_iast/_stacktrace*.so
RUN rm -f ./python/lib/$runtime/site-packages/ddtrace/internal/datadog/profiling/libdd_wrapper*.so
Expand Down
174 changes: 174 additions & 0 deletions datadog_lambda/asm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
from copy import deepcopy
import logging
from typing import Any, Dict, List, Optional, Union

from ddtrace.contrib.internal.trace_utils import _get_request_header_client_ip
from ddtrace.internal import core
from ddtrace.trace import Span

from datadog_lambda.trigger import (
EventSubtypes,
EventTypes,
_EventSource,
_http_event_types,
)

logger = logging.getLogger(__name__)


def _to_single_value_headers(headers: Dict[str, List[str]]) -> Dict[str, str]:
"""
Convert multi-value headers to single-value headers.
If a header has multiple values, join them with commas.
"""
single_value_headers = {}
for key, values in headers.items():
single_value_headers[key] = ", ".join(values)
return single_value_headers


def _merge_single_and_multi_value_headers(
single_value_headers: Dict[str, str],
multi_value_headers: Dict[str, List[str]],
):
"""
Merge single-value headers with multi-value headers.
If a header exists in both, we merge them removing duplicates
"""
merged_headers = deepcopy(multi_value_headers)
for key, value in single_value_headers.items():
if key not in merged_headers:
merged_headers[key] = [value]
elif value not in merged_headers[key]:
merged_headers[key].append(value)
return _to_single_value_headers(merged_headers)


def asm_start_request(
span: Span,
event: Dict[str, Any],
event_source: _EventSource,
trigger_tags: Dict[str, str],
):
if event_source.event_type not in _http_event_types:
return

request_headers: Dict[str, str] = {}
peer_ip: Optional[str] = None
request_path_parameters: Optional[Dict[str, Any]] = None
route: Optional[str] = None

if event_source.event_type == EventTypes.ALB:
headers = event.get("headers")
multi_value_request_headers = event.get("multiValueHeaders")
if multi_value_request_headers:
request_headers = _to_single_value_headers(multi_value_request_headers)
else:
request_headers = headers or {}

raw_uri = event.get("path")
parsed_query = event.get("multiValueQueryStringParameters") or event.get(
"queryStringParameters"
)

elif event_source.event_type == EventTypes.LAMBDA_FUNCTION_URL:
request_headers = event.get("headers", {})
peer_ip = event.get("requestContext", {}).get("http", {}).get("sourceIp")
raw_uri = event.get("rawPath")
parsed_query = event.get("queryStringParameters")

elif event_source.event_type == EventTypes.API_GATEWAY:
request_context = event.get("requestContext", {})
request_path_parameters = event.get("pathParameters")
route = trigger_tags.get("http.route")

if event_source.subtype == EventSubtypes.API_GATEWAY:
request_headers = event.get("headers", {})
peer_ip = request_context.get("identity", {}).get("sourceIp")
raw_uri = event.get("path")
parsed_query = event.get("multiValueQueryStringParameters")

elif event_source.subtype == EventSubtypes.HTTP_API:
request_headers = event.get("headers", {})
peer_ip = request_context.get("http", {}).get("sourceIp")
raw_uri = event.get("rawPath")
parsed_query = event.get("queryStringParameters")

elif event_source.subtype == EventSubtypes.WEBSOCKET:
request_headers = _to_single_value_headers(
event.get("multiValueHeaders", {})
)
peer_ip = request_context.get("identity", {}).get("sourceIp")
raw_uri = event.get("path")
parsed_query = event.get("multiValueQueryStringParameters")

else:
return

else:
return

body = event.get("body")
is_base64_encoded = event.get("isBase64Encoded", False)

request_ip = _get_request_header_client_ip(request_headers, peer_ip, True)
if request_ip is not None:
span.set_tag_str("http.client_ip", request_ip)
span.set_tag_str("network.client.ip", request_ip)

core.dispatch(
# The matching listener is registered in ddtrace.appsec._handlers
"aws_lambda.start_request",
(
span,
request_headers,
request_ip,
body,
is_base64_encoded,
raw_uri,
route,
trigger_tags.get("http.method"),
parsed_query,
request_path_parameters,
),
)


def asm_start_response(
span: Span,
status_code: str,
event_source: _EventSource,
response: Union[Dict[str, Any], str, None],
):
if event_source.event_type not in _http_event_types:
return

if isinstance(response, dict) and (
"headers" in response or "multiValueHeaders" in response
):
headers = response.get("headers", {})
multi_value_request_headers = response.get("multiValueHeaders")
if isinstance(multi_value_request_headers, dict) and isinstance(headers, dict):
response_headers = _merge_single_and_multi_value_headers(
headers, multi_value_request_headers
)
elif isinstance(headers, dict):
response_headers = headers
else:
response_headers = {
"content-type": "application/json",
}
else:
response_headers = {
"content-type": "application/json",
}

core.dispatch(
# The matching listener is registered in ddtrace.appsec._handlers
"aws_lambda.start_response",
(
span,
status_code,
response_headers,
),
)
1 change: 1 addition & 0 deletions datadog_lambda/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ def _resolve_env(self, key, default=None, cast=None, depends_on_tracing=False):
data_streams_enabled = _get_env(
"DD_DATA_STREAMS_ENABLED", "false", as_bool, depends_on_tracing=True
)
appsec_enabled = _get_env("DD_APPSEC_ENABLED", "false", as_bool)

is_gov_region = _get_env("AWS_REGION", "", lambda x: x.startswith("us-gov-"))

Expand Down
2 changes: 1 addition & 1 deletion datadog_lambda/tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -859,7 +859,7 @@ def create_inferred_span_from_lambda_function_url_event(event, context):
InferredSpanInfo.set_tags(tags, tag_source="self", synchronicity="sync")
if span:
span.set_tags(tags)
span.start_ns = int(request_time_epoch) * 1e6
span.start_ns = int(request_time_epoch * 1e6)
return span


Expand Down
12 changes: 12 additions & 0 deletions datadog_lambda/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from importlib import import_module
from time import time_ns

from datadog_lambda.asm import asm_start_response, asm_start_request
from datadog_lambda.dsm import set_dsm_context
from datadog_lambda.extension import should_use_extension, flush_extension
from datadog_lambda.cold_start import (
Expand Down Expand Up @@ -253,6 +254,8 @@ def _before(self, event, context):
parent_span=self.inferred_span,
span_pointers=calculate_span_pointers(event_source, event),
)
if config.appsec_enabled:
asm_start_request(self.span, event, event_source, self.trigger_tags)
else:
set_correlation_ids()
if config.profiling_enabled and is_new_sandbox():
Expand Down Expand Up @@ -285,6 +288,15 @@ def _after(self, event, context):

if status_code:
self.span.set_tag("http.status_code", status_code)

if config.appsec_enabled:
asm_start_response(
self.span,
status_code,
self.event_source,
response=self.response,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the self.response might not be a Dictionary, right?

Copy link
Contributor Author

@florentinl florentinl Jun 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes you are right, it happens in two cases:

  • malformed responses
  • For API Gateway HTTP API events with payload version 2 or lambda function url events, the function may return just the body of the response

I pushed a fix, thank you.

)

self.span.finish()

if self.inferred_span:
Expand Down
4 changes: 2 additions & 2 deletions scripts/check_layer_size.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
# Compares layer size to threshold, and fails if below that threshold

set -e
MAX_LAYER_COMPRESSED_SIZE_KB=$(expr 5 \* 1024)
MAX_LAYER_UNCOMPRESSED_SIZE_KB=$(expr 13 \* 1024)
MAX_LAYER_COMPRESSED_SIZE_KB=$(expr 6 \* 1024)
MAX_LAYER_UNCOMPRESSED_SIZE_KB=$(expr 15 \* 1024)


LAYER_FILES_PREFIX="datadog_lambda_py"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"requestContext": {
"elb": {
"targetGroupArn": "arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/lambda-xyz/123abc"
}
},
"httpMethod": "GET",
"path": "/lambda",
"queryStringParameters": {
"query": "1234ABCD"
},
"multiValueHeaders": {
"accept": ["text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"],
"accept-encoding": ["gzip"],
"accept-language": ["en-US,en;q=0.9"],
"connection": ["keep-alive"],
"host": ["lambda-alb-123578498.us-east-2.elb.amazonaws.com"],
"upgrade-insecure-requests": ["1"],
"user-agent": ["Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"],
"x-amzn-trace-id": ["Root=1-5c536348-3d683b8b04734faae651f476"],
"x-forwarded-for": ["72.12.164.125"],
"x-forwarded-port": ["80"],
"x-forwarded-proto": ["http"],
"x-imforwards": ["20"],
"x-datadog-trace-id": ["12345"],
"x-datadog-parent-id": ["67890"],
"x-datadog-sampling-priority": ["2"]
},
"body": "",
"isBase64Encoded": false
}
Loading
Loading