-
Notifications
You must be signed in to change notification settings - Fork 2.9k
Open
Labels
bug (Something isn't working), needs triage (New item requiring triage), receiver/awscontainerinsight, waiting for author
Description
Component(s)
receiver/awscontainerinsight
What happened?
Description
Bumping the OpenTelemetry Collector Contrib version to v0.130.0 causes the collector to log errors and drop all metrics.
Steps to Reproduce
Deployment template
# Namespace that holds every namespaced resource defined in this template.
apiVersion: v1
kind: Namespace
metadata:
  name: aws-otel-eks
  labels:
    name: aws-otel-eks
---
# Service account for the collector pods; it is the subject of the
# aoc-agent-role-binding ClusterRoleBinding and is referenced by the
# DaemonSet's serviceAccountName.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: aws-otel-sa
  namespace: aws-otel-eks
---
# Cluster-wide permissions granted to the collector's service account.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: aoc-agent-role
rules:
  # Read access to core workload and node objects.
  - apiGroups: [""]
    resources: ["pods", "nodes", "endpoints"]
    verbs: ["list", "watch", "get"]
  - apiGroups: ["apps"]
    resources: ["replicasets"]
    verbs: ["list", "watch", "get"]
  - apiGroups: ["batch"]
    resources: ["jobs"]
    verbs: ["list", "watch"]
  # Node sub-resources.
  - apiGroups: [""]
    resources: ["nodes/proxy"]
    verbs: ["get"]
  - apiGroups: [""]
    resources: ["nodes/stats", "configmaps", "events"]
    verbs: ["create", "get"]
  - apiGroups: [""]
    resources: ["configmaps"]
    verbs: ["update"]
  # Access scoped to the otel-container-insight-clusterleader ConfigMap.
  # NOTE(review): Kubernetes RBAC cannot restrict `create` by resourceNames,
  # so `create` on configmaps is effectively granted by the unscoped rule
  # above rather than by this one - confirm this is intended.
  - apiGroups: [""]
    resources: ["configmaps"]
    resourceNames: ["otel-container-insight-clusterleader"]
    verbs: ["get", "update", "create"]
  # Lease objects in the coordination API group.
  - apiGroups: ["coordination.k8s.io"]
    resources: ["leases"]
    verbs: ["create", "get", "update"]
---
# Binds the aoc-agent-role ClusterRole to the aws-otel-sa service account.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: aoc-agent-role-binding
subjects:
  - kind: ServiceAccount
    name: aws-otel-sa
    namespace: aws-otel-eks
roleRef:
  kind: ClusterRole
  name: aoc-agent-role
  apiGroup: rbac.authorization.k8s.io
---
# Collector configuration, stored under the `otel-agent-config` key. The
# DaemonSet projects that key to /conf/otel-agent-config.yaml.
apiVersion: v1
kind: ConfigMap
metadata:
  name: otel-agent-conf
  namespace: aws-otel-eks
  labels:
    app: opentelemetry
    component: otel-agent-conf
data:
  # Literal block scalar: everything indented below is the collector config
  # file content, passed through verbatim.
  otel-agent-config: |
    extensions:
      health_check:
    receivers:
      awscontainerinsightreceiver:
    processors:
      batch/metrics:
        timeout: 60s
    exporters:
      awsemf:
        namespace: ContainerInsights
        log_group_name: '/aws/containerinsights/collector-ci/performance'
        resource_to_telemetry_conversion:
          enabled: true
        dimension_rollup_option: NoDimensionRollup
        parse_json_encoded_attr_values: [Sources, kubernetes]
        metric_declarations:
          # node metrics
          - dimensions: [[NodeName, InstanceId, ClusterName]]
            metric_name_selectors:
              - node_cpu_utilization
              - node_memory_utilization
              - node_network_total_bytes
              - node_cpu_reserved_capacity
              - node_memory_reserved_capacity
              - node_number_of_running_pods
              - node_number_of_running_containers
          - dimensions: [[ClusterName]]
            metric_name_selectors:
              - node_cpu_utilization
              - node_memory_utilization
              - node_network_total_bytes
              - node_cpu_reserved_capacity
              - node_memory_reserved_capacity
              - node_number_of_running_pods
              - node_number_of_running_containers
              - node_cpu_usage_total
              - node_cpu_limit
              - node_memory_working_set
              - node_memory_limit
          # pod metrics
          - dimensions: [[PodName, Namespace, ClusterName], [Service, Namespace, ClusterName], [Namespace, ClusterName], [ClusterName]]
            metric_name_selectors:
              - pod_cpu_utilization
              - pod_memory_utilization
              - pod_network_rx_bytes
              - pod_network_tx_bytes
              - pod_cpu_utilization_over_pod_limit
              - pod_memory_utilization_over_pod_limit
          - dimensions: [[PodName, Namespace, ClusterName], [ClusterName]]
            metric_name_selectors:
              - pod_cpu_reserved_capacity
              - pod_memory_reserved_capacity
          - dimensions: [[PodName, Namespace, ClusterName]]
            metric_name_selectors:
              - pod_number_of_container_restarts
          # cluster metrics
          - dimensions: [[ClusterName]]
            metric_name_selectors:
              - cluster_node_count
              - cluster_failed_node_count
          # service metrics
          - dimensions: [[Service, Namespace, ClusterName], [ClusterName]]
            metric_name_selectors:
              - service_number_of_running_pods
          # node fs metrics
          - dimensions: [[NodeName, InstanceId, ClusterName], [ClusterName]]
            metric_name_selectors:
              - node_filesystem_utilization
          # namespace metrics
          - dimensions: [[Namespace, ClusterName], [ClusterName]]
            metric_name_selectors:
              - namespace_number_of_running_pods
      debug:
        verbosity: detailed
    service:
      pipelines:
        metrics:
          receivers: [awscontainerinsightreceiver]
          processors: [batch/metrics]
          exporters: [awsemf]
      extensions: [health_check]
---
# DaemonSet that runs the collector container with the config from the
# otel-agent-conf ConfigMap.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: aws-otel-eks-ci
  namespace: aws-otel-eks
spec:
  selector:
    matchLabels:
      name: aws-otel-eks-ci
  template:
    metadata:
      labels:
        name: aws-otel-eks-ci
    spec:
      containers:
        - name: aws-otel-collector
          # Placeholder: replace with the collector image to deploy.
          image: <collector-image-url>
          env:
            - name: AWS_REGION
              value: "us-west-2"
            # The variables below are filled from the pod's own fields.
            - name: K8S_NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: HOST_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.hostIP
            - name: HOST_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: K8S_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
          imagePullPolicy: Always
          command:
            - "/awscollector"
            - "--config=/conf/otel-agent-config.yaml"
          # Host paths are mounted read-only; /conf carries the collector
          # config projected from the ConfigMap.
          volumeMounts:
            - name: rootfs
              mountPath: /rootfs
              readOnly: true
            - name: dockersock
              mountPath: /var/run/docker.sock
              readOnly: true
            - name: varlibdocker
              mountPath: /var/lib/docker
              readOnly: true
            - name: containerdsock
              mountPath: /run/containerd/containerd.sock
              readOnly: true
            - name: sys
              mountPath: /sys
              readOnly: true
            - name: devdisk
              mountPath: /dev/disk
              readOnly: true
            - name: otel-agent-config-vol
              mountPath: /conf
          # Requests equal limits for both CPU and memory.
          resources:
            limits:
              cpu: 200m
              memory: 200Mi
            requests:
              cpu: 200m
              memory: 200Mi
      volumes:
        # Projects the ConfigMap key `otel-agent-config` as the file
        # otel-agent-config.yaml, matching the --config path above.
        - configMap:
            name: otel-agent-conf
            items:
              - key: otel-agent-config
                path: otel-agent-config.yaml
          name: otel-agent-config-vol
        - name: rootfs
          hostPath:
            path: /
        - name: dockersock
          hostPath:
            path: /var/run/docker.sock
        - name: varlibdocker
          hostPath:
            path: /var/lib/docker
        - name: containerdsock
          hostPath:
            path: /run/containerd/containerd.sock
        - name: sys
          hostPath:
            path: /sys
        - name: devdisk
          hostPath:
            path: /dev/disk/
      serviceAccountName: aws-otel-sa
# # create namespace
# apiVersion: v1
# kind: Namespace
# metadata:
# name: aws-otel-eks
# labels:
# name: aws-otel-eks
# ---
# # create cwagent service account and role binding
# apiVersion: v1
# kind: ServiceAccount
# metadata:
# name: aws-otel-sa
# namespace: aws-otel-eks
# ---
# kind: ClusterRole
# apiVersion: rbac.authorization.k8s.io/v1
# metadata:
# name: aoc-agent-role
# rules:
# - apiGroups: [""]
# resources: ["pods", "nodes", "endpoints"]
# verbs: ["list", "watch", "get"]
# - apiGroups: ["apps"]
# resources: ["replicasets"]
# verbs: ["list", "watch", "get"]
# - apiGroups: ["batch"]
# resources: ["jobs"]
# verbs: ["list", "watch"]
# - apiGroups: [""]
# resources: ["nodes/proxy"]
# verbs: ["get"]
# - apiGroups: [""]
# resources: ["nodes/stats", "configmaps", "events"]
# verbs: ["create", "get"]
# - apiGroups: [""]
# resources: ["configmaps"]
# verbs: ["update"]
# - apiGroups: [""]
# resources: ["configmaps"]
# resourceNames: ["otel-container-insight-clusterleader"]
# verbs: ["get","update", "create"]
# - apiGroups: ["coordination.k8s.io"]
# resources: ["leases"]
# verbs: ["create","get", "update"]
# ---
# kind: ClusterRoleBinding
# apiVersion: rbac.authorization.k8s.io/v1
# metadata:
# name: aoc-agent-role-binding
# subjects:
# - kind: ServiceAccount
# name: aws-otel-sa
# namespace: aws-otel-eks
# roleRef:
# kind: ClusterRole
# name: aoc-agent-role
# apiGroup: rbac.authorization.k8s.io
# ---
# apiVersion: v1
# kind: ConfigMap
# metadata:
# name: otel-agent-conf
# namespace: aws-otel-eks
# labels:
# app: opentelemetry
# component: otel-agent-conf
# data:
# otel-agent-config: |
# extensions:
# health_check:
# sigv4auth:
# region: "us-west-2"
# receivers:
# awscontainerinsightreceiver:
# prefer_full_pod_name: true
# processors:
# batch/metrics:
# timeout: 10s
# exporters:
# prometheusremotewrite:
# endpoint: "https://aps-workspaces.us-west-2.amazonaws.com/workspaces/ws-4c399252-f488-42dd-a500-3f0b6c09b2ab/api/v1/remote_write"
# resource_to_telemetry_conversion:
# enabled: true
# auth:
# authenticator: sigv4auth
# awsemf:
# namespace: ContainerInsights
# log_group_name: '/performance'
# log_stream_name: 'test'
# resource_to_telemetry_conversion:
# enabled: true
# dimension_rollup_option: NoDimensionRollup
# service:
# pipelines:
# metrics:
# receivers: [awscontainerinsightreceiver]
# processors: [batch/metrics]
# exporters: [awsemf, prometheusremotewrite]
# extensions: [health_check, sigv4auth]
# ---
# # create Deployment
# apiVersion: apps/v1
# kind: Deployment
# metadata:
# name: aws-otel-eks-ci
# namespace: aws-otel-eks
# spec:
# selector:
# matchLabels:
# name: aws-otel-eks-ci
# template:
# metadata:
# labels:
# name: aws-otel-eks-ci
# spec:
# containers:
# - name: aws-otel-collector
# image: public.ecr.aws/aws-observability/aws-otel-collector:v0.38.0
# env:
# - name: K8S_NODE_NAME
# valueFrom:
# fieldRef:
# fieldPath: spec.nodeName
# - name: HOST_IP
# valueFrom:
# fieldRef:
# fieldPath: status.hostIP
# - name: HOST_NAME
# valueFrom:
# fieldRef:
# fieldPath: spec.nodeName
# - name: K8S_NAMESPACE
# valueFrom:
# fieldRef:
# fieldPath: metadata.namespace
# imagePullPolicy: Always
# command:
# - "/awscollector"
# - "--config=/conf/otel-agent-config.yaml"
# volumeMounts:
# - name: rootfs
# mountPath: /rootfs
# readOnly: true
# - name: dockersock
# mountPath: /var/run/docker.sock
# readOnly: true
# - name: varlibdocker
# mountPath: /var/lib/docker
# readOnly: true
# - name: containerdsock
# mountPath: /run/containerd/containerd.sock
# readOnly: true
# - name: sys
# mountPath: /sys
# readOnly: true
# - name: devdisk
# mountPath: /dev/disk
# readOnly: true
# - name: otel-agent-config-vol
# mountPath: /conf
# resources:
# limits:
# cpu: 200m
# memory: 200Mi
# requests:
# cpu: 200m
# memory: 200Mi
# volumes:
# - configMap:
# name: otel-agent-conf
# items:
# - key: otel-agent-config
# path: otel-agent-config.yaml
# name: otel-agent-config-vol
# - name: rootfs
# hostPath:
# path: /
# - name: dockersock
# hostPath:
# path: /var/run/docker.sock
# - name: varlibdocker
# hostPath:
# path: /var/lib/docker
# - name: containerdsock
# hostPath:
# path: /run/containerd/containerd.sock
# - name: sys
# hostPath:
# path: /sys
# - name: devdisk
# hostPath:
# path: /dev/disk/
# serviceAccountName: aws-otel-sa
kubectl apply -f <file.yml>
Expected Result
Metrics in cloudwatch
Actual Result
Error in collector logs
Collector version
v0.130.0
Environment information
Environment
OS: (e.g., "Ubuntu 20.04")
Compiler(if manually compiled): (e.g., "go 14.2")
OpenTelemetry Collector configuration
# OpenTelemetry Collector configuration: one metrics pipeline from the
# Container Insights receiver, through a batch processor, to the awsemf
# exporter.
extensions:
  health_check:
receivers:
  awscontainerinsightreceiver:
processors:
  batch/metrics:
    timeout: 60s
exporters:
  awsemf:
    namespace: ContainerInsights
    log_group_name: '/aws/containerinsights/collector-ci/performance'
    resource_to_telemetry_conversion:
      enabled: true
    dimension_rollup_option: NoDimensionRollup
    parse_json_encoded_attr_values: [Sources, kubernetes]
    metric_declarations:
      # node metrics
      - dimensions: [[NodeName, InstanceId, ClusterName]]
        metric_name_selectors:
          - node_cpu_utilization
          - node_memory_utilization
          - node_network_total_bytes
          - node_cpu_reserved_capacity
          - node_memory_reserved_capacity
          - node_number_of_running_pods
          - node_number_of_running_containers
      - dimensions: [[ClusterName]]
        metric_name_selectors:
          - node_cpu_utilization
          - node_memory_utilization
          - node_network_total_bytes
          - node_cpu_reserved_capacity
          - node_memory_reserved_capacity
          - node_number_of_running_pods
          - node_number_of_running_containers
          - node_cpu_usage_total
          - node_cpu_limit
          - node_memory_working_set
          - node_memory_limit
      # pod metrics
      - dimensions: [[PodName, Namespace, ClusterName], [Service, Namespace, ClusterName], [Namespace, ClusterName], [ClusterName]]
        metric_name_selectors:
          - pod_cpu_utilization
          - pod_memory_utilization
          - pod_network_rx_bytes
          - pod_network_tx_bytes
          - pod_cpu_utilization_over_pod_limit
          - pod_memory_utilization_over_pod_limit
      - dimensions: [[PodName, Namespace, ClusterName], [ClusterName]]
        metric_name_selectors:
          - pod_cpu_reserved_capacity
          - pod_memory_reserved_capacity
      - dimensions: [[PodName, Namespace, ClusterName]]
        metric_name_selectors:
          - pod_number_of_container_restarts
      # cluster metrics
      - dimensions: [[ClusterName]]
        metric_name_selectors:
          - cluster_node_count
          - cluster_failed_node_count
      # service metrics
      - dimensions: [[Service, Namespace, ClusterName], [ClusterName]]
        metric_name_selectors:
          - service_number_of_running_pods
      # node fs metrics
      - dimensions: [[NodeName, InstanceId, ClusterName], [ClusterName]]
        metric_name_selectors:
          - node_filesystem_utilization
      # namespace metrics
      - dimensions: [[Namespace, ClusterName], [ClusterName]]
        metric_name_selectors:
          - namespace_number_of_running_pods
  # Defined here but not listed in the metrics pipeline's exporters below.
  debug:
    verbosity: detailed
service:
  pipelines:
    metrics:
      receivers: [awscontainerinsightreceiver]
      processors: [batch/metrics]
      exporters: [awsemf]
  extensions: [health_check]
Log output
I0805 00:15:50.087785 1 leaderelection.go:271] successfully acquired lease aws-otel-eks/otel-container-insight-clusterleader
2025-08-05T00:15:50.087Z info k8sapiserver/k8sapiserver.go:305 k8sapiserver Switch New Leader: ip-10-0-190-30.us-west-2.compute.internal {"resource": {"service.instance.id": "6ff4ee99-132f-497d-a469-9be91d2c8260", "service.name": "aws-otel-collector", "service.version": "v0.43.3"}, "otelcol.component.id": "awscontainerinsightreceiver", "otelcol.component.kind": "receiver", "otelcol.signal": "metrics"}
2025-08-05T00:15:50.088Z info k8sapiserver/k8sapiserver.go:263 k8sapiserver OnStartedLeading: ip-10-0-190-30.us-west-2.compute.internal {"resource": {"service.instance.id": "6ff4ee99-132f-497d-a469-9be91d2c8260", "service.name": "aws-otel-collector", "service.version": "v0.43.3"}, "otelcol.component.id": "awscontainerinsightreceiver", "otelcol.component.kind": "receiver", "otelcol.signal": "metrics"}
I0805 00:15:50.088398 1 event.go:377] Event(v1.ObjectReference{Kind:"Lease", Namespace:"aws-otel-eks", Name:"otel-container-insight-clusterleader", UID:"37315191-b42b-41b8-a0f0-4c7962106c45", APIVersion:"coordination.k8s.io/v1", ResourceVersion:"1748114", FieldPath:""}): type: 'Normal' reason: 'LeaderElection' ip-10-0-190-30.us-west-2.compute.internal became leader
2025-08-05T00:15:52.662Z info host/ec2tags.go:80 Fetch ec2 tags to detect cluster name and auto scaling group name {"resource": {"service.instance.id": "6ff4ee99-132f-497d-a469-9be91d2c8260", "service.name": "aws-otel-collector", "service.version": "v0.43.3"}, "otelcol.component.id": "awscontainerinsightreceiver", "otelcol.component.kind": "receiver", "otelcol.signal": "metrics", "instanceId": "i-047f84b88d1d89bd6"}
2025-08-05T00:15:52.662Z info host/ebsvolume.go:86 Fetch ebs volumes from ec2 api {"resource": {"service.instance.id": "6ff4ee99-132f-497d-a469-9be91d2c8260", "service.name": "aws-otel-collector", "service.version": "v0.43.3"}, "otelcol.component.id": "awscontainerinsightreceiver", "otelcol.component.kind": "receiver", "otelcol.signal": "metrics"}
2025-08-05T00:15:52.678Z warn host/ec2tags.go:101 Fail to call ec2 DescribeTags {"resource": {"service.instance.id": "6ff4ee99-132f-497d-a469-9be91d2c8260", "service.name": "aws-otel-collector", "service.version": "v0.43.3"}, "otelcol.component.id": "awscontainerinsightreceiver", "otelcol.component.kind": "receiver", "otelcol.signal": "metrics", "error": "operation error EC2: DescribeTags, https response error StatusCode: 400, RequestID: 21db29a4-d358-4995-b5e1-a7371a869f96, api error MissingParameter: The request must contain the parameter AWSAccessKeyId", "instanceId": "i-047f84b88d1d89bd6"}
2025-08-05T00:15:52.678Z info host/ec2tags.go:132 Fetch ec2 tags successfully {"resource": {"service.instance.id": "6ff4ee99-132f-497d-a469-9be91d2c8260", "service.name": "aws-otel-collector", "service.version": "v0.43.3"}, "otelcol.component.id": "awscontainerinsightreceiver", "otelcol.component.kind": "receiver", "otelcol.signal": "metrics"}
2025-08-05T00:15:52.678Z info host/ec2tags.go:135 Fetch ec2 tags to detect cluster name and auto scaling group name {"resource": {"service.instance.id": "6ff4ee99-132f-497d-a469-9be91d2c8260", "service.name": "aws-otel-collector", "service.version": "v0.43.3"}, "otelcol.component.id": "awscontainerinsightreceiver", "otelcol.component.kind": "receiver", "otelcol.signal": "metrics", "instanceId": ""}
2025-08-05T00:15:52.678Z info host/ec2tags.go:136 Fetch ec2 tags to detect cluster name and auto scaling group name {"resource": {"service.instance.id": "6ff4ee99-132f-497d-a469-9be91d2c8260", "service.name": "aws-otel-collector", "service.version": "v0.43.3"}, "otelcol.component.id": "awscontainerinsightreceiver", "otelcol.component.kind": "receiver", "otelcol.signal": "metrics", "instanceId": ""}
2025-08-05T00:15:52.680Z warn host/ebsvolume.go:102 Fail to call ec2 DescribeVolumes {"resource": {"service.instance.id": "6ff4ee99-132f-497d-a469-9be91d2c8260", "service.name": "aws-otel-collector", "service.version": "v0.43.3"}, "otelcol.component.id": "awscontainerinsightreceiver", "otelcol.component.kind": "receiver", "otelcol.signal": "metrics", "error": "operation error EC2: DescribeVolumes, https response error StatusCode: 400, RequestID: f1e065df-0f5a-4cab-b9db-9ab1a7ff31b8, api error MissingParameter: The request must contain the parameter AWSAccessKeyId"}
2025-08-05T00:16:50.076Z warn cadvisor/cadvisor_linux.go:326 Failed to detect cluster name. Drop all metrics {"resource": {"service.instance.id": "6ff4ee99-132f-497d-a469-9be91d2c8260", "service.name": "aws-otel-collector", "service.version": "v0.43.3"}, "otelcol.component.id": "awscontainerinsightreceiver", "otelcol.component.kind": "receiver", "otelcol.signal": "metrics"}
Additional context
No response
Tip
React with 👍 to help prioritize this issue. Please use comments to provide useful context, avoiding "+1" or "me too", to help us triage it. Learn more here.
Metadata
Metadata
Assignees
Labels
bug (Something isn't working), needs triage (New item requiring triage), receiver/awscontainerinsight, waiting for author