Commit 88b7e190b29a369a95244e89d23bc47edef6c51c
1 parent
4bb7ce40
Exists in
master
feat: add opentelemetry
Showing
16 changed files
with
328 additions
and
273 deletions
Show diff stats
prometheus-grafana-opentelemetry/alertmanager/config.yml
0 → 100644
... | ... | @@ -0,0 +1,105 @@ |
1 | +version: '3.8' | |
2 | + | |
3 | +volumes: | |
4 | + prometheus_data: {} | |
5 | + grafana_data: {} | |
6 | + | |
7 | +services: | |
8 | + # Collector | |
9 | + otel-collector: | |
10 | + image: otel/opentelemetry-collector | |
11 | + restart: always | |
12 | + command: ["--config=/etc/otel-collector-config.yaml", ""] | |
13 | + volumes: | |
14 | + - ./otel/otel-collector-config.yaml:/etc/otel-collector-config.yaml | |
15 | + ports: | |
16 | + - "1888:1888" # pprof extension | |
17 | + - "8888:8888" # Prometheus metrics exposed by the collector | |
18 | + - "8889:8889" # Prometheus exporter metrics | |
19 | + - "13133:13133" # health_check extension | |
20 | + - "4317:4317" # OTLP gRPC receiver | |
21 | + - "55679:55679" # zpages extension | |
22 | + depends_on: | |
23 | + - prometheus | |
24 | + | |
25 | + prometheus: | |
26 | + image: prom/prometheus | |
27 | + restart: always | |
28 | + volumes: | |
29 | + - ./prometheus:/etc/prometheus/ | |
30 | + - prometheus_data:/prometheus | |
31 | + command: | |
32 | + - '--config.file=/etc/prometheus/prometheus.yml' | |
33 | + - '--storage.tsdb.path=/prometheus' | |
34 | + - '--web.console.libraries=/usr/share/prometheus/console_libraries' | |
35 | + - '--web.console.templates=/usr/share/prometheus/consoles' | |
36 | + ports: | |
37 | + - 9090:9090 | |
38 | + links: | |
39 | + - alertmanager:alertmanager | |
40 | + | |
41 | + alertmanager: | |
42 | + image: prom/alertmanager | |
43 | + restart: always | |
44 | + ports: | |
45 | + - 9093:9093 | |
46 | + volumes: | |
47 | + - ./alertmanager/:/etc/alertmanager/ | |
48 | + command: | |
49 | + - '--config.file=/etc/alertmanager/config.yml' | |
50 | + - '--storage.path=/alertmanager' | |
51 | + | |
52 | + # Jaeger | |
53 | + jaeger-all-in-one: | |
54 | + image: jaegertracing/all-in-one:latest | |
55 | + restart: always | |
56 | + ports: | |
57 | + - "16686:16686" | |
58 | + - "14268" | |
59 | + - "14250" | |
60 | + | |
61 | + grafana: | |
62 | + image: grafana/grafana | |
63 | + user: '0' | |
64 | + restart: always | |
65 | + environment: | |
66 | + GF_INSTALL_PLUGINS: 'grafana-clock-panel,grafana-simple-json-datasource' | |
67 | + volumes: | |
68 | + - grafana_data:/var/lib/grafana | |
69 | + - ./grafana/provisioning/:/etc/grafana/provisioning/ | |
70 | + env_file: | |
71 | + - ./grafana/config.monitoring | |
72 | + ports: | |
73 | + - 5032:3000 | |
74 | + depends_on: | |
75 | + - prometheus | |
76 | + | |
77 | + # node-exporter: | |
78 | + # image: prom/node-exporter | |
79 | + # volumes: | |
80 | + # - /proc:/host/proc:ro | |
81 | + # - /sys:/host/sys:ro | |
82 | + # - /:/rootfs:ro | |
83 | + # command: | |
84 | + # - '--path.procfs=/host/proc' | |
85 | + # - '--path.sysfs=/host/sys' | |
86 | + # - --collector.filesystem.ignored-mount-points | |
87 | + # - '^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)' | |
88 | + # ports: | |
89 | + # - 9100:9100 | |
90 | + # restart: always | |
91 | + # deploy: | |
92 | + # mode: global | |
93 | + | |
94 | + # cadvisor: | |
95 | + # image: gcr.io/cadvisor/cadvisor | |
96 | + # volumes: | |
97 | + # - /:/rootfs:ro | |
98 | + # - /var/run:/var/run:rw | |
99 | + # - /sys:/sys:ro | |
100 | + # - /var/lib/docker/:/var/lib/docker:ro | |
101 | + # ports: | |
102 | + # - 8080:8080 | |
103 | + # restart: always | |
104 | + # deploy: | |
105 | + # mode: global | |
0 | 106 | \ No newline at end of file | ... | ... |
prometheus-grafana-opentelemetry/grafana/config.monitoring
0 → 100644
prometheus-grafana-opentelemetry/grafana/provisioning/dashboards/dashboard.yml
0 → 100644
prometheus-grafana-opentelemetry/grafana/provisioning/datasources/datasource.yml
0 → 100644
... | ... | @@ -0,0 +1,50 @@ |
1 | +# config file version | |
2 | +apiVersion: 1 | |
3 | + | |
4 | +# list of datasources that should be deleted from the database | |
5 | +deleteDatasources: | |
6 | + - name: Prometheus | |
7 | + orgId: 1 | |
8 | + | |
9 | +# list of datasources to insert/update depending | |
10 | +# on what's available in the database | |
11 | +datasources: | |
12 | + # <string, required> name of the datasource. Required | |
13 | +- name: Prometheus | |
14 | + # <string, required> datasource type. Required | |
15 | + type: prometheus | |
16 | + # <string, required> access mode. direct or proxy. Required | |
17 | + access: proxy | |
18 | + # <int> org id. will default to orgId 1 if not specified | |
19 | + orgId: 1 | |
20 | + # <string> url | |
21 | + url: http://prometheus:9090 | |
22 | + # <string> database password, if used | |
23 | + password: | |
24 | + # <string> database user, if used | |
25 | + user: | |
26 | + # <string> database name, if used | |
27 | + database: | |
28 | + # <bool> enable/disable basic auth | |
29 | + basicAuth: false | |
30 | + # <string> basic auth username, if used | |
31 | + basicAuthUser: | |
32 | + # <string> basic auth password, if used | |
33 | + basicAuthPassword: | |
34 | + # <bool> enable/disable with credentials headers | |
35 | + withCredentials: | |
36 | + # <bool> mark as default datasource. Max one per org | |
37 | + isDefault: true | |
38 | + # <map> fields that will be converted to json and stored in json_data | |
39 | + jsonData: | |
40 | + graphiteVersion: "1.1" | |
41 | + tlsAuth: false | |
42 | + tlsAuthWithCACert: false | |
43 | + # <string> json object of data that will be encrypted. | |
44 | + secureJsonData: | |
45 | + tlsCACert: "..." | |
46 | + tlsClientCert: "..." | |
47 | + tlsClientKey: "..." | |
48 | + version: 1 | |
49 | + # <bool> allow users to edit datasources from the UI. | |
50 | + editable: true | |
0 | 51 | \ No newline at end of file | ... | ... |
prometheus-grafana-opentelemetry/otel/otel-collector-config.yaml
0 → 100644
... | ... | @@ -0,0 +1,43 @@ |
1 | +receivers: | |
2 | + otlp: | |
3 | + protocols: | |
4 | + grpc: | |
5 | + | |
6 | +exporters: | |
7 | + prometheus: | |
8 | + endpoint: "0.0.0.0:8889" | |
9 | + const_labels: | |
10 | + label1: value1 | |
11 | + | |
12 | + logging: | |
13 | + | |
14 | + # zipkin: | |
15 | + # endpoint: "http://zipkin-all-in-one:9411/api/v2/spans" | |
16 | + # format: proto | |
17 | + | |
18 | + jaeger: | |
19 | + endpoint: jaeger-all-in-one:14250 | |
20 | + tls: | |
21 | + insecure: true | |
22 | + | |
23 | +processors: | |
24 | + batch: | |
25 | + | |
26 | +extensions: | |
27 | + health_check: | |
28 | + pprof: | |
29 | + endpoint: :1888 | |
30 | + zpages: | |
31 | + endpoint: :55679 | |
32 | + | |
33 | +service: | |
34 | + extensions: [pprof, zpages, health_check] | |
35 | + pipelines: | |
36 | + traces: | |
37 | + receivers: [otlp] | |
38 | + processors: [batch] | |
39 | + exporters: [logging, jaeger] | |
40 | + metrics: | |
41 | + receivers: [otlp] | |
42 | + processors: [batch] | |
43 | + exporters: [logging, prometheus] | |
0 | 44 | \ No newline at end of file | ... | ... |
... | ... | @@ -0,0 +1,22 @@ |
1 | +groups: | |
2 | +- name: example | |
3 | + rules: | |
4 | + | |
5 | + # Alert for any instance that is unreachable for >2 minutes. | |
6 | + - alert: service_down | |
7 | + expr: up == 0 | |
8 | + for: 2m | |
9 | + labels: | |
10 | + severity: page | |
11 | + annotations: | |
12 | + summary: "Instance {{ $labels.instance }} down" | |
13 | + description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes." | |
14 | + | |
15 | + - alert: high_load | |
16 | + expr: node_load1 > 0.5 | |
17 | + for: 2m | |
18 | + labels: | |
19 | + severity: page | |
20 | + annotations: | |
21 | + summary: "Instance {{ $labels.instance }} under high load" | |
22 | + description: "{{ $labels.instance }} of job {{ $labels.job }} is under high load." | ... | ... |
prometheus-grafana-opentelemetry/prometheus/prometheus.yml
0 → 100644
... | ... | @@ -0,0 +1,84 @@ |
1 | +# my global config | |
2 | +global: | |
3 | + scrape_interval: 15s # By default, scrape targets every 15 seconds. | |
4 | + evaluation_interval: 15s # By default, evaluate rules every 15 seconds. | |
5 | + # scrape_timeout is set to the global default (10s). | |
6 | + | |
7 | + # Attach these labels to any time series or alerts when communicating with | |
8 | + # external systems (federation, remote storage, Alertmanager). | |
9 | + external_labels: | |
10 | + monitor: 'my-project' | |
11 | + | |
12 | +# Load and evaluate rules in this file every 'evaluation_interval' seconds. | |
13 | +rule_files: | |
14 | + - 'alert.rules' | |
15 | + # - "first.rules" | |
16 | + # - "second.rules" | |
17 | + | |
18 | +# alert | |
19 | +alerting: | |
20 | + alertmanagers: | |
21 | + - scheme: http | |
22 | + static_configs: | |
23 | + - targets: | |
24 | + - "alertmanager:9093" | |
25 | + | |
26 | +# Scrape configurations, one job per group of endpoints to scrape: | |
27 | +# here, Prometheus itself and the OpenTelemetry collector. | |
28 | +scrape_configs: | |
29 | + # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config. | |
30 | + | |
31 | + # - job_name: app | |
32 | + # scrape_interval: 5s | |
33 | + # static_configs: | |
34 | + # - targets: ['host.docker.internal:8000'] | |
35 | + | |
36 | + - job_name: 'prometheus' | |
37 | + | |
38 | + # Override the global default and scrape targets from this job every 5 seconds. | |
39 | + scrape_interval: 5s | |
40 | + | |
41 | + static_configs: | |
42 | + - targets: ['localhost:9090'] | |
43 | + | |
44 | + - job_name: 'otel-collector' | |
45 | + scrape_interval: 10s | |
46 | + static_configs: | |
47 | + - targets: ['otel-collector:8889'] | |
48 | + - targets: ['otel-collector:8888'] | |
49 | + | |
50 | +# - job_name: 'cadvisor' | |
51 | + | |
52 | +# # Override the global default and scrape targets from this job every 5 seconds. | |
53 | +# scrape_interval: 5s | |
54 | + | |
55 | +# dns_sd_configs: | |
56 | +# - names: | |
57 | +# - 'tasks.cadvisor' | |
58 | +# type: 'A' | |
59 | +# port: 8080 | |
60 | + | |
61 | +# # static_configs: | |
62 | +# # - targets: ['cadvisor:8080'] | |
63 | + | |
64 | +# - job_name: 'node-exporter' | |
65 | + | |
66 | +# # Override the global default and scrape targets from this job every 5 seconds. | |
67 | +# scrape_interval: 5s | |
68 | + | |
69 | +# dns_sd_configs: | |
70 | +# - names: | |
71 | +# - 'tasks.node-exporter' | |
72 | +# type: 'A' | |
73 | +# port: 9100 | |
74 | + | |
75 | +# - job_name: 'pushgateway' | |
76 | +# scrape_interval: 10s | |
77 | +# dns_sd_configs: | |
78 | +# - names: | |
79 | +# - 'tasks.pushgateway' | |
80 | +# type: 'A' | |
81 | +# port: 9091 | |
82 | + | |
83 | +# static_configs: | |
84 | +# - targets: ['node-exporter:9100'] | |
0 | 85 | \ No newline at end of file | ... | ... |
prometheus-grafana/alertmanager/config.yml
prometheus-grafana/docker-compose.yml
... | ... | @@ -1,82 +0,0 @@ |
1 | -version: '3.8' | |
2 | - | |
3 | -volumes: | |
4 | - prometheus_data: {} | |
5 | - grafana_data: {} | |
6 | - | |
7 | -services: | |
8 | - prometheus: | |
9 | - image: prom/prometheus | |
10 | - restart: always | |
11 | - volumes: | |
12 | - - ./prometheus:/etc/prometheus/ | |
13 | - - prometheus_data:/prometheus | |
14 | - command: | |
15 | - - '--config.file=/etc/prometheus/prometheus.yml' | |
16 | - - '--storage.tsdb.path=/prometheus' | |
17 | - - '--web.console.libraries=/usr/share/prometheus/console_libraries' | |
18 | - - '--web.console.templates=/usr/share/prometheus/consoles' | |
19 | - ports: | |
20 | - - 9090:9090 | |
21 | - links: | |
22 | - - cadvisor:cadvisor | |
23 | - - alertmanager:alertmanager | |
24 | - depends_on: | |
25 | - - cadvisor | |
26 | - | |
27 | - node-exporter: | |
28 | - image: prom/node-exporter | |
29 | - volumes: | |
30 | - - /proc:/host/proc:ro | |
31 | - - /sys:/host/sys:ro | |
32 | - - /:/rootfs:ro | |
33 | - command: | |
34 | - - '--path.procfs=/host/proc' | |
35 | - - '--path.sysfs=/host/sys' | |
36 | - - --collector.filesystem.ignored-mount-points | |
37 | - - '^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)' | |
38 | - ports: | |
39 | - - 9100:9100 | |
40 | - restart: always | |
41 | - deploy: | |
42 | - mode: global | |
43 | - | |
44 | - alertmanager: | |
45 | - image: prom/alertmanager | |
46 | - restart: always | |
47 | - ports: | |
48 | - - 9093:9093 | |
49 | - volumes: | |
50 | - - ./alertmanager/:/etc/alertmanager/ | |
51 | - command: | |
52 | - - '--config.file=/etc/alertmanager/config.yml' | |
53 | - - '--storage.path=/alertmanager' | |
54 | - | |
55 | - cadvisor: | |
56 | - image: gcr.io/cadvisor/cadvisor | |
57 | - volumes: | |
58 | - - /:/rootfs:ro | |
59 | - - /var/run:/var/run:rw | |
60 | - - /sys:/sys:ro | |
61 | - - /var/lib/docker/:/var/lib/docker:ro | |
62 | - ports: | |
63 | - - 8080:8080 | |
64 | - restart: always | |
65 | - deploy: | |
66 | - mode: global | |
67 | - | |
68 | - grafana: | |
69 | - image: grafana/grafana | |
70 | - user: '472' | |
71 | - restart: always | |
72 | - environment: | |
73 | - GF_INSTALL_PLUGINS: 'grafana-clock-panel,grafana-simple-json-datasource' | |
74 | - volumes: | |
75 | - - grafana_data:/var/lib/grafana | |
76 | - - ./grafana/provisioning/:/etc/grafana/provisioning/ | |
77 | - env_file: | |
78 | - - ./grafana/config.monitoring | |
79 | - ports: | |
80 | - - 3000:3000 | |
81 | - depends_on: | |
82 | - - prometheus |
prometheus-grafana/grafana/config.monitoring
prometheus-grafana/grafana/provisioning/dashboards/dashboard.yml
prometheus-grafana/grafana/provisioning/datasources/datasource.yml
... | ... | @@ -1,50 +0,0 @@ |
1 | -# config file version | |
2 | -apiVersion: 1 | |
3 | - | |
4 | -# list of datasources that should be deleted from the database | |
5 | -deleteDatasources: | |
6 | - - name: Prometheus | |
7 | - orgId: 1 | |
8 | - | |
9 | -# list of datasources to insert/update depending | |
10 | -# whats available in the database | |
11 | -datasources: | |
12 | - # <string, required> name of the datasource. Required | |
13 | -- name: Prometheus | |
14 | - # <string, required> datasource type. Required | |
15 | - type: prometheus | |
16 | - # <string, required> access mode. direct or proxy. Required | |
17 | - access: proxy | |
18 | - # <int> org id. will default to orgId 1 if not specified | |
19 | - orgId: 1 | |
20 | - # <string> url | |
21 | - url: http://prometheus:9090 | |
22 | - # <string> database password, if used | |
23 | - password: | |
24 | - # <string> database user, if used | |
25 | - user: | |
26 | - # <string> database name, if used | |
27 | - database: | |
28 | - # <bool> enable/disable basic auth | |
29 | - basicAuth: false | |
30 | - # <string> basic auth username, if used | |
31 | - basicAuthUser: | |
32 | - # <string> basic auth password, if used | |
33 | - basicAuthPassword: | |
34 | - # <bool> enable/disable with credentials headers | |
35 | - withCredentials: | |
36 | - # <bool> mark as default datasource. Max one per org | |
37 | - isDefault: true | |
38 | - # <map> fields that will be converted to json and stored in json_data | |
39 | - jsonData: | |
40 | - graphiteVersion: "1.1" | |
41 | - tlsAuth: false | |
42 | - tlsAuthWithCACert: false | |
43 | - # <string> json object of data that will be encrypted. | |
44 | - secureJsonData: | |
45 | - tlsCACert: "..." | |
46 | - tlsClientCert: "..." | |
47 | - tlsClientKey: "..." | |
48 | - version: 1 | |
49 | - # <bool> allow users to edit datasources from the UI. | |
50 | - editable: true | |
51 | 0 | \ No newline at end of file |
prometheus-grafana/prometheus.yml
... | ... | @@ -1,16 +0,0 @@ |
1 | -global: | |
2 | - scrape_interval: 15s | |
3 | - evaluation_interval: 15s | |
4 | - | |
5 | -rule_files: | |
6 | - # - "first.rules" | |
7 | - # - "second.rules" | |
8 | - | |
9 | -scrape_configs: | |
10 | - - job_name: prometheus | |
11 | - static_configs: | |
12 | - - targets: ['localhost:9090'] | |
13 | - - job_name: app | |
14 | - scrape_interval: 5s | |
15 | - static_configs: | |
16 | - - targets: ['host.docker.internal:10088'] | |
17 | 0 | \ No newline at end of file |
prometheus-grafana/prometheus/alert.rules
... | ... | @@ -1,22 +0,0 @@ |
1 | -groups: | |
2 | -- name: example | |
3 | - rules: | |
4 | - | |
5 | - # Alert for any instance that is unreachable for >2 minutes. | |
6 | - - alert: service_down | |
7 | - expr: up == 0 | |
8 | - for: 2m | |
9 | - labels: | |
10 | - severity: page | |
11 | - annotations: | |
12 | - summary: "Instance {{ $labels.instance }} down" | |
13 | - description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes." | |
14 | - | |
15 | - - alert: high_load | |
16 | - expr: node_load1 > 0.5 | |
17 | - for: 2m | |
18 | - labels: | |
19 | - severity: page | |
20 | - annotations: | |
21 | - summary: "Instance {{ $labels.instance }} under high load" | |
22 | - description: "{{ $labels.instance }} of job {{ $labels.job }} is under high load." |
prometheus-grafana/prometheus/prometheus.yml
... | ... | @@ -1,79 +0,0 @@ |
1 | -# my global config | |
2 | -global: | |
3 | - scrape_interval: 15s # By default, scrape targets every 15 seconds. | |
4 | - evaluation_interval: 15s # By default, scrape targets every 15 seconds. | |
5 | - # scrape_timeout is set to the global default (10s). | |
6 | - | |
7 | - # Attach these labels to any time series or alerts when communicating with | |
8 | - # external systems (federation, remote storage, Alertmanager). | |
9 | - external_labels: | |
10 | - monitor: 'my-project' | |
11 | - | |
12 | -# Load and evaluate rules in this file every 'evaluation_interval' seconds. | |
13 | -rule_files: | |
14 | - - 'alert.rules' | |
15 | - # - "first.rules" | |
16 | - # - "second.rules" | |
17 | - | |
18 | -# alert | |
19 | -alerting: | |
20 | - alertmanagers: | |
21 | - - scheme: http | |
22 | - static_configs: | |
23 | - - targets: | |
24 | - - "alertmanager:9093" | |
25 | - | |
26 | -# A scrape configuration containing exactly one endpoint to scrape: | |
27 | -# Here it's Prometheus itself. | |
28 | -scrape_configs: | |
29 | - # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config. | |
30 | - | |
31 | - - job_name: app | |
32 | - scrape_interval: 5s | |
33 | - static_configs: | |
34 | - - targets: ['host.docker.internal:8000'] | |
35 | - | |
36 | - - job_name: 'prometheus' | |
37 | - | |
38 | - # Override the global default and scrape targets from this job every 5 seconds. | |
39 | - scrape_interval: 5s | |
40 | - | |
41 | - static_configs: | |
42 | - - targets: ['localhost:9090'] | |
43 | - | |
44 | - | |
45 | - - job_name: 'cadvisor' | |
46 | - | |
47 | - # Override the global default and scrape targets from this job every 5 seconds. | |
48 | - scrape_interval: 5s | |
49 | - | |
50 | - dns_sd_configs: | |
51 | - - names: | |
52 | - - 'tasks.cadvisor' | |
53 | - type: 'A' | |
54 | - port: 8080 | |
55 | - | |
56 | -# static_configs: | |
57 | -# - targets: ['cadvisor:8080'] | |
58 | - | |
59 | - - job_name: 'node-exporter' | |
60 | - | |
61 | - # Override the global default and scrape targets from this job every 5 seconds. | |
62 | - scrape_interval: 5s | |
63 | - | |
64 | - dns_sd_configs: | |
65 | - - names: | |
66 | - - 'tasks.node-exporter' | |
67 | - type: 'A' | |
68 | - port: 9100 | |
69 | - | |
70 | -# - job_name: 'pushgateway' | |
71 | -# scrape_interval: 10s | |
72 | -# dns_sd_configs: | |
73 | -# - names: | |
74 | -# - 'tasks.pushgateway' | |
75 | -# type: 'A' | |
76 | -# port: 9091 | |
77 | - | |
78 | -# static_configs: | |
79 | -# - targets: ['node-exporter:9100'] | |
80 | 0 | \ No newline at end of file |