Commit 88b7e190b29a369a95244e89d23bc47edef6c51c

Authored by Tao Huang
1 parent 4bb7ce40
Exists in master

feat: add opentelemetry

prometheus-grafana-opentelemetry/alertmanager/config.yml 0 → 100644
... ... @@ -0,0 +1,10 @@
  1 +route:
  2 + receiver: 'slack'
  3 +
  4 +receivers:
  5 + - name: 'slack'
  6 +# slack_configs:
  7 +# - send_resolved: true
  8 +# username: '<username>'
  9 +# channel: '#<channel-name>'
  10 +# api_url: '<incoming-webhook-url>'
0 11 \ No newline at end of file
... ...
prometheus-grafana-opentelemetry/docker-compose.yml 0 → 100644
... ... @@ -0,0 +1,105 @@
  1 +version: '3.8'
  2 +
  3 +volumes:
  4 + prometheus_data: {}
  5 + grafana_data: {}
  6 +
  7 +services:
  8 + # Collector
  9 + otel-collector:
  10 + image: otel/opentelemetry-collector
  11 + restart: always
  12 + command: ["--config=/etc/otel-collector-config.yaml", ""]
  13 + volumes:
  14 + - ./otel/otel-collector-config.yaml:/etc/otel-collector-config.yaml
  15 + ports:
  16 + - "1888:1888" # pprof extension
  17 + - "8888:8888" # Prometheus metrics exposed by the collector
  18 + - "8889:8889" # Prometheus exporter metrics
  19 + - "13133:13133" # health_check extension
  20 + - "4317:4317" # OTLP gRPC receiver
  21 + - "55679:55679" # zpages extension
  22 + depends_on:
  23 + - prometheus
  24 +
  25 + prometheus:
  26 + image: prom/prometheus
  27 + restart: always
  28 + volumes:
  29 + - ./prometheus:/etc/prometheus/
  30 + - prometheus_data:/prometheus
  31 + command:
  32 + - '--config.file=/etc/prometheus/prometheus.yml'
  33 + - '--storage.tsdb.path=/prometheus'
  34 + - '--web.console.libraries=/usr/share/prometheus/console_libraries'
  35 + - '--web.console.templates=/usr/share/prometheus/consoles'
  36 + ports:
  37 + - 9090:9090
  38 + links:
  39 + - alertmanager:alertmanager
  40 +
  41 + alertmanager:
  42 + image: prom/alertmanager
  43 + restart: always
  44 + ports:
  45 + - 9093:9093
  46 + volumes:
  47 + - ./alertmanager/:/etc/alertmanager/
  48 + command:
  49 + - '--config.file=/etc/alertmanager/config.yml'
  50 + - '--storage.path=/alertmanager'
  51 +
  52 + # Jaeger
  53 + jaeger-all-in-one:
  54 + image: jaegertracing/all-in-one:latest
  55 + restart: always
  56 + ports:
  57 + - "16686:16686"
  58 + - "14268"
  59 + - "14250"
  60 +
  61 + grafana:
  62 + image: grafana/grafana
  63 + user: '0'
  64 + restart: always
  65 + environment:
  66 + GF_INSTALL_PLUGINS: 'grafana-clock-panel,grafana-simple-json-datasource'
  67 + volumes:
  68 + - grafana_data:/var/lib/grafana
  69 + - ./grafana/provisioning/:/etc/grafana/provisioning/
  70 + env_file:
  71 + - ./grafana/config.monitoring
  72 + ports:
  73 + - 5032:3000
  74 + depends_on:
  75 + - prometheus
  76 +
  77 + # node-exporter:
  78 + # image: prom/node-exporter
  79 + # volumes:
  80 + # - /proc:/host/proc:ro
  81 + # - /sys:/host/sys:ro
  82 + # - /:/rootfs:ro
  83 + # command:
  84 + # - '--path.procfs=/host/proc'
  85 + # - '--path.sysfs=/host/sys'
  86 + # - --collector.filesystem.ignored-mount-points
  87 + # - '^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)'
  88 + # ports:
  89 + # - 9100:9100
  90 + # restart: always
  91 + # deploy:
  92 + # mode: global
  93 +
  94 + # cadvisor:
  95 + # image: gcr.io/cadvisor/cadvisor
  96 + # volumes:
  97 + # - /:/rootfs:ro
  98 + # - /var/run:/var/run:rw
  99 + # - /sys:/sys:ro
  100 + # - /var/lib/docker/:/var/lib/docker:ro
  101 + # ports:
  102 + # - 8080:8080
  103 + # restart: always
  104 + # deploy:
  105 + # mode: global
0 106 \ No newline at end of file
... ...
prometheus-grafana-opentelemetry/grafana/config.monitoring 0 → 100644
... ... @@ -0,0 +1,3 @@
  1 +GF_SECURITY_ADMIN_USER=admin
  2 +GF_SECURITY_ADMIN_PASSWORD=foobar
  3 +GF_USERS_ALLOW_SIGN_UP=false
0 4 \ No newline at end of file
... ...
prometheus-grafana-opentelemetry/grafana/provisioning/dashboards/dashboard.yml 0 → 100644
... ... @@ -0,0 +1,11 @@
  1 +apiVersion: 1
  2 +
  3 +providers:
  4 +- name: 'Prometheus'
  5 + orgId: 1
  6 + folder: ''
  7 + type: file
  8 + disableDeletion: false
  9 + editable: true
  10 + options:
  11 + path: /etc/grafana/provisioning/dashboards
0 12 \ No newline at end of file
... ...
prometheus-grafana-opentelemetry/grafana/provisioning/datasources/datasource.yml 0 → 100644
... ... @@ -0,0 +1,50 @@
  1 +# config file version
  2 +apiVersion: 1
  3 +
  4 +# list of datasources that should be deleted from the database
  5 +deleteDatasources:
  6 + - name: Prometheus
  7 + orgId: 1
  8 +
  9 +# list of datasources to insert/update, depending
  10 +# on what's available in the database
  11 +datasources:
  12 + # <string, required> name of the datasource. Required
  13 +- name: Prometheus
  14 + # <string, required> datasource type. Required
  15 + type: prometheus
  16 + # <string, required> access mode. direct or proxy. Required
  17 + access: proxy
  18 + # <int> org id. will default to orgId 1 if not specified
  19 + orgId: 1
  20 + # <string> url
  21 + url: http://prometheus:9090
  22 + # <string> database password, if used
  23 + password:
  24 + # <string> database user, if used
  25 + user:
  26 + # <string> database name, if used
  27 + database:
  28 + # <bool> enable/disable basic auth
  29 + basicAuth: false
  30 + # <string> basic auth username, if used
  31 + basicAuthUser:
  32 + # <string> basic auth password, if used
  33 + basicAuthPassword:
  34 + # <bool> enable/disable with credentials headers
  35 + withCredentials:
  36 + # <bool> mark as default datasource. Max one per org
  37 + isDefault: true
  38 + # <map> fields that will be converted to json and stored in json_data
  39 + jsonData:
  40 + graphiteVersion: "1.1"
  41 + tlsAuth: false
  42 + tlsAuthWithCACert: false
  43 + # <string> json object of data that will be encrypted.
  44 + secureJsonData:
  45 + tlsCACert: "..."
  46 + tlsClientCert: "..."
  47 + tlsClientKey: "..."
  48 + version: 1
  49 + # <bool> allow users to edit datasources from the UI.
  50 + editable: true
0 51 \ No newline at end of file
... ...
prometheus-grafana-opentelemetry/otel/otel-collector-config.yaml 0 → 100644
... ... @@ -0,0 +1,43 @@
  1 +receivers:
  2 + otlp:
  3 + protocols:
  4 + grpc:
  5 +
  6 +exporters:
  7 + prometheus:
  8 + endpoint: "0.0.0.0:8889"
  9 + const_labels:
  10 + label1: value1
  11 +
  12 + logging:
  13 +
  14 + # zipkin:
  15 + # endpoint: "http://zipkin-all-in-one:9411/api/v2/spans"
  16 + # format: proto
  17 +
  18 + jaeger:
  19 + endpoint: jaeger-all-in-one:14250
  20 + tls:
  21 + insecure: true
  22 +
  23 +processors:
  24 + batch:
  25 +
  26 +extensions:
  27 + health_check:
  28 + pprof:
  29 + endpoint: :1888
  30 + zpages:
  31 + endpoint: :55679
  32 +
  33 +service:
  34 + extensions: [pprof, zpages, health_check]
  35 + pipelines:
  36 + traces:
  37 + receivers: [otlp]
  38 + processors: [batch]
  39 + exporters: [logging, jaeger]
  40 + metrics:
  41 + receivers: [otlp]
  42 + processors: [batch]
  43 + exporters: [logging, prometheus]
0 44 \ No newline at end of file
... ...
prometheus-grafana-opentelemetry/prometheus/alert.rules 0 → 100644
... ... @@ -0,0 +1,22 @@
  1 +groups:
  2 +- name: example
  3 + rules:
  4 +
  5 + # Alert for any instance that is unreachable for >2 minutes.
  6 + - alert: service_down
  7 + expr: up == 0
  8 + for: 2m
  9 + labels:
  10 + severity: page
  11 + annotations:
  12 + summary: "Instance {{ $labels.instance }} down"
  13 + description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes."
  14 +
  15 + - alert: high_load
  16 + expr: node_load1 > 0.5
  17 + for: 2m
  18 + labels:
  19 + severity: page
  20 + annotations:
  21 + summary: "Instance {{ $labels.instance }} under high load"
  22 + description: "{{ $labels.instance }} of job {{ $labels.job }} is under high load."
... ...
prometheus-grafana-opentelemetry/prometheus/prometheus.yml 0 → 100644
... ... @@ -0,0 +1,84 @@
  1 +# my global config
  2 +global:
  3 + scrape_interval: 15s # By default, scrape targets every 15 seconds.
  4 + evaluation_interval: 15s # By default, evaluate rules every 15 seconds.
  5 + # scrape_timeout is set to the global default (10s).
  6 +
  7 + # Attach these labels to any time series or alerts when communicating with
  8 + # external systems (federation, remote storage, Alertmanager).
  9 + external_labels:
  10 + monitor: 'my-project'
  11 +
  12 +# Load and evaluate rules in this file every 'evaluation_interval' seconds.
  13 +rule_files:
  14 + - 'alert.rules'
  15 + # - "first.rules"
  16 + # - "second.rules"
  17 +
  18 +# alert
  19 +alerting:
  20 + alertmanagers:
  21 + - scheme: http
  22 + static_configs:
  23 + - targets:
  24 + - "alertmanager:9093"
  25 +
  26 +# Scrape configurations: one job for Prometheus itself and
  27 +# one for the OpenTelemetry collector.
  28 +scrape_configs:
  29 + # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  30 +
  31 + # - job_name: app
  32 + # scrape_interval: 5s
  33 + # static_configs:
  34 + # - targets: ['host.docker.internal:8000']
  35 +
  36 + - job_name: 'prometheus'
  37 +
  38 + # Override the global default and scrape targets from this job every 5 seconds.
  39 + scrape_interval: 5s
  40 +
  41 + static_configs:
  42 + - targets: ['localhost:9090']
  43 +
  44 + - job_name: 'otel-collector'
  45 + scrape_interval: 10s
  46 + static_configs:
  47 + - targets: ['otel-collector:8889']
  48 + - targets: ['otel-collector:8888']
  49 +
  50 +# - job_name: 'cadvisor'
  51 +
  52 +# # Override the global default and scrape targets from this job every 5 seconds.
  53 +# scrape_interval: 5s
  54 +
  55 +# dns_sd_configs:
  56 +# - names:
  57 +# - 'tasks.cadvisor'
  58 +# type: 'A'
  59 +# port: 8080
  60 +
  61 +# # static_configs:
  62 +# # - targets: ['cadvisor:8080']
  63 +
  64 +# - job_name: 'node-exporter'
  65 +
  66 +# # Override the global default and scrape targets from this job every 5 seconds.
  67 +# scrape_interval: 5s
  68 +
  69 +# dns_sd_configs:
  70 +# - names:
  71 +# - 'tasks.node-exporter'
  72 +# type: 'A'
  73 +# port: 9100
  74 +
  75 +# - job_name: 'pushgateway'
  76 +# scrape_interval: 10s
  77 +# dns_sd_configs:
  78 +# - names:
  79 +# - 'tasks.pushgateway'
  80 +# type: 'A'
  81 +# port: 9091
  82 +
  83 +# static_configs:
  84 +# - targets: ['node-exporter:9100']
0 85 \ No newline at end of file
... ...
prometheus-grafana/alertmanager/config.yml
... ... @@ -1,10 +0,0 @@
1   -route:
2   - receiver: 'slack'
3   -
4   -receivers:
5   - - name: 'slack'
6   -# slack_configs:
7   -# - send_resolved: true
8   -# username: '<username>'
9   -# channel: '#<channel-name>'
10   -# api_url: '<incomming-webhook-url>'
11 0 \ No newline at end of file
prometheus-grafana/docker-compose.yml
... ... @@ -1,82 +0,0 @@
1   -version: '3.8'
2   -
3   -volumes:
4   - prometheus_data: {}
5   - grafana_data: {}
6   -
7   -services:
8   - prometheus:
9   - image: prom/prometheus
10   - restart: always
11   - volumes:
12   - - ./prometheus:/etc/prometheus/
13   - - prometheus_data:/prometheus
14   - command:
15   - - '--config.file=/etc/prometheus/prometheus.yml'
16   - - '--storage.tsdb.path=/prometheus'
17   - - '--web.console.libraries=/usr/share/prometheus/console_libraries'
18   - - '--web.console.templates=/usr/share/prometheus/consoles'
19   - ports:
20   - - 9090:9090
21   - links:
22   - - cadvisor:cadvisor
23   - - alertmanager:alertmanager
24   - depends_on:
25   - - cadvisor
26   -
27   - node-exporter:
28   - image: prom/node-exporter
29   - volumes:
30   - - /proc:/host/proc:ro
31   - - /sys:/host/sys:ro
32   - - /:/rootfs:ro
33   - command:
34   - - '--path.procfs=/host/proc'
35   - - '--path.sysfs=/host/sys'
36   - - --collector.filesystem.ignored-mount-points
37   - - '^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)'
38   - ports:
39   - - 9100:9100
40   - restart: always
41   - deploy:
42   - mode: global
43   -
44   - alertmanager:
45   - image: prom/alertmanager
46   - restart: always
47   - ports:
48   - - 9093:9093
49   - volumes:
50   - - ./alertmanager/:/etc/alertmanager/
51   - command:
52   - - '--config.file=/etc/alertmanager/config.yml'
53   - - '--storage.path=/alertmanager'
54   -
55   - cadvisor:
56   - image: gcr.io/cadvisor/cadvisor
57   - volumes:
58   - - /:/rootfs:ro
59   - - /var/run:/var/run:rw
60   - - /sys:/sys:ro
61   - - /var/lib/docker/:/var/lib/docker:ro
62   - ports:
63   - - 8080:8080
64   - restart: always
65   - deploy:
66   - mode: global
67   -
68   - grafana:
69   - image: grafana/grafana
70   - user: '472'
71   - restart: always
72   - environment:
73   - GF_INSTALL_PLUGINS: 'grafana-clock-panel,grafana-simple-json-datasource'
74   - volumes:
75   - - grafana_data:/var/lib/grafana
76   - - ./grafana/provisioning/:/etc/grafana/provisioning/
77   - env_file:
78   - - ./grafana/config.monitoring
79   - ports:
80   - - 3000:3000
81   - depends_on:
82   - - prometheus
prometheus-grafana/grafana/config.monitoring
... ... @@ -1,3 +0,0 @@
1   -GF_SECURITY_ADMIN_USER=admin
2   -GF_SECURITY_ADMIN_PASSWORD=foobar
3   -GF_USERS_ALLOW_SIGN_UP=false
4 0 \ No newline at end of file
prometheus-grafana/grafana/provisioning/dashboards/dashboard.yml
... ... @@ -1,11 +0,0 @@
1   -apiVersion: 1
2   -
3   -providers:
4   -- name: 'Prometheus'
5   - orgId: 1
6   - folder: ''
7   - type: file
8   - disableDeletion: false
9   - editable: true
10   - options:
11   - path: /etc/grafana/provisioning/dashboards
12 0 \ No newline at end of file
prometheus-grafana/grafana/provisioning/datasources/datasource.yml
... ... @@ -1,50 +0,0 @@
1   -# config file version
2   -apiVersion: 1
3   -
4   -# list of datasources that should be deleted from the database
5   -deleteDatasources:
6   - - name: Prometheus
7   - orgId: 1
8   -
9   -# list of datasources to insert/update depending
10   -# whats available in the database
11   -datasources:
12   - # <string, required> name of the datasource. Required
13   -- name: Prometheus
14   - # <string, required> datasource type. Required
15   - type: prometheus
16   - # <string, required> access mode. direct or proxy. Required
17   - access: proxy
18   - # <int> org id. will default to orgId 1 if not specified
19   - orgId: 1
20   - # <string> url
21   - url: http://prometheus:9090
22   - # <string> database password, if used
23   - password:
24   - # <string> database user, if used
25   - user:
26   - # <string> database name, if used
27   - database:
28   - # <bool> enable/disable basic auth
29   - basicAuth: false
30   - # <string> basic auth username, if used
31   - basicAuthUser:
32   - # <string> basic auth password, if used
33   - basicAuthPassword:
34   - # <bool> enable/disable with credentials headers
35   - withCredentials:
36   - # <bool> mark as default datasource. Max one per org
37   - isDefault: true
38   - # <map> fields that will be converted to json and stored in json_data
39   - jsonData:
40   - graphiteVersion: "1.1"
41   - tlsAuth: false
42   - tlsAuthWithCACert: false
43   - # <string> json object of data that will be encrypted.
44   - secureJsonData:
45   - tlsCACert: "..."
46   - tlsClientCert: "..."
47   - tlsClientKey: "..."
48   - version: 1
49   - # <bool> allow users to edit datasources from the UI.
50   - editable: true
51 0 \ No newline at end of file
prometheus-grafana/prometheus.yml
... ... @@ -1,16 +0,0 @@
1   -global:
2   - scrape_interval: 15s
3   - evaluation_interval: 15s
4   -
5   -rule_files:
6   - # - "first.rules"
7   - # - "second.rules"
8   -
9   -scrape_configs:
10   - - job_name: prometheus
11   - static_configs:
12   - - targets: ['localhost:9090']
13   - - job_name: app
14   - scrape_interval: 5s
15   - static_configs:
16   - - targets: ['host.docker.internal:10088']
17 0 \ No newline at end of file
prometheus-grafana/prometheus/alert.rules
... ... @@ -1,22 +0,0 @@
1   -groups:
2   -- name: example
3   - rules:
4   -
5   - # Alert for any instance that is unreachable for >2 minutes.
6   - - alert: service_down
7   - expr: up == 0
8   - for: 2m
9   - labels:
10   - severity: page
11   - annotations:
12   - summary: "Instance {{ $labels.instance }} down"
13   - description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes."
14   -
15   - - alert: high_load
16   - expr: node_load1 > 0.5
17   - for: 2m
18   - labels:
19   - severity: page
20   - annotations:
21   - summary: "Instance {{ $labels.instance }} under high load"
22   - description: "{{ $labels.instance }} of job {{ $labels.job }} is under high load."
prometheus-grafana/prometheus/prometheus.yml
... ... @@ -1,79 +0,0 @@
1   -# my global config
2   -global:
3   - scrape_interval: 15s # By default, scrape targets every 15 seconds.
4   - evaluation_interval: 15s # By default, scrape targets every 15 seconds.
5   - # scrape_timeout is set to the global default (10s).
6   -
7   - # Attach these labels to any time series or alerts when communicating with
8   - # external systems (federation, remote storage, Alertmanager).
9   - external_labels:
10   - monitor: 'my-project'
11   -
12   -# Load and evaluate rules in this file every 'evaluation_interval' seconds.
13   -rule_files:
14   - - 'alert.rules'
15   - # - "first.rules"
16   - # - "second.rules"
17   -
18   -# alert
19   -alerting:
20   - alertmanagers:
21   - - scheme: http
22   - static_configs:
23   - - targets:
24   - - "alertmanager:9093"
25   -
26   -# A scrape configuration containing exactly one endpoint to scrape:
27   -# Here it's Prometheus itself.
28   -scrape_configs:
29   - # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
30   -
31   - - job_name: app
32   - scrape_interval: 5s
33   - static_configs:
34   - - targets: ['host.docker.internal:8000']
35   -
36   - - job_name: 'prometheus'
37   -
38   - # Override the global default and scrape targets from this job every 5 seconds.
39   - scrape_interval: 5s
40   -
41   - static_configs:
42   - - targets: ['localhost:9090']
43   -
44   -
45   - - job_name: 'cadvisor'
46   -
47   - # Override the global default and scrape targets from this job every 5 seconds.
48   - scrape_interval: 5s
49   -
50   - dns_sd_configs:
51   - - names:
52   - - 'tasks.cadvisor'
53   - type: 'A'
54   - port: 8080
55   -
56   -# static_configs:
57   -# - targets: ['cadvisor:8080']
58   -
59   - - job_name: 'node-exporter'
60   -
61   - # Override the global default and scrape targets from this job every 5 seconds.
62   - scrape_interval: 5s
63   -
64   - dns_sd_configs:
65   - - names:
66   - - 'tasks.node-exporter'
67   - type: 'A'
68   - port: 9100
69   -
70   -# - job_name: 'pushgateway'
71   -# scrape_interval: 10s
72   -# dns_sd_configs:
73   -# - names:
74   -# - 'tasks.pushgateway'
75   -# type: 'A'
76   -# port: 9091
77   -
78   -# static_configs:
79   -# - targets: ['node-exporter:9100']
80 0 \ No newline at end of file