feat: add opentelemetry

Tao Huang
1 parent 4bb7ce40
Showing 16 changed files with 328 additions and 273 deletions Show diff stats
prometheus-grafana-opentelemetry/alertmanager/config.yml
prometheus-grafana-opentelemetry/docker-compose.yml
prometheus-grafana-opentelemetry/grafana/config.monitoring
prometheus-grafana-opentelemetry/grafana/provisioning/dashboards/dashboard.yml
prometheus-grafana-opentelemetry/grafana/provisioning/datasources/datasource.yml
prometheus-grafana-opentelemetry/otel/otel-collector-config.yaml
prometheus-grafana-opentelemetry/prometheus/alert.rules
prometheus-grafana-opentelemetry/prometheus/prometheus.yml
prometheus-grafana/alertmanager/config.yml
prometheus-grafana/docker-compose.yml
prometheus-grafana/grafana/config.monitoring
prometheus-grafana/grafana/provisioning/dashboards/dashboard.yml
prometheus-grafana/grafana/provisioning/datasources/datasource.yml
prometheus-grafana/prometheus.yml
prometheus-grafana/prometheus/alert.rules
prometheus-grafana/prometheus/prometheus.yml
@@ -0,0 +1,10 @@
+route:
+  receiver: 'slack'
+
+receivers:
+  - name: 'slack'
+#       slack_configs:
+#           - send_resolved: true
+#             username: '<username>'
+#             channel: '#<channel-name>'
+#             api_url: '<incoming-webhook-url>'
 \ No newline at end of file
@@ -0,0 +1,105 @@
+version: '3.8'
+
+volumes:
+  prometheus_data: {}
+  grafana_data: {}
+
+services:
+  # Collector
+  otel-collector:
+    image: otel/opentelemetry-collector
+    restart: always
+    command: ["--config=/etc/otel-collector-config.yaml", ""]
+    volumes:
+      - ./otel/otel-collector-config.yaml:/etc/otel-collector-config.yaml
+    ports:
+      - "1888:1888"   # pprof extension
+      - "8888:8888"   # Prometheus metrics exposed by the collector
+      - "8889:8889"   # Prometheus exporter metrics
+      - "13133:13133" # health_check extension
+      - "4317:4317"   # OTLP gRPC receiver
+      - "55679:55679" # zpages extension
+    depends_on:
+      - prometheus
+
+  prometheus:
+    image: prom/prometheus
+    restart: always
+    volumes:
+      - ./prometheus:/etc/prometheus/
+      - prometheus_data:/prometheus
+    command:
+      - '--config.file=/etc/prometheus/prometheus.yml'
+      - '--storage.tsdb.path=/prometheus'
+      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
+      - '--web.console.templates=/usr/share/prometheus/consoles'
+    ports:
+      - 9090:9090
+    links:
+      - alertmanager:alertmanager
+
+  alertmanager:
+    image: prom/alertmanager
+    restart: always
+    ports:
+      - 9093:9093
+    volumes:
+      - ./alertmanager/:/etc/alertmanager/
+    command:
+      - '--config.file=/etc/alertmanager/config.yml'
+      - '--storage.path=/alertmanager'
+
+  # Jaeger
+  jaeger-all-in-one:
+    image: jaegertracing/all-in-one:latest
+    restart: always
+    ports:
+      - "16686:16686"
+      - "14268"
+      - "14250"
+      
+  grafana:
+    image: grafana/grafana
+    user: '0'
+    restart: always
+    environment:
+      GF_INSTALL_PLUGINS: 'grafana-clock-panel,grafana-simple-json-datasource'
+    volumes:
+      - grafana_data:/var/lib/grafana
+      - ./grafana/provisioning/:/etc/grafana/provisioning/
+    env_file:
+      - ./grafana/config.monitoring
+    ports:
+      - 5032:3000
+    depends_on:
+      - prometheus
+
+  # node-exporter:
+  #   image: prom/node-exporter
+  #   volumes:
+  #     - /proc:/host/proc:ro
+  #     - /sys:/host/sys:ro
+  #     - /:/rootfs:ro
+  #   command:
+  #     - '--path.procfs=/host/proc'
+  #     - '--path.sysfs=/host/sys'
+  #     - --collector.filesystem.ignored-mount-points
+  #     - '^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)'
+  #   ports:
+  #     - 9100:9100
+  #   restart: always
+  #   deploy:
+  #     mode: global
+
+  # cadvisor:
+  #   image: gcr.io/cadvisor/cadvisor
+  #   volumes:
+  #     - /:/rootfs:ro
+  #     - /var/run:/var/run:rw
+  #     - /sys:/sys:ro
+  #     - /var/lib/docker/:/var/lib/docker:ro
+  #   ports:
+  #     - 8080:8080
+  #   restart: always
+  #   deploy:
+  #     mode: global
 \ No newline at end of file
@@ -0,0 +1,3 @@
+GF_SECURITY_ADMIN_USER=admin
+GF_SECURITY_ADMIN_PASSWORD=foobar
+GF_USERS_ALLOW_SIGN_UP=false
 \ No newline at end of file
@@ -0,0 +1,11 @@
+apiVersion: 1
+
+providers:
+- name: 'Prometheus'
+  orgId: 1
+  folder: ''
+  type: file
+  disableDeletion: false
+  editable: true
+  options:
+    path: /etc/grafana/provisioning/dashboards
 \ No newline at end of file
@@ -0,0 +1,50 @@
+# config file version
+apiVersion: 1
+
+# list of datasources that should be deleted from the database
+deleteDatasources:
+  - name: Prometheus
+    orgId: 1
+
+# list of datasources to insert/update depending
+# on what's available in the database
+datasources:
+  # <string, required> name of the datasource. Required
+- name: Prometheus
+  # <string, required> datasource type. Required
+  type: prometheus
+  # <string, required> access mode. direct or proxy. Required
+  access: proxy
+  # <int> org id. will default to orgId 1 if not specified
+  orgId: 1
+  # <string> url
+  url: http://prometheus:9090
+  # <string> database password, if used
+  password:
+  # <string> database user, if used
+  user:
+  # <string> database name, if used
+  database:
+  # <bool> enable/disable basic auth
+  basicAuth: false
+  # <string> basic auth username, if used
+  basicAuthUser:
+  # <string> basic auth password, if used
+  basicAuthPassword:
+  # <bool> enable/disable with credentials headers
+  withCredentials:
+  # <bool> mark as default datasource. Max one per org
+  isDefault: true
+  # <map> fields that will be converted to json and stored in json_data
+  jsonData:
+     graphiteVersion: "1.1"
+     tlsAuth: false
+     tlsAuthWithCACert: false
+  # <string> json object of data that will be encrypted.
+  secureJsonData:
+    tlsCACert: "..."
+    tlsClientCert: "..."
+    tlsClientKey: "..."
+  version: 1
+  # <bool> allow users to edit datasources from the UI.
+  editable: true
 \ No newline at end of file
@@ -0,0 +1,43 @@
+receivers:
+  otlp:
+    protocols:
+      grpc:
+
+exporters:
+  prometheus:
+    endpoint: "0.0.0.0:8889"
+    const_labels:
+      label1: value1
+
+  logging:
+
+  # zipkin:
+  #   endpoint: "http://zipkin-all-in-one:9411/api/v2/spans"
+  #   format: proto
+
+  jaeger:
+    endpoint: jaeger-all-in-one:14250
+    tls:
+      insecure: true
+
+processors:
+  batch:
+
+extensions:
+  health_check:
+  pprof:
+    endpoint: :1888
+  zpages:
+    endpoint: :55679
+
+service:
+  extensions: [pprof, zpages, health_check]
+  pipelines:
+    traces:
+      receivers: [otlp]
+      processors: [batch]
+      exporters: [logging, jaeger]
+    metrics:
+      receivers: [otlp]
+      processors: [batch]
+      exporters: [logging, prometheus]
 \ No newline at end of file
@@ -0,0 +1,22 @@
+groups:
+- name: example
+  rules:
+
+  # Alert for any instance that is unreachable for >2 minutes.
+  - alert: service_down
+    expr: up == 0
+    for: 2m
+    labels:
+      severity: page
+    annotations:
+      summary: "Instance {{ $labels.instance }} down"
+      description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes."
+
+  - alert: high_load
+    expr: node_load1 > 0.5
+    for: 2m
+    labels:
+      severity: page
+    annotations:
+      summary: "Instance {{ $labels.instance }} under high load"
+      description: "{{ $labels.instance }} of job {{ $labels.job }} is under high load."
@@ -0,0 +1,84 @@
+# my global config
+global:
+  scrape_interval:     15s # By default, scrape targets every 15 seconds.
+  evaluation_interval: 15s # By default, evaluate rules every 15 seconds.
+  # scrape_timeout is set to the global default (10s).
+
+  # Attach these labels to any time series or alerts when communicating with
+  # external systems (federation, remote storage, Alertmanager).
+  external_labels:
+      monitor: 'my-project'
+
+# Load and evaluate rules in this file every 'evaluation_interval' seconds.
+rule_files:
+  - 'alert.rules'
+  # - "first.rules"
+  # - "second.rules"
+
+# alert
+alerting:
+  alertmanagers:
+  - scheme: http
+    static_configs:
+    - targets:
+      - "alertmanager:9093"
+
+# Scrape configurations for the endpoints to monitor:
+# Prometheus itself and the OpenTelemetry collector.
+scrape_configs:
+  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
+
+  # - job_name: app
+  #   scrape_interval: 5s
+  #   static_configs:
+  #     - targets: ['host.docker.internal:8000']
+
+  - job_name: 'prometheus'
+
+    # Override the global default and scrape targets from this job every 5 seconds.
+    scrape_interval: 5s
+
+    static_configs:
+         - targets: ['localhost:9090']
+
+  - job_name: 'otel-collector'
+    scrape_interval: 10s
+    static_configs:
+      - targets: ['otel-collector:8889']
+      - targets: ['otel-collector:8888']
+      
+#   - job_name: 'cadvisor'
+
+#     # Override the global default and scrape targets from this job every 5 seconds.
+#     scrape_interval: 5s
+
+#     dns_sd_configs:
+#     - names:
+#       - 'tasks.cadvisor'
+#       type: 'A'
+#       port: 8080
+
+# #     static_configs:
+# #          - targets: ['cadvisor:8080']
+
+#   - job_name: 'node-exporter'
+
+#     # Override the global default and scrape targets from this job every 5 seconds.
+#     scrape_interval: 5s
+
+#     dns_sd_configs:
+#     - names:
+#       - 'tasks.node-exporter'
+#       type: 'A'
+#       port: 9100
+
+#  - job_name: 'pushgateway'
+#    scrape_interval: 10s
+#    dns_sd_configs:
+#    - names:
+#      - 'tasks.pushgateway'
+#      type: 'A'
+#      port: 9091
+
+#     static_configs:
+#          - targets: ['node-exporter:9100']
 \ No newline at end of file
@@ -1,10 +0,0 @@
-route:
-  receiver: 'slack'
-
-receivers:
-  - name: 'slack'
-#       slack_configs:
-#           - send_resolved: true
-#             username: '<username>'
-#             channel: '#<channel-name>'
-#             api_url: '<incomming-webhook-url>'
 \ No newline at end of file
@@ -1,82 +0,0 @@
-version: '3.8'
-
-volumes:
-  prometheus_data: {}
-  grafana_data: {}
-
-services:
-  prometheus:
-    image: prom/prometheus
-    restart: always
-    volumes:
-      - ./prometheus:/etc/prometheus/
-      - prometheus_data:/prometheus
-    command:
-      - '--config.file=/etc/prometheus/prometheus.yml'
-      - '--storage.tsdb.path=/prometheus'
-      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
-      - '--web.console.templates=/usr/share/prometheus/consoles'
-    ports:
-      - 9090:9090
-    links:
-      - cadvisor:cadvisor
-      - alertmanager:alertmanager
-    depends_on:
-      - cadvisor
-
-  node-exporter:
-    image: prom/node-exporter
-    volumes:
-      - /proc:/host/proc:ro
-      - /sys:/host/sys:ro
-      - /:/rootfs:ro
-    command:
-      - '--path.procfs=/host/proc'
-      - '--path.sysfs=/host/sys'
-      - --collector.filesystem.ignored-mount-points
-      - '^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)'
-    ports:
-      - 9100:9100
-    restart: always
-    deploy:
-      mode: global
-
-  alertmanager:
-    image: prom/alertmanager
-    restart: always
-    ports:
-      - 9093:9093
-    volumes:
-      - ./alertmanager/:/etc/alertmanager/
-    command:
-      - '--config.file=/etc/alertmanager/config.yml'
-      - '--storage.path=/alertmanager'
-
-  cadvisor:
-    image: gcr.io/cadvisor/cadvisor
-    volumes:
-      - /:/rootfs:ro
-      - /var/run:/var/run:rw
-      - /sys:/sys:ro
-      - /var/lib/docker/:/var/lib/docker:ro
-    ports:
-      - 8080:8080
-    restart: always
-    deploy:
-      mode: global
-
-  grafana:
-    image: grafana/grafana
-    user: '472'
-    restart: always
-    environment:
-      GF_INSTALL_PLUGINS: 'grafana-clock-panel,grafana-simple-json-datasource'
-    volumes:
-      - grafana_data:/var/lib/grafana
-      - ./grafana/provisioning/:/etc/grafana/provisioning/
-    env_file:
-      - ./grafana/config.monitoring
-    ports:
-      - 3000:3000
-    depends_on:
-      - prometheus
@@ -1,3 +0,0 @@
-GF_SECURITY_ADMIN_USER=admin
-GF_SECURITY_ADMIN_PASSWORD=foobar
-GF_USERS_ALLOW_SIGN_UP=false
 \ No newline at end of file
@@ -1,11 +0,0 @@
-apiVersion: 1
-
-providers:
-- name: 'Prometheus'
-  orgId: 1
-  folder: ''
-  type: file
-  disableDeletion: false
-  editable: true
-  options:
-    path: /etc/grafana/provisioning/dashboards
 \ No newline at end of file
@@ -1,50 +0,0 @@
-# config file version
-apiVersion: 1
-
-# list of datasources that should be deleted from the database
-deleteDatasources:
-  - name: Prometheus
-    orgId: 1
-
-# list of datasources to insert/update depending
-# whats available in the database
-datasources:
-  # <string, required> name of the datasource. Required
-- name: Prometheus
-  # <string, required> datasource type. Required
-  type: prometheus
-  # <string, required> access mode. direct or proxy. Required
-  access: proxy
-  # <int> org id. will default to orgId 1 if not specified
-  orgId: 1
-  # <string> url
-  url: http://prometheus:9090
-  # <string> database password, if used
-  password:
-  # <string> database user, if used
-  user:
-  # <string> database name, if used
-  database:
-  # <bool> enable/disable basic auth
-  basicAuth: false
-  # <string> basic auth username, if used
-  basicAuthUser:
-  # <string> basic auth password, if used
-  basicAuthPassword:
-  # <bool> enable/disable with credentials headers
-  withCredentials:
-  # <bool> mark as default datasource. Max one per org
-  isDefault: true
-  # <map> fields that will be converted to json and stored in json_data
-  jsonData:
-     graphiteVersion: "1.1"
-     tlsAuth: false
-     tlsAuthWithCACert: false
-  # <string> json object of data that will be encrypted.
-  secureJsonData:
-    tlsCACert: "..."
-    tlsClientCert: "..."
-    tlsClientKey: "..."
-  version: 1
-  # <bool> allow users to edit datasources from the UI.
-  editable: true
 \ No newline at end of file
@@ -1,16 +0,0 @@
-global:
-  scrape_interval:     15s
-  evaluation_interval: 15s
-
-rule_files:
-  # - "first.rules"
-  # - "second.rules"
-
-scrape_configs:
-  - job_name: prometheus
-    static_configs:
-      - targets: ['localhost:9090']
-  - job_name: app
-    scrape_interval: 5s
-    static_configs:
-      - targets: ['host.docker.internal:10088']
 \ No newline at end of file
@@ -1,22 +0,0 @@
-groups:
-- name: example
-  rules:
-
-  # Alert for any instance that is unreachable for >2 minutes.
-  - alert: service_down
-    expr: up == 0
-    for: 2m
-    labels:
-      severity: page
-    annotations:
-      summary: "Instance {{ $labels.instance }} down"
-      description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes."
-
-  - alert: high_load
-    expr: node_load1 > 0.5
-    for: 2m
-    labels:
-      severity: page
-    annotations:
-      summary: "Instance {{ $labels.instance }} under high load"
-      description: "{{ $labels.instance }} of job {{ $labels.job }} is under high load."
@@ -1,79 +0,0 @@
-# my global config
-global:
-  scrape_interval:     15s # By default, scrape targets every 15 seconds.
-  evaluation_interval: 15s # By default, scrape targets every 15 seconds.
-  # scrape_timeout is set to the global default (10s).
-
-  # Attach these labels to any time series or alerts when communicating with
-  # external systems (federation, remote storage, Alertmanager).
-  external_labels:
-      monitor: 'my-project'
-
-# Load and evaluate rules in this file every 'evaluation_interval' seconds.
-rule_files:
-  - 'alert.rules'
-  # - "first.rules"
-  # - "second.rules"
-
-# alert
-alerting:
-  alertmanagers:
-  - scheme: http
-    static_configs:
-    - targets:
-      - "alertmanager:9093"
-
-# A scrape configuration containing exactly one endpoint to scrape:
-# Here it's Prometheus itself.
-scrape_configs:
-  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
-
-  - job_name: app
-    scrape_interval: 5s
-    static_configs:
-      - targets: ['host.docker.internal:8000']
-
-  - job_name: 'prometheus'
-
-    # Override the global default and scrape targets from this job every 5 seconds.
-    scrape_interval: 5s
-
-    static_configs:
-         - targets: ['localhost:9090']
-
-
-  - job_name: 'cadvisor'
-
-    # Override the global default and scrape targets from this job every 5 seconds.
-    scrape_interval: 5s
-
-    dns_sd_configs:
-    - names:
-      - 'tasks.cadvisor'
-      type: 'A'
-      port: 8080
-
-#     static_configs:
-#          - targets: ['cadvisor:8080']
-
-  - job_name: 'node-exporter'
-
-    # Override the global default and scrape targets from this job every 5 seconds.
-    scrape_interval: 5s
-
-    dns_sd_configs:
-    - names:
-      - 'tasks.node-exporter'
-      type: 'A'
-      port: 9100
-
-#  - job_name: 'pushgateway'
-#    scrape_interval: 10s
-#    dns_sd_configs:
-#    - names:
-#      - 'tasks.pushgateway'
-#      type: 'A'
-#      port: 9091
-
-#     static_configs:
-#          - targets: ['node-exporter:9100']
 \ No newline at end of file
...	...	@@ -0,0 +1,10 @@
	1	+route:
	2	+ receiver: 'slack'
	3	+
	4	+receivers:
	5	+ - name: 'slack'
	6	+# slack_configs:
	7	+# - send_resolved: true
	8	+# username: '<username>'
	9	+# channel: '#<channel-name>'
	10	+# api_url: '<incoming-webhook-url>'
0	11	\ No newline at end of file
...	...
...	...	@@ -0,0 +1,105 @@
	1	+version: '3.8'
	2	+
	3	+volumes:
	4	+ prometheus_data: {}
	5	+ grafana_data: {}
	6	+
	7	+services:
	8	+ # Collector
	9	+ otel-collector:
	10	+ image: otel/opentelemetry-collector
	11	+ restart: always
	12	+ command: ["--config=/etc/otel-collector-config.yaml", ""]
	13	+ volumes:
	14	+ - ./otel/otel-collector-config.yaml:/etc/otel-collector-config.yaml
	15	+ ports:
	16	+ - "1888:1888" # pprof extension
	17	+ - "8888:8888" # Prometheus metrics exposed by the collector
	18	+ - "8889:8889" # Prometheus exporter metrics
	19	+ - "13133:13133" # health_check extension
	20	+ - "4317:4317" # OTLP gRPC receiver
	21	+ - "55679:55679" # zpages extension
	22	+ depends_on:
	23	+ - prometheus
	24	+
	25	+ prometheus:
	26	+ image: prom/prometheus
	27	+ restart: always
	28	+ volumes:
	29	+ - ./prometheus:/etc/prometheus/
	30	+ - prometheus_data:/prometheus
	31	+ command:
	32	+ - '--config.file=/etc/prometheus/prometheus.yml'
	33	+ - '--storage.tsdb.path=/prometheus'
	34	+ - '--web.console.libraries=/usr/share/prometheus/console_libraries'
	35	+ - '--web.console.templates=/usr/share/prometheus/consoles'
	36	+ ports:
	37	+ - 9090:9090
	38	+ links:
	39	+ - alertmanager:alertmanager
	40	+
	41	+ alertmanager:
	42	+ image: prom/alertmanager
	43	+ restart: always
	44	+ ports:
	45	+ - 9093:9093
	46	+ volumes:
	47	+ - ./alertmanager/:/etc/alertmanager/
	48	+ command:
	49	+ - '--config.file=/etc/alertmanager/config.yml'
	50	+ - '--storage.path=/alertmanager'
	51	+
	52	+ # Jaeger
	53	+ jaeger-all-in-one:
	54	+ image: jaegertracing/all-in-one:latest
	55	+ restart: always
	56	+ ports:
	57	+ - "16686:16686"
	58	+ - "14268"
	59	+ - "14250"
	60	+
	61	+ grafana:
	62	+ image: grafana/grafana
	63	+ user: '0'
	64	+ restart: always
	65	+ environment:
	66	+ GF_INSTALL_PLUGINS: 'grafana-clock-panel,grafana-simple-json-datasource'
	67	+ volumes:
	68	+ - grafana_data:/var/lib/grafana
	69	+ - ./grafana/provisioning/:/etc/grafana/provisioning/
	70	+ env_file:
	71	+ - ./grafana/config.monitoring
	72	+ ports:
	73	+ - 5032:3000
	74	+ depends_on:
	75	+ - prometheus
	76	+
	77	+ # node-exporter:
	78	+ # image: prom/node-exporter
	79	+ # volumes:
	80	+ # - /proc:/host/proc:ro
	81	+ # - /sys:/host/sys:ro
	82	+ # - /:/rootfs:ro
	83	+ # command:
	84	+ # - '--path.procfs=/host/proc'
	85	+ # - '--path.sysfs=/host/sys'
	86	+ # - --collector.filesystem.ignored-mount-points
	87	+ # - '^/(sys\|proc\|dev\|host\|etc\|rootfs/var/lib/docker/containers\|rootfs/var/lib/docker/overlay2\|rootfs/run/docker/netns\|rootfs/var/lib/docker/aufs)($$\|/)'
	88	+ # ports:
	89	+ # - 9100:9100
	90	+ # restart: always
	91	+ # deploy:
	92	+ # mode: global
	93	+
	94	+ # cadvisor:
	95	+ # image: gcr.io/cadvisor/cadvisor
	96	+ # volumes:
	97	+ # - /:/rootfs:ro
	98	+ # - /var/run:/var/run:rw
	99	+ # - /sys:/sys:ro
	100	+ # - /var/lib/docker/:/var/lib/docker:ro
	101	+ # ports:
	102	+ # - 8080:8080
	103	+ # restart: always
	104	+ # deploy:
	105	+ # mode: global
0	106	\ No newline at end of file
...	...
...	...	@@ -0,0 +1,3 @@
	1	+GF_SECURITY_ADMIN_USER=admin
	2	+GF_SECURITY_ADMIN_PASSWORD=foobar
	3	+GF_USERS_ALLOW_SIGN_UP=false
0	4	\ No newline at end of file
...	...
...	...	@@ -0,0 +1,11 @@
	1	+apiVersion: 1
	2	+
	3	+providers:
	4	+- name: 'Prometheus'
	5	+ orgId: 1
	6	+ folder: ''
	7	+ type: file
	8	+ disableDeletion: false
	9	+ editable: true
	10	+ options:
	11	+ path: /etc/grafana/provisioning/dashboards
0	12	\ No newline at end of file
...	...
...	...	@@ -0,0 +1,50 @@
	1	+# config file version
	2	+apiVersion: 1
	3	+
	4	+# list of datasources that should be deleted from the database
	5	+deleteDatasources:
	6	+ - name: Prometheus
	7	+ orgId: 1
	8	+
	9	+# list of datasources to insert/update depending
	10	+# on what's available in the database
	11	+datasources:
	12	+ # <string, required> name of the datasource. Required
	13	+- name: Prometheus
	14	+ # <string, required> datasource type. Required
	15	+ type: prometheus
	16	+ # <string, required> access mode. direct or proxy. Required
	17	+ access: proxy
	18	+ # <int> org id. will default to orgId 1 if not specified
	19	+ orgId: 1
	20	+ # <string> url
	21	+ url: http://prometheus:9090
	22	+ # <string> database password, if used
	23	+ password:
	24	+ # <string> database user, if used
	25	+ user:
	26	+ # <string> database name, if used
	27	+ database:
	28	+ # <bool> enable/disable basic auth
	29	+ basicAuth: false
	30	+ # <string> basic auth username, if used
	31	+ basicAuthUser:
	32	+ # <string> basic auth password, if used
	33	+ basicAuthPassword:
	34	+ # <bool> enable/disable with credentials headers
	35	+ withCredentials:
	36	+ # <bool> mark as default datasource. Max one per org
	37	+ isDefault: true
	38	+ # <map> fields that will be converted to json and stored in json_data
	39	+ jsonData:
	40	+ graphiteVersion: "1.1"
	41	+ tlsAuth: false
	42	+ tlsAuthWithCACert: false
	43	+ # <string> json object of data that will be encrypted.
	44	+ secureJsonData:
	45	+ tlsCACert: "..."
	46	+ tlsClientCert: "..."
	47	+ tlsClientKey: "..."
	48	+ version: 1
	49	+ # <bool> allow users to edit datasources from the UI.
	50	+ editable: true
0	51	\ No newline at end of file
...	...
...	...	@@ -0,0 +1,43 @@
	1	+receivers:
	2	+ otlp:
	3	+ protocols:
	4	+ grpc:
	5	+
	6	+exporters:
	7	+ prometheus:
	8	+ endpoint: "0.0.0.0:8889"
	9	+ const_labels:
	10	+ label1: value1
	11	+
	12	+ logging:
	13	+
	14	+ # zipkin:
	15	+ # endpoint: "http://zipkin-all-in-one:9411/api/v2/spans"
	16	+ # format: proto
	17	+
	18	+ jaeger:
	19	+ endpoint: jaeger-all-in-one:14250
	20	+ tls:
	21	+ insecure: true
	22	+
	23	+processors:
	24	+ batch:
	25	+
	26	+extensions:
	27	+ health_check:
	28	+ pprof:
	29	+ endpoint: :1888
	30	+ zpages:
	31	+ endpoint: :55679
	32	+
	33	+service:
	34	+ extensions: [pprof, zpages, health_check]
	35	+ pipelines:
	36	+ traces:
	37	+ receivers: [otlp]
	38	+ processors: [batch]
	39	+ exporters: [logging, jaeger]
	40	+ metrics:
	41	+ receivers: [otlp]
	42	+ processors: [batch]
	43	+ exporters: [logging, prometheus]
0	44	\ No newline at end of file
...	...
...	...	@@ -0,0 +1,22 @@
	1	+groups:
	2	+- name: example
	3	+ rules:
	4	+
	5	+ # Alert for any instance that is unreachable for >2 minutes.
	6	+ - alert: service_down
	7	+ expr: up == 0
	8	+ for: 2m
	9	+ labels:
	10	+ severity: page
	11	+ annotations:
	12	+ summary: "Instance {{ $labels.instance }} down"
	13	+ description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes."
	14	+
	15	+ - alert: high_load
	16	+ expr: node_load1 > 0.5
	17	+ for: 2m
	18	+ labels:
	19	+ severity: page
	20	+ annotations:
	21	+ summary: "Instance {{ $labels.instance }} under high load"
	22	+ description: "{{ $labels.instance }} of job {{ $labels.job }} is under high load."
...	...
...	...	@@ -0,0 +1,84 @@
	1	+# my global config
	2	+global:
	3	+ scrape_interval: 15s # By default, scrape targets every 15 seconds.
	4	+ evaluation_interval: 15s # By default, evaluate rules every 15 seconds.
	5	+ # scrape_timeout is set to the global default (10s).
	6	+
	7	+ # Attach these labels to any time series or alerts when communicating with
	8	+ # external systems (federation, remote storage, Alertmanager).
	9	+ external_labels:
	10	+ monitor: 'my-project'
	11	+
	12	+# Load and evaluate rules in this file every 'evaluation_interval' seconds.
	13	+rule_files:
	14	+ - 'alert.rules'
	15	+ # - "first.rules"
	16	+ # - "second.rules"
	17	+
	18	+# alert
	19	+alerting:
	20	+ alertmanagers:
	21	+ - scheme: http
	22	+ static_configs:
	23	+ - targets:
	24	+ - "alertmanager:9093"
	25	+
	26	+# Scrape configurations for the endpoints to monitor:
	27	+# Prometheus itself and the OpenTelemetry collector.
	28	+scrape_configs:
	29	+ # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
	30	+
	31	+ # - job_name: app
	32	+ # scrape_interval: 5s
	33	+ # static_configs:
	34	+ # - targets: ['host.docker.internal:8000']
	35	+
	36	+ - job_name: 'prometheus'
	37	+
	38	+ # Override the global default and scrape targets from this job every 5 seconds.
	39	+ scrape_interval: 5s
	40	+
	41	+ static_configs:
	42	+ - targets: ['localhost:9090']
	43	+
	44	+ - job_name: 'otel-collector'
	45	+ scrape_interval: 10s
	46	+ static_configs:
	47	+ - targets: ['otel-collector:8889']
	48	+ - targets: ['otel-collector:8888']
	49	+
	50	+# - job_name: 'cadvisor'
	51	+
	52	+# # Override the global default and scrape targets from this job every 5 seconds.
	53	+# scrape_interval: 5s
	54	+
	55	+# dns_sd_configs:
	56	+# - names:
	57	+# - 'tasks.cadvisor'
	58	+# type: 'A'
	59	+# port: 8080
	60	+
	61	+# # static_configs:
	62	+# # - targets: ['cadvisor:8080']
	63	+
	64	+# - job_name: 'node-exporter'
	65	+
	66	+# # Override the global default and scrape targets from this job every 5 seconds.
	67	+# scrape_interval: 5s
	68	+
	69	+# dns_sd_configs:
	70	+# - names:
	71	+# - 'tasks.node-exporter'
	72	+# type: 'A'
	73	+# port: 9100
	74	+
	75	+# - job_name: 'pushgateway'
	76	+# scrape_interval: 10s
	77	+# dns_sd_configs:
	78	+# - names:
	79	+# - 'tasks.pushgateway'
	80	+# type: 'A'
	81	+# port: 9091
	82	+
	83	+# static_configs:
	84	+# - targets: ['node-exporter:9100']
0	85	\ No newline at end of file
...	...
...	...	@@ -1,10 +0,0 @@
1		-route:
2		- receiver: 'slack'
3		-
4		-receivers:
5		- - name: 'slack'
6		-# slack_configs:
7		-# - send_resolved: true
8		-# username: '<username>'
9		-# channel: '#<channel-name>'
10		-# api_url: '<incomming-webhook-url>'
11	0	\ No newline at end of file
...	...	@@ -1,82 +0,0 @@
1		-version: '3.8'
2		-
3		-volumes:
4		- prometheus_data: {}
5		- grafana_data: {}
6		-
7		-services:
8		- prometheus:
9		- image: prom/prometheus
10		- restart: always
11		- volumes:
12		- - ./prometheus:/etc/prometheus/
13		- - prometheus_data:/prometheus
14		- command:
15		- - '--config.file=/etc/prometheus/prometheus.yml'
16		- - '--storage.tsdb.path=/prometheus'
17		- - '--web.console.libraries=/usr/share/prometheus/console_libraries'
18		- - '--web.console.templates=/usr/share/prometheus/consoles'
19		- ports:
20		- - 9090:9090
21		- links:
22		- - cadvisor:cadvisor
23		- - alertmanager:alertmanager
24		- depends_on:
25		- - cadvisor
26		-
27		- node-exporter:
28		- image: prom/node-exporter
29		- volumes:
30		- - /proc:/host/proc:ro
31		- - /sys:/host/sys:ro
32		- - /:/rootfs:ro
33		- command:
34		- - '--path.procfs=/host/proc'
35		- - '--path.sysfs=/host/sys'
36		- - --collector.filesystem.ignored-mount-points
37		- - '^/(sys\|proc\|dev\|host\|etc\|rootfs/var/lib/docker/containers\|rootfs/var/lib/docker/overlay2\|rootfs/run/docker/netns\|rootfs/var/lib/docker/aufs)($$\|/)'
38		- ports:
39		- - 9100:9100
40		- restart: always
41		- deploy:
42		- mode: global
43		-
44		- alertmanager:
45		- image: prom/alertmanager
46		- restart: always
47		- ports:
48		- - 9093:9093
49		- volumes:
50		- - ./alertmanager/:/etc/alertmanager/
51		- command:
52		- - '--config.file=/etc/alertmanager/config.yml'
53		- - '--storage.path=/alertmanager'
54		-
55		- cadvisor:
56		- image: gcr.io/cadvisor/cadvisor
57		- volumes:
58		- - /:/rootfs:ro
59		- - /var/run:/var/run:rw
60		- - /sys:/sys:ro
61		- - /var/lib/docker/:/var/lib/docker:ro
62		- ports:
63		- - 8080:8080
64		- restart: always
65		- deploy:
66		- mode: global
67		-
68		- grafana:
69		- image: grafana/grafana
70		- user: '472'
71		- restart: always
72		- environment:
73		- GF_INSTALL_PLUGINS: 'grafana-clock-panel,grafana-simple-json-datasource'
74		- volumes:
75		- - grafana_data:/var/lib/grafana
76		- - ./grafana/provisioning/:/etc/grafana/provisioning/
77		- env_file:
78		- - ./grafana/config.monitoring
79		- ports:
80		- - 3000:3000
81		- depends_on:
82		- - prometheus