commit 3654daa52cda51ff52e1fe274d6e504b01e663b6 Author: paulmataruso Date: Sun Oct 12 15:56:31 2025 +0000 first upload diff --git a/config/akvorado.yaml b/config/akvorado.yaml new file mode 100644 index 0000000..252bc62 --- /dev/null +++ b/config/akvorado.yaml @@ -0,0 +1,94 @@ +--- +# This configuration file is documented in docs/02-configuration.md. +# +# You can get all default values with `akvorado orchestrator /dev/null --dump +# --check` or `docker compose run --rm --no-deps akvorado-orchestrator +# orchestrator /dev/null --dump --check`. Do not use the output of the command +# as your configuration file, it should only help you get the default values. + +kafka: + topic: flows + brokers: + - kafka:9092 + topic-configuration: + num-partitions: 8 + replication-factor: 1 + config-entries: + # The retention policy in Kafka is mainly here to keep a buffer + # for ClickHouse. + segment.bytes: 1073741824 + retention.ms: 86400000 # 1 day + cleanup.policy: delete + compression.type: producer + +geoip: + optional: true + # When running on Docker, these paths are inside the container. By default, + # IPinfo databases are used. (https://ipinfo.io/) + asn-database: + - /usr/share/GeoIP/asn.mmdb + geo-database: + - /usr/share/GeoIP/country.mmdb + # If you want to use MaxmindDB, check `.env`, `docker-compose-maxmind.yml` and + # update these paths: + #asn-database: + # - /usr/share/GeoIP/GeoLite2-ASN.mmdb + #geo-database: + # - /usr/share/GeoIP/GeoLite2-Country.mmdb + +clickhousedb: + servers: + - clickhouse:9000 + +clickhouse: + orchestrator-url: http://akvorado-orchestrator:8080 + kafka: + consumers: 4 + prometheus-endpoint: /metrics + asns: + 400848: DHI Technical Services + networks: + # You should customize this section with your networks. This + # populates the Src/DstNetName/Role/Site/Region/Tenant fields. + 140.235.236.0/27: + name: CCI-DMZ-VRF + role: servers + 140.235.236.96/30: + name: DMVPN-RTRS + role: routers + 140.235.236.100/30: + name: UBNT-RTRS + role: routers + 140.235.236.120/29: + name: OFFICE-LAB-NET1 + role: routers-lab + 140.235.237.120/29: + name: OFFICE-LAB-NET2 + role: routers-lab + 140.235.237.0/27: + name: MAIN-EDGE-RTR + role: routers + 140.235.237.1/32: + name: MAIN-EDGE-NAT + role: routers + network-sources: [] + # amazon: + # url: https://ip-ranges.amazonaws.com/ip-ranges.json + # interval: 6h + # transform: | + # (.prefixes + .ipv6_prefixes)[] | + # { prefix: (.ip_prefix // .ipv6_prefix), tenant: "amazon", region: .region, role: .service|ascii_downcase } + # gcp: + # url: https://www.gstatic.com/ipranges/cloud.json + # interval: 6h + # transform: | + # .prefixes[] | + # { prefix: (.ipv4Prefix // .ipv6Prefix), tenant: "google-cloud", region: .scope } + +inlet: !include "inlet.yaml" +outlet: !include "outlet.yaml" +console: !include "console.yaml" + +# This line can be removed unless you have enabled the demo exporters with the +# "demo" profile. +#demo-exporter: !include "demo.yaml" diff --git a/config/console.yaml b/config/console.yaml new file mode 100644 index 0000000..7818757 --- /dev/null +++ b/config/console.yaml @@ -0,0 +1,16 @@ +--- +http: + cache: + type: redis + server: redis:6379 +database: + saved-filters: + # These are prepopulated filters you can select in a drop-down + # menu. Users can add more filters interactively. 
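+    # As an illustration (the description and AS number below are an
+    # assumption, not part of the shipped defaults), an additional filter
+    # using the same syntax could be:
+    #  - description: "To Cloudflare"
+    #    content: >-
+    #      OutIfBoundary = external AND DstAS = AS13335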
+ - description: "From Netflix" + content: >- + InIfBoundary = external AND SrcAS = AS2906 + - description: "From GAFAM" + content: >- + InIfBoundary = external AND + SrcAS IN (AS15169, AS16509, AS32934, AS6185, AS8075) diff --git a/config/demo.yaml b/config/demo.yaml new file mode 100644 index 0000000..0b88fd4 --- /dev/null +++ b/config/demo.yaml @@ -0,0 +1,347 @@ +--- +.demo-exporter-flows: + - &http-src + src-port: [80, 443] + dst-port: 0 + protocol: tcp + size: 1300 + - &http-dst + src-port: 0 + dst-port: [80, 443] + protocol: tcp + size: 1300 + - &quic-src + src-port: 443 + dst-port: 0 + protocol: udp + size: 1200 + - &ssh-src + src-port: 22 + dst-port: 0 + protocol: tcp + size: 200 + - &ssh-dst + src-port: 0 + dst-port: 22 + protocol: tcp + size: 300 + - &to-v4-customers + dst-net: 192.0.2.0/24 + dst-as: 64501 + - &to-v6-customers + dst-net: 2a01:db8:cafe:1::/64 + dst-as: 64501 + - &to-v4-servers + dst-net: 203.0.113.0/24 + dst-as: 64501 + - &to-v6-servers + dst-net: 2a01:db8:cafe:2::/64 + dst-as: 64501 + - &from-v4-google + src-net: 216.58.206.0/24 + src-as: 15169 + - &from-v6-google + src-net: 2a00:1450:4007:807::2000/124 + src-as: 15169 + - &from-v4-facebook + src-net: 179.60.192.0/24 + src-as: 32934 + - &from-v6-facebook + src-net: 2a03:2880:f130:83:face:b00c:0::/112 + src-as: 32934 + - &from-v4-netflix + src-net: 198.38.120.0/23 + src-as: 2906 + - &from-v6-netflix + src-net: 2a00:86c0:115:115::/112 + src-as: 2906 + - &from-v4-akamai + src-net: 23.33.27.0/24 + src-as: 20940 + - &from-v6-akamai + src-net: 2a02:26f0:9100:28:0:17c0::/112 + src-as: 20940 + - &from-v4-amazon + src-net: 52.84.175.0/24 + src-as: 16509 + - &from-v6-amazon + src-net: 2600:9000:218d:4a00:15:74db::/112 + src-as: 16509 + - &from-v4-fastly + src-net: 199.232.178.0/29 + src-as: 54113 + - &from-v6-fastly + src-net: 2a04:4e42:1d::/126 + src-as: 54113 + - &from-v4-twitch + src-net: 52.223.202.128/27 + src-as: 46489 + - &from-v4-renater + src-net: 138.231.0.0/16 + src-as: 2269 + - &from-v4-random + src-net: 92.0.0.0/8 + src-as: [12322, 3215, 3303, 15557, 3320, 13335, 6185, 202818, 60068, 16276, 8075, 32590] + - &from-v6-random + src-net: 2a01:cb00::/32 + src-as: [12322, 3215, 3303, 15557, 3320, 13335, 6185, 202818, 60068, 16276, 8075, 32590] + +"": + - snmp: + name: th2-edge1.example.com + interfaces: + 10: "Transit: Telia" + 11: "IX: AMSIX" + 20: "core" + 21: "core" + listen: :161 + bmp: &bmp + target: akvorado-outlet:10179 + routes: + - prefixes: 192.0.2.0/24,2a01:db8:cafe:1::/64 + aspath: 64501 + communities: 65401:10,65401:12 + large-communities: 65401:100:200,65401:100:201 + - prefixes: 203.0.113.0/24,2a01:db8:cafe:2::/64 + aspath: 65401 + communities: 65401:10,65401:13 + large-communities: 65401:100:200,65401:100:213 + - prefixes: 216.58.206.0/24,2a00:1450:4007:807::2000/124 + aspath: 174,1299,15169 + communities: 174:22004,174:21100 + - prefixes: 179.60.192.0/24,2a03:2880:f130:83:face:b00c:0::/112 + aspath: 1299,1299,32934 + communities: 1299:30000,1299:30220 + - prefixes: 198.38.120.0/23,2a00:86c0:115:115::/112 + aspath: 5511,1299,1299,32934 + communities: 1299:30000,1299:30310 + - prefixes: 23.33.27.0/24,2a02:26f0:9100:28:0:17c0::/112 + aspath: 174,174,174,20940 + communities: 174:22002,174:21200 + - prefixes: 52.84.175.0/24,2600:9000:218d:4a00:15:74db::/112 + aspath: 16509 + - prefixes: 199.232.178.0/29,2a04:4e42:1d::/126 + aspath: 1299,54113 + communities: 1299:35000,1299:35200 + - prefixes: 52.223.202.128/27 + aspath: 16509,46489 + - prefixes: 138.231.0.0/16 + aspath: 1299,174,2269,2269 + communities: 
1299:30000,1299:30400 + - prefixes: 0.0.0.0/0 + aspath: 174 + - prefixes: ::/0 + aspath: 1299 + flows: &flows1 + samplingrate: 100000 + target: akvorado-inlet:2055 + flows: + # Google + - per-second: 0.5 + in-if-index: 10 + out-if-index: [20, 21] + peak-hour: 16h + multiplier: 3.3 + reverse-direction-ratio: 0.1 + <<: [*from-v4-google, *to-v4-customers, *http-src] + - per-second: 0.25 + in-if-index: 10 + out-if-index: [20, 21] + peak-hour: 16h + multiplier: 5.5 + reverse-direction-ratio: 0.1 + <<: [*from-v4-google, *to-v4-customers, *quic-src] + - per-second: 0.7 + in-if-index: 10 + out-if-index: [20, 21] + peak-hour: 21h + multiplier: 3.3 + reverse-direction-ratio: 0.1 + <<: [*from-v6-google, *to-v6-customers, *http-src] + - per-second: 0.4 + in-if-index: 10 + out-if-index: [20, 21] + peak-hour: 21h + multiplier: 5.5 + reverse-direction-ratio: 0.1 + <<: [*from-v6-google, *to-v6-customers, *quic-src] + # Facebook + - per-second: 0.55 + in-if-index: [10, 11] + out-if-index: [20, 21] + peak-hour: 16h + multiplier: 3.3 + reverse-direction-ratio: 0.2 + <<: [*from-v4-facebook, *to-v4-customers, *http-src] + - per-second: 0.1 + in-if-index: [10, 11] + out-if-index: [20, 21] + peak-hour: 16h + multiplier: 3.3 + reverse-direction-ratio: 0.2 + <<: [*from-v4-facebook, *to-v4-customers, *quic-src] + - per-second: 0.9 + in-if-index: [10, 11] + out-if-index: [20, 21] + peak-hour: 18h + multiplier: 3.3 + reverse-direction-ratio: 0.2 + <<: [*from-v6-facebook, *to-v6-customers, *http-src] + - per-second: 0.1 + in-if-index: [10, 11] + out-if-index: [20, 21] + peak-hour: 20h + multiplier: 3.3 + reverse-direction-ratio: 0.2 + <<: [*from-v6-facebook, *to-v6-customers, *quic-src] + # Netflix + - per-second: 0.1 + in-if-index: 10 + out-if-index: [20, 21] + peak-hour: 22h + multiplier: 22 + reverse-direction-ratio: 0.1 + <<: [*from-v4-netflix, *to-v4-customers, *http-src] + - per-second: 0.35 + in-if-index: 10 + out-if-index: [20, 21] + peak-hour: 22h + multiplier: 22 + reverse-direction-ratio: 0.1 + <<: [*from-v6-netflix, *to-v6-customers, *http-src] + # Twitch + - per-second: 0.6 + in-if-index: 10 + out-if-index: [20, 21] + peak-hour: 21h + multiplier: 20 + reverse-direction-ratio: 0.4 + <<: [*from-v4-twitch, *to-v4-customers, *http-src] + # Akamai + - per-second: 0.7 + in-if-index: [10, 11] + out-if-index: [20, 21] + peak-hour: 18h + multiplier: 1.5 + reverse-direction-ratio: 0.1 + <<: [*from-v4-akamai, *to-v4-customers, *http-src] + - per-second: 0.4 + in-if-index: [10, 11] + out-if-index: [20, 21] + peak-hour: 18h + multiplier: 1.5 + reverse-direction-ratio: 0.1 + <<: [*from-v6-akamai, *to-v6-customers, *http-src] + # Fastly + - per-second: 0.2 + in-if-index: [10, 11] + out-if-index: [20, 21] + peak-hour: 15h + multiplier: 1.5 + reverse-direction-ratio: 0.1 + <<: [*from-v4-fastly, *to-v4-customers, *http-src] + - per-second: 0.35 + in-if-index: [10, 11] + out-if-index: [20, 21] + peak-hour: 14h + multiplier: 1.5 + reverse-direction-ratio: 0.1 + <<: [*from-v6-fastly, *to-v6-customers, *http-src] + # Amazon + - per-second: 0.15 + in-if-index: [10, 11] + out-if-index: [20, 21] + peak-hour: 18h + multiplier: 1.5 + reverse-direction-ratio: 0.15 + <<: [*from-v4-amazon, *to-v4-customers, *http-src] + - per-second: 0.05 + in-if-index: 10 + out-if-index: [20, 21] + peak-hour: 18h + multiplier: 1.5 + reverse-direction-ratio: 0.15 + <<: [*from-v6-amazon, *to-v6-customers, *http-src] + + # Random SSH + - per-second: 0.05 + in-if-index: 10 + out-if-index: [20, 21] + peak-hour: 15h + multiplier: 1.4 + 
reverse-direction-ratio: 0.5 + <<: [*from-v4-renater, *to-v4-customers, *ssh-src] + # Servers + - per-second: 0.05 + in-if-index: 10 + out-if-index: [20, 21] + peak-hour: 15h + multiplier: 1.4 + reverse-direction-ratio: 0.2 + <<: [*from-v4-renater, *to-v4-servers, *ssh-dst] + - per-second: 0.1 + in-if-index: 10 + out-if-index: [20, 21] + peak-hour: 15h + multiplier: 1.4 + reverse-direction-ratio: 0.15 + <<: [*from-v4-random, *to-v4-servers, *http-dst] + - per-second: 0.1 + in-if-index: 10 + out-if-index: [20, 21] + peak-hour: 15h + multiplier: 1.4 + reverse-direction-ratio: 0.1 + <<: [*from-v6-random, *to-v6-servers, *http-dst] + + # Noise + - &random-flow + per-second: 0.5 + in-if-index: 10 + out-if-index: [20, 21] + peak-hour: 20h + multiplier: 1.1 + protocol: [tcp, udp] + srcport: [80, 443, 22, 25461, 8080, 4500, 993, 8801] + reverse-direction-ratio: 0.25 + <<: [*from-v4-random, *to-v4-customers] + - <<: [*from-v6-random, *to-v6-customers, *random-flow] + - snmp: + name: th2-edge2.example.com + interfaces: + 10: "Transit: Cogent" + 11: "IX: DECIX" + 20: "core" + 21: "core" + listen: :161 + bmp: + <<: *bmp + flows: + <<: *flows1 + seed: 100 + - snmp: + name: dc3-edge1.example.com + interfaces: + 10: "Transit: Tata" + 11: "Transit: Lumen" + 20: "core" + 21: "core" + listen: :161 + bmp: + <<: *bmp + flows: + <<: *flows1 + seed: 200 + - snmp: + name: dc5-edge2.example.com + interfaces: + 10: "IX: FranceIX" + 11: "Transit: Cogent" + 20: "core" + 21: "core" + listen: :161 + bmp: + <<: *bmp + flows: + <<: *flows1 + seed: 300 diff --git a/config/inlet.yaml b/config/inlet.yaml new file mode 100644 index 0000000..ff7ef64 --- /dev/null +++ b/config/inlet.yaml @@ -0,0 +1,23 @@ +--- +flow: + inputs: + # NetFlow port + - type: udp + decoder: netflow + listen: :2055 + workers: 6 + # Before increasing this value, look for it in the troubleshooting section + # of the documentation. + receive-buffer: 212992 + # IPFIX port + - type: udp + decoder: netflow + listen: :4739 + workers: 6 + receive-buffer: 212992 + # sFlow port + - type: udp + decoder: sflow + listen: :6343 + workers: 6 + receive-buffer: 212992 diff --git a/config/outlet.yaml b/config/outlet.yaml new file mode 100644 index 0000000..efe8592 --- /dev/null +++ b/config/outlet.yaml @@ -0,0 +1,33 @@ +--- +metadata: + providers: + - type: snmp + credentials: + ::/0: + communities: 205gti205gti +routing: + provider: + type: bmp + # Before increasing this value, look for it in the troubleshooting section + # of the documentation. + receive-buffer: 212992 +core: + exporter-classifiers: + # This is an example. This should be customized depending on how + # your exporters are named. + - ClassifySiteRegex(Exporter.Name, "^([^-]+)-", "$1") + - ClassifyRegion("europe") + - ClassifyTenant("acme") + - ClassifyRole("edge") + interface-classifiers: + # This is an example. This must be customized depending on the + # descriptions of your interfaces. In the following, we assume + # external interfaces are named "Transit: Cogent" Or "IX: + # FranceIX". + - | + ClassifyConnectivityRegex(Interface.Description, "^(?i)(External|external|transit|pni|ppni|ix):? 
", "$1") && + ClassifyProviderRegex(Interface.Description, "^\\S+?\\s(\\S+)", "$1") && + ClassifyExternal() + - ClassifyInternal() + default-sampling-rate: 100 + override-sampling-rate: 100 diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..9180a47 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,34 @@ +FROM --platform=$BUILDPLATFORM node:20-alpine AS build-js +RUN apk add --no-cache make +WORKDIR /build +COPY console/frontend console/frontend +COPY Makefile . +RUN make console/data/frontend + +FROM --platform=$BUILDPLATFORM golang:alpine AS build-go +RUN apk add --no-cache make curl zip +WORKDIR /build +# Cache for modules +COPY go.mod go.sum . +RUN go mod download +# Build +COPY . . +COPY --from=build-js /build/console/data/frontend console/data/frontend +RUN touch console/frontend/node_modules .fmt-js~ .fmt.go~ .lint-js~ .lint-go~ \ + && find . -print0 | xargs -0 touch -d @0 +RUN make all-indep +ARG TARGETOS +ARG TARGETARCH +ARG TARGETVARIANT +ARG VERSION +RUN make + +FROM gcr.io/distroless/static:latest +LABEL org.opencontainers.image.title="Akvorado" +LABEL org.opencontainers.image.description="Flow collector, enricher and visualizer" +LABEL org.opencontainers.image.source=https://github.com/akvorado/akvorado +LABEL org.opencontainers.image.licenses=AGPL-3.0-only +COPY --from=build-go /build/bin/akvorado /usr/local/bin/akvorado +EXPOSE 8080 +HEALTHCHECK --interval=20s CMD [ "/usr/local/bin/akvorado", "healthcheck" ] +ENTRYPOINT [ "/usr/local/bin/akvorado" ] diff --git a/docker/Dockerfile.dev b/docker/Dockerfile.dev new file mode 100644 index 0000000..776a7ff --- /dev/null +++ b/docker/Dockerfile.dev @@ -0,0 +1,9 @@ +FROM gcr.io/distroless/static:latest +LABEL org.opencontainers.image.title="Akvorado" +LABEL org.opencontainers.image.description="Flow collector, enricher and visualizer (dev)" +LABEL org.opencontainers.image.source=https://github.com/akvorado/akvorado +LABEL org.opencontainers.image.licenses=AGPL-3.0-only +COPY bin/akvorado /usr/local/bin/ +EXPOSE 8080 +HEALTHCHECK --interval=20s CMD [ "/usr/local/bin/akvorado", "healthcheck" ] +ENTRYPOINT [ "/usr/local/bin/akvorado" ] diff --git a/docker/Dockerfile.ipinfo-geoipupdate b/docker/Dockerfile.ipinfo-geoipupdate new file mode 100644 index 0000000..ffc367a --- /dev/null +++ b/docker/Dockerfile.ipinfo-geoipupdate @@ -0,0 +1,12 @@ +FROM alpine:latest + +RUN apk add --no-cache curl + +WORKDIR /data +VOLUME /data + +COPY --chmod=555 ipinfo-geoipupdate.sh /usr/local/bin/ipinfo-geoipupdate.sh + +LABEL org.opencontainers.image.description="IPinfo database updater" +HEALTHCHECK --interval=20s CMD test -f /tmp/healthy +CMD ["/usr/local/bin/ipinfo-geoipupdate.sh"] diff --git a/docker/Dockerfile.nix b/docker/Dockerfile.nix new file mode 100644 index 0000000..04e256d --- /dev/null +++ b/docker/Dockerfile.nix @@ -0,0 +1,22 @@ +FROM nixpkgs/nix-flakes:latest AS build +ARG TARGETPLATFORM +RUN echo filter-syscalls = false >> /etc/nix/nix.conf +WORKDIR /app +COPY . . 
+RUN mkdir -p /output/store +RUN make version > .version && git add -f .version +RUN nix run ".#update" \ + && nix run ".#build" \ + && cp -va $(nix-store -qR result) /output/store \ + && rm -rf /output/store/*-akvorado + +FROM gcr.io/distroless/static:latest +LABEL org.opencontainers.image.title="Akvorado" +LABEL org.opencontainers.image.description="Flow collector, enricher and visualizer (nix)" +LABEL org.opencontainers.image.source=https://github.com/akvorado/akvorado +LABEL org.opencontainers.image.licenses=AGPL-3.0-only +COPY --from=build /output/store /nix/store +COPY --from=build /app/result/ /usr/local/ +EXPOSE 8080 +HEALTHCHECK --interval=20s CMD [ "/usr/local/bin/akvorado", "healthcheck" ] +ENTRYPOINT [ "/usr/local/bin/akvorado" ] diff --git a/docker/clickhouse/cluster-1.xml b/docker/clickhouse/cluster-1.xml new file mode 100644 index 0000000..aa6600b --- /dev/null +++ b/docker/clickhouse/cluster-1.xml @@ -0,0 +1,6 @@ + + + 01 + 01 + + diff --git a/docker/clickhouse/cluster-2.xml b/docker/clickhouse/cluster-2.xml new file mode 100644 index 0000000..f27d73f --- /dev/null +++ b/docker/clickhouse/cluster-2.xml @@ -0,0 +1,6 @@ + + + 01 + 02 + + diff --git a/docker/clickhouse/cluster-3.xml b/docker/clickhouse/cluster-3.xml new file mode 100644 index 0000000..d1751fa --- /dev/null +++ b/docker/clickhouse/cluster-3.xml @@ -0,0 +1,6 @@ + + + 02 + 01 + + diff --git a/docker/clickhouse/cluster-4.xml b/docker/clickhouse/cluster-4.xml new file mode 100644 index 0000000..647fa72 --- /dev/null +++ b/docker/clickhouse/cluster-4.xml @@ -0,0 +1,6 @@ + + + 02 + 02 + + diff --git a/docker/clickhouse/cluster.xml b/docker/clickhouse/cluster.xml new file mode 100644 index 0000000..3546c81 --- /dev/null +++ b/docker/clickhouse/cluster.xml @@ -0,0 +1,47 @@ + + + + + + true + + clickhouse-1 + 9000 + + + clickhouse-2 + 9000 + + + + true + + clickhouse-3 + 9000 + + + clickhouse-4 + 9000 + + + + + + + clickhouse-keeper-1 + 2181 + + + clickhouse-keeper-2 + 2181 + + + clickhouse-keeper-3 + 2181 + + + + 0 + diff --git a/docker/clickhouse/keeper-cluster-1.xml b/docker/clickhouse/keeper-cluster-1.xml new file mode 100644 index 0000000..2fc2854 --- /dev/null +++ b/docker/clickhouse/keeper-cluster-1.xml @@ -0,0 +1,5 @@ + + + 1 + + diff --git a/docker/clickhouse/keeper-cluster-2.xml b/docker/clickhouse/keeper-cluster-2.xml new file mode 100644 index 0000000..394e24b --- /dev/null +++ b/docker/clickhouse/keeper-cluster-2.xml @@ -0,0 +1,5 @@ + + + 2 + + diff --git a/docker/clickhouse/keeper-cluster-3.xml b/docker/clickhouse/keeper-cluster-3.xml new file mode 100644 index 0000000..0bdbcab --- /dev/null +++ b/docker/clickhouse/keeper-cluster-3.xml @@ -0,0 +1,5 @@ + + + 3 + + diff --git a/docker/clickhouse/keeper-cluster.xml b/docker/clickhouse/keeper-cluster.xml new file mode 100644 index 0000000..cee3bbc --- /dev/null +++ b/docker/clickhouse/keeper-cluster.xml @@ -0,0 +1,21 @@ + + + + + 1 + clickhouse-keeper-1 + 9234 + + + 2 + clickhouse-keeper-2 + 9234 + + + 3 + clickhouse-keeper-3 + 9234 + + + + diff --git a/docker/clickhouse/keeper.xml b/docker/clickhouse/keeper.xml new file mode 100644 index 0000000..7c885a7 --- /dev/null +++ b/docker/clickhouse/keeper.xml @@ -0,0 +1,6 @@ + + 0.0.0.0 + + 2181 + + diff --git a/docker/clickhouse/observability.xml b/docker/clickhouse/observability.xml new file mode 100644 index 0000000..3ef548c --- /dev/null +++ b/docker/clickhouse/observability.xml @@ -0,0 +1,31 @@ + + + + /metrics + true + true + true + + + + fatal + + 1 + information + + json + + date_time + date_time_utc + 
thread_name + thread_id + level + query_id + logger_name + message + source_file + source_line + + + + diff --git a/docker/clickhouse/server.xml b/docker/clickhouse/server.xml new file mode 100644 index 0000000..703cbec --- /dev/null +++ b/docker/clickhouse/server.xml @@ -0,0 +1,36 @@ + + + + event_date + INTERVAL 30 DAY DELETE + + + event_date + INTERVAL 30 DAY DELETE + + + event_date + INTERVAL 30 DAY DELETE + + + event_date + INTERVAL 30 DAY DELETE + + + event_date + INTERVAL 30 DAY DELETE + + + event_date + INTERVAL 30 DAY DELETE + + + event_date + INTERVAL 30 DAY DELETE + + + event_date + INTERVAL 30 DAY DELETE + + + event_date + INTERVAL 30 DAY DELETE + + + event_date + INTERVAL 30 DAY DELETE + + + event_date + INTERVAL 30 DAY DELETE + + diff --git a/docker/clickhouse/standalone.xml b/docker/clickhouse/standalone.xml new file mode 100644 index 0000000..391bcfd --- /dev/null +++ b/docker/clickhouse/standalone.xml @@ -0,0 +1,4 @@ + + + 0 + diff --git a/docker/clickhouse/test-db.sql b/docker/clickhouse/test-db.sql new file mode 100644 index 0000000..e68c2ef --- /dev/null +++ b/docker/clickhouse/test-db.sql @@ -0,0 +1 @@ +CREATE DATABASE IF NOT EXISTS test; diff --git a/docker/config.alloy b/docker/config.alloy new file mode 100644 index 0000000..a32792a --- /dev/null +++ b/docker/config.alloy @@ -0,0 +1,108 @@ +discovery.docker "docker" { + host = "unix:///var/run/docker.sock" + refresh_interval = "30s" + filter { + name = "label" + values = ["com.docker.compose.project=akvorado"] + } +} + +prometheus.remote_write "default" { + endpoint { + url = "http://prometheus:9090/prometheus/api/v1/write" + } +} + +// Docker relabeling. We match entries with metrics.port as the port (it needs to +// be exposed) and use metrics.path if defined. The job name is the service name +// minus the numbered suffix. +discovery.relabel "prometheus" { + targets = discovery.docker.docker.targets + + // Normalize the case where network is host. 
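+  // When a container uses the host network, Docker reports the network name as
+  // "host" and no private port. The two rules below rewrite the scrape address
+  // to host.docker.internal:<metrics.port> and copy that label into the
+  // private-port label so the keepequal check further down still matches.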
+ rule { + source_labels = ["__meta_docker_container_label_metrics_port", "__meta_docker_network_name"] + regex = `(.+);host` + target_label = "__address__" + replacement = "host.docker.internal:$1" + } + rule { + source_labels = ["__meta_docker_container_label_metrics_port", "__meta_docker_network_name"] + regex = `(.+);host` + target_label = "__meta_docker_port_private" + replacement = "$1" + } + // Then keep if we metrics.port matches private port + rule { + source_labels = ["__meta_docker_container_label_metrics_port"] + regex = `.+` + action = "keep" + } + rule { + source_labels = ["__meta_docker_container_label_metrics_port"] + target_label = "__meta_docker_port_private" + action = "keepequal" + } + + // Set job and instance name + rule { + source_labels = ["__meta_docker_container_label_com_docker_compose_service"] + regex = `(.+)(?:-\d+)?` + target_label = "job" + } + rule { + source_labels = ["__address__"] + regex = `(.+):\d+` + target_label = "instance" + } + + // Set metrics path from metrics.path label, default to /metrics + rule { + source_labels = ["__meta_docker_container_label_metrics_path"] + regex = `(.+)` + target_label = "__metrics_path__" + } + rule { + source_labels = ["__metrics_path__"] + regex = "" + target_label = "__metrics_path__" + replacement = "/metrics" + } +} +prometheus.scrape "docker" { + targets = discovery.relabel.prometheus.output + forward_to = [prometheus.remote_write.default.receiver] + scrape_interval = "30s" +} + +prometheus.exporter.redis "docker" { + redis_addr = "redis:6379" +} +discovery.relabel "redis" { + targets = prometheus.exporter.redis.docker.targets + rule { + target_label = "job" + replacement = "redis" + } +} +prometheus.scrape "redis" { + targets = discovery.relabel.redis.output + forward_to = [prometheus.remote_write.default.receiver] + scrape_interval = "30s" +} + +prometheus.exporter.kafka "docker" { + kafka_uris = ["kafka:9092"] +} +discovery.relabel "kafka" { + targets = prometheus.exporter.kafka.docker.targets + rule { + target_label = "job" + replacement = "kafka" + } +} +prometheus.scrape "kafka" { + targets = discovery.relabel.kafka.output + forward_to = [prometheus.remote_write.default.receiver] + scrape_interval = "30s" +} diff --git a/docker/docker-compose-clickhouse-cluster.yml b/docker/docker-compose-clickhouse-cluster.yml new file mode 100644 index 0000000..5f1c0a8 --- /dev/null +++ b/docker/docker-compose-clickhouse-cluster.yml @@ -0,0 +1,99 @@ +--- +volumes: + akvorado-clickhouse-keeper-db-1: + akvorado-clickhouse-keeper-db-2: + akvorado-clickhouse-keeper-db-3: + akvorado-clickhouse-keeper-1: + akvorado-clickhouse-keeper-2: + akvorado-clickhouse-keeper-3: + akvorado-clickhouse-2: + akvorado-clickhouse-3: + akvorado-clickhouse-4: + +services: + akvorado-orchestrator: + environment: + AKVORADO_CFG_ORCHESTRATOR_CLICKHOUSE_CLUSTER: akvorado + + clickhouse-keeper-1: &clickhouse-keeper + extends: + file: versions.yml + service: clickhouse-keeper + restart: unless-stopped + expose: + - 9100/tcp + volumes: + - akvorado-clickhouse-keeper-db-1:/var/lib/clickhouse-keeper + - akvorado-clickhouse-keeper-1:/var/lib/clickhouse + - ./clickhouse/observability:/etc/clickhouse-keeper/keeper_config.d/observability.xml + - ./clickhouse/keeper.xml:/etc/clickhouse-keeper/keeper_config.d/akvorado.xml + - ./clickhouse/keeper-cluster.xml:/etc/clickhouse-keeper/keeper_config.d/keeper-cluster.xml + - ./clickhouse/keeper-cluster.xml:/etc/clickhouse-keeper/keeper_config.d/keeper-cluster-1.xml + healthcheck: + interval: 20s + test: ["CMD", 
"wget", "-T", "1", "--spider", "--no-proxy", "http://127.0.0.1:9100/metrics"] + labels: + - metrics.port=9100 + clickhouse-keeper-2: + <<: *clickhouse-keeper + volumes: + - akvorado-clickhouse-keeper-db-2:/var/lib/clickhouse-keeper + - akvorado-clickhouse-keeper-2:/var/lib/clickhouse + - ./clickhouse/observability:/etc/clickhouse-keeper/keeper_config.d/observability.xml + - ./clickhouse/keeper.xml:/etc/clickhouse-keeper/keeper_config.d/akvorado.xml + - ./clickhouse/keeper-cluster.xml:/etc/clickhouse-keeper/keeper_config.d/keeper-cluster.xml + - ./clickhouse/keeper-cluster.xml:/etc/clickhouse-keeper/keeper_config.d/keeper-cluster-2.xml + clickhouse-keeper-3: + <<: *clickhouse-keeper + volumes: + - akvorado-clickhouse-keeper-db-3:/var/lib/clickhouse-keeper + - akvorado-clickhouse-keeper-3:/var/lib/clickhouse + - ./clickhouse/observability:/etc/clickhouse-keeper/keeper_config.d/observability.xml + - ./clickhouse/keeper.xml:/etc/clickhouse-keeper/keeper_config.d/akvorado.xml + - ./clickhouse/keeper-cluster.xml:/etc/clickhouse-keeper/keeper_config.d/keeper-cluster.xml + - ./clickhouse/keeper-cluster.xml:/etc/clickhouse-keeper/keeper_config.d/keeper-cluster-3.xml + + clickhouse: + depends_on: + - clickhouse-keeper-1 + - clickhouse-keeper-2 + - clickhouse-keeper-3 + volumes: + - ./clickhouse/cluster.xml:/etc/clickhouse-server/config.d/cluster.xml + - ./clickhouse/cluster-1.xml:/etc/clickhouse-server/config.d/cluster-1.xml + clickhouse-2: + extends: + file: docker-compose.yml + service: clickhouse + depends_on: + - clickhouse-keeper-1 + - clickhouse-keeper-2 + - clickhouse-keeper-3 + volumes: + - ./clickhouse/cluster.xml:/etc/clickhouse-server/config.d/cluster.xml + - ./clickhouse/cluster-2.xml:/etc/clickhouse-server/config.d/cluster-2.xml + - akvorado-clickhouse-2:/var/lib/clickhouse + clickhouse-3: + extends: + file: docker-compose.yml + service: clickhouse + depends_on: + - clickhouse-keeper-1 + - clickhouse-keeper-2 + - clickhouse-keeper-3 + volumes: + - ./clickhouse/cluster.xml:/etc/clickhouse-server/config.d/cluster.xml + - ./clickhouse/cluster-3.xml:/etc/clickhouse-server/config.d/cluster-3.xml + - akvorado-clickhouse-3:/var/lib/clickhouse + clickhouse-4: + extends: + file: docker-compose.yml + service: clickhouse + depends_on: + - clickhouse-keeper-1 + - clickhouse-keeper-2 + - clickhouse-keeper-3 + volumes: + - ./clickhouse/cluster.xml:/etc/clickhouse-server/config.d/cluster.xml + - ./clickhouse/cluster-4.xml:/etc/clickhouse-server/config.d/cluster-4.xml + - akvorado-clickhouse-4:/var/lib/clickhouse diff --git a/docker/docker-compose-demo.yml b/docker/docker-compose-demo.yml new file mode 100644 index 0000000..8263022 --- /dev/null +++ b/docker/docker-compose-demo.yml @@ -0,0 +1,26 @@ +--- +services: + akvorado-exporter-1: &exporter + extends: + file: versions.yml + service: akvorado + profiles: [ demo ] + restart: unless-stopped + command: demo-exporter http://akvorado-orchestrator:8080#0 + depends_on: + akvorado-inlet: + condition: service_healthy + akvorado-outlet: + condition: service_healthy + labels: + - metrics.port=8080 + - metrics.path=/api/v0/metrics + akvorado-exporter-2: + <<: *exporter + command: demo-exporter http://akvorado-orchestrator:8080#1 + akvorado-exporter-3: + <<: *exporter + command: demo-exporter http://akvorado-orchestrator:8080#2 + akvorado-exporter-4: + <<: *exporter + command: demo-exporter http://akvorado-orchestrator:8080#3 diff --git a/docker/docker-compose-dev.yml b/docker/docker-compose-dev.yml new file mode 100644 index 0000000..99f15e1 --- /dev/null +++ 
b/docker/docker-compose-dev.yml @@ -0,0 +1,198 @@ +--- +name: akvorado-dev +services: + mock-oauth2-server: + extends: + file: versions.yml + service: mock-oauth2-server + ports: + - 127.0.0.1:5556:8080/tcp + environment: + LOG_LEVEL: debug + + kafka: + extends: + file: versions.yml + service: kafka + environment: + # KRaft settings + KAFKA_NODE_ID: 1 + KAFKA_PROCESS_ROLES: controller,broker + KAFKA_CONTROLLER_QUORUM_VOTERS: 1@kafka:9096 + KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 500 + # We have two sets of listeners: INTERNAL that is used from inside the docker + # compose network and listens on "kafka" and EXTERNAL that is mapped to + # the host network and listens on "localhost". + # + # Then, in each set, we have a plain text one and an OAuth-enabled one. + KAFKA_LISTENERS: >- + INTERNAL://:9092, + OINTERNAL://:9093, + EXTERNAL://:9094, + OEXTERNAL://:9095, + CONTROLLER://:9096 + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: >- + INTERNAL:PLAINTEXT, + OINTERNAL:SASL_PLAINTEXT, + EXTERNAL:PLAINTEXT, + OEXTERNAL:SASL_PLAINTEXT, + CONTROLLER:PLAINTEXT + KAFKA_ADVERTISED_LISTENERS: >- + INTERNAL://kafka:9092, + OINTERNAL://kafka:9093, + EXTERNAL://localhost:9092, + OEXTERNAL://localhost:9093 + KAFKA_INTER_BROKER_LISTENER_NAME: INTERNAL + KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER + # OAuth2 configuration + KAFKA_LISTENER_NAME_OEXTERNAL_SASL_ENABLED_MECHANISMS: OAUTHBEARER + KAFKA_LISTENER_NAME_OEXTERNAL_SASL_OAUTHBEARER_JWKS_ENDPOINT_URL: http://mock-oauth2-server:8080/default/jwks + KAFKA_LISTENER_NAME_OEXTERNAL_SASL_OAUTHBEARER_EXPECTED_AUDIENCE: default + KAFKA_LISTENER_NAME_OEXTERNAL_OAUTHBEARER_SASL_JAAS_CONFIG: >- + org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule required + clientId=kafka-client + clientSecret=kafka-client-secret + unsecuredLoginStringClaim_sub="sub"; + KAFKA_LISTENER_NAME_OEXTERNAL_OAUTHBEARER_SASL_SERVER_CALLBACK_HANDLER_CLASS: org.apache.kafka.common.security.oauthbearer.OAuthBearerValidatorCallbackHandler + KAFKA_LISTENER_NAME_OINTERNAL_SASL_ENABLED_MECHANISMS: OAUTHBEARER + KAFKA_LISTENER_NAME_OINTERNAL_SASL_OAUTHBEARER_JWKS_ENDPOINT_URL: http://mock-oauth2-server:8080/default/jwks + KAFKA_LISTENER_NAME_OINTERNAL_SASL_OAUTHBEARER_EXPECTED_AUDIENCE: default + KAFKA_LISTENER_NAME_OINTERNAL_OAUTHBEARER_SASL_JAAS_CONFIG: >- + org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule required + clientId=kafka-client + clientSecret=kafka-client-secret + unsecuredLoginStringClaim_sub="sub"; + KAFKA_LISTENER_NAME_OINTERNAL_OAUTHBEARER_SASL_SERVER_CALLBACK_HANDLER_CLASS: org.apache.kafka.common.security.oauthbearer.OAuthBearerValidatorCallbackHandler + KAFKA_OPTS: >- + -Dorg.apache.kafka.sasl.oauthbearer.allowed.urls=http://mock-oauth2-server:8080/default/jwks + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1 + KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1 + KAFKA_SHARE_COORDINATOR_STATE_TOPIC_REPLICATION_FACTOR: 1 + KAFKA_SHARE_COORDINATOR_STATE_TOPIC_MIN_ISR: 1 + KAFKA_LOG_DIRS: /var/lib/kafka/data + depends_on: + - mock-oauth2-server + ports: + - 127.0.0.1:9092:9094/tcp + - 127.0.0.1:9093:9095/tcp + + redis: + extends: + file: versions.yml + service: redis + ports: + - 127.0.0.1:6379:6379/tcp + + postgres: + extends: + file: versions.yml + service: postgres + environment: + POSTGRES_USER: akvorado + POSTGRES_PASSWORD: akpass + POSTGRES_DB: akvorado + ports: + - 127.0.0.1:5432:5432/tcp + healthcheck: + test: ["CMD-SHELL", "pg_isready -U akvorado -d akvorado"] + interval: 5s + timeout: 5s + retries: 5 + + 
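+  # Both development databases use the credentials akvorado/akpass. For manual
+  # inspection you could run, for example (illustrative commands only, not part
+  # of the compose setup):
+  #   docker compose exec postgres psql -U akvorado -d akvorado
+  #   docker compose exec mysql mariadb -u akvorado -pakpass akvorado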
mysql: + extends: + file: versions.yml + service: mysql + environment: + MARIADB_ALLOW_EMPTY_ROOT_PASSWORD: "yes" + MARIADB_USER: akvorado + MARIADB_PASSWORD: akpass + MARIADB_DATABASE: akvorado + ports: + - 127.0.0.1:3306:3306/tcp + healthcheck: + test: ['CMD', 'healthcheck.sh', '--connect', '--innodb_initialized'] + interval: 5s + timeout: 5s + retries: 5 + + clickhouse: &clickhouse + extends: + file: versions.yml + service: clickhouse + environment: + CLICKHOUSE_SKIP_USER_SETUP: 1 + CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS: 1 + cap_add: + - SYS_NICE + volumes: + - ./clickhouse/standalone.xml:/etc/clickhouse-server/config.d/standalone.xml + - ./clickhouse/test-db.sql:/docker-entrypoint-initdb.d/test-db.sql + ports: + - 127.0.0.1:8123:8123/tcp + - 127.0.0.1:9000:9000/tcp + clickhouse-1: &clickhouse-cluster + <<: *clickhouse + depends_on: + - clickhouse-keeper-1 + volumes: + - ./clickhouse/cluster.xml:/etc/clickhouse-server/config.d/cluster.xml + - ./clickhouse/cluster-1.xml:/etc/clickhouse-server/config.d/cluster-1.xml + ports: + - 127.0.0.1:9001:9000/tcp + clickhouse-2: + <<: *clickhouse-cluster + volumes: + - ./clickhouse/cluster.xml:/etc/clickhouse-server/config.d/cluster.xml + - ./clickhouse/cluster-2.xml:/etc/clickhouse-server/config.d/cluster-2.xml + ports: + - 127.0.0.1:9002:9000/tcp + clickhouse-3: + <<: *clickhouse-cluster + volumes: + - ./clickhouse/cluster.xml:/etc/clickhouse-server/config.d/cluster.xml + - ./clickhouse/cluster-3.xml:/etc/clickhouse-server/config.d/cluster-3.xml + ports: + - 127.0.0.1:9003:9000/tcp + clickhouse-4: + <<: *clickhouse-cluster + volumes: + - ./clickhouse/cluster.xml:/etc/clickhouse-server/config.d/cluster.xml + - ./clickhouse/cluster-4.xml:/etc/clickhouse-server/config.d/cluster-4.xml + ports: + - 127.0.0.1:9004:9000/tcp + clickhouse-keeper-1: + extends: + file: versions.yml + service: clickhouse-keeper + volumes: + - ./clickhouse/observability:/etc/clickhouse-keeper/keeper_config.d/observability.xml + - ./clickhouse/keeper.xml:/etc/clickhouse-keeper/keeper_config.d/akvorado.xml + + srlinux: + # The SR Linux container does not like to be restarted. If you get: + # Error: Peer netns reference is invalid. + # Be sure to put it down before starting it. 
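+    # For example (an illustrative sequence, not prescribed by this file):
+    #   docker compose stop srlinux && docker compose rm -f srlinux
+    #   docker compose up -d srlinux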
+ extends: + file: versions.yml + service: srlinux + privileged: true + user: root + command: /opt/srlinux/bin/sr_linux + ports: + - 127.0.0.1:57400:57400/tcp + - 127.0.0.1:57401:22/tcp + + vector: + extends: + file: versions.yml + service: vector + restart: "no" + profiles: [ manual ] + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + - ./vector.yaml:/etc/vector/vector.yaml:ro + - ./vector.tests.yaml:/etc/vector/vector.tests.yaml:ro + environment: + VECTOR_CONFIG_DIR: /etc/vector diff --git a/docker/docker-compose-grafana.yml b/docker/docker-compose-grafana.yml new file mode 100644 index 0000000..1bd424e --- /dev/null +++ b/docker/docker-compose-grafana.yml @@ -0,0 +1,27 @@ +--- +volumes: + akvorado-grafana: +services: + grafana: + extends: + file: versions.yml + service: grafana + profiles: [ grafana ] + environment: + GF_SERVER_ROOT_URL: /grafana + GF_SERVER_SERVE_FROM_SUB_PATH: "true" + depends_on: + - prometheus + restart: unless-stopped + volumes: + - akvorado-grafana:/var/lib/grafana + - ./grafana:/etc/grafana + expose: + - 3000/tcp + labels: + - traefik.enable=true + - traefik.http.routers.grafana.rule=PathPrefix(`/grafana`) + - traefik.http.routers.grafana.middlewares=console-auth,grafana-avatar + - traefik.http.middlewares.grafana-avatar.redirectRegex.regex=^(https?://.+)/grafana/avatar/.+ + - traefik.http.middlewares.grafana-avatar.redirectRegex.replacement=$${1}/api/v0/console/user/avatar + - metrics.port=3000 diff --git a/docker/docker-compose-ipinfo.yml b/docker/docker-compose-ipinfo.yml new file mode 100644 index 0000000..fbc62a7 --- /dev/null +++ b/docker/docker-compose-ipinfo.yml @@ -0,0 +1,12 @@ +--- +services: + geoip: + extends: + file: versions.yml + service: ipinfo-geoipupdate + environment: + IPINFO_TOKEN: a2632ea59736c7 + IPINFO_DATABASES: country asn + UPDATE_FREQUENCY: 48h + volumes: + - akvorado-geoip:/data diff --git a/docker/docker-compose-local.yml b/docker/docker-compose-local.yml new file mode 100644 index 0000000..24f136a --- /dev/null +++ b/docker/docker-compose-local.yml @@ -0,0 +1,58 @@ +# If possible, overrides should go there! + +# If you prefer to use geo IP databases from host, uncomment this block. + +# services: +# akvorado-orchestrator: +# volumes: +# - /usr/share/GeoIP:/usr/share/GeoIP:ro + +# If you want to expose the service directly on port 80, uncomment this block. + +# services: +# traefik: +# ports: +# - 80:8081/tcp + +# To not use SQLite for the console database, you need to unset a specific +# environment variable: + +# services: +# akvorado-console: +# environment: +# AKVORADO_CFG_CONSOLE_DATABASE_DSN: !reset null + +# To add use HTTP basic auth to protect the service, uncomment the snippet +# below. You can generate your own user/password with "htpasswd -nB akvorado". +# If you enable TLS, replace "public" by "publicsecure". + +# services: +# traefik: +# environment: +# TRAEFIK_ENTRYPOINTS_public_HTTP_MIDDLEWARES: auth@docker +# labels: +# - traefik.http.middlewares.auth.basicauth.users=akvorado:$$2y$$05$$Ud.JjfZWtKlSOoXKkv48leXze3u4cSNC5G4lG9nkfv5OFOkVcgRrm + +# If you don't want to expose Kafka-UI and Traefik on the public endpoints, uncomment this block. + +# services: +# kafka-ui: +# labels: +# - traefik.http.routers.kafka-ui.entrypoints=private +# traefik: +# labels: +# - traefik.http.routers.traefik.entrypoints=private + +# To enable IPv6 routing, uncomment the following block. This requires Docker +# Engine v27. 
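+# (2001:db8::/64 below is the IPv6 documentation prefix; replace it with a
+# prefix that is actually routed to your Docker host before uncommenting.)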
+ +# networks: +# default: +# driver: bridge +# driver_opts: +# com.docker.network.bridge.gateway_mode_ipv6: routed +# ipam: +# driver: default +# config: +# - subnet: 2001:db8::/64 +# gateway: 2001:db8::1 diff --git a/docker/docker-compose-loki.yml b/docker/docker-compose-loki.yml new file mode 100644 index 0000000..80d693a --- /dev/null +++ b/docker/docker-compose-loki.yml @@ -0,0 +1,45 @@ +--- +volumes: + akvorado-loki: + +services: + loki: + extends: + file: versions.yml + service: loki + profiles: [ loki ] + restart: unless-stopped + volumes: + - akvorado-loki:/loki + - ./loki.yaml:/etc/loki/local-config.yaml:ro + expose: + - 3100/tcp + labels: + - traefik.enable=true + - traefik.http.routers.loki.rule=PathPrefix(`/loki`) + - traefik.http.routers.loki.entrypoints=private + - metrics.port=3100 + - metrics.path=/loki/metrics + + vector: + extends: + file: versions.yml + service: vector + profiles: [ loki ] + restart: unless-stopped + user: root # for access to /var/run/docker.sock + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + - ./vector.yaml:/etc/vector/vector.yaml:ro + depends_on: + - loki + healthcheck: + interval: 20s + test: ["CMD", + "wget", "-T", "1", "--spider", "http://127.0.0.1:8686/health"] + expose: + - 9598 # metrics + environment: + VECTOR_CONFIG_DIR: /etc/vector + labels: + - metrics.port=9598 diff --git a/docker/docker-compose-maxmind.yml b/docker/docker-compose-maxmind.yml new file mode 100644 index 0000000..03c6446 --- /dev/null +++ b/docker/docker-compose-maxmind.yml @@ -0,0 +1,14 @@ +--- +services: + geoip: + # Put GEOIPUPDATE_ACCOUNT_ID and GEOIPUPDATE_LICENSE_KEY values in `.env` file. + extends: + file: versions.yml + service: maxmind-geoipupdate + environment: + GEOIPUPDATE_ACCOUNT_ID: + GEOIPUPDATE_LICENSE_KEY: + GEOIPUPDATE_EDITION_IDS: GeoLite2-ASN GeoLite2-Country + GEOIPUPDATE_FREQUENCY: 48 + volumes: + - akvorado-geoip:/usr/share/GeoIP diff --git a/docker/docker-compose-prometheus.yml b/docker/docker-compose-prometheus.yml new file mode 100644 index 0000000..a2747f2 --- /dev/null +++ b/docker/docker-compose-prometheus.yml @@ -0,0 +1,112 @@ +--- +volumes: + akvorado-prometheus: + +services: + # Store metrics + prometheus: + extends: + file: versions.yml + service: prometheus + profiles: [ prometheus ] + restart: unless-stopped + volumes: + - akvorado-prometheus:/prometheus + - ./prometheus.yml:/etc/prometheus/prometheus.yml + command: + # Those are the defaults + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.path=/prometheus + # Those are not the defaults + - --web.enable-remote-write-receiver + - --web.external-url=/prometheus + - --storage.tsdb.retention.time=30d + healthcheck: + interval: 20s + test: ["CMD", + "promtool", "check", "healthy", "--url=http://127.0.0.1:9090/prometheus"] + expose: + - 9090/tcp + labels: + - traefik.enable=true + - traefik.http.routers.prometheus.rule=PathPrefix(`/prometheus`) + - traefik.http.routers.prometheus.entrypoints=private + - metrics.port=9090 + - metrics.path=/prometheus/metrics + + # Fetch metrics + alloy: + extends: + file: versions.yml + service: alloy + profiles: [ prometheus ] + restart: unless-stopped + user: root # for access to /var/run/docker.sock + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + - ./config.alloy:/etc/alloy/config.alloy + extra_hosts: + - "host.docker.internal:host-gateway" + expose: + - 12345 + command: + - run + - /etc/alloy + - --storage.path=/var/lib/alloy/data + - --server.http.listen-addr=0.0.0.0:12345 + - 
--server.http.ui-path-prefix=/alloy + depends_on: + prometheus: + condition: service_healthy + kafka: + condition: service_healthy + labels: + - traefik.enable=true + - traefik.http.routers.alloy.rule=PathPrefix(`/alloy`) + - traefik.http.routers.alloy.entrypoints=private + - metrics.port=12345 + + # Node exporter for host metrics + node-exporter: + extends: + file: versions.yml + service: node-exporter + profiles: [ prometheus ] + restart: unless-stopped + volumes: + - /:/host:ro,rslave + network_mode: host + pid: host + command: + - --path.rootfs=/host + expose: + - 9100/tcp + labels: + - metrics.port=9100 + + # cAdvisor for container metrics + cadvisor: + extends: + file: versions.yml + service: cadvisor + profiles: [ prometheus ] + restart: unless-stopped + privileged: true + volumes: + - /:/rootfs:ro + - /sys:/sys:ro + - /dev/disk/:/dev/disk:ro + - /var/run/docker.sock:/var/run/docker.sock:ro + - /var/lib/docker/:/var/lib/docker:ro + devices: + - /dev/kmsg:/dev/kmsg + command: + # Minimize the amount of metrics reported, notably don't report anything from host + - --docker_only + - --store_container_labels=false + - --disable_root_cgroup_stats + - --enable_metrics=cpu,cpuLoad,diskIO,memory,network,oom_event,process,tcp,udp + expose: + - 8080/tcp + labels: + - metrics.port=8080 diff --git a/docker/docker-compose-tls.yml b/docker/docker-compose-tls.yml new file mode 100644 index 0000000..44fc605 --- /dev/null +++ b/docker/docker-compose-tls.yml @@ -0,0 +1,24 @@ +volumes: + akvorado-traefik: +services: + traefik: + environment: + TRAEFIK_ENTRYPOINTS_public_ADDRESS: ":80" + TRAEFIK_ENTRYPOINTS_public_HTTP_REDIRECTIONS_ENTRYPOINT_SCHEME: https + TRAEFIK_ENTRYPOINTS_public_HTTP_REDIRECTIONS_ENTRYPOINT_TO: publicsecure + TRAEFIK_ENTRYPOINTS_publicsecure_ADDRESS: ":443" + TRAEFIK_ENTRYPOINTS_publicsecure_HTTP_TLS: "true" + TRAEFIK_ENTRYPOINTS_publicsecure_HTTP_TLS_CERTRESOLVER: le + TRAEFIK_ENTRYPOINTS_publicsecure_HTTP_TLS_DOMAINS_0_SANS: ${TLS_DOMAIN:?TLS_DOMAIN is mandatory to setup TLS} + TRAEFIK_ENTRYPOINTS_publicsecure_HTTP_MIDDLEWARES: compress@docker + TRAEFIK_CERTIFICATESRESOLVERS_le: "true" + TRAEFIK_CERTIFICATESRESOLVERS_le_ACME_EMAIL: ${TLS_EMAIL:?TLS_EMAIL is mandatory to setup TLS} + TRAEFIK_CERTIFICATESRESOLVERS_le_ACME_STORAGE: /etc/traefik/acme.json + TRAEFIK_CERTIFICATESRESOLVERS_le_ACME_HTTPCHALLENGE: "true" + TRAEFIK_CERTIFICATESRESOLVERS_le_ACME_HTTPCHALLENGE_ENTRYPOINT: public + volumes: + - akvorado-traefik:/etc/traefik + ports: !override + - 127.0.0.1:8080:8080/tcp + - 80:80/tcp + - 443:443/tcp diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml new file mode 100644 index 0000000..8f717d8 --- /dev/null +++ b/docker/docker-compose.yml @@ -0,0 +1,293 @@ +--- +networks: + default: + enable_ipv6: true + ipam: + config: + - subnet: 247.16.14.0/24 + - subnet: fd1c:8ce3:6fb:1::/64 + driver: bridge + driver_opts: + com.docker.network.bridge.name: br-akvorado + +volumes: + akvorado-kafka: + akvorado-geoip: + akvorado-clickhouse: + akvorado-run: + akvorado-console-db: + +services: + kafka: + extends: + file: versions.yml + service: kafka + environment: + # KRaft settings + KAFKA_NODE_ID: 1 + KAFKA_PROCESS_ROLES: controller,broker + KAFKA_CONTROLLER_QUORUM_VOTERS: 1@kafka:9093 + # Listeners + KAFKA_LISTENERS: CLIENT://:9092,CONTROLLER://:9093 + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: CLIENT:PLAINTEXT,CONTROLLER:PLAINTEXT + KAFKA_ADVERTISED_LISTENERS: CLIENT://kafka:9092 + KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER + KAFKA_INTER_BROKER_LISTENER_NAME: CLIENT + # Misc 
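+      # (Replication factors and minimum ISR are set to 1 below because this
+      # compose file runs a single Kafka broker.)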
+ KAFKA_DELETE_TOPIC_ENABLE: "true" + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1 + KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1 + KAFKA_SHARE_COORDINATOR_STATE_TOPIC_REPLICATION_FACTOR: 1 + KAFKA_SHARE_COORDINATOR_STATE_TOPIC_MIN_ISR: 1 + KAFKA_LOG_DIRS: /var/lib/kafka/data + restart: unless-stopped + volumes: + - akvorado-kafka:/var/lib/kafka/data + healthcheck: + interval: 20s + test: ["CMD", + "/opt/kafka/bin/kafka-topics.sh", "--list", "--bootstrap-server", "kafka:9092"] + + kafka-ui: + extends: + file: versions.yml + service: kafka-ui + restart: unless-stopped + depends_on: + - kafka + environment: + KAFKA_CLUSTERS_0_NAME: local + KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS: kafka:9092 + KAFKA_CLUSTERS_0_READONLY: true + SERVER_SERVLET_CONTEXT_PATH: /kafka-ui + labels: + - traefik.enable=true + - traefik.http.routers.kafka-ui.rule=PathPrefix(`/kafka-ui`) + + redis: + extends: + file: versions.yml + service: redis + restart: unless-stopped + healthcheck: + interval: 20s + test: + - CMD-SHELL + - "timeout 3 redis-cli ping | grep -q PONG" + + akvorado-orchestrator: + extends: + file: versions.yml + service: akvorado + restart: unless-stopped + depends_on: + kafka: + condition: service_healthy + command: orchestrator /etc/akvorado/akvorado.yaml + volumes: + - ../config:/etc/akvorado:ro + - akvorado-geoip:/usr/share/GeoIP:ro + labels: + - traefik.enable=true + # Disable access logging of /api/v0/orchestrator/metrics + - traefik.http.routers.akvorado-orchestrator-metrics.rule=PathPrefix(`/api/v0/orchestrator/metrics`) + - traefik.http.routers.akvorado-orchestrator-metrics.service=akvorado-orchestrator + - traefik.http.routers.akvorado-orchestrator-metrics.observability.accesslogs=false + # Everything else is exposed to private entrypoint in /api/v0/orchestrator + - traefik.http.routers.akvorado-orchestrator.entrypoints=private + - traefik.http.routers.akvorado-orchestrator.rule=PathPrefix(`/api/v0/orchestrator`) + - traefik.http.services.akvorado-orchestrator.loadbalancer.server.port=8080 + - metrics.port=8080 + - metrics.path=/api/v0/metrics + akvorado-console: + extends: + file: versions.yml + service: akvorado + restart: unless-stopped + depends_on: + akvorado-orchestrator: + condition: service_healthy + redis: + condition: service_healthy + clickhouse: + condition: service_healthy + command: console http://akvorado-orchestrator:8080 + volumes: + - akvorado-console-db:/run/akvorado + environment: + AKVORADO_CFG_CONSOLE_DATABASE_DSN: /run/akvorado/console.sqlite + AKVORADO_CFG_CONSOLE_BRANDING: ${AKVORADO_CFG_CONSOLE_BRANDING-false} + healthcheck: + disable: ${CONSOLE_HEALTHCHECK_DISABLED-false} + labels: + - traefik.enable=true + # Only expose /debug endpoint on the private entrypoint. + - traefik.http.routers.akvorado-console-debug.rule=PathPrefix(`/debug`) + - traefik.http.routers.akvorado-console-debug.entrypoints=private + - traefik.http.routers.akvorado-console-debug.service=akvorado-console + # Disable access logging of /api/v0/console/metrics + - traefik.http.routers.akvorado-console-metrics.rule=PathPrefix(`/api/v0/console/metrics`) + - traefik.http.routers.akvorado-console-metrics.service=akvorado-console + - traefik.http.routers.akvorado-console-metrics.observability.accesslogs=false + # For anything else... 
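+      # ...match everything with a low priority, send it to the console, and
+      # have the console-auth middleware inject static Remote-User, Remote-Name
+      # and Remote-Email headers so the console sees an authenticated user.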
+ - "traefik.http.routers.akvorado-console.rule=!PathPrefix(`/debug`)" + - traefik.http.routers.akvorado-console.priority=1 + - traefik.http.routers.akvorado-console.middlewares=console-auth + - traefik.http.services.akvorado-console.loadbalancer.server.port=8080 + - traefik.http.middlewares.console-auth.headers.customrequestheaders.Remote-User=alfred + - traefik.http.middlewares.console-auth.headers.customrequestheaders.Remote-Name=Alfred Pennyworth + - traefik.http.middlewares.console-auth.headers.customrequestheaders.Remote-Email=alfred@example.com + - metrics.port=8080 + - metrics.path=/api/v0/metrics + akvorado-inlet: + extends: + file: versions.yml + service: akvorado + ports: + - 2055:2055/udp + - 4739:4739/udp + - 6343:6343/udp + restart: unless-stopped + depends_on: + akvorado-orchestrator: + condition: service_healthy + kafka: + condition: service_healthy + command: inlet http://akvorado-orchestrator:8080 + volumes: + - akvorado-run:/run/akvorado + labels: + - traefik.enable=true + # Disable access logging of /api/v0/inlet/metrics + - traefik.http.routers.akvorado-inlet-metrics.rule=PathPrefix(`/api/v0/inlet/metrics`) + - traefik.http.routers.akvorado-inlet-metrics.service=akvorado-inlet + - traefik.http.routers.akvorado-inlet-metrics.observability.accesslogs=false + # Everything else is exposed to private entrypoint in /api/v0/inlet + - traefik.http.routers.akvorado-inlet.entrypoints=private + - traefik.http.routers.akvorado-inlet.rule=PathPrefix(`/api/v0/inlet`) + - traefik.http.services.akvorado-inlet.loadbalancer.server.port=8080 + - akvorado.conntrack.fix=true + - metrics.port=8080 + - metrics.path=/api/v0/metrics + akvorado-outlet: + extends: + file: versions.yml + service: akvorado + ports: + - 10179:10179/tcp + restart: unless-stopped + depends_on: + akvorado-orchestrator: + condition: service_healthy + kafka: + condition: service_healthy + clickhouse: + condition: service_healthy + command: outlet http://akvorado-orchestrator:8080 + volumes: + - akvorado-run:/run/akvorado + environment: + AKVORADO_CFG_OUTLET_METADATA_CACHEPERSISTFILE: /run/akvorado/metadata.cache + labels: + - traefik.enable=true + # Disable access logging of /api/v0/outlet/metrics + - traefik.http.routers.akvorado-outlet-metrics.rule=PathPrefix(`/api/v0/outlet/metrics`) + - traefik.http.routers.akvorado-outlet-metrics.service=akvorado-outlet + - traefik.http.routers.akvorado-outlet-metrics.observability.accesslogs=false + # Everything else is exposed to private entrypoint in /api/v0/outlet + - traefik.http.routers.akvorado-outlet.entrypoints=private + - traefik.http.routers.akvorado-outlet.rule=PathPrefix(`/api/v0/outlet`) + - traefik.http.services.akvorado-outlet.loadbalancer.server.port=8080 + - metrics.port=8080 + - metrics.path=/api/v0/metrics + akvorado-conntrack-fixer: + extends: + file: versions.yml + service: akvorado + cap_add: + - NET_ADMIN + command: conntrack-fixer + restart: unless-stopped + network_mode: host + healthcheck: + disable: true + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + + clickhouse: + extends: + file: versions.yml + service: clickhouse + volumes: + - akvorado-clickhouse:/var/lib/clickhouse + - ./clickhouse/observability.xml:/etc/clickhouse-server/config.d/observability.xml + - ./clickhouse/server.xml:/etc/clickhouse-server/config.d/akvorado.xml + environment: + CLICKHOUSE_INIT_TIMEOUT: 60 + CLICKHOUSE_SKIP_USER_SETUP: 1 + cap_add: + - SYS_NICE + restart: unless-stopped + stop_grace_period: 30s + healthcheck: + interval: 20s + test: ["CMD", "wget", "-T", "1", 
"--spider", "--no-proxy", "http://127.0.0.1:8123/ping"] + labels: + - traefik.enable=true + - traefik.http.routers.clickhouse.entrypoints=private + - traefik.http.routers.clickhouse.rule=PathPrefix(`/clickhouse`) + - traefik.http.routers.clickhouse.middlewares=clickhouse-strip + - traefik.http.middlewares.clickhouse-strip.stripprefix.prefixes=/clickhouse + - metrics.port=8123 + + traefik: + extends: + file: versions.yml + service: traefik + restart: unless-stopped + environment: + TRAEFIK_API: "true" + TRAEFIK_API_BASEPATH: "/traefik" + TRAEFIK_METRICS_PROMETHEUS: "true" + TRAEFIK_METRICS_PROMETHEUS_MANUALROUTING: "true" + TRAEFIK_METRICS_PROMETHEUS_ADDROUTERSLABELS: "true" + TRAEFIK_PROVIDERS_DOCKER: "true" + TRAEFIK_PROVIDERS_DOCKER_EXPOSEDBYDEFAULT: "false" + TRAEFIK_ENTRYPOINTS_private_ADDRESS: ":8080" # all services + TRAEFIK_ENTRYPOINTS_private_HTTP_MIDDLEWARES: compress@docker + TRAEFIK_ENTRYPOINTS_public_ADDRESS: ":8081" # public services only + TRAEFIK_ENTRYPOINTS_public_HTTP_MIDDLEWARES: compress@docker + TRAEFIK_ACCESSLOG: "true" + labels: + - traefik.enable=true + - "traefik.http.routers.traefik.rule=PathPrefix(`/traefik`) && !PathPrefix(`/traefik/debug`)" + - traefik.http.routers.traefik.service=api@internal + - traefik.http.routers.traefik-metrics.rule=PathPrefix(`/traefik/metrics`) + - traefik.http.routers.traefik-metrics.priority=200 + - traefik.http.routers.traefik-metrics.service=prometheus@internal + - traefik.http.middlewares.compress.compress=true + - "traefik.http.middlewares.compress.compress.includedcontenttypes=\ + application/javascript,\ + application/json,\ + application/xml,\ + image/svg+xml,\ + text/css,\ + text/csv,\ + text/javascript,\ + text/markdown,\ + text/plain,\ + text/xml" + - metrics.port=8080 + - metrics.path=/traefik/metrics + expose: + - 8080/tcp + ports: + # Port 8080 is considered private as it exposes sensible unauthenticated + # services (ClickHouse, configuration, ...). Therefore, it is only exposed + # on the loopback. Port 8081 is the one you can expose to users. Check + # docker-compose-local.yml if you want to expose directly on port 80. + - 127.0.0.1:8080:8080/tcp + - 8081:8081/tcp + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro diff --git a/docker/grafana/dashboards/akvorado/inlet.json b/docker/grafana/dashboards/akvorado/inlet.json new file mode 100644 index 0000000..00bc3a4 --- /dev/null +++ b/docker/grafana/dashboards/akvorado/inlet.json @@ -0,0 +1,616 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Number of received UDP packets per second per exporter. 
Each exporter should appear here.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "pps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 10, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "rate(akvorado_inlet_flow_input_udp_packets_total[$__rate_interval])", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{exporter}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "UDP: packets received", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Errors from inlet component when processing received flows. Some errors are OK during start, but then they should disappear. 
Errors include decoding errors, SNMP errors, and processing errors.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "noValue": "No errors", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "pps" + }, + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 10, + "x": 10, + "y": 0 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "exemplar": false, + "expr": "rate(akvorado_inlet_core_flows_errors_total[$__rate_interval])", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{exporter}}: {{error}}", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "exemplar": false, + "expr": "rate(akvorado_inlet_flow_decoder_netflow_errors_total[$__rate_interval])", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{exporter}}: {{error}}", + "range": true, + "refId": "B", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "exemplar": false, + "expr": "rate(akvorado_inlet_flow_decoder_sflow_errors_total[$__rate_interval])", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{exporter}}: {{error}}", + "range": true, + "refId": "C", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "exemplar": false, + "expr": "rate(akvorado_inlet_metadata_provider_snmp_poller_error_requests_total[$__rate_interval])", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{exporter}}: {{error}}", + "range": true, + "refId": "D", + "useBackend": false + } + ], + "title": "Inlet: flow errors", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Number of dropped packets because workers were too busy to handle them. This should always be 0. 
Otherwise, increase the number of workers or the listening queue.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 1 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "pps" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 5, + "x": 0, + "y": 10 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(rate(akvorado_inlet_flow_input_udp_in_dropped_packets_total[$__rate_interval]) or vector(0))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "interval": "", + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "UDP: packets dropped (in)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Number of dropped packets because internal queue was full. This should be 0. Either increase the internal queue or add more core workers.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 1 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "pps" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 5, + "x": 5, + "y": 10 + }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(rate(akvorado_inlet_flow_input_udp_out_dropped_packets_total[$__rate_interval]) or vector(0))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "interval": "", + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "UDP: packets dropped (out)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "The number of decoded packets, by flow type.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 5, + "x": 10, + "y": 10 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": 
"rate(akvorado_inlet_flow_decoder_flows_total[$__rate_interval])", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{name}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Inlet: decoded packets", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "The number of flows forwarded to Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 5, + "x": 15, + "y": 10 + }, + "id": 7, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum(rate(akvorado_inlet_core_forwarded_flows_total[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "Forwarded", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Inlet: forwarded flows", + "type": "stat" + } + ], + "refresh": "1m", + "schemaVersion": 38, + "tags": [ + "akvorado" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Inlet", + "uid": "c6241364-fd78-46a6-b13e-d520d2776a20", + "version": 2, + "weekStart": "" +} diff --git a/docker/grafana/grafana.ini b/docker/grafana/grafana.ini new file mode 100644 index 0000000..89c510c --- /dev/null +++ b/docker/grafana/grafana.ini @@ -0,0 +1,36 @@ +# Authentication is "synced" with Akvorado: it uses the same HTTP headers. 
+ +[security] +disable_initial_admin_creation = false + +[users] +allow_sign_up = false +allow_org_create = false +auto_assign_org = true +auto_assign_org_role = Viewer # Viewer, Admin, Editor, or None +viewers_can_edit = true + +[auth] +disable_signout_menu = true + +[auth.proxy] +enabled = true +auto_sign_up = true +header_name = Remote-User +header_property = username +headers = Name:Remote-Name Email:Remote-Email + +# Default dashboard +[dashboards] +default_home_dashboard_path = /etc/grafana/dashboards/akvorado/inlet.json + +# More privacy + +[news] +news_feed_enabled = false + +[analytics] +enabled = false +reporting_enabled = false +check_for_updates = false +check_for_plugin_updates = false diff --git a/docker/grafana/provisioning/dashboards/akvorado.yaml b/docker/grafana/provisioning/dashboards/akvorado.yaml new file mode 100644 index 0000000..af0fa7f --- /dev/null +++ b/docker/grafana/provisioning/dashboards/akvorado.yaml @@ -0,0 +1,9 @@ +--- +apiVersion: 1 +providers: + - name: default + type: file + updateIntervalSeconds: 10 + options: + path: /etc/grafana/dashboards + foldersFromFileStructure: true diff --git a/docker/grafana/provisioning/datasources/akvorado.yaml b/docker/grafana/provisioning/datasources/akvorado.yaml new file mode 100644 index 0000000..5b5dc5b --- /dev/null +++ b/docker/grafana/provisioning/datasources/akvorado.yaml @@ -0,0 +1,13 @@ +--- +apiVersion: 1 +datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://prometheus:9090/prometheus + uid: PBFA97CFB590B2093 + - name: Loki + type: loki + access: proxy + url: http://loki:3100/loki + uid: QUU1WAEQUO0NAF6YZ diff --git a/docker/ipinfo-geoipupdate.sh b/docker/ipinfo-geoipupdate.sh new file mode 100644 index 0000000..c2fcf0c --- /dev/null +++ b/docker/ipinfo-geoipupdate.sh @@ -0,0 +1,41 @@ +#!/bin/sh + +trap exit TERM + +while true; do + ok=1 + for DATABASE in ${IPINFO_DATABASES}; do + if [ -f ${DATABASE}.mmdb ]; then + # Is it up-to-date? + LOCAL=$(sha256sum ${DATABASE}.mmdb | awk '{print $1}') + REMOTE=$(curl --silent https://ipinfo.io/data/free/${DATABASE}.mmdb/checksums?token=${IPINFO_TOKEN} \ + | sed -n 's/.*"sha256": *"\([a-f0-9]*\)".*/\1/p') + if [ "$LOCAL" = "$REMOTE" ]; then + echo "${DATABASE}.mmdb is up-to-date" + continue + fi + fi + RESPONSE=$(curl \ + --silent \ + --write-out '%{http_code}' \ + --remote-time \ + --location \ + --output "${DATABASE}.mmdb.new" \ + "https://ipinfo.io/data/free/${DATABASE}.mmdb?token=${IPINFO_TOKEN}") + case "$RESPONSE" in + 200) + echo "${DATABASE}.mmdb database downloaded in /data volume." + mv "${DATABASE}.mmdb.new" "${DATABASE}.mmdb" + ;; + *) + echo "Failed to download ${DATABASE}.mmdb database (HTTP error $RESPONSE)." + rm "${DATABASE}.mmdb.new" 2> /dev/null + ok=0 + ;; + esac + done + + [ $ok -eq 1 ] && touch /tmp/healthy + sleep "$UPDATE_FREQUENCY" & + wait $! 
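+    # Sleeping in the background and wait-ing on it (rather than a plain
+    # foreground `sleep`) lets the `trap exit TERM` above fire as soon as the
+    # container is asked to stop, instead of after the full UPDATE_FREQUENCY.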
+done diff --git a/docker/loki.yaml b/docker/loki.yaml new file mode 100644 index 0000000..05f56bb --- /dev/null +++ b/docker/loki.yaml @@ -0,0 +1,42 @@ +--- +auth_enabled: false + +server: + http_path_prefix: /loki + http_listen_address: 0.0.0.0 + http_listen_port: 3100 + +common: + instance_addr: 127.0.0.1 + path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +schema_config: + configs: + - from: 2020-10-24 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +limits_config: + discover_log_levels: false + allow_structured_metadata: true + retention_period: 720h + +compactor: + retention_enabled: true + working_directory: /loki/retention + delete_request_store: filesystem + +analytics: + reporting_enabled: false diff --git a/docker/prometheus.yml b/docker/prometheus.yml new file mode 100644 index 0000000..0d60947 --- /dev/null +++ b/docker/prometheus.yml @@ -0,0 +1,5 @@ +--- +global: + evaluation_interval: 30s +scrape_configs: + # none: everything is done from alloy diff --git a/docker/vector.tests.yaml b/docker/vector.tests.yaml new file mode 100644 index 0000000..037d39a --- /dev/null +++ b/docker/vector.tests.yaml @@ -0,0 +1,450 @@ +--- +# docker compose -f docker/docker-compose-dev.yml run --quiet --rm vector test +tests: + - name: "unknown application" + inputs: + - insert_at: base + type: log + log_fields: + container_id: b8ee56469 + container_name: akvorado-something-unknown-1 + label."com.docker.compose.service": something-unknown + message: >- + Hello world! + outputs: + - extract_from: combine + conditions: + - type: vrl + source: |- + assert_eq!(.message, "Hello world!") + assert!(is_timestamp(.timestamp)) + assert_eq!(._labels, + {"service_name": "something-unknown", + "instance": "akvorado-something-unknown-1"}) + assert_eq!(._metadata, null) + + - name: "akvorado logs" + inputs: + - insert_at: base + type: log + log_fields: + container_id: b8ee56469 + container_name: akvorado-akvorado-conntrack-fixer-1 + label."com.docker.compose.service": akvorado-conntrack-fixer-1 + message: >- + {"level":"info", + "version":"v2.0.0-beta.4-66-g0ad0128fc6cd-dirty", + "time":"2025-08-29T15:01:02Z", + "caller":"akvorado/cmd/components.go:38", + "module":"akvorado/cmd", + "message":"akvorado has started"} + outputs: + - extract_from: combine + conditions: + - type: vrl + source: |- + assert_eq!(.message, "akvorado has started") + assert_eq!(.timestamp, t'2025-08-29T15:01:02Z') + assert_eq!(._labels, + {"service_name": "akvorado-conntrack-fixer", + "instance": "akvorado-akvorado-conntrack-fixer-1", + "level": "info", + "module": "akvorado/cmd"}) + assert_eq!(._metadata, + {"caller": "akvorado/cmd/components.go:38", + "version": "v2.0.0-beta.4-66-g0ad0128fc6cd-dirty"}) + + - name: "kafka logs" + inputs: + - insert_at: base + type: log + log_fields: + container_id: b8ee56469 + container_name: akvorado-kafka-1 + label."com.docker.compose.service": kafka + message: |- + [2025-08-29 15:15:48,641] INFO [BrokerServer id=1] Waiting for all of the authorizer futures to be completed (kafka.server.BrokerServer) + outputs: + - extract_from: combine + conditions: + - type: vrl + source: |- + assert_eq!(.message, "[BrokerServer id=1] Waiting for all of the authorizer futures to be completed (kafka.server.BrokerServer)") + assert_eq!(.timestamp, t'2025-08-29T15:15:48.641Z') + assert_eq!(._labels, + {"service_name": "kafka", + "instance": 
"akvorado-kafka-1", + "level": "info"}) + assert_eq!(._metadata, null) + - name: "kafka logs multiline" + inputs: + - insert_at: base + type: log + log_fields: + container_id: b8ee56469 + container_name: akvorado-kafka-1 + label."com.docker.compose.service": kafka + message: |- + [2025-08-29 15:15:48,605] INFO KafkaConfig values: + - insert_at: base + type: log + log_fields: + container_id: b8ee56469 + container_name: akvorado-kafka-1 + label."com.docker.compose.service": kafka + message: |- + add.partitions.to.txn.retry.backoff.max.ms = 100 + - insert_at: base + type: log + log_fields: + container_id: b8ee56469 + container_name: akvorado-kafka-1 + label."com.docker.compose.service": kafka + message: |- + add.partitions.to.txn.retry.backoff.ms = 20 + outputs: + - extract_from: combine + conditions: + - type: vrl + source: |- + assert_eq!(.message, "KafkaConfig values:\n\ + add.partitions.to.txn.retry.backoff.max.ms = 100\n\ + add.partitions.to.txn.retry.backoff.ms = 20") + assert_eq!(.timestamp, t'2025-08-29T15:15:48.605Z') + assert_eq!(._labels, + {"service_name": "kafka", + "instance": "akvorado-kafka-1", + "level": "info"}) + assert_eq!(._metadata, null) + + - name: "redis logs" + inputs: + - insert_at: base + type: log + log_fields: + container_id: b8ee56469 + container_name: akvorado-redis-1 + label."com.docker.compose.service": redis + message: |- + 1:C 28 Aug 2025 04:08:22.843 # Warning: no config file specified + outputs: + - extract_from: combine + conditions: + - type: vrl + source: |- + assert_eq!(.message, "Warning: no config file specified") + assert_eq!(.timestamp, t'2025-08-28T04:08:22.843Z') + assert_eq!(._labels, + {"service_name": "redis", + "instance": "akvorado-redis-1", + "level": "warning", + "role": "RDB"}) + assert_eq!(._metadata, {"pid": 1}) + + - name: "alloy logs" + inputs: + - insert_at: base + type: log + log_fields: + container_id: b8ee56469 + container_name: akvorado-alloy-1 + label."com.docker.compose.service": alloy + message: >- + ts=2025-08-28T09:30:45.497277819Z + level=info + msg="Scraped metadata watcher stopped" + component_path=/ + component_id=prometheus.remote_write.default + subcomponent=rw + remote_name=0ffafb + url=http://prometheus:9090/prometheus/api/v1/write + outputs: + - extract_from: combine + conditions: + - type: vrl + source: |- + assert_eq!(.message, "Scraped metadata watcher stopped") + assert_eq!(.timestamp, t'2025-08-28T09:30:45.497277819Z') + assert_eq!(._labels, + {"service_name": "alloy", + "instance": "akvorado-alloy-1", + "level": "info"}) + assert_eq!(._metadata, + {"component_path": "/", + "component_id": "prometheus.remote_write.default", + "subcomponent": "rw", + "remote_name": "0ffafb", + "url": "http://prometheus:9090/prometheus/api/v1/write"}) + + - name: "loki logs" + inputs: + - insert_at: base + type: log + log_fields: + container_id: b8ee56469 + container_name: akvorado-loki-1 + label."com.docker.compose.service": loki + message: >- + ts=2025-08-29T05:07:45.543770684Z + caller=spanlogger.go:116 + middleware=QueryShard.astMapperware + org_id=fake + traceID=0dd74c5aaeb81d32 + user=fake + level=warn + msg="failed mapping AST" + err="context canceled" + query="{service_name=\"alloy\"}" + outputs: + - extract_from: combine + conditions: + - type: vrl + source: |- + assert_eq!(.message, "failed mapping AST: context canceled") + assert_eq!(.timestamp, t'2025-08-29T05:07:45.543770684Z') + assert_eq!(._labels, + {"service_name": "loki", + "instance": "akvorado-loki-1", + "level": "warning"}) + assert_eq!(._metadata, + 
{"caller": "spanlogger.go:116", + "middleware": "QueryShard.astMapperware", + "org_id": "fake", + "traceID": "0dd74c5aaeb81d32", + "user": "fake", + "query": "{service_name=\"alloy\"}"}) + + - name: "grafana logs" + inputs: + - insert_at: base + type: log + log_fields: + container_id: b8ee56469 + container_name: akvorado-grafana-1 + label."com.docker.compose.service": grafana + message: >- + logger=provisioning.alerting + t=2025-08-29T21:05:35.215005098Z + level=error + msg="can't read alerting provisioning files from directory" + path=/etc/grafana/provisioning/alerting + error="open /etc/grafana/provisioning/alerting: no such file or directory" + outputs: + - extract_from: combine + conditions: + - type: vrl + source: |- + assert_eq!(.message, "can't read alerting provisioning files from directory: \ + open /etc/grafana/provisioning/alerting: no such file or directory") + assert_eq!(.timestamp, t'2025-08-29T21:05:35.215005098Z') + assert_eq!(._labels, + {"service_name": "grafana", + "instance": "akvorado-grafana-1", + "level": "error"}) + assert_eq!(._metadata, + {"logger": "provisioning.alerting", + "path": "/etc/grafana/provisioning/alerting"}) + + - name: "prometheus logs" + inputs: + - insert_at: base + type: log + log_fields: + container_id: b8ee56469 + container_name: akvorado-prometheus-1 + label."com.docker.compose.service": prometheus + message: >- + time=2025-08-29T21:34:41.191Z + level=INFO + source=manager.go:540 + msg="Stopping notification manager..." + component=notifier + outputs: + - extract_from: combine + conditions: + - type: vrl + source: |- + assert_eq!(.message, "Stopping notification manager...") + assert_eq!(.timestamp, t'2025-08-29T21:34:41.191Z') + assert_eq!(._labels, + {"service_name": "prometheus", + "instance": "akvorado-prometheus-1", + "level": "info"}) + assert_eq!(._metadata, + {"source": "manager.go:540", + "component": "notifier"}) + + - name: "node-exporter logs" + inputs: + - insert_at: base + type: log + log_fields: + container_id: b8ee56469 + container_name: akvorado-node-exporter-1 + label."com.docker.compose.service": node-exporter + message: >- + time=2025-08-29T21:37:28.398Z + level=ERROR + source=diskstats_linux.go:264 + msg="Failed to open directory, disabling udev device properties" + collector=diskstats + path=/run/udev/data + outputs: + - extract_from: combine + conditions: + - type: vrl + source: |- + assert_eq!(.message, "Failed to open directory, disabling udev device properties") + assert_eq!(.timestamp, t'2025-08-29T21:37:28.398Z') + assert_eq!(._labels, + {"service_name": "node-exporter", + "instance": "akvorado-node-exporter-1", + "level": "error"}) + assert_eq!(._metadata, + {"source": "diskstats_linux.go:264", + "collector": "diskstats", + "path": "/run/udev/data"}) + + - name: "cadvidsor logs" + inputs: + - insert_at: base + type: log + log_fields: + container_id: b8ee56469 + container_name: akvorado-cadvisor-1 + label."com.docker.compose.service": cadvisor + message: >- + I0829 21:38:18.192196 1 factory.go:352] Registering Docker factory + outputs: + - extract_from: combine + conditions: + - type: vrl + source: |- + assert_eq!(.message, "Registering Docker factory") + assert!(is_timestamp(.timestamp)) + assert_eq!(._labels, + {"service_name": "cadvisor", + "instance": "akvorado-cadvisor-1", + "level": "info"}) + assert_eq!(._metadata, {"pid": 1, "caller": "factory.go:352"}) + + - name: "traefik access logs" + inputs: + - insert_at: base + type: log + log_fields: + container_id: b8ee56469 + container_name: akvorado-traefik-1 + 
label."com.docker.compose.service": traefik + message: >- + 240.0.2.1 + - + - + [29/Aug/2025:20:40:35 +0000] + "GET /api/v0/console/widget/flow-rate?11334 HTTP/1.0" + 200 + 46 + "-" + "-" + 1596365 + "akvorado-console@docker" + "http://240.0.2.10:8080" + 3ms + outputs: + - extract_from: combine + conditions: + - type: vrl + source: |- + assert_eq!(.message, "GET /api/v0/console/widget/flow-rate?11334 HTTP/1.0") + assert_eq!(.timestamp, t'2025-08-29T20:40:35Z') + assert_eq!(._labels, + {"service_name": "traefik", + "instance": "akvorado-traefik-1", + "status": 200}) + assert_eq!(._metadata, + {"backend_url": "http://240.0.2.10:8080", + "body_bytes_sent": 46, + "duration_ms": 3, + "frontend_name": "akvorado-console@docker", + "remote_addr": "240.0.2.1", + "request_count": 1596365}) + - name: "traefik logs" + inputs: + - insert_at: base + type: log + log_fields: + container_id: b8ee56469 + container_name: akvorado-traefik-1 + label."com.docker.compose.service": traefik + message: >- + 2025-08-29T19:17:05Z ERR error="accept tcp [::]:8081: use of closed network connection" entryPointName=public + outputs: + - extract_from: combine + conditions: + - type: vrl + source: |- + assert_eq!(.message, "accept tcp [::]:8081: use of closed network connection") + assert_eq!(.timestamp, t'2025-08-29T19:17:05Z') + assert_eq!(._labels, + {"service_name": "traefik", + "instance": "akvorado-traefik-1", + "level": "error"}) + assert_eq!(._metadata, + {"entryPointName": "public"}) + + - name: "clickhouse raw logs" + inputs: + - insert_at: base + type: log + log_fields: + container_id: b8ee56469 + container_name: akvorado-clickhouse-1 + label."com.docker.compose.service": clickhouse + message: >- + Merging configuration file '/etc/clickhouse-server/config.d/akvorado.xml'. + outputs: + - extract_from: combine + conditions: + - type: vrl + source: |- + assert_eq!(.message, "Merging configuration file '/etc/clickhouse-server/config.d/akvorado.xml'.") + assert!(is_timestamp(.timestamp)) + assert_eq!(._labels, + {"service_name": "clickhouse", + "instance": "akvorado-clickhouse-1"}) + assert_eq!(._metadata, null) + - name: "clickhouse logs" + inputs: + - insert_at: base + type: log + log_fields: + container_id: b8ee56469 + container_name: akvorado-clickhouse-1 + label."com.docker.compose.service": clickhouse + message: >- + {"date_time_utc":"2025-08-31T07:27:15Z", + "date_time":"1756625235.240594", + "thread_name":"", + "thread_id":"747", + "level":"Warning", + "query_id":"", + "logger_name":"Application", + "message":"Listen [0.0.0.0]:9009 failed: Poco::Exception. Code: 1000, e.code() = 98", + "source_file":"programs\/server\/Server.cpp; void DB::Server::createServer(Poco::Util::AbstractConfiguration &, const std::string &, const char *, bool, bool, std::vector &, CreateServerFunc &&) const", + "source_line":"564"} + outputs: + - extract_from: combine + conditions: + - type: vrl + source: |- + assert_eq!(.message, "Listen [0.0.0.0]:9009 failed: Poco::Exception. 
Code: 1000, e.code() = 98") + assert_eq!(.timestamp, t'2025-08-31T07:27:15Z') + assert_eq!(._labels, + {"service_name": "clickhouse", + "instance": "akvorado-clickhouse-1", + "level": "warning"}) + assert_eq!(._metadata, + {"thread_id": "747", + "logger_name": "Application", + "source_file": "programs/server/Server.cpp; void DB::Server::createServer(Poco::Util::AbstractConfiguration &, const std::string &, const char *, bool, bool, std::vector &, CreateServerFunc &&) const", + "source_line": "564"}) diff --git a/docker/vector.yaml b/docker/vector.yaml new file mode 100644 index 0000000..e559825 --- /dev/null +++ b/docker/vector.yaml @@ -0,0 +1,269 @@ +--- +api: + enabled: true + address: 0.0.0.0:8686 + +sources: + internal_metrics: + type: internal_metrics + scrape_interval_secs: 10 + internal_logs: + type: internal_logs + docker: + type: docker_logs + include_labels: + - "com.docker.compose.project=akvorado" + +transforms: + base: + type: remap + inputs: + - docker + source: | + .service_name = replace(string!(.label."com.docker.compose.service"), r'(.+?)(?:-\d+)?', "$$1") + ._labels.service_name = .service_name + ._labels.instance = .container_name + routes: + type: route + inputs: + - base + route: + akvorado: 'starts_with(string!(.service_name), "akvorado-")' + kafka: '.service_name == "kafka"' + redis: '.service_name == "redis"' + alloy: '.service_name == "alloy"' + loki: '.service_name == "loki"' + grafana: '.service_name == "grafana"' + prometheus: '.service_name == "prometheus"' + nodeexporter: '.service_name == "node-exporter"' + cadvisor: '.service_name == "cadvisor"' + traefik: '.service_name == "traefik"' + clickhouse: '.service_name == "clickhouse" || .service_name == "clickhouse-keeper"' + + from_akvorado: + type: remap + inputs: + - routes.akvorado + source: | + parsed = parse_json!(.message) + .timestamp = parse_timestamp!(parsed.time, format: "%+") + .message = parsed.message + ._labels.level = parsed.level + ._labels.module = parsed.module + ._metadata = parsed + del(._metadata.message) + del(._metadata.time) + del(._metadata.level) + del(._metadata.module) + + from_kafka_multiline: + type: reduce + inputs: + - routes.kafka + group_by: + - .container_id + starts_when: | + match(string!(.message), r'^\[\d{4}-\d{2}-\d{2} ') + expire_after_ms: 1000 + merge_strategies: + message: concat_newline + from_kafka: + type: remap + inputs: + - from_kafka_multiline + source: | + parsed = parse_regex!(string!(.message), + r'^\[(?P[^\]]+)\]\s+(?P\w+)\s+(?P(?s:.*))$$') + .timestamp = parse_timestamp!(parsed.timestamp, format: "%Y-%m-%d %H:%M:%S,%3f") + .message = parsed.message + ._labels.level = parsed.level + + from_redis: + type: remap + inputs: + - routes.redis + source: | + parsed = parse_regex!(string!(.message), r'(?x) + ^(?P\d+): + (?P[XCSM])\s+ + (?P\d+\s+\w+\s+\d{4}\s+\d{2}:\d{2}:\d{2}\.\d{3})\s+ + (?P[*\#.-])\s+ + (?P.*)$$') + .timestamp = parse_timestamp!(parsed.timestamp, format: "%e %b %Y %H:%M:%S%.3f") + .message = parsed.message + ._labels.role = if parsed.role == "X" { "sentinel" } else if parsed.role == "C" { "RDB" } else if parsed.role == "S" { "slave" } else { "master" } + ._labels.level = if parsed.level == "." 
{ "debug" } else if parsed.level == "-" { "info" } else if parsed.level == "*" { "notice" } else { "warning" } + ._metadata.pid = to_int!(parsed.pid) + + from_logfmt: + type: remap + inputs: + - routes.alloy + - routes.loki + - routes.grafana + - routes.prometheus + - routes.nodeexporter + source: | + parsed = parse_logfmt!(.message) + .timestamp = parse_timestamp!(parsed.ts || parsed.t || parsed.time, format: "%+") + .message = join!(unique(compact( + [parsed.msg || parsed.message || parsed.error || parsed.err, + parsed.err || parsed.error], recursive: false)), separator: ": ") + ._labels.level = parsed.level + ._metadata = parsed + del(._metadata.ts) + del(._metadata.t) + del(._metadata.time) + del(._metadata.msg) + del(._metadata.message) + del(._metadata.level) + del(._metadata.err) + del(._metadata.error) + + from_vector: + type: remap + inputs: + - internal_logs + source: | + ._labels.service_name = "vector" + ._labels.instance = .host + ._metadata = .metadata + ._metadata.pid = .pid + + from_cadvisor: + type: remap + inputs: + - routes.cadvisor + source: | + parsed = parse_regex!(string!(.message), r'(?x) + ^(?P[IWEF]) + (?P\d{4}\s\d{2}:\d{2}:\d{2}\.\d+)\s+ + (?P\d+)\s+ + (?P[^]]+)\]\s+ + (?P.*)$$') + # Timestamp is missing the year + # .timestamp = parse_timestamp!(parsed.timestamp, format: "%m%d %H:%M:%S%.6f") + .message = parsed.message + ._labels.level = if parsed.level == "I" { "info" } else if parsed.level == "W" { "warning" } else if parsed.level == "E" { "error" } else { "fatal" } + ._metadata.pid = to_int!(parsed.pid) + ._metadata.caller = parsed.caller + + from_traefik: + type: remap + inputs: + - routes.traefik + source: | + parsed, err = parse_regex(.message, r'(?x) + ^(?P\S+)\s + -\s + (?P\S+)\s + \[(?P[^\]]+)\]\s + "(?P\S+)\s(?P\S+)\s(?P[^"]+)"\s + (?P\d+)\s + (?P\d+)\s + "(?P[^"]*)"\s + "(?P[^"]*)"\s + (?P\d+)\s + "(?P[^"]*)"\s + "(?P[^"]*)"\s + (?P\d+)ms$$') + if err == null { + .timestamp = parse_timestamp!(parsed.timestamp, "%d/%b/%Y:%H:%M:%S %z") + .message = join!([parsed.method, parsed.path, parsed.protocol], " ") + ._labels.status = to_int!(parsed.status) + del(parsed.timestamp) + del(parsed.method) + del(parsed.path) + del(parsed.protocol) + del(parsed.status) + parsed.body_bytes_sent = to_int!(parsed.body_bytes_sent) + parsed.request_count = to_int!(parsed.request_count) + parsed.duration_ms = to_int!(parsed.duration_ms) + parsed = filter(parsed) -> |key, val| { + val != "-" + } + ._metadata = parsed + } else { + parsed, err = parse_regex(.message, r'(?x) + ^(?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\S+)\s + (?P\S+)\s + (?P.*)$$') + if err == null { + .timestamp = parse_timestamp!(parsed.timestamp, "%+") + ._labels.level = parsed.level + parsed = parse_logfmt!(parsed.remaining) + .message = parsed.msg || parsed.message || parsed.error + ._metadata = parsed + del(._metadata.msg) + del(._metadata.message) + del(._metadata.error) + } + } + + from_clickhouse: + type: remap + inputs: + - routes.clickhouse + source: | + parsed, err = parse_json(.message) + if err == null { + parsed = filter(object!(parsed)) -> |key, val| { + val != "" + } + .timestamp = parse_timestamp!(parsed.date_time_utc, format: "%+") + .message = parsed.message + ._labels.level = parsed.level + ._metadata = parsed + del(._metadata.message) + del(._metadata.date_time_utc) + del(._metadata.date_time) + del(._metadata.level) + } + + combine: + type: remap + inputs: + - from_akvorado + - from_kafka + - from_redis + - from_logfmt + - from_vector + - from_cadvisor + - from_traefik + - from_clickhouse + 
- routes._unmatched + source: | + if exists(._labels.level) { + level = downcase!(._labels.level) + if starts_with(level, "i") || starts_with(level, "n") { + level = "info" + } else if starts_with(level, "d") { + level = "debug" + } else if starts_with(level, "w") { + level = "warning" + } else if starts_with(level, "er") { + level = "error" + } else if starts_with(level, "c") || starts_with(level, "a") || starts_with(level, "f") || starts_with(level, "e") { + level = "critical" + } else if starts_with(level, "t") { + level = "trace" + } + ._labels.level = level + } + +sinks: + prometheus: + type: prometheus_exporter + inputs: + - internal_metrics + loki: + type: loki + inputs: + - combine + endpoint: http://loki:3100/loki + encoding: + codec: "text" + labels: + "*": "{{ ._labels }}" + structured_metadata: + "*": "{{ ._metadata }}" diff --git a/docker/versions.yml b/docker/versions.yml new file mode 100644 index 0000000..902d15d --- /dev/null +++ b/docker/versions.yml @@ -0,0 +1,50 @@ +--- +services: + # main services + kafka: + image: apache/kafka:4.1.0 # \d+\.\d+\.\d+ + redis: + image: valkey/valkey:7.2 # \d+\.\d+ + clickhouse: + image: clickhouse/clickhouse-server:25.8 # \d+\.[38] + clickhouse-keeper: + image: clickhouse/clickhouse-keeper:25.8 # \d+\.[38] + traefik: + image: traefik:v3.5 # v\d+\.\d+ + maxmind-geoipupdate: + image: ghcr.io/maxmind/geoipupdate:v7 + ipinfo-geoipupdate: + image: ghcr.io/akvorado/ipinfo-geoipupdate:latest + build: + context: . + dockerfile: Dockerfile.ipinfo-geoipupdate + akvorado: + image: ghcr.io/akvorado/akvorado:2.0.1 + + # observability + grafana: + image: grafana/grafana-oss:10.2.6 # \d+\.\d+\.\d+ + alloy: + image: grafana/alloy:v1.10.2 # v\d+\.\d+\.\d+ + loki: + image: grafana/loki:3.5.5 # \d+\.\d+\.\d+ + prometheus: + image: prom/prometheus:v3.5.0 # v\d+\.\d+\.\d+ + node-exporter: + image: prom/node-exporter:v1.9.1 # v\d+\.\d+\.\d+ + cadvisor: + image: ghcr.io/google/cadvisor:v0.53.0 # v\d+\.\d+\.\d+ + kafka-ui: + image: ghcr.io/kafbat/kafka-ui:v1.3.0 # v\d+\.\d+\.\d+ + vector: + image: timberio/vector:0.49.0-alpine # \d+\.\d+\.\d+-alpine + + # for tests + srlinux: + image: ghcr.io/nokia/srlinux:23.10.6 + postgres: + image: postgres:17 # \d+ + mysql: + image: mariadb:12 # \d+ + mock-oauth2-server: + image: ghcr.io/navikt/mock-oauth2-server:3.0.0 # \d+\.\d+\.\d+
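The Vector pipeline defined above is exercised by the unit tests in docker/vector.tests.yaml, and the Prometheus image bundles its own config linter. A minimal sketch of how the pieces added by this commit could be checked before rollout; the docker-compose-dev.yml file comes from the comment at the top of vector.tests.yaml, and the /etc/prometheus/prometheus.yml path is an assumption (it is the image default, but the volume mapping is not shown here):

# Run the Vector log-pipeline unit tests (command taken from docker/vector.tests.yaml):
docker compose -f docker/docker-compose-dev.yml run --quiet --rm vector test

# Lint docker/prometheus.yml with promtool from the prom/prometheus image,
# assuming it is mounted at the image's default location:
docker compose run --rm --no-deps --entrypoint promtool prometheus \
  check config /etc/prometheus/prometheus.yml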