From 66fa596d6375bc2b4d866e2ecf4b3d3d4255c3fc Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin
Date: Thu, 16 Jan 2025 02:30:04 +0100
Subject: [PATCH] Add VictoriaLogs results for c6a.4xlarge

VictoriaLogs is a database for logs. It wasn't designed as an analytical
database, but it turns out that it provides enough functionality for
ClickBench: it happily consumes hits.json via the JSON lines data ingestion
endpoint, and its query language supports all the queries used in ClickBench -
see victorialogs/queries.logsql with all the queries translated into LogsQL
( https://docs.victoriametrics.com/victorialogs/logsql/ ).

Fun fact - LogsQL queries are shorter than the corresponding SQL queries,
and they are easier to read (YMMV) :)

VictoriaLogs doesn't need any table schema, so the victorialogs folder has
no create.sql file.

The resulting performance isn't as good as ClickHouse's, but there are a few
queries where VictoriaLogs outperforms ClickHouse thanks to its bloom filter
and "log streams" features.
See https://docs.victoriametrics.com/victorialogs/keyconcepts/ .
These features speed up "needle in the haystack" queries over large volumes
of logs that do not fit in RAM, such as "search for logs with the given
trace_id". Such queries are typical during log exploration.
---
 victorialogs/README.md                |  7 ++++
 victorialogs/benchmark.sh             | 29 ++++++++++++++
 victorialogs/queries.logsql           | 43 +++++++++++++++++++++
 victorialogs/results/c6a.4xlarge.json | 55 +++++++++++++++++++++++++++
 victorialogs/run.sh                   | 22 +++++++++++
 5 files changed, 156 insertions(+)
 create mode 100644 victorialogs/README.md
 create mode 100755 victorialogs/benchmark.sh
 create mode 100644 victorialogs/queries.logsql
 create mode 100644 victorialogs/results/c6a.4xlarge.json
 create mode 100755 victorialogs/run.sh

diff --git a/victorialogs/README.md b/victorialogs/README.md
new file mode 100644
index 000000000..8f06d6b59
--- /dev/null
+++ b/victorialogs/README.md
@@ -0,0 +1,7 @@
+# VictoriaLogs
+
+There is no need to create any table schema - just ingest `hits.json` into VictoriaLogs
+via the [JSON stream API](https://docs.victoriametrics.com/victorialogs/data-ingestion/#json-stream-api).
+See `benchmark.sh` for details.
+
+Queries are translated into [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/) and are put into `queries.logsql`.
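For illustration, this is roughly what the schema-less ingestion and querying look like, using the same endpoints and URL parameters as `benchmark.sh` below. A minimal sketch; the sample log line and its field values are made up:

```sh
# Ingest a single JSON line (hypothetical record); _time_field names the
# timestamp field and _stream_fields the fields that identify the log stream:
echo '{"EventTime":"2013-07-15T10:00:00Z","CounterID":62,"AdvEngineID":0,"URL":"http://example.com/"}' \
  | curl -T - -X POST 'http://localhost:9428/insert/jsonline?_time_field=EventTime&_stream_fields=AdvEngineID,CounterID'

# Query it back with LogsQL:
curl http://localhost:9428/select/logsql/query --data-urlencode 'query={CounterID=62} | count()'
```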
diff --git a/victorialogs/benchmark.sh b/victorialogs/benchmark.sh
new file mode 100755
index 000000000..da2401d1d
--- /dev/null
+++ b/victorialogs/benchmark.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+# Install
+
+RELEASE_VERSION=v1.6.0-victorialogs
+
+wget --no-verbose --continue https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/${RELEASE_VERSION}/victoria-logs-linux-amd64-${RELEASE_VERSION}.tar.gz
+tar xzf victoria-logs-linux-amd64-${RELEASE_VERSION}.tar.gz
+./victoria-logs-prod -loggerOutput=stdout > server.log &
+
+while true
+do
+    curl http://localhost:9428/select/logsql/query -d 'query=_time:2100-01-01Z' && break
+    sleep 1
+done
+
+# Load the data

+wget --no-verbose --continue https://datasets.clickhouse.com/hits_compatible/hits.json.gz
+gunzip hits.json.gz
+time cat hits.json | split -n r/8 -d --filter="curl -T - -X POST 'http://localhost:9428/insert/jsonline?_time_field=EventTime&_stream_fields=AdvEngineID,CounterID'"
+
+# Run the queries
+
+./run.sh
+
+# Determine on-disk size of the ingested data
+
+du -sb victoria-logs-data
diff --git a/victorialogs/queries.logsql b/victorialogs/queries.logsql
new file mode 100644
index 000000000..66a73ad42
--- /dev/null
+++ b/victorialogs/queries.logsql
@@ -0,0 +1,43 @@
+* | count()
+{AdvEngineID!=0} | count()
+* | sum(AdvEngineID), count(), avg(ResolutionWidth)
+* | avg(UserID)
+* | count_uniq(UserID)
+* | count_uniq(SearchPhrase)
+* | min(EventDate), max(EventDate)
+{AdvEngineID!=0} | by (AdvEngineID) count() c | sort (c desc)
+* | by (RegionID) count_uniq(UserID) u | first 10 (u desc)
+* | by (RegionID) sum(AdvEngineID), count() c, avg(ResolutionWidth), count_uniq(UserID) | first 10 (c desc)
+MobilePhoneModel:* | by (MobilePhoneModel) count_uniq(UserID) u | first 10 (u desc)
+MobilePhoneModel:* | by (MobilePhone, MobilePhoneModel) count_uniq(UserID) u | first 10 (u desc)
+SearchPhrase:* | top 10 (SearchPhrase)
+SearchPhrase:* | by (SearchPhrase) count_uniq(UserID) u | first 10 (u desc)
+SearchPhrase:* | top 10 (SearchEngineID, SearchPhrase)
+* | top 10 (UserID)
+* | top 10 (UserID, SearchPhrase)
+* | by (UserID, SearchPhrase) count() | limit 10
+* | math floor((_time % 1h)/1m) m | top 10 (UserID, m, SearchPhrase)
+UserID:=435090932899640449 | keep UserID
+URL:google | count()
+URL:google SearchPhrase:* | by (SearchPhrase) min(URL), count() c | first 10 (c desc)
+Title:Google -URL:".google." SearchPhrase:* | by (SearchPhrase) min(URL), min(Title), count() c, count_uniq(UserID) | first 10 (c desc)
+URL:google | first 10 (_time)
+SearchPhrase:* | first 10 (_time) | keep SearchPhrase
+SearchPhrase:* | first 10 (SearchPhrase) | keep SearchPhrase
+SearchPhrase:* | first 10 (_time, SearchPhrase) | keep SearchPhrase
+URL:* | len(URL) url_len | by (CounterID) avg(url_len) l, count() c | c:>100_000 | first 25 (l desc)
+Referer:* | cp Referer k | replace_regexp('^https?://(?:www[.])?([^/]+)/.*$', '$1') at k | len(Referer) referer_len | by (k) avg(referer_len) l, count() c, min(Referer) | c:>100_000 | first 25 (l desc)
+* | math ResolutionWidth x0, ResolutionWidth+1 x1, ResolutionWidth+2 x2, ResolutionWidth+3 x3, ResolutionWidth+4 x4, ResolutionWidth+5 x5, ResolutionWidth+6 x6, ResolutionWidth+7 x7, ResolutionWidth+8 x8, ResolutionWidth+9 x9, ResolutionWidth+10 x10, ResolutionWidth+11 x11, ResolutionWidth+12 x12, ResolutionWidth+13 x13, ResolutionWidth+14 x14, ResolutionWidth+15 x15, ResolutionWidth+16 x16, ResolutionWidth+17 x17, ResolutionWidth+18 x18, ResolutionWidth+19 x19, ResolutionWidth+20 x20, ResolutionWidth+21 x21, ResolutionWidth+22 x22, ResolutionWidth+23 x23, ResolutionWidth+24 x24, ResolutionWidth+25 x25, ResolutionWidth+26 x26, ResolutionWidth+27 x27, ResolutionWidth+28 x28, ResolutionWidth+29 x29, ResolutionWidth+30 x30, ResolutionWidth+31 x31, ResolutionWidth+32 x32, ResolutionWidth+33 x33, ResolutionWidth+34 x34, ResolutionWidth+35 x35, ResolutionWidth+36 x36, ResolutionWidth+37 x37, ResolutionWidth+38 x38, ResolutionWidth+39 x39, ResolutionWidth+40 x40, ResolutionWidth+41 x41, ResolutionWidth+42 x42, ResolutionWidth+43 x43, ResolutionWidth+44 x44, ResolutionWidth+45 x45, ResolutionWidth+46 x46, ResolutionWidth+47 x47, ResolutionWidth+48 x48, ResolutionWidth+49 x49, ResolutionWidth+50 x50, ResolutionWidth+51 x51, ResolutionWidth+52 x52, ResolutionWidth+53 x53, ResolutionWidth+54 x54, ResolutionWidth+55 x55, ResolutionWidth+56 x56, ResolutionWidth+57 x57, ResolutionWidth+58 x58, ResolutionWidth+59 x59, ResolutionWidth+60 x60, ResolutionWidth+61 x61, ResolutionWidth+62 x62, ResolutionWidth+63 x63, ResolutionWidth+64 x64, ResolutionWidth+65 x65, ResolutionWidth+66 x66, ResolutionWidth+67 x67, ResolutionWidth+68 x68, ResolutionWidth+69 x69, ResolutionWidth+70 x70, ResolutionWidth+71 x71, ResolutionWidth+72 x72, ResolutionWidth+73 x73, ResolutionWidth+74 x74, ResolutionWidth+75 x75, ResolutionWidth+76 x76, ResolutionWidth+77 x77, ResolutionWidth+78 x78, ResolutionWidth+79 x79, ResolutionWidth+80 x80, ResolutionWidth+81 x81, ResolutionWidth+82 x82, ResolutionWidth+83 x83, ResolutionWidth+84 x84, ResolutionWidth+85 x85, ResolutionWidth+86 x86, ResolutionWidth+87 x87, ResolutionWidth+88 x88, ResolutionWidth+89 x89 | sum(x0), sum(x1), sum(x2), sum(x3), sum(x4), sum(x5), sum(x6), sum(x7), sum(x8), sum(x9), sum(x10), sum(x11), sum(x12), sum(x13), sum(x14), sum(x15), sum(x16), sum(x17), sum(x18), sum(x19), sum(x20), sum(x21), sum(x22), sum(x23), sum(x24), sum(x25), sum(x26), sum(x27), sum(x28), sum(x29), sum(x30), sum(x31), sum(x32), sum(x33), sum(x34), sum(x35), sum(x36), sum(x37), sum(x38), sum(x39), sum(x40), sum(x41), sum(x42), sum(x43), sum(x44), sum(x45), sum(x46), sum(x47), sum(x48), sum(x49), sum(x50), sum(x51), sum(x52), sum(x53), sum(x54), sum(x55), sum(x56), sum(x57), sum(x58), sum(x59), sum(x60), sum(x61), sum(x62), sum(x63), sum(x64), sum(x65), sum(x66), sum(x67), sum(x68), sum(x69), sum(x70), sum(x71), sum(x72), sum(x73), sum(x74), sum(x75), sum(x76), sum(x77), sum(x78), sum(x79), sum(x80), sum(x81), sum(x82), sum(x83), sum(x84), sum(x85), sum(x86), sum(x87), sum(x88), sum(x89)
+SearchPhrase:* | by (SearchEngineID, ClientIP) count() c, sum(IsRefresh), avg(ResolutionWidth) | first 10 (c desc)
+SearchPhrase:* | by (WatchID, ClientIP) count() c, sum(IsRefresh), avg(ResolutionWidth) | first 10 (c desc)
+* | by (WatchID, ClientIP) count() c, sum(IsRefresh), avg(ResolutionWidth) | first 10 (c desc)
+* | top 10 (URL)
+* | format '1' as x | top 10 (x, URL)
+* | math ClientIP x0, ClientIP - 1 x1, ClientIP - 2 x2, ClientIP - 3 x3 | top 10 (x0, x1, x2, x3)
+{CounterID=62} EventDate:>='2013-07-01' EventDate:<='2013-07-31' DontCountHits:=0 IsRefresh:=0 URL:* | top 10 (URL)
+{CounterID=62} EventDate:>='2013-07-01' EventDate:<='2013-07-31' DontCountHits:=0 IsRefresh:=0 Title:* | top 10 (Title)
+{CounterID=62} EventDate:>='2013-07-01' EventDate:<='2013-07-31' IsRefresh:=0 IsLink:!=0 IsDownload:=0 | by (URL) count() PageViews | sort (PageViews desc) limit 10 offset 1_000
+{CounterID=62} EventDate:>='2013-07-01' EventDate:<='2013-07-31' IsRefresh:=0 | format if (SearchEngineID:=0 AdvEngineID:=0) '' as Src | cp URL Dst | by (TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst) count() PageViews | sort (PageViews desc) limit 10 offset 1_000
+{CounterID=62} EventDate:>='2013-07-01' EventDate:<='2013-07-31' IsRefresh:=0 TraficSourceID:in(-1, 6) RefererHash:=3594120000172545465 | by (URLHash, EventDate) count() PageViews | sort (PageViews desc) limit 10 offset 100
+{CounterID=62} EventDate:>='2013-07-01' EventDate:<='2013-07-31' IsRefresh:=0 DontCountHits:=0 URLHash:=2868770270353813622 | by (WindowClientWidth, WindowClientHeight) count() PageViews | sort (PageViews desc) limit 10 offset 10_000
+{CounterID=62} EventDate:>='2013-07-14' EventDate:<='2013-07-15' IsRefresh:=0 DontCountHits:=0 | math floor(_time / 1m) minute | by (minute) count() PageViews | sort by (minute) limit 10 offset 1_000
diff --git a/victorialogs/results/c6a.4xlarge.json b/victorialogs/results/c6a.4xlarge.json
new file mode 100644
index 000000000..a00923275
--- /dev/null
+++ b/victorialogs/results/c6a.4xlarge.json
@@ -0,0 +1,55 @@
+{
+    "system": "VictoriaLogs",
+    "date": "2025-01-16",
+    "machine": "c6a.4xlarge, 500gb gp2",
+    "cluster_size": 1,
+    "comment": "",
+    "tags": ["Go", "column-oriented"],
+    "load_time": 1265,
+    "data_size": 17110607560,
+    "result": [
+[0.03, 0.013, 0.014],
+[0.086, 0.01, 0.01],
+[0.412, 0.252, 0.252],
+[0.454, 0.297, 0.286],
+[3.611, 3.322, 3.092],
+[2.165, 2.02, 1.959],
+[0.064, 0.046, 0.047],
+[0.03, 0.012, 0.013],
+[3.32, 3.266, 3.273],
+[4.443, 4.434, 4.431],
+[0.673, 0.641, 0.62],
+[0.933, 0.874, 0.882],
+[2.667, 2.571, 2.503],
+[5.473, 5.097, 4.742],
+[2.816, 2.761, 2.755],
+[5.198, 5.083, 5.155],
+[10.826, 10.565, 10.728],
+[12.718, 12.179, 12.549],
+[25.186, 24.501, 24.525],
+[0.097, 0.026, 0.027],
+[0.567, 0.308, 0.303],
+[0.319, 0.314, 0.324],
+[1.6, 0.848, 0.817],
+[0.697, 0.537, 0.527],
+[0.797, 0.768, 0.756],
+[1.913, 1.986, 2.024],
+[0.854, 0.846, 0.83],
+[2.474, 2.195, 2.149],
+[19.734, 19.245, 19.043],
+[20.552, 20.473, 20.34],
+[4.718, 4.285, 4.407],
+[6.078, 5.803, 5.951],
+[null, null, null],
+[13.404, 13.343, 12.318],
+[13.779, 13.342, 13.683],
+[9.473, 9.896, 9.906],
+[0.125, 0.13, 0.119],
+[0.057, 0.038, 0.042],
+[0.056, 0.035, 0.037],
+[0.32, 0.298, 0.308],
+[0.047, 0.028, 0.026],
+[0.043, 0.024, 0.027],
+[0.047, 0.027, 0.03]
+    ]
+}
diff --git a/victorialogs/run.sh b/victorialogs/run.sh
new file mode 100755
index 000000000..36fafb724
--- /dev/null
+++ b/victorialogs/run.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+TRIES=3
+
+set -f
+cat queries.logsql | while read -r query; do
+    sync
+    echo 3 | sudo tee /proc/sys/vm/drop_caches > /dev/null
+
+    echo -n "["
+    for i in $(seq 1 $TRIES); do
+        t1=$(date +%s%3N)
+        curl -s --fail http://localhost:9428/select/logsql/query --data-urlencode "query=$query" > /dev/null
+        exit_code=$?
+        t2=$(date +%s%3N)
+        duration=$((t2-t1))
+        RES=$(awk "BEGIN {print $duration / 1000}" | tr ',' '.')
+        [[ "$exit_code" == "0" ]] && echo -n "${RES}" || echo -n "null"
+        [[ "$i" != $TRIES ]] && echo -n ", "
+    done
+    echo "],"
+done
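A side note on the loader line in `benchmark.sh` above: `split -n r/8 -d --filter=...` distributes the input lines of `hits.json` round-robin to eight filter processes, so the data is uploaded over eight concurrent HTTP connections. A toy-scale sketch of the same mechanism (GNU coreutils; the `wc -l` filter is just a stand-in for the `curl` uploader):

```sh
# Round-robin split of six input lines across two concurrent filter
# processes; each `wc -l` instance receives three lines and prints "3".
seq 6 | split -n r/2 -d --filter='wc -l'
```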
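To illustrate the commit message's claim about query brevity, compare the eighth ClickBench query in its standard SQL form with its LogsQL translation from `queries.logsql` above:

```
-- SQL (from ClickBench's queries.sql):
SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC;

-- the LogsQL equivalent:
{AdvEngineID!=0} | by (AdvEngineID) count() c | sort (c desc)
```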