Skip to content

Commit

Permalink
metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
iddqdex committed Jan 29, 2025
1 parent 730844a commit ad3a882
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 11 deletions.
73 changes: 64 additions & 9 deletions ydb/library/workload/log/log.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ TVector<std::string> TLogGenerator::GetCleanPaths() const {
}

std::string TLogGenerator::RandomWord(bool canBeEmpty) const {
if (canBeEmpty && (RandomNumber<char>() % 2) ) {
if (canBeEmpty && !RandomIsNotNull()) {
return {};
}
size_t len = RandomNumber<size_t>(Params.StringLen);
Expand All @@ -191,6 +191,15 @@ std::string TLogGenerator::RandomWord(bool canBeEmpty) const {
return ss.str();
}

// Builds a random phrase: a sequence of words produced by RandomWord(false),
// each followed by one delimiter character picked from a fixed set.
//
// maxLen - largest number of words in the phrase (inclusive).
// minLen - smallest number of words in the phrase (inclusive).
// If maxLen is smaller than minLen, exactly minLen words are generated.
//
// Returns the concatenated phrase; empty when the chosen word count is 0.
std::string TLogGenerator::RandomPhrase(ui32 maxLen, ui32 minLen) const {
    static const std::string delimiters = " .,=;-+:";

    // Clamp so that the unsigned subtraction below cannot wrap around.
    const ui32 upper = maxLen < minLen ? minLen : maxLen;
    // Number of admissible word counts. Computed in 64 bits so that "+ 1" cannot
    // overflow, and it is always >= 1, so RandomNumber never gets an empty range.
    const ui64 span = static_cast<ui64>(upper) - minLen + 1;

    std::string result;
    for (ui64 wordsLeft = minLen + RandomNumber<ui64>(span); wordsLeft > 0; --wordsLeft) {
        result += RandomWord(false);
        // RandomNumber(max) draws from [0, max), so pass the full length to make
        // every delimiter, including the last one, selectable.
        result += delimiters[RandomNumber<size_t>(delimiters.length())];
    }
    return result;
}

TInstant TLogGenerator::RandomInstant() const {
auto result = TInstant::Now();
i64 millisecondsDiff = 60 * 1000 * NormalRandom<double>(0., Params.TimestampStandardDeviationMinutes);
Expand All @@ -202,6 +211,10 @@ TInstant TLogGenerator::RandomInstant() const {
return result;
}

bool TLogGenerator::RandomIsNotNull() const {
return RandomNumber<ui32>(100) >= Params.NullPercent;
}

TVector<TRow> TLogGenerator::GenerateRandomRows() const {
TVector<TRow> result;
result.reserve(Params.RowsCnt);
Expand All @@ -213,19 +226,57 @@ TVector<TRow> TLogGenerator::GenerateRandomRows() const {
result.back().Level = RandomNumber<ui32>(10);
result.back().ServiceName = RandomWord(false);
result.back().Component = RandomWord(true);
for (ui32 msgLen = RandomNumber<ui32>(100) + 1; msgLen > 0; --msgLen) {
static const std::string delimiters = " .,=;-+:";
result.back().Message += RandomWord(false) + delimiters[RandomNumber<size_t>(delimiters.length() - 1)];
}
result.back().Message += RandomPhrase(100);

if (RandomNumber<char>() % 2) {
if (RandomIsNotNull()) {
NJson::TJsonValue json(NJson::JSON_MAP);
for (ui32 metaLen = RandomNumber<ui32>(50) + 1; metaLen > 0; --metaLen) {
json[RandomWord(false)] = RandomWord(false);
if (RandomIsNotNull()) {
json["adv_engine_id"] = ToString(RandomNumber<ui32>(10));
}
if (RandomIsNotNull()) {
json["client_ip"] = RandomNumber<ui64>();
}
if (RandomIsNotNull()) {
json["dont_count"] = RandomNumber<bool>();
}
if (RandomIsNotNull()) {
json["is_download"] = RandomNumber<bool>();
}
if (RandomIsNotNull()) {
json["is_link"] = RandomNumber<bool>();
}
if (RandomIsNotNull()) {
json["is_refresh"] = RandomNumber<bool>();
}
if (RandomIsNotNull()) {
json["referer"] = ToString(RandomNumber<ui32>(10));
json["referer_hash"] = ToString(RandomNumber<ui64>());
}
if (RandomIsNotNull()) {
json["response_time"] = RandomNumber<double>();
}
if (RandomIsNotNull()) {
json["search_engine_id"] = ToString(RandomNumber<ui32>(10));
}
if (RandomIsNotNull()) {
json["title"] = RandomPhrase(100);
}
if (RandomIsNotNull()) {
json["traffic_source_id"] = ToString(RandomNumber<ui32>(10));
}
if (RandomIsNotNull()) {
json["url"] = TStringBuilder() << (RandomNumber<bool>() ? "api:" : "http:") << RandomNumber<ui64>();
json["url_hash"] = ToString(RandomNumber<ui64>());
}
if (RandomIsNotNull()) {
json["window_client_height"] = RandomNumber<ui32>();
}
if (RandomIsNotNull()) {
json["window_client_width"] = RandomNumber<ui32>();
}
result.back().Metadata = json.GetStringRobust().c_str();
}
result.back().IngestedAt = (RandomNumber<char>() % 2) ? RandomInstant() : TInstant::Zero();
result.back().IngestedAt = RandomIsNotNull() ? RandomInstant() : TInstant::Zero();
}

return result;
Expand Down Expand Up @@ -265,6 +316,8 @@ void TLogWorkloadParams::ConfigureOpts(NLastGetopt::TOpts& opts, const ECommandT
throw yexception() << "Ivalid store type: " << arg;
}
});
opts.AddLongOption("null-percent", "Percent of nulls in generated data")
.DefaultValue(NullPercent).StoreResult(&NullPercent);
break;
case TWorkloadParams::ECommandType::Run:
switch (static_cast<TLogGenerator::EType>(workloadType)) {
Expand All @@ -277,6 +330,8 @@ void TLogWorkloadParams::ConfigureOpts(NLastGetopt::TOpts& opts, const ECommandT
.DefaultValue(RowsCnt).StoreResult(&RowsCnt);
opts.AddLongOption("timestamp_deviation", "Standard deviation. For each timestamp, a random variable with a specified standard deviation in minutes is added.")
.DefaultValue(TimestampStandardDeviationMinutes).StoreResult(&TimestampStandardDeviationMinutes);
opts.AddLongOption("null-percent", "Percent of nulls in generated data")
.DefaultValue(NullPercent).StoreResult(&NullPercent);
break;
case TLogGenerator::EType::Select:
break;
Expand Down
3 changes: 3 additions & 0 deletions ydb/library/workload/log/log.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class TLogWorkloadParams : public TWorkloadParams {
ui64 TimestampStandardDeviationMinutes = 0;
ui64 TimestampTtlMinutes = 0;
ui64 RowsCnt = 1;
ui32 NullPercent = 10;
bool PartitionsByLoad = true;

std::string TableName = "log_writer_test";
Expand Down Expand Up @@ -65,7 +66,9 @@ class TLogGenerator final: public TWorkloadQueryGeneratorBase<TLogWorkloadParams

private:
std::string RandomWord(bool canBeEmpty) const;
std::string RandomPhrase(ui32 maxLen, ui32 minLen = 1) const;
TInstant RandomInstant() const;
bool RandomIsNotNull() const;
TQueryInfoList WriteRows(TString operation, TVector<TRow>&& rows) const;
TQueryInfoList Insert(TVector<TRow>&& rows) const;
TQueryInfoList Upsert(TVector<TRow>&& rows) const;
Expand Down
3 changes: 1 addition & 2 deletions ydb/tests/stress/log/tests/test_workload.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,12 @@

from ydb.tests.library.harness.kikimr_runner import KiKiMR
from ydb.tests.library.harness.kikimr_config import KikimrConfigGenerator
from ydb.tests.library.common.types import Erasure


class TestYdbLogWorkload(object):
@classmethod
def setup_class(cls):
cls.cluster = KiKiMR(KikimrConfigGenerator(erasure=Erasure.MIRROR_3_DC))
cls.cluster = KiKiMR(KikimrConfigGenerator())
cls.cluster.start()

@classmethod
Expand Down

0 comments on commit ad3a882

Please sign in to comment.