Skip to content

Commit

Permalink
Configurable bloom tokenizer and block settings (#11889)
Browse files Browse the repository at this point in the history
**What this PR does / why we need it**:
This PR makes the maximum bloom block size configurable and wires up the
existing `nGramLen` and `nGramSkip` settings.


**Special notes for your reviewer**:

**Checklist**
- [ ] Reviewed the
[`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md)
guide (**required**)
- [ ] Documentation added
- [ ] Tests updated
- [ ] `CHANGELOG.md` updated
- [ ] If the change is worth mentioning in the release notes, add
`add-to-release-notes` label
- [ ] Changes that require user attention or interaction to upgrade are
documented in `docs/sources/setup/upgrade/_index.md`
- [ ] For Helm chart changes bump the Helm chart version in
`production/helm/loki/Chart.yaml` and update
`production/helm/loki/CHANGELOG.md` and
`production/helm/loki/README.md`. [Example
PR](d10549e)
- [ ] If the change is deprecating or removing a configuration option,
update the `deprecated-config.yaml` and `deleted-config.yaml` files
respectively in the `tools/deprecated-config-checker` directory.
[Example
PR](0d4416a)
  • Loading branch information
salvacorts authored Feb 12, 2024
1 parent 681bb57 commit 7a95cb8
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 10 deletions.
6 changes: 6 additions & 0 deletions docs/sources/configure/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -3140,6 +3140,12 @@ shard_streams:
# CLI flag: -bloom-gateway.cache-key-interval
[bloom_gateway_cache_key_interval: <duration> | default = 15m]

# The maximum bloom block size. A value of 0 sets an unlimited size. Default is
# 200MB. The actual block size might exceed this limit since blooms will be
# added to blocks until the block exceeds the maximum block size.
# CLI flag: -bloom-compactor.max-block-size
[bloom_compactor_max_block_size: <int> | default = 200MB]

# Allow user to send structured metadata in push payload.
# CLI flag: -validation.allow-structured-metadata
[allow_structured_metadata: <boolean> | default = false]
Expand Down
1 change: 1 addition & 0 deletions pkg/bloomcompactor/bloomcompactor.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ func New(
c.tsdbStore,
c.bloomStore,
chunkLoader,
c.limits,
c.metrics,
c.logger,
)
Expand Down
1 change: 1 addition & 0 deletions pkg/bloomcompactor/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ type Limits interface {
BloomNGramLength(tenantID string) int
BloomNGramSkip(tenantID string) int
BloomFalsePositiveRate(tenantID string) float64
BloomCompactorMaxBlockSize(tenantID string) int
}

// TODO(owen-d): Remove this type in favor of config.DayTime
Expand Down
10 changes: 9 additions & 1 deletion pkg/bloomcompactor/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ type SimpleBloomController struct {
bloomStore bloomshipper.Store
chunkLoader ChunkLoader
metrics *Metrics
limits Limits

// TODO(owen-d): add metrics
logger log.Logger
Expand All @@ -30,6 +31,7 @@ func NewSimpleBloomController(
tsdbStore TSDBStore,
blockStore bloomshipper.Store,
chunkLoader ChunkLoader,
limits Limits,
metrics *Metrics,
logger log.Logger,
) *SimpleBloomController {
Expand All @@ -38,6 +40,7 @@ func NewSimpleBloomController(
bloomStore: blockStore,
chunkLoader: chunkLoader,
metrics: metrics,
limits: limits,
logger: logger,
}
}
Expand Down Expand Up @@ -110,6 +113,11 @@ func (s *SimpleBloomController) buildBlocks(
return errors.Wrap(err, "failed to create plan")
}

nGramSize := uint64(s.limits.BloomNGramLength(tenant))
nGramSkip := uint64(s.limits.BloomNGramSkip(tenant))
maxBlockSize := uint64(s.limits.BloomCompactorMaxBlockSize(tenant))
blockOpts := v1.NewBlockOptions(nGramSize, nGramSkip, maxBlockSize)

// 4. Generate Blooms
// Now that we have the gaps, we will generate a bloom block for each gap.
// We can accelerate this by using existing blocks which may already contain
Expand Down Expand Up @@ -148,7 +156,7 @@ func (s *SimpleBloomController) buildBlocks(

gen := NewSimpleBloomGenerator(
tenant,
v1.DefaultBlockOptions, // TODO(salvacorts) make block options configurable
blockOpts,
seriesItr,
s.chunkLoader,
preExistingBlocks,
Expand Down
26 changes: 17 additions & 9 deletions pkg/validation/limits.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ const (

defaultMaxStructuredMetadataSize = "64kb"
defaultMaxStructuredMetadataCount = 128
defaultBloomCompactorMaxBlockSize = "200MB"
)

// Limits describe all the limits for users; can be used to describe global default
Expand Down Expand Up @@ -187,15 +188,16 @@ type Limits struct {
BloomGatewayShardSize int `yaml:"bloom_gateway_shard_size" json:"bloom_gateway_shard_size"`
BloomGatewayEnabled bool `yaml:"bloom_gateway_enable_filtering" json:"bloom_gateway_enable_filtering"`

BloomCompactorShardSize int `yaml:"bloom_compactor_shard_size" json:"bloom_compactor_shard_size"`
BloomCompactorMaxTableAge time.Duration `yaml:"bloom_compactor_max_table_age" json:"bloom_compactor_max_table_age"`
BloomCompactorEnabled bool `yaml:"bloom_compactor_enable_compaction" json:"bloom_compactor_enable_compaction"`
BloomCompactorChunksBatchSize int `yaml:"bloom_compactor_chunks_batch_size" json:"bloom_compactor_chunks_batch_size"`
BloomNGramLength int `yaml:"bloom_ngram_length" json:"bloom_ngram_length"`
BloomNGramSkip int `yaml:"bloom_ngram_skip" json:"bloom_ngram_skip"`
BloomFalsePositiveRate float64 `yaml:"bloom_false_positive_rate" json:"bloom_false_positive_rate"`
BloomGatewayBlocksDownloadingParallelism int `yaml:"bloom_gateway_blocks_downloading_parallelism" json:"bloom_gateway_blocks_downloading_parallelism"`
BloomGatewayCacheKeyInterval time.Duration `yaml:"bloom_gateway_cache_key_interval" json:"bloom_gateway_cache_key_interval"`
BloomCompactorShardSize int `yaml:"bloom_compactor_shard_size" json:"bloom_compactor_shard_size"`
BloomCompactorMaxTableAge time.Duration `yaml:"bloom_compactor_max_table_age" json:"bloom_compactor_max_table_age"`
BloomCompactorEnabled bool `yaml:"bloom_compactor_enable_compaction" json:"bloom_compactor_enable_compaction"`
BloomCompactorChunksBatchSize int `yaml:"bloom_compactor_chunks_batch_size" json:"bloom_compactor_chunks_batch_size"`
BloomNGramLength int `yaml:"bloom_ngram_length" json:"bloom_ngram_length"`
BloomNGramSkip int `yaml:"bloom_ngram_skip" json:"bloom_ngram_skip"`
BloomFalsePositiveRate float64 `yaml:"bloom_false_positive_rate" json:"bloom_false_positive_rate"`
BloomGatewayBlocksDownloadingParallelism int `yaml:"bloom_gateway_blocks_downloading_parallelism" json:"bloom_gateway_blocks_downloading_parallelism"`
BloomGatewayCacheKeyInterval time.Duration `yaml:"bloom_gateway_cache_key_interval" json:"bloom_gateway_cache_key_interval"`
BloomCompactorMaxBlockSize flagext.ByteSize `yaml:"bloom_compactor_max_block_size" json:"bloom_compactor_max_block_size"`

AllowStructuredMetadata bool `yaml:"allow_structured_metadata,omitempty" json:"allow_structured_metadata,omitempty" doc:"description=Allow user to send structured metadata in push payload."`
MaxStructuredMetadataSize flagext.ByteSize `yaml:"max_structured_metadata_size" json:"max_structured_metadata_size" doc:"description=Maximum size accepted for structured metadata per log line."`
Expand Down Expand Up @@ -333,6 +335,8 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) {
f.Float64Var(&l.BloomFalsePositiveRate, "bloom-compactor.false-positive-rate", 0.01, "Scalable Bloom Filter desired false-positive rate.")
f.IntVar(&l.BloomGatewayBlocksDownloadingParallelism, "bloom-gateway.blocks-downloading-parallelism", 50, "Maximum number of blocks will be downloaded in parallel by the Bloom Gateway.")
f.DurationVar(&l.BloomGatewayCacheKeyInterval, "bloom-gateway.cache-key-interval", 15*time.Minute, "Interval for computing the cache key in the Bloom Gateway.")
_ = l.BloomCompactorMaxBlockSize.Set(defaultBloomCompactorMaxBlockSize)
f.Var(&l.BloomCompactorMaxBlockSize, "bloom-compactor.max-block-size", "The maximum bloom block size. A value of 0 sets an unlimited size. Default is 200MB. The actual block size might exceed this limit since blooms will be added to blocks until the block exceeds the maximum block size.")

l.ShardStreams = &shardstreams.Config{}
l.ShardStreams.RegisterFlagsWithPrefix("shard-streams", f)
Expand Down Expand Up @@ -882,6 +886,10 @@ func (o *Overrides) BloomNGramSkip(userID string) int {
return o.getOverridesForUser(userID).BloomNGramSkip
}

// BloomCompactorMaxBlockSize returns the maximum bloom block size that applies
// to the given tenant, resolved through that tenant's overrides. Per the
// corresponding flag description, a value of 0 means the size is unlimited.
func (o *Overrides) BloomCompactorMaxBlockSize(userID string) int {
	tenantLimits := o.getOverridesForUser(userID)
	return tenantLimits.BloomCompactorMaxBlockSize.Val()
}

// BloomFalsePositiveRate returns the desired bloom filter false-positive rate
// that applies to the given tenant, resolved through that tenant's overrides.
func (o *Overrides) BloomFalsePositiveRate(userID string) float64 {
	tenantLimits := o.getOverridesForUser(userID)
	return tenantLimits.BloomFalsePositiveRate
}
Expand Down

0 comments on commit 7a95cb8

Please sign in to comment.