Skip to content

Commit

Permalink
Add the call point of the capacity related metrics and change the reg…
Browse files Browse the repository at this point in the history
…istry
  • Loading branch information
Kai-Zhang committed Nov 1, 2023
1 parent ec4868d commit 21214dd
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import alluxio.file.ReadTargetBuffer;
import alluxio.metrics.MetricKey;
import alluxio.metrics.MetricsSystem;
import alluxio.metrics.MultiDimensionalMetricsSystem;
import alluxio.network.protocol.databuffer.DataFileChannel;
import alluxio.resource.LockResource;

Expand Down Expand Up @@ -209,6 +210,7 @@ public Optional<DataFileChannel> getDataFileChannel(
DataFileChannel dataFileChannel = pageInfo.getLocalCacheDir().getPageStore()
.getDataFileChannel(pageInfo.getPageId(), pageOffset, bytesToRead,
cacheContext.isTemporary());
MultiDimensionalMetricsSystem.CACHED_DATA_READ.inc(bytesToRead);
MetricsSystem.counter(MetricKey.CLIENT_CACHE_HIT_REQUESTS.getName()).inc();
MetricsSystem.meter(MetricKey.CLIENT_CACHE_BYTES_READ_CACHE.getName()).mark(bytesToRead);
cacheContext.incrementCounter(MetricKey.CLIENT_CACHE_BYTES_READ_CACHE.getMetricName(), BYTE,
Expand Down Expand Up @@ -501,6 +503,7 @@ private PutResult putAttempt(PageId pageId, ByteBuffer page, CacheContext cacheC
try {
pageStoreDir.getPageStore().delete(victim);
// Bytes evicted from the cache
MultiDimensionalMetricsSystem.CACHED_EVICTED_DATA.inc(victimPageInfo.getPageSize());
MetricsSystem.meter(MetricKey.CLIENT_CACHE_BYTES_EVICTED.getName())
.mark(victimPageInfo.getPageSize());
// Errors when adding pages
Expand Down Expand Up @@ -620,6 +623,7 @@ public int get(PageId pageId, int pageOffset, int bytesToRead, ReadTargetBuffer
}
return -1;
}
MultiDimensionalMetricsSystem.CACHED_DATA_READ.inc(bytesRead);
MetricsSystem.meter(MetricKey.CLIENT_CACHE_BYTES_READ_CACHE.getName()).mark(bytesRead);
cacheContext.incrementCounter(MetricKey.CLIENT_CACHE_BYTES_READ_CACHE.getMetricName(), BYTE,
bytesRead);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@

import alluxio.conf.Configuration;
import alluxio.conf.PropertyKey;
import alluxio.util.CommonUtils;
import alluxio.util.FormatUtils;

import io.prometheus.metrics.config.PrometheusProperties;
import io.prometheus.metrics.core.metrics.Counter;
import io.prometheus.metrics.core.metrics.Gauge;
import io.prometheus.metrics.core.metrics.GaugeWithCallback;
import io.prometheus.metrics.core.metrics.Histogram;
import io.prometheus.metrics.core.metrics.Summary;
import io.prometheus.metrics.exporter.common.PrometheusHttpRequest;
import io.prometheus.metrics.exporter.common.PrometheusHttpResponse;
Expand All @@ -43,37 +45,57 @@
* and expose all the metrics to the web server.
*/
public final class MultiDimensionalMetricsSystem {
public static final Summary DATA_ACCESS = Summary.builder()
public static final Histogram DATA_ACCESS = Histogram.builder()
.name("alluxio_data_access")
.help("aggregated throughput of all the data access")
.unit(Unit.BYTES)
.labelNames("method")
.build();

public static final Summary DATA_ACCESS_LATENCY = Summary.builder()
.name("alluxio_data_access_latency")
.help("aggregated latency of all the data access")
.unit(Unit.SECONDS)
.labelNames("method")
.register();
.build();

public static final Summary UFS_DATA_ACCESS = Summary.builder()
public static final Histogram UFS_DATA_ACCESS = Histogram.builder()
.name("alluxio_ufs_data_access")
.help("aggregated throughput of ufs access")
.unit(Unit.BYTES)
.labelNames("method")
.build();

public static final Summary UFS_DATA_ACCESS_LATENCY = Summary.builder()
.name("alluxio_ufs_data_access_latency")
.help("aggregated latency of ufs access")
.unit(Unit.SECONDS)
.labelNames("method")
.register();
.build();

public static final Counter META_OPERATION = Counter.builder()
.name("alluxio_meta_operation")
.help("counter of rpc calls of the meta operations")
.labelNames("op")
.build();

public static final Counter CACHED_DATA_READ = Counter.builder()
.name("alluxio_cached_data_read")
.help("amount of the read cached data")
.unit(Unit.BYTES)
.register();
.build();

public static final Counter META_OPERATION = Counter.builder()
.name("alluxio_meta_operation")
.help("counter of rpc calls of the meta operations")
.labelNames("op")
.register();
public static final Counter CACHED_EVICTED_DATA = Counter.builder()
.name("alluxio_cached_evicted_data")
.help("amount of the evicted data")
.unit(Unit.BYTES)
.build();

public static final Gauge CACHED_STORAGE = Gauge.builder()
.name("alluxio_cached_storage")
.help("amount of the cached data")
.unit(Unit.BYTES)
.register();
.build();

public static final GaugeWithCallback CACHED_CAPACITY = GaugeWithCallback.builder()
.name("alluxio_cached_capacity")
Expand All @@ -84,25 +106,40 @@ public final class MultiDimensionalMetricsSystem {
long sum = sizes.stream().map(FormatUtils::parseSpaceSize).reduce(0L, Long::sum);
callback.call(sum);
})
.register();

public static final Counter CACHED_EVICTED_DATA = Counter.builder()
.name("alluxio_cached_evicted_data")
.help("amount of the evicted data")
.unit(Unit.BYTES)
.register();

public static final Gauge CACHED_READABLE_STORAGE = Gauge.builder()
.name("alluxio_cached_readable_storage")
.help("amount of readable cached data")
.unit(Unit.BYTES)
.register();
.build();

/**
 * Registers the JVM metrics and then, depending on the type of the current process,
 * registers the matching set of Alluxio metrics on the default Prometheus registry.
 *
 * <p>Workers register the data access, UFS data access, meta operation and cache metrics.
 * Clients register the same set except for the two UFS data access metrics.
 * Masters and every other process type register nothing beyond the JVM metrics.
 */
public static void initMetrics() {
  JvmMetrics.builder().register();
  // Every explicit registration below targets the same default registry.
  final PrometheusRegistry registry = PrometheusRegistry.defaultRegistry;
  switch (CommonUtils.PROCESS_TYPE.get()) {
    case WORKER:
      // Data path metrics, including the UFS side.
      registry.register(DATA_ACCESS);
      registry.register(DATA_ACCESS_LATENCY);
      registry.register(UFS_DATA_ACCESS);
      registry.register(UFS_DATA_ACCESS_LATENCY);
      registry.register(META_OPERATION);
      // Cache usage and capacity metrics.
      registry.register(CACHED_DATA_READ);
      registry.register(CACHED_EVICTED_DATA);
      registry.register(CACHED_STORAGE);
      registry.register(CACHED_CAPACITY);
      break;
    case CLIENT:
      // Same as the worker set, minus the UFS data access metrics.
      registry.register(DATA_ACCESS);
      registry.register(DATA_ACCESS_LATENCY);
      registry.register(META_OPERATION);
      registry.register(CACHED_DATA_READ);
      registry.register(CACHED_EVICTED_DATA);
      registry.register(CACHED_STORAGE);
      registry.register(CACHED_CAPACITY);
      break;
    case MASTER:
      // No essential metrics for the master for now.
      break;
    default:
      // Ignore and only expose JVM-related metrics
      break;
  }
}

/**
Expand Down

0 comments on commit 21214dd

Please sign in to comment.