diff --git a/src/qp/metrics/parallel_metrics.ipynb b/src/qp/metrics/parallel_metrics.ipynb index 8352f0e..3996f6a 100644 --- a/src/qp/metrics/parallel_metrics.ipynb +++ b/src/qp/metrics/parallel_metrics.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -17,7 +17,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -35,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -50,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -62,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -88,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -122,54 +122,18 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1.844492171486455" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "PointSigmaIQR().evaluate(estimate, reference)" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Starting 4 engines with \n", - "100%|██████████| 4/4 [00:05<00:00, 1.48s/engine]\n", - "mpi_example: 100%|██████████| 10/10 [00:00<00:00, 12.39tasks/s]\n", - "0 : (296, 2)\n", - "1 : (295, 2)\n", - "2 : (298, 2)\n", - "3 : (299, 2)\n", - "4 : (296, 2)\n", - "5 : (297, 2)\n", - "6 : (295, 2)\n", - "7 : (295, 2)\n", - "8 : (294, 2)\n", - "9 : (297, 2)\n", - "1.8458703638788456\n", - "Stopping engine(s): 1708026063\n", - "engine set stopped 1708026063: {'exit_code': 0, 'pid': 40773, 'identifier': 'ipengine-1708026062-ui56-1708026063-40755'}\n", - "Stopping controller\n", - "Controller stopped: {'exit_code': 0, 'pid': 40761, 'identifier': 'ipcontroller-1708026062-ui56-40755'}\n" - ] - } - ], + "outputs": [], "source": [ "run_parallel_metric(PointSigmaIQR(), iqr_data_chunks)" ] @@ -183,54 +147,18 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "-0.15921544705180912" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "PointBias().evaluate(estimate, reference)" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Starting 4 engines with \n", - "100%|██████████| 4/4 [00:05<00:00, 1.49s/engine]\n", - "mpi_example: 100%|██████████| 10/10 [00:00<00:00, 12.51tasks/s]\n", - "0 : (296, 2)\n", - "1 : (295, 2)\n", - "2 : (298, 2)\n", - "3 : (299, 2)\n", - "4 : (296, 2)\n", - "5 : (297, 2)\n", - "6 : (295, 2)\n", - "7 : (295, 2)\n", - "8 : (294, 2)\n", - "9 : (297, 2)\n", - "-0.15852842748117044\n", - "Stopping engine(s): 1708026071\n", - "engine set stopped 1708026071: {'exit_code': 0, 'pid': 40822, 'identifier': 'ipengine-1708026070-ddho-1708026071-40755'}\n", - "Stopping controller\n", - "Controller stopped: {'exit_code': 0, 'pid': 40810, 'identifier': 'ipcontroller-1708026070-ddho-40755'}\n" - ] - } - ], + "outputs": [], "source": [ "run_parallel_metric(PointBias(), point_bias_data_chunks)" ] @@ -244,7 +172,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -256,45 +184,33 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1.0738614584809976" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "PointSigmaMAD().evaluate(estimate, reference)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This cell allows for adjustment of the `num_bins` parameter.\n", + "\n", + "Larger values trend closer to the analytic result from the cell above." + ] + }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1.101290495249205" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "psmad = PointSigmaMAD()\n", - "centroid_1 = psmad.accumulate(estimate[0:5000], reference[0:5000])\n", - "centroid_2 = psmad.accumulate(estimate[5000:], reference[5000:])\n", - "psmad.finalize(centroids=[centroid_1, centroid_2])\n" + "centroids = psmad.accumulate(estimate, reference)\n", + "\n", + "#default value for `num_bins` is 1_000_000\n", + "psmad.finalize(centroids=[centroids], num_bins=1_000_000)" ] }, { @@ -302,7 +218,9 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "run_parallel_metric(PointSigmaMAD(), point_sigma_mad_data_chunks)" + ] } ], "metadata": { diff --git a/src/qp/metrics/point_estimate_metric_classes.py b/src/qp/metrics/point_estimate_metric_classes.py index 8eeb4f0..caf3748 100644 --- a/src/qp/metrics/point_estimate_metric_classes.py +++ b/src/qp/metrics/point_estimate_metric_classes.py @@ -219,7 +219,7 @@ def accumulate(self, estimate, reference): centroids = digest.get_centroids() return centroids - def finalize(self, centroids=None): + def finalize(self, centroids=None, num_bins=1_000_000): digests = ( TDigest.of_centroids(np.array(centroid), compression=1000) for centroid in centroids @@ -229,11 +229,10 @@ def finalize(self, centroids=None): SCALE_FACTOR = 1.4826 # calculation of `np.median(np.fabs(ez - np.median(ez)))` as suggested by Eric Charles - this_median = digest.inverse_cdf([0.50])[0] - lots_of_bins = 100000 + this_median = digest.inverse_cdf(0.50) this_min = digest.inverse_cdf(0) this_max = digest.inverse_cdf(1) - bins = np.linspace(this_min, this_max, lots_of_bins) + bins = np.linspace(this_min, this_max, num_bins) this_pdf = digest.cdf(bins[1:]) - digest.cdf(bins[0:-1]) # len(this_pdf) = lots_of_bins - 1 bin_dist = np.fabs(bins - this_median) # get the distance to the center for each bin in the hist