diff --git a/Dockerfile b/Dockerfile index 418de70..08eba37 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,10 +5,14 @@ LABEL org.label-schema.name='AMOC Arsenal' \ WORKDIR /amoc_arsenal -COPY ./ ./ -RUN git clean -ffxd +COPY rebar.lock . +RUN rebar3 compile --deps_only +COPY rebar.config . +COPY src src +COPY ci ci +COPY priv priv RUN rebar3 release -ENV PATH "/amoc_arsenal/_build/default/rel/amoc_arsenal/bin:${PATH}" +ENV PATH="/amoc_arsenal/_build/default/rel/amoc_arsenal/bin:${PATH}" CMD ["amoc_arsenal", "console", "-noshell", "-noinput", "+Bd"] diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml index 5af413b..df7dce9 100644 --- a/ci/docker-compose.yml +++ b/ci/docker-compose.yml @@ -1,9 +1,10 @@ x-amoc-defaults: &amoc-defaults image: "amoc-arsenal" + pull_policy: never networks: - amoc-test-network environment: - AMOC_GRAPHITE_HOST: '"graphite"' + AMOC_PROMETHEUS_IP: '{0, 0, 0, 0}' AMOC_NODES: "['amoc_arsenal@amoc-master']" healthcheck: test: "amoc_arsenal status" @@ -29,14 +30,16 @@ services: hostname: "amoc-worker-3" ports: - "4003:4000" - graphite: - image: "graphiteapp/graphite-statsd:1.1.10-4" + prometheus: + image: "prom/prometheus" ports: - - "8080:80" + - "9090:9090" + volumes: + - ./prometheus.yml:/etc/prometheus/prometheus.yml networks: - amoc-test-network grafana: - image: "grafana/grafana:8.2.6" + image: "grafana/grafana" ports: - "3000:3000" networks: diff --git a/ci/helper.sh b/ci/helper.sh index 1862e02..5f3903e 100644 --- a/ci/helper.sh +++ b/ci/helper.sh @@ -81,7 +81,7 @@ function get_status() { ## graphite REST API functions ## ################################# function metrics_reported() { - curl -s "http://localhost:8080/metrics/find?query=*" | contains "$@" + curl -s "http://localhost:9090/api/v1/targets" | contains "$@" } function wait_for_metrics() { diff --git a/ci/prometheus.yml b/ci/prometheus.yml new file mode 100644 index 0000000..4412b93 --- /dev/null +++ b/ci/prometheus.yml @@ -0,0 +1,21 @@ +global: + scrape_interval: 5s + 
evaluation_interval: 5s +scrape_configs: + - job_name: prometheus + # metrics_path defaults to '/metrics' + # scheme defaults to 'http'. + static_configs: + - targets: ["localhost:9090"] + - job_name: amoc-master + static_configs: + - targets: ["amoc-master:9090"] + - job_name: amoc-worker-1 + static_configs: + - targets: ["amoc-worker-1:9090"] + - job_name: amoc-worker-2 + static_configs: + - targets: ["amoc-worker-2:9090"] + - job_name: amoc-worker-3 + static_configs: + - targets: ["amoc-worker-3:9090"] diff --git a/ci/start_demo_cluster.sh b/ci/start_demo_cluster.sh index 4d544b1..a00fda9 100755 --- a/ci/start_demo_cluster.sh +++ b/ci/start_demo_cluster.sh @@ -3,14 +3,14 @@ source "$(dirname "$0")/helper.sh" enable_strict_mode -docker_compose up --wait --wait-timeout 100 amoc-{master,worker-1,worker-2} graphite grafana +docker_compose up --wait --wait-timeout 100 amoc-{master,worker-1,worker-2} prometheus grafana ## configure default grafana datasource json=( '{' - '"name": "graphite",' + '"name": "prometheus",' '"access": "proxy",' - '"type": "graphite",' - '"url": "http://graphite",' + '"type": "prometheus",' + '"url": "http://prometheus:9090",' '"isDefault": true' '}' ) diff --git a/doc/metrics.md b/doc/metrics.md index a115010..53e5e7f 100644 --- a/doc/metrics.md +++ b/doc/metrics.md @@ -1,20 +1,21 @@ ## Metrics -`amoc_metrics` allow to configure a Graphite reporter using the following environment variables: +`amoc_metrics` allows configuring a Prometheus exporter using the following environment variables: -* ``graphite_host`` - a graphite host address (string or `undefined`): - * default value - `undefined` (amoc_metrics do not try to initialise a metrics reporter) - * example: `AMOC_GRAPHITE_HOST='"graphite"'` +* `prometheus_port` - prometheus port: + * default value - `9090` + * example: `AMOC_PROMETHEUS_PORT='9090'` -* ``graphite_port`` - graphite port: - * default value - `2003` - * example: `AMOC_GRAPHITE_PORT='2003'` +* `prometheus_ip` - prometheus 
IP: + * default value - `{0, 0, 0, 0}` (listen on all interfaces) + * example: `AMOC_PROMETHEUS_IP='"127.0.0.1"'` -* ``graphite_prefix`` - graphite prefix: - * default value - `net_adm:localhost()` - * example: `AMOC_GRAPHITE_PREFIX='"amoc"'` +Note that the values are parsed as Erlang terms, so for string values the double-quotes inside the single-quotes are +necessary. -In order to initialise some preconfigured metrics, other applications can declare the `predefined_metrics` environment variable (in their own `*.app.src` file): +In order to initialise some preconfigured metrics, +other applications can declare the `predefined_metrics` +environment variable (in their own `*.app.src` file): ```erl -{predefined_metrics, [{gauge, some_metric}, {times, another_metric}]} +{predefined_metrics, [{gauge, some_metric}, {histogram, another_metric}]} ``` diff --git a/rebar.config b/rebar.config index da1c6b4..c69bfc3 100644 --- a/rebar.config +++ b/rebar.config @@ -7,9 +7,8 @@ {deps, [ {amoc, "3.3.0"}, {telemetry, "1.3.0"}, - {exometer_core, {git, "https://github.com/esl/exometer_core.git", {branch, "master"}}}, - {exometer_report_graphite, - {git, "https://github.com/esl/exometer_report_graphite.git", {branch, "master"}}}, + {prometheus, "4.11.0"}, + {prometheus_cowboy, "0.1.8"}, %% when updating amoc_rest version, don't forget to update it at ./doc/http-api.md as well. {amoc_rest, {git, "https://github.com/esl/amoc_rest.git", {tag, "1.2.2"}}} ]}. 
diff --git a/rebar.lock b/rebar.lock index 058f8e8..f100ac7 100644 --- a/rebar.lock +++ b/rebar.lock @@ -1,41 +1,42 @@ {"1.2.0", -[{<<"amoc">>,{pkg,<<"amoc">>,<<"3.3.0">>},0}, +[{<<"accept">>,{pkg,<<"accept">>,<<"0.3.5">>},2}, + {<<"amoc">>,{pkg,<<"amoc">>,<<"3.3.0">>},0}, {<<"amoc_rest">>, {git,"https://github.com/esl/amoc_rest.git", {ref,"d9f642255f48d14a1ff9601500796f5edb4e11e6"}}, 0}, - {<<"bear">>,{pkg,<<"bear">>,<<"1.0.0">>},1}, {<<"cowboy">>,{pkg,<<"cowboy">>,<<"2.12.0">>},1}, {<<"cowlib">>,{pkg,<<"cowlib">>,<<"2.13.0">>},2}, - {<<"exometer_core">>, - {git,"https://github.com/esl/exometer_core.git", - {ref,"123daa053a4abb3ff4bdbf52f08344da535294e9"}}, - 0}, - {<<"exometer_report_graphite">>, - {git,"https://github.com/esl/exometer_report_graphite.git", - {ref,"59e475a094818294443de9dc68e08ee0116a5626"}}, - 0}, {<<"jesse">>,{pkg,<<"jesse">>,<<"1.8.1">>},1}, - {<<"parse_trans">>,{pkg,<<"parse_trans">>,<<"3.4.0">>},1}, + {<<"prometheus">>,{pkg,<<"prometheus">>,<<"4.11.0">>},0}, + {<<"prometheus_cowboy">>,{pkg,<<"prometheus_cowboy">>,<<"0.1.8">>},0}, + {<<"prometheus_httpd">>,{pkg,<<"prometheus_httpd">>,<<"2.1.11">>},1}, + {<<"quantile_estimator">>,{pkg,<<"quantile_estimator">>,<<"0.2.1">>},1}, {<<"ranch">>,{pkg,<<"ranch">>,<<"2.1.0">>},1}, {<<"telemetry">>,{pkg,<<"telemetry">>,<<"1.3.0">>},0}]}. 
[ {pkg_hash,[ + {<<"accept">>, <<"B33B127ABCA7CC948BBE6CAA4C263369ABF1347CFA9D8E699C6D214660F10CD1">>}, {<<"amoc">>, <<"531B7E8CE39D40B4BF5A819868091C4451DC3D3FDAE753E3E3B1D0E5E8E81CDD">>}, - {<<"bear">>, <<"430419C1126B477686CDE843E88BA0F2C7DC5CDF0881C677500074F704339A99">>}, {<<"cowboy">>, <<"F276D521A1FF88B2B9B4C54D0E753DA6C66DD7BE6C9FCA3D9418B561828A3731">>}, {<<"cowlib">>, <<"DB8F7505D8332D98EF50A3EF34B34C1AFDDEC7506E4EE4DD4A3A266285D282CA">>}, {<<"jesse">>, <<"C9E3670C7EE40F719734E3BC716578143AABA93FC7525A02A7D5CB300B3AD71E">>}, - {<<"parse_trans">>, <<"BB87AC362A03CA674EBB7D9D498F45C03256ADED7214C9101F7035EF44B798C7">>}, + {<<"prometheus">>, <<"B95F8DE8530F541BD95951E18E355A840003672E5EDA4788C5FA6183406BA29A">>}, + {<<"prometheus_cowboy">>, <<"CFCE0BC7B668C5096639084FCD873826E6220EA714BF60A716F5BD080EF2A99C">>}, + {<<"prometheus_httpd">>, <<"F616ED9B85B536B195D94104063025A91F904A4CFC20255363F49A197D96C896">>}, + {<<"quantile_estimator">>, <<"EF50A361F11B5F26B5F16D0696E46A9E4661756492C981F7B2229EF42FF1CD15">>}, {<<"ranch">>, <<"2261F9ED9574DCFCC444106B9F6DA155E6E540B2F82BA3D42B339B93673B72A3">>}, {<<"telemetry">>, <<"FEDEBBAE410D715CF8E7062C96A1EF32EC22E764197F70CDA73D82778D61E7A2">>}]}, {pkg_hash_ext,[ + {<<"accept">>, <<"11B18C220BCC2EAB63B5470C038EF10EB6783BCB1FCDB11AA4137DEFA5AC1BB8">>}, {<<"amoc">>, <<"B8DD4F77BB94716ABC64E863158EEF8E1375CECB2F69E57DC4A293B0949D4985">>}, - {<<"bear">>, <<"157B67901ADF84FF0DA6EAE035CA1292A0AC18AA55148154D8C582B2C68959DB">>}, {<<"cowboy">>, <<"8A7ABE6D183372CEB21CAA2709BEC928AB2B72E18A3911AA1771639BEF82651E">>}, {<<"cowlib">>, <<"E1E1284DC3FC030A64B1AD0D8382AE7E99DA46C3246B815318A4B848873800A4">>}, {<<"jesse">>, <<"0EDED3F18623FDA2F25989804A06CF518B4ACF2E9365B18C8E8C013D7E3C906F">>}, - {<<"parse_trans">>, <<"F99E368830BEA44552224E37E04943A54874F08B8590485DE8D13832B63A2DC3">>}, + {<<"prometheus">>, <<"719862351AABF4DF7079B05DC085D2BBCBE3AC0AC3009E956671B1D5AB88247D">>}, + {<<"prometheus_cowboy">>, 
<<"BA286BECA9302618418892D37BCD5DC669A6CC001F4EB6D6AF85FF81F3F4F34C">>}, + {<<"prometheus_httpd">>, <<"0BBE831452CFDF9588538EB2F570B26F30C348ADAE5E95A7D87F35A5910BCF92">>}, + {<<"quantile_estimator">>, <<"282A8A323CA2A845C9E6F787D166348F776C1D4A41EDE63046D72D422E3DA946">>}, {<<"ranch">>, <<"244EE3FA2A6175270D8E1FC59024FD9DBC76294A321057DE8F803B1479E76916">>}, {<<"telemetry">>, <<"7015FC8919DBE63764F4B4B87A95B7C0996BD539E0D499BE6EC9D7F3875B79E6">>}]} ]. diff --git a/src/amoc_arsenal.app.src b/src/amoc_arsenal.app.src index 6ec56c8..f8eb188 100644 --- a/src/amoc_arsenal.app.src +++ b/src/amoc_arsenal.app.src @@ -6,28 +6,11 @@ {applications, [ kernel, stdlib, - exometer_core, - exometer_report_graphite, + prometheus, + prometheus_cowboy, amoc_rest, amoc ]}, - {env, [ - {exometer_predefined, [ - { - [erlang, system_info], - {function, erlang, system_info, ['$dp'], value, [port_count, process_count]}, - [] - }, - { - [erlang, memory], - {function, erlang, memory, ['$dp'], value, [ - total, processes, processes_used, system, binary, ets - ]}, - [] - }, - {[amoc, users], {function, amoc_metrics, user_size, [], proplist, [size]}, []} - ]} - ]}, {modules, []}, {licenses, ["Apache-2.0"]}, {links, []} diff --git a/src/amoc_arsenal_app.erl b/src/amoc_arsenal_app.erl index 2213162..4948ca4 100644 --- a/src/amoc_arsenal_app.erl +++ b/src/amoc_arsenal_app.erl @@ -10,9 +10,9 @@ -export([start/2, stop/1]). start(_StartType, _StartArgs) -> - {ok, _} = amoc_api:start(), - amoc_metrics:start(), amoc_logging:start(), + HasMetrics = amoc_metrics:start(), + {ok, _} = amoc_api:start(HasMetrics), amoc_arsenal_sup:start_link(). stop(_State) -> diff --git a/src/amoc_metrics.erl b/src/amoc_metrics.erl index 49496cd..71175f6 100644 --- a/src/amoc_metrics.erl +++ b/src/amoc_metrics.erl @@ -1,122 +1,107 @@ -module(amoc_metrics). --export([start/0, init/2, user_size/0]). --export([update_time/2, update_counter/2, update_counter/1, update_gauge/2]). +-behaviour(prometheus_collector). 
+ +-export([start/0, start_predefined_metrics/1, init/2]). +-export([update_counter/1, update_counter/2, update_gauge/2, update_time/2]). +-export([deregister_cleanup/1, collect_mf/2]). -include_lib("kernel/include/logger.hrl"). -type simple_name() :: atom() | [atom()]. -type name() :: simple_name() | {strict, simple_name()}. --type type() :: counters | times | gauge. +-type type() :: counters | gauge | times | histogram | summary. -export_type([name/0]). --define(AMOC_DEFAULT_METRICS_REPORTER, exometer_report_graphite). --define(AMOC_METRICS_REPORTING_INTERVAL, timer:seconds(10)). +%% =================================================================== +%% API +%% =================================================================== --spec start() -> any(). +-spec start() -> boolean(). start() -> - maybe_add_reporter(), - subsribe_default_metrics(), - maybe_init_predefined_metrics(). - --spec init(type(), name()) -> ok. -init(Type, Name) -> - ExName = make_name(Type, Name), - ExType = exometer_metric_type(Type), - create_metric_and_maybe_subscribe(ExName, ExType). + maybe_add_exporter(). --spec user_size() -> [{size, non_neg_integer()}]. -user_size() -> - [{size, amoc_users_sup:count_no_of_users()}]. +-spec start_predefined_metrics(atom()) -> any(). +start_predefined_metrics(App) -> + Preconfigured = application:get_env(App, predefined_metrics, []), + [init(Type, Name) || {Type, Name} <- lists:flatten(Preconfigured)]. --spec update_time(name(), integer()) -> ok. -update_time(Name, Value) -> - ExName = make_name(times, Name), - exometer:update(ExName, Value). +-spec init(type(), name()) -> ok. +init(counters, Name) -> + prometheus_counter:new([{name, Name}]); +init(gauge, Name) -> + prometheus_gauge:new([{name, Name}]); +init(summary, Name) -> + prometheus_summary:new([{name, Name}]); +init(Type, Name) when histogram =:= Type; times =:= Type -> + prometheus_histogram:new([{name, Name}, {buckets, histogram_buckets()}]). -spec update_counter(name()) -> ok. 
update_counter(Name) -> - ExName = make_name(counters, Name), - exometer:update(ExName, 1). + prometheus_counter:inc(Name). -spec update_counter(name(), integer()) -> ok. update_counter(Name, Value) -> - ExName = make_name(counters, Name), - exometer:update(ExName, Value). + prometheus_counter:inc(Name, Value). -spec update_gauge(name(), integer()) -> ok. update_gauge(Name, Value) -> - ExName = make_name(gauge, Name), - exometer:update(ExName, Value). + prometheus_gauge:set(Name, Value). + +-spec update_time(name(), integer()) -> ok. +update_time(Name, Value) -> + prometheus_histogram:observe(Name, Value). % 'times' metrics are created as histograms by init/2 + +-spec collect_mf(prometheus_registry:registry(), prometheus_collector:collect_mf_callback()) -> ok. +collect_mf(_Registry, Callback) -> + Data = amoc_users_sup:count_no_of_users(), + Mf = prometheus_model_helpers:create_mf( + amoc_users_size, "Number of AMOC users running", gauge, Data), + Callback(Mf), + ok. + +-spec deregister_cleanup(prometheus_registry:registry()) -> ok. +deregister_cleanup(_Registry) -> ok. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% internal functions %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -make_name(_, {strict, Name}) -> - add_name_prefix([amoc], Name); -make_name(Type, Name) -> - add_name_prefix([amoc, Type], Name). - -add_name_prefix(Prefix, Name) when is_atom(Name) -> - Prefix ++ [Name]; -add_name_prefix(Prefix, Name) when is_list(Name) -> - Prefix ++ Name. - -metric_report_datapoints(gauge) -> [value]; -metric_report_datapoints(spiral) -> [count, one]; -metric_report_datapoints(histogram) -> [mean, min, max, median, 95, 99, 999]. - -exometer_metric_type(counters) -> spiral; -exometer_metric_type(times) -> histogram; -exometer_metric_type(gauge) -> gauge. - -create_metric_and_maybe_subscribe(ExName, ExType) -> - ok = exometer:re_register(ExName, ExType, []), - Datapoints = metric_report_datapoints(ExType), - maybe_subscribe(ExName, Datapoints). 
- -maybe_add_reporter() -> - Reporter = get_reporter(), - case lists:keyfind(Reporter, 1, exometer_report:list_reporters()) of - {Reporter, _} -> ok; - _-> - case amoc_config_env:get(graphite_host, undefined) of - undefined -> ok; - Host -> - Prefix = amoc_config_env:get(graphite_prefix, net_adm:localhost()), - Port = amoc_config_env:get(graphite_port, 2003), - Options = [{module, exometer_report_graphite}, - {prefix, Prefix}, - {host, Host}, - {port, Port}, - {api_key, ""}], - exometer_report:add_reporter(Reporter, Options) - end +-spec maybe_add_exporter() -> boolean(). +maybe_add_exporter() -> + case {get_ip_address(), amoc_config_env:get(prometheus_port, 9090)} of + {IpTuple, Port} + when is_tuple(IpTuple), is_integer(Port) -> + Routes = [{'_', [{"/metrics/[:registry]", prometheus_cowboy2_handler, []}]}], + Dispatch = cowboy_router:compile(Routes), + ProtocolOpts = #{env => #{dispatch => Dispatch}}, + TransportOpts = #{socket_opts => [{port, Port}, {ip, IpTuple}]}, + cowboy:start_clear(prometheus_exporter, TransportOpts, ProtocolOpts), + true; + {Ip, Port} -> + ?LOG_INFO(#{what => no_prometheus_backend_enabled, ip => Ip, port => Port}), + false end. -subsribe_default_metrics() -> - maybe_subscribe([amoc, users], [size]), - maybe_subscribe([erlang, system_info], [port_count, process_count]), - maybe_subscribe([erlang, memory], [total, processes, processes_used, system, binary, ets]). - -get_reporter() -> - App = application:get_application(?MODULE), - application:get_env(App, metrics_reporter, ?AMOC_DEFAULT_METRICS_REPORTER). 
- -maybe_subscribe(ExName, Datapoints) -> - Reporter = get_reporter(), - Interval = ?AMOC_METRICS_REPORTING_INTERVAL, - case lists:keyfind(Reporter, 1, exometer_report:list_reporters()) of - {Reporter, _} -> - exometer_report:unsubscribe(Reporter, ExName, Datapoints, []), - ok = exometer_report:subscribe(Reporter, ExName, Datapoints, Interval); +get_ip_address() -> + case amoc_config_env:get(prometheus_ip, {0, 0, 0, 0}) of + IpTuple when is_tuple(IpTuple) + andalso 4 =:= tuple_size(IpTuple) + orelse 8 =:= tuple_size(IpTuple) -> + IpTuple; + IpAddr when is_list(IpAddr) -> + {ok, IpTuple} = inet:parse_address(IpAddr), + IpTuple; _ -> - ?LOG_WARNING("Reporter=~p not_enabled", [Reporter]) + undefined end. -maybe_init_predefined_metrics() -> - App = application:get_application(?MODULE), - Preconfigured = application:get_env(App, predefined_metrics, []), - [init(Type, Name) || {Type, Name} <- lists:flatten(Preconfigured)]. +-spec histogram_buckets() -> [integer()]. +histogram_buckets() -> + histogram_buckets([], 1 bsl 30). % ~1.07 * 10^9 + +histogram_buckets(AccBuckets, Val) when Val > 0 -> + histogram_buckets([Val | AccBuckets], Val bsr 1); +histogram_buckets(AccBuckets, _Val) -> + AccBuckets. diff --git a/src/rest_api/amoc_api.erl b/src/rest_api/amoc_api.erl index ba06ee9..22facac 100644 --- a/src/rest_api/amoc_api.erl +++ b/src/rest_api/amoc_api.erl @@ -4,19 +4,27 @@ %%============================================================================== -module(amoc_api). --export([start/0, stop/0]). +-export([start/1, stop/0]). --spec start() -> {ok, pid()} | {error, any()}. -start() -> +-spec start(boolean()) -> {ok, pid()} | {error, any()}. 
+start(HasMetrics) -> amoc_api_logic_handler:set_validator_state(), Port = amoc_config_env:get(api_port, 4000), TransportOpts = #{socket_opts => [{ip, {0, 0, 0, 0}}, {port, Port}]}, + ProtocolOpts = + case HasMetrics of + true -> + #{metrics_callback => fun prometheus_cowboy2_instrumenter:observe/1, + stream_handlers => [cowboy_metrics_h]}; + false -> + #{} + end, amoc_rest_server:start( openapi_http_server, #{ transport => tcp, transport_opts => TransportOpts, - protocol_opts => #{}, + protocol_opts => ProtocolOpts, logic_handler => amoc_api_logic_handler } ).