diff --git a/models/localMacros.yaml b/models/localMacros.yaml new file mode 100644 index 0000000..126ecb2 --- /dev/null +++ b/models/localMacros.yaml @@ -0,0 +1,10 @@ +macros: + - name: macro_datediff + inputs: + - column + value: "{% if !(end_time|isnil) %} datediff(day, date({{column}}), date('{{end_time.Format(\"2006-01-02 15:04:05\")}}')) {% else %} datediff(day, date({{column}}::timestamp), GETDATE()) {% endif %}" + - name: macro_datediff_n + inputs: + - column + - number_of_days + value: "{% if !(end_time|isnil) %} datediff(day, date({{column}}), date('{{end_time.Format(\"2006-01-02 15:04:05\")}}')) <={{number_of_days}} {% else %} datediff(day, date({{column}}), GETDATE()) <= {{number_of_days}} {% endif %}" diff --git a/models/profiles-ml.yaml b/models/profiles-ml.yaml index 67780a4..f28c668 100644 --- a/models/profiles-ml.yaml +++ b/models/profiles-ml.yaml @@ -7,7 +7,6 @@ models: entity_key: user validity_time: 24h # 1 day py_repo_url: git@github.com:rudderlabs/rudderstack-profiles-classifier.git - train: file_extension: .json file_validity: 168h # If the last trained model is older than this, then the model will be trained again, @@ -21,16 +20,13 @@ models: prediction_horizon_days: 7 # Number of days in future for which we want to predict features_profiles_model: 'rudder_user_base_features' # Model name output_profiles_ml_model: *model_name_7_days # Name of output model based on current model to dinstinguish between multiple models - eligible_users: + eligible_users: inputs: *inputs_7_days - - preprocessing: &model_prep_configs_7_days ignore_features: - user_email - first_name - last_name - predict: inputs: - models/rudder_user_base_features @@ -45,9 +41,7 @@ models: features: - name: *percentile_name_7_days description: 'Percentile of churn score. Higher the percentile, higher the probability of churn' - - <<: *feature_meta_data_7_days - + !!merge <<: *feature_meta_data_7_days - name: &model_name_30_days churn_30_days_model model_type: python_model model_spec: @@ -55,7 +49,6 @@ models: entity_key: user validity_time: 24h # 1 day py_repo_url: git@github.com:rudderlabs/rudderstack-profiles-classifier.git - train: file_extension: .json file_validity: 168h # If the last trained model is older than this, then the model will be trained again, @@ -69,16 +62,13 @@ models: prediction_horizon_days: 30 # Number of days in future for which we want to predict features_profiles_model: 'rudder_user_base_features' # Model name output_profiles_ml_model: *model_name_30_days # Name of output model based on current model to dinstinguish between multiple models - eligible_users: + eligible_users: inputs: *inputs_30_days - - preprocessing: &model_prep_configs_30_days ignore_features: - user_email - first_name - last_name - predict: inputs: - models/rudder_user_base_features @@ -93,9 +83,7 @@ models: features: - name: *percentile_name_30_days description: 'Percentile of churn score. Higher the percentile, higher the probability of churn' - - <<: *feature_meta_data_30_days - + !!merge <<: *feature_meta_data_30_days - name: &model_name_90_days churn_90_days_model model_type: python_model model_spec: @@ -103,7 +91,6 @@ models: entity_key: user validity_time: 24h # 1 day py_repo_url: git@github.com:rudderlabs/rudderstack-profiles-classifier.git - train: file_extension: .json file_validity: 168h # If the last trained model is older than this, then the model will be trained again, @@ -117,16 +104,13 @@ models: prediction_horizon_days: 90 # Number of days in future for which we want to predict features_profiles_model: 'rudder_user_base_features' # Model name output_profiles_ml_model: *model_name_90_days # Name of output model based on current model to dinstinguish between multiple models - eligible_users: + eligible_users: inputs: *inputs_90_days - - preprocessing: &model_prep_configs_90_days ignore_features: - user_email - first_name - last_name - predict: inputs: - models/rudder_user_base_features @@ -141,5 +125,4 @@ models: features: - name: *percentile_name_90_days description: 'Percentile of churn score. Higher the percentile, higher the probability of churn' - - <<: *feature_meta_data_90_days \ No newline at end of file + !!merge <<: *feature_meta_data_90_days diff --git a/models/profiles.yaml b/models/profiles.yaml index bb350ac..79df4bb 100644 --- a/models/profiles.yaml +++ b/models/profiles.yaml @@ -43,22 +43,12 @@ models: - name: context_campaign_source - name: context_campaign_medium - name: timestamp - - name: user_main_id + - name: user_main_id - name: rudder_user_base_features model_type: feature_table_model model_spec: validity_time: 24h # 1 day entity_key: user - macros: - - name: macro_datediff - inputs: - - column - value: "{% if !(end_time|isnil) %} datediff(day, date({{column}}), date('{{end_time.Format(\"2006-01-02 15:04:05\")}}')) {% else %} datediff(day, date({{column}}::timestamp), GETDATE()) {% endif %}" - - name: macro_datediff_n - inputs: - - column - - number_of_days - value: "{% if !(end_time|isnil) %} datediff(day, date({{column}}), date('{{end_time.Format(\"2006-01-02 15:04:05\")}}')) <={{number_of_days}} {% else %} datediff(day, date({{column}}), GETDATE()) <= {{number_of_days}} {% endif %}" vars: - entity_var: name: max_timestamp_bw_tracks_pages @@ -67,27 +57,27 @@ models: #days since last seen - entity_var: name: days_since_last_seen - select: "{{macro_datediff('max_timestamp_bw_tracks_pages')}}" + select: "{{macro_datediff('{{user.Var(\"max_timestamp_bw_tracks_pages\")}}')}}" dependencies: - max_timestamp_bw_tracks_pages #Churn features - entity_var: name: is_churned_7_days - select: case when days_since_last_seen > 7 then 1 else 0 end + select: case when {{user.Var("days_since_last_seen")}} > 7 then 1 else 0 end description: Depending on the n value, it specifies if there is any activity observed in the last 7 days. dependencies: - days_since_last_seen - session_end_time - entity_var: name: is_churned_30_days - select: case when days_since_last_seen > 30 then 1 else 0 end + select: case when {{user.Var("days_since_last_seen")}} > 30 then 1 else 0 end description: Depending on the n value, it specifies if there is any activity observed in the last 30 days. dependencies: - days_since_last_seen - session_end_time - entity_var: name: is_churned_90_days - select: case when days_since_last_seen > 90 then 1 else 0 end + select: case when {{user.Var("days_since_last_seen")}} > 90 then 1 else 0 end description: Depending on the n value, it specifies if there is any activity observed in the last 90 days. dependencies: - days_since_last_seen @@ -99,11 +89,11 @@ models: - entity_var: name: state from: inputs/rsIdentifies - select: first_value(state) + select: first_value({{user.Var("state")}}) window: order_by: - timestamp desc - where: state is not null and state!='' + where: '{{user.Var("state")}} is not null and {{user.Var("state")}}!=''''' - entity_var: name: country from: inputs/rsIdentifies @@ -115,27 +105,27 @@ models: - entity_var: name: first_name from: inputs/rsIdentifies - select: first_value(first_name) + select: first_value({{user.Var("first_name")}}) window: order_by: - timestamp desc - where: first_name is not null and first_name!='' + where: '{{user.Var("first_name")}} is not null and {{user.Var("first_name")}}!=''''' - entity_var: name: last_name from: inputs/rsIdentifies - select: first_value(last_name) + select: first_value({{user.Var("last_name")}}) window: order_by: - timestamp desc - where: last_name is not null and last_name!='' + where: '{{user.Var("last_name")}} is not null and {{user.Var("last_name")}}!=''''' - entity_var: name: currency from: inputs/rsIdentifies - select: first_value(currency) + select: first_value({{user.Var("currency")}}) window: order_by: - timestamp desc - where: currency is not null and currency!='' + where: '{{user.Var("currency")}} is not null and {{user.Var("currency")}}!=''''' dependencies: - session_start_time - entity_var: @@ -195,7 +185,7 @@ models: name: total_sessions_last_week from: models/rsTracksUnionPages select: count(distinct context_session_id) - where: " context_session_id is not null and {{macro_datediff('session_start_time')}} between 0 and 7" + where: " context_session_id is not null and {{macro_datediff('{{rsTracksUnionPages.Var(\"session_start_time\")}}')}} between 0 and 7" description: total number of sessions over last 7 days. dependencies: - session_row_number @@ -203,21 +193,21 @@ models: name: total_sessions_90_days from: models/rsTracksUnionPages select: count(distinct context_session_id) - where: " context_session_id is not null and {{macro_datediff('session_start_time')}} between 0 and 90" + where: " context_session_id is not null and {{macro_datediff('{{rsTracksUnionPages.Var(\"session_start_time\")}}')}} between 0 and 90" description: total number of sessions over last 90 days. - entity_var: name: total_sessions_365_days from: models/rsTracksUnionPages select: count(distinct context_session_id) - where: " context_session_id is not null and {{macro_datediff('session_start_time')}} between 0 and 365 " + where: " context_session_id is not null and {{macro_datediff('{{rsTracksUnionPages.Var(\"session_start_time\")}}')}} between 0 and 365 " description: total number of sessions over last 356 days. dependencies: - session_start_time - entity_var: name: avg_session_length_in_sec_overall from: models/rsTracksUnionPages - select: avg(datediff(second, session_start_time, session_end_time)) - where: session_row_number = 1 and context_session_id is not null + select: avg(datediff(second, {{rsTracksUnionPages.Var("session_start_time")}}, {{rsTracksUnionPages.Var("session_end_time")}})) + where: '{{rsTracksUnionPages.Var("session_row_number")}} = 1 and context_session_id is not null' description: Average session length (in seconds) of all the user sessions till date. dependencies: - session_row_number @@ -226,8 +216,8 @@ models: - entity_var: name: avg_session_length_in_sec_last_week from: models/rsTracksUnionPages - select: avg(datediff(second, session_start_time, session_end_time)) - where: context_session_id is not null and session_row_number = 1 and {{macro_datediff('session_start_time')}} between 0 and 7 + select: avg(datediff(second, {{rsTracksUnionPages.Var("session_start_time")}}, {{rsTracksUnionPages.Var("session_end_time")}})) + where: context_session_id is not null and {{rsTracksUnionPages.Var("session_row_number")}} = 1 and {{macro_datediff('{{rsTracksUnionPages.Var("session_start_time")}}')}} between 0 and 7 description: Average session length (in seconds) of all the user sessions that started in last 7 days dependencies: - session_start_time @@ -236,8 +226,8 @@ models: - entity_var: name: avg_session_length_in_sec_365_days from: models/rsTracksUnionPages - select: avg(datediff(second, session_start_time, session_end_time)) - where: "context_session_id is not null and session_row_number = 1 and {{macro_datediff_n('session_start_time','365')}}" + select: avg(datediff(second, {{rsTracksUnionPages.Var("session_start_time")}}, {{rsTracksUnionPages.Var("session_end_time")}})) + where: "context_session_id is not null and {{rsTracksUnionPages.Var(\"session_row_number\")}} = 1 and {{macro_datediff_n('{{rsTracksUnionPages.Var(\"session_start_time\")}}','365')}}" description: Average session length (in seconds) of all the user sessions that started in last 365 days dependencies: - session_row_number @@ -246,14 +236,14 @@ models: - entity_var: name: first_seen_date from: models/rsTracksUnionPages - select: min(date(session_start_time)) + select: min(date({{rsTracksUnionPages.Var("session_start_time")}})) description: The first date on which an event has been recorded by the user dependencies: - session_start_time - entity_var: name: last_seen_date from: models/rsTracksUnionPages - select: max(date(session_end_time)) + select: max(date({{rsTracksUnionPages.Var("session_end_time")}})) description: The latest date on which an event has been recorded by the user dependencies: - session_end_time @@ -340,4 +330,3 @@ models: - campaigns_list - mediums_list - sources_list - \ No newline at end of file diff --git a/original_project_folder b/original_project_folder new file mode 120000 index 0000000..b7b47c9 --- /dev/null +++ b/original_project_folder @@ -0,0 +1 @@ +/Users/admin/Desktop/Playground/lib_projects/rudderstack-profiles-base-features \ No newline at end of file diff --git a/pb_project.yaml b/pb_project.yaml index 918c974..85d144c 100644 --- a/pb_project.yaml +++ b/pb_project.yaml @@ -1,7 +1,7 @@ # Project name name: base_features # Project's yaml schema version -schema_version: 42 +schema_version: 44 # WH Connection Profile to use. connection: dev_wh # Whether to allow inputs having no timestamps,