diff --git a/dbt/.sqlfluff b/dbt/.sqlfluff index 936f1df6e..f8ce53e30 100644 --- a/dbt/.sqlfluff +++ b/dbt/.sqlfluff @@ -20,4 +20,8 @@ align_within = select_clause # in this example would likely be the boundary of a CTE. Stopping # when we hit brackets is usually a good rule of thumb for this # configuration. -align_scope = bracketed \ No newline at end of file +align_scope = bracketed + +[sqlfluff:indentation] +indent_unit = space +tab_space_size = 2 \ No newline at end of file
diff --git a/dbt/models/fraud/trips/_simultaneous_group_trips.yml b/dbt/models/fraud/trips/_simultaneous_group_trips.yml
new file mode 100644
index 000000000..b34814eaa
--- /dev/null
+++ b/dbt/models/fraud/trips/_simultaneous_group_trips.yml
@@ -0,0 +1,30 @@
+version: 2
+models:
+  - name: simultaneous_group_trips
+    columns:
+      - name: start_geo_code
+        data_type: character varying
+      - name: end_geo_code
+        data_type: character varying
+      - name: start_datetime
+        data_type: timestamp with time zone
+      - name: distance_50m
+        data_type: integer
+      - name: num_journeys
+        data_type: bigint
+      - name: total_driver_revenue
+        data_type: bigint
+      - name: total_passengers_contributions
+        data_type: bigint
+      - name: total_incentives
+        data_type: numeric
+      - name: num_incitations
+        data_type: numeric
+      - name: operators
+        data_type: ARRAY
+      - name: num_fraud_journeys
+        data_type: bigint
+      - name: num_anomaly_journeys
+        data_type: bigint
+      - name: has_fraud_labels
+        data_type: text
diff --git a/dbt/models/fraud/trips/simultaneous_group_trips.sql b/dbt/models/fraud/trips/simultaneous_group_trips.sql
new file mode 100644
index 000000000..0860bfba9
--- /dev/null
+++ b/dbt/models/fraud/trips/simultaneous_group_trips.sql
@@ -0,0 +1,125 @@
+{{
+  config(
+    materialized = 'incremental',
+    unique_key = [
+      'start_geo_code',
+      'end_geo_code',
+      'start_datetime',
+      'distance_50m'
+    ],
+    indexes = [
+      {
+        'columns': [
+          'start_geo_code',
+          'end_geo_code',
+          'start_datetime',
+          'distance_50m'
+        ],
+        'unique': true
+      }
+    ]
+  )
+}}
+
+{#
+  Lower bound of the start_datetime window recomputed by this run:
+  - full refresh: the last year;
+  - incremental run: the start of the week of the latest start_datetime already
+    stored in this model. If the stored table is empty, max() is NULL and an
+    unguarded comparison would filter out every row, so fall back to the
+    one-year window.
+#}
+{% if is_incremental() %}
+  {% set window_start %}
+    (
+      select
+        coalesce(
+          date_trunc('week', max(start_datetime)),
+          now() - interval '1 year'
+        )
+      from {{ this }}
+    )
+  {% endset %}
+{% else %}
+  {% set window_start %}(now() - interval '1 year'){% endset %}
+{% endif %}
+
+-- Incentive amount and number of incentives per carpool inside the window.
+-- Aggregated to one row per carpool so the join in "groups" cannot fan out.
+with incentives as (
+  select
+    i.carpool_id,
+    sum(i.amount) as total_amount,
+    count(distinct i._id) as num_incitations
+  from {{ source('carpool', 'operator_incentives') }} as i
+  inner join {{ source('carpool', 'carpools') }} as c
+    on i.carpool_id = c._id
+  where c.start_datetime >= {{ window_start }}
+  group by 1
+),
+
+-- One row per carpool that has at least one terms-violation label row, so a
+-- carpool with several label rows is still counted once in "groups".
+labelled_carpools as (
+  select distinct tvel.carpool_id
+  from {{ source('carpool', 'terms_violation_error_labels') }} as tvel
+),
+
+-- Carpools sharing origin, destination, exact start_datetime and distance
+-- bucket (distance / 50); only groups of at least 3 journeys are kept.
+-- NOTE(review): geo and status are assumed to hold at most one row per
+-- carpool; otherwise they inflate the counts and sums below - confirm.
+groups as (
+  select
+    g.start_geo_code,
+    g.end_geo_code,
+    c.start_datetime,
+    (c.distance / 50)::int as distance_50m,
+    count(*) as num_journeys,
+    -- NOTE(review): unqualified columns; qualify once the owning table is confirmed.
+    sum(driver_revenue) as total_driver_revenue,
+    sum(passenger_contribution) as total_passengers_contributions,
+    sum(i.total_amount) as total_incentives,
+    sum(i.num_incitations) as num_incitations,
+    array_agg(distinct c.operator_id) as operators,
+    sum((s.fraud_status = 'failed')::int) as num_fraud_journeys,
+    sum((s.anomaly_status = 'failed')::int) as num_anomaly_journeys,
+    -- 'yes' sorts after 'no', so max() yields 'yes' as soon as one journey is labelled.
+    max(
+      case when l.carpool_id is not null then 'yes' else 'no' end
+    ) as has_fraud_labels
+  from {{ source('carpool', 'carpools') }} as c
+  inner join {{ source('carpool', 'geo') }} as g
+    on c._id = g.carpool_id
+  left join incentives as i
+    on c._id = i.carpool_id
+  left join {{ source('carpool', 'status') }} as s
+    on c._id = s.carpool_id
+  left join labelled_carpools as l
+    on c._id = l.carpool_id
+  where c.start_datetime >= {{ window_start }}
+  group by
+    1,
+    2,
+    3,
+    4
+  having
+    count(*) >= 3
+)
+
+select
+  start_geo_code,
+  end_geo_code,
+  start_datetime,
+  distance_50m,
+  num_journeys,
+  total_driver_revenue,
+  total_passengers_contributions,
+  total_incentives,
+  num_incitations,
+  operators,
+  num_fraud_journeys,
+  num_anomaly_journeys,
+  has_fraud_labels
+from groups
+order by date_trunc('week', start_datetime) desc, total_driver_revenue desc
diff --git a/dbt/models/fraud/_users_intraday_role_change_stats.yml b/dbt/models/fraud/users/_users_intraday_role_change_stats.yml similarity index 100% rename from dbt/models/fraud/_users_intraday_role_change_stats.yml rename to dbt/models/fraud/users/_users_intraday_role_change_stats.yml diff --git a/dbt/models/fraud/_users_statistics.yml b/dbt/models/fraud/users/_users_statistics.yml similarity index 100% rename from dbt/models/fraud/_users_statistics.yml rename to dbt/models/fraud/users/_users_statistics.yml diff --git a/dbt/models/fraud/users_intraday_role_change_stats.sql b/dbt/models/fraud/users/users_intraday_role_change_stats.sql similarity index 91% rename from dbt/models/fraud/users_intraday_role_change_stats.sql rename to dbt/models/fraud/users/users_intraday_role_change_stats.sql index 455d38fdd..07e22623d 100644 --- a/dbt/models/fraud/users_intraday_role_change_stats.sql +++ b/dbt/models/fraud/users/users_intraday_role_change_stats.sql @@ -50,6 +50,8 @@ select avg( count_consecutive_changes ) as avg_daily_consecutives_intraday_role_changes, - max(count_consecutive_changes) as max_daily_consecutives_intraday_role_changes + max( + count_consecutive_changes + ) as max_daily_consecutives_intraday_role_changes from intraday_stats group by 1 diff --git a/dbt/models/fraud/users_statistics.sql b/dbt/models/fraud/users/users_statistics.sql similarity index 100% rename from dbt/models/fraud/users_statistics.sql rename to dbt/models/fraud/users/users_statistics.sql diff --git a/dbt/models/fraud/users_trips.sql b/dbt/models/fraud/users/users_trips.sql similarity index 100% rename from dbt/models/fraud/users_trips.sql rename to dbt/models/fraud/users/users_trips.sql diff --git a/dbt/models/sources/carpool.yml 
b/dbt/models/sources/carpool.yml index 1c289efba..b02282e63 100644 --- a/dbt/models/sources/carpool.yml +++ b/dbt/models/sources/carpool.yml @@ -6,7 +6,7 @@ sources: tables: - name: carpools - tags: ['carpools_source'] + tags: [carpools_source] meta: elementary: # default config timestamp_column: start_datetime @@ -22,7 +22,7 @@ sources: seasonality: day_of_week tests: - elementary.dimension_anomalies: - name: "volume de trajets par opérateur" + name: volume de trajets par opérateur dimensions: - operator_id config: @@ -135,7 +135,7 @@ sources: - name: legacy_id data_type: bigint - name: geo - tags: ['geo_source'] + tags: [geo_source] meta: elementary: # default config timestamp_column: updated_at @@ -150,24 +150,24 @@ sources: count: 1 tests: - elementary.dimension_anomalies: - name: "volume de trajets par O/D" + name: volume de trajets par O/D timestamp_column: updated_at dimensions: - start_geo_code - end_geo_code - exclude_final_results: 'average > 50' + exclude_final_results: average > 50 - elementary.dimension_anomalies: - name: "volume de trajets par origine" + name: volume de trajets par origine timestamp_column: updated_at dimensions: - start_geo_code - exclude_final_results: 'average > 50' + exclude_final_results: average > 50 - elementary.dimension_anomalies: - name: "volume de trajets par destination" + name: volume de trajets par destination timestamp_column: updated_at dimensions: - end_geo_code - exclude_final_results: 'average > 50' + exclude_final_results: average > 50 columns: - name: _id data_type: integer @@ -219,3 +219,13 @@ sources: data_type: USER-DEFINED - name: anomaly_status data_type: USER-DEFINED + - name: terms_violation_error_labels + columns: + - name: _id + data_type: integer + - name: created_at + data_type: timestamp with time zone + - name: carpool_id + data_type: integer + - name: labels + data_type: ARRAY diff --git a/dbt/models/sources/policy.yml b/dbt/models/sources/policy.yml index 2ede9da88..ae1924712 100644 --- 
a/dbt/models/sources/policy.yml +++ b/dbt/models/sources/policy.yml @@ -57,6 +57,6 @@ sources: - name: incentive_sum data_type: integer - name: max_amount - data_type: integer + data_type: bigint - name: tz data_type: character varying diff --git a/notebooks/.pre-commit-config.yaml b/notebooks/.pre-commit-config.yaml index b8562edcc..ecc3d7dd2 100644 --- a/notebooks/.pre-commit-config.yaml +++ b/notebooks/.pre-commit-config.yaml @@ -4,6 +4,6 @@ repos: - id: jupyter-nb-clear-output name: jupyter-nb-clear-output files: \.ipynb$ - stages: [commit] + stages: [pre-commit] language: system entry: jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace diff --git a/notebooks/analytics/cee/cee_analysis.ipynb b/notebooks/analytics/cee/cee_analysis.ipynb index a0306680c..eae0dfee5 100644 --- a/notebooks/analytics/cee/cee_analysis.ipynb +++ b/notebooks/analytics/cee/cee_analysis.ipynb @@ -5,14 +5,22 @@ "id": "88314698-ff9d-4314-b576-cd6bc2456ab3", "metadata": {}, "source": [ - "# Imports" + "# Imports\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "069a1698-fb73-4445-b1e9-ec019f349be3", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-02-05T10:13:37.056696Z", + "iopub.status.busy": "2025-02-05T10:13:37.056287Z", + "iopub.status.idle": "2025-02-05T10:13:47.780935Z", + "shell.execute_reply": "2025-02-05T10:13:47.780693Z", + "shell.execute_reply.started": "2025-02-05T10:13:37.056664Z" + } + }, "outputs": [], "source": [ "import os\n", @@ -33,14 +41,22 @@ "id": "a5e803f4-ce31-4c93-8edd-d2341869b576", "metadata": {}, "source": [ - "# DB" + "# DB\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "b52ccb03-1fab-4750-a3e4-bdac2ba9c3f7", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-02-05T10:13:47.781693Z", + "iopub.status.busy": "2025-02-05T10:13:47.781551Z", + "iopub.status.idle": "2025-02-05T10:13:47.783222Z", + 
"shell.execute_reply": "2025-02-05T10:13:47.782976Z", + "shell.execute_reply.started": "2025-02-05T10:13:47.781684Z" + } + }, "outputs": [], "source": [ "DATABASE_URL = os.environ[\"DB_URL_RPC\"]" @@ -50,7 +66,15 @@ "cell_type": "code", "execution_count": null, "id": "78cd6961-0676-4e96-ad56-23b568f1e01a", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-02-05T10:13:47.783635Z", + "iopub.status.busy": "2025-02-05T10:13:47.783572Z", + "iopub.status.idle": "2025-02-05T10:13:47.786014Z", + "shell.execute_reply": "2025-02-05T10:13:47.785793Z", + "shell.execute_reply.started": "2025-02-05T10:13:47.783628Z" + } + }, "outputs": [], "source": [ "OUTPUT_PATH = Path(\"outputs\")\n", @@ -59,9 +83,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "34d842df-ffce-4110-9aab-a727a7a66d0b", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-02-05T10:13:47.786828Z", + "iopub.status.busy": "2025-02-05T10:13:47.786739Z", + "iopub.status.idle": "2025-02-05T10:13:47.891509Z", + "shell.execute_reply": "2025-02-05T10:13:47.891197Z", + "shell.execute_reply.started": "2025-02-05T10:13:47.786821Z" + } + }, "outputs": [], "source": [ "db_engine = create_engine(DATABASE_URL)" @@ -80,14 +112,22 @@ } }, "source": [ - "# Paramètres" + "# Paramètres\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "4d8b7f78", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-02-05T10:13:47.891874Z", + "iopub.status.busy": "2025-02-05T10:13:47.891800Z", + "iopub.status.idle": "2025-02-05T10:13:47.894077Z", + "shell.execute_reply": "2025-02-05T10:13:47.893714Z", + "shell.execute_reply.started": "2025-02-05T10:13:47.891865Z" + } + }, "outputs": [], "source": [ "aom_with_incentives = [\n", @@ -130,7 +170,7 @@ "id": "ef966c8a-5764-409b-a32a-9c8e5ddadfe7", "metadata": {}, "source": [ - "# Couleurs" + "# Couleurs\n" ] }, { @@ -402,7 +442,7 @@ "id": 
"df757060-db5d-4844-a63a-0f26a065acfb", "metadata": {}, "source": [ - "# Statistiques cohorte 2022" + "# Statistiques cohorte 2022\n" ] }, { @@ -460,7 +500,7 @@ "id": "0abcf0d8-6dce-4470-aa2b-c46c9fc1cc55", "metadata": {}, "source": [ - "## Nombre de CEE par mois et opérateur" + "## Nombre de CEE par mois et opérateur\n" ] }, { @@ -531,7 +571,7 @@ "id": "26a2c669-93b4-4672-8096-344d31b9aaf9", "metadata": {}, "source": [ - "## Part des trjets CEE vs trajets non CEE" + "## Part des trjets CEE vs trajets non CEE\n" ] }, { @@ -725,7 +765,7 @@ } }, "source": [ - "## Part des opérateurs par cohorte CEE" + "## Part des opérateurs par cohorte CEE\n" ] }, { @@ -808,7 +848,7 @@ "id": "5b47b941-04f2-443a-9c08-1f4e2d244621", "metadata": {}, "source": [ - "## Evolution de la part de CEE par opérateur" + "## Evolution de la part de CEE par opérateur\n" ] }, { @@ -912,7 +952,7 @@ "id": "dd7fab2a-0be8-4838-841f-8f6fc071d823", "metadata": {}, "source": [ - "### Part des conducteurs CEE qui ont obtenus leurs bonus sur l'ensemble des conducteurs CEE :" + "### Part des conducteurs CEE qui ont obtenus leurs bonus sur l'ensemble des conducteurs CEE :\n" ] }, { @@ -930,7 +970,7 @@ "id": "5cebf6e2-385d-4883-8e8f-5855efec61ed", "metadata": {}, "source": [ - "### Comparaison par opérateur" + "### Comparaison par opérateur\n" ] }, { @@ -1036,7 +1076,7 @@ "id": "dcdec20a-c647-4f11-8564-d6e6006d6da8", "metadata": {}, "source": [ - "### 2022" + "### 2022\n" ] }, { @@ -1084,7 +1124,7 @@ "id": "4cc33648-88bf-4fce-9f31-51ab80975ec4", "metadata": {}, "source": [ - "### CEE" + "### CEE\n" ] }, { @@ -1152,7 +1192,7 @@ "id": "f8dd35a4-d3de-4bd3-b98d-035862f8507c", "metadata": {}, "source": [ - "### AOM" + "### AOM\n" ] }, { @@ -1310,7 +1350,7 @@ "id": "eeba0204-a469-412d-a075-60bd2efeebd3", "metadata": {}, "source": [ - "## Moyenne par cohorte" + "## Moyenne par cohorte\n" ] }, { @@ -1340,7 +1380,7 @@ "id": "df55f0d2-4889-499f-b5ff-0d91c4c5d6c2", "metadata": {}, "source": [ - "## Visualisation 2022" + 
"## Visualisation 2022\n" ] }, { @@ -1366,7 +1406,7 @@ " elif breakpoint == np.inf:\n", " return \"Plus de 55\"\n", " else:\n", - " return f\"{breakpoint-4:.0f}-{breakpoint:.0f}\"\n", + " return f\"{breakpoint - 4:.0f}-{breakpoint:.0f}\"\n", "\n", "\n", "df_trips_by_drivers_2022_agg = (\n", @@ -1427,7 +1467,7 @@ " .with_columns(\n", " pl.col(\"breakpoint\")\n", " .cast(pl.String)\n", - " .replace(np.inf, f\"{bins[-1]+1}+\")\n", + " .replace(np.inf, f\"{bins[-1] + 1}+\")\n", " .str.replace(\"(\\.0)\", \"\"),\n", " (100 * pl.col(\"count\") / pl.col(\"count\").sum()).alias(\"share\"),\n", " )\n", @@ -1927,7 +1967,7 @@ "id": "89456da2-b0a5-4994-bb7b-11c437ecba1d", "metadata": {}, "source": [ - "### 2022" + "### 2022\n" ] }, { @@ -1973,7 +2013,7 @@ "id": "b24c4909-0c91-4610-b6ba-f32d18df2201", "metadata": {}, "source": [ - "### CEE" + "### CEE\n" ] }, { @@ -2030,7 +2070,7 @@ "id": "70082ce9-83aa-4903-98dc-10166b5b231a", "metadata": {}, "source": [ - "### AOM" + "### AOM\n" ] }, { @@ -2182,7 +2222,7 @@ "id": "d43f1018-5bb4-4658-b84c-e47dd2be529b", "metadata": {}, "source": [ - "## Moyennes" + "## Moyennes\n" ] }, { @@ -2250,7 +2290,7 @@ " .with_columns(\n", " pl.col(\"breakpoint\")\n", " .cast(pl.String)\n", - " .replace(np.inf, f\"{bins[-1]+1}\")\n", + " .replace(np.inf, f\"{bins[-1] + 1}\")\n", " .str.replace(\"(\\.0)\", \"\"),\n", " (100 * pl.col(\"count\") / pl.col(\"count\").sum()).alias(\"share\"),\n", " )\n", @@ -2643,7 +2683,7 @@ "id": "809ef076-f8b6-4cec-abd3-e50cc55e34f5", "metadata": {}, "source": [ - "### 2022" + "### 2022\n" ] }, { @@ -2688,7 +2728,7 @@ "id": "8fae42cc-8425-4628-adbc-bd031dc09a6e", "metadata": {}, "source": [ - "On filtre les trajets > 100km :" + "On filtre les trajets > 100km :\n" ] }, { @@ -2706,7 +2746,7 @@ "id": "e04691f7-cc40-4eb2-8395-1fbd7728ff55", "metadata": {}, "source": [ - "Part des trajets >100km :" + "Part des trajets >100km :\n" ] }, { @@ -2728,7 +2768,7 @@ "id": "26ccad3a-5983-496e-ab96-824a9425eddd", "metadata": {}, 
"source": [ - "### CEE" + "### CEE\n" ] }, { @@ -2779,7 +2819,7 @@ "id": "9848a3af-4393-4b34-ac1a-5cb244b912ed", "metadata": {}, "source": [ - "### AOM" + "### AOM\n" ] }, { @@ -2919,7 +2959,7 @@ "id": "c481e858-3c5d-4304-ae19-f32bf4adda61", "metadata": {}, "source": [ - "## Moyennes" + "## Moyennes\n" ] }, { @@ -2927,7 +2967,7 @@ "id": "9f22be16-b3f0-4ef1-aca5-33326ac22c81", "metadata": {}, "source": [ - "### 2022" + "### 2022\n" ] }, { @@ -2945,7 +2985,7 @@ "id": "75b1865f-0c62-4927-a4b8-8628dc643562", "metadata": {}, "source": [ - "Part des trajets >100km :" + "Part des trajets >100km :\n" ] }, { @@ -2967,7 +3007,7 @@ "id": "0fde838c-5013-4106-a75e-1fd4deaf1b92", "metadata": {}, "source": [ - "### CEE" + "### CEE\n" ] }, { @@ -2987,7 +3027,7 @@ "id": "f231e6f0-4122-4c16-99e9-137bb81709da", "metadata": {}, "source": [ - "On filtre les trajets > 100km :" + "On filtre les trajets > 100km :\n" ] }, { @@ -3029,7 +3069,7 @@ "id": "896da050-9423-4700-97a9-b13150154bf3", "metadata": {}, "source": [ - "Part des trajets >100km :" + "Part des trajets >100km :\n" ] }, { @@ -3629,7 +3669,7 @@ "id": "5af0572d-29e6-4222-89ed-c55646fe8a49", "metadata": {}, "source": [ - "### 2022" + "### 2022\n" ] }, { @@ -3679,7 +3719,7 @@ "id": "b5f73f33-4699-44dc-a800-418eb3fda060", "metadata": {}, "source": [ - "### CEE" + "### CEE\n" ] }, { @@ -3734,7 +3774,7 @@ "id": "5e3e2ed4-8aba-4580-8675-6d9d81ca94c6", "metadata": {}, "source": [ - "### AOM" + "### AOM\n" ] }, { @@ -3917,7 +3957,7 @@ "id": "b363bf3d-c946-484d-8edd-138f916fd4fc", "metadata": {}, "source": [ - "## Moyennes" + "## Moyennes\n" ] }, { @@ -3956,7 +3996,7 @@ " .with_columns(\n", " pl.col(\"breakpoint\")\n", " .cast(pl.String)\n", - " .replace(np.inf, f\"{bins[-1]+1}\")\n", + " .replace(np.inf, f\"{bins[-1] + 1}\")\n", " .str.replace(\"(\\.0)\", \"\"),\n", " (100 * pl.col(\"count\") / pl.col(\"count\").sum()).alias(\"share\"),\n", " )\n", @@ -4328,7 +4368,7 @@ "id": "aea6233d-35e4-421e-b9d8-06ba8abf86d1", "metadata": {}, 
"source": [ - "### 2022" + "### 2022\n" ] }, { @@ -4386,7 +4426,7 @@ "id": "8db415f1-c0f7-4f10-a8c3-c7d9cd3f22be", "metadata": {}, "source": [ - "### CEE" + "### CEE\n" ] }, { @@ -4744,7 +4784,7 @@ "id": "283f4d91-e461-44e7-af18-91674d711247", "metadata": {}, "source": [ - "### 2022" + "### 2022\n" ] }, { @@ -4822,7 +4862,7 @@ "id": "bee54326-db67-409b-a54f-82d163463a8e", "metadata": {}, "source": [ - "### CEE" + "### CEE\n" ] }, { @@ -4901,7 +4941,7 @@ "id": "501092b8-3877-4884-b3f5-cab62916afb6", "metadata": {}, "source": [ - "### AOM" + "### AOM\n" ] }, { @@ -5496,7 +5536,7 @@ "id": "fe2d6a74-0cb7-46d2-ab0f-e7c4769fdaa9", "metadata": {}, "source": [ - "## Application au nombre de trajets effectués" + "## Application au nombre de trajets effectués\n" ] }, { @@ -5504,7 +5544,7 @@ "id": "74805c9f-edde-40f1-a68d-4edec31d9173", "metadata": {}, "source": [ - "### 2022" + "### 2022\n" ] }, { @@ -5586,7 +5626,7 @@ "id": "d3bfd58d-06ad-42ad-82e4-4951f37fe947", "metadata": {}, "source": [ - "### CEE" + "### CEE\n" ] }, { @@ -5647,7 +5687,7 @@ "id": "01114a32-1c54-46b3-bab6-cdd2b2293aea", "metadata": {}, "source": [ - "### Moyenne 2023" + "### Moyenne 2023\n" ] }, { @@ -5692,7 +5732,7 @@ "id": "15a8f2ff-f0ce-4728-94e8-9e717e17ce58", "metadata": {}, "source": [ - "### Stats par cohortes" + "### Stats par cohortes\n" ] }, { @@ -5729,9 +5769,7 @@ " )\n", " .alias(\"churn_estimatation_mean_trips\")\n", " .round(1),\n", - ").sort(\n", - " pl.col(\"cohorte\").str.reverse()\n", - ")" + ").sort(pl.col(\"cohorte\").str.reverse())" ] }, { @@ -5755,7 +5793,7 @@ "id": "22f7b45b-448e-46d6-9a2b-48ccf471d4fd", "metadata": {}, "source": [ - "### 2022" + "### 2022\n" ] }, { @@ -5791,7 +5829,7 @@ "id": "7391236e-bdfc-4243-8b4e-ebf629fc8318", "metadata": {}, "source": [ - "### CEE" + "### CEE\n" ] }, { @@ -5873,7 +5911,7 @@ " .with_columns(\n", " pl.col(\"breakpoint\")\n", " .cast(pl.String)\n", - " .replace(np.inf, f\"{bins[-1]+1}+\")\n", + " .replace(np.inf, f\"{bins[-1] + 1}+\")\n", " 
.str.replace(\"(\\.0)\", \"\"),\n", " (100 * pl.col(\"count\") / pl.col(\"count\").sum()).alias(\"share\"),\n", " )\n", @@ -6063,7 +6101,7 @@ "id": "6fe11250-1408-49c5-a6a3-c5cdd5fd2b84", "metadata": {}, "source": [ - "### 2022" + "### 2022\n" ] }, { @@ -6138,7 +6176,7 @@ "id": "7c025adf-0138-4e03-958e-d7c69e0fe24f", "metadata": {}, "source": [ - "### CEE" + "### CEE\n" ] }, { @@ -6218,7 +6256,7 @@ "id": "78ffe334-d038-4988-8a13-53a396e9f69d", "metadata": {}, "source": [ - "### AOM" + "### AOM\n" ] }, { @@ -6520,7 +6558,7 @@ "id": "9fd467ed-ae43-4d28-92a0-1168fb94b284", "metadata": {}, "source": [ - "### Distribution des distance économisées" + "### Distribution des distance économisées\n" ] }, { @@ -6623,7 +6661,7 @@ "id": "f585048b-a222-469d-baf1-e7702558e06c", "metadata": {}, "source": [ - "### Moyenne par cohorte" + "### Moyenne par cohorte\n" ] }, { @@ -6725,7 +6763,7 @@ "id": "694f2c4c-ceb3-4e6b-b065-d1e2f6bdb5ad", "metadata": {}, "source": [ - "### Médiane par cohorte" + "### Médiane par cohorte\n" ] }, { @@ -6827,7 +6865,7 @@ "id": "c4b7c55e-9a7b-4bdd-9c7a-92722d9e596f", "metadata": {}, "source": [ - "### Par cohortes hebdomadaires" + "### Par cohortes hebdomadaires\n" ] }, { @@ -6962,6 +7000,246 @@ "## Comparaison par AOM\n" ] }, + { + "cell_type": "markdown", + "id": "0715a98d-75e2-4ffc-a7f7-74595a75d203", + "metadata": {}, + "source": [ + "## Par niveau de pratique\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "a37edd5d-9036-475e-ac67-f372012066c6", + "metadata": { + "execution": { + "iopub.execute_input": "2025-02-05T10:39:37.964058Z", + "iopub.status.busy": "2025-02-05T10:39:37.963490Z", + "iopub.status.idle": "2025-02-05T10:40:07.787110Z", + "shell.execute_reply": "2025-02-05T10:40:07.786613Z", + "shell.execute_reply.started": "2025-02-05T10:39:37.964030Z" + } + }, + "outputs": [], + "source": [ + "df_distance_eco_cee_by_month = pl.read_database(\n", + " \"\"\"\n", + "select \n", + "\tc.trip_id,\n", + 
"\tmax(cdv.\"uuid\"::text) filter (where c.is_driver) as \"uuid\",\n", + "\tmin(cdv.date_first_cee) as date_first_cee,\n", + "\tmin(c.datetime) as start_datetime,\n", + "\tsum(coalesce (distance,(c.meta->>'calc_distance')::int) * (case when seats=0 then 1 else seats end)) as distance_eco\n", + "from carpool.carpools c \n", + "inner join carpool.identities i on c.identity_id = i.\"_id\" \n", + "left join luis.cee_drivers_v4 cdv on i.\"uuid\" = cdv.\"uuid\"\n", + "where cdv.cohorte in ('t1_23','t2_23')\n", + "and c.datetime between cdv.date_first_cee and cdv.date_first_cee + interval '23 month'\n", + "group by 1\n", + " \"\"\",\n", + " connection=db_engine,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "06b99ca8-e400-4f40-9e0b-94bb30cdb304", + "metadata": { + "execution": { + "iopub.execute_input": "2025-02-05T10:40:25.460896Z", + "iopub.status.busy": "2025-02-05T10:40:25.459002Z", + "iopub.status.idle": "2025-02-05T10:40:25.473438Z", + "shell.execute_reply": "2025-02-05T10:40:25.472344Z", + "shell.execute_reply.started": "2025-02-05T10:40:25.460798Z" + } + }, + "outputs": [], + "source": [ + "df_distance_eco_cee_by_month" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "546ff184-01ce-46fa-b743-ae46ee9003fb", + "metadata": { + "execution": { + "iopub.execute_input": "2025-02-05T10:40:30.414750Z", + "iopub.status.busy": "2025-02-05T10:40:30.414263Z", + "iopub.status.idle": "2025-02-05T10:40:30.662285Z", + "shell.execute_reply": "2025-02-05T10:40:30.661785Z", + "shell.execute_reply.started": "2025-02-05T10:40:30.414718Z" + } + }, + "outputs": [], + "source": [ + "df_distance_eco_cee_by_month.select(pl.col(\"uuid\").n_unique())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a79b30c2-a83f-4b92-99ca-104b96986967", + "metadata": { + "execution": { + "iopub.execute_input": "2025-02-05T13:02:33.563164Z", + "iopub.status.busy": "2025-02-05T13:02:33.561637Z", + "iopub.status.idle": 
"2025-02-05T13:02:34.356764Z", + "shell.execute_reply": "2025-02-05T13:02:34.356449Z", + "shell.execute_reply.started": "2025-02-05T13:02:33.563085Z" + }, + "scrolled": true + }, + "outputs": [], + "source": [ + "df_distance_eco_cee_by_month_agg = (\n", + " df_distance_eco_cee_by_month.group_by(\"uuid\")\n", + " .agg(\n", + " pl.col(\"date_first_cee\").min(),\n", + " pl.col(\"trip_id\")\n", + " .filter(\n", + " pl.col(\"start_datetime\") <= pl.col(\"date_first_cee\").dt.offset_by(\"6mo\")\n", + " )\n", + " .n_unique()\n", + " .alias(\"num_trips_6mo\"),\n", + " )\n", + " .with_columns(\n", + " pl.date_ranges(\n", + " pl.col(\"date_first_cee\").dt.truncate(\"1mo\"),\n", + " pl.col(\"date_first_cee\").dt.offset_by(\"23mo\"),\n", + " \"1mo\",\n", + " ).alias(\"month\")\n", + " )\n", + " .explode(\"month\")\n", + " .with_columns(\n", + " pl.col(\"month\")\n", + " .rank()\n", + " .over(partition_by=\"uuid\", order_by=\"month\")\n", + " .alias(\"month_rank\")\n", + " )\n", + " .filter(pl.col(\"month\") <= datetime(2024, 9, 30))\n", + " .join(\n", + " df_distance_eco_cee_by_month.group_by(\n", + " [\"uuid\", pl.col(\"start_datetime\").dt.truncate(\"1mo\")]\n", + " ).agg(pl.col(\"distance_eco\").sum()),\n", + " left_on=[\"uuid\", \"month\"],\n", + " right_on=[\"uuid\", pl.col(\"start_datetime\").cast(pl.Date)],\n", + " how=\"left\",\n", + " )\n", + ")\n", + "df_distance_eco_cee_by_month_agg" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "id": "a955b38d-189e-46c1-806a-1277b68588f0", + "metadata": { + "execution": { + "iopub.execute_input": "2025-02-05T14:57:39.234336Z", + "iopub.status.busy": "2025-02-05T14:57:39.233833Z", + "iopub.status.idle": "2025-02-05T14:57:39.292762Z", + "shell.execute_reply": "2025-02-05T14:57:39.292386Z", + "shell.execute_reply.started": "2025-02-05T14:57:39.234309Z" + } + }, + "outputs": [], + "source": [ + "df_distance_eco_cee_by_month_agg.group_by(\"month_rank\").agg(\n", + " 
pl.col(\"uuid\").filter(pl.col(\"distance_eco\").is_not_null()).n_unique()\n", + ").sort(pl.col(\"month_rank\")).write_clipboard()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a82c408f-714d-4327-9fc8-c846eca0604f", + "metadata": { + "execution": { + "iopub.execute_input": "2025-02-05T14:45:39.094616Z", + "iopub.status.busy": "2025-02-05T14:45:39.092408Z", + "iopub.status.idle": "2025-02-05T14:45:39.451594Z", + "shell.execute_reply": "2025-02-05T14:45:39.451295Z", + "shell.execute_reply.started": "2025-02-05T14:45:39.094492Z" + } + }, + "outputs": [], + "source": [ + "traces = []\n", + "colors = dict(\n", + " zip(\n", + " [1, 10, 20, 30, 40, 50, 60, 70, 80],\n", + " [\n", + " \"#9da7f5\",\n", + " \"#7a88f2\",\n", + " \"#5669f0\",\n", + " \"#314bef\",\n", + " \"#112fe9\",\n", + " \"#0d28c7\",\n", + " \"#0921a5\",\n", + " \"#061a82\",\n", + " \"#04135e\",\n", + " ],\n", + " )\n", + ")\n", + "for num_trips in [1, 10, 20, 30, 40, 50, 60, 70, 80]:\n", + " data = (\n", + " df_distance_eco_cee_by_month_agg.filter(\n", + " (pl.col(\"num_trips_6mo\") >= num_trips) & (pl.col(\"month_rank\") != 21)\n", + " )\n", + " .with_columns(pl.col(\"distance_eco\").fill_null(0))\n", + " .group_by(\"month_rank\")\n", + " .agg((pl.col(\"distance_eco\").mean() / 1000).round(1))\n", + " .sort(pl.col(\"month_rank\"))\n", + " )\n", + "\n", + " data.write_excel(OUTPUT_PATH / f\"df_{num_trips}.xlsx\")\n", + "\n", + " trace = go.Scatter(\n", + " x=data[\"month_rank\"],\n", + " y=data[\"distance_eco\"],\n", + " name=f\"{num_trips}+\",\n", + " mode=\"lines+markers\",\n", + " line_color=colors[num_trips],\n", + " )\n", + "\n", + " traces.append(trace)\n", + "\n", + "fig_distance_eco_mensuelle = go.Figure(traces)\n", + "\n", + "fig_distance_eco_mensuelle.update_layout(\n", + " plot_bgcolor=\"white\",\n", + " legend_title=\"Nombre de trajets effectués
(6 premiers mois)\",\n", + " title=\"Distance moyenne économisée en fonction du mois covoituré\",\n", + " margin_t=80,\n", + " height=600,\n", + ")\n", + "fig_distance_eco_mensuelle.update_yaxes(\n", + " showgrid=True,\n", + " griddash=\"dashdot\",\n", + " gridwidth=1,\n", + " gridcolor=\"gray\",\n", + " title=\"Distance économisée (km)\",\n", + " zeroline=True,\n", + " zerolinecolor=\"black\",\n", + " zerolinewidth=1,\n", + ")\n", + "\n", + "fig_distance_eco_mensuelle.update_xaxes(title=\"Mois n°\", ticks=\"outside\")\n", + "fig_distance_eco_mensuelle.write_html(\n", + " OUTPUT_PATH / \"fig_distance_eco_mensuelle_par_niveau_pratique.html\"\n", + ")\n", + "fig_distance_eco_mensuelle.write_image(\n", + " OUTPUT_PATH / \"fig_distance_eco_mensuelle_par_niveau_pratique.svg\",\n", + " format=\"svg\",\n", + " width=1280,\n", + " height=720,\n", + ")\n", + "fig_distance_eco_mensuelle.show()" + ] + }, { "cell_type": "markdown", "id": "f0b35594", @@ -7142,7 +7420,7 @@ "id": "2d8ed25a-cf31-4436-bcfc-bd4c5be66a94", "metadata": {}, "source": [ - "# KwhCumac" + "# KwhCumac\n" ] }, { @@ -7150,7 +7428,7 @@ "id": "c99a204c-34eb-4205-84ff-86b894cc3cc6", "metadata": {}, "source": [ - "## Requetes" + "## Requetes\n" ] }, { @@ -7158,7 +7436,7 @@ "id": "01a98a2d-f611-4aef-865b-24cf86c40a94", "metadata": {}, "source": [ - "## 2022" + "## 2022\n" ] }, { @@ -7406,7 +7684,7 @@ "id": "b102ef82-c9c1-4f77-8846-53cf1549b08c", "metadata": {}, "source": [ - "## CEE" + "## CEE\n" ] }, { @@ -7486,7 +7764,7 @@ "id": "abbf4153-40fd-4ae0-acb8-b22922a90eb2", "metadata": {}, "source": [ - "### Courbe de churn" + "### Courbe de churn\n" ] }, { @@ -7507,7 +7785,7 @@ "id": "c1251002-b53a-4a54-8cf3-181452c65634", "metadata": {}, "source": [ - "### Stats 2023" + "### Stats 2023\n" ] }, { @@ -7532,7 +7810,7 @@ "id": "0a56ad57-9e04-4980-98b8-ca2801e475af", "metadata": {}, "source": [ - "### Stats par cohortes" + "### Stats par cohortes\n" ] }, { @@ -7676,7 +7954,7 @@ "id": 
"6ca341e9-63ea-4914-b648-68ae6a952353", "metadata": {}, "source": [ - "## Visualisation" + "## Visualisation\n" ] }, { @@ -7738,7 +8016,7 @@ "id": "0803075e-7d63-4c60-82cb-7f612d6df78d", "metadata": {}, "source": [ - "## Analayse de l'effet de seuil" + "## Analayse de l'effet de seuil\n" ] }, { @@ -7815,7 +8093,7 @@ " ],\n", " (100 * pl.col(\"num_drivers\") / pl.col(\"num_drivers\").max()).alias(\n", " \"share_drivers\"\n", - " )\n", + " ),\n", " )\n", ")\n", "cummulative_distance_eco_cee_df_agg" @@ -7915,7 +8193,7 @@ "id": "30077ed2-e07c-4091-a6a1-2c177acbefc4", "metadata": {}, "source": [ - "# KwhCumac mensuel" + "# KwhCumac mensuel\n" ] }, { @@ -7923,7 +8201,7 @@ "id": "7618446c-0a23-4734-a17a-4eeed63cc286", "metadata": {}, "source": [ - "## Requêtes" + "## Requêtes\n" ] }, { @@ -7931,7 +8209,7 @@ "id": "c46c6c61-f4bb-4220-ba84-1ea8614d93c6", "metadata": {}, "source": [ - "### 2022" + "### 2022\n" ] }, { @@ -8092,7 +8370,7 @@ " ),\n", " x=\"num_mois\",\n", " y=\"kwhcumac\",\n", - " template=\"simple_white\"\n", + " template=\"simple_white\",\n", ")" ] }, @@ -8101,7 +8379,7 @@ "id": "819e9ca2-be09-467a-aedb-b75c0bd776bd", "metadata": {}, "source": [ - "### CEE" + "### CEE\n" ] }, { @@ -8190,7 +8468,7 @@ "id": "3e0fa541-154d-4d66-a33a-1c52d56fc8c8", "metadata": {}, "source": [ - "## Visualisation" + "## Visualisation\n" ] }, { @@ -8216,15 +8494,11 @@ " template=\"simple_white\",\n", " labels={\"num_mois\": \"Mois n°\", \"cohorte\": \"Cohorte\", \"kwhcumac\": \"kWh cumac\"},\n", " title=\"Comparaison de khwcumac économisé au fil des mois par les différentes cohortes\",\n", - " color_discrete_map={\n", - " **cohortes_color_mapping,\"t1_23\":\"#82c4ad\"\n", - " },\n", + " color_discrete_map={**cohortes_color_mapping, \"t1_23\": \"#82c4ad\"},\n", ")\n", "\n", "fig_kwhcumac_mensuel.write_html(OUTPUT_PATH / \"fig_kwhcumac_mensuel_multi.html\")\n", - "fig_kwhcumac_mensuel.update_traces(\n", - " textposition = \"top center\"\n", - ")\n", + 
"fig_kwhcumac_mensuel.update_traces(textposition=\"top center\")\n", "fig_kwhcumac_mensuel.update_layout(\n", " hovermode=\"x unified\",\n", ")\n", @@ -8415,7 +8689,7 @@ "id": "b2890235-e3dd-4fab-9aab-925f253e1f41", "metadata": {}, "source": [ - "# Territoires" + "# Territoires\n" ] }, { @@ -8439,7 +8713,7 @@ "id": "a330d8f3-4ac4-483a-a336-b67886931a43", "metadata": {}, "source": [ - "## Requêtes" + "## Requêtes\n" ] }, { @@ -8447,7 +8721,7 @@ "id": "eb4d1ef1-d7ef-4311-9a32-5fcdaad625f9", "metadata": {}, "source": [ - "### 2022" + "### 2022\n" ] }, { @@ -8504,7 +8778,7 @@ } }, "source": [ - "### CEE" + "### CEE\n" ] }, { @@ -8555,7 +8829,7 @@ "id": "db38ef33-8679-4baf-b605-0a75c93a47d5", "metadata": {}, "source": [ - "## Visualisation" + "## Visualisation\n" ] }, { @@ -8566,7 +8840,6 @@ "outputs": [], "source": [ "def preprocess_territories_df(df: pl.DataFrame) -> pl.DataFrame:\n", - "\n", " agg_exprs = [pl.len()]\n", " group_by_exprs = [\"TYPE_COMMUNE_UU\"]\n", " if \"cohorte\" in df.columns:\n", @@ -8687,7 +8960,7 @@ "id": "0706aed4-64cd-4512-8ed8-980ecbb4ff80", "metadata": {}, "source": [ - "## Requêtes" + "## Requêtes\n" ] }, { @@ -8695,7 +8968,7 @@ "id": "43ca9beb-aebf-41db-85f2-54aead9c3111", "metadata": {}, "source": [ - "### Stats" + "### Stats\n" ] }, { @@ -8759,7 +9032,7 @@ "id": "10587d99-7c63-47a0-aec2-5c383f386012", "metadata": {}, "source": [ - "### Distance économisée" + "### Distance économisée\n" ] }, { @@ -8854,7 +9127,7 @@ "id": "dbbf418c-969b-4457-af41-c3b65f8d2e7b", "metadata": {}, "source": [ - "### Trajets" + "### Trajets\n" ] }, { @@ -8923,7 +9196,7 @@ "id": "0e123d7b-305f-434c-9f0d-bdfd9dc1782b", "metadata": {}, "source": [ - "## Nombre moyen de trajets effectués par semaine" + "## Nombre moyen de trajets effectués par semaine\n" ] }, { @@ -8993,7 +9266,7 @@ "id": "68e0e91a-f279-4827-9eb8-72c5e98c150f", "metadata": {}, "source": [ - "## Nombre de conducteurs loyaux par opérateurs" + "## Nombre de conducteurs loyaux par opérateurs\n" ] }, { 
@@ -9703,7 +9976,7 @@ "id": "c2f36535-026c-4455-bf7d-c0af50d04db7", "metadata": {}, "source": [ - "### Analyse du TOP 30 des Unités Urbaines" + "### Analyse du TOP 30 des Unités Urbaines\n" ] }, { @@ -9824,7 +10097,7 @@ "id": "d57dd8e4-3c4b-47b0-979c-9a7338e31eca", "metadata": {}, "source": [ - "### Analyse du TOP 30 par communes" + "### Analyse du TOP 30 par communes\n" ] }, { @@ -10027,7 +10300,7 @@ "id": "1295f9c4-80c4-4bb6-a4bc-aa3ffee8d2b4", "metadata": {}, "source": [ - "## Nombre de passagers différents" + "## Nombre de passagers différents\n" ] }, { @@ -10271,7 +10544,7 @@ "id": "d18c9f55-e76a-48df-8f30-b693a7c1e8b9", "metadata": {}, "source": [ - "## Conducteurs CEE ayant covoiturés avec un autre conducteur CEE" + "## Conducteurs CEE ayant covoiturés avec un autre conducteur CEE\n" ] }, { @@ -10373,7 +10646,7 @@ "id": "3a1dc631-9d4b-4b2d-9ade-0141fd2278c2", "metadata": {}, "source": [ - "## Analyse en réseau" + "## Analyse en réseau\n" ] }, { @@ -10563,7 +10836,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.10" } }, "nbformat": 4,