diff --git a/server/src/infra/database/migrations/20241231090357_housings-add-mutation-data.ts b/server/src/infra/database/migrations/20241231090357_housings-add-mutation-data.ts
new file mode 100644
index 000000000..313f77ee0
--- /dev/null
+++ b/server/src/infra/database/migrations/20241231090357_housings-add-mutation-data.ts
@@ -0,0 +1,21 @@
+import type { Knex } from 'knex';
+
+export async function up(knex: Knex): Promise<void> {
+  await knex.schema.alterTable('fast_housing', (table) => {
+    table.integer('plot_area').nullable();
+    table.timestamp('last_mutation_date').nullable();
+    table.timestamp('last_transaction_date').nullable();
+    table.integer('last_transaction_value').nullable();
+    table.string('occupancy_history').nullable();
+  });
+}
+
+export async function down(knex: Knex): Promise<void> {
+  await knex.schema.alterTable('fast_housing', (table) => {
+    table.dropColumn('plot_area');
+    table.dropColumn('last_mutation_date');
+    table.dropColumn('last_transaction_date');
+    table.dropColumn('last_transaction_value');
+    table.dropColumn('occupancy_history');
+  });
+}
diff --git a/server/src/scripts/import-housing-mutation-data/README.md b/server/src/scripts/import-housing-mutation-data/README.md
new file mode 100644
index 000000000..c93e3dea5
--- /dev/null
+++ b/server/src/scripts/import-housing-mutation-data/README.md
@@ -0,0 +1,94 @@
+## Usage
+
+### Migrate the production database
+
+```shell
+DATABASE_URL=... yarn workspace @zerologementvacant/server migrate
+```
+
+### Create the CSV file
+
+```sql
+INSTALL httpfs;
+LOAD httpfs;
+
+CREATE OR REPLACE SECRET (
+  TYPE S3,
+  KEY_ID '...',
+  SECRET '...',
+  ENDPOINT 'cellar-c2.services.clever-cloud.com',
+  REGION 'us-east-1'
+);
+
+-- Copy data to a new table
+CREATE OR REPLACE TABLE "dev"."main"."housings" AS
+SELECT
+  DISTINCT ON (local_id) local_id,
+  plot_area,
+  cadastral_classification,
+  latitude,
+  longitude,
+  CASE
+    WHEN TRY_CAST(TRY_STRPTIME(last_mutation_date, '%d%m%Y') AS DATE) IS NOT NULL THEN
+      TRY_STRPTIME(last_mutation_date, '%d%m%Y')
+    ELSE
+      -- Month-year values (assumed MMYYYY) map to the last day of the month
+      DATE_TRUNC('month', TRY_STRPTIME(last_mutation_date, '%m%Y'))
+        + INTERVAL '1 month' - INTERVAL '1 day'
+  END AS last_mutation_date,
+  last_transaction_date::date AS last_transaction_date,
+  last_transaction_value,
+  occupancy_history
+FROM read_csv(
+  's3://zlv-production/production/dump_20241218/housing_data.csv',
+  auto_detect = TRUE,
+  ignore_errors = TRUE
+)
+WHERE local_id IS NOT NULL
+ORDER BY local_id;
+
+-- Check that the data has been loaded
+SELECT * FROM "dev"."main"."housings"
+LIMIT 100;
+
+-- Export housings to housings-gold.csv
+COPY "dev"."main"."housings" TO 'housings-gold.csv' (HEADER, DELIMITER ',');
+```
+
+### Import the CSV file into the production database
+
+```sql
+INSTALL postgres;
+LOAD postgres;
+
+CREATE OR REPLACE SECRET (
+  TYPE POSTGRES,
+  HOST '...',
+  PORT ...,
+  DATABASE '...',
+  USER '...',
+  PASSWORD '...'
+);
+
+ATTACH IF NOT EXISTS '' AS postgres (TYPE POSTGRES);
+
+CREATE OR REPLACE TABLE "postgres"."public"."housings_gold" AS
+SELECT * FROM read_csv('housings-gold.csv');
+
+DETACH postgres;
+```
+
+### Add a primary key to the `housings_gold` table
+
+In another terminal, connect to the production database and run the following:
+```sql
+ALTER TABLE housings_gold
+ADD CONSTRAINT housings_gold_pkey
+PRIMARY KEY (local_id);
+```
+
+### Run the import script
+
+```shell
+cd server
+DATABASE_URL=... \
+yarn dlx tsx src/scripts/import-housing-mutation-data/index.ts
+```
diff --git a/server/src/scripts/import-housing-mutation-data/index.ts b/server/src/scripts/import-housing-mutation-data/index.ts
new file mode 100644
index 000000000..3a8ab583a
--- /dev/null
+++ b/server/src/scripts/import-housing-mutation-data/index.ts
@@ -0,0 +1,64 @@
+import async from 'async';
+import { Range } from 'immutable';
+import fp from 'lodash/fp';
+
+import db from '~/infra/database';
+
+// Update ids in chunks of 1 000, with at most 10 chunks in flight at a time
+const BATCH_SIZE = 1_000;
+const LIMIT = 10;
+
+const HOUSINGS_GOLD = 'housings_gold';
+
+async function run(): Promise<void> {
+  async function update(
+    department: string,
+    ids: ReadonlyArray<string>
+  ): Promise<void> {
+    console.log(
+      `Updating ${ids.length} housings in department ${department}...`
+    );
+    const fastHousing = `fast_housing_${department.toLowerCase()}`;
+    const chunks = fp.chunk(BATCH_SIZE, ids);
+    await async.forEachLimit(chunks, LIMIT, async (chunk) => {
+      await db(fastHousing)
+        .update({
+          plot_area: db.ref(`${HOUSINGS_GOLD}.plot_area`),
+          cadastral_classification: db.ref(
+            `${HOUSINGS_GOLD}.cadastral_classification`
+          ),
+          latitude_dgfip: db.ref(`${HOUSINGS_GOLD}.latitude`),
+          longitude_dgfip: db.ref(`${HOUSINGS_GOLD}.longitude`),
+          last_mutation_date: db.ref(`${HOUSINGS_GOLD}.last_mutation_date`),
+          last_transaction_date: db.ref(
+            `${HOUSINGS_GOLD}.last_transaction_date`
+          ),
+          last_transaction_value: db.ref(
+            `${HOUSINGS_GOLD}.last_transaction_value`
+          ),
+          occupancy_history: db.ref(`${HOUSINGS_GOLD}.occupancy_history`)
+        })
+        .updateFrom(HOUSINGS_GOLD)
+        .where(`${fastHousing}.local_id`, db.ref(`${HOUSINGS_GOLD}.local_id`))
+        .whereIn(`${HOUSINGS_GOLD}.local_id`, chunk);
+    });
+  }
+
+  // Department codes 01-98, skipping 20, which is split into 2A and 2B (Corsica)
+  const departments = Range(1, 99)
+    .filter((department) => department !== 20)
+    .map((department) => department.toString().padStart(2, '0'))
+    .concat(['2A', '2B']);
+
+  await async.forEachSeries(
+    departments.toArray(),
+    async (department: string) => {
+      const housings = await db(HOUSINGS_GOLD)
+        .select('local_id')
+        .whereLike('local_id', `${department}%`)
+        .orderBy('local_id');
+      const ids = housings.map((housing) => housing.local_id);
+      await update(department, ids);
+    }
+  );
+}
+
+run();
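
## Notes

### Parsing `last_mutation_date`

The DuckDB export keeps values that parse as full `DDMMYYYY` dates as-is and falls back to the last day of the month for values carrying only a month and a year. The `MMYYYY` fallback format is an inference from the source data, not documented anywhere. A minimal sketch of the two paths:

```sql
SELECT
  TRY_STRPTIME('25121999', '%d%m%Y') AS full_date,        -- 1999-12-25 00:00:00
  DATE_TRUNC('month', TRY_STRPTIME('121999', '%m%Y'))
    + INTERVAL '1 month' - INTERVAL '1 day' AS month_only; -- 1999-12-31 00:00:00
```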
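### Before adding the primary key

`ALTER TABLE ... ADD CONSTRAINT ... PRIMARY KEY` fails if `housings_gold` contains duplicate `local_id` values. The `DISTINCT ON (local_id)` in the export should prevent that, but a pre-check against the production database is cheap:

```sql
-- Should return zero rows; any hit would make the PRIMARY KEY constraint fail
SELECT local_id, COUNT(*) AS copies
FROM housings_gold
GROUP BY local_id
HAVING COUNT(*) > 1;
```

The primary key also matters for the import script itself: its index backs the `local_id` lookups, and uniqueness guarantees that each `UPDATE ... FROM` join matches at most one gold row.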
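### What each batch executes

For reference, one batch issued by `index.ts` should render to roughly the following Postgres statement. This is an approximate rendering of what Knex's `update` + `updateFrom` produces; department `01` is an example, the `IN` list is a placeholder, and most `SET` columns are elided:

```sql
UPDATE fast_housing_01
SET plot_area = housings_gold.plot_area,
    last_mutation_date = housings_gold.last_mutation_date
    -- ...remaining columns as listed in the script
FROM housings_gold
WHERE fast_housing_01.local_id = housings_gold.local_id
  AND housings_gold.local_id IN (/* up to 1 000 ids */);
```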
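### Letting the script exit

`run()` is fired without a handler: a failure surfaces only as an unhandled rejection, and on success the Knex pool is never closed, so the process can linger after the last department. A wrapper along these lines (a sketch reusing the script's own `run` and `db`) addresses both:

```ts
run()
  .catch((error) => {
    console.error(error);
    process.exitCode = 1;
  })
  // Release the connection pool so the Node process can exit cleanly
  .finally(() => db.destroy());
```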
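### Verifying the import

Once the script finishes, a spot check per department shard confirms the new columns were populated (department `01` here is just an example):

```sql
SELECT
  COUNT(*) AS total,
  COUNT(last_mutation_date) AS with_mutation_date,
  COUNT(last_transaction_value) AS with_transaction_value
FROM fast_housing_01;
```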