From 1e5306193ceb5176b2a6c352cb17d7803bfa48b5 Mon Sep 17 00:00:00 2001 From: Rami Abdou Date: Tue, 25 Feb 2025 18:35:24 -0800 Subject: [PATCH 1/4] fix refinedAt being set + check for expired opportunity --- packages/core/src/modules/opportunities.ts | 60 +++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/packages/core/src/modules/opportunities.ts b/packages/core/src/modules/opportunities.ts index 68d2489f..adcf8fc0 100644 --- a/packages/core/src/modules/opportunities.ts +++ b/packages/core/src/modules/opportunities.ts @@ -142,7 +142,59 @@ export async function checkForDeletedOpportunity({ } } -// "Create Opportunity" +const EXPIRED_PHRASES = [ + '404', + 'closed', + 'does not exist', + "doesn't exist", + 'expired', + 'filled', + 'no longer accepting', + 'no longer available', + 'no longer exists', + 'no longer open', + 'not accepting', + 'not currently accepting', + 'not available', + 'not be found', + 'not found', + 'not open', + 'oops', + 'removed', + 'sorry', +]; + +/** + * This function uses puppeteer to scrape the opportunity's website and + * determine whether or not the opportunity has closed or not. If it has, + * the opportunity will be marked as "expired" and thus will no longer appear + * in the opportunities board. + * + * @param opportunityId - The ID of the opportunity to check. 
+ */ +export async function checkForExpiredOpportunity( + opportunityId: string +): Promise<void> { + const link = await getLinkFromOpportunity(opportunityId); + + if (!link) { + return; + } + + const content = await getPageContent(link); + + const hasExpired = EXPIRED_PHRASES.some((phrase) => { + return content.toLowerCase().includes(phrase); + }); + + if (hasExpired) { + await db + .updateTable('opportunities') + .set({ expiresAt: new Date() }) + .where('id', '=', opportunityId) + .executeTakeFirst(); + } +} type CreateOpportunityInput = { sendNotification?: boolean; @@ -563,6 +615,12 @@ export async function refineOpportunity( }); } + // If the AI didn't return a title, then we don't want to finish the process + // because there was no opportunity to refine. We exit gracefully. + if (!data.title || !data.description) { + return success({}); + } + const opportunity = await db.transaction().execute(async (trx) => { const companyId = data.company ? await getMostRelevantCompany(trx, data.company) From 869422df7dc9b641e89cb251fa5082d88a57a641 Mon Sep 17 00:00:00 2001 From: Rami Abdou Date: Tue, 25 Feb 2025 18:46:20 -0800 Subject: [PATCH 2/4] check expired --- .../core/src/infrastructure/bull.types.ts | 6 +++ packages/core/src/modules/opportunities.ts | 37 +++++++++++++++++-- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/packages/core/src/infrastructure/bull.types.ts b/packages/core/src/infrastructure/bull.types.ts index d5586922..8bd4c257 100644 --- a/packages/core/src/infrastructure/bull.types.ts +++ b/packages/core/src/infrastructure/bull.types.ts @@ -378,6 +378,12 @@ export const OneTimeCodeBullJob = z.discriminatedUnion('name', [ ]); export const OpportunityBullJob = z.discriminatedUnion('name', [ + z.object({ + name: z.literal('opportunity.check_expired'), + data: z.object({ + opportunityId: z.string().trim().min(1).optional(), + }), + }), z.object({ name: z.literal('opportunity.create'), data: z.object({ diff --git 
a/packages/core/src/modules/opportunities.ts b/packages/core/src/modules/opportunities.ts index adcf8fc0..64a781e4 100644 --- a/packages/core/src/modules/opportunities.ts +++ b/packages/core/src/modules/opportunities.ts @@ -172,13 +172,13 @@ const EXPIRED_PHRASES = [ * * @param opportunityId - The ID of the opportunity to check. */ -export async function checkForExpiredOpportunity( +async function checkForExpiredOpportunity( opportunityId: string -): Promise<void> { +): Promise<Result<boolean>> { const link = await getLinkFromOpportunity(opportunityId); if (!link) { - return; + return success(false); } const content = await getPageContent(link); @@ -194,6 +194,30 @@ export async function checkForExpiredOpportunity( .where('id', '=', opportunityId) .executeTakeFirst(); } + + return success(hasExpired); +} + +async function checkForExpiredOpportunities(): Promise<Result<number>> { + const opportunities = await db + .selectFrom('opportunities') + .select('id') + .where('expiresAt', '>', new Date()) + .orderBy('createdAt', 'asc') + .limit(100) + .execute(); + + let count = 0; + + for (const opportunity of opportunities) { + const hasExpired = await checkForExpiredOpportunity(opportunity.id); + + if (hasExpired.ok && hasExpired.data === true) { + count += 1; + } + } + + return success(count); } type CreateOpportunityInput = { @@ -259,7 +283,7 @@ async function createOpportunity({ .values({ createdAt: new Date(), description: 'N/A', - expiresAt: dayjs().add(1, 'month').toDate(), + expiresAt: dayjs().add(3, 'month').toDate(), id: id(), postedBy: slackMessage.studentId, slackChannelId, @@ -936,6 +960,11 @@ export const opportunityWorker = registerWorker( OpportunityBullJob, async (job) => { const result = await match(job) + .with({ name: 'opportunity.check_expired' }, async ({ data }) => { + return data.opportunityId + ? 
checkForExpiredOpportunity(data.opportunityId) + : checkForExpiredOpportunities(); + }) .with({ name: 'opportunity.create' }, async ({ data }) => { return createOpportunity(data); }) From 24adcbd0bc8be17010f92444305557bd9642ef55 Mon Sep 17 00:00:00 2001 From: Rami Abdou Date: Wed, 26 Feb 2025 08:31:36 -0800 Subject: [PATCH 3/4] checked expired --- packages/core/src/modules/opportunities.ts | 44 +++++++++++++++++----- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/packages/core/src/modules/opportunities.ts b/packages/core/src/modules/opportunities.ts index 64a781e4..79577bbe 100644 --- a/packages/core/src/modules/opportunities.ts +++ b/packages/core/src/modules/opportunities.ts @@ -164,18 +164,30 @@ const EXPIRED_PHRASES = [ 'sorry', ]; +type CheckForExpiredOpportunityInput = { + link?: string | null; + opportunityId: string; +}; + /** * This function uses puppeteer to scrape the opportunity's website and * determine whether or not the opportunity has closed or not. If it has, * the opportunity will be marked as "expired" and thus will no longer appear * in the opportunities board. * - * @param opportunityId - The ID of the opportunity to check. + * Returns `true` if the opportunity has expired, `false` otherwise. + * + * @param input - The opportunity to check for expiration. */ -async function checkForExpiredOpportunity( - opportunityId: string -): Promise<Result<boolean>> { - const link = await getLinkFromOpportunity(opportunityId); +async function checkForExpiredOpportunity({ + link = null, + opportunityId, +}: CheckForExpiredOpportunityInput): Promise<Result<boolean>> { + // If the link is passed in, we'll use that. Otherwise, we'll scrape the + // opportunity's website to get the link. 
+ if (!link) { + link = await getLinkFromOpportunity(opportunityId); + } if (!link) { return success(false); @@ -201,7 +213,12 @@ async function checkForExpiredOpportunity({ return success(hasExpired); } async function checkForExpiredOpportunities(): Promise<Result<number>> { const opportunities = await db .selectFrom('opportunities') - .select('id') + .leftJoin('slackMessages', (join) => { + return join + .onRef('slackMessages.channelId', '=', 'opportunities.slackChannelId') + .onRef('slackMessages.id', '=', 'opportunities.slackMessageId'); + }) + .select(['opportunities.id', 'slackMessages.text']) .where('expiresAt', '>', new Date()) .orderBy('createdAt', 'asc') .limit(100) @@ -210,13 +227,20 @@ async function checkForExpiredOpportunities(): Promise<Result<number>> { let count = 0; for (const opportunity of opportunities) { - const hasExpired = await checkForExpiredOpportunity(opportunity.id); + const result = await checkForExpiredOpportunity({ + link: getFirstLinkInMessage(opportunity.text!), + opportunityId: opportunity.id, + }); - if (hasExpired.ok && hasExpired.data === true) { + if (result.ok && result.data === true) { count += 1; } } + console.log( + `Checked ${opportunities.length} opportunities and found ${count} expired opportunities.` + ); + return success(count); } @@ -741,7 +765,7 @@ export async function refineOpportunity( * @param message - Slack message to extract the URL from. * @returns First URL found in the message or `null` if it doesn't exist. */ -function getFirstLinkInMessage(message: string) { +function getFirstLinkInMessage(message: string): string | undefined { return message.match(/<(https?:\/\/[^\s|>]+)(?:\|[^>]+)?>/)?.[1]; } @@ -962,7 +986,7 @@ export const opportunityWorker = registerWorker( const result = await match(job) .with({ name: 'opportunity.check_expired' }, async ({ data }) => { return data.opportunityId - ? checkForExpiredOpportunity(data.opportunityId) + ? 
checkForExpiredOpportunity({ opportunityId: data.opportunityId }) : checkForExpiredOpportunities(); }) .with({ name: 'opportunity.create' }, async ({ data }) => { From 458805e5f02d2a8d10f4184486932af32e034eee Mon Sep 17 00:00:00 2001 From: Rami Abdou Date: Wed, 26 Feb 2025 08:38:32 -0800 Subject: [PATCH 4/4] revert --- packages/core/src/modules/opportunities.ts | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/packages/core/src/modules/opportunities.ts b/packages/core/src/modules/opportunities.ts index 79577bbe..5aabdfc2 100644 --- a/packages/core/src/modules/opportunities.ts +++ b/packages/core/src/modules/opportunities.ts @@ -154,9 +154,9 @@ const EXPIRED_PHRASES = [ 'no longer exists', 'no longer open', 'not accepting', - 'not currently accepting', 'not available', 'not be found', + 'not currently accepting', 'not found', 'not open', 'oops', @@ -210,6 +210,13 @@ async function checkForExpiredOpportunity({ return success(hasExpired); } +/** + * Checks for expired opportunities and marks them as expired. This can be + * triggered via a Bull job. This is limited to 100 opportunities at a time + * to prevent overwhelming our server with too many puppeteer instances. + * + * @returns The number of opportunities marked as expired. + */ async function checkForExpiredOpportunities(): Promise<Result<number>> { const opportunities = await db .selectFrom('opportunities') @@ -307,7 +314,7 @@ async function createOpportunity({ .values({ createdAt: new Date(), description: 'N/A', - expiresAt: dayjs().add(3, 'month').toDate(), + expiresAt: dayjs().add(1, 'month').toDate(), id: id(), postedBy: slackMessage.studentId, slackChannelId,