From 379953a8cb25a0574fa04981708db8efaac92e60 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Wed, 12 Feb 2025 14:40:00 -0600 Subject: [PATCH 1/6] Improve performance of account search --- src/controllers/api/accounts.ts | 30 ++++++++++++-------------- src/utils/search.ts | 37 +++++++++++++++++++-------------- 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/src/controllers/api/accounts.ts b/src/controllers/api/accounts.ts index d3d67820..18433f1f 100644 --- a/src/controllers/api/accounts.ts +++ b/src/controllers/api/accounts.ts @@ -119,6 +119,7 @@ const accountSearchQuerySchema = z.object({ const accountSearchController: AppController = async (c) => { const { signal } = c.req.raw; const { limit } = c.get('pagination'); + const kysely = await Storages.kysely(); const viewerPubkey = await c.get('signer')?.getPublicKey(); @@ -136,27 +137,22 @@ const accountSearchController: AppController = async (c) => { if (!event && lookup) { const pubkey = await lookupPubkey(lookup); - return c.json(pubkey ? [await accountFromPubkey(pubkey)] : []); + return c.json(pubkey ? [accountFromPubkey(pubkey)] : []); } - const followedPubkeys: Set = viewerPubkey ? await getFollowedPubkeys(viewerPubkey) : new Set(); - const pubkeys = Array.from(await getPubkeysBySearch(kysely, { q: query, limit, offset: 0, followedPubkeys })); + const events: NostrEvent[] = []; - let events = event ? [event] : await store.query([{ kinds: [0], authors: pubkeys, limit }], { - signal, - }); - - if (!event) { - events = pubkeys - .map((pubkey) => events.find((event) => event.pubkey === pubkey)) - .filter((event) => !!event); + if (event) { + events.push(event); + } else { + const following = viewerPubkey ? await getFollowedPubkeys(viewerPubkey) : new Set(); + const authors = [...await getPubkeysBySearch(kysely, { q: query, limit, offset: 0, following })]; + const profiles = await store.query([{ kinds: [0], authors, limit }], { signal }); + events.push(...profiles); } - const accounts = await hydrateEvents({ events, store, signal }).then( - (events) => - Promise.all( - events.map((event) => renderAccount(event)), - ), - ); + + const accounts = await hydrateEvents({ events, store, signal }) + .then((events) => events.map((event) => renderAccount(event))); return c.json(accounts); }; diff --git a/src/utils/search.ts b/src/utils/search.ts index 29ecefd9..1c608c99 100644 --- a/src/utils/search.ts +++ b/src/utils/search.ts @@ -5,30 +5,35 @@ import { DittoTables } from '@/db/DittoTables.ts'; /** Get pubkeys whose name and NIP-05 is similar to 'q' */ export async function getPubkeysBySearch( kysely: Kysely, - opts: { q: string; limit: number; offset: number; followedPubkeys: Set }, + opts: { q: string; limit: number; offset: number; following: Set }, ): Promise> { - const { q, limit, followedPubkeys, offset } = opts; + const { q, limit, following, offset } = opts; - let query = kysely + const pubkeys = new Set(); + + const query = kysely .selectFrom('author_stats') - .select((eb) => [ - 'pubkey', - 'search', - eb.fn('word_similarity', [sql`${q}`, 'search']).as('sml'), - ]) - .where(() => sql`${q} <% search`) - .orderBy(['followers_count desc']) - .orderBy(['sml desc', 'search']) + .select('pubkey') + .where('search', sql`%>`, q) + .orderBy('followers_count desc') .limit(limit) .offset(offset); - const pubkeys = new Set((await query.execute()).map(({ pubkey }) => pubkey)); + if (following.size) { + const authorsQuery = query.where('pubkey', 'in', [...following]); - if (followedPubkeys.size > 0) { - query = query.where('pubkey', 'in', [...followedPubkeys]); + for (const { pubkey } of await authorsQuery.execute()) { + pubkeys.add(pubkey); + } } - const followingPubkeys = new Set((await query.execute()).map(({ pubkey }) => pubkey)); + if (pubkeys.size >= limit) { + return pubkeys; + } - return new Set(Array.from(followingPubkeys.union(pubkeys))); + for (const { pubkey } of await query.execute()) { + pubkeys.add(pubkey); + } + + return pubkeys; } From 5969d9b3fa26f91f4e125e56df66d9d1594d1582 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Wed, 12 Feb 2025 14:47:20 -0600 Subject: [PATCH 2/6] getPubkeysBySearch: reduce limit of second query if applicable --- src/utils/search.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/search.ts b/src/utils/search.ts index 1c608c99..205ee740 100644 --- a/src/utils/search.ts +++ b/src/utils/search.ts @@ -31,7 +31,7 @@ export async function getPubkeysBySearch( return pubkeys; } - for (const { pubkey } of await query.execute()) { + for (const { pubkey } of await query.limit(limit - pubkeys.size).execute()) { pubkeys.add(pubkey); } From 510ad647be71cdcac06c72a5e7c97cbb91cef428 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Wed, 12 Feb 2025 15:02:19 -0600 Subject: [PATCH 3/6] Fix type errors --- src/controllers/api/search.ts | 4 ++-- src/utils/search.test.ts | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/controllers/api/search.ts b/src/controllers/api/search.ts index c050fe9d..b3c80a2f 100644 --- a/src/controllers/api/search.ts +++ b/src/controllers/api/search.ts @@ -104,8 +104,8 @@ async function searchEvents( // For account search, use a special index, and prioritize followed accounts. if (type === 'accounts') { - const followedPubkeys = viewerPubkey ? await getFollowedPubkeys(viewerPubkey) : new Set(); - const searchPubkeys = await getPubkeysBySearch(kysely, { q, limit, offset, followedPubkeys }); + const following = viewerPubkey ? await getFollowedPubkeys(viewerPubkey) : new Set(); + const searchPubkeys = await getPubkeysBySearch(kysely, { q, limit, offset, following }); filter.authors = [...searchPubkeys]; filter.search = undefined; diff --git a/src/utils/search.test.ts b/src/utils/search.test.ts index 056c2927..0b2e36ab 100644 --- a/src/utils/search.test.ts +++ b/src/utils/search.test.ts @@ -15,17 +15,17 @@ Deno.test('fuzzy search works', async () => { }).execute(); assertEquals( - await getPubkeysBySearch(db.kysely, { q: 'pat rick', limit: 1, offset: 0, followedPubkeys: new Set() }), + await getPubkeysBySearch(db.kysely, { q: 'pat rick', limit: 1, offset: 0, following: new Set() }), new Set(), ); assertEquals( - await getPubkeysBySearch(db.kysely, { q: 'patrick dosreis', limit: 1, offset: 0, followedPubkeys: new Set() }), + await getPubkeysBySearch(db.kysely, { q: 'patrick dosreis', limit: 1, offset: 0, following: new Set() }), new Set([ '47259076c85f9240e852420d7213c95e95102f1de929fb60f33a2c32570c98c4', ]), ); assertEquals( - await getPubkeysBySearch(db.kysely, { q: 'dosreis.com', limit: 1, offset: 0, followedPubkeys: new Set() }), + await getPubkeysBySearch(db.kysely, { q: 'dosreis.com', limit: 1, offset: 0, following: new Set() }), new Set([ '47259076c85f9240e852420d7213c95e95102f1de929fb60f33a2c32570c98c4', ]), @@ -44,7 +44,7 @@ Deno.test('fuzzy search works with offset', async () => { }).execute(); assertEquals( - await getPubkeysBySearch(db.kysely, { q: 'dosreis.com', limit: 1, offset: 1, followedPubkeys: new Set() }), + await getPubkeysBySearch(db.kysely, { q: 'dosreis.com', limit: 1, offset: 1, following: new Set() }), new Set(), ); }); From ab7a0e06c7e77590532f839671127980f1f00920 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Wed, 12 Feb 2025 15:21:09 -0600 Subject: [PATCH 4/6] Add a top_authors materialized view --- src/cron.ts | 18 +++++++++++++++--- src/db/DittoTables.ts | 2 ++ src/db/migrations/049_author_stats_sorted.ts | 13 +++++++++++++ src/utils/search.ts | 3 +-- 4 files changed, 31 insertions(+), 5 deletions(-) create mode 100644 src/db/migrations/049_author_stats_sorted.ts diff --git a/src/cron.ts b/src/cron.ts index 6994561e..ba8a18d5 100644 --- a/src/cron.ts +++ b/src/cron.ts @@ -1,6 +1,13 @@ -import { updateTrendingLinks } from '@/trends.ts'; -import { updateTrendingHashtags } from '@/trends.ts'; -import { updateTrendingEvents, updateTrendingPubkeys, updateTrendingZappedEvents } from '@/trends.ts'; +import { sql } from 'kysely'; + +import { Storages } from '@/storages.ts'; +import { + updateTrendingEvents, + updateTrendingHashtags, + updateTrendingLinks, + updateTrendingPubkeys, + updateTrendingZappedEvents, +} from '@/trends.ts'; /** Start cron jobs for the application. */ export function cron() { @@ -9,4 +16,9 @@ export function cron() { Deno.cron('update trending events', '15 * * * *', updateTrendingEvents); Deno.cron('update trending hashtags', '30 * * * *', updateTrendingHashtags); Deno.cron('update trending links', '45 * * * *', updateTrendingLinks); + + Deno.cron('refresh top authors', '20 * * * *', async () => { + const kysely = await Storages.kysely(); + await sql`refresh materialized view top_authors`.execute(kysely); + }); } diff --git a/src/db/DittoTables.ts b/src/db/DittoTables.ts index ea326724..5a7e4c73 100644 --- a/src/db/DittoTables.ts +++ b/src/db/DittoTables.ts @@ -9,6 +9,8 @@ export interface DittoTables extends NPostgresSchema { event_stats: EventStatsRow; event_zaps: EventZapRow; push_subscriptions: PushSubscriptionRow; + /** This is a materialized view of `author_stats` pre-sorted by followers_count. */ + top_authors: Pick; } interface AuthorStatsRow { diff --git a/src/db/migrations/049_author_stats_sorted.ts b/src/db/migrations/049_author_stats_sorted.ts new file mode 100644 index 00000000..425cc7c8 --- /dev/null +++ b/src/db/migrations/049_author_stats_sorted.ts @@ -0,0 +1,13 @@ +import { Kysely } from 'kysely'; + +export async function up(db: Kysely): Promise { + await db.schema + .createView('top_authors') + .materialized() + .as(db.selectFrom('author_stats').select(['pubkey', 'followers_count', 'search']).orderBy('followers_count desc')) + .execute(); +} + +export async function down(db: Kysely): Promise { + await db.schema.dropView('top_authors').execute(); +} diff --git a/src/utils/search.ts b/src/utils/search.ts index 205ee740..e41cd413 100644 --- a/src/utils/search.ts +++ b/src/utils/search.ts @@ -12,10 +12,9 @@ export async function getPubkeysBySearch( const pubkeys = new Set(); const query = kysely - .selectFrom('author_stats') + .selectFrom('top_authors') .select('pubkey') .where('search', sql`%>`, q) - .orderBy('followers_count desc') .limit(limit) .offset(offset); From 1482ee148e541fd86f4947218f483f6d4177da6f Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Wed, 12 Feb 2025 16:13:44 -0600 Subject: [PATCH 5/6] Add missing indexes, fix order of results --- src/controllers/api/accounts.ts | 10 ++++++++-- src/db/migrations/049_author_stats_sorted.ts | 9 ++++++++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/controllers/api/accounts.ts b/src/controllers/api/accounts.ts index 18433f1f..7b1b4216 100644 --- a/src/controllers/api/accounts.ts +++ b/src/controllers/api/accounts.ts @@ -112,7 +112,7 @@ const accountLookupController: AppController = async (c) => { const accountSearchQuerySchema = z.object({ q: z.string().transform(decodeURIComponent), - resolve: booleanParamSchema.optional().transform(Boolean), + resolve: booleanParamSchema.optional(), following: z.boolean().default(false), }); @@ -148,7 +148,13 @@ const accountSearchController: AppController = async (c) => { const following = viewerPubkey ? await getFollowedPubkeys(viewerPubkey) : new Set(); const authors = [...await getPubkeysBySearch(kysely, { q: query, limit, offset: 0, following })]; const profiles = await store.query([{ kinds: [0], authors, limit }], { signal }); - events.push(...profiles); + + for (const pubkey of authors) { + const profile = profiles.find((event) => event.pubkey === pubkey); + if (profile) { + events.push(profile); + } + } } const accounts = await hydrateEvents({ events, store, signal }) diff --git a/src/db/migrations/049_author_stats_sorted.ts b/src/db/migrations/049_author_stats_sorted.ts index 425cc7c8..6eca40cd 100644 --- a/src/db/migrations/049_author_stats_sorted.ts +++ b/src/db/migrations/049_author_stats_sorted.ts @@ -1,4 +1,4 @@ -import { Kysely } from 'kysely'; +import { Kysely, sql } from 'kysely'; export async function up(db: Kysely): Promise { await db.schema @@ -6,8 +6,15 @@ export async function up(db: Kysely): Promise { .materialized() .as(db.selectFrom('author_stats').select(['pubkey', 'followers_count', 'search']).orderBy('followers_count desc')) .execute(); + + await sql`CREATE INDEX top_authors_search_idx ON top_authors USING GIN (search gin_trgm_ops)`.execute(db); + + await db.schema.createIndex('top_authors_pubkey_idx').on('top_authors').column('pubkey').execute(); + + await db.schema.dropIndex('author_stats_search_idx').execute(); } export async function down(db: Kysely): Promise { await db.schema.dropView('top_authors').execute(); + await sql`CREATE INDEX author_stats_search_idx ON author_stats USING GIN (search gin_trgm_ops)`.execute(db); } From db793a3c6c33f38d2190bcf36e5249b5361e8dd6 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Wed, 12 Feb 2025 16:20:04 -0600 Subject: [PATCH 6/6] Refresh materialized view in search test --- src/utils/search.test.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/utils/search.test.ts b/src/utils/search.test.ts index 0b2e36ab..d3c92011 100644 --- a/src/utils/search.test.ts +++ b/src/utils/search.test.ts @@ -1,4 +1,5 @@ import { assertEquals } from '@std/assert'; +import { sql } from 'kysely'; import { createTestDB } from '@/test.ts'; import { getPubkeysBySearch } from '@/utils/search.ts'; @@ -14,6 +15,8 @@ Deno.test('fuzzy search works', async () => { following_count: 0, }).execute(); + await sql`REFRESH MATERIALIZED VIEW top_authors`.execute(db.kysely); + assertEquals( await getPubkeysBySearch(db.kysely, { q: 'pat rick', limit: 1, offset: 0, following: new Set() }), new Set(), @@ -43,6 +46,8 @@ Deno.test('fuzzy search works with offset', async () => { following_count: 0, }).execute(); + await sql`REFRESH MATERIALIZED VIEW top_authors`.execute(db.kysely); + assertEquals( await getPubkeysBySearch(db.kysely, { q: 'dosreis.com', limit: 1, offset: 1, following: new Set() }), new Set(),