Merge branch 'search-perf' into 'main'

Improve performance of account search

See merge request soapbox-pub/ditto!653
This commit is contained in:
Alex Gleason 2025-02-12 22:20:13 +00:00
commit a62d33991c
7 changed files with 88 additions and 43 deletions

View file

@ -112,13 +112,14 @@ const accountLookupController: AppController = async (c) => {
const accountSearchQuerySchema = z.object({ const accountSearchQuerySchema = z.object({
q: z.string().transform(decodeURIComponent), q: z.string().transform(decodeURIComponent),
resolve: booleanParamSchema.optional().transform(Boolean), resolve: booleanParamSchema.optional(),
following: z.boolean().default(false), following: z.boolean().default(false),
}); });
const accountSearchController: AppController = async (c) => { const accountSearchController: AppController = async (c) => {
const { signal } = c.req.raw; const { signal } = c.req.raw;
const { limit } = c.get('pagination'); const { limit } = c.get('pagination');
const kysely = await Storages.kysely(); const kysely = await Storages.kysely();
const viewerPubkey = await c.get('signer')?.getPublicKey(); const viewerPubkey = await c.get('signer')?.getPublicKey();
@ -136,27 +137,28 @@ const accountSearchController: AppController = async (c) => {
if (!event && lookup) { if (!event && lookup) {
const pubkey = await lookupPubkey(lookup); const pubkey = await lookupPubkey(lookup);
return c.json(pubkey ? [await accountFromPubkey(pubkey)] : []); return c.json(pubkey ? [accountFromPubkey(pubkey)] : []);
} }
const followedPubkeys: Set<string> = viewerPubkey ? await getFollowedPubkeys(viewerPubkey) : new Set(); const events: NostrEvent[] = [];
const pubkeys = Array.from(await getPubkeysBySearch(kysely, { q: query, limit, offset: 0, followedPubkeys }));
let events = event ? [event] : await store.query([{ kinds: [0], authors: pubkeys, limit }], { if (event) {
signal, events.push(event);
}); } else {
const following = viewerPubkey ? await getFollowedPubkeys(viewerPubkey) : new Set<string>();
const authors = [...await getPubkeysBySearch(kysely, { q: query, limit, offset: 0, following })];
const profiles = await store.query([{ kinds: [0], authors, limit }], { signal });
if (!event) { for (const pubkey of authors) {
events = pubkeys const profile = profiles.find((event) => event.pubkey === pubkey);
.map((pubkey) => events.find((event) => event.pubkey === pubkey)) if (profile) {
.filter((event) => !!event); events.push(profile);
}
}
} }
const accounts = await hydrateEvents({ events, store, signal }).then(
(events) => const accounts = await hydrateEvents({ events, store, signal })
Promise.all( .then((events) => events.map((event) => renderAccount(event)));
events.map((event) => renderAccount(event)),
),
);
return c.json(accounts); return c.json(accounts);
}; };

View file

@ -104,8 +104,8 @@ async function searchEvents(
// For account search, use a special index, and prioritize followed accounts. // For account search, use a special index, and prioritize followed accounts.
if (type === 'accounts') { if (type === 'accounts') {
const followedPubkeys = viewerPubkey ? await getFollowedPubkeys(viewerPubkey) : new Set<string>(); const following = viewerPubkey ? await getFollowedPubkeys(viewerPubkey) : new Set<string>();
const searchPubkeys = await getPubkeysBySearch(kysely, { q, limit, offset, followedPubkeys }); const searchPubkeys = await getPubkeysBySearch(kysely, { q, limit, offset, following });
filter.authors = [...searchPubkeys]; filter.authors = [...searchPubkeys];
filter.search = undefined; filter.search = undefined;

View file

@ -1,6 +1,13 @@
import { updateTrendingLinks } from '@/trends.ts'; import { sql } from 'kysely';
import { updateTrendingHashtags } from '@/trends.ts';
import { updateTrendingEvents, updateTrendingPubkeys, updateTrendingZappedEvents } from '@/trends.ts'; import { Storages } from '@/storages.ts';
import {
updateTrendingEvents,
updateTrendingHashtags,
updateTrendingLinks,
updateTrendingPubkeys,
updateTrendingZappedEvents,
} from '@/trends.ts';
/** Start cron jobs for the application. */ /** Start cron jobs for the application. */
export function cron() { export function cron() {
@ -9,4 +16,9 @@ export function cron() {
Deno.cron('update trending events', '15 * * * *', updateTrendingEvents); Deno.cron('update trending events', '15 * * * *', updateTrendingEvents);
Deno.cron('update trending hashtags', '30 * * * *', updateTrendingHashtags); Deno.cron('update trending hashtags', '30 * * * *', updateTrendingHashtags);
Deno.cron('update trending links', '45 * * * *', updateTrendingLinks); Deno.cron('update trending links', '45 * * * *', updateTrendingLinks);
Deno.cron('refresh top authors', '20 * * * *', async () => {
const kysely = await Storages.kysely();
await sql`refresh materialized view top_authors`.execute(kysely);
});
} }

View file

@ -9,6 +9,8 @@ export interface DittoTables extends NPostgresSchema {
event_stats: EventStatsRow; event_stats: EventStatsRow;
event_zaps: EventZapRow; event_zaps: EventZapRow;
push_subscriptions: PushSubscriptionRow; push_subscriptions: PushSubscriptionRow;
/** This is a materialized view of `author_stats` pre-sorted by followers_count. */
top_authors: Pick<AuthorStatsRow, 'pubkey' | 'followers_count' | 'search'>;
} }
interface AuthorStatsRow { interface AuthorStatsRow {

View file

@ -0,0 +1,20 @@
import { Kysely, sql } from 'kysely';
/**
 * Forward migration: builds the `top_authors` materialized view from `author_stats`,
 * indexes it, and removes the search index that previously lived on `author_stats`.
 */
export async function up(db: Kysely<any>): Promise<void> {
  // Rows backing the view: the three selected columns, ordered by follower count (highest first).
  const topAuthors = db
    .selectFrom('author_stats')
    .select(['pubkey', 'followers_count', 'search'])
    .orderBy('followers_count', 'desc');

  await db.schema.createView('top_authors').materialized().as(topAuthors).execute();

  // GIN index with `gin_trgm_ops` on the view's `search` column.
  await sql`CREATE INDEX top_authors_search_idx ON top_authors USING GIN (search gin_trgm_ops)`.execute(db);

  // Plain index on `pubkey`.
  const pubkeyIndex = db.schema.createIndex('top_authors_pubkey_idx').on('top_authors').column('pubkey');
  await pubkeyIndex.execute();

  // The equivalent index on `author_stats` is dropped now that the view carries its own.
  await db.schema.dropIndex('author_stats_search_idx').execute();
}
/**
 * Reverse migration: removes the `top_authors` materialized view and recreates the
 * `author_stats_search_idx` index on `author_stats` that `up` dropped.
 */
export async function down(db: Kysely<unknown>): Promise<void> {
  // `top_authors` was created with `.materialized()`, so it has to be dropped as a
  // materialized view. A plain `DROP VIEW` is rejected by PostgreSQL for materialized
  // views, which would make this rollback fail. Dropping the view also removes the
  // indexes that were created on it.
  await db.schema.dropView('top_authors').materialized().execute();

  await sql`CREATE INDEX author_stats_search_idx ON author_stats USING GIN (search gin_trgm_ops)`.execute(db);
}

View file

@ -1,4 +1,5 @@
import { assertEquals } from '@std/assert'; import { assertEquals } from '@std/assert';
import { sql } from 'kysely';
import { createTestDB } from '@/test.ts'; import { createTestDB } from '@/test.ts';
import { getPubkeysBySearch } from '@/utils/search.ts'; import { getPubkeysBySearch } from '@/utils/search.ts';
@ -14,18 +15,20 @@ Deno.test('fuzzy search works', async () => {
following_count: 0, following_count: 0,
}).execute(); }).execute();
await sql`REFRESH MATERIALIZED VIEW top_authors`.execute(db.kysely);
assertEquals( assertEquals(
await getPubkeysBySearch(db.kysely, { q: 'pat rick', limit: 1, offset: 0, followedPubkeys: new Set() }), await getPubkeysBySearch(db.kysely, { q: 'pat rick', limit: 1, offset: 0, following: new Set() }),
new Set(), new Set(),
); );
assertEquals( assertEquals(
await getPubkeysBySearch(db.kysely, { q: 'patrick dosreis', limit: 1, offset: 0, followedPubkeys: new Set() }), await getPubkeysBySearch(db.kysely, { q: 'patrick dosreis', limit: 1, offset: 0, following: new Set() }),
new Set([ new Set([
'47259076c85f9240e852420d7213c95e95102f1de929fb60f33a2c32570c98c4', '47259076c85f9240e852420d7213c95e95102f1de929fb60f33a2c32570c98c4',
]), ]),
); );
assertEquals( assertEquals(
await getPubkeysBySearch(db.kysely, { q: 'dosreis.com', limit: 1, offset: 0, followedPubkeys: new Set() }), await getPubkeysBySearch(db.kysely, { q: 'dosreis.com', limit: 1, offset: 0, following: new Set() }),
new Set([ new Set([
'47259076c85f9240e852420d7213c95e95102f1de929fb60f33a2c32570c98c4', '47259076c85f9240e852420d7213c95e95102f1de929fb60f33a2c32570c98c4',
]), ]),
@ -43,8 +46,10 @@ Deno.test('fuzzy search works with offset', async () => {
following_count: 0, following_count: 0,
}).execute(); }).execute();
await sql`REFRESH MATERIALIZED VIEW top_authors`.execute(db.kysely);
assertEquals( assertEquals(
await getPubkeysBySearch(db.kysely, { q: 'dosreis.com', limit: 1, offset: 1, followedPubkeys: new Set() }), await getPubkeysBySearch(db.kysely, { q: 'dosreis.com', limit: 1, offset: 1, following: new Set() }),
new Set(), new Set(),
); );
}); });

View file

@ -5,30 +5,34 @@ import { DittoTables } from '@/db/DittoTables.ts';
/** Get pubkeys whose name and NIP-05 is similar to 'q' */ /** Get pubkeys whose name and NIP-05 is similar to 'q' */
export async function getPubkeysBySearch( export async function getPubkeysBySearch(
kysely: Kysely<DittoTables>, kysely: Kysely<DittoTables>,
opts: { q: string; limit: number; offset: number; followedPubkeys: Set<string> }, opts: { q: string; limit: number; offset: number; following: Set<string> },
): Promise<Set<string>> { ): Promise<Set<string>> {
const { q, limit, followedPubkeys, offset } = opts; const { q, limit, following, offset } = opts;
let query = kysely const pubkeys = new Set<string>();
.selectFrom('author_stats')
.select((eb) => [ const query = kysely
'pubkey', .selectFrom('top_authors')
'search', .select('pubkey')
eb.fn('word_similarity', [sql`${q}`, 'search']).as('sml'), .where('search', sql`%>`, q)
])
.where(() => sql`${q} <% search`)
.orderBy(['followers_count desc'])
.orderBy(['sml desc', 'search'])
.limit(limit) .limit(limit)
.offset(offset); .offset(offset);
const pubkeys = new Set((await query.execute()).map(({ pubkey }) => pubkey)); if (following.size) {
const authorsQuery = query.where('pubkey', 'in', [...following]);
if (followedPubkeys.size > 0) { for (const { pubkey } of await authorsQuery.execute()) {
query = query.where('pubkey', 'in', [...followedPubkeys]); pubkeys.add(pubkey);
}
} }
const followingPubkeys = new Set((await query.execute()).map(({ pubkey }) => pubkey)); if (pubkeys.size >= limit) {
return pubkeys;
}
return new Set(Array.from(followingPubkeys.union(pubkeys))); for (const { pubkey } of await query.limit(limit - pubkeys.size).execute()) {
pubkeys.add(pubkey);
}
return pubkeys;
} }