Merge branch 'search-perf' into 'main'

Improve performance of account search

See merge request soapbox-pub/ditto!653
This commit is contained in:
Alex Gleason 2025-02-12 22:20:13 +00:00
commit a62d33991c
7 changed files with 88 additions and 43 deletions

View file

@ -112,13 +112,14 @@ const accountLookupController: AppController = async (c) => {
const accountSearchQuerySchema = z.object({
q: z.string().transform(decodeURIComponent),
resolve: booleanParamSchema.optional().transform(Boolean),
resolve: booleanParamSchema.optional(),
following: z.boolean().default(false),
});
const accountSearchController: AppController = async (c) => {
const { signal } = c.req.raw;
const { limit } = c.get('pagination');
const kysely = await Storages.kysely();
const viewerPubkey = await c.get('signer')?.getPublicKey();
@ -136,27 +137,28 @@ const accountSearchController: AppController = async (c) => {
if (!event && lookup) {
const pubkey = await lookupPubkey(lookup);
return c.json(pubkey ? [await accountFromPubkey(pubkey)] : []);
return c.json(pubkey ? [accountFromPubkey(pubkey)] : []);
}
const followedPubkeys: Set<string> = viewerPubkey ? await getFollowedPubkeys(viewerPubkey) : new Set();
const pubkeys = Array.from(await getPubkeysBySearch(kysely, { q: query, limit, offset: 0, followedPubkeys }));
const events: NostrEvent[] = [];
let events = event ? [event] : await store.query([{ kinds: [0], authors: pubkeys, limit }], {
signal,
});
if (event) {
events.push(event);
} else {
const following = viewerPubkey ? await getFollowedPubkeys(viewerPubkey) : new Set<string>();
const authors = [...await getPubkeysBySearch(kysely, { q: query, limit, offset: 0, following })];
const profiles = await store.query([{ kinds: [0], authors, limit }], { signal });
if (!event) {
events = pubkeys
.map((pubkey) => events.find((event) => event.pubkey === pubkey))
.filter((event) => !!event);
for (const pubkey of authors) {
const profile = profiles.find((event) => event.pubkey === pubkey);
if (profile) {
events.push(profile);
}
}
}
const accounts = await hydrateEvents({ events, store, signal }).then(
(events) =>
Promise.all(
events.map((event) => renderAccount(event)),
),
);
const accounts = await hydrateEvents({ events, store, signal })
.then((events) => events.map((event) => renderAccount(event)));
return c.json(accounts);
};

View file

@ -104,8 +104,8 @@ async function searchEvents(
// For account search, use a special index, and prioritize followed accounts.
if (type === 'accounts') {
const followedPubkeys = viewerPubkey ? await getFollowedPubkeys(viewerPubkey) : new Set<string>();
const searchPubkeys = await getPubkeysBySearch(kysely, { q, limit, offset, followedPubkeys });
const following = viewerPubkey ? await getFollowedPubkeys(viewerPubkey) : new Set<string>();
const searchPubkeys = await getPubkeysBySearch(kysely, { q, limit, offset, following });
filter.authors = [...searchPubkeys];
filter.search = undefined;

View file

@ -1,6 +1,13 @@
import { updateTrendingLinks } from '@/trends.ts';
import { updateTrendingHashtags } from '@/trends.ts';
import { updateTrendingEvents, updateTrendingPubkeys, updateTrendingZappedEvents } from '@/trends.ts';
import { sql } from 'kysely';
import { Storages } from '@/storages.ts';
import {
updateTrendingEvents,
updateTrendingHashtags,
updateTrendingLinks,
updateTrendingPubkeys,
updateTrendingZappedEvents,
} from '@/trends.ts';
/** Start cron jobs for the application. */
export function cron() {
@ -9,4 +16,9 @@ export function cron() {
Deno.cron('update trending events', '15 * * * *', updateTrendingEvents);
Deno.cron('update trending hashtags', '30 * * * *', updateTrendingHashtags);
Deno.cron('update trending links', '45 * * * *', updateTrendingLinks);
Deno.cron('refresh top authors', '20 * * * *', async () => {
const kysely = await Storages.kysely();
await sql`refresh materialized view top_authors`.execute(kysely);
});
}

View file

@ -9,6 +9,8 @@ export interface DittoTables extends NPostgresSchema {
event_stats: EventStatsRow;
event_zaps: EventZapRow;
push_subscriptions: PushSubscriptionRow;
/** This is a materialized view of `author_stats` pre-sorted by followers_count. */
top_authors: Pick<AuthorStatsRow, 'pubkey' | 'followers_count' | 'search'>;
}
interface AuthorStatsRow {

View file

@ -0,0 +1,20 @@
import { Kysely, sql } from 'kysely';
/**
 * Apply the migration: create the `top_authors` materialized view and move
 * the trigram search index from `author_stats` onto it.
 *
 * The view holds the `pubkey`, `followers_count` and `search` columns of
 * `author_stats`, selected in descending `followers_count` order.
 *
 * @param db Kysely handle supplied by the migrator. It is typed `Kysely<any>`
 *   because a migration cannot rely on the application's table types.
 */
export async function up(db: Kysely<any>): Promise<void> {
  // Rows the view is built from, most-followed authors first.
  const rankedAuthors = db
    .selectFrom('author_stats')
    .select(['pubkey', 'followers_count', 'search'])
    .orderBy('followers_count desc');

  await db.schema
    .createView('top_authors')
    .materialized()
    .as(rankedAuthors)
    .execute();

  // Trigram GIN index for fuzzy matching on `search`; written as raw SQL
  // because it needs the `gin_trgm_ops` operator class.
  await sql`CREATE INDEX top_authors_search_idx ON top_authors USING GIN (search gin_trgm_ops)`.execute(db);

  // Plain index on `pubkey` for lookups by author.
  await db.schema
    .createIndex('top_authors_pubkey_idx')
    .on('top_authors')
    .column('pubkey')
    .execute();

  // The search index now lives on the view, so drop the one on the base table.
  await db.schema.dropIndex('author_stats_search_idx').execute();
}
/**
 * Revert the migration: remove the `top_authors` materialized view and
 * restore the trigram search index on `author_stats`.
 *
 * @param db Kysely handle supplied by the migrator.
 */
export async function down(db: Kysely<unknown>): Promise<void> {
  // `top_authors` was created as a MATERIALIZED view, so it must be dropped
  // as one: PostgreSQL rejects a plain `DROP VIEW` on a materialized view.
  // Dropping the view also removes the indexes that were created on it.
  await db.schema.dropView('top_authors').materialized().execute();

  // Recreate the index that `up` removed from the base table.
  await sql`CREATE INDEX author_stats_search_idx ON author_stats USING GIN (search gin_trgm_ops)`.execute(db);
}

View file

@ -1,4 +1,5 @@
import { assertEquals } from '@std/assert';
import { sql } from 'kysely';
import { createTestDB } from '@/test.ts';
import { getPubkeysBySearch } from '@/utils/search.ts';
@ -14,18 +15,20 @@ Deno.test('fuzzy search works', async () => {
following_count: 0,
}).execute();
await sql`REFRESH MATERIALIZED VIEW top_authors`.execute(db.kysely);
assertEquals(
await getPubkeysBySearch(db.kysely, { q: 'pat rick', limit: 1, offset: 0, followedPubkeys: new Set() }),
await getPubkeysBySearch(db.kysely, { q: 'pat rick', limit: 1, offset: 0, following: new Set() }),
new Set(),
);
assertEquals(
await getPubkeysBySearch(db.kysely, { q: 'patrick dosreis', limit: 1, offset: 0, followedPubkeys: new Set() }),
await getPubkeysBySearch(db.kysely, { q: 'patrick dosreis', limit: 1, offset: 0, following: new Set() }),
new Set([
'47259076c85f9240e852420d7213c95e95102f1de929fb60f33a2c32570c98c4',
]),
);
assertEquals(
await getPubkeysBySearch(db.kysely, { q: 'dosreis.com', limit: 1, offset: 0, followedPubkeys: new Set() }),
await getPubkeysBySearch(db.kysely, { q: 'dosreis.com', limit: 1, offset: 0, following: new Set() }),
new Set([
'47259076c85f9240e852420d7213c95e95102f1de929fb60f33a2c32570c98c4',
]),
@ -43,8 +46,10 @@ Deno.test('fuzzy search works with offset', async () => {
following_count: 0,
}).execute();
await sql`REFRESH MATERIALIZED VIEW top_authors`.execute(db.kysely);
assertEquals(
await getPubkeysBySearch(db.kysely, { q: 'dosreis.com', limit: 1, offset: 1, followedPubkeys: new Set() }),
await getPubkeysBySearch(db.kysely, { q: 'dosreis.com', limit: 1, offset: 1, following: new Set() }),
new Set(),
);
});

View file

@ -5,30 +5,34 @@ import { DittoTables } from '@/db/DittoTables.ts';
/** Get pubkeys whose name and NIP-05 are similar to `q`. */
export async function getPubkeysBySearch(
kysely: Kysely<DittoTables>,
opts: { q: string; limit: number; offset: number; followedPubkeys: Set<string> },
opts: { q: string; limit: number; offset: number; following: Set<string> },
): Promise<Set<string>> {
const { q, limit, followedPubkeys, offset } = opts;
const { q, limit, following, offset } = opts;
let query = kysely
.selectFrom('author_stats')
.select((eb) => [
'pubkey',
'search',
eb.fn('word_similarity', [sql`${q}`, 'search']).as('sml'),
])
.where(() => sql`${q} <% search`)
.orderBy(['followers_count desc'])
.orderBy(['sml desc', 'search'])
const pubkeys = new Set<string>();
const query = kysely
.selectFrom('top_authors')
.select('pubkey')
.where('search', sql`%>`, q)
.limit(limit)
.offset(offset);
const pubkeys = new Set((await query.execute()).map(({ pubkey }) => pubkey));
if (following.size) {
const authorsQuery = query.where('pubkey', 'in', [...following]);
if (followedPubkeys.size > 0) {
query = query.where('pubkey', 'in', [...followedPubkeys]);
for (const { pubkey } of await authorsQuery.execute()) {
pubkeys.add(pubkey);
}
}
const followingPubkeys = new Set((await query.execute()).map(({ pubkey }) => pubkey));
if (pubkeys.size >= limit) {
return pubkeys;
}
return new Set(Array.from(followingPubkeys.union(pubkeys)));
for (const { pubkey } of await query.limit(limit - pubkeys.size).execute()) {
pubkeys.add(pubkey);
}
return pubkeys;
}