diff --git a/src/controllers/api/search.ts b/src/controllers/api/search.ts
index e8ec6057..4c3aa75f 100644
--- a/src/controllers/api/search.ts
+++ b/src/controllers/api/search.ts
@@ -11,7 +11,7 @@ import { nip05Cache } from '@/utils/nip05.ts';
 import { accountFromPubkey, renderAccount } from '@/views/mastodon/accounts.ts';
 import { renderStatus } from '@/views/mastodon/statuses.ts';
 import { getFollowedPubkeys } from '@/queries.ts';
-import { getPubkeysBySearch } from '@/utils/search.ts';
+import { getIdsBySearch, getPubkeysBySearch } from '@/utils/search.ts';
 
 const searchQuerySchema = z.object({
   q: z.string().transform(decodeURIComponent),
@@ -94,10 +94,10 @@ async function searchEvents(
     limit,
   };
 
+  const kysely = await Storages.kysely();
+
   // For account search, use a special index, and prioritize followed accounts.
   if (type === 'accounts') {
-    const kysely = await Storages.kysely();
-
     const followedPubkeys = viewerPubkey ? await getFollowedPubkeys(viewerPubkey) : new Set();
     const searchPubkeys = await getPubkeysBySearch(kysely, { q, limit, offset, followedPubkeys });
 
@@ -105,6 +105,13 @@ async function searchEvents(
     filter.search = undefined;
   }
 
+  // For status search, use a specific query so it supports offset and is open to customizations.
+  if (type === 'statuses') {
+    const ids = await getIdsBySearch(kysely, { q, limit, offset });
+    filter.ids = [...ids];
+    filter.search = undefined;
+  }
+
   // Results should only be shown from one author.
   if (account_id) {
     filter.authors = [account_id];
diff --git a/src/utils/search.test.ts b/src/utils/search.test.ts
index 056c2927..d7073a39 100644
--- a/src/utils/search.test.ts
+++ b/src/utils/search.test.ts
@@ -1,7 +1,7 @@
 import { assertEquals } from '@std/assert';
 
-import { createTestDB } from '@/test.ts';
-import { getPubkeysBySearch } from '@/utils/search.ts';
+import { createTestDB, genEvent } from '@/test.ts';
+import { getIdsBySearch, getPubkeysBySearch } from '@/utils/search.ts';
 
 Deno.test('fuzzy search works', async () => {
   await using db = await createTestDB();
@@ -48,3 +48,45 @@ Deno.test('fuzzy search works with offset', async () => {
     new Set(),
   );
 });
+
+Deno.test('Searching for posts work', async () => {
+  await using db = await createTestDB();
+
+  const event = genEvent({ content: "I'm not an orphan. Death is my importance", kind: 1 });
+  await db.store.event(event);
+  await db.kysely.updateTable('nostr_events').set('language', 'en').where('id', '=', event.id).execute();
+
+  const event2 = genEvent({ content: 'The more I explore is the more I fall in love with the music I make.', kind: 1 });
+  await db.store.event(event2);
+  await db.kysely.updateTable('nostr_events').set('language', 'en').where('id', '=', event2.id).execute();
+
+  assertEquals(
+    await getIdsBySearch(db.kysely, { q: 'Death is my importance', limit: 1, offset: 0 }), // ordered words
+    new Set([event.id]),
+  );
+
+  assertEquals(
+    await getIdsBySearch(db.kysely, { q: 'make I music', limit: 1, offset: 0 }), // reversed words
+    new Set([event2.id]),
+  );
+
+  assertEquals(
+    await getIdsBySearch(db.kysely, { q: 'language:en make I music', limit: 10, offset: 0 }), // reversed words, english
+    new Set([event2.id]),
+  );
+
+  assertEquals(
+    await getIdsBySearch(db.kysely, { q: 'language:en an orphan', limit: 10, offset: 0 }), // all posts in english plus search
+    new Set([event.id]),
+  );
+
+  assertEquals(
+    await getIdsBySearch(db.kysely, { q: 'language:en', limit: 10, offset: 0 }), // all posts in english
+    new Set([event.id, event2.id]),
+  );
+
+  assertEquals(
+    await getIdsBySearch(db.kysely, { q: '', limit: 10, offset: 0 }),
+    new Set(),
+  );
+});
diff --git a/src/utils/search.ts b/src/utils/search.ts
index 29ecefd9..c2d69bdb 100644
--- a/src/utils/search.ts
+++ b/src/utils/search.ts
@@ -1,6 +1,7 @@
 import { Kysely, sql } from 'kysely';
 
 import { DittoTables } from '@/db/DittoTables.ts';
+import { NIP50 } from '@nostrify/nostrify';
 
 /** Get pubkeys whose name and NIP-05 is similar to 'q' */
 export async function getPubkeysBySearch(
@@ -32,3 +33,84 @@ export async function getPubkeysBySearch(
 
   return new Set(Array.from(followingPubkeys.union(pubkeys)));
 }
+
+/**
+ * Get the ids of kind 1 events whose content matches `q`.
+ *
+ * NIP-50 extensions are supported: `language:<code>` restricts results to events
+ * in that language and `domain:<host>` to authors listed in `pubkey_domains`.
+ * The remaining free text is matched as a phrase first (`phraseto_tsquery`); when
+ * that yields nothing it is retried as unordered words (`plainto_tsquery`).
+ *
+ * @returns The matching ids (newest first, honouring `limit`/`offset`), or an
+ * empty set when `q` contains no searchable lexemes.
+ */
+export async function getIdsBySearch(
+  kysely: Kysely<DittoTables>,
+  opts: { q: string; limit: number; offset: number },
+): Promise<Set<string>> {
+  const { q, limit, offset } = opts;
+
+  const [lexemes] = await kysely
+    .selectNoFrom(sql<string>`phraseto_tsquery(${q})`.as('phraseto_tsquery'))
+    .execute();
+
+  // If it's just stop words, don't bother making a request to the database.
+  if (!lexemes?.phraseto_tsquery) {
+    return new Set();
+  }
+
+  const tokens = NIP50.parseInput(q);
+  const parsedSearch = tokens.filter((t) => typeof t === 'string').join(' ');
+
+  const languages = new Set<string>();
+  const domains = new Set<string>();
+
+  for (const token of tokens) {
+    if (typeof token !== 'object') continue;
+    if (token.key === 'language') languages.add(token.value);
+    else if (token.key === 'domain') domains.add(token.value);
+  }
+
+  let query = kysely
+    .selectFrom('nostr_events')
+    .select('id')
+    .where('kind', '=', 1)
+    .orderBy('created_at', 'desc')
+    .limit(limit)
+    .offset(offset);
+
+  if (languages.size) {
+    query = query.where('language', 'in', [...languages]);
+  }
+
+  if (domains.size) {
+    const pubkeys = kysely
+      .selectFrom('pubkey_domains')
+      .select('pubkey')
+      .where('domain', 'in', [...domains]);
+
+    query = query.where('pubkey', 'in', pubkeys);
+  }
+
+  // Only extensions were given (e.g. `language:en`): there is no text to match.
+  if (!parsedSearch) {
+    const rows = await query.execute();
+    return new Set(rows.map(({ id }) => id));
+  }
+
+  const phraseRows = await query
+    .where('search', '@@', sql`phraseto_tsquery(${parsedSearch})`)
+    .execute();
+
+  if (phraseRows.length) {
+    return new Set(phraseRows.map(({ id }) => id));
+  }
+
+  // Nothing matched the exact phrase; fall back to matching the words in any order.
+  const plainRows = await query
+    .where('search', '@@', sql`plainto_tsquery(${parsedSearch})`)
+    .execute();
+
+  return new Set(plainRows.map(({ id }) => id));
+}