From 8267c170f0988e8fe95cfece97577722d965c0e2 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Mon, 10 Feb 2025 16:15:13 -0600 Subject: [PATCH 01/24] hydrate: batch queries together --- src/storages/hydrate.ts | 196 ++++++++++------------------------------ 1 file changed, 47 insertions(+), 149 deletions(-) diff --git a/src/storages/hydrate.ts b/src/storages/hydrate.ts index a162571a..a656d590 100644 --- a/src/storages/hydrate.ts +++ b/src/storages/hydrate.ts @@ -30,23 +30,11 @@ async function hydrateEvents(opts: HydrateOpts): Promise { const cache = [...events]; - for (const event of await gatherReposts({ events: cache, store, signal })) { + for (const event of await gatherRelatedEvents({ events: cache, store, signal })) { cache.push(event); } - for (const event of await gatherReacted({ events: cache, store, signal })) { - cache.push(event); - } - - for (const event of await gatherQuotes({ events: cache, store, signal })) { - cache.push(event); - } - - for (const event of await gatherMentions({ events: cache, store, signal })) { - cache.push(event); - } - - for (const event of await gatherAuthors({ events: cache, store, signal })) { + for (const event of await gatherProfiles({ events: cache, store, signal })) { cache.push(event); } @@ -58,18 +46,6 @@ async function hydrateEvents(opts: HydrateOpts): Promise { cache.push(event); } - for (const event of await gatherReportedProfiles({ events: cache, store, signal })) { - cache.push(event); - } - - for (const event of await gatherReportedNotes({ events: cache, store, signal })) { - cache.push(event); - } - - for (const event of await gatherZapped({ events: cache, store, signal })) { - cache.push(event); - } - const authorStats = await gatherAuthorStats(cache, kysely as Kysely); const eventStats = await gatherEventStats(cache, kysely as Kysely); @@ -217,17 +193,47 @@ export function assembleEvents( return a; } -/** Collect reposts from the events. */ -function gatherReposts({ events, store, signal }: HydrateOpts): Promise { +/** Collect event targets (eg reposts, quote posts, reacted posts, etc.) */ +function gatherRelatedEvents({ events, store, signal }: HydrateOpts): Promise { const ids = new Set(); for (const event of events) { + // Quoted events + if (event.kind === 1) { + const id = findQuoteTag(event.tags)?.[1] || findQuoteInContent(event.content); + if (id) { + ids.add(id); + } + } + // Reposted events if (event.kind === 6) { const id = event.tags.find(([name]) => name === 'e')?.[1]; if (id) { ids.add(id); } } + // Reacted events + if (event.kind === 7) { + const id = event.tags.findLast(([name]) => name === 'e')?.[1]; + if (id) { + ids.add(id); + } + } + // Reported events + if (event.kind === 1984) { + for (const [name, value] of event.tags) { + if (name === 'e') { + ids.add(value); + } + } + } + // Zapped events + if (event.kind === 9735) { + const id = event.tags.find(([name]) => name === 'e')?.[1]; + if (id) { + ids.add(id); + } + } } return store.query( @@ -236,49 +242,15 @@ function gatherReposts({ events, store, signal }: HydrateOpts): Promise { - const ids = new Set(); - - for (const event of events) { - if (event.kind === 7) { - const id = event.tags.findLast(([name]) => name === 'e')?.[1]; - if (id) { - ids.add(id); - } - } - } - - return store.query( - [{ ids: [...ids], limit: ids.size }], - { signal }, - ); -} - -/** Collect quotes from the events. */ -function gatherQuotes({ events, store, signal }: HydrateOpts): Promise { - const ids = new Set(); - - for (const event of events) { - if (event.kind === 1) { - const id = findQuoteTag(event.tags)?.[1] || findQuoteInContent(event.content); - if (id) { - ids.add(id); - } - } - } - - return store.query( - [{ ids: [...ids], limit: ids.size }], - { signal }, - ); -} - -/** Collect mentioned profiles from notes. */ -async function gatherMentions({ events, store, signal }: HydrateOpts): Promise { +/** Collect profiles from the events. */ +async function gatherProfiles({ events, store, signal }: HydrateOpts): Promise { const pubkeys = new Set(); for (const event of events) { + // Authors + pubkeys.add(event.pubkey); + + // Mentions if (event.kind === 1) { for (const [name, value] of event.tags) { if (name === 'p') { @@ -286,29 +258,14 @@ async function gatherMentions({ events, store, signal }: HydrateOpts): Promise matchFilter({ kinds: [0], authors: [pubkey] }, e)); - if (!author) { - const fallback = fallbackAuthor(pubkey); - authors.push(fallback); + // Reported profiles + if (event.kind === 1984) { + const pubkey = event.tags.find(([name]) => name === 'p')?.[1]; + if (pubkey) { + pubkeys.add(pubkey); + } } - } - - return authors; -} - -/** Collect authors from the events. */ -async function gatherAuthors({ events, store, signal }: HydrateOpts): Promise { - const pubkeys = new Set(); - - for (const event of events) { + // Zap recipients if (event.kind === 9735) { const zapReceiver = event.tags.find(([name]) => name === 'p')?.[1]; if (zapReceiver) { @@ -324,7 +281,6 @@ async function gatherAuthors({ events, store, signal }: HydrateOpts): Promise { - const ids = new Set(); - for (const event of events) { - if (event.kind === 1984) { - const status_ids = event.tags.filter(([name]) => name === 'e').map((tag) => tag[1]); - if (status_ids.length > 0) { - for (const id of status_ids) { - ids.add(id); - } - } - } - } - - return store.query( - [{ kinds: [1, 20], ids: [...ids], limit: ids.size }], - { signal }, - ); -} - -/** Collect reported profiles from the events. */ -function gatherReportedProfiles({ events, store, signal }: HydrateOpts): Promise { - const pubkeys = new Set(); - - for (const event of events) { - if (event.kind === 1984) { - const pubkey = event.tags.find(([name]) => name === 'p')?.[1]; - if (pubkey) { - pubkeys.add(pubkey); - } - } - } - - return store.query( - [{ kinds: [0], authors: [...pubkeys], limit: pubkeys.size }], - { signal }, - ); -} - -/** Collect events being zapped. */ -function gatherZapped({ events, store, signal }: HydrateOpts): Promise { - const ids = new Set(); - - for (const event of events) { - if (event.kind === 9735) { - const id = event.tags.find(([name]) => name === 'e')?.[1]; - if (id) { - ids.add(id); - } - } - } - - return store.query( - [{ ids: [...ids], limit: ids.size }], - { signal }, - ); -} - /** Collect author stats from the events. */ async function gatherAuthorStats( events: DittoEvent[], From 4e86b6bf3f987c9a2ffe1a502a1ab43e00cf99de Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Mon, 10 Feb 2025 16:21:01 -0600 Subject: [PATCH 02/24] hydrate: quotes must be gathered in a separate step --- src/storages/hydrate.ts | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/src/storages/hydrate.ts b/src/storages/hydrate.ts index a656d590..36df74f6 100644 --- a/src/storages/hydrate.ts +++ b/src/storages/hydrate.ts @@ -34,6 +34,10 @@ async function hydrateEvents(opts: HydrateOpts): Promise { cache.push(event); } + for (const event of await gatherQuotes({ events: cache, store, signal })) { + cache.push(event); + } + for (const event of await gatherProfiles({ events: cache, store, signal })) { cache.push(event); } @@ -198,13 +202,6 @@ function gatherRelatedEvents({ events, store, signal }: HydrateOpts): Promise(); for (const event of events) { - // Quoted events - if (event.kind === 1) { - const id = findQuoteTag(event.tags)?.[1] || findQuoteInContent(event.content); - if (id) { - ids.add(id); - } - } // Reposted events if (event.kind === 6) { const id = event.tags.find(([name]) => name === 'e')?.[1]; @@ -242,6 +239,25 @@ function gatherRelatedEvents({ events, store, signal }: HydrateOpts): Promise { + const ids = new Set(); + + for (const event of events) { + if (event.kind === 1) { + const id = findQuoteTag(event.tags)?.[1] || findQuoteInContent(event.content); + if (id) { + ids.add(id); + } + } + } + + return store.query( + [{ ids: [...ids], limit: ids.size }], + { signal }, + ); +} + /** Collect profiles from the events. */ async function gatherProfiles({ events, store, signal }: HydrateOpts): Promise { const pubkeys = new Set(); From 56c782e6e58ef1f50cdb4017516a31929c8292f4 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Mon, 10 Feb 2025 16:22:56 -0600 Subject: [PATCH 03/24] Set CORS header on NIP-11 response --- src/controllers/nostr/relay-info.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/controllers/nostr/relay-info.ts b/src/controllers/nostr/relay-info.ts index 9ee7babb..cedd09d7 100644 --- a/src/controllers/nostr/relay-info.ts +++ b/src/controllers/nostr/relay-info.ts @@ -9,6 +9,8 @@ const relayInfoController: AppController = async (c) => { const store = await Storages.db(); const meta = await getInstanceMetadata(store, c.req.raw.signal); + c.res.headers.set('access-control-allow-origin', '*'); + return c.json({ name: meta.name, description: meta.about, From 8a9928696b4ff7ed2ec235814f532e6acbe2d5a1 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Mon, 10 Feb 2025 16:56:59 -0600 Subject: [PATCH 04/24] accountStatusesController: handle exclude_replies with NIP-50 search ext --- src/controllers/api/accounts.ts | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/controllers/api/accounts.ts b/src/controllers/api/accounts.ts index 23f3190d..40b3e7b6 100644 --- a/src/controllers/api/accounts.ts +++ b/src/controllers/api/accounts.ts @@ -241,14 +241,24 @@ const accountStatusesController: AppController = async (c) => { limit, }; + const search: string[] = []; + if (only_media) { - filter.search = 'media:true'; + search.push('media:true'); + } + + if (exclude_replies) { + search.push('reply:false'); } if (tagged) { filter['#t'] = [tagged]; } + if (search.length) { + filter.search = search.join(' '); + } + const opts = { signal, limit, timeout: Conf.db.timeouts.timelines }; const events = await store.query([filter], opts) From 11adaef2438512c391023fe8b643b279f22161af Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Mon, 10 Feb 2025 17:08:29 -0600 Subject: [PATCH 05/24] homeTimelineController: support exclude_replies, only_media params --- src/controllers/api/accounts.ts | 2 +- src/controllers/api/timelines.ts | 31 ++++++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/controllers/api/accounts.ts b/src/controllers/api/accounts.ts index 40b3e7b6..d3d67820 100644 --- a/src/controllers/api/accounts.ts +++ b/src/controllers/api/accounts.ts @@ -197,7 +197,7 @@ const accountStatusesQuerySchema = z.object({ limit: z.coerce.number().nonnegative().transform((v) => Math.min(v, 40)).catch(20), exclude_replies: booleanParamSchema.optional(), tagged: z.string().optional(), - only_media: z.coerce.boolean().optional(), + only_media: booleanParamSchema.optional(), }); const accountStatusesController: AppController = async (c) => { diff --git a/src/controllers/api/timelines.ts b/src/controllers/api/timelines.ts index fa5f44f6..f6bb8d37 100644 --- a/src/controllers/api/timelines.ts +++ b/src/controllers/api/timelines.ts @@ -10,11 +10,40 @@ import { paginated } from '@/utils/api.ts'; import { getTagSet } from '@/utils/tags.ts'; import { renderReblog, renderStatus } from '@/views/mastodon/statuses.ts'; +const homeQuerySchema = z.object({ + exclude_replies: booleanParamSchema.optional(), + only_media: booleanParamSchema.optional(), +}); + const homeTimelineController: AppController = async (c) => { const params = c.get('pagination'); const pubkey = await c.get('signer')?.getPublicKey()!; + const result = homeQuerySchema.safeParse(c.req.query()); + + if (!result.success) { + return c.json({ error: 'Bad request', schema: result.error }, 400); + } + + const { exclude_replies, only_media } = result.data; + const authors = [...await getFeedPubkeys(pubkey)]; - return renderStatuses(c, [{ authors, kinds: [1, 6, 20], ...params }]); + const filter: NostrFilter = { authors, kinds: [1, 6, 20], ...params }; + + const search: string[] = []; + + if (only_media) { + search.push('media:true'); + } + + if (exclude_replies) { + search.push('reply:false'); + } + + if (search.length) { + filter.search = search.join(' '); + } + + return renderStatuses(c, [filter]); }; const publicQuerySchema = z.object({ From 756a9d960797c24e1a41adf16419b7e1193a9ea1 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Mon, 10 Feb 2025 21:21:18 -0600 Subject: [PATCH 06/24] favicon: image/x-icon is an acceptable mime type --- src/utils/favicon.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/favicon.ts b/src/utils/favicon.ts index fc49c75d..f1ae0f95 100644 --- a/src/utils/favicon.ts +++ b/src/utils/favicon.ts @@ -91,7 +91,7 @@ async function fetchFavicon(domain: string, signal?: AbortSignal): Promise const fallback = await safeFetch(url, { method: 'HEAD', signal }); const contentType = fallback.headers.get('content-type'); - if (fallback.ok && contentType === 'image/vnd.microsoft.icon') { + if (fallback.ok && ['image/vnd.microsoft.icon', 'image/x-icon'].includes(contentType!)) { logi({ level: 'info', ns: 'ditto.favicon', domain, state: 'found', url }); return url; } From 207e04ef082378802d9cb2350b078484c72fd631 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Mon, 10 Feb 2025 22:19:35 -0600 Subject: [PATCH 07/24] Prewarm card cache in pipeline --- src/config.ts | 2 +- src/pipeline.ts | 10 ++++++++++ src/views/mastodon/statuses.ts | 2 +- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/config.ts b/src/config.ts index cdd88705..5bc5d865 100644 --- a/src/config.ts +++ b/src/config.ts @@ -334,7 +334,7 @@ class Conf { /** Link preview cache settings. */ get linkPreview(): { max: number; ttl: number } { return { - max: Number(Deno.env.get('DITTO_CACHE_LINK_PREVIEW_MAX') || 1000), + max: Number(Deno.env.get('DITTO_CACHE_LINK_PREVIEW_MAX') || 3000), ttl: Number(Deno.env.get('DITTO_CACHE_LINK_PREVIEW_TTL') || 12 * 60 * 60 * 1000), }; }, diff --git a/src/pipeline.ts b/src/pipeline.ts index 7540bc82..31912530 100644 --- a/src/pipeline.ts +++ b/src/pipeline.ts @@ -19,9 +19,11 @@ import { getAmount } from '@/utils/bolt11.ts'; import { faviconCache } from '@/utils/favicon.ts'; import { errorJson } from '@/utils/log.ts'; import { nip05Cache } from '@/utils/nip05.ts'; +import { parseNoteContent, stripimeta } from '@/utils/note.ts'; import { purifyEvent } from '@/utils/purify.ts'; import { updateStats } from '@/utils/stats.ts'; import { getTagSet } from '@/utils/tags.ts'; +import { unfurlCardCached } from '@/utils/unfurl.ts'; import { renderWebPushNotification } from '@/views/mastodon/push.ts'; import { policyWorker } from '@/workers/policy.ts'; import { verifyEventWorker } from '@/workers/verify.ts'; @@ -122,6 +124,7 @@ async function handleEvent(event: DittoEvent, opts: PipelineOpts): Promise Promise.allSettled([ handleZaps(kysely, event), updateAuthorData(event, opts.signal), + prewarmLinkPreview(event, opts.signal), generateSetEvents(event), ]) .then(() => @@ -268,6 +271,13 @@ async function updateAuthorData(event: NostrEvent, signal: AbortSignal): Promise } } +async function prewarmLinkPreview(event: NostrEvent, signal: AbortSignal): Promise { + const { firstUrl } = parseNoteContent(stripimeta(event.content, event.tags), []); + if (firstUrl) { + await unfurlCardCached(firstUrl, signal); + } +} + /** Determine if the event is being received in a timely manner. */ function isFresh(event: NostrEvent): boolean { return eventAge(event) < Time.minutes(1); diff --git a/src/views/mastodon/statuses.ts b/src/views/mastodon/statuses.ts index 0c0eb9f2..00f7dd55 100644 --- a/src/views/mastodon/statuses.ts +++ b/src/views/mastodon/statuses.ts @@ -46,7 +46,7 @@ async function renderStatus(event: DittoEvent, opts: RenderStatusOpts): Promise< const [card, relatedEvents] = await Promise .all([ - firstUrl ? unfurlCardCached(firstUrl) : null, + firstUrl ? unfurlCardCached(firstUrl, AbortSignal.timeout(500)) : null, viewerPubkey ? await store.query([ { kinds: [6], '#e': [event.id], authors: [viewerPubkey], limit: 1 }, From 43d675b8372fba90545e2acbc0cd358d7602dc03 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Tue, 11 Feb 2025 13:19:32 -0600 Subject: [PATCH 08/24] Ensure `.language` property gets added to DittoEvent when it's queried --- src/storages/EventsDB.ts | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/storages/EventsDB.ts b/src/storages/EventsDB.ts index fd2323a8..d9eea001 100644 --- a/src/storages/EventsDB.ts +++ b/src/storages/EventsDB.ts @@ -1,9 +1,10 @@ // deno-lint-ignore-file require-await -import { NPostgres } from '@nostrify/db'; +import { NPostgres, NPostgresSchema } from '@nostrify/db'; import { NIP50, NKinds, NostrEvent, NostrFilter, NSchema as n } from '@nostrify/nostrify'; import { logi } from '@soapbox/logi'; import { JsonValue } from '@std/json'; +import { LanguageCode } from 'iso-639-1'; import { Kysely } from 'kysely'; import { nip27 } from 'nostr-tools'; import { z } from 'zod'; @@ -231,6 +232,25 @@ class EventsDB extends NPostgres { return super.query(filters, { ...opts, timeout: opts.timeout ?? this.opts.timeout }); } + /** Parse an event row from the database. */ + protected override parseEventRow(row: NPostgresSchema['nostr_events']): DittoEvent { + const event: DittoEvent = { + id: row.id, + kind: row.kind, + pubkey: row.pubkey, + content: row.content, + created_at: Number(row.created_at), + tags: row.tags, + sig: row.sig, + }; + + if (!this.opts.pure) { + event.language = row.search_ext.language as LanguageCode | undefined; + } + + return event; + } + /** Delete events based on filters from the database. */ override async remove(filters: NostrFilter[], opts: { signal?: AbortSignal; timeout?: number } = {}): Promise { logi({ level: 'debug', ns: 'ditto.remove', source: 'db', filters: filters as JsonValue }); From eb94da6cca3d808ec0fe1b83f242d5ff351bce68 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Tue, 11 Feb 2025 17:40:28 -0600 Subject: [PATCH 09/24] Upgrade Nostrify to support negative search queries, remove getIdsBySearch function --- deno.json | 2 +- src/controllers/api/search.ts | 9 +-- src/utils/search.test.ts | 48 +-------------- src/utils/search.ts | 107 ---------------------------------- 4 files changed, 4 insertions(+), 162 deletions(-) diff --git a/deno.json b/deno.json index dabb1ac9..18a6621c 100644 --- a/deno.json +++ b/deno.json @@ -48,7 +48,7 @@ "@isaacs/ttlcache": "npm:@isaacs/ttlcache@^1.4.1", "@negrel/webpush": "jsr:@negrel/webpush@^0.3.0", "@noble/secp256k1": "npm:@noble/secp256k1@^2.0.0", - "@nostrify/db": "jsr:@nostrify/db@^0.38.0", + "@nostrify/db": "jsr:@nostrify/db@^0.39.0", "@nostrify/nostrify": "jsr:@nostrify/nostrify@^0.38.1", "@nostrify/policies": "jsr:@nostrify/policies@^0.36.1", "@nostrify/types": "jsr:@nostrify/types@^0.36.0", diff --git a/src/controllers/api/search.ts b/src/controllers/api/search.ts index 8bfe4ffd..c0a4a54e 100644 --- a/src/controllers/api/search.ts +++ b/src/controllers/api/search.ts @@ -11,7 +11,7 @@ import { nip05Cache } from '@/utils/nip05.ts'; import { accountFromPubkey, renderAccount } from '@/views/mastodon/accounts.ts'; import { renderStatus } from '@/views/mastodon/statuses.ts'; import { getFollowedPubkeys } from '@/queries.ts'; -import { getIdsBySearch, getPubkeysBySearch } from '@/utils/search.ts'; +import { getPubkeysBySearch } from '@/utils/search.ts'; const searchQuerySchema = z.object({ q: z.string().transform(decodeURIComponent), @@ -105,13 +105,6 @@ async function searchEvents( filter.search = undefined; } - // For status search, use a specific query so it supports offset and is open to customizations. - if (type === 'statuses') { - const ids = await getIdsBySearch(kysely, { q, limit, offset }); - filter.ids = [...ids]; - filter.search = undefined; - } - // Results should only be shown from one author. if (account_id) { filter.authors = [account_id]; diff --git a/src/utils/search.test.ts b/src/utils/search.test.ts index 71f96de2..056c2927 100644 --- a/src/utils/search.test.ts +++ b/src/utils/search.test.ts @@ -1,7 +1,7 @@ import { assertEquals } from '@std/assert'; -import { createTestDB, genEvent } from '@/test.ts'; -import { getIdsBySearch, getPubkeysBySearch } from '@/utils/search.ts'; +import { createTestDB } from '@/test.ts'; +import { getPubkeysBySearch } from '@/utils/search.ts'; Deno.test('fuzzy search works', async () => { await using db = await createTestDB(); @@ -48,47 +48,3 @@ Deno.test('fuzzy search works with offset', async () => { new Set(), ); }); - -Deno.test('Searching for posts work', async () => { - await using db = await createTestDB(); - - const event = genEvent({ content: "I'm not an orphan. Death is my importance", kind: 1 }); - await db.store.event(event); - await db.kysely.updateTable('nostr_events').set('search_ext', { language: 'en' }).where('id', '=', event.id) - .execute(); - - const event2 = genEvent({ content: 'The more I explore is the more I fall in love with the music I make.', kind: 1 }); - await db.store.event(event2); - await db.kysely.updateTable('nostr_events').set('search_ext', { language: 'en' }).where('id', '=', event2.id) - .execute(); - - assertEquals( - await getIdsBySearch(db.kysely, { q: 'Death is my importance', limit: 1, offset: 0 }), // ordered words - new Set([event.id]), - ); - - assertEquals( - await getIdsBySearch(db.kysely, { q: 'make I music', limit: 1, offset: 0 }), // reversed words - new Set([event2.id]), - ); - - assertEquals( - await getIdsBySearch(db.kysely, { q: 'language:en make I music', limit: 10, offset: 0 }), // reversed words, english - new Set([event2.id]), - ); - - assertEquals( - await getIdsBySearch(db.kysely, { q: 'language:en an orphan', limit: 10, offset: 0 }), // all posts in english plus search - new Set([event.id]), - ); - - assertEquals( - await getIdsBySearch(db.kysely, { q: 'language:en', limit: 10, offset: 0 }), // all posts in english - new Set([event.id, event2.id]), - ); - - assertEquals( - await getIdsBySearch(db.kysely, { q: '', limit: 10, offset: 0 }), - new Set(), - ); -}); diff --git a/src/utils/search.ts b/src/utils/search.ts index f44e00c8..29ecefd9 100644 --- a/src/utils/search.ts +++ b/src/utils/search.ts @@ -1,7 +1,6 @@ import { Kysely, sql } from 'kysely'; import { DittoTables } from '@/db/DittoTables.ts'; -import { NIP50 } from '@nostrify/nostrify'; /** Get pubkeys whose name and NIP-05 is similar to 'q' */ export async function getPubkeysBySearch( @@ -33,109 +32,3 @@ export async function getPubkeysBySearch( return new Set(Array.from(followingPubkeys.union(pubkeys))); } - -/** - * Get kind 1 ids whose content matches `q`. - * It supports NIP-50 extensions. - */ -export async function getIdsBySearch( - kysely: Kysely, - opts: { q: string; limit: number; offset: number }, -): Promise> { - const { q, limit, offset } = opts; - - const [lexemes] = (await sql<{ phraseto_tsquery: 'string' }>`SELECT phraseto_tsquery(${q})`.execute(kysely)).rows; - - // if it's just stop words, don't bother making a request to the database - if (!lexemes.phraseto_tsquery) { - return new Set(); - } - - const tokens = NIP50.parseInput(q); - - const ext: Record = {}; - const txt = tokens.filter((token) => typeof token === 'string').join(' '); - - let query = kysely - .selectFrom('nostr_events') - .select('id') - .where('kind', '=', 1) - .orderBy(['created_at desc']) - .limit(limit) - .offset(offset); - - const domains = new Set(); - - for (const token of tokens) { - if (typeof token === 'object' && token.key === 'domain') { - domains.add(token.value); - } - } - - for (const token of tokens) { - if (typeof token === 'object') { - ext[token.key] ??= []; - ext[token.key].push(token.value); - } - } - - for (let [key, values] of Object.entries(ext)) { - if (key === 'domain' || key === '-domain') continue; - - let negated = false; - - if (key.startsWith('-')) { - key = key.slice(1); - negated = true; - } - - query = query.where((eb) => { - if (negated) { - return eb.and( - values.map((value) => eb.not(eb('nostr_events.search_ext', '@>', { [key]: value }))), - ); - } else { - return eb.or( - values.map((value) => eb('nostr_events.search_ext', '@>', { [key]: value })), - ); - } - }); - } - - if (domains.size) { - const pubkeys = (await kysely - .selectFrom('pubkey_domains') - .select('pubkey') - .where('domain', 'in', [...domains]) - .execute()).map(({ pubkey }) => pubkey); - - query = query.where('pubkey', 'in', pubkeys); - } - - // If there is not a specific content to search, return the query already - // This is useful if the person only makes a query search such as `domain:patrickdosreis.com` - if (!txt.length) { - const ids = new Set((await query.execute()).map(({ id }) => id)); - return ids; - } - - let fallbackQuery = query; - if (txt) { - query = query.where('search', '@@', sql`phraseto_tsquery(${txt})`); - } - - const ids = new Set((await query.execute()).map(({ id }) => id)); - - // If there is no ids, fallback to `plainto_tsquery` - if (!ids.size) { - fallbackQuery = fallbackQuery.where( - 'search', - '@@', - sql`plainto_tsquery(${txt})`, - ); - const ids = new Set((await fallbackQuery.execute()).map(({ id }) => id)); - return ids; - } - - return ids; -} From c379c11b252e5b92223229ea70182feb752f1e79 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Tue, 11 Feb 2025 18:13:47 -0600 Subject: [PATCH 10/24] search: support pagination via Link header --- deno.lock | 12 +----------- src/controllers/api/search.ts | 22 ++++++++++++++-------- src/utils/api.ts | 6 ++---- 3 files changed, 17 insertions(+), 23 deletions(-) diff --git a/deno.lock b/deno.lock index 874085e8..70dc97b8 100644 --- a/deno.lock +++ b/deno.lock @@ -31,7 +31,6 @@ "jsr:@hono/hono@^4.4.6": "4.6.15", "jsr:@negrel/http-ece@0.6.0": "0.6.0", "jsr:@negrel/webpush@0.3": "0.3.0", - "jsr:@nostrify/db@0.38": "0.38.0", "jsr:@nostrify/nostrify@0.31": "0.31.0", "jsr:@nostrify/nostrify@0.32": "0.32.0", "jsr:@nostrify/nostrify@0.36": "0.36.2", @@ -357,15 +356,6 @@ "jsr:@std/path@0.224.0" ] }, - "@nostrify/db@0.38.0": { - "integrity": "44118756b95f747779839f0e578a5e1dbca164ec44edb8885bd1c99840775e8a", - "dependencies": [ - "jsr:@nostrify/nostrify@~0.38.1", - "jsr:@nostrify/types@0.36", - "npm:kysely@~0.27.3", - "npm:nostr-tools@^2.10.4" - ] - }, "@nostrify/nostrify@0.22.4": { "integrity": "1c8a7847e5773213044b491e85fd7cafae2ad194ce59da4d957d2b27c776b42d", "dependencies": [ @@ -2372,7 +2362,7 @@ "jsr:@gfx/canvas-wasm@~0.4.2", "jsr:@hono/hono@^4.4.6", "jsr:@negrel/webpush@0.3", - "jsr:@nostrify/db@0.38", + "jsr:@nostrify/db@0.39", "jsr:@nostrify/nostrify@~0.38.1", "jsr:@nostrify/policies@~0.36.1", "jsr:@nostrify/types@0.36", diff --git a/src/controllers/api/search.ts b/src/controllers/api/search.ts index c0a4a54e..c050fe9d 100644 --- a/src/controllers/api/search.ts +++ b/src/controllers/api/search.ts @@ -12,6 +12,7 @@ import { accountFromPubkey, renderAccount } from '@/views/mastodon/accounts.ts'; import { renderStatus } from '@/views/mastodon/statuses.ts'; import { getFollowedPubkeys } from '@/queries.ts'; import { getPubkeysBySearch } from '@/utils/search.ts'; +import { paginated } from '@/utils/api.ts'; const searchQuerySchema = z.object({ q: z.string().transform(decodeURIComponent), @@ -19,14 +20,14 @@ const searchQuerySchema = z.object({ resolve: booleanParamSchema.optional().transform(Boolean), following: z.boolean().default(false), account_id: n.id().optional(), - limit: z.coerce.number().catch(20).transform((value) => Math.min(Math.max(value, 0), 40)), offset: z.coerce.number().nonnegative().catch(0), }); -type SearchQuery = z.infer; +type SearchQuery = z.infer & { since?: number; until?: number; limit: number }; const searchController: AppController = async (c) => { const result = searchQuerySchema.safeParse(c.req.query()); + const params = c.get('pagination'); const { signal } = c.req.raw; const viewerPubkey = await c.get('signer')?.getPublicKey(); @@ -34,14 +35,14 @@ const searchController: AppController = async (c) => { return c.json({ error: 'Bad request', schema: result.error }, 422); } - const event = await lookupEvent(result.data, signal); + const event = await lookupEvent({ ...result.data, ...params }, signal); const lookup = extractIdentifier(result.data.q); // Render account from pubkey. if (!event && lookup) { const pubkey = await lookupPubkey(lookup); return c.json({ - accounts: pubkey ? [await accountFromPubkey(pubkey)] : [], + accounts: pubkey ? [accountFromPubkey(pubkey)] : [], statuses: [], hashtags: [], }); @@ -52,7 +53,8 @@ const searchController: AppController = async (c) => { if (event) { events = [event]; } - events.push(...(await searchEvents({ ...result.data, viewerPubkey }, signal))); + + events.push(...(await searchEvents({ ...result.data, ...params, viewerPubkey }, signal))); const [accounts, statuses] = await Promise.all([ Promise.all( @@ -69,16 +71,18 @@ const searchController: AppController = async (c) => { ), ]); - return c.json({ + const body = { accounts, statuses, hashtags: [], - }); + }; + + return paginated(c, events, body); }; /** Get events for the search params. */ async function searchEvents( - { q, type, limit, offset, account_id, viewerPubkey }: SearchQuery & { viewerPubkey?: string }, + { q, type, since, until, limit, offset, account_id, viewerPubkey }: SearchQuery & { viewerPubkey?: string }, signal: AbortSignal, ): Promise { // Hashtag search is not supported. @@ -91,6 +95,8 @@ async function searchEvents( const filter: NostrFilter = { kinds: typeToKinds(type), search: q, + since, + until, limit, }; diff --git a/src/utils/api.ts b/src/utils/api.ts index 29304cbd..ebe07748 100644 --- a/src/utils/api.ts +++ b/src/utils/api.ts @@ -207,12 +207,10 @@ function buildLinkHeader(url: string, events: NostrEvent[]): string | undefined return `<${next}>; rel="next", <${prev}>; rel="prev"`; } -// deno-lint-ignore ban-types -type Entity = {}; type HeaderRecord = Record; /** Return results with pagination headers. Assumes chronological sorting of events. */ -function paginated(c: AppContext, events: NostrEvent[], entities: (Entity | undefined)[], headers: HeaderRecord = {}) { +function paginated(c: AppContext, events: NostrEvent[], body: object | unknown[], headers: HeaderRecord = {}) { const link = buildLinkHeader(c.req.url, events); if (link) { @@ -220,7 +218,7 @@ function paginated(c: AppContext, events: NostrEvent[], entities: (Entity | unde } // Filter out undefined entities. - const results = entities.filter((entity): entity is Entity => Boolean(entity)); + const results = Array.isArray(body) ? body.filter(Boolean) : body; return c.json(results, 200, headers); } From 173aea6458fa1e787363dd68173cfa6a097f2ea5 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Tue, 11 Feb 2025 18:17:22 -0600 Subject: [PATCH 11/24] Update deno.lock --- deno.lock | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/deno.lock b/deno.lock index 70dc97b8..029740a6 100644 --- a/deno.lock +++ b/deno.lock @@ -31,6 +31,7 @@ "jsr:@hono/hono@^4.4.6": "4.6.15", "jsr:@negrel/http-ece@0.6.0": "0.6.0", "jsr:@negrel/webpush@0.3": "0.3.0", + "jsr:@nostrify/db@0.39": "0.39.0", "jsr:@nostrify/nostrify@0.31": "0.31.0", "jsr:@nostrify/nostrify@0.32": "0.32.0", "jsr:@nostrify/nostrify@0.36": "0.36.2", @@ -356,6 +357,15 @@ "jsr:@std/path@0.224.0" ] }, + "@nostrify/db@0.39.0": { + "integrity": "13a88c610eb15a5dd13848d5beec9170406376c9d05299ce5e5298452a5431ac", + "dependencies": [ + "jsr:@nostrify/nostrify@~0.38.1", + "jsr:@nostrify/types@0.36", + "npm:kysely@~0.27.3", + "npm:nostr-tools@^2.10.4" + ] + }, "@nostrify/nostrify@0.22.4": { "integrity": "1c8a7847e5773213044b491e85fd7cafae2ad194ce59da4d957d2b27c776b42d", "dependencies": [ From efbefd918a19553a8c2dc303ee0084ece05301a6 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Tue, 11 Feb 2025 21:20:16 -0600 Subject: [PATCH 12/24] Speed up db:populate-extensions task --- scripts/db-populate-extensions.ts | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/scripts/db-populate-extensions.ts b/scripts/db-populate-extensions.ts index 428b591f..ca6d1927 100644 --- a/scripts/db-populate-extensions.ts +++ b/scripts/db-populate-extensions.ts @@ -1,25 +1,26 @@ +import { NostrEvent } from '@nostrify/nostrify'; + import { Storages } from '@/storages.ts'; import { EventsDB } from '@/storages/EventsDB.ts'; -const store = await Storages.db(); const kysely = await Storages.kysely(); -for await (const msg of store.req([{}])) { - if (msg[0] === 'EVENT') { - const event = msg[2]; +const query = kysely + .selectFrom('nostr_events') + .select(['id', 'kind', 'content', 'pubkey', 'tags', 'created_at', 'sig']); - const ext = EventsDB.indexExtensions(event); +for await (const row of query.stream()) { + const event: NostrEvent = { ...row, created_at: Number(row.created_at) }; + const ext = EventsDB.indexExtensions(event); - try { - await kysely.updateTable('nostr_events') - .set('search_ext', ext) - .where('id', '=', event.id) - .execute(); - } catch { - // do nothing - } - } else { - break; + try { + await kysely + .updateTable('nostr_events') + .set('search_ext', ext) + .where('id', '=', event.id) + .execute(); + } catch { + // do nothing } } From f6fe777e78b3e18b610af2983cd4e30b3298d682 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Tue, 11 Feb 2025 21:49:58 -0600 Subject: [PATCH 13/24] Remove pubkey_domains table --- src/db/DittoTables.ts | 7 ------- src/db/migrations/048_rm_pubkey_domains.ts | 22 ++++++++++++++++++++++ src/interfaces/DittoEvent.ts | 1 - src/pipeline.ts | 9 --------- src/storages/EventsDB.test.ts | 12 ++++++++++-- src/storages/EventsDB.ts | 4 ++-- src/storages/InternalRelay.ts | 2 +- 7 files changed, 35 insertions(+), 22 deletions(-) create mode 100644 src/db/migrations/048_rm_pubkey_domains.ts diff --git a/src/db/DittoTables.ts b/src/db/DittoTables.ts index 19ea6e1b..ea326724 100644 --- a/src/db/DittoTables.ts +++ b/src/db/DittoTables.ts @@ -7,7 +7,6 @@ export interface DittoTables extends NPostgresSchema { author_stats: AuthorStatsRow; domain_favicons: DomainFaviconRow; event_stats: EventStatsRow; - pubkey_domains: PubkeyDomainRow; event_zaps: EventZapRow; push_subscriptions: PushSubscriptionRow; } @@ -45,12 +44,6 @@ interface AuthTokenRow { created_at: Date; } -interface PubkeyDomainRow { - pubkey: string; - domain: string; - last_updated_at: number; -} - interface DomainFaviconRow { domain: string; favicon: string; diff --git a/src/db/migrations/048_rm_pubkey_domains.ts b/src/db/migrations/048_rm_pubkey_domains.ts new file mode 100644 index 00000000..20938159 --- /dev/null +++ b/src/db/migrations/048_rm_pubkey_domains.ts @@ -0,0 +1,22 @@ +import { Kysely } from 'kysely'; + +export async function up(db: Kysely): Promise { + await db.schema.dropTable('pubkey_domains').execute(); +} + +export async function down(db: Kysely): Promise { + await db.schema + .createTable('pubkey_domains') + .ifNotExists() + .addColumn('pubkey', 'text', (col) => col.primaryKey()) + .addColumn('domain', 'text', (col) => col.notNull()) + .addColumn('last_updated_at', 'integer', (col) => col.notNull().defaultTo(0)) + .execute(); + + await db.schema + .createIndex('pubkey_domains_domain_index') + .on('pubkey_domains') + .column('domain') + .ifNotExists() + .execute(); +} diff --git a/src/interfaces/DittoEvent.ts b/src/interfaces/DittoEvent.ts index bca65856..d1b0c280 100644 --- a/src/interfaces/DittoEvent.ts +++ b/src/interfaces/DittoEvent.ts @@ -27,7 +27,6 @@ export interface EventStats { /** Internal Event representation used by Ditto, including extra keys. */ export interface DittoEvent extends NostrEvent { author?: DittoEvent; - author_domain?: string; author_stats?: AuthorStats; event_stats?: EventStats; mentions?: DittoEvent[]; diff --git a/src/pipeline.ts b/src/pipeline.ts index 31912530..4fcd43bf 100644 --- a/src/pipeline.ts +++ b/src/pipeline.ts @@ -161,15 +161,6 @@ function isProtectedEvent(event: NostrEvent): boolean { /** Hydrate the event with the user, if applicable. */ async function hydrateEvent(event: DittoEvent, signal: AbortSignal): Promise { await hydrateEvents({ events: [event], store: await Storages.db(), signal }); - - const kysely = await Storages.kysely(); - const domain = await kysely - .selectFrom('pubkey_domains') - .select('domain') - .where('pubkey', '=', event.pubkey) - .executeTakeFirst(); - - event.author_domain = domain?.domain; } /** Maybe store the event, if eligible. */ diff --git a/src/storages/EventsDB.test.ts b/src/storages/EventsDB.test.ts index 70be622e..810907be 100644 --- a/src/storages/EventsDB.test.ts +++ b/src/storages/EventsDB.test.ts @@ -47,8 +47,16 @@ Deno.test('query events with domain search filter', async () => { assertEquals(await store.query([{ search: '' }]), [event1]); await kysely - .insertInto('pubkey_domains') - .values({ pubkey: event1.pubkey, domain: 'localhost:4036', last_updated_at: event1.created_at }) + .insertInto('author_stats') + .values({ + pubkey: event1.pubkey, + nip05_domain: 'localhost:4036', + nip05_last_verified_at: event1.created_at, + followers_count: 0, + following_count: 0, + notes_count: 0, + search: '', + }) .execute(); assertEquals(await store.query([{ kinds: [1], search: 'domain:localhost:4036' }]), [event1]); diff --git a/src/storages/EventsDB.ts b/src/storages/EventsDB.ts index d9eea001..a96a2ba3 100644 --- a/src/storages/EventsDB.ts +++ b/src/storages/EventsDB.ts @@ -371,9 +371,9 @@ class EventsDB extends NPostgres { if (domains.size) { let query = this.opts.kysely - .selectFrom('pubkey_domains') + .selectFrom('author_stats') .select('pubkey') - .where('domain', 'in', [...domains]); + .where('nip05_domain', 'in', [...domains]); if (filter.authors) { query = query.where('pubkey', 'in', filter.authors); diff --git a/src/storages/InternalRelay.ts b/src/storages/InternalRelay.ts index 4f38c863..746af8a5 100644 --- a/src/storages/InternalRelay.ts +++ b/src/storages/InternalRelay.ts @@ -61,7 +61,7 @@ export class InternalRelay implements NRelay { typeof t === 'object' && t.key === 'domain' ) as { key: 'domain'; value: string } | undefined)?.value; - if (domain === event.author_domain) { + if (domain === event.author_stats?.nip05_domain) { machina.push(purifyEvent(event)); break; } From 112081e4bb6025419eda23fd84bd8eae8de2d620 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Wed, 12 Feb 2025 13:21:04 -0600 Subject: [PATCH 14/24] Enable media:true for legacy (non-imeta) URL attachments --- src/storages/EventsDB.ts | 8 ++++++++ src/utils/note.ts | 7 +------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/storages/EventsDB.ts b/src/storages/EventsDB.ts index a96a2ba3..f8e47f2f 100644 --- a/src/storages/EventsDB.ts +++ b/src/storages/EventsDB.ts @@ -6,6 +6,7 @@ import { logi } from '@soapbox/logi'; import { JsonValue } from '@std/json'; import { LanguageCode } from 'iso-639-1'; import { Kysely } from 'kysely'; +import linkify from 'linkifyjs'; import { nip27 } from 'nostr-tools'; import { z } from 'zod'; @@ -17,6 +18,7 @@ import { abortError } from '@/utils/abort.ts'; import { purifyEvent } from '@/utils/purify.ts'; import { DittoEvent } from '@/interfaces/DittoEvent.ts'; import { detectLanguage } from '@/utils/language.ts'; +import { getMediaLinks } from '@/utils/note.ts'; /** Function to decide whether or not to index a tag. */ type TagCondition = (opts: TagConditionOpts) => boolean; @@ -97,6 +99,12 @@ class EventsDB extends NPostgres { }) ); + // quirks mode + if (!imeta.length && event.kind === 1) { + const links = linkify.find(event.content).filter(({ type }) => type === 'url'); + imeta.push(...getMediaLinks(links)); + } + if (imeta.length) { ext.media = 'true'; diff --git a/src/utils/note.ts b/src/utils/note.ts index bae371ff..45fcf94a 100644 --- a/src/utils/note.ts +++ b/src/utils/note.ts @@ -22,7 +22,7 @@ interface ParsedNoteContent { /** Convert Nostr content to Mastodon API HTML. Also return parsed data. */ function parseNoteContent(content: string, mentions: MastodonMention[]): ParsedNoteContent { - const links = linkify.find(content).filter(isLinkURL); + const links = linkify.find(content).filter(({ type }) => type === 'url'); const firstUrl = links.find(isNonMediaLink)?.href; const result = linkifyStr(content, { @@ -123,11 +123,6 @@ function isNonMediaLink({ href }: Link): boolean { return /^https?:\/\//.test(href) && !getUrlMediaType(href); } -/** Ensures the Link is a URL so it can be parsed. */ -function isLinkURL(link: Link): boolean { - return link.type === 'url'; -} - /** Get pubkey from decoded bech32 entity, or undefined if not applicable. */ function getDecodedPubkey(decoded: nip19.DecodeResult): string | undefined { switch (decoded.type) { From 379953a8cb25a0574fa04981708db8efaac92e60 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Wed, 12 Feb 2025 14:40:00 -0600 Subject: [PATCH 15/24] Improve performance of account search --- src/controllers/api/accounts.ts | 30 ++++++++++++-------------- src/utils/search.ts | 37 +++++++++++++++++++-------------- 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/src/controllers/api/accounts.ts b/src/controllers/api/accounts.ts index d3d67820..18433f1f 100644 --- a/src/controllers/api/accounts.ts +++ b/src/controllers/api/accounts.ts @@ -119,6 +119,7 @@ const accountSearchQuerySchema = z.object({ const accountSearchController: AppController = async (c) => { const { signal } = c.req.raw; const { limit } = c.get('pagination'); + const kysely = await Storages.kysely(); const viewerPubkey = await c.get('signer')?.getPublicKey(); @@ -136,27 +137,22 @@ const accountSearchController: AppController = async (c) => { if (!event && lookup) { const pubkey = await lookupPubkey(lookup); - return c.json(pubkey ? [await accountFromPubkey(pubkey)] : []); + return c.json(pubkey ? [accountFromPubkey(pubkey)] : []); } - const followedPubkeys: Set = viewerPubkey ? await getFollowedPubkeys(viewerPubkey) : new Set(); - const pubkeys = Array.from(await getPubkeysBySearch(kysely, { q: query, limit, offset: 0, followedPubkeys })); + const events: NostrEvent[] = []; - let events = event ? [event] : await store.query([{ kinds: [0], authors: pubkeys, limit }], { - signal, - }); - - if (!event) { - events = pubkeys - .map((pubkey) => events.find((event) => event.pubkey === pubkey)) - .filter((event) => !!event); + if (event) { + events.push(event); + } else { + const following = viewerPubkey ? await getFollowedPubkeys(viewerPubkey) : new Set(); + const authors = [...await getPubkeysBySearch(kysely, { q: query, limit, offset: 0, following })]; + const profiles = await store.query([{ kinds: [0], authors, limit }], { signal }); + events.push(...profiles); } - const accounts = await hydrateEvents({ events, store, signal }).then( - (events) => - Promise.all( - events.map((event) => renderAccount(event)), - ), - ); + + const accounts = await hydrateEvents({ events, store, signal }) + .then((events) => events.map((event) => renderAccount(event))); return c.json(accounts); }; diff --git a/src/utils/search.ts b/src/utils/search.ts index 29ecefd9..1c608c99 100644 --- a/src/utils/search.ts +++ b/src/utils/search.ts @@ -5,30 +5,35 @@ import { DittoTables } from '@/db/DittoTables.ts'; /** Get pubkeys whose name and NIP-05 is similar to 'q' */ export async function getPubkeysBySearch( kysely: Kysely, - opts: { q: string; limit: number; offset: number; followedPubkeys: Set }, + opts: { q: string; limit: number; offset: number; following: Set }, ): Promise> { - const { q, limit, followedPubkeys, offset } = opts; + const { q, limit, following, offset } = opts; - let query = kysely + const pubkeys = new Set(); + + const query = kysely .selectFrom('author_stats') - .select((eb) => [ - 'pubkey', - 'search', - eb.fn('word_similarity', [sql`${q}`, 'search']).as('sml'), - ]) - .where(() => sql`${q} <% search`) - .orderBy(['followers_count desc']) - .orderBy(['sml desc', 'search']) + .select('pubkey') + .where('search', sql`%>`, q) + .orderBy('followers_count desc') .limit(limit) .offset(offset); - const pubkeys = new Set((await query.execute()).map(({ pubkey }) => pubkey)); + if (following.size) { + const authorsQuery = query.where('pubkey', 'in', [...following]); - if (followedPubkeys.size > 0) { - query = query.where('pubkey', 'in', [...followedPubkeys]); + for (const { pubkey } of await authorsQuery.execute()) { + pubkeys.add(pubkey); + } } - const followingPubkeys = new Set((await query.execute()).map(({ pubkey }) => pubkey)); + if (pubkeys.size >= limit) { + return pubkeys; + } - return new Set(Array.from(followingPubkeys.union(pubkeys))); + for (const { pubkey } of await query.execute()) { + pubkeys.add(pubkey); + } + + return pubkeys; } From 5969d9b3fa26f91f4e125e56df66d9d1594d1582 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Wed, 12 Feb 2025 14:47:20 -0600 Subject: [PATCH 16/24] getPubkeysBySearch: reduce limit of second query if applicable --- src/utils/search.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/search.ts b/src/utils/search.ts index 1c608c99..205ee740 100644 --- a/src/utils/search.ts +++ b/src/utils/search.ts @@ -31,7 +31,7 @@ export async function getPubkeysBySearch( return pubkeys; } - for (const { pubkey } of await query.execute()) { + for (const { pubkey } of await query.limit(limit - pubkeys.size).execute()) { pubkeys.add(pubkey); } From 510ad647be71cdcac06c72a5e7c97cbb91cef428 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Wed, 12 Feb 2025 15:02:19 -0600 Subject: [PATCH 17/24] Fix type errors --- src/controllers/api/search.ts | 4 ++-- src/utils/search.test.ts | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/controllers/api/search.ts b/src/controllers/api/search.ts index c050fe9d..b3c80a2f 100644 --- a/src/controllers/api/search.ts +++ b/src/controllers/api/search.ts @@ -104,8 +104,8 @@ async function searchEvents( // For account search, use a special index, and prioritize followed accounts. if (type === 'accounts') { - const followedPubkeys = viewerPubkey ? await getFollowedPubkeys(viewerPubkey) : new Set(); - const searchPubkeys = await getPubkeysBySearch(kysely, { q, limit, offset, followedPubkeys }); + const following = viewerPubkey ? await getFollowedPubkeys(viewerPubkey) : new Set(); + const searchPubkeys = await getPubkeysBySearch(kysely, { q, limit, offset, following }); filter.authors = [...searchPubkeys]; filter.search = undefined; diff --git a/src/utils/search.test.ts b/src/utils/search.test.ts index 056c2927..0b2e36ab 100644 --- a/src/utils/search.test.ts +++ b/src/utils/search.test.ts @@ -15,17 +15,17 @@ Deno.test('fuzzy search works', async () => { }).execute(); assertEquals( - await getPubkeysBySearch(db.kysely, { q: 'pat rick', limit: 1, offset: 0, followedPubkeys: new Set() }), + await getPubkeysBySearch(db.kysely, { q: 'pat rick', limit: 1, offset: 0, following: new Set() }), new Set(), ); assertEquals( - await getPubkeysBySearch(db.kysely, { q: 'patrick dosreis', limit: 1, offset: 0, followedPubkeys: new Set() }), + await getPubkeysBySearch(db.kysely, { q: 'patrick dosreis', limit: 1, offset: 0, following: new Set() }), new Set([ '47259076c85f9240e852420d7213c95e95102f1de929fb60f33a2c32570c98c4', ]), ); assertEquals( - await getPubkeysBySearch(db.kysely, { q: 'dosreis.com', limit: 1, offset: 0, followedPubkeys: new Set() }), + await getPubkeysBySearch(db.kysely, { q: 'dosreis.com', limit: 1, offset: 0, following: new Set() }), new Set([ '47259076c85f9240e852420d7213c95e95102f1de929fb60f33a2c32570c98c4', ]), @@ -44,7 +44,7 @@ Deno.test('fuzzy search works with offset', async () => { }).execute(); assertEquals( - await getPubkeysBySearch(db.kysely, { q: 'dosreis.com', limit: 1, offset: 1, followedPubkeys: new Set() }), + await getPubkeysBySearch(db.kysely, { q: 'dosreis.com', limit: 1, offset: 1, following: new Set() }), new Set(), ); }); From ab7a0e06c7e77590532f839671127980f1f00920 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Wed, 12 Feb 2025 15:21:09 -0600 Subject: [PATCH 18/24] Add a top_authors materialized view --- src/cron.ts | 18 +++++++++++++++--- src/db/DittoTables.ts | 2 ++ src/db/migrations/049_author_stats_sorted.ts | 13 +++++++++++++ src/utils/search.ts | 3 +-- 4 files changed, 31 insertions(+), 5 deletions(-) create mode 100644 src/db/migrations/049_author_stats_sorted.ts diff --git a/src/cron.ts b/src/cron.ts index 6994561e..ba8a18d5 100644 --- a/src/cron.ts +++ b/src/cron.ts @@ -1,6 +1,13 @@ -import { updateTrendingLinks } from '@/trends.ts'; -import { updateTrendingHashtags } from '@/trends.ts'; -import { updateTrendingEvents, updateTrendingPubkeys, updateTrendingZappedEvents } from '@/trends.ts'; +import { sql } from 'kysely'; + +import { Storages } from '@/storages.ts'; +import { + updateTrendingEvents, + updateTrendingHashtags, + updateTrendingLinks, + updateTrendingPubkeys, + updateTrendingZappedEvents, +} from '@/trends.ts'; /** Start cron jobs for the application. */ export function cron() { @@ -9,4 +16,9 @@ export function cron() { Deno.cron('update trending events', '15 * * * *', updateTrendingEvents); Deno.cron('update trending hashtags', '30 * * * *', updateTrendingHashtags); Deno.cron('update trending links', '45 * * * *', updateTrendingLinks); + + Deno.cron('refresh top authors', '20 * * * *', async () => { + const kysely = await Storages.kysely(); + await sql`refresh materialized view top_authors`.execute(kysely); + }); } diff --git a/src/db/DittoTables.ts b/src/db/DittoTables.ts index ea326724..5a7e4c73 100644 --- a/src/db/DittoTables.ts +++ b/src/db/DittoTables.ts @@ -9,6 +9,8 @@ export interface DittoTables extends NPostgresSchema { event_stats: EventStatsRow; event_zaps: EventZapRow; push_subscriptions: PushSubscriptionRow; + /** This is a materialized view of `author_stats` pre-sorted by followers_count. */ + top_authors: Pick; } interface AuthorStatsRow { diff --git a/src/db/migrations/049_author_stats_sorted.ts b/src/db/migrations/049_author_stats_sorted.ts new file mode 100644 index 00000000..425cc7c8 --- /dev/null +++ b/src/db/migrations/049_author_stats_sorted.ts @@ -0,0 +1,13 @@ +import { Kysely } from 'kysely'; + +export async function up(db: Kysely): Promise { + await db.schema + .createView('top_authors') + .materialized() + .as(db.selectFrom('author_stats').select(['pubkey', 'followers_count', 'search']).orderBy('followers_count desc')) + .execute(); +} + +export async function down(db: Kysely): Promise { + await db.schema.dropView('top_authors').execute(); +} diff --git a/src/utils/search.ts b/src/utils/search.ts index 205ee740..e41cd413 100644 --- a/src/utils/search.ts +++ b/src/utils/search.ts @@ -12,10 +12,9 @@ export async function getPubkeysBySearch( const pubkeys = new Set(); const query = kysely - .selectFrom('author_stats') + .selectFrom('top_authors') .select('pubkey') .where('search', sql`%>`, q) - .orderBy('followers_count desc') .limit(limit) .offset(offset); From 1482ee148e541fd86f4947218f483f6d4177da6f Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Wed, 12 Feb 2025 16:13:44 -0600 Subject: [PATCH 19/24] Add missing indexes, fix order of results --- src/controllers/api/accounts.ts | 10 ++++++++-- src/db/migrations/049_author_stats_sorted.ts | 9 ++++++++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/controllers/api/accounts.ts b/src/controllers/api/accounts.ts index 18433f1f..7b1b4216 100644 --- a/src/controllers/api/accounts.ts +++ b/src/controllers/api/accounts.ts @@ -112,7 +112,7 @@ const accountLookupController: AppController = async (c) => { const accountSearchQuerySchema = z.object({ q: z.string().transform(decodeURIComponent), - resolve: booleanParamSchema.optional().transform(Boolean), + resolve: booleanParamSchema.optional(), following: z.boolean().default(false), }); @@ -148,7 +148,13 @@ const accountSearchController: AppController = async (c) => { const following = viewerPubkey ? await getFollowedPubkeys(viewerPubkey) : new Set(); const authors = [...await getPubkeysBySearch(kysely, { q: query, limit, offset: 0, following })]; const profiles = await store.query([{ kinds: [0], authors, limit }], { signal }); - events.push(...profiles); + + for (const pubkey of authors) { + const profile = profiles.find((event) => event.pubkey === pubkey); + if (profile) { + events.push(profile); + } + } } const accounts = await hydrateEvents({ events, store, signal }) diff --git a/src/db/migrations/049_author_stats_sorted.ts b/src/db/migrations/049_author_stats_sorted.ts index 425cc7c8..6eca40cd 100644 --- a/src/db/migrations/049_author_stats_sorted.ts +++ b/src/db/migrations/049_author_stats_sorted.ts @@ -1,4 +1,4 @@ -import { Kysely } from 'kysely'; +import { Kysely, sql } from 'kysely'; export async function up(db: Kysely): Promise { await db.schema @@ -6,8 +6,15 @@ export async function up(db: Kysely): Promise { .materialized() .as(db.selectFrom('author_stats').select(['pubkey', 'followers_count', 'search']).orderBy('followers_count desc')) .execute(); + + await sql`CREATE INDEX top_authors_search_idx ON top_authors USING GIN (search gin_trgm_ops)`.execute(db); + + await db.schema.createIndex('top_authors_pubkey_idx').on('top_authors').column('pubkey').execute(); + + await db.schema.dropIndex('author_stats_search_idx').execute(); } export async function down(db: Kysely): Promise { await db.schema.dropView('top_authors').execute(); + await sql`CREATE INDEX author_stats_search_idx ON author_stats USING GIN (search gin_trgm_ops)`.execute(db); } From db793a3c6c33f38d2190bcf36e5249b5361e8dd6 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Wed, 12 Feb 2025 16:20:04 -0600 Subject: [PATCH 20/24] Refresh materialized view in search test --- src/utils/search.test.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/utils/search.test.ts b/src/utils/search.test.ts index 0b2e36ab..d3c92011 100644 --- a/src/utils/search.test.ts +++ b/src/utils/search.test.ts @@ -1,4 +1,5 @@ import { assertEquals } from '@std/assert'; +import { sql } from 'kysely'; import { createTestDB } from '@/test.ts'; import { getPubkeysBySearch } from '@/utils/search.ts'; @@ -14,6 +15,8 @@ Deno.test('fuzzy search works', async () => { following_count: 0, }).execute(); + await sql`REFRESH MATERIALIZED VIEW top_authors`.execute(db.kysely); + assertEquals( await getPubkeysBySearch(db.kysely, { q: 'pat rick', limit: 1, offset: 0, following: new Set() }), new Set(), @@ -43,6 +46,8 @@ Deno.test('fuzzy search works with offset', async () => { following_count: 0, }).execute(); + await sql`REFRESH MATERIALIZED VIEW top_authors`.execute(db.kysely); + assertEquals( await getPubkeysBySearch(db.kysely, { q: 'dosreis.com', limit: 1, offset: 1, following: new Set() }), new Set(), From d991464810a5e4738f02c2c621aa6aa048deb00e Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Wed, 12 Feb 2025 22:45:53 -0600 Subject: [PATCH 21/24] Fix domain feeds --- src/storages/EventsDB.ts | 2 +- src/storages/InternalRelay.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/storages/EventsDB.ts b/src/storages/EventsDB.ts index f8e47f2f..96fe4e06 100644 --- a/src/storages/EventsDB.ts +++ b/src/storages/EventsDB.ts @@ -381,7 +381,7 @@ class EventsDB extends NPostgres { let query = this.opts.kysely .selectFrom('author_stats') .select('pubkey') - .where('nip05_domain', 'in', [...domains]); + .where('nip05_hostname', 'in', [...domains]); if (filter.authors) { query = query.where('pubkey', 'in', filter.authors); diff --git a/src/storages/InternalRelay.ts b/src/storages/InternalRelay.ts index 746af8a5..9ab942fb 100644 --- a/src/storages/InternalRelay.ts +++ b/src/storages/InternalRelay.ts @@ -61,7 +61,7 @@ export class InternalRelay implements NRelay { typeof t === 'object' && t.key === 'domain' ) as { key: 'domain'; value: string } | undefined)?.value; - if (domain === event.author_stats?.nip05_domain) { + if (domain === event.author_stats?.nip05_hostname) { machina.push(purifyEvent(event)); break; } From aabb20efa3bfcc6713e8fe8ccc77d82bb43d65ab Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Wed, 12 Feb 2025 23:04:08 -0600 Subject: [PATCH 22/24] Rework domain queries so allow querying all subdomains by base domain --- src/storages/EventsDB.ts | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/src/storages/EventsDB.ts b/src/storages/EventsDB.ts index 96fe4e06..64876718 100644 --- a/src/storages/EventsDB.ts +++ b/src/storages/EventsDB.ts @@ -8,6 +8,7 @@ import { LanguageCode } from 'iso-639-1'; import { Kysely } from 'kysely'; import linkify from 'linkifyjs'; import { nip27 } from 'nostr-tools'; +import tldts from 'tldts'; import { z } from 'zod'; import { DittoTables } from '@/db/DittoTables.ts'; @@ -370,18 +371,36 @@ class EventsDB extends NPostgres { const tokens = NIP50.parseInput(filter.search); const domains = new Set(); + const hostnames = new Set(); for (const token of tokens) { if (typeof token === 'object' && token.key === 'domain') { - domains.add(token.value); + const { domain, hostname } = tldts.parse(token.value); + if (domain === hostname) { + domains.add(token.value); + } else { + hostnames.add(token.value); + } } } - if (domains.size) { + if (domains.size || hostnames.size) { let query = this.opts.kysely .selectFrom('author_stats') .select('pubkey') - .where('nip05_hostname', 'in', [...domains]); + .where((eb) => { + const expr = []; + if (domains.size) { + expr.push(eb('nip05_domain', 'in', [...domains])); + } + if (hostnames.size) { + expr.push(eb('nip05_hostname', 'in', [...hostnames])); + } + if (expr.length === 1) { + return expr[0]; + } + return eb.or(expr); + }); if (filter.authors) { query = query.where('pubkey', 'in', filter.authors); From a85daf1e67d0c857953fe6a2af9434b2f0838d1f Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Wed, 12 Feb 2025 23:11:09 -0600 Subject: [PATCH 23/24] EventsDB: fix domain query test --- src/storages/EventsDB.test.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/storages/EventsDB.test.ts b/src/storages/EventsDB.test.ts index 810907be..d0947075 100644 --- a/src/storages/EventsDB.test.ts +++ b/src/storages/EventsDB.test.ts @@ -43,14 +43,14 @@ Deno.test('query events with domain search filter', async () => { await store.event(event1); assertEquals(await store.query([{}]), [event1]); - assertEquals(await store.query([{ search: 'domain:localhost:4036' }]), []); + assertEquals(await store.query([{ search: 'domain:gleasonator.dev' }]), []); assertEquals(await store.query([{ search: '' }]), [event1]); await kysely .insertInto('author_stats') .values({ pubkey: event1.pubkey, - nip05_domain: 'localhost:4036', + nip05_domain: 'gleasonator.dev', nip05_last_verified_at: event1.created_at, followers_count: 0, following_count: 0, @@ -59,7 +59,7 @@ Deno.test('query events with domain search filter', async () => { }) .execute(); - assertEquals(await store.query([{ kinds: [1], search: 'domain:localhost:4036' }]), [event1]); + assertEquals(await store.query([{ kinds: [1], search: 'domain:gleasonator.dev' }]), [event1]); assertEquals(await store.query([{ kinds: [1], search: 'domain:example.com' }]), []); }); From 3c0e6dac76b038aeacb4bec878589d91604f675b Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Wed, 12 Feb 2025 23:35:43 -0600 Subject: [PATCH 24/24] Try using offset pagination for account search --- src/controllers/api/search.ts | 8 ++++++-- src/utils/api.ts | 6 +++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/controllers/api/search.ts b/src/controllers/api/search.ts index b3c80a2f..e5761f32 100644 --- a/src/controllers/api/search.ts +++ b/src/controllers/api/search.ts @@ -12,7 +12,7 @@ import { accountFromPubkey, renderAccount } from '@/views/mastodon/accounts.ts'; import { renderStatus } from '@/views/mastodon/statuses.ts'; import { getFollowedPubkeys } from '@/queries.ts'; import { getPubkeysBySearch } from '@/utils/search.ts'; -import { paginated } from '@/utils/api.ts'; +import { paginated, paginatedList } from '@/utils/api.ts'; const searchQuerySchema = z.object({ q: z.string().transform(decodeURIComponent), @@ -77,7 +77,11 @@ const searchController: AppController = async (c) => { hashtags: [], }; - return paginated(c, events, body); + if (result.data.type === 'accounts') { + return paginatedList(c, { ...result.data, ...params }, body); + } else { + return paginated(c, events, body); + } }; /** Get events for the search params. */ diff --git a/src/utils/api.ts b/src/utils/api.ts index ebe07748..91eda723 100644 --- a/src/utils/api.ts +++ b/src/utils/api.ts @@ -243,18 +243,18 @@ function buildListLinkHeader(url: string, params: { offset: number; limit: numbe function paginatedList( c: AppContext, params: { offset: number; limit: number }, - entities: unknown[], + body: object | unknown[], headers: HeaderRecord = {}, ) { const link = buildListLinkHeader(c.req.url, params); - const hasMore = entities.length > 0; + const hasMore = Array.isArray(body) ? body.length > 0 : true; if (link) { headers.link = hasMore ? link : link.split(', ').find((link) => link.endsWith('; rel="prev"'))!; } // Filter out undefined entities. - const results = entities.filter(Boolean); + const results = Array.isArray(body) ? body.filter(Boolean) : body; return c.json(results, 200, headers); }