diff --git a/deno.json b/deno.json
index 1bc89109..2a8d18f9 100644
--- a/deno.json
+++ b/deno.json
@@ -49,7 +49,7 @@
     "@isaacs/ttlcache": "npm:@isaacs/ttlcache@^1.4.1",
     "@negrel/webpush": "jsr:@negrel/webpush@^0.3.0",
     "@noble/secp256k1": "npm:@noble/secp256k1@^2.0.0",
-    "@nostrify/db": "jsr:@nostrify/db@^0.38.0",
+    "@nostrify/db": "jsr:@nostrify/db@^0.39.0",
     "@nostrify/nostrify": "jsr:@nostrify/nostrify@^0.38.1",
     "@nostrify/policies": "jsr:@nostrify/policies@^0.36.1",
     "@nostrify/types": "jsr:@nostrify/types@^0.36.0",
diff --git a/deno.lock b/deno.lock
index a470f793..7737f3d0 100644
--- a/deno.lock
+++ b/deno.lock
@@ -31,7 +31,7 @@
     "jsr:@hono/hono@^4.4.6": "4.6.15",
     "jsr:@negrel/http-ece@0.6.0": "0.6.0",
     "jsr:@negrel/webpush@0.3": "0.3.0",
-    "jsr:@nostrify/db@0.38": "0.38.0",
+    "jsr:@nostrify/db@0.39": "0.39.0",
    "jsr:@nostrify/nostrify@0.31": "0.31.0",
    "jsr:@nostrify/nostrify@0.32": "0.32.0",
    "jsr:@nostrify/nostrify@0.36": "0.36.2",
@@ -358,8 +358,8 @@
       "jsr:@std/path@0.224.0"
      ]
    },
-    "@nostrify/db@0.38.0": {
-      "integrity": "44118756b95f747779839f0e578a5e1dbca164ec44edb8885bd1c99840775e8a",
+    "@nostrify/db@0.39.0": {
+      "integrity": "13a88c610eb15a5dd13848d5beec9170406376c9d05299ce5e5298452a5431ac",
       "dependencies": [
         "jsr:@nostrify/nostrify@~0.38.1",
         "jsr:@nostrify/types@0.36",
@@ -2432,7 +2432,7 @@
       "jsr:@gfx/canvas-wasm@~0.4.2",
       "jsr:@hono/hono@^4.4.6",
       "jsr:@negrel/webpush@0.3",
-      "jsr:@nostrify/db@0.38",
+      "jsr:@nostrify/db@0.39",
      "jsr:@nostrify/nostrify@~0.38.1",
      "jsr:@nostrify/policies@~0.36.1",
      "jsr:@nostrify/types@0.36",
diff --git a/scripts/db-populate-extensions.ts b/scripts/db-populate-extensions.ts
index 428b591f..ca6d1927 100644
--- a/scripts/db-populate-extensions.ts
+++ b/scripts/db-populate-extensions.ts
@@ -1,25 +1,26 @@
+import { NostrEvent } from '@nostrify/nostrify';
+
 import { Storages } from '@/storages.ts';
 import { EventsDB } from '@/storages/EventsDB.ts';
 
-const store = await Storages.db();
 const kysely = await Storages.kysely();
 
-for await (const msg of store.req([{}])) {
-  if (msg[0] === 'EVENT') {
-    const event = msg[2];
+const query = kysely
+  .selectFrom('nostr_events')
+  .select(['id', 'kind', 'content', 'pubkey', 'tags', 'created_at', 'sig']);
 
-    const ext = EventsDB.indexExtensions(event);
+for await (const row of query.stream()) {
+  const event: NostrEvent = { ...row, created_at: Number(row.created_at) };
+  const ext = EventsDB.indexExtensions(event);
 
-    try {
-      await kysely.updateTable('nostr_events')
-        .set('search_ext', ext)
-        .where('id', '=', event.id)
-        .execute();
-    } catch {
-      // do nothing
-    }
-  } else {
-    break;
+  try {
+    await kysely
+      .updateTable('nostr_events')
+      .set('search_ext', ext)
+      .where('id', '=', event.id)
+      .execute();
+  } catch {
+    // do nothing
   }
 }
 
diff --git a/src/config.ts b/src/config.ts
index 4a79a1a0..be333334 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -338,7 +338,7 @@ class Conf {
   /** Link preview cache settings. */
   get linkPreview(): { max: number; ttl: number } {
     return {
-      max: Number(Deno.env.get('DITTO_CACHE_LINK_PREVIEW_MAX') || 1000),
+      max: Number(Deno.env.get('DITTO_CACHE_LINK_PREVIEW_MAX') || 3000),
       ttl: Number(Deno.env.get('DITTO_CACHE_LINK_PREVIEW_TTL') || 12 * 60 * 60 * 1000),
     };
   },
diff --git a/src/controllers/api/accounts.ts b/src/controllers/api/accounts.ts
index 23f3190d..7b1b4216 100644
--- a/src/controllers/api/accounts.ts
+++ b/src/controllers/api/accounts.ts
@@ -112,13 +112,14 @@ const accountLookupController: AppController = async (c) => {
 
 const accountSearchQuerySchema = z.object({
   q: z.string().transform(decodeURIComponent),
-  resolve: booleanParamSchema.optional().transform(Boolean),
+  resolve: booleanParamSchema.optional(),
   following: z.boolean().default(false),
 });
 
 const accountSearchController: AppController = async (c) => {
   const { signal } = c.req.raw;
   const { limit } = c.get('pagination');
+  const kysely = await Storages.kysely();
 
   const viewerPubkey = await c.get('signer')?.getPublicKey();
 
@@ -136,27 +137,28 @@ const accountSearchController: AppController = async (c) => {
   if (!event && lookup) {
     const pubkey = await lookupPubkey(lookup);
-    return c.json(pubkey ? [await accountFromPubkey(pubkey)] : []);
+    return c.json(pubkey ? [accountFromPubkey(pubkey)] : []);
   }
 
-  const followedPubkeys: Set<string> = viewerPubkey ? await getFollowedPubkeys(viewerPubkey) : new Set();
-  const pubkeys = Array.from(await getPubkeysBySearch(kysely, { q: query, limit, offset: 0, followedPubkeys }));
+  const events: NostrEvent[] = [];
 
-  let events = event ? [event] : await store.query([{ kinds: [0], authors: pubkeys, limit }], {
-    signal,
-  });
+  if (event) {
+    events.push(event);
+  } else {
+    const following = viewerPubkey ? await getFollowedPubkeys(viewerPubkey) : new Set();
+    const authors = [...await getPubkeysBySearch(kysely, { q: query, limit, offset: 0, following })];
+    const profiles = await store.query([{ kinds: [0], authors, limit }], { signal });
 
-  if (!event) {
-    events = pubkeys
-      .map((pubkey) => events.find((event) => event.pubkey === pubkey))
-      .filter((event) => !!event);
+    for (const pubkey of authors) {
+      const profile = profiles.find((event) => event.pubkey === pubkey);
+      if (profile) {
+        events.push(profile);
+      }
+    }
   }
-  const accounts = await hydrateEvents({ events, store, signal }).then(
-    (events) =>
-      Promise.all(
-        events.map((event) => renderAccount(event)),
-      ),
-  );
+
+  const accounts = await hydrateEvents({ events, store, signal })
+    .then((events) => events.map((event) => renderAccount(event)));
 
   return c.json(accounts);
 };
@@ -197,7 +199,7 @@ const accountStatusesQuerySchema = z.object({
   limit: z.coerce.number().nonnegative().transform((v) => Math.min(v, 40)).catch(20),
   exclude_replies: booleanParamSchema.optional(),
   tagged: z.string().optional(),
-  only_media: z.coerce.boolean().optional(),
+  only_media: booleanParamSchema.optional(),
 });
 
 const accountStatusesController: AppController = async (c) => {
@@ -241,14 +243,24 @@ const accountStatusesController: AppController = async (c) => {
     limit,
   };
 
+  const search: string[] = [];
+
   if (only_media) {
-    filter.search = 'media:true';
+    search.push('media:true');
+  }
+
+  if (exclude_replies) {
+    search.push('reply:false');
   }
 
   if (tagged) {
     filter['#t'] = [tagged];
   }
 
+  if (search.length) {
+    filter.search = search.join(' ');
+  }
+
   const opts = { signal, limit, timeout: Conf.db.timeouts.timelines };
 
   const events = await store.query([filter], opts)
diff --git a/src/controllers/api/search.ts b/src/controllers/api/search.ts
index 8bfe4ffd..e5761f32 100644
--- a/src/controllers/api/search.ts
+++ b/src/controllers/api/search.ts
@@ -11,7 +11,8 @@ import { nip05Cache } from '@/utils/nip05.ts';
 import { accountFromPubkey, renderAccount } from '@/views/mastodon/accounts.ts';
 import { renderStatus } from '@/views/mastodon/statuses.ts';
 import { getFollowedPubkeys } from '@/queries.ts';
-import { getIdsBySearch, getPubkeysBySearch } from '@/utils/search.ts';
+import { getPubkeysBySearch } from '@/utils/search.ts';
+import { paginated, paginatedList } from '@/utils/api.ts';
 
 const searchQuerySchema = z.object({
   q: z.string().transform(decodeURIComponent),
@@ -19,14 +20,14 @@
   resolve: booleanParamSchema.optional().transform(Boolean),
   following: z.boolean().default(false),
   account_id: n.id().optional(),
-  limit: z.coerce.number().catch(20).transform((value) => Math.min(Math.max(value, 0), 40)),
   offset: z.coerce.number().nonnegative().catch(0),
 });
 
-type SearchQuery = z.infer<typeof searchQuerySchema>;
+type SearchQuery = z.infer<typeof searchQuerySchema> & { since?: number; until?: number; limit: number };
 
 const searchController: AppController = async (c) => {
   const result = searchQuerySchema.safeParse(c.req.query());
+  const params = c.get('pagination');
   const { signal } = c.req.raw;
   const viewerPubkey = await c.get('signer')?.getPublicKey();
 
@@ -34,14 +35,14 @@ const searchController: AppController = async (c) => {
     return c.json({ error: 'Bad request', schema: result.error }, 422);
   }
 
-  const event = await lookupEvent(result.data, signal);
+  const event = await lookupEvent({ ...result.data, ...params }, signal);
   const lookup = extractIdentifier(result.data.q);
 
   // Render account from pubkey.
   if (!event && lookup) {
     const pubkey = await lookupPubkey(lookup);
     return c.json({
-      accounts: pubkey ? [await accountFromPubkey(pubkey)] : [],
+      accounts: pubkey ? [accountFromPubkey(pubkey)] : [],
       statuses: [],
       hashtags: [],
     });
@@ -52,7 +53,8 @@ const searchController: AppController = async (c) => {
   if (event) {
     events = [event];
   }
-  events.push(...(await searchEvents({ ...result.data, viewerPubkey }, signal)));
+
+  events.push(...(await searchEvents({ ...result.data, ...params, viewerPubkey }, signal)));
 
   const [accounts, statuses] = await Promise.all([
     Promise.all(
@@ -69,16 +71,22 @@ const searchController: AppController = async (c) => {
     ),
   ]);
 
-  return c.json({
+  const body = {
     accounts,
     statuses,
     hashtags: [],
-  });
+  };
+
+  if (result.data.type === 'accounts') {
+    return paginatedList(c, { ...result.data, ...params }, body);
+  } else {
+    return paginated(c, events, body);
+  }
 };
 
 /** Get events for the search params. */
 async function searchEvents(
-  { q, type, limit, offset, account_id, viewerPubkey }: SearchQuery & { viewerPubkey?: string },
+  { q, type, since, until, limit, offset, account_id, viewerPubkey }: SearchQuery & { viewerPubkey?: string },
   signal: AbortSignal,
 ): Promise<NostrEvent[]> {
   // Hashtag search is not supported.
@@ -91,6 +99,8 @@ async function searchEvents(
   const filter: NostrFilter = {
     kinds: typeToKinds(type),
     search: q,
+    since,
+    until,
     limit,
   };
 
@@ -98,20 +108,13 @@ async function searchEvents(
 
   // For account search, use a special index, and prioritize followed accounts.
   if (type === 'accounts') {
-    const followedPubkeys = viewerPubkey ? await getFollowedPubkeys(viewerPubkey) : new Set();
-    const searchPubkeys = await getPubkeysBySearch(kysely, { q, limit, offset, followedPubkeys });
+    const following = viewerPubkey ? await getFollowedPubkeys(viewerPubkey) : new Set();
+    const searchPubkeys = await getPubkeysBySearch(kysely, { q, limit, offset, following });
 
     filter.authors = [...searchPubkeys];
     filter.search = undefined;
   }
 
-  // For status search, use a specific query so it supports offset and is open to customizations.
-  if (type === 'statuses') {
-    const ids = await getIdsBySearch(kysely, { q, limit, offset });
-    filter.ids = [...ids];
-    filter.search = undefined;
-  }
-
   // Results should only be shown from one author.
   if (account_id) {
     filter.authors = [account_id];
diff --git a/src/controllers/api/timelines.ts b/src/controllers/api/timelines.ts
index fa5f44f6..f6bb8d37 100644
--- a/src/controllers/api/timelines.ts
+++ b/src/controllers/api/timelines.ts
@@ -10,11 +10,40 @@ import { paginated } from '@/utils/api.ts';
 import { getTagSet } from '@/utils/tags.ts';
 import { renderReblog, renderStatus } from '@/views/mastodon/statuses.ts';
 
+const homeQuerySchema = z.object({
+  exclude_replies: booleanParamSchema.optional(),
+  only_media: booleanParamSchema.optional(),
+});
+
 const homeTimelineController: AppController = async (c) => {
   const params = c.get('pagination');
   const pubkey = await c.get('signer')?.getPublicKey()!;
 
+  const result = homeQuerySchema.safeParse(c.req.query());
+
+  if (!result.success) {
+    return c.json({ error: 'Bad request', schema: result.error }, 400);
+  }
+
+  const { exclude_replies, only_media } = result.data;
+
   const authors = [...await getFeedPubkeys(pubkey)];
 
-  return renderStatuses(c, [{ authors, kinds: [1, 6, 20], ...params }]);
+  const filter: NostrFilter = { authors, kinds: [1, 6, 20], ...params };
+
+  const search: string[] = [];
+
+  if (only_media) {
+    search.push('media:true');
+  }
+
+  if (exclude_replies) {
+    search.push('reply:false');
+  }
+
+  if (search.length) {
+    filter.search = search.join(' ');
+  }
+
+  return renderStatuses(c, [filter]);
 };
 
 const publicQuerySchema = z.object({
diff --git a/src/controllers/nostr/relay-info.ts b/src/controllers/nostr/relay-info.ts
index 9ee7babb..cedd09d7 100644
--- a/src/controllers/nostr/relay-info.ts
+++ b/src/controllers/nostr/relay-info.ts
@@ -9,6 +9,8 @@ const relayInfoController: AppController = async (c) => {
   const store = await Storages.db();
   const meta = await getInstanceMetadata(store, c.req.raw.signal);
 
+  c.res.headers.set('access-control-allow-origin', '*');
+
   return c.json({
     name: meta.name,
     description: meta.about,
diff --git a/src/cron.ts b/src/cron.ts
index 6994561e..ba8a18d5 100644
--- a/src/cron.ts
+++ b/src/cron.ts
@@ -1,6 +1,13 @@
-import { updateTrendingLinks } from '@/trends.ts';
-import { updateTrendingHashtags } from '@/trends.ts';
-import { updateTrendingEvents, updateTrendingPubkeys, updateTrendingZappedEvents } from '@/trends.ts';
+import { sql } from 'kysely';
+
+import { Storages } from '@/storages.ts';
+import {
+  updateTrendingEvents,
+  updateTrendingHashtags,
+  updateTrendingLinks,
+  updateTrendingPubkeys,
+  updateTrendingZappedEvents,
+} from '@/trends.ts';
 
 /** Start cron jobs for the application. */
 export function cron() {
@@ -9,4 +16,9 @@
   Deno.cron('update trending events', '15 * * * *', updateTrendingEvents);
   Deno.cron('update trending hashtags', '30 * * * *', updateTrendingHashtags);
   Deno.cron('update trending links', '45 * * * *', updateTrendingLinks);
+
+  Deno.cron('refresh top authors', '20 * * * *', async () => {
+    const kysely = await Storages.kysely();
+    await sql`refresh materialized view top_authors`.execute(kysely);
+  });
 }
diff --git a/src/db/DittoTables.ts b/src/db/DittoTables.ts
index 19ea6e1b..5a7e4c73 100644
--- a/src/db/DittoTables.ts
+++ b/src/db/DittoTables.ts
@@ -7,9 +7,10 @@ export interface DittoTables extends NPostgresSchema {
   author_stats: AuthorStatsRow;
   domain_favicons: DomainFaviconRow;
   event_stats: EventStatsRow;
-  pubkey_domains: PubkeyDomainRow;
   event_zaps: EventZapRow;
   push_subscriptions: PushSubscriptionRow;
+  /** This is a materialized view of `author_stats` pre-sorted by followers_count. */
+  top_authors: Pick<AuthorStatsRow, 'pubkey' | 'followers_count' | 'search'>;
 }
 
 interface AuthorStatsRow {
@@ -45,12 +46,6 @@ interface AuthTokenRow {
   created_at: Date;
 }
 
-interface PubkeyDomainRow {
-  pubkey: string;
-  domain: string;
-  last_updated_at: number;
-}
-
 interface DomainFaviconRow {
   domain: string;
   favicon: string;
diff --git a/src/db/migrations/048_rm_pubkey_domains.ts b/src/db/migrations/048_rm_pubkey_domains.ts
new file mode 100644
index 00000000..20938159
--- /dev/null
+++ b/src/db/migrations/048_rm_pubkey_domains.ts
@@ -0,0 +1,22 @@
+import { Kysely } from 'kysely';
+
+export async function up(db: Kysely<any>): Promise<void> {
+  await db.schema.dropTable('pubkey_domains').execute();
+}
+
+export async function down(db: Kysely<any>): Promise<void> {
+  await db.schema
+    .createTable('pubkey_domains')
+    .ifNotExists()
+    .addColumn('pubkey', 'text', (col) => col.primaryKey())
+    .addColumn('domain', 'text', (col) => col.notNull())
+    .addColumn('last_updated_at', 'integer', (col) => col.notNull().defaultTo(0))
+    .execute();
+
+  await db.schema
+    .createIndex('pubkey_domains_domain_index')
+    .on('pubkey_domains')
+    .column('domain')
+    .ifNotExists()
+    .execute();
+}
diff --git a/src/db/migrations/049_author_stats_sorted.ts b/src/db/migrations/049_author_stats_sorted.ts
new file mode 100644
index 00000000..6eca40cd
--- /dev/null
+++ b/src/db/migrations/049_author_stats_sorted.ts
@@ -0,0 +1,20 @@
+import { Kysely, sql } from 'kysely';
+
+export async function up(db: Kysely<any>): Promise<void> {
+  await db.schema
+    .createView('top_authors')
+    .materialized()
+    .as(db.selectFrom('author_stats').select(['pubkey', 'followers_count', 'search']).orderBy('followers_count desc'))
+    .execute();
+
+  await sql`CREATE INDEX top_authors_search_idx ON top_authors USING GIN (search gin_trgm_ops)`.execute(db);
+
+  await db.schema.createIndex('top_authors_pubkey_idx').on('top_authors').column('pubkey').execute();
+
+  await db.schema.dropIndex('author_stats_search_idx').execute();
+}
+
+export async function down(db: Kysely<any>): Promise<void> {
+  await db.schema.dropView('top_authors').execute();
+  await sql`CREATE INDEX author_stats_search_idx ON author_stats USING GIN (search gin_trgm_ops)`.execute(db);
+}
diff --git a/src/interfaces/DittoEvent.ts b/src/interfaces/DittoEvent.ts
index bca65856..d1b0c280 100644
--- a/src/interfaces/DittoEvent.ts
+++ b/src/interfaces/DittoEvent.ts
@@ -27,7 +27,6 @@ export interface EventStats {
 /** Internal Event representation used by Ditto, including extra keys. */
 export interface DittoEvent extends NostrEvent {
   author?: DittoEvent;
-  author_domain?: string;
   author_stats?: AuthorStats;
   event_stats?: EventStats;
   mentions?: DittoEvent[];
diff --git a/src/pipeline.ts b/src/pipeline.ts
index 7540bc82..4fcd43bf 100644
--- a/src/pipeline.ts
+++ b/src/pipeline.ts
@@ -19,9 +19,11 @@ import { getAmount } from '@/utils/bolt11.ts';
 import { faviconCache } from '@/utils/favicon.ts';
 import { errorJson } from '@/utils/log.ts';
 import { nip05Cache } from '@/utils/nip05.ts';
+import { parseNoteContent, stripimeta } from '@/utils/note.ts';
 import { purifyEvent } from '@/utils/purify.ts';
 import { updateStats } from '@/utils/stats.ts';
 import { getTagSet } from '@/utils/tags.ts';
+import { unfurlCardCached } from '@/utils/unfurl.ts';
 import { renderWebPushNotification } from '@/views/mastodon/push.ts';
 import { policyWorker } from '@/workers/policy.ts';
 import { verifyEventWorker } from '@/workers/verify.ts';
@@ -122,6 +124,7 @@ async function handleEvent(event: DittoEvent, opts: PipelineOpts): Promise
     Promise.allSettled([
       handleZaps(kysely, event),
       updateAuthorData(event, opts.signal),
+      prewarmLinkPreview(event, opts.signal),
       generateSetEvents(event),
     ])
       .then(() =>
@@ -158,15 +161,6 @@ function isProtectedEvent(event: NostrEvent): boolean {
 /** Hydrate the event with the user, if applicable. */
 async function hydrateEvent(event: DittoEvent, signal: AbortSignal): Promise<void> {
   await hydrateEvents({ events: [event], store: await Storages.db(), signal });
-
-  const kysely = await Storages.kysely();
-  const domain = await kysely
-    .selectFrom('pubkey_domains')
-    .select('domain')
-    .where('pubkey', '=', event.pubkey)
-    .executeTakeFirst();
-
-  event.author_domain = domain?.domain;
 }
 
 /** Maybe store the event, if eligible. */
@@ -268,6 +262,13 @@ async function updateAuthorData(event: NostrEvent, signal: AbortSignal): Promise
   }
 }
 
+async function prewarmLinkPreview(event: NostrEvent, signal: AbortSignal): Promise<void> {
+  const { firstUrl } = parseNoteContent(stripimeta(event.content, event.tags), []);
+  if (firstUrl) {
+    await unfurlCardCached(firstUrl, signal);
+  }
+}
+
 /** Determine if the event is being received in a timely manner. */
 function isFresh(event: NostrEvent): boolean {
   return eventAge(event) < Time.minutes(1);
 }
diff --git a/src/storages/EventsDB.test.ts b/src/storages/EventsDB.test.ts
index 70be622e..d0947075 100644
--- a/src/storages/EventsDB.test.ts
+++ b/src/storages/EventsDB.test.ts
@@ -43,15 +43,23 @@ Deno.test('query events with domain search filter', async () => {
   await store.event(event1);
 
   assertEquals(await store.query([{}]), [event1]);
-  assertEquals(await store.query([{ search: 'domain:localhost:4036' }]), []);
+  assertEquals(await store.query([{ search: 'domain:gleasonator.dev' }]), []);
   assertEquals(await store.query([{ search: '' }]), [event1]);
 
   await kysely
-    .insertInto('pubkey_domains')
-    .values({ pubkey: event1.pubkey, domain: 'localhost:4036', last_updated_at: event1.created_at })
+    .insertInto('author_stats')
+    .values({
+      pubkey: event1.pubkey,
+      nip05_domain: 'gleasonator.dev',
+      nip05_last_verified_at: event1.created_at,
+      followers_count: 0,
+      following_count: 0,
+      notes_count: 0,
+      search: '',
+    })
     .execute();
 
-  assertEquals(await store.query([{ kinds: [1], search: 'domain:localhost:4036' }]), [event1]);
+  assertEquals(await store.query([{ kinds: [1], search: 'domain:gleasonator.dev' }]), [event1]);
   assertEquals(await store.query([{ kinds: [1], search: 'domain:example.com' }]), []);
 });
diff --git a/src/storages/EventsDB.ts b/src/storages/EventsDB.ts
index 2625c6b2..c0a9bec4 100644
--- a/src/storages/EventsDB.ts
+++ b/src/storages/EventsDB.ts
@@ -1,11 +1,14 @@
 // deno-lint-ignore-file require-await
-import { NPostgres } from '@nostrify/db';
+import { NPostgres, NPostgresSchema } from '@nostrify/db';
 import { NIP50, NKinds, NostrEvent, NostrFilter, NSchema as n } from '@nostrify/nostrify';
 import { logi } from '@soapbox/logi';
 import { JsonValue } from '@std/json';
+import { LanguageCode } from 'iso-639-1';
 import { Kysely } from 'kysely';
+import linkify from 'linkifyjs';
 import { nip27 } from 'nostr-tools';
+import tldts from 'tldts';
 import { z } from 'zod';
 
 import { DittoTables } from '@/db/DittoTables.ts';
@@ -16,6 +19,7 @@ import { abortError } from '@/utils/abort.ts';
 import { purifyEvent } from '@/utils/purify.ts';
 import { DittoEvent } from '@/interfaces/DittoEvent.ts';
 import { detectLanguage } from '@/utils/language.ts';
+import { getMediaLinks } from '@/utils/note.ts';
 
 /** Function to decide whether or not to index a tag. */
 type TagCondition = (opts: TagConditionOpts) => boolean;
@@ -96,6 +100,12 @@ class EventsDB extends NPostgres {
       })
     );
 
+    // quirks mode
+    if (!imeta.length && event.kind === 1) {
+      const links = linkify.find(event.content).filter(({ type }) => type === 'url');
+      imeta.push(...getMediaLinks(links));
+    }
+
     if (imeta.length) {
       ext.media = 'true';
 
@@ -231,6 +241,25 @@ class EventsDB extends NPostgres {
     return super.query(filters, { ...opts, timeout: opts.timeout ?? this.opts.timeout });
   }
 
+  /** Parse an event row from the database. */
+  protected override parseEventRow(row: NPostgresSchema['nostr_events']): DittoEvent {
+    const event: DittoEvent = {
+      id: row.id,
+      kind: row.kind,
+      pubkey: row.pubkey,
+      content: row.content,
+      created_at: Number(row.created_at),
+      tags: row.tags,
+      sig: row.sig,
+    };
+
+    if (!this.opts.pure) {
+      event.language = row.search_ext.language as LanguageCode | undefined;
+    }
+
+    return event;
+  }
+
   /** Delete events based on filters from the database. */
   override async remove(filters: NostrFilter[], opts: { signal?: AbortSignal; timeout?: number } = {}): Promise<void> {
     logi({ level: 'debug', ns: 'ditto.remove', source: 'db', filters: filters as JsonValue });
@@ -342,18 +371,36 @@ class EventsDB extends NPostgres {
     const tokens = NIP50.parseInput(filter.search);
 
     const domains = new Set();
+    const hostnames = new Set();
 
     for (const token of tokens) {
       if (typeof token === 'object' && token.key === 'domain') {
-        domains.add(token.value);
+        const { domain, hostname } = tldts.parse(token.value);
+        if (domain === hostname) {
+          domains.add(token.value);
+        } else {
+          hostnames.add(token.value);
+        }
       }
     }
 
-    if (domains.size) {
+    if (domains.size || hostnames.size) {
       let query = this.opts.kysely
-        .selectFrom('pubkey_domains')
+        .selectFrom('author_stats')
         .select('pubkey')
-        .where('domain', 'in', [...domains]);
+        .where((eb) => {
+          const expr = [];
+          if (domains.size) {
+            expr.push(eb('nip05_domain', 'in', [...domains]));
+          }
+          if (hostnames.size) {
+            expr.push(eb('nip05_hostname', 'in', [...hostnames]));
+          }
+          if (expr.length === 1) {
+            return expr[0];
+          }
+          return eb.or(expr);
+        });
 
       if (filter.authors) {
         query = query.where('pubkey', 'in', filter.authors);
diff --git a/src/storages/InternalRelay.ts b/src/storages/InternalRelay.ts
index 4f38c863..9ab942fb 100644
--- a/src/storages/InternalRelay.ts
+++ b/src/storages/InternalRelay.ts
@@ -61,7 +61,7 @@ export class InternalRelay implements NRelay {
             typeof t === 'object' && t.key === 'domain'
           ) as { key: 'domain'; value: string } | undefined)?.value;
 
-          if (domain === event.author_domain) {
+          if (domain === event.author_stats?.nip05_hostname) {
             machina.push(purifyEvent(event));
             break;
           }
diff --git a/src/storages/hydrate.ts b/src/storages/hydrate.ts
index a162571a..36df74f6 100644
--- a/src/storages/hydrate.ts
+++ b/src/storages/hydrate.ts
@@ -30,11 +30,7 @@ async function hydrateEvents(opts: HydrateOpts): Promise {
 
   const cache = [...events];
 
-  for (const event of await gatherReposts({ events: cache, store, signal })) {
-    cache.push(event);
-  }
-
-  for (const event of await gatherReacted({ events: cache, store, signal })) {
+  for (const event of await gatherRelatedEvents({ events: cache, store, signal })) {
     cache.push(event);
   }
 
@@ -42,11 +38,7 @@ async function hydrateEvents(opts: HydrateOpts): Promise {
     cache.push(event);
   }
 
-  for (const event of await gatherMentions({ events: cache, store, signal })) {
-    cache.push(event);
-  }
-
-  for (const event of await gatherAuthors({ events: cache, store, signal })) {
+  for (const event of await gatherProfiles({ events: cache, store, signal })) {
     cache.push(event);
   }
 
@@ -58,18 +50,6 @@ async function hydrateEvents(opts: HydrateOpts): Promise {
     cache.push(event);
   }
 
-  for (const event of await gatherReportedProfiles({ events: cache, store, signal })) {
-    cache.push(event);
-  }
-
-  for (const event of await gatherReportedNotes({ events: cache, store, signal })) {
-    cache.push(event);
-  }
-
-  for (const event of await gatherZapped({ events: cache, store, signal })) {
-    cache.push(event);
-  }
-
   const authorStats = await gatherAuthorStats(cache, kysely as Kysely<DittoTables>);
   const eventStats = await gatherEventStats(cache, kysely as Kysely<DittoTables>);
 
@@ -217,36 +197,40 @@ export function assembleEvents(
   return a;
 }
 
-/** Collect reposts from the events. */
-function gatherReposts({ events, store, signal }: HydrateOpts): Promise<DittoEvent[]> {
+/** Collect event targets (eg reposts, quote posts, reacted posts, etc.) */
+function gatherRelatedEvents({ events, store, signal }: HydrateOpts): Promise<DittoEvent[]> {
   const ids = new Set();
 
   for (const event of events) {
+    // Reposted events
     if (event.kind === 6) {
       const id = event.tags.find(([name]) => name === 'e')?.[1];
       if (id) {
         ids.add(id);
       }
     }
-  }
-
-  return store.query(
-    [{ ids: [...ids], limit: ids.size }],
-    { signal },
-  );
-}
-
-/** Collect events being reacted to by the events. */
-function gatherReacted({ events, store, signal }: HydrateOpts): Promise<DittoEvent[]> {
-  const ids = new Set();
-
-  for (const event of events) {
+    // Reacted events
     if (event.kind === 7) {
       const id = event.tags.findLast(([name]) => name === 'e')?.[1];
       if (id) {
         ids.add(id);
       }
     }
+    // Reported events
+    if (event.kind === 1984) {
+      for (const [name, value] of event.tags) {
+        if (name === 'e') {
+          ids.add(value);
+        }
+      }
+    }
+    // Zapped events
+    if (event.kind === 9735) {
+      const id = event.tags.find(([name]) => name === 'e')?.[1];
+      if (id) {
+        ids.add(id);
+      }
+    }
   }
 
   return store.query(
@@ -274,11 +258,15 @@ function gatherQuotes({ events, store, signal }: HydrateOpts): Promise {
 
-/** Collect mentions from the events. */
-async function gatherMentions({ events, store, signal }: HydrateOpts): Promise<DittoEvent[]> {
+/** Collect profiles from the events. */
+async function gatherProfiles({ events, store, signal }: HydrateOpts): Promise<DittoEvent[]> {
   const pubkeys = new Set();
 
   for (const event of events) {
+    // Authors
+    pubkeys.add(event.pubkey);
+
+    // Mentions
     if (event.kind === 1) {
       for (const [name, value] of event.tags) {
         if (name === 'p') {
@@ -286,29 +274,14 @@
           pubkeys.add(value);
         }
       }
     }
-  }
-
-  const authors = await store.query([{ kinds: [0], authors: [...pubkeys], limit: pubkeys.size }], { signal });
-
-  for (const pubkey of pubkeys) {
-    const author = authors.find((e) => matchFilter({ kinds: [0], authors: [pubkey] }, e));
-    if (!author) {
-      const fallback = fallbackAuthor(pubkey);
-      authors.push(fallback);
+    // Reported profiles
+    if (event.kind === 1984) {
+      const pubkey = event.tags.find(([name]) => name === 'p')?.[1];
+      if (pubkey) {
+        pubkeys.add(pubkey);
+      }
     }
-  }
-
-  return authors;
-}
-
-/** Collect authors from the events. */
-async function gatherAuthors({ events, store, signal }: HydrateOpts): Promise<DittoEvent[]> {
-  const pubkeys = new Set();
-
-  for (const event of events) {
+    // Zap recipients
     if (event.kind === 9735) {
       const zapReceiver = event.tags.find(([name]) => name === 'p')?.[1];
       if (zapReceiver) {
@@ -324,7 +297,6 @@ async function gatherAuthors({ events, store, signal }: HydrateOpts): Promise {
   return store.query(
     [{ kinds: [0], authors: [...pubkeys], limit: pubkeys.size }],
     { signal },
   );
 }
 
-/** Collect reported notes from the events. */
-function gatherReportedNotes({ events, store, signal }: HydrateOpts): Promise<DittoEvent[]> {
-  const ids = new Set();
-  for (const event of events) {
-    if (event.kind === 1984) {
-      const status_ids = event.tags.filter(([name]) => name === 'e').map((tag) => tag[1]);
-      if (status_ids.length > 0) {
-        for (const id of status_ids) {
-          ids.add(id);
-        }
-      }
-    }
-  }
-
-  return store.query(
-    [{ kinds: [1, 20], ids: [...ids], limit: ids.size }],
-    { signal },
-  );
-}
-
-/** Collect reported profiles from the events. */
-function gatherReportedProfiles({ events, store, signal }: HydrateOpts): Promise<DittoEvent[]> {
-  const pubkeys = new Set();
-
-  for (const event of events) {
-    if (event.kind === 1984) {
-      const pubkey = event.tags.find(([name]) => name === 'p')?.[1];
-      if (pubkey) {
-        pubkeys.add(pubkey);
-      }
-    }
-  }
-
-  return store.query(
-    [{ kinds: [0], authors: [...pubkeys], limit: pubkeys.size }],
-    { signal },
-  );
-}
-
-/** Collect events being zapped. */
-function gatherZapped({ events, store, signal }: HydrateOpts): Promise<DittoEvent[]> {
-  const ids = new Set();
-
-  for (const event of events) {
-    if (event.kind === 9735) {
-      const id = event.tags.find(([name]) => name === 'e')?.[1];
-      if (id) {
-        ids.add(id);
-      }
-    }
-  }
-
-  return store.query(
-    [{ ids: [...ids], limit: ids.size }],
-    { signal },
-  );
-}
-
 /** Collect author stats from the events. */
 async function gatherAuthorStats(
   events: DittoEvent[],
diff --git a/src/utils/api.ts b/src/utils/api.ts
index a01cf277..79512190 100644
--- a/src/utils/api.ts
+++ b/src/utils/api.ts
@@ -207,12 +207,10 @@ function buildLinkHeader(url: string, events: NostrEvent[]): string | undefined
   return `<${next}>; rel="next", <${prev}>; rel="prev"`;
 }
 
-// deno-lint-ignore ban-types
-type Entity = {};
 type HeaderRecord = Record<string, string | string[]>;
 
 /** Return results with pagination headers. Assumes chronological sorting of events. */
-function paginated(c: AppContext, events: NostrEvent[], entities: (Entity | undefined)[], headers: HeaderRecord = {}) {
+function paginated(c: AppContext, events: NostrEvent[], body: object | unknown[], headers: HeaderRecord = {}) {
   const link = buildLinkHeader(c.req.url, events);
 
   if (link) {
@@ -220,7 +218,7 @@
   }
 
   // Filter out undefined entities.
-  const results = entities.filter((entity): entity is Entity => Boolean(entity));
+  const results = Array.isArray(body) ? body.filter(Boolean) : body;
 
   return c.json(results, 200, headers);
 }
@@ -245,18 +243,18 @@ function buildListLinkHeader(url: string, params: { offset: number; limit: numbe
 function paginatedList(
   c: AppContext,
   params: { offset: number; limit: number },
-  entities: unknown[],
+  body: object | unknown[],
   headers: HeaderRecord = {},
 ) {
   const link = buildListLinkHeader(c.req.url, params);
-  const hasMore = entities.length > 0;
+  const hasMore = Array.isArray(body) ? body.length > 0 : true;
 
   if (link) {
     headers.link = hasMore ? link : link.split(', ').find((link) => link.endsWith('; rel="prev"'))!;
   }
 
   // Filter out undefined entities.
-  const results = entities.filter(Boolean);
+  const results = Array.isArray(body) ? body.filter(Boolean) : body;
 
   return c.json(results, 200, headers);
 }
diff --git a/src/utils/favicon.ts b/src/utils/favicon.ts
index fc49c75d..f1ae0f95 100644
--- a/src/utils/favicon.ts
+++ b/src/utils/favicon.ts
@@ -91,7 +91,7 @@ async function fetchFavicon(domain: string, signal?: AbortSignal): Promise
     const fallback = await safeFetch(url, { method: 'HEAD', signal });
     const contentType = fallback.headers.get('content-type');
 
-    if (fallback.ok && contentType === 'image/vnd.microsoft.icon') {
+    if (fallback.ok && ['image/vnd.microsoft.icon', 'image/x-icon'].includes(contentType!)) {
       logi({ level: 'info', ns: 'ditto.favicon', domain, state: 'found', url });
       return url;
     }
diff --git a/src/utils/note.ts b/src/utils/note.ts
index bae371ff..45fcf94a 100644
--- a/src/utils/note.ts
+++ b/src/utils/note.ts
@@ -22,7 +22,7 @@ interface ParsedNoteContent {
 
 /** Convert Nostr content to Mastodon API HTML. Also return parsed data. */
 function parseNoteContent(content: string, mentions: MastodonMention[]): ParsedNoteContent {
-  const links = linkify.find(content).filter(isLinkURL);
+  const links = linkify.find(content).filter(({ type }) => type === 'url');
   const firstUrl = links.find(isNonMediaLink)?.href;
 
   const result = linkifyStr(content, {
@@ -123,11 +123,6 @@ function isNonMediaLink({ href }: Link): boolean {
   return /^https?:\/\//.test(href) && !getUrlMediaType(href);
 }
 
-/** Ensures the Link is a URL so it can be parsed. */
-function isLinkURL(link: Link): boolean {
-  return link.type === 'url';
-}
-
 /** Get pubkey from decoded bech32 entity, or undefined if not applicable. */
 function getDecodedPubkey(decoded: nip19.DecodeResult): string | undefined {
   switch (decoded.type) {
diff --git a/src/utils/search.test.ts b/src/utils/search.test.ts
index 71f96de2..d3c92011 100644
--- a/src/utils/search.test.ts
+++ b/src/utils/search.test.ts
@@ -1,7 +1,8 @@
 import { assertEquals } from '@std/assert';
+import { sql } from 'kysely';
 
-import { createTestDB, genEvent } from '@/test.ts';
-import { getIdsBySearch, getPubkeysBySearch } from '@/utils/search.ts';
+import { createTestDB } from '@/test.ts';
+import { getPubkeysBySearch } from '@/utils/search.ts';
 
 Deno.test('fuzzy search works', async () => {
   await using db = await createTestDB();
@@ -14,18 +15,20 @@ Deno.test('fuzzy search works', async () => {
     following_count: 0,
   }).execute();
 
+  await sql`REFRESH MATERIALIZED VIEW top_authors`.execute(db.kysely);
+
   assertEquals(
-    await getPubkeysBySearch(db.kysely, { q: 'pat rick', limit: 1, offset: 0, followedPubkeys: new Set() }),
+    await getPubkeysBySearch(db.kysely, { q: 'pat rick', limit: 1, offset: 0, following: new Set() }),
     new Set(),
   );
   assertEquals(
-    await getPubkeysBySearch(db.kysely, { q: 'patrick dosreis', limit: 1, offset: 0, followedPubkeys: new Set() }),
+    await getPubkeysBySearch(db.kysely, { q: 'patrick dosreis', limit: 1, offset: 0, following: new Set() }),
     new Set([
       '47259076c85f9240e852420d7213c95e95102f1de929fb60f33a2c32570c98c4',
     ]),
   );
   assertEquals(
-    await getPubkeysBySearch(db.kysely, { q: 'dosreis.com', limit: 1, offset: 0, followedPubkeys: new Set() }),
+    await getPubkeysBySearch(db.kysely, { q: 'dosreis.com', limit: 1, offset: 0, following: new Set() }),
     new Set([
       '47259076c85f9240e852420d7213c95e95102f1de929fb60f33a2c32570c98c4',
     ]),
@@ -43,52 +46,10 @@ Deno.test('fuzzy search works with offset', async () => {
     following_count: 0,
   }).execute();
 
+  await sql`REFRESH MATERIALIZED VIEW top_authors`.execute(db.kysely);
+
   assertEquals(
-    await getPubkeysBySearch(db.kysely, { q: 'dosreis.com', limit: 1, offset: 1, followedPubkeys: new Set() }),
-    new Set(),
-  );
-});
-
-Deno.test('Searching for posts work', async () => {
-  await using db = await createTestDB();
-
-  const event = genEvent({ content: "I'm not an orphan. Death is my importance", kind: 1 });
-  await db.store.event(event);
-  await db.kysely.updateTable('nostr_events').set('search_ext', { language: 'en' }).where('id', '=', event.id)
-    .execute();
-
-  const event2 = genEvent({ content: 'The more I explore is the more I fall in love with the music I make.', kind: 1 });
-  await db.store.event(event2);
-  await db.kysely.updateTable('nostr_events').set('search_ext', { language: 'en' }).where('id', '=', event2.id)
-    .execute();
-
-  assertEquals(
-    await getIdsBySearch(db.kysely, { q: 'Death is my importance', limit: 1, offset: 0 }), // ordered words
-    new Set([event.id]),
-  );
-
-  assertEquals(
-    await getIdsBySearch(db.kysely, { q: 'make I music', limit: 1, offset: 0 }), // reversed words
-    new Set([event2.id]),
-  );
-
-  assertEquals(
-    await getIdsBySearch(db.kysely, { q: 'language:en make I music', limit: 10, offset: 0 }), // reversed words, english
-    new Set([event2.id]),
-  );
-
-  assertEquals(
-    await getIdsBySearch(db.kysely, { q: 'language:en an orphan', limit: 10, offset: 0 }), // all posts in english plus search
-    new Set([event.id]),
-  );
-
-  assertEquals(
-    await getIdsBySearch(db.kysely, { q: 'language:en', limit: 10, offset: 0 }), // all posts in english
-    new Set([event.id, event2.id]),
-  );
-
-  assertEquals(
-    await getIdsBySearch(db.kysely, { q: '', limit: 10, offset: 0 }),
+    await getPubkeysBySearch(db.kysely, { q: 'dosreis.com', limit: 1, offset: 1, following: new Set() }),
     new Set(),
   );
 });
diff --git a/src/utils/search.ts b/src/utils/search.ts
index f44e00c8..e41cd413 100644
--- a/src/utils/search.ts
+++ b/src/utils/search.ts
@@ -1,141 +1,38 @@
 import { Kysely, sql } from 'kysely';
 
 import { DittoTables } from '@/db/DittoTables.ts';
-import { NIP50 } from '@nostrify/nostrify';
 
 /** Get pubkeys whose name and NIP-05 is similar to 'q' */
 export async function getPubkeysBySearch(
   kysely: Kysely<DittoTables>,
-  opts: { q: string; limit: number; offset: number; followedPubkeys: Set<string> },
+  opts: { q: string; limit: number; offset: number; following: Set<string> },
 ): Promise<Set<string>> {
-  const { q, limit, followedPubkeys, offset } = opts;
+  const { q, limit, following, offset } = opts;
 
-  let query = kysely
-    .selectFrom('author_stats')
-    .select((eb) => [
-      'pubkey',
-      'search',
-      eb.fn('word_similarity', [sql`${q}`, 'search']).as('sml'),
-    ])
-    .where(() => sql`${q} <% search`)
-    .orderBy(['followers_count desc'])
-    .orderBy(['sml desc', 'search'])
+  const pubkeys = new Set();
+
+  const query = kysely
+    .selectFrom('top_authors')
+    .select('pubkey')
+    .where('search', sql`%>`, q)
     .limit(limit)
     .offset(offset);
 
-  const pubkeys = new Set((await query.execute()).map(({ pubkey }) => pubkey));
+  if (following.size) {
+    const authorsQuery = query.where('pubkey', 'in', [...following]);
 
-  if (followedPubkeys.size > 0) {
-    query = query.where('pubkey', 'in', [...followedPubkeys]);
+    for (const { pubkey } of await authorsQuery.execute()) {
+      pubkeys.add(pubkey);
+    }
   }
 
-  const followingPubkeys = new Set((await query.execute()).map(({ pubkey }) => pubkey));
+  if (pubkeys.size >= limit) {
+    return pubkeys;
+  }
 
-  return new Set(Array.from(followingPubkeys.union(pubkeys)));
-}
-
-/**
- * Get kind 1 ids whose content matches `q`.
- * It supports NIP-50 extensions.
- */
-export async function getIdsBySearch(
-  kysely: Kysely<DittoTables>,
-  opts: { q: string; limit: number; offset: number },
-): Promise<Set<string>> {
-  const { q, limit, offset } = opts;
-
-  const [lexemes] = (await sql<{ phraseto_tsquery: 'string' }>`SELECT phraseto_tsquery(${q})`.execute(kysely)).rows;
-
-  // if it's just stop words, don't bother making a request to the database
-  if (!lexemes.phraseto_tsquery) {
-    return new Set();
-  }
-
-  const tokens = NIP50.parseInput(q);
-
-  const ext: Record<string, string[]> = {};
-  const txt = tokens.filter((token) => typeof token === 'string').join(' ');
-
-  let query = kysely
-    .selectFrom('nostr_events')
-    .select('id')
-    .where('kind', '=', 1)
-    .orderBy(['created_at desc'])
-    .limit(limit)
-    .offset(offset);
-
-  const domains = new Set();
-
-  for (const token of tokens) {
-    if (typeof token === 'object' && token.key === 'domain') {
-      domains.add(token.value);
-    }
-  }
-
-  for (const token of tokens) {
-    if (typeof token === 'object') {
-      ext[token.key] ??= [];
-      ext[token.key].push(token.value);
-    }
-  }
-
-  for (let [key, values] of Object.entries(ext)) {
-    if (key === 'domain' || key === '-domain') continue;
-
-    let negated = false;
-
-    if (key.startsWith('-')) {
-      key = key.slice(1);
-      negated = true;
-    }
-
-    query = query.where((eb) => {
-      if (negated) {
-        return eb.and(
-          values.map((value) => eb.not(eb('nostr_events.search_ext', '@>', { [key]: value }))),
-        );
-      } else {
-        return eb.or(
-          values.map((value) => eb('nostr_events.search_ext', '@>', { [key]: value })),
-        );
-      }
-    });
-  }
-
-  if (domains.size) {
-    const pubkeys = (await kysely
-      .selectFrom('pubkey_domains')
-      .select('pubkey')
-      .where('domain', 'in', [...domains])
-      .execute()).map(({ pubkey }) => pubkey);
-
-    query = query.where('pubkey', 'in', pubkeys);
-  }
-
-  // If there is not a specific content to search, return the query already
-  // This is useful if the person only makes a query search such as `domain:patrickdosreis.com`
-  if (!txt.length) {
-    const ids = new Set((await query.execute()).map(({ id }) => id));
-    return ids;
-  }
-
-  let fallbackQuery = query;
-  if (txt) {
-    query = query.where('search', '@@', sql`phraseto_tsquery(${txt})`);
-  }
-
-  const ids = new Set((await query.execute()).map(({ id }) => id));
-
-  // If there is no ids, fallback to `plainto_tsquery`
-  if (!ids.size) {
-    fallbackQuery = fallbackQuery.where(
-      'search',
-      '@@',
-      sql`plainto_tsquery(${txt})`,
-    );
-    const ids = new Set((await fallbackQuery.execute()).map(({ id }) => id));
-    return ids;
-  }
-
-  return ids;
+  for (const { pubkey } of await query.limit(limit - pubkeys.size).execute()) {
+    pubkeys.add(pubkey);
+  }
+
+  return pubkeys;
 }
diff --git a/src/views/mastodon/statuses.ts b/src/views/mastodon/statuses.ts
index 0c0eb9f2..00f7dd55 100644
--- a/src/views/mastodon/statuses.ts
+++ b/src/views/mastodon/statuses.ts
@@ -46,7 +46,7 @@ async function renderStatus(event: DittoEvent, opts: RenderStatusOpts): Promise<
   const [card, relatedEvents] = await Promise
     .all([
-      firstUrl ? unfurlCardCached(firstUrl) : null,
+      firstUrl ? unfurlCardCached(firstUrl, AbortSignal.timeout(500)) : null,
       viewerPubkey
         ? await store.query([
           { kinds: [6], '#e': [event.id], authors: [viewerPubkey], limit: 1 },