diff --git a/deno.json b/deno.json index 073f8cc6..11ef1e5a 100644 --- a/deno.json +++ b/deno.json @@ -22,7 +22,6 @@ "trends": "deno run -A --env-file --deny-read=.env scripts/trends.ts", "clean:deps": "deno cache --reload src/app.ts", "db:populate-search": "deno run -A --env-file --deny-read=.env scripts/db-populate-search.ts", - "db:populate-mime-type": "deno run -A --env-file --deny-read=.env scripts/db-populate-mime-type.ts", "vapid": "deno run scripts/vapid.ts" }, "unstable": [ diff --git a/scripts/db-populate-mime-type.ts b/scripts/db-populate-mime-type.ts deleted file mode 100644 index 9608b80e..00000000 --- a/scripts/db-populate-mime-type.ts +++ /dev/null @@ -1,29 +0,0 @@ -import { Storages } from '@/storages.ts'; - -const store = await Storages.db(); -const kysely = await Storages.kysely(); - -for await (const msg of store.req([{ kinds: [1] }])) { // Only kind 1 can contain media in Ditto? - if (msg[0] === 'EVENT') { - const event = msg[2]; - - const imeta = event.tags.find(([value]) => value === 'imeta'); - if (!imeta) continue; - - const mime_type = imeta.find((value) => value?.split(' ')[0] === 'm')?.split(' ')[1]; - if (!mime_type) continue; - - try { - await kysely.updateTable('nostr_events') - .set('mime_type', mime_type) - .where('id', '=', event.id) - .execute(); - } catch { - // do nothing - } - } else { - break; - } -} - -Deno.exit(); diff --git a/src/controllers/api/accounts.ts b/src/controllers/api/accounts.ts index 46188fae..abbc084b 100644 --- a/src/controllers/api/accounts.ts +++ b/src/controllers/api/accounts.ts @@ -197,7 +197,7 @@ const accountStatusesQuerySchema = z.object({ limit: z.coerce.number().nonnegative().transform((v) => Math.min(v, 40)).catch(20), exclude_replies: booleanParamSchema.optional(), tagged: z.string().optional(), - only_media: z.coerce.boolean().catch(false), + only_media: z.boolean().optional(), }); const accountStatusesController: AppController = async (c) => { @@ -242,7 +242,7 @@ const accountStatusesController: 
/**
 * Add the `search_ext` JSONB column to `nostr_events`, constrain it to hold
 * JSON objects only, and index it with GIN.
 *
 * @param db Kysely instance the migration runs against.
 */
export async function up(db: Kysely<unknown>): Promise<void> {
  await db.schema
    .alterTable('nostr_events')
    // The default is given as a raw SQL literal: Kysely inlines DDL defaults
    // as immediate values and cannot serialize a plain JS object such as `{}`.
    .addColumn('search_ext', 'jsonb', (col) => col.notNull().defaultTo(sql`'{}'::jsonb`))
    .execute();

  await db.schema
    .alterTable('nostr_events')
    .addCheckConstraint('nostr_events_search_ext_chk', sql`jsonb_typeof(search_ext) = 'object'`)
    .execute();

  await db.schema
    .createIndex('nostr_events_search_ext_idx').using('gin')
    .on('nostr_events')
    .column('search_ext')
    .ifNotExists()
    .execute();
}

/**
 * Revert `up`: drop the GIN index, then the check constraint, then the
 * `search_ext` column itself (dependents first, column last).
 *
 * @param db Kysely instance the migration runs against.
 */
export async function down(db: Kysely<unknown>): Promise<void> {
  // No `.on(table)` here: PostgreSQL's DROP INDEX has no ON clause, and the
  // index name alone identifies it.
  await db.schema
    .dropIndex('nostr_events_search_ext_idx')
    .ifExists()
    .execute();

  await db.schema
    .alterTable('nostr_events')
    .dropConstraint('nostr_events_search_ext_chk')
    // Guarded like the index drop so a partially applied `up` can still be reverted.
    .ifExists()
    .execute();

  await db.schema
    .alterTable('nostr_events')
    .dropColumn('search_ext')
    .execute();
}
*/ -async function setMimeType(event: NostrEvent): Promise { - const imeta = event.tags.find(([value]) => value === 'imeta'); - if (!imeta) return; - - const mime_type = imeta.find((value) => value?.split(' ')[0] === 'm')?.split(' ')[1]; - if (!mime_type) return; - - const kysely = await Storages.kysely(); - try { - await kysely.updateTable('nostr_events') - .set('mime_type', mime_type) - .where('id', '=', event.id) - .execute(); - } catch { - // do nothing - } -} - /** Determine if the event is being received in a timely manner. */ function isFresh(event: NostrEvent): boolean { return eventAge(event) < Time.minutes(1); diff --git a/src/storages/EventsDB.test.ts b/src/storages/EventsDB.test.ts index 8dc09859..70be622e 100644 --- a/src/storages/EventsDB.test.ts +++ b/src/storages/EventsDB.test.ts @@ -65,8 +65,8 @@ Deno.test('query events with language search filter', async () => { await store.event(en); await store.event(es); - await kysely.updateTable('nostr_events').set('language', 'en').where('id', '=', en.id).execute(); - await kysely.updateTable('nostr_events').set('language', 'es').where('id', '=', es.id).execute(); + await kysely.updateTable('nostr_events').set('search_ext', { language: 'en' }).where('id', '=', en.id).execute(); + await kysely.updateTable('nostr_events').set('search_ext', { language: 'es' }).where('id', '=', es.id).execute(); assertEquals(await store.query([{ search: 'language:en' }]), [en]); assertEquals(await store.query([{ search: 'language:es' }]), [es]); diff --git a/src/storages/EventsDB.ts b/src/storages/EventsDB.ts index cc6f1170..e77e07bf 100644 --- a/src/storages/EventsDB.ts +++ b/src/storages/EventsDB.ts @@ -1,11 +1,10 @@ // deno-lint-ignore-file require-await -import { LanguageCode } from 'iso-639-1'; -import { NPostgres, NPostgresSchema } from '@nostrify/db'; +import { NPostgres } from '@nostrify/db'; import { NIP50, NKinds, NostrEvent, NostrFilter, NSchema as n } from '@nostrify/nostrify'; import { logi } from '@soapbox/logi'; 
/**
 * Build the flat key/value extensions that are indexed alongside an event.
 *
 * Keys that may be set:
 * - `reply`: for kind 1 events, `'true'` or `'false'` depending on whether the
 *   event has any `e` tag.
 * - `language`: for kind 1 events, the value returned by `detectLanguage` for
 *   the content, when it returns one.
 * - `media`: `'true'` when the event has at least one `imeta` tag.
 * - `video`: `'true'` when the event has `imeta` tags and every one of them
 *   carries an `m` entry starting with `video/`.
 *
 * @param event The event to derive extensions from.
 * @returns A map of extension name to string value; keys that do not apply are omitted.
 */
static indexExtensions(event: NostrEvent): Record<string, string> {
  const ext: Record<string, string> = {};

  if (event.kind === 1) {
    ext.reply = event.tags.some(([name]) => name === 'e').toString();

    // 0.90 is forwarded as the second argument of `detectLanguage`
    // (presumably a minimum confidence; confirm against that helper).
    const language = detectLanguage(event.content, 0.90);

    if (language) {
      ext.language = language;
    }
  }

  // Each `imeta` tag becomes a list of [name, value] pairs. Every entry after
  // the tag name is split at its first space; the remainder (spaces included)
  // is the value.
  const imeta: string[][][] = event.tags
    .filter(([name]) => name === 'imeta')
    .map(([_, ...entries]) =>
      entries.map((entry) => {
        const parts = entry.split(' ');
        return [parts[0], parts.slice(1).join(' ')];
      })
    );

  if (imeta.length) {
    ext.media = 'true';

    // This check must stay inside the length guard: `[].every(...)` is always
    // true, so without it every event lacking `imeta` tags would be flagged as video.
    const allVideo = imeta.every((pairs) =>
      pairs.some(([name, value]) => name === 'm' && value.startsWith('video/'))
    );

    if (allVideo) {
      ext.video = 'true';
    }
  }

  return ext;
}
SelectQueryBuilder; - - const languages = new Set(); - let exact_mime_type: string | undefined; - let partial_mime_type: string | undefined; - let only_media: boolean | undefined; - - for (const token of tokens) { - if (typeof token === 'object' && token.key === 'language') { - languages.add(token.value); - } - if (typeof token === 'object' && token.key === 'exact_mime_type') { - exact_mime_type = token.value; - } - if (typeof token === 'object' && token.key === 'partial_mime_type') { - partial_mime_type = token.value; - } - if (typeof token === 'object' && token.key === 'only_media') { - if (token.value === 'true') only_media = true; - if (token.value === 'false') only_media = false; - } - } - - if (languages.size) { - query = query.where('language', 'in', [...languages]); - } - if (exact_mime_type) { - query = query.where('mime_type', '=', exact_mime_type); - } - if (partial_mime_type) { - query = query.where( - (eb) => eb.fn('split_part', [eb.ref('mime_type'), eb.val('/'), eb.val(1)]), - '=', - partial_mime_type, - ); - } - if (only_media) query = query.where('mime_type', 'is not', null); - if (only_media === false) query = query.where('mime_type', 'is', null); - - return query; - } - - return super.getFilterQuery(trx, filter); - } - /** Get events for filters from the database. */ override async query( filters: NostrFilter[], @@ -235,29 +217,6 @@ class EventsDB extends NPostgres { return super.query(filters, { ...opts, timeout: opts.timeout ?? this.opts.timeout }); } - /** Parse an event row from the database. 
*/ - protected override parseEventRow(row: DittoTables['nostr_events']): DittoEvent { - const event: DittoEvent = { - id: row.id, - kind: row.kind, - pubkey: row.pubkey, - content: row.content, - created_at: Number(row.created_at), - tags: row.tags, - sig: row.sig, - }; - - if (this.opts.pure) { - return event; - } - - if (row.language) { - event.language = row.language as LanguageCode; - } - - return event; - } - /** Delete events based on filters from the database. */ override async remove(filters: NostrFilter[], opts: { signal?: AbortSignal; timeout?: number } = {}): Promise { logi({ level: 'debug', ns: 'ditto.remove', source: 'db', filters: filters as JsonValue }); diff --git a/src/trends.test.ts b/src/trends.test.ts index 47b79eb4..79eaf8e0 100644 --- a/src/trends.test.ts +++ b/src/trends.test.ts @@ -93,12 +93,12 @@ Deno.test("getTrendingTagValues(): 'e' tag and WITH language parameter", async ( } await db.kysely.updateTable('nostr_events') - .set('language', 'pt') + .set('search_ext', { language: 'pt' }) .where('id', '=', post1.id) .execute(); await db.kysely.updateTable('nostr_events') - .set('language', 'en') + .set('search_ext', { language: 'en' }) .where('id', '=', post2.id) .execute(); diff --git a/src/trends.ts b/src/trends.ts index ed0ea930..e4da152d 100644 --- a/src/trends.ts +++ b/src/trends.ts @@ -145,7 +145,7 @@ export async function updateTrendingEvents(): Promise { const rows = await kysely .selectFrom('nostr_events') .select('nostr_events.id') - .where('nostr_events.language', '=', language) + .where(sql`nostr_events.search_ext->>'language'`, '=', language) .where('nostr_events.created_at', '>=', yesterday) .where('nostr_events.created_at', '<=', now) .execute(); diff --git a/src/utils/search.test.ts b/src/utils/search.test.ts index d7073a39..71f96de2 100644 --- a/src/utils/search.test.ts +++ b/src/utils/search.test.ts @@ -54,11 +54,13 @@ Deno.test('Searching for posts work', async () => { const event = genEvent({ content: "I'm not an orphan. 
Death is my importance", kind: 1 }); await db.store.event(event); - await db.kysely.updateTable('nostr_events').set('language', 'en').where('id', '=', event.id).execute(); + await db.kysely.updateTable('nostr_events').set('search_ext', { language: 'en' }).where('id', '=', event.id) + .execute(); const event2 = genEvent({ content: 'The more I explore is the more I fall in love with the music I make.', kind: 1 }); await db.store.event(event2); - await db.kysely.updateTable('nostr_events').set('language', 'en').where('id', '=', event2.id).execute(); + await db.kysely.updateTable('nostr_events').set('search_ext', { language: 'en' }).where('id', '=', event2.id) + .execute(); assertEquals( await getIdsBySearch(db.kysely, { q: 'Death is my importance', limit: 1, offset: 0 }), // ordered words diff --git a/src/utils/search.ts b/src/utils/search.ts index 649afdd6..9482bcfc 100644 --- a/src/utils/search.ts +++ b/src/utils/search.ts @@ -75,7 +75,7 @@ export async function getIdsBySearch( } if (languages.size) { - query = query.where('language', 'in', [...languages]); + query = query.where(sql`search_ext->>'language'`, 'in', [...languages]); } if (domains.size) {