Merge branch 'feat-search-mime-type' into 'main'

feat: add mime_type column in nostr_events, add the NIP 50 search extensions: exact_mime_type, partial_mime_type, only_media

See merge request soapbox-pub/ditto!623
This commit is contained in:
Alex Gleason 2025-02-03 01:46:54 +00:00
commit 3d221aa357
12 changed files with 97 additions and 109 deletions

View file

@ -45,7 +45,7 @@
"@lambdalisue/async": "jsr:@lambdalisue/async@^2.1.1",
"@negrel/webpush": "jsr:@negrel/webpush@^0.3.0",
"@noble/secp256k1": "npm:@noble/secp256k1@^2.0.0",
"@nostrify/db": "jsr:@nostrify/db@^0.36.2",
"@nostrify/db": "jsr:@nostrify/db@^0.37.0",
"@nostrify/nostrify": "jsr:@nostrify/nostrify@^0.38.0",
"@nostrify/policies": "jsr:@nostrify/policies@^0.36.1",
"@nostrify/types": "jsr:@nostrify/types@^0.36.0",

26
deno.lock generated
View file

@ -30,11 +30,10 @@
"jsr:@lambdalisue/async@^2.1.1": "2.1.1",
"jsr:@negrel/http-ece@0.6.0": "0.6.0",
"jsr:@negrel/webpush@0.3": "0.3.0",
"jsr:@nostrify/db@~0.36.2": "0.36.2",
"jsr:@nostrify/db@0.37": "0.37.0",
"jsr:@nostrify/nostrify@0.31": "0.31.0",
"jsr:@nostrify/nostrify@0.32": "0.32.0",
"jsr:@nostrify/nostrify@0.36": "0.36.2",
"jsr:@nostrify/nostrify@0.37": "0.37.0",
"jsr:@nostrify/nostrify@0.38": "0.38.0",
"jsr:@nostrify/nostrify@~0.22.1": "0.22.5",
"jsr:@nostrify/nostrify@~0.22.4": "0.22.4",
@ -349,10 +348,10 @@
"jsr:@std/path@0.224.0"
]
},
"@nostrify/db@0.36.2": {
"integrity": "6bf079b44fcb3ff5a85eadf9a9d4eb677fc770f1c80ad966602aa3d9dd8c88e8",
"@nostrify/db@0.37.0": {
"integrity": "77398757ff52b1cf29ad3f610a8d3fcb8da37dd3300264baa4c318b4036684ab",
"dependencies": [
"jsr:@nostrify/nostrify@0.37",
"jsr:@nostrify/nostrify@0.38",
"jsr:@nostrify/types@0.36",
"npm:kysely@~0.27.3",
"npm:nostr-tools@^2.10.4"
@ -456,21 +455,6 @@
"npm:zod"
]
},
"@nostrify/nostrify@0.37.0": {
"integrity": "fa1439cc5e9a74986c4fb799a38a9ed7bd8663c62ae2a9363ca9b987548e27a0",
"dependencies": [
"jsr:@nostrify/types@0.36",
"jsr:@std/crypto",
"jsr:@std/encoding@~0.224.1",
"npm:@scure/base",
"npm:@scure/bip32",
"npm:@scure/bip39",
"npm:lru-cache@^10.2.0",
"npm:nostr-tools@^2.7.0",
"npm:websocket-ts",
"npm:zod"
]
},
"@nostrify/nostrify@0.38.0": {
"integrity": "9ec7920057ee3a4dcbaef7e706dedea622bfdfdf0f6aac11047443f88d953deb",
"dependencies": [
@ -2364,7 +2348,7 @@
"jsr:@hono/hono@^4.4.6",
"jsr:@lambdalisue/async@^2.1.1",
"jsr:@negrel/webpush@0.3",
"jsr:@nostrify/db@~0.36.2",
"jsr:@nostrify/db@0.37",
"jsr:@nostrify/nostrify@0.38",
"jsr:@nostrify/policies@~0.36.1",
"jsr:@nostrify/types@0.36",

View file

@ -197,12 +197,13 @@ const accountStatusesQuerySchema = z.object({
limit: z.coerce.number().nonnegative().transform((v) => Math.min(v, 40)).catch(20),
exclude_replies: booleanParamSchema.optional(),
tagged: z.string().optional(),
// Parsed from `c.req.query()`, where values arrive as strings; `z.boolean()` does not coerce and
// would reject them, so reuse the same boolean param schema as `exclude_replies`.
only_media: booleanParamSchema.optional(),
});
const accountStatusesController: AppController = async (c) => {
const pubkey = c.req.param('pubkey');
const { since, until } = c.get('pagination');
const { pinned, limit, exclude_replies, tagged } = accountStatusesQuerySchema.parse(c.req.query());
const { pinned, limit, exclude_replies, tagged, only_media } = accountStatusesQuerySchema.parse(c.req.query());
const { signal } = c.req.raw;
const store = await Storages.db();
@ -240,6 +241,10 @@ const accountStatusesController: AppController = async (c) => {
limit,
};
if (only_media) {
filter.search = 'media:true';
}
if (tagged) {
filter['#t'] = [tagged];
}

View file

@ -3,7 +3,6 @@ import { Generated } from 'kysely';
import { NPostgresSchema } from '@nostrify/db';
export interface DittoTables extends NPostgresSchema {
nostr_events: NostrEventsRow;
auth_tokens: AuthTokenRow;
author_stats: AuthorStatsRow;
event_stats: EventStatsRow;
@ -12,10 +11,6 @@ export interface DittoTables extends NPostgresSchema {
push_subscriptions: PushSubscriptionRow;
}
/** Row type for `nostr_events`: the `NPostgresSchema` row extended with a nullable `language` column. */
type NostrEventsRow = NPostgresSchema['nostr_events'] & {
language: string | null;
};
interface AuthorStatsRow {
pubkey: string;
followers_count: number;

View file

@ -0,0 +1,38 @@
import { Kysely, sql } from 'kysely';
/**
 * Forward migration: adds the `search_ext` JSONB column to `nostr_events`,
 * constrains it to hold JSON objects only, and creates a GIN index on it.
 */
export async function up(db: Kysely<any>): Promise<void> {
  const table = 'nostr_events';
  const column = 'search_ext';

  // Step 1: the column itself, NOT NULL with an empty-object default.
  const addColumn = db.schema
    .alterTable(table)
    .addColumn(column, 'jsonb', (builder) => builder.notNull().defaultTo(sql`'{}'::jsonb`));
  await addColumn.execute();

  // Step 2: a check constraint requiring the value to be a JSON object.
  const addCheck = db.schema
    .alterTable(table)
    .addCheckConstraint('nostr_events_search_ext_chk', sql`jsonb_typeof(search_ext) = 'object'`);
  await addCheck.execute();

  // Step 3: a GIN index over the column (skipped if it already exists).
  const addIndex = db.schema
    .createIndex('nostr_events_search_ext_idx')
    .using('gin')
    .on(table)
    .column(column)
    .ifNotExists();
  await addIndex.execute();
}
/**
 * Reverse migration: drops the `search_ext` index, then its check constraint,
 * then the column itself from `nostr_events`.
 */
export async function down(db: Kysely<any>): Promise<void> {
  const table = 'nostr_events';

  // Index first (tolerating its absence).
  const removeIndex = db.schema
    .dropIndex('nostr_events_search_ext_idx')
    .on(table)
    .ifExists();
  await removeIndex.execute();

  // Then the object-only check constraint.
  const removeCheck = db.schema
    .alterTable(table)
    .dropConstraint('nostr_events_search_ext_chk');
  await removeCheck.execute();

  // Finally the column.
  const removeColumn = db.schema
    .alterTable(table)
    .dropColumn('search_ext');
  await removeColumn.execute();
}

View file

@ -15,7 +15,6 @@ import { hydrateEvents } from '@/storages/hydrate.ts';
import { Storages } from '@/storages.ts';
import { eventAge, parseNip05, Time } from '@/utils.ts';
import { getAmount } from '@/utils/bolt11.ts';
import { detectLanguage } from '@/utils/language.ts';
import { errorJson } from '@/utils/log.ts';
import { nip05Cache } from '@/utils/nip05.ts';
import { purifyEvent } from '@/utils/purify.ts';
@ -121,7 +120,6 @@ async function handleEvent(event: DittoEvent, opts: PipelineOpts): Promise<void>
Promise.allSettled([
handleZaps(kysely, event),
parseMetadata(event, opts.signal),
setLanguage(event),
generateSetEvents(event),
])
.then(() =>
@ -237,24 +235,6 @@ async function parseMetadata(event: NostrEvent, signal: AbortSignal): Promise<vo
}
}
/** Detect the language of a kind 1 event and store it on the event's `nostr_events` row. */
async function setLanguage(event: NostrEvent): Promise<void> {
  // Only kind 1 events are processed, and only when a language was detected.
  if (event.kind === 1) {
    const detected = detectLanguage(event.content, 0.90);

    if (detected) {
      const db = await Storages.kysely();

      try {
        const update = db
          .updateTable('nostr_events')
          .set('language', detected)
          .where('id', '=', event.id);
        await update.execute();
      } catch {
        // do nothing
      }
    }
  }
}
/** Determine if the event is being received in a timely manner. */
function isFresh(event: NostrEvent): boolean {
return eventAge(event) < Time.minutes(1);

View file

@ -65,8 +65,8 @@ Deno.test('query events with language search filter', async () => {
await store.event(en);
await store.event(es);
await kysely.updateTable('nostr_events').set('language', 'en').where('id', '=', en.id).execute();
await kysely.updateTable('nostr_events').set('language', 'es').where('id', '=', es.id).execute();
await kysely.updateTable('nostr_events').set('search_ext', { language: 'en' }).where('id', '=', en.id).execute();
await kysely.updateTable('nostr_events').set('search_ext', { language: 'es' }).where('id', '=', es.id).execute();
assertEquals(await store.query([{ search: 'language:en' }]), [en]);
assertEquals(await store.query([{ search: 'language:es' }]), [es]);

View file

@ -1,11 +1,10 @@
// deno-lint-ignore-file require-await
import { LanguageCode } from 'iso-639-1';
import { NPostgres, NPostgresSchema } from '@nostrify/db';
import { NPostgres } from '@nostrify/db';
import { NIP50, NKinds, NostrEvent, NostrFilter, NSchema as n } from '@nostrify/nostrify';
import { logi } from '@soapbox/logi';
import { JsonValue } from '@std/json';
import { Kysely, SelectQueryBuilder } from 'kysely';
import { Kysely } from 'kysely';
import { nip27 } from 'nostr-tools';
import { DittoTables } from '@/db/DittoTables.ts';
@ -15,6 +14,7 @@ import { isNostrId } from '@/utils.ts';
import { abortError } from '@/utils/abort.ts';
import { purifyEvent } from '@/utils/purify.ts';
import { DittoEvent } from '@/interfaces/DittoEvent.ts';
import { detectLanguage } from '@/utils/language.ts';
/** Function to decide whether or not to index a tag. */
type TagCondition = (opts: TagConditionOpts) => boolean;
@ -62,10 +62,44 @@ class EventsDB extends NPostgres {
't': ({ event, count, value }) => (event.kind === 1985 ? count < 20 : count < 5) && value.length < 50,
};
/**
 * Build the search-extension key/value pairs to index for an event.
 *
 * - Kind 1 events get `reply` (`"true"`/`"false"`, whether any `e` tag is present) and,
 *   when `detectLanguage` returns a value, `language`.
 * - Events with at least one `imeta` tag get `media: "true"`.
 * - Events with at least one `imeta` tag, where every `imeta` tag has an `m` entry
 *   starting with `video/`, additionally get `video: "true"`.
 *
 * @param event The event being indexed.
 * @returns A flat map of extension name to string value (possibly empty).
 */
static indexExtensions(event: NostrEvent): Record<string, string> {
  const ext: Record<string, string> = {};

  if (event.kind === 1) {
    ext.reply = event.tags.some(([name]) => name === 'e').toString();

    const language = detectLanguage(event.content, 0.90);

    if (language) {
      ext.language = language;
    }
  }

  // Each `imeta` tag is `['imeta', 'key value...', ...]`; turn every entry into a `[key, value]`
  // pair, splitting on the first space only so values containing spaces stay intact.
  const imeta: string[][][] = event.tags
    .filter(([name]) => name === 'imeta')
    .map(([_, ...entries]) =>
      entries.map((entry) => {
        const [key, ...rest] = entry.split(' ');
        return [key, rest.join(' ')];
      })
    );

  if (imeta.length) {
    ext.media = 'true';

    // This check must stay inside the `imeta.length` guard: `every` on an empty array is
    // vacuously true, which would otherwise mark every event without media as a video.
    if (imeta.every((tags) => tags.some(([name, value]) => name === 'm' && value.startsWith('video/')))) {
      ext.video = 'true';
    }
  }

  return ext;
}
/**
 * Create the store on top of `opts.kysely`, passing this class's static tag, search-text
 * and search-extension indexers to the `NPostgres` base class. `opts` is kept as a private field.
 */
constructor(private opts: EventsDBOpts) {
super(opts.kysely, {
indexTags: EventsDB.indexTags,
indexSearch: EventsDB.searchText,
indexExtensions: EventsDB.indexExtensions,
});
}
@ -155,33 +189,6 @@ class EventsDB extends NPostgres {
}
}
/**
 * Build the query for one filter, adding support for `language:` tokens in `filter.search`.
 *
 * Plain-text tokens are forwarded to the base implementation as the search string;
 * `language:<code>` tokens are collected and applied as a `language IN (...)` condition.
 * Filters without a search string are delegated to the base implementation unchanged.
 */
protected override getFilterQuery(trx: Kysely<NPostgresSchema>, filter: NostrFilter) {
  if (!filter.search) {
    return super.getFilterQuery(trx, filter);
  }

  const parsed = NIP50.parseInput(filter.search);

  // Only the bare string tokens make up the text search passed to the base class.
  const plainText = parsed.filter((part) => typeof part === 'string').join(' ');

  const base = super.getFilterQuery(trx, { ...filter, search: plainText }) as SelectQueryBuilder<
    DittoTables,
    'nostr_events',
    DittoTables['nostr_events']
  >;

  // Gather the distinct values of every `language:` token.
  const wanted = new Set<string>(
    parsed.flatMap((part) => (typeof part === 'object' && part.key === 'language') ? [part.value] : []),
  );

  return wanted.size ? base.where('language', 'in', [...wanted]) : base;
}
/** Get events for filters from the database. */
override async query(
filters: NostrFilter[],
@ -210,29 +217,6 @@ class EventsDB extends NPostgres {
return super.query(filters, { ...opts, timeout: opts.timeout ?? this.opts.timeout });
}
/**
 * Convert a `nostr_events` row into a `DittoEvent`.
 * Unless the store is in `pure` mode, a non-empty `language` column is copied onto the event.
 */
protected override parseEventRow(row: DittoTables['nostr_events']): DittoEvent {
  const { id, kind, pubkey, content, tags, sig } = row;

  const parsed: DittoEvent = {
    id,
    kind,
    pubkey,
    content,
    created_at: Number(row.created_at),
    tags,
    sig,
  };

  // Extra fields are only attached when not in pure mode.
  if (!this.opts.pure && row.language) {
    parsed.language = row.language as LanguageCode;
  }

  return parsed;
}
/** Delete events based on filters from the database. */
override async remove(filters: NostrFilter[], opts: { signal?: AbortSignal; timeout?: number } = {}): Promise<void> {
logi({ level: 'debug', ns: 'ditto.remove', source: 'db', filters: filters as JsonValue });

View file

@ -93,12 +93,12 @@ Deno.test("getTrendingTagValues(): 'e' tag and WITH language parameter", async (
}
await db.kysely.updateTable('nostr_events')
.set('language', 'pt')
.set('search_ext', { language: 'pt' })
.where('id', '=', post1.id)
.execute();
await db.kysely.updateTable('nostr_events')
.set('language', 'en')
.set('search_ext', { language: 'en' })
.where('id', '=', post2.id)
.execute();

View file

@ -145,7 +145,7 @@ export async function updateTrendingEvents(): Promise<void> {
const rows = await kysely
.selectFrom('nostr_events')
.select('nostr_events.id')
.where('nostr_events.language', '=', language)
.where(sql`nostr_events.search_ext->>'language'`, '=', language)
.where('nostr_events.created_at', '>=', yesterday)
.where('nostr_events.created_at', '<=', now)
.execute();

View file

@ -54,11 +54,13 @@ Deno.test('Searching for posts work', async () => {
const event = genEvent({ content: "I'm not an orphan. Death is my importance", kind: 1 });
await db.store.event(event);
await db.kysely.updateTable('nostr_events').set('language', 'en').where('id', '=', event.id).execute();
await db.kysely.updateTable('nostr_events').set('search_ext', { language: 'en' }).where('id', '=', event.id)
.execute();
const event2 = genEvent({ content: 'The more I explore is the more I fall in love with the music I make.', kind: 1 });
await db.store.event(event2);
await db.kysely.updateTable('nostr_events').set('language', 'en').where('id', '=', event2.id).execute();
await db.kysely.updateTable('nostr_events').set('search_ext', { language: 'en' }).where('id', '=', event2.id)
.execute();
assertEquals(
await getIdsBySearch(db.kysely, { q: 'Death is my importance', limit: 1, offset: 0 }), // ordered words

View file

@ -75,7 +75,7 @@ export async function getIdsBySearch(
}
if (languages.size) {
query = query.where('language', 'in', [...languages]);
query = query.where(sql`search_ext->>'language'`, 'in', [...languages]);
}
if (domains.size) {