Rework language and media columns to use Nostrify search_ext column

This commit is contained in:
Alex Gleason 2025-02-02 19:32:43 -06:00
parent baad8821f5
commit f04ca2b6ff
No known key found for this signature in database
GPG key ID: 7211D1F99744FBB7
12 changed files with 66 additions and 183 deletions

View file

@ -22,7 +22,6 @@
"trends": "deno run -A --env-file --deny-read=.env scripts/trends.ts",
"clean:deps": "deno cache --reload src/app.ts",
"db:populate-search": "deno run -A --env-file --deny-read=.env scripts/db-populate-search.ts",
"db:populate-mime-type": "deno run -A --env-file --deny-read=.env scripts/db-populate-mime-type.ts",
"vapid": "deno run scripts/vapid.ts"
},
"unstable": [

View file

@ -1,29 +0,0 @@
import { Storages } from '@/storages.ts';
// Backfill script: copies the MIME type advertised in each kind 1 event's
// `imeta` tag into the `mime_type` column of `nostr_events`.
const store = await Storages.db();
const kysely = await Storages.kysely();

// Only kind 1 can contain media in Ditto?
for await (const msg of store.req([{ kinds: [1] }])) {
  // Any message other than EVENT ends the scan.
  if (msg[0] !== 'EVENT') break;

  const event = msg[2];

  // Use the first `imeta` tag, and within it the entry whose first
  // space-separated token is `m`; the second token is the MIME type.
  const imetaTag = event.tags.find((tag) => tag[0] === 'imeta');
  const mimeEntry = imetaTag?.find((entry) => entry?.split(' ')[0] === 'm');
  const mimeType = mimeEntry?.split(' ')[1];
  if (!mimeType) continue;

  try {
    await kysely
      .updateTable('nostr_events')
      .set('mime_type', mimeType)
      .where('id', '=', event.id)
      .execute();
  } catch {
    // Failures are deliberately ignored; move on to the next event.
  }
}

Deno.exit();

View file

@ -197,7 +197,7 @@ const accountStatusesQuerySchema = z.object({
limit: z.coerce.number().nonnegative().transform((v) => Math.min(v, 40)).catch(20),
exclude_replies: booleanParamSchema.optional(),
tagged: z.string().optional(),
only_media: z.coerce.boolean().catch(false),
only_media: z.boolean().optional(),
});
const accountStatusesController: AppController = async (c) => {
@ -242,7 +242,7 @@ const accountStatusesController: AppController = async (c) => {
};
if (only_media) {
filter.search = 'only_media:true';
filter.search = 'media:true';
}
if (tagged) {

View file

@ -3,7 +3,6 @@ import { Generated } from 'kysely';
import { NPostgresSchema } from '@nostrify/db';
export interface DittoTables extends NPostgresSchema {
nostr_events: NostrEventsRow;
auth_tokens: AuthTokenRow;
author_stats: AuthorStatsRow;
event_stats: EventStatsRow;
@ -12,11 +11,6 @@ export interface DittoTables extends NPostgresSchema {
push_subscriptions: PushSubscriptionRow;
}
type NostrEventsRow = NPostgresSchema['nostr_events'] & {
language: string | null;
mime_type: string | null;
};
interface AuthorStatsRow {
pubkey: string;
followers_count: number;

View file

@ -3,38 +3,36 @@ import { Kysely, sql } from 'kysely';
export async function up(db: Kysely<any>): Promise<void> {
await db.schema
.alterTable('nostr_events')
.addColumn('mime_type', 'text').execute();
await db.schema
.createIndex('nostr_events_mime_type_prefix_idx')
.on('nostr_events')
.expression(sql`split_part(mime_type, '/', 1)`)
.column('mime_type')
.ifNotExists()
.addColumn('search_ext', 'jsonb', (col) => col.notNull().defaultTo({}))
.execute();
await db.schema
.createIndex('nostr_events_mime_type_hash_idx')
.alterTable('nostr_events')
.addCheckConstraint('nostr_events_search_ext_chk', sql`jsonb_typeof(search_ext) = 'object'`)
.execute();
await db.schema
.createIndex('nostr_events_search_ext_idx').using('gin')
.on('nostr_events')
.column('mime_type')
.using('hash')
.column('search_ext')
.ifNotExists()
.execute();
}
export async function down(db: Kysely<any>): Promise<void> {
await db.schema
.dropIndex('nostr_events_search_ext_idx')
.on('nostr_events')
.ifExists()
.execute();
await db.schema
.alterTable('nostr_events')
.dropColumn('mime_type')
.dropConstraint('nostr_events_search_ext_chk')
.execute();
await db.schema
.dropIndex('nostr_events_mime_type_prefix_idx')
.ifExists()
.execute();
await db.schema
.dropIndex('nostr_events_mime_type_hash_idx')
.ifExists()
.alterTable('nostr_events')
.dropColumn('search_ext')
.execute();
}

View file

@ -15,7 +15,6 @@ import { hydrateEvents } from '@/storages/hydrate.ts';
import { Storages } from '@/storages.ts';
import { eventAge, parseNip05, Time } from '@/utils.ts';
import { getAmount } from '@/utils/bolt11.ts';
import { detectLanguage } from '@/utils/language.ts';
import { errorJson } from '@/utils/log.ts';
import { nip05Cache } from '@/utils/nip05.ts';
import { purifyEvent } from '@/utils/purify.ts';
@ -121,8 +120,6 @@ async function handleEvent(event: DittoEvent, opts: PipelineOpts): Promise<void>
Promise.allSettled([
handleZaps(kysely, event),
parseMetadata(event, opts.signal),
setLanguage(event),
setMimeType(event),
generateSetEvents(event),
])
.then(() =>
@ -238,43 +235,6 @@ async function parseMetadata(event: NostrEvent, signal: AbortSignal): Promise<vo
}
}
/**
 * Detect the language of a kind 1 event and store it in the `language`
 * column of the event's `nostr_events` row.
 *
 * Does nothing for other kinds, or when `detectLanguage` returns a falsy
 * result for the 0.90 threshold. Database errors are swallowed.
 */
async function setLanguage(event: NostrEvent): Promise<void> {
  if (event.kind === 1) {
    const detected = detectLanguage(event.content, 0.90);

    if (detected) {
      const db = await Storages.kysely();

      try {
        await db
          .updateTable('nostr_events')
          .set('language', detected)
          .where('id', '=', event.id)
          .execute();
      } catch {
        // Failures are deliberately ignored.
      }
    }
  }
}
/**
 * Read the MIME type from the event's first `imeta` tag and store it in the
 * `mime_type` column of the event's `nostr_events` row.
 *
 * The MIME type is the second space-separated token of the `imeta` entry
 * whose first token is `m`. Does nothing when no such value exists.
 * Database errors are swallowed.
 */
async function setMimeType(event: NostrEvent): Promise<void> {
  const imetaTag = event.tags.find((tag) => tag[0] === 'imeta');
  const mimeEntry = imetaTag?.find((entry) => entry?.split(' ')[0] === 'm');
  const mimeType = mimeEntry?.split(' ')[1];
  if (!mimeType) return;

  const db = await Storages.kysely();

  try {
    await db
      .updateTable('nostr_events')
      .set('mime_type', mimeType)
      .where('id', '=', event.id)
      .execute();
  } catch {
    // Failures are deliberately ignored.
  }
}
/** Determine if the event is being received in a timely manner. */
function isFresh(event: NostrEvent): boolean {
return eventAge(event) < Time.minutes(1);

View file

@ -65,8 +65,8 @@ Deno.test('query events with language search filter', async () => {
await store.event(en);
await store.event(es);
await kysely.updateTable('nostr_events').set('language', 'en').where('id', '=', en.id).execute();
await kysely.updateTable('nostr_events').set('language', 'es').where('id', '=', es.id).execute();
await kysely.updateTable('nostr_events').set('search_ext', { language: 'en' }).where('id', '=', en.id).execute();
await kysely.updateTable('nostr_events').set('search_ext', { language: 'es' }).where('id', '=', es.id).execute();
assertEquals(await store.query([{ search: 'language:en' }]), [en]);
assertEquals(await store.query([{ search: 'language:es' }]), [es]);

View file

@ -1,11 +1,10 @@
// deno-lint-ignore-file require-await
import { LanguageCode } from 'iso-639-1';
import { NPostgres, NPostgresSchema } from '@nostrify/db';
import { NPostgres } from '@nostrify/db';
import { NIP50, NKinds, NostrEvent, NostrFilter, NSchema as n } from '@nostrify/nostrify';
import { logi } from '@soapbox/logi';
import { JsonValue } from '@std/json';
import { Kysely, SelectQueryBuilder } from 'kysely';
import { Kysely } from 'kysely';
import { nip27 } from 'nostr-tools';
import { DittoTables } from '@/db/DittoTables.ts';
@ -15,6 +14,7 @@ import { isNostrId } from '@/utils.ts';
import { abortError } from '@/utils/abort.ts';
import { purifyEvent } from '@/utils/purify.ts';
import { DittoEvent } from '@/interfaces/DittoEvent.ts';
import { detectLanguage } from '@/utils/language.ts';
/** Function to decide whether or not to index a tag. */
type TagCondition = (opts: TagConditionOpts) => boolean;
@ -62,10 +62,44 @@ class EventsDB extends NPostgres {
't': ({ event, count, value }) => (event.kind === 1985 ? count < 20 : count < 5) && value.length < 50,
};
/**
 * Build the extension key/value pairs for an event. This function is handed
 * to `NPostgres` as its `indexExtensions` option.
 *
 * Keys produced:
 * - `reply`: `'true'`/`'false'`, kind 1 only; whether the event has any `e` tag.
 * - `language`: kind 1 only; set when `detectLanguage` returns a result at the 0.90 threshold.
 * - `media`: `'true'` when the event has at least one `imeta` tag.
 * - `video`: `'true'` when the event has `imeta` tags and every one of them
 *   carries an `m` entry whose value starts with `video/`.
 *
 * @param event The event to derive extensions from.
 * @returns A flat string-to-string record; keys that don't apply are omitted.
 */
static indexExtensions(event: NostrEvent): Record<string, string> {
  const ext: Record<string, string> = {};

  if (event.kind === 1) {
    ext.reply = event.tags.some(([name]) => name === 'e').toString();

    const language = detectLanguage(event.content, 0.90);

    if (language) {
      ext.language = language;
    }
  }

  // Each `imeta` tag becomes a list of [name, value] pairs, split on the
  // first space of every entry after the tag name.
  const imeta: string[][][] = event.tags
    .filter(([name]) => name === 'imeta')
    .map(([_, ...entries]) =>
      entries.map((entry) => {
        const [name, ...rest] = entry.split(' ');
        return [name, rest.join(' ')];
      })
    );

  if (imeta.length) {
    ext.media = 'true';

    // The video check must stay inside the length guard: `[].every()` is
    // vacuously true, so an unguarded check would flag every event without
    // media as a video.
    if (imeta.every((tags) => tags.some(([name, value]) => name === 'm' && value.startsWith('video/')))) {
      ext.video = 'true';
    }
  }

  return ext;
}
constructor(private opts: EventsDBOpts) {
super(opts.kysely, {
indexTags: EventsDB.indexTags,
indexSearch: EventsDB.searchText,
indexExtensions: EventsDB.indexExtensions,
});
}
@ -155,58 +189,6 @@ class EventsDB extends NPostgres {
}
}
/**
 * Build the query for a single filter, adding support for structured search
 * tokens on top of the parent implementation.
 *
 * Plain-string tokens from `filter.search` are re-joined and forwarded to the
 * parent as the search text. Key/value tokens add extra conditions:
 * - `language:<code>` (repeatable): `language` must be one of the given codes.
 * - `exact_mime_type:<type>`: `mime_type` must equal the value.
 * - `partial_mime_type:<prefix>`: the part of `mime_type` before `/` must equal the value.
 * - `only_media:true|false`: `mime_type` must be non-null / null respectively.
 *
 * Filters without a `search` are passed to the parent unchanged.
 */
protected override getFilterQuery(trx: Kysely<NPostgresSchema>, filter: NostrFilter) {
  if (!filter.search) {
    return super.getFilterQuery(trx, filter);
  }

  const tokens = NIP50.parseInput(filter.search);
  const text = tokens.filter((t) => typeof t === 'string').join(' ');

  let query = super.getFilterQuery(trx, { ...filter, search: text }) as SelectQueryBuilder<
    DittoTables,
    'nostr_events',
    DittoTables['nostr_events']
  >;

  const languages = new Set<string>();
  let exactMime: string | undefined;
  let partialMime: string | undefined;
  let onlyMedia: boolean | undefined;

  for (const token of tokens) {
    if (typeof token !== 'object') continue;

    switch (token.key) {
      case 'language':
        languages.add(token.value);
        break;
      case 'exact_mime_type':
        exactMime = token.value;
        break;
      case 'partial_mime_type':
        partialMime = token.value;
        break;
      case 'only_media':
        // Any value other than 'true' or 'false' leaves the flag untouched.
        if (token.value === 'true') onlyMedia = true;
        if (token.value === 'false') onlyMedia = false;
        break;
    }
  }

  if (languages.size) {
    query = query.where('language', 'in', [...languages]);
  }
  if (exactMime) {
    query = query.where('mime_type', '=', exactMime);
  }
  if (partialMime) {
    query = query.where(
      (eb) => eb.fn('split_part', [eb.ref('mime_type'), eb.val('/'), eb.val(1)]),
      '=',
      partialMime,
    );
  }
  if (onlyMedia === true) {
    query = query.where('mime_type', 'is not', null);
  }
  if (onlyMedia === false) {
    query = query.where('mime_type', 'is', null);
  }

  return query;
}
/** Get events for filters from the database. */
override async query(
filters: NostrFilter[],
@ -235,29 +217,6 @@ class EventsDB extends NPostgres {
return super.query(filters, { ...opts, timeout: opts.timeout ?? this.opts.timeout });
}
/**
 * Convert a `nostr_events` row into an event object.
 *
 * `created_at` is coerced with `Number()`. Unless the store was created with
 * `opts.pure`, a non-empty `language` column is copied onto the event.
 */
protected override parseEventRow(row: DittoTables['nostr_events']): DittoEvent {
  const { id, kind, pubkey, content, created_at, tags, sig } = row;

  const event: DittoEvent = {
    id,
    kind,
    pubkey,
    content,
    created_at: Number(created_at),
    tags,
    sig,
  };

  if (!this.opts.pure && row.language) {
    event.language = row.language as LanguageCode;
  }

  return event;
}
/** Delete events based on filters from the database. */
override async remove(filters: NostrFilter[], opts: { signal?: AbortSignal; timeout?: number } = {}): Promise<void> {
logi({ level: 'debug', ns: 'ditto.remove', source: 'db', filters: filters as JsonValue });

View file

@ -93,12 +93,12 @@ Deno.test("getTrendingTagValues(): 'e' tag and WITH language parameter", async (
}
await db.kysely.updateTable('nostr_events')
.set('language', 'pt')
.set('search_ext', { language: 'pt' })
.where('id', '=', post1.id)
.execute();
await db.kysely.updateTable('nostr_events')
.set('language', 'en')
.set('search_ext', { language: 'en' })
.where('id', '=', post2.id)
.execute();

View file

@ -145,7 +145,7 @@ export async function updateTrendingEvents(): Promise<void> {
const rows = await kysely
.selectFrom('nostr_events')
.select('nostr_events.id')
.where('nostr_events.language', '=', language)
.where(sql`nostr_events.search_ext->>'language'`, '=', language)
.where('nostr_events.created_at', '>=', yesterday)
.where('nostr_events.created_at', '<=', now)
.execute();

View file

@ -54,11 +54,13 @@ Deno.test('Searching for posts work', async () => {
const event = genEvent({ content: "I'm not an orphan. Death is my importance", kind: 1 });
await db.store.event(event);
await db.kysely.updateTable('nostr_events').set('language', 'en').where('id', '=', event.id).execute();
await db.kysely.updateTable('nostr_events').set('search_ext', { language: 'en' }).where('id', '=', event.id)
.execute();
const event2 = genEvent({ content: 'The more I explore is the more I fall in love with the music I make.', kind: 1 });
await db.store.event(event2);
await db.kysely.updateTable('nostr_events').set('language', 'en').where('id', '=', event2.id).execute();
await db.kysely.updateTable('nostr_events').set('search_ext', { language: 'en' }).where('id', '=', event2.id)
.execute();
assertEquals(
await getIdsBySearch(db.kysely, { q: 'Death is my importance', limit: 1, offset: 0 }), // ordered words

View file

@ -75,7 +75,7 @@ export async function getIdsBySearch(
}
if (languages.size) {
query = query.where('language', 'in', [...languages]);
query = query.where(sql`search_ext->>'language'`, 'in', [...languages]);
}
if (domains.size) {