mirror of
https://gitlab.com/soapbox-pub/ditto.git
synced 2025-12-06 03:19:46 +00:00
Rework language and media columns to use Nostrify search_ext column
This commit is contained in:
parent
baad8821f5
commit
f04ca2b6ff
12 changed files with 66 additions and 183 deletions
|
|
@ -22,7 +22,6 @@
|
|||
"trends": "deno run -A --env-file --deny-read=.env scripts/trends.ts",
|
||||
"clean:deps": "deno cache --reload src/app.ts",
|
||||
"db:populate-search": "deno run -A --env-file --deny-read=.env scripts/db-populate-search.ts",
|
||||
"db:populate-mime-type": "deno run -A --env-file --deny-read=.env scripts/db-populate-mime-type.ts",
|
||||
"vapid": "deno run scripts/vapid.ts"
|
||||
},
|
||||
"unstable": [
|
||||
|
|
|
|||
|
|
@ -1,29 +0,0 @@
|
|||
import { Storages } from '@/storages.ts';
|
||||
|
||||
// Backfill script: walks the stored kind 1 events and copies the MIME type found in
// each event's first `imeta` tag into the `mime_type` column of its `nostr_events` row.
const store = await Storages.db();
const kysely = await Storages.kysely();

/** Return the second space-separated token of the first `m ...` entry in the first `imeta` tag, if any. */
function findMimeType(tags: string[][]): string | undefined {
  const imetaTag = tags.find((tag) => tag[0] === 'imeta');
  const mimeEntry = imetaTag?.find((entry) => entry?.split(' ')[0] === 'm');
  return mimeEntry?.split(' ')[1];
}

for await (const msg of store.req([{ kinds: [1] }])) { // Only kind 1 can contain media in Ditto?
  // Stop at the first message that is not an EVENT.
  if (msg[0] !== 'EVENT') break;

  const event = msg[2];

  const mimeType = findMimeType(event.tags);
  if (!mimeType) continue;

  try {
    await kysely.updateTable('nostr_events')
      .set('mime_type', mimeType)
      .where('id', '=', event.id)
      .execute();
  } catch {
    // Best-effort: a failed update is ignored and the loop moves on.
  }
}

Deno.exit();
|
||||
|
|
@ -197,7 +197,7 @@ const accountStatusesQuerySchema = z.object({
|
|||
limit: z.coerce.number().nonnegative().transform((v) => Math.min(v, 40)).catch(20),
|
||||
exclude_replies: booleanParamSchema.optional(),
|
||||
tagged: z.string().optional(),
|
||||
only_media: z.coerce.boolean().catch(false),
|
||||
only_media: z.boolean().optional(),
|
||||
});
|
||||
|
||||
const accountStatusesController: AppController = async (c) => {
|
||||
|
|
@ -242,7 +242,7 @@ const accountStatusesController: AppController = async (c) => {
|
|||
};
|
||||
|
||||
if (only_media) {
|
||||
filter.search = 'only_media:true';
|
||||
filter.search = 'media:true';
|
||||
}
|
||||
|
||||
if (tagged) {
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@ import { Generated } from 'kysely';
|
|||
import { NPostgresSchema } from '@nostrify/db';
|
||||
|
||||
export interface DittoTables extends NPostgresSchema {
|
||||
nostr_events: NostrEventsRow;
|
||||
auth_tokens: AuthTokenRow;
|
||||
author_stats: AuthorStatsRow;
|
||||
event_stats: EventStatsRow;
|
||||
|
|
@ -12,11 +11,6 @@ export interface DittoTables extends NPostgresSchema {
|
|||
push_subscriptions: PushSubscriptionRow;
|
||||
}
|
||||
|
||||
type NostrEventsRow = NPostgresSchema['nostr_events'] & {
|
||||
language: string | null;
|
||||
mime_type: string | null;
|
||||
};
|
||||
|
||||
interface AuthorStatsRow {
|
||||
pubkey: string;
|
||||
followers_count: number;
|
||||
|
|
|
|||
|
|
@ -3,38 +3,36 @@ import { Kysely, sql } from 'kysely';
|
|||
export async function up(db: Kysely<any>): Promise<void> {
|
||||
await db.schema
|
||||
.alterTable('nostr_events')
|
||||
.addColumn('mime_type', 'text').execute();
|
||||
|
||||
await db.schema
|
||||
.createIndex('nostr_events_mime_type_prefix_idx')
|
||||
.on('nostr_events')
|
||||
.expression(sql`split_part(mime_type, '/', 1)`)
|
||||
.column('mime_type')
|
||||
.ifNotExists()
|
||||
.addColumn('search_ext', 'jsonb', (col) => col.notNull().defaultTo({}))
|
||||
.execute();
|
||||
|
||||
await db.schema
|
||||
.createIndex('nostr_events_mime_type_hash_idx')
|
||||
.alterTable('nostr_events')
|
||||
.addCheckConstraint('nostr_events_search_ext_chk', sql`jsonb_typeof(search_ext) = 'object'`)
|
||||
.execute();
|
||||
|
||||
await db.schema
|
||||
.createIndex('nostr_events_search_ext_idx').using('gin')
|
||||
.on('nostr_events')
|
||||
.column('mime_type')
|
||||
.using('hash')
|
||||
.column('search_ext')
|
||||
.ifNotExists()
|
||||
.execute();
|
||||
}
|
||||
|
||||
export async function down(db: Kysely<any>): Promise<void> {
|
||||
await db.schema
|
||||
.dropIndex('nostr_events_search_ext_idx')
|
||||
.on('nostr_events')
|
||||
.ifExists()
|
||||
.execute();
|
||||
|
||||
await db.schema
|
||||
.alterTable('nostr_events')
|
||||
.dropColumn('mime_type')
|
||||
.dropConstraint('nostr_events_search_ext_chk')
|
||||
.execute();
|
||||
|
||||
await db.schema
|
||||
.dropIndex('nostr_events_mime_type_prefix_idx')
|
||||
.ifExists()
|
||||
.execute();
|
||||
|
||||
await db.schema
|
||||
.dropIndex('nostr_events_mime_type_hash_idx')
|
||||
.ifExists()
|
||||
.alterTable('nostr_events')
|
||||
.dropColumn('search_ext')
|
||||
.execute();
|
||||
}
|
||||
|
|
@ -15,7 +15,6 @@ import { hydrateEvents } from '@/storages/hydrate.ts';
|
|||
import { Storages } from '@/storages.ts';
|
||||
import { eventAge, parseNip05, Time } from '@/utils.ts';
|
||||
import { getAmount } from '@/utils/bolt11.ts';
|
||||
import { detectLanguage } from '@/utils/language.ts';
|
||||
import { errorJson } from '@/utils/log.ts';
|
||||
import { nip05Cache } from '@/utils/nip05.ts';
|
||||
import { purifyEvent } from '@/utils/purify.ts';
|
||||
|
|
@ -121,8 +120,6 @@ async function handleEvent(event: DittoEvent, opts: PipelineOpts): Promise<void>
|
|||
Promise.allSettled([
|
||||
handleZaps(kysely, event),
|
||||
parseMetadata(event, opts.signal),
|
||||
setLanguage(event),
|
||||
setMimeType(event),
|
||||
generateSetEvents(event),
|
||||
])
|
||||
.then(() =>
|
||||
|
|
@ -238,43 +235,6 @@ async function parseMetadata(event: NostrEvent, signal: AbortSignal): Promise<vo
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Detect the language of a kind 1 event and write it to the `language` column
 * of the event's row in `nostr_events`.
 *
 * Events of any other kind, and events for which no language is detected, are left untouched.
 */
async function setLanguage(event: NostrEvent): Promise<void> {
  // 0.90 is presumably a minimum confidence threshold for detection; confirm against detectLanguage.
  const detected = event.kind === 1 ? detectLanguage(event.content, 0.90) : undefined;
  if (!detected) return;

  const db = await Storages.kysely();

  try {
    await db.updateTable('nostr_events')
      .set('language', detected)
      .where('id', '=', event.id)
      .execute();
  } catch {
    // Best-effort: a failed update is deliberately ignored.
  }
}
|
||||
|
||||
/**
 * Read the MIME type from the event's first `imeta` tag and write it to the
 * `mime_type` column of the event's row in `nostr_events`.
 *
 * The MIME type is the second space-separated token of the first tag entry whose
 * first token is `m`. Events without such an entry are left untouched.
 */
async function setMimeType(event: NostrEvent): Promise<void> {
  const imetaTag = event.tags.find((tag) => tag[0] === 'imeta');
  if (imetaTag === undefined) return;

  const mimeEntry = imetaTag.find((entry) => entry?.split(' ')[0] === 'm');
  const mimeType = mimeEntry?.split(' ')[1];
  if (!mimeType) return;

  const db = await Storages.kysely();

  try {
    await db.updateTable('nostr_events')
      .set('mime_type', mimeType)
      .where('id', '=', event.id)
      .execute();
  } catch {
    // Best-effort: a failed update is deliberately ignored.
  }
}
|
||||
|
||||
/** Determine if the event is being received in a timely manner. */
|
||||
function isFresh(event: NostrEvent): boolean {
|
||||
return eventAge(event) < Time.minutes(1);
|
||||
|
|
|
|||
|
|
@ -65,8 +65,8 @@ Deno.test('query events with language search filter', async () => {
|
|||
await store.event(en);
|
||||
await store.event(es);
|
||||
|
||||
await kysely.updateTable('nostr_events').set('language', 'en').where('id', '=', en.id).execute();
|
||||
await kysely.updateTable('nostr_events').set('language', 'es').where('id', '=', es.id).execute();
|
||||
await kysely.updateTable('nostr_events').set('search_ext', { language: 'en' }).where('id', '=', en.id).execute();
|
||||
await kysely.updateTable('nostr_events').set('search_ext', { language: 'es' }).where('id', '=', es.id).execute();
|
||||
|
||||
assertEquals(await store.query([{ search: 'language:en' }]), [en]);
|
||||
assertEquals(await store.query([{ search: 'language:es' }]), [es]);
|
||||
|
|
|
|||
|
|
@ -1,11 +1,10 @@
|
|||
// deno-lint-ignore-file require-await
|
||||
|
||||
import { LanguageCode } from 'iso-639-1';
|
||||
import { NPostgres, NPostgresSchema } from '@nostrify/db';
|
||||
import { NPostgres } from '@nostrify/db';
|
||||
import { NIP50, NKinds, NostrEvent, NostrFilter, NSchema as n } from '@nostrify/nostrify';
|
||||
import { logi } from '@soapbox/logi';
|
||||
import { JsonValue } from '@std/json';
|
||||
import { Kysely, SelectQueryBuilder } from 'kysely';
|
||||
import { Kysely } from 'kysely';
|
||||
import { nip27 } from 'nostr-tools';
|
||||
|
||||
import { DittoTables } from '@/db/DittoTables.ts';
|
||||
|
|
@ -15,6 +14,7 @@ import { isNostrId } from '@/utils.ts';
|
|||
import { abortError } from '@/utils/abort.ts';
|
||||
import { purifyEvent } from '@/utils/purify.ts';
|
||||
import { DittoEvent } from '@/interfaces/DittoEvent.ts';
|
||||
import { detectLanguage } from '@/utils/language.ts';
|
||||
|
||||
/** Function to decide whether or not to index a tag. */
|
||||
type TagCondition = (opts: TagConditionOpts) => boolean;
|
||||
|
|
@ -62,10 +62,44 @@ class EventsDB extends NPostgres {
|
|||
't': ({ event, count, value }) => (event.kind === 1985 ? count < 20 : count < 5) && value.length < 50,
|
||||
};
|
||||
|
||||
/**
 * Compute the search-extension key/value pairs to index for an event.
 *
 * - `reply` (kind 1 only): `'true'` when the event has at least one `e` tag, otherwise `'false'`.
 * - `language` (kind 1 only): the detected language of the content, when one is detected.
 * - `media`: `'true'` when the event has at least one `imeta` tag.
 * - `video`: `'true'` when the event has at least one `imeta` tag and every `imeta` tag
 *   declares an `m` entry whose value starts with `video/`.
 *
 * @param event The event being indexed.
 * @returns A flat string-to-string map; keys that do not apply are omitted.
 */
static indexExtensions(event: NostrEvent): Record<string, string> {
  const ext: Record<string, string> = {};

  if (event.kind === 1) {
    ext.reply = event.tags.some(([name]) => name === 'e').toString();

    const language = detectLanguage(event.content, 0.90);

    if (language) {
      ext.language = language;
    }
  }

  // Each `imeta` tag becomes a list of [name, value] pairs, split on the first space of every entry.
  const imeta: string[][][] = event.tags
    .filter(([name]) => name === 'imeta')
    .map(([_, ...entries]) =>
      entries.map((entry) => {
        const [name, ...rest] = entry.split(' ');
        return [name, rest.join(' ')];
      })
    );

  if (imeta.length) {
    ext.media = 'true';

    // This check must stay inside the non-empty guard: `[].every(...)` is always true,
    // which would otherwise mark every event without attachments as a video.
    if (imeta.every((tags) => tags.some(([name, value]) => name === 'm' && value.startsWith('video/')))) {
      ext.video = 'true';
    }
  }

  return ext;
}
|
||||
|
||||
/**
 * Create the store on top of `opts.kysely`, passing the class's static tag,
 * search-text and search-extension indexers to the base class.
 */
constructor(private opts: EventsDBOpts) {
  super(opts.kysely, {
    indexExtensions: EventsDB.indexExtensions,
    indexSearch: EventsDB.searchText,
    indexTags: EventsDB.indexTags,
  });
}
|
||||
|
||||
|
|
@ -155,58 +189,6 @@ class EventsDB extends NPostgres {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Build the query for a single filter, adding column conditions for `key:value`
 * tokens found in the filter's search string.
 *
 * Plain-text tokens are forwarded to the base implementation as the search string.
 * Recognised keys are `language` (may repeat), `exact_mime_type`, `partial_mime_type`
 * and `only_media`. Filters without a search string go straight to the base implementation.
 */
protected override getFilterQuery(trx: Kysely<NPostgresSchema>, filter: NostrFilter) {
  if (!filter.search) {
    return super.getFilterQuery(trx, filter);
  }

  const tokens = NIP50.parseInput(filter.search);
  const plainText = tokens.filter((token) => typeof token === 'string').join(' ');

  let builder = super.getFilterQuery(trx, { ...filter, search: plainText }) as SelectQueryBuilder<
    DittoTables,
    'nostr_events',
    DittoTables['nostr_events']
  >;

  const languages = new Set<string>();
  let exactMimeType: string | undefined;
  let partialMimeType: string | undefined;
  let onlyMedia: boolean | undefined;

  for (const token of tokens) {
    if (typeof token !== 'object') continue;

    switch (token.key) {
      case 'language':
        languages.add(token.value);
        break;
      case 'exact_mime_type':
        // When the key repeats, the last occurrence wins.
        exactMimeType = token.value;
        break;
      case 'partial_mime_type':
        partialMimeType = token.value;
        break;
      case 'only_media':
        // Values other than the literal 'true' / 'false' leave the flag unchanged.
        if (token.value === 'true') onlyMedia = true;
        if (token.value === 'false') onlyMedia = false;
        break;
    }
  }

  if (languages.size) {
    builder = builder.where('language', 'in', [...languages]);
  }

  if (exactMimeType) {
    builder = builder.where('mime_type', '=', exactMimeType);
  }

  if (partialMimeType) {
    // Compare only the part of the MIME type before the first '/'.
    builder = builder.where(
      (eb) => eb.fn('split_part', [eb.ref('mime_type'), eb.val('/'), eb.val(1)]),
      '=',
      partialMimeType,
    );
  }

  if (onlyMedia === true) {
    builder = builder.where('mime_type', 'is not', null);
  } else if (onlyMedia === false) {
    builder = builder.where('mime_type', 'is', null);
  }

  return builder;
}
|
||||
|
||||
/** Get events for filters from the database. */
|
||||
override async query(
|
||||
filters: NostrFilter[],
|
||||
|
|
@ -235,29 +217,6 @@ class EventsDB extends NPostgres {
|
|||
return super.query(filters, { ...opts, timeout: opts.timeout ?? this.opts.timeout });
|
||||
}
|
||||
|
||||
/**
 * Convert a `nostr_events` row into an event object.
 *
 * `created_at` is converted with `Number()`. Unless the store was created with
 * `opts.pure`, a non-empty `language` column is copied onto the event as well.
 */
protected override parseEventRow(row: DittoTables['nostr_events']): DittoEvent {
  const { id, kind, pubkey, content, tags, sig } = row;

  const event: DittoEvent = {
    id,
    kind,
    pubkey,
    content,
    created_at: Number(row.created_at),
    tags,
    sig,
  };

  if (!this.opts.pure && row.language) {
    event.language = row.language as LanguageCode;
  }

  return event;
}
|
||||
|
||||
/** Delete events based on filters from the database. */
|
||||
override async remove(filters: NostrFilter[], opts: { signal?: AbortSignal; timeout?: number } = {}): Promise<void> {
|
||||
logi({ level: 'debug', ns: 'ditto.remove', source: 'db', filters: filters as JsonValue });
|
||||
|
|
|
|||
|
|
@ -93,12 +93,12 @@ Deno.test("getTrendingTagValues(): 'e' tag and WITH language parameter", async (
|
|||
}
|
||||
|
||||
await db.kysely.updateTable('nostr_events')
|
||||
.set('language', 'pt')
|
||||
.set('search_ext', { language: 'pt' })
|
||||
.where('id', '=', post1.id)
|
||||
.execute();
|
||||
|
||||
await db.kysely.updateTable('nostr_events')
|
||||
.set('language', 'en')
|
||||
.set('search_ext', { language: 'en' })
|
||||
.where('id', '=', post2.id)
|
||||
.execute();
|
||||
|
||||
|
|
|
|||
|
|
@ -145,7 +145,7 @@ export async function updateTrendingEvents(): Promise<void> {
|
|||
const rows = await kysely
|
||||
.selectFrom('nostr_events')
|
||||
.select('nostr_events.id')
|
||||
.where('nostr_events.language', '=', language)
|
||||
.where(sql`nostr_events.search_ext->>'language'`, '=', language)
|
||||
.where('nostr_events.created_at', '>=', yesterday)
|
||||
.where('nostr_events.created_at', '<=', now)
|
||||
.execute();
|
||||
|
|
|
|||
|
|
@ -54,11 +54,13 @@ Deno.test('Searching for posts work', async () => {
|
|||
|
||||
const event = genEvent({ content: "I'm not an orphan. Death is my importance", kind: 1 });
|
||||
await db.store.event(event);
|
||||
await db.kysely.updateTable('nostr_events').set('language', 'en').where('id', '=', event.id).execute();
|
||||
await db.kysely.updateTable('nostr_events').set('search_ext', { language: 'en' }).where('id', '=', event.id)
|
||||
.execute();
|
||||
|
||||
const event2 = genEvent({ content: 'The more I explore is the more I fall in love with the music I make.', kind: 1 });
|
||||
await db.store.event(event2);
|
||||
await db.kysely.updateTable('nostr_events').set('language', 'en').where('id', '=', event2.id).execute();
|
||||
await db.kysely.updateTable('nostr_events').set('search_ext', { language: 'en' }).where('id', '=', event2.id)
|
||||
.execute();
|
||||
|
||||
assertEquals(
|
||||
await getIdsBySearch(db.kysely, { q: 'Death is my importance', limit: 1, offset: 0 }), // ordered words
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ export async function getIdsBySearch(
|
|||
}
|
||||
|
||||
if (languages.size) {
|
||||
query = query.where('language', 'in', [...languages]);
|
||||
query = query.where(sql`search_ext->>'language'`, 'in', [...languages]);
|
||||
}
|
||||
|
||||
if (domains.size) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue