Support NIP-50 language extension

This commit is contained in:
Alex Gleason 2024-09-15 12:40:58 -05:00
parent b384fcf572
commit c24d11c6f3
No known key found for this signature in database
GPG key ID: 7211D1F99744FBB7
7 changed files with 156 additions and 36 deletions

View file

@ -59,6 +59,7 @@
"isomorphic-dompurify": "npm:isomorphic-dompurify@^2.11.0",
"kysely": "npm:kysely@^0.27.4",
"kysely-postgres-js": "npm:kysely-postgres-js@2.0.0",
"lande": "npm:lande@^1.0.10",
"light-bolt11-decoder": "npm:light-bolt11-decoder",
"linkify-plugin-hashtag": "npm:linkify-plugin-hashtag@^4.1.1",
"linkify-string": "npm:linkify-string@^4.1.1",

66
deno.lock generated
View file

@ -29,13 +29,16 @@
"jsr:@soapbox/kysely-pglite@^0.0.1": "jsr:@soapbox/kysely-pglite@0.0.1",
"jsr:@soapbox/stickynotes@^0.4.0": "jsr:@soapbox/stickynotes@0.4.0",
"jsr:@std/assert@^0.213.1": "jsr:@std/assert@0.213.1",
"jsr:@std/assert@^0.223.0": "jsr:@std/assert@0.223.0",
"jsr:@std/assert@^0.224.0": "jsr:@std/assert@0.224.0",
"jsr:@std/assert@^0.225.1": "jsr:@std/assert@0.225.3",
"jsr:@std/bytes@^0.223.0": "jsr:@std/bytes@0.223.0",
"jsr:@std/bytes@^0.224.0": "jsr:@std/bytes@0.224.0",
"jsr:@std/bytes@^1.0.0-rc.3": "jsr:@std/bytes@1.0.0",
"jsr:@std/bytes@^1.0.1-rc.3": "jsr:@std/bytes@1.0.2",
"jsr:@std/bytes@^1.0.2": "jsr:@std/bytes@1.0.2",
"jsr:@std/bytes@^1.0.2-rc.3": "jsr:@std/bytes@1.0.2",
"jsr:@std/cli@^0.223.0": "jsr:@std/cli@0.223.0",
"jsr:@std/crypto@^0.224.0": "jsr:@std/crypto@0.224.0",
"jsr:@std/dotenv@^0.224.0": "jsr:@std/dotenv@0.224.2",
"jsr:@std/encoding@0.213.1": "jsr:@std/encoding@0.213.1",
@ -45,14 +48,17 @@
"jsr:@std/fs@0.213.1": "jsr:@std/fs@0.213.1",
"jsr:@std/fs@^0.229.3": "jsr:@std/fs@0.229.3",
"jsr:@std/internal@^1.0.0": "jsr:@std/internal@1.0.1",
"jsr:@std/io@^0.223.0": "jsr:@std/io@0.223.0",
"jsr:@std/io@^0.224": "jsr:@std/io@0.224.7",
"jsr:@std/json@^0.223.0": "jsr:@std/json@0.223.0",
"jsr:@std/media-types@^0.224.1": "jsr:@std/media-types@0.224.1",
"jsr:@std/path@0.213.1": "jsr:@std/path@0.213.1",
"jsr:@std/path@1.0.0-rc.1": "jsr:@std/path@1.0.0-rc.1",
"jsr:@std/path@^0.213.1": "jsr:@std/path@0.213.1",
"jsr:@std/streams@^0.223.0": "jsr:@std/streams@0.223.0",
"npm:@isaacs/ttlcache@^1.4.1": "npm:@isaacs/ttlcache@1.4.1",
"npm:@noble/hashes@^1.4.0": "npm:@noble/hashes@1.4.0",
"npm:@noble/secp256k1@^2.0.0": "npm:@noble/secp256k1@2.1.0",
"npm:@scure/base@^1.1.6": "npm:@scure/base@1.1.6",
"npm:@scure/bip32@^1.4.0": "npm:@scure/bip32@1.4.0",
"npm:@scure/bip39@^1.3.0": "npm:@scure/bip39@1.3.0",
@ -72,6 +78,7 @@
"npm:kysely@^0.27.2": "npm:kysely@0.27.4",
"npm:kysely@^0.27.3": "npm:kysely@0.27.4",
"npm:kysely@^0.27.4": "npm:kysely@0.27.4",
"npm:lande@^1.0.10": "npm:lande@1.0.10",
"npm:light-bolt11-decoder": "npm:light-bolt11-decoder@3.1.1",
"npm:linkify-plugin-hashtag@^4.1.1": "npm:linkify-plugin-hashtag@4.1.3_linkifyjs@4.1.3",
"npm:linkify-string@^4.1.1": "npm:linkify-string@4.1.3_linkifyjs@4.1.3",
@ -88,6 +95,7 @@
"npm:postgres@3.4.4": "npm:postgres@3.4.4",
"npm:prom-client@^15.1.2": "npm:prom-client@15.1.2",
"npm:tldts@^6.0.14": "npm:tldts@6.1.18",
"npm:tseep@^1.2.1": "npm:tseep@1.2.1",
"npm:type-fest@^4.3.0": "npm:type-fest@4.18.2",
"npm:unfurl.js@^6.4.0": "npm:unfurl.js@6.4.0",
"npm:websocket-ts@^2.1.5": "npm:websocket-ts@2.1.5",
@ -303,6 +311,9 @@
"@std/assert@0.213.1": {
"integrity": "24c28178b30c8e0782c18e8e94ea72b16282207569cdd10ffb9d1d26f2edebfe"
},
"@std/assert@0.223.0": {
"integrity": "eb8d6d879d76e1cc431205bd346ed4d88dc051c6366365b1af47034b0670be24"
},
"@std/assert@0.224.0": {
"integrity": "8643233ec7aec38a940a8264a6e3eed9bfa44e7a71cc6b3c8874213ff401967f"
},
@ -312,6 +323,9 @@
"jsr:@std/internal@^1.0.0"
]
},
"@std/bytes@0.223.0": {
"integrity": "84b75052cd8680942c397c2631318772b295019098f40aac5c36cead4cba51a8"
},
"@std/bytes@0.224.0": {
"integrity": "a2250e1d0eb7d1c5a426f21267ab9bdeac2447fa87a3d0d1a467d3f7a6058e49"
},
@ -321,6 +335,12 @@
"@std/bytes@1.0.2": {
"integrity": "fbdee322bbd8c599a6af186a1603b3355e59a5fb1baa139f8f4c3c9a1b3e3d57"
},
"@std/cli@0.223.0": {
"integrity": "2feb7970f2028904c3edc22ea916ce9538113dfc170844f3eae03578c333c356",
"dependencies": [
"jsr:@std/assert@^0.223.0"
]
},
"@std/crypto@0.224.0": {
"integrity": "154ef3ff08ef535562ef1a718718c5b2c5fc3808f0f9100daad69e829bfcdf2d",
"dependencies": [
@ -351,7 +371,10 @@
]
},
"@std/fs@0.229.3": {
"integrity": "783bca21f24da92e04c3893c9e79653227ab016c48e96b3078377ebd5222e6eb"
"integrity": "783bca21f24da92e04c3893c9e79653227ab016c48e96b3078377ebd5222e6eb",
"dependencies": [
"jsr:@std/path@1.0.0-rc.1"
]
},
"@std/internal@1.0.0": {
"integrity": "ac6a6dfebf838582c4b4f61a6907374e27e05bedb6ce276e0f1608fe84e7cd9a"
@ -359,6 +382,13 @@
"@std/internal@1.0.1": {
"integrity": "6f8c7544d06a11dd256c8d6ba54b11ed870aac6c5aeafff499892662c57673e6"
},
"@std/io@0.223.0": {
"integrity": "2d8c3c2ab3a515619b90da2c6ff5ea7b75a94383259ef4d02116b228393f84f1",
"dependencies": [
"jsr:@std/assert@^0.223.0",
"jsr:@std/bytes@^0.223.0"
]
},
"@std/io@0.224.0": {
"integrity": "0aff885d21d829c050b8a08b1d71b54aed5841aecf227f8d77e99ec529a11e8e",
"dependencies": [
@ -396,7 +426,10 @@
]
},
"@std/json@0.223.0": {
"integrity": "9a4a255931dd0397924c6b10bb6a72fe3e28ddd876b981ada2e3b8dd0764163f"
"integrity": "9a4a255931dd0397924c6b10bb6a72fe3e28ddd876b981ada2e3b8dd0764163f",
"dependencies": [
"jsr:@std/streams@^0.223.0"
]
},
"@std/media-types@0.224.1": {
"integrity": "9e69a5daed37c5b5c6d3ce4731dc191f80e67f79bed392b0957d1d03b87f11e1"
@ -407,8 +440,16 @@
"jsr:@std/assert@^0.213.1"
]
},
"@std/path@1.0.0-rc.1": {
"integrity": "b8c00ae2f19106a6bb7cbf1ab9be52aa70de1605daeb2dbdc4f87a7cbaf10ff6"
},
"@std/streams@0.223.0": {
"integrity": "d6b28e498ced3960b04dc5d251f2dcfc1df244b5ec5a48dc23a8f9b490be3b99"
"integrity": "d6b28e498ced3960b04dc5d251f2dcfc1df244b5ec5a48dc23a8f9b490be3b99",
"dependencies": [
"jsr:@std/assert@^0.223.0",
"jsr:@std/bytes@^0.223.0",
"jsr:@std/io@^0.223.0"
]
}
},
"npm": {
@ -454,6 +495,10 @@
"integrity": "sha512-V1JJ1WTRUqHHrOSh597hURcMqVKVGL/ea3kv0gSnEdsEZ0/+VyPghM1lMNGc00z7CIQorSvbKpuJkxvuHbvdbg==",
"dependencies": {}
},
"@noble/secp256k1@2.1.0": {
"integrity": "sha512-XLEQQNdablO0XZOIniFQimiXsZDNwaYgL96dZwC54Q30imSbAOFf3NKtepc+cXyuZf5Q1HCgbqgZ2UFFuHVcEw==",
"dependencies": {}
},
"@opentelemetry/api@1.9.0": {
"integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==",
"dependencies": {}
@ -864,6 +909,12 @@
"integrity": "sha512-dyNKv2KRvYOQPLCAOCjjQuCk4YFd33BvGdf/o5bC7FiW+BB6snA81Zt+2wT9QDFzKqxKa5rrOmvlK/anehCcgA==",
"dependencies": {}
},
"lande@1.0.10": {
"integrity": "sha512-yT52DQh+UV2pEp08jOYrA4drDv0DbjpiRyZYgl25ak9G2cVR2AimzrqkYQWrD9a7Ud+qkAcaiDDoNH9DXfHPmw==",
"dependencies": {
"toygrad": "toygrad@2.6.0"
}
},
"light-bolt11-decoder@3.1.1": {
"integrity": "sha512-sLg/KCwYkgsHWkefWd6KqpCHrLFWWaXTOX3cf6yD2hAzL0SLpX+lFcaFK2spkjbgzG6hhijKfORDc9WoUHwX0A==",
"dependencies": {
@ -1213,6 +1264,10 @@
"url-parse": "url-parse@1.5.10"
}
},
"toygrad@2.6.0": {
"integrity": "sha512-g4zBmlSbvzOE5FOILxYkAybTSxijKLkj1WoNqVGnbMcWDyj4wWQ+eYSr3ik7XOpIgMq/7eBcPRTJX3DM2E0YMg==",
"dependencies": {}
},
"tr46@0.0.3": {
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==",
"dependencies": {}
@ -1223,6 +1278,10 @@
"punycode": "punycode@2.3.1"
}
},
"tseep@1.2.1": {
"integrity": "sha512-VFnsNcPGC4qFJ1nxbIPSjTmtRZOhlqLmtwRqtLVos8mbRHki8HO9cy9Z1e89EiWyxFmq6LBviI9TQjijxw/mEw==",
"dependencies": {}
},
"type-fest@3.13.1": {
"integrity": "sha512-tLq3bSNx+xSpwvAJnzrK0Ep5CLNWjvFTOp71URMaAEWBfRb9nnJiBoUe0tF8bI4ZFO3omgBR6NvnbzVUT3Ly4g==",
"dependencies": {}
@ -1963,6 +2022,7 @@
"npm:isomorphic-dompurify@^2.11.0",
"npm:kysely-postgres-js@2.0.0",
"npm:kysely@^0.27.4",
"npm:lande@^1.0.10",
"npm:light-bolt11-decoder",
"npm:linkify-plugin-hashtag@^4.1.1",
"npm:linkify-string@^4.1.1",

View file

@ -1,6 +1,9 @@
import { Nullable } from 'kysely';
import { NPostgresSchema } from '@nostrify/db';
export interface DittoTables extends NPostgresSchema {
nostr_events: NostrEventsRow;
nip46_tokens: NIP46TokenRow;
author_stats: AuthorStatsRow;
event_stats: EventStatsRow;
@ -8,6 +11,10 @@ export interface DittoTables extends NPostgresSchema {
event_zaps: EventZapRow;
}
/**
 * Row shape of the `nostr_events` table: the base columns declared by
 * `NPostgresSchema` plus the `language` column.
 *
 * `Nullable` maps over the properties of an object type, so it must wrap an
 * object. Applied directly to `string` it evaluates to plain `string`, which
 * would hide the fact that the column can hold `null`.
 */
type NostrEventsRow = NPostgresSchema['nostr_events'] & Nullable<{
  /** Language code stored for the event; `null` when none has been set. */
  language: string;
}>;
interface AuthorStatsRow {
pubkey: string;
followers_count: number;

View file

@ -0,0 +1,11 @@
import { Kysely } from 'kysely';
/**
 * Forward migration: adds a `char(2)` `language` column to `nostr_events`
 * (no NOT NULL constraint is applied), then creates the
 * `nostr_events_language_idx` index on that column.
 */
export async function up(db: Kysely<any>): Promise<void> {
  const addLanguageColumn = db.schema
    .alterTable('nostr_events')
    .addColumn('language', 'char(2)');
  await addLanguageColumn.execute();

  // The index is built only after the column exists.
  const createLanguageIndex = db.schema
    .createIndex('nostr_events_language_idx')
    .on('nostr_events')
    .column('language');
  await createLanguageIndex.execute();
}
/**
 * Reverse migration: removes the `nostr_events_language_idx` index and then
 * the `language` column from `nostr_events`.
 *
 * The index is dropped before the column it covers. PostgreSQL removes a
 * column's indexes together with the column, so dropping the column first
 * would leave the explicit `dropIndex` targeting an index that no longer
 * exists and the rollback would fail.
 */
export async function down(db: Kysely<any>): Promise<void> {
  await db.schema.dropIndex('nostr_events_language_idx').execute();
  await db.schema.alterTable('nostr_events').dropColumn('language').execute();
}

View file

@ -1,6 +1,8 @@
import { NKinds, NostrEvent, NSchema as n } from '@nostrify/nostrify';
import Debug from '@soapbox/stickynotes/debug';
import ISO6391 from 'iso-639-1';
import { Kysely, sql } from 'kysely';
import lande from 'lande';
import { LRUCache } from 'lru-cache';
import { z } from 'zod';
@ -55,10 +57,11 @@ async function handleEvent(event: DittoEvent, signal: AbortSignal): Promise<void
const kysely = await Storages.kysely();
await storeEvent(purifyEvent(event), signal);
await Promise.all([
storeEvent(purifyEvent(event), signal),
handleZaps(kysely, event),
parseMetadata(event, signal),
setLanguage(event),
generateSetEvents(event),
streamOut(event),
]);
@ -163,6 +166,28 @@ async function parseMetadata(event: NostrEvent, signal: AbortSignal): Promise<vo
}
}
/**
 * Detect the language of the event's content and, when the top detection is
 * confident enough, store it in the event's `language` column.
 *
 * Nothing is written when there is no detection result, when the confidence
 * is below 0.95, or when the resulting language code fails `ISO6391.validate`.
 */
async function setLanguage(event: NostrEvent): Promise<void> {
  const best = lande(event.content)[0];
  if (!best) {
    return;
  }

  const code = best[0];
  const score = best[1];

  // Normalize the detected code through `Intl.Locale` and keep only its language subtag.
  const { language } = new Intl.Locale(code);

  const trusted = score >= 0.95 && ISO6391.validate(language);
  if (!trusted) {
    return;
  }

  const kysely = await Storages.kysely();

  try {
    await kysely
      .updateTable('nostr_events')
      .set('language', language)
      .where('id', '=', event.id)
      .execute();
  } catch {
    // Best effort: a failed update is deliberately ignored.
  }
}
/** Determine if the event is being received in a timely manner. */
function isFresh(event: NostrEvent): boolean {
return eventAge(event) < Time.seconds(10);

View file

@ -54,6 +54,23 @@ Deno.test('query events with domain search filter', async () => {
assertEquals(await store.query([{ kinds: [1], search: 'domain:example.com' }]), []);
});
// Events whose `language` column is set must be returned by a matching
// `language:` search filter, and only by that one.
Deno.test('query events with language search filter', async () => {
  await using db = await createTestDB();
  const { store, kysely } = db;

  // One note per language under test, with the search string that should match it.
  const cases = [
    { code: 'en', search: 'language:en', event: genEvent({ kind: 1, content: 'hello world!' }) },
    { code: 'es', search: 'language:es', event: genEvent({ kind: 1, content: 'hola mundo!' }) },
  ];

  for (const { event } of cases) {
    await store.event(event);
  }

  // Set the column directly so the test does not depend on language detection.
  for (const { code, event } of cases) {
    await kysely.updateTable('nostr_events').set('language', code).where('id', '=', event.id).execute();
  }

  for (const { search, event } of cases) {
    assertEquals(await store.query([{ search }]), [event]);
  }
});
Deno.test('delete events', async () => {
await using db = await createTestDB();
const { store } = db;

View file

@ -3,7 +3,7 @@
import { NPostgres, NPostgresSchema } from '@nostrify/db';
import { NIP50, NKinds, NostrEvent, NostrFilter, NSchema as n } from '@nostrify/nostrify';
import { Stickynotes } from '@soapbox/stickynotes';
import { Kysely } from 'kysely';
import { Kysely, SelectQueryBuilder } from 'kysely';
import { nip27 } from 'nostr-tools';
import { DittoTables } from '@/db/DittoTables.ts';
@ -145,8 +145,36 @@ class EventsDB extends NPostgres {
}
/**
 * Build the select query for a single filter, adding support for the
 * `domain:` and `language:` key/value tokens in `filter.search`.
 *
 * Key/value tokens are stripped from the search string before the filter is
 * handed to the parent implementation; only the plain-text tokens are passed
 * on. The extracted values are then applied as extra constraints:
 * - `domain` joins `pubkey_domains` and keeps events whose author pubkey is
 *   listed under that domain;
 * - `language` keeps events whose `language` column equals the value.
 *
 * Filters without a search string are delegated to the parent unchanged.
 */
protected getFilterQuery(trx: Kysely<NPostgresSchema>, filter: NostrFilter) {
  // No search string means there are no extension tokens to apply.
  if (!filter.search) {
    return super.getFilterQuery(trx, filter);
  }

  const tokens = NIP50.parseInput(filter.search);

  // The parent only receives the plain-text part of the search.
  let query = super.getFilterQuery(trx, {
    ...filter,
    search: tokens.filter((t) => typeof t === 'string').join(' '),
  }) as SelectQueryBuilder<DittoTables, 'nostr_events', Pick<DittoTables['nostr_events'], keyof NostrEvent>>;

  // Collect key/value tokens; a later duplicate key overrides an earlier one.
  const data = tokens.filter((t) => typeof t === 'object').reduce(
    (acc, t) => acc.set(t.key, t.value),
    new Map<string, string>(),
  );

  const domain = data.get('domain');
  const language = data.get('language');

  if (domain) {
    query = query
      .innerJoin('pubkey_domains', 'nostr_events.pubkey', 'pubkey_domains.pubkey')
      .where('pubkey_domains.domain', '=', domain);
  }

  if (language) {
    // Table-qualified so the reference stays unambiguous when the join above is present.
    query = query.where('nostr_events.language', '=', language);
  }

  return query;
}
/** Get events for filters from the database. */
@ -260,35 +288,6 @@ class EventsDB extends NPostgres {
filters = structuredClone(filters);
for (const filter of filters) {
if (filter.search) {
const tokens = NIP50.parseInput(filter.search);
const domain = (tokens.find((t) =>
typeof t === 'object' && t.key === 'domain'
) as { key: 'domain'; value: string } | undefined)?.value;
if (domain) {
const query = this.opts.kysely
.selectFrom('pubkey_domains')
.select('pubkey')
.where('domain', '=', domain);
if (filter.authors) {
query.where('pubkey', 'in', filter.authors);
}
const pubkeys = await query
.execute()
.then((rows) =>
rows.map((row) => row.pubkey)
);
filter.authors = pubkeys;
}
filter.search = tokens.filter((t) => typeof t === 'string').join(' ');
}
if (filter.kinds) {
// Ephemeral events are not stored, so don't bother querying for them.
// If this results in an empty kinds array, NDatabase will remove the filter before querying and return no results.