diff --git a/deno.json b/deno.json index 699ab620..8edf0e53 100644 --- a/deno.json +++ b/deno.json @@ -59,6 +59,7 @@ "isomorphic-dompurify": "npm:isomorphic-dompurify@^2.11.0", "kysely": "npm:kysely@^0.27.4", "kysely-postgres-js": "npm:kysely-postgres-js@2.0.0", + "lande": "npm:lande@^1.0.10", "light-bolt11-decoder": "npm:light-bolt11-decoder", "linkify-plugin-hashtag": "npm:linkify-plugin-hashtag@^4.1.1", "linkify-string": "npm:linkify-string@^4.1.1", diff --git a/deno.lock b/deno.lock index a275accb..3a25d8d3 100644 --- a/deno.lock +++ b/deno.lock @@ -29,13 +29,16 @@ "jsr:@soapbox/kysely-pglite@^0.0.1": "jsr:@soapbox/kysely-pglite@0.0.1", "jsr:@soapbox/stickynotes@^0.4.0": "jsr:@soapbox/stickynotes@0.4.0", "jsr:@std/assert@^0.213.1": "jsr:@std/assert@0.213.1", + "jsr:@std/assert@^0.223.0": "jsr:@std/assert@0.223.0", "jsr:@std/assert@^0.224.0": "jsr:@std/assert@0.224.0", "jsr:@std/assert@^0.225.1": "jsr:@std/assert@0.225.3", + "jsr:@std/bytes@^0.223.0": "jsr:@std/bytes@0.223.0", "jsr:@std/bytes@^0.224.0": "jsr:@std/bytes@0.224.0", "jsr:@std/bytes@^1.0.0-rc.3": "jsr:@std/bytes@1.0.0", "jsr:@std/bytes@^1.0.1-rc.3": "jsr:@std/bytes@1.0.2", "jsr:@std/bytes@^1.0.2": "jsr:@std/bytes@1.0.2", "jsr:@std/bytes@^1.0.2-rc.3": "jsr:@std/bytes@1.0.2", + "jsr:@std/cli@^0.223.0": "jsr:@std/cli@0.223.0", "jsr:@std/crypto@^0.224.0": "jsr:@std/crypto@0.224.0", "jsr:@std/dotenv@^0.224.0": "jsr:@std/dotenv@0.224.2", "jsr:@std/encoding@0.213.1": "jsr:@std/encoding@0.213.1", @@ -45,14 +48,17 @@ "jsr:@std/fs@0.213.1": "jsr:@std/fs@0.213.1", "jsr:@std/fs@^0.229.3": "jsr:@std/fs@0.229.3", "jsr:@std/internal@^1.0.0": "jsr:@std/internal@1.0.1", + "jsr:@std/io@^0.223.0": "jsr:@std/io@0.223.0", "jsr:@std/io@^0.224": "jsr:@std/io@0.224.7", "jsr:@std/json@^0.223.0": "jsr:@std/json@0.223.0", "jsr:@std/media-types@^0.224.1": "jsr:@std/media-types@0.224.1", "jsr:@std/path@0.213.1": "jsr:@std/path@0.213.1", + "jsr:@std/path@1.0.0-rc.1": "jsr:@std/path@1.0.0-rc.1", "jsr:@std/path@^0.213.1": 
"jsr:@std/path@0.213.1", "jsr:@std/streams@^0.223.0": "jsr:@std/streams@0.223.0", "npm:@isaacs/ttlcache@^1.4.1": "npm:@isaacs/ttlcache@1.4.1", "npm:@noble/hashes@^1.4.0": "npm:@noble/hashes@1.4.0", + "npm:@noble/secp256k1@^2.0.0": "npm:@noble/secp256k1@2.1.0", "npm:@scure/base@^1.1.6": "npm:@scure/base@1.1.6", "npm:@scure/bip32@^1.4.0": "npm:@scure/bip32@1.4.0", "npm:@scure/bip39@^1.3.0": "npm:@scure/bip39@1.3.0", @@ -72,6 +78,7 @@ "npm:kysely@^0.27.2": "npm:kysely@0.27.4", "npm:kysely@^0.27.3": "npm:kysely@0.27.4", "npm:kysely@^0.27.4": "npm:kysely@0.27.4", + "npm:lande@^1.0.10": "npm:lande@1.0.10", "npm:light-bolt11-decoder": "npm:light-bolt11-decoder@3.1.1", "npm:linkify-plugin-hashtag@^4.1.1": "npm:linkify-plugin-hashtag@4.1.3_linkifyjs@4.1.3", "npm:linkify-string@^4.1.1": "npm:linkify-string@4.1.3_linkifyjs@4.1.3", @@ -88,6 +95,7 @@ "npm:postgres@3.4.4": "npm:postgres@3.4.4", "npm:prom-client@^15.1.2": "npm:prom-client@15.1.2", "npm:tldts@^6.0.14": "npm:tldts@6.1.18", + "npm:tseep@^1.2.1": "npm:tseep@1.2.1", "npm:type-fest@^4.3.0": "npm:type-fest@4.18.2", "npm:unfurl.js@^6.4.0": "npm:unfurl.js@6.4.0", "npm:websocket-ts@^2.1.5": "npm:websocket-ts@2.1.5", @@ -303,6 +311,9 @@ "@std/assert@0.213.1": { "integrity": "24c28178b30c8e0782c18e8e94ea72b16282207569cdd10ffb9d1d26f2edebfe" }, + "@std/assert@0.223.0": { + "integrity": "eb8d6d879d76e1cc431205bd346ed4d88dc051c6366365b1af47034b0670be24" + }, "@std/assert@0.224.0": { "integrity": "8643233ec7aec38a940a8264a6e3eed9bfa44e7a71cc6b3c8874213ff401967f" }, @@ -312,6 +323,9 @@ "jsr:@std/internal@^1.0.0" ] }, + "@std/bytes@0.223.0": { + "integrity": "84b75052cd8680942c397c2631318772b295019098f40aac5c36cead4cba51a8" + }, "@std/bytes@0.224.0": { "integrity": "a2250e1d0eb7d1c5a426f21267ab9bdeac2447fa87a3d0d1a467d3f7a6058e49" }, @@ -321,6 +335,12 @@ "@std/bytes@1.0.2": { "integrity": "fbdee322bbd8c599a6af186a1603b3355e59a5fb1baa139f8f4c3c9a1b3e3d57" }, + "@std/cli@0.223.0": { + "integrity": 
"2feb7970f2028904c3edc22ea916ce9538113dfc170844f3eae03578c333c356", + "dependencies": [ + "jsr:@std/assert@^0.223.0" + ] + }, "@std/crypto@0.224.0": { "integrity": "154ef3ff08ef535562ef1a718718c5b2c5fc3808f0f9100daad69e829bfcdf2d", "dependencies": [ @@ -351,7 +371,10 @@ ] }, "@std/fs@0.229.3": { - "integrity": "783bca21f24da92e04c3893c9e79653227ab016c48e96b3078377ebd5222e6eb" + "integrity": "783bca21f24da92e04c3893c9e79653227ab016c48e96b3078377ebd5222e6eb", + "dependencies": [ + "jsr:@std/path@1.0.0-rc.1" + ] }, "@std/internal@1.0.0": { "integrity": "ac6a6dfebf838582c4b4f61a6907374e27e05bedb6ce276e0f1608fe84e7cd9a" @@ -359,6 +382,13 @@ "@std/internal@1.0.1": { "integrity": "6f8c7544d06a11dd256c8d6ba54b11ed870aac6c5aeafff499892662c57673e6" }, + "@std/io@0.223.0": { + "integrity": "2d8c3c2ab3a515619b90da2c6ff5ea7b75a94383259ef4d02116b228393f84f1", + "dependencies": [ + "jsr:@std/assert@^0.223.0", + "jsr:@std/bytes@^0.223.0" + ] + }, "@std/io@0.224.0": { "integrity": "0aff885d21d829c050b8a08b1d71b54aed5841aecf227f8d77e99ec529a11e8e", "dependencies": [ @@ -396,7 +426,10 @@ ] }, "@std/json@0.223.0": { - "integrity": "9a4a255931dd0397924c6b10bb6a72fe3e28ddd876b981ada2e3b8dd0764163f" + "integrity": "9a4a255931dd0397924c6b10bb6a72fe3e28ddd876b981ada2e3b8dd0764163f", + "dependencies": [ + "jsr:@std/streams@^0.223.0" + ] }, "@std/media-types@0.224.1": { "integrity": "9e69a5daed37c5b5c6d3ce4731dc191f80e67f79bed392b0957d1d03b87f11e1" @@ -407,8 +440,16 @@ "jsr:@std/assert@^0.213.1" ] }, + "@std/path@1.0.0-rc.1": { + "integrity": "b8c00ae2f19106a6bb7cbf1ab9be52aa70de1605daeb2dbdc4f87a7cbaf10ff6" + }, "@std/streams@0.223.0": { - "integrity": "d6b28e498ced3960b04dc5d251f2dcfc1df244b5ec5a48dc23a8f9b490be3b99" + "integrity": "d6b28e498ced3960b04dc5d251f2dcfc1df244b5ec5a48dc23a8f9b490be3b99", + "dependencies": [ + "jsr:@std/assert@^0.223.0", + "jsr:@std/bytes@^0.223.0", + "jsr:@std/io@^0.223.0" + ] } }, "npm": { @@ -454,6 +495,10 @@ "integrity": 
"sha512-V1JJ1WTRUqHHrOSh597hURcMqVKVGL/ea3kv0gSnEdsEZ0/+VyPghM1lMNGc00z7CIQorSvbKpuJkxvuHbvdbg==", "dependencies": {} }, + "@noble/secp256k1@2.1.0": { + "integrity": "sha512-XLEQQNdablO0XZOIniFQimiXsZDNwaYgL96dZwC54Q30imSbAOFf3NKtepc+cXyuZf5Q1HCgbqgZ2UFFuHVcEw==", + "dependencies": {} + }, "@opentelemetry/api@1.9.0": { "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==", "dependencies": {} @@ -864,6 +909,12 @@ "integrity": "sha512-dyNKv2KRvYOQPLCAOCjjQuCk4YFd33BvGdf/o5bC7FiW+BB6snA81Zt+2wT9QDFzKqxKa5rrOmvlK/anehCcgA==", "dependencies": {} }, + "lande@1.0.10": { + "integrity": "sha512-yT52DQh+UV2pEp08jOYrA4drDv0DbjpiRyZYgl25ak9G2cVR2AimzrqkYQWrD9a7Ud+qkAcaiDDoNH9DXfHPmw==", + "dependencies": { + "toygrad": "toygrad@2.6.0" + } + }, "light-bolt11-decoder@3.1.1": { "integrity": "sha512-sLg/KCwYkgsHWkefWd6KqpCHrLFWWaXTOX3cf6yD2hAzL0SLpX+lFcaFK2spkjbgzG6hhijKfORDc9WoUHwX0A==", "dependencies": { @@ -1213,6 +1264,10 @@ "url-parse": "url-parse@1.5.10" } }, + "toygrad@2.6.0": { + "integrity": "sha512-g4zBmlSbvzOE5FOILxYkAybTSxijKLkj1WoNqVGnbMcWDyj4wWQ+eYSr3ik7XOpIgMq/7eBcPRTJX3DM2E0YMg==", + "dependencies": {} + }, "tr46@0.0.3": { "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", "dependencies": {} @@ -1223,6 +1278,10 @@ "punycode": "punycode@2.3.1" } }, + "tseep@1.2.1": { + "integrity": "sha512-VFnsNcPGC4qFJ1nxbIPSjTmtRZOhlqLmtwRqtLVos8mbRHki8HO9cy9Z1e89EiWyxFmq6LBviI9TQjijxw/mEw==", + "dependencies": {} + }, "type-fest@3.13.1": { "integrity": "sha512-tLq3bSNx+xSpwvAJnzrK0Ep5CLNWjvFTOp71URMaAEWBfRb9nnJiBoUe0tF8bI4ZFO3omgBR6NvnbzVUT3Ly4g==", "dependencies": {} @@ -1963,6 +2022,7 @@ "npm:isomorphic-dompurify@^2.11.0", "npm:kysely-postgres-js@2.0.0", "npm:kysely@^0.27.4", + "npm:lande@^1.0.10", "npm:light-bolt11-decoder", "npm:linkify-plugin-hashtag@^4.1.1", "npm:linkify-string@^4.1.1", diff --git a/src/db/DittoTables.ts b/src/db/DittoTables.ts index 
a62c485d..48cb06cb 100644 --- a/src/db/DittoTables.ts +++ b/src/db/DittoTables.ts @@ -1,6 +1,9 @@ +import { Nullable } from 'kysely'; + import { NPostgresSchema } from '@nostrify/db'; export interface DittoTables extends NPostgresSchema { + nostr_events: NostrEventsRow; nip46_tokens: NIP46TokenRow; author_stats: AuthorStatsRow; event_stats: EventStatsRow; @@ -8,6 +11,10 @@ export interface DittoTables extends NPostgresSchema { event_zaps: EventZapRow; } +type NostrEventsRow = NPostgresSchema['nostr_events'] & { + language: Nullable; +}; + interface AuthorStatsRow { pubkey: string; followers_count: number; diff --git a/src/db/migrations/032_add_language.ts b/src/db/migrations/032_add_language.ts new file mode 100644 index 00000000..a0f828fe --- /dev/null +++ b/src/db/migrations/032_add_language.ts @@ -0,0 +1,12 @@ +import { Kysely } from 'kysely'; + +export async function up(db: Kysely): Promise { + await db.schema.alterTable('nostr_events').addColumn('language', 'char(2)').execute(); + await db.schema.createIndex('nostr_events_language_idx').on('nostr_events').column('language').execute(); +} + +export async function down(db: Kysely): Promise { + // Drop the index before its column: PostgreSQL removes the index implicitly when the column is dropped, which would make the explicit dropIndex fail. + await db.schema.dropIndex('nostr_events_language_idx').execute(); + await db.schema.alterTable('nostr_events').dropColumn('language').execute(); +} diff --git a/src/pipeline.ts b/src/pipeline.ts index 85d27964..cc4975cd 100644 --- a/src/pipeline.ts +++ b/src/pipeline.ts @@ -1,6 +1,8 @@ import { NKinds, NostrEvent, NSchema as n } from '@nostrify/nostrify'; import Debug from '@soapbox/stickynotes/debug'; +import ISO6391 from 'iso-639-1'; import { Kysely, sql } from 'kysely'; +import lande from 'lande'; import { LRUCache } from 'lru-cache'; import { z } from 'zod'; @@ -55,10 +57,11 @@ async function handleEvent(event: DittoEvent, signal: AbortSignal): Promise { + const [topResult] = lande(event.content); + + if (topResult) { + const [iso6393, confidence] = topResult; + const locale = new Intl.Locale(iso6393); + + if 
(confidence >= 0.95 && ISO6391.validate(locale.language)) { + const kysely = await Storages.kysely(); + try { + await kysely.updateTable('nostr_events') + .set('language', locale.language) + .where('id', '=', event.id) + .execute(); + } catch { + // do nothing + } + } + } +} + /** Determine if the event is being received in a timely manner. */ function isFresh(event: NostrEvent): boolean { return eventAge(event) < Time.seconds(10); diff --git a/src/storages/EventsDB.test.ts b/src/storages/EventsDB.test.ts index 7a5f7b93..b24032aa 100644 --- a/src/storages/EventsDB.test.ts +++ b/src/storages/EventsDB.test.ts @@ -54,6 +54,23 @@ Deno.test('query events with domain search filter', async () => { assertEquals(await store.query([{ kinds: [1], search: 'domain:example.com' }]), []); }); +Deno.test('query events with language search filter', async () => { + await using db = await createTestDB(); + const { store, kysely } = db; + + const en = genEvent({ kind: 1, content: 'hello world!' }); + const es = genEvent({ kind: 1, content: 'hola mundo!' 
}); + + await store.event(en); + await store.event(es); + + await kysely.updateTable('nostr_events').set('language', 'en').where('id', '=', en.id).execute(); + await kysely.updateTable('nostr_events').set('language', 'es').where('id', '=', es.id).execute(); + + assertEquals(await store.query([{ search: 'language:en' }]), [en]); + assertEquals(await store.query([{ search: 'language:es' }]), [es]); +}); + Deno.test('delete events', async () => { await using db = await createTestDB(); const { store } = db; diff --git a/src/storages/EventsDB.ts b/src/storages/EventsDB.ts index bedc1cac..b4dc0b9b 100644 --- a/src/storages/EventsDB.ts +++ b/src/storages/EventsDB.ts @@ -3,7 +3,7 @@ import { NPostgres, NPostgresSchema } from '@nostrify/db'; import { NIP50, NKinds, NostrEvent, NostrFilter, NSchema as n } from '@nostrify/nostrify'; import { Stickynotes } from '@soapbox/stickynotes'; -import { Kysely } from 'kysely'; +import { Kysely, SelectQueryBuilder } from 'kysely'; import { nip27 } from 'nostr-tools'; import { DittoTables } from '@/db/DittoTables.ts'; @@ -145,8 +145,36 @@ class EventsDB extends NPostgres { } protected getFilterQuery(trx: Kysely, filter: NostrFilter) { - const query = super.getFilterQuery(trx, filter); - return query; + if (filter.search) { + const tokens = NIP50.parseInput(filter.search); + + let query = super.getFilterQuery(trx, { + ...filter, + search: tokens.filter((t) => typeof t === 'string').join(' '), + }) as SelectQueryBuilder>; + + const data = tokens.filter((t) => typeof t === 'object').reduce( + (acc, t) => acc.set(t.key, t.value), + new Map(), + ); + + const domain = data.get('domain'); + const language = data.get('language'); + + if (domain) { + query = query + .innerJoin('pubkey_domains', 'nostr_events.pubkey', 'pubkey_domains.pubkey') + .where('pubkey_domains.domain', '=', domain); + } + + if (language) { + query = query.where('language', '=', language); + } + + return query; + } + + return super.getFilterQuery(trx, filter); } /** Get 
events for filters from the database. */ @@ -260,35 +288,6 @@ class EventsDB extends NPostgres { filters = structuredClone(filters); for (const filter of filters) { - if (filter.search) { - const tokens = NIP50.parseInput(filter.search); - - const domain = (tokens.find((t) => - typeof t === 'object' && t.key === 'domain' - ) as { key: 'domain'; value: string } | undefined)?.value; - - if (domain) { - const query = this.opts.kysely - .selectFrom('pubkey_domains') - .select('pubkey') - .where('domain', '=', domain); - - if (filter.authors) { - query.where('pubkey', 'in', filter.authors); - } - - const pubkeys = await query - .execute() - .then((rows) => - rows.map((row) => row.pubkey) - ); - - filter.authors = pubkeys; - } - - filter.search = tokens.filter((t) => typeof t === 'string').join(' '); - } - if (filter.kinds) { // Ephemeral events are not stored, so don't bother querying for them. // If this results in an empty kinds array, NDatabase will remove the filter before querying and return no results.