diff --git a/deno.json b/deno.json index bb580e72..201bff33 100644 --- a/deno.json +++ b/deno.json @@ -41,7 +41,7 @@ "@isaacs/ttlcache": "npm:@isaacs/ttlcache@^1.4.1", "@lambdalisue/async": "jsr:@lambdalisue/async@^2.1.1", "@noble/secp256k1": "npm:@noble/secp256k1@^2.0.0", - "@nostrify/db": "jsr:@nostrify/db@^0.35.0", + "@nostrify/db": "jsr:@nostrify/db@^0.36.1", "@nostrify/nostrify": "jsr:@nostrify/nostrify@^0.36.0", "@nostrify/policies": "jsr:@nostrify/policies@^0.35.0", "@scure/base": "npm:@scure/base@^1.1.6", diff --git a/deno.lock b/deno.lock index 0a0d9e31..a00d4d31 100644 --- a/deno.lock +++ b/deno.lock @@ -22,7 +22,7 @@ "jsr:@gleasonator/policy@0.8.0": "0.8.0", "jsr:@hono/hono@^4.4.6": "4.6.2", "jsr:@lambdalisue/async@^2.1.1": "2.1.1", - "jsr:@nostrify/db@0.35": "0.35.0", + "jsr:@nostrify/db@~0.36.1": "0.36.1", "jsr:@nostrify/nostrify@0.31": "0.31.0", "jsr:@nostrify/nostrify@0.32": "0.32.0", "jsr:@nostrify/nostrify@0.35": "0.35.0", @@ -270,10 +270,10 @@ "@lambdalisue/async@2.1.1": { "integrity": "1fc9bc6f4ed50215cd2f7217842b18cea80f81c25744f88f8c5eb4be5a1c9ab4" }, - "@nostrify/db@0.35.0": { - "integrity": "637191c41812544e361b7997dc44ea098f8bd7efebb28f37a8a7142a0ecada8d", + "@nostrify/db@0.36.1": { + "integrity": "b65b89ca6fe98d9dbcc0402b5c9c07b8430c2c91f84ba4128ff2eeed70c3d49f", "dependencies": [ - "jsr:@nostrify/nostrify@0.35", + "jsr:@nostrify/nostrify@0.36", "jsr:@nostrify/types@0.35", "npm:kysely@~0.27.3", "npm:nostr-tools@^2.7.0" @@ -2048,7 +2048,7 @@ "jsr:@gfx/canvas-wasm@~0.4.2", "jsr:@hono/hono@^4.4.6", "jsr:@lambdalisue/async@^2.1.1", - "jsr:@nostrify/db@0.35", + "jsr:@nostrify/db@~0.36.1", "jsr:@nostrify/nostrify@0.36", "jsr:@nostrify/policies@0.35", "jsr:@soapbox/kysely-pglite@1", diff --git a/src/app.ts b/src/app.ts index e9ab44fc..6659169f 100644 --- a/src/app.ts +++ b/src/app.ts @@ -1,4 +1,4 @@ -import { Context, Env as HonoEnv, Handler, Hono, Input as HonoInput, MiddlewareHandler } from '@hono/hono'; +import { type Context, Env as HonoEnv, Handler, Hono, Input as HonoInput, MiddlewareHandler } from '@hono/hono'; import { cors } from '@hono/hono/cors'; import { serveStatic } from '@hono/hono/deno'; import { logger } from '@hono/hono/logger'; @@ -112,6 +112,7 @@ import { trendingStatusesController, trendingTagsController, } from '@/controllers/api/trends.ts'; +import { translateController } from '@/controllers/api/translate.ts'; import { errorHandler } from '@/controllers/error.ts'; import { frontendController } from '@/controllers/frontend.ts'; import { metricsController } from '@/controllers/metrics.ts'; @@ -128,6 +129,8 @@ import { requireSigner } from '@/middleware/requireSigner.ts'; import { signerMiddleware } from '@/middleware/signerMiddleware.ts'; import { storeMiddleware } from '@/middleware/storeMiddleware.ts'; import { uploaderMiddleware } from '@/middleware/uploaderMiddleware.ts'; +import { DittoTranslator } from '@/translators/translator.ts'; +import { translatorMiddleware } from '@/middleware/translatorMiddleware.ts'; interface AppEnv extends HonoEnv { Variables: { @@ -143,6 +146,8 @@ interface AppEnv extends HonoEnv { pagination: { since?: number; until?: number; limit: number }; /** Normalized list pagination params. */ listPagination: { offset: number; limit: number }; + /** Translation service. */ + translator?: DittoTranslator; }; } @@ -222,6 +227,13 @@ app.post('/api/v1/statuses/:id{[0-9a-f]{64}}/bookmark', requireSigner, bookmarkC app.post('/api/v1/statuses/:id{[0-9a-f]{64}}/unbookmark', requireSigner, unbookmarkController); app.post('/api/v1/statuses/:id{[0-9a-f]{64}}/pin', requireSigner, pinController); app.post('/api/v1/statuses/:id{[0-9a-f]{64}}/unpin', requireSigner, unpinController); +app.post( + '/api/v1/statuses/:id{[0-9a-f]{64}}/translate', + requireSigner, + rateLimitMiddleware(30, Time.minutes(1)), + translatorMiddleware, + translateController, +); app.post('/api/v1/statuses/:id{[0-9a-f]{64}}/reblog', requireSigner, reblogStatusController); app.post('/api/v1/statuses/:id{[0-9a-f]{64}}/unreblog', requireSigner, unreblogStatusController); app.post('/api/v1/statuses', requireSigner, createStatusController); diff --git a/src/config.ts b/src/config.ts index c7f5e7cb..0e4fc816 100644 --- a/src/config.ts +++ b/src/config.ts @@ -271,6 +271,26 @@ class Conf { static get preferredLanguages(): LanguageCode[] | undefined { return Deno.env.get('DITTO_LANGUAGES')?.split(',')?.filter(ISO6391.validate) as LanguageCode[]; } + /** Translation provider used to translate posts. */ + static get translationProvider(): string | undefined { + return Deno.env.get('TRANSLATION_PROVIDER'); + } + /** DeepL URL endpoint. */ + static get deepLendpoint(): string | undefined { + return Deno.env.get('DEEPL_ENDPOINT'); + } + /** DeepL API KEY. */ + static get deepLapiKey(): string | undefined { + return Deno.env.get('DEEPL_API_KEY'); + } + /** LibreTranslate URL endpoint. */ + static get libreTranslateEndpoint(): string | undefined { + return Deno.env.get('LIBRETRANSLATE_ENDPOINT'); + } + /** LibreTranslate API KEY. */ + static get libreTranslateApiKey(): string | undefined { + return Deno.env.get('LIBRETRANSLATE_API_KEY'); + } /** Cache settings. */ static caches = { /** NIP-05 cache settings. */ diff --git a/src/controllers/api/instance.ts b/src/controllers/api/instance.ts index 06345b5f..c8f57f06 100644 --- a/src/controllers/api/instance.ts +++ b/src/controllers/api/instance.ts @@ -129,7 +129,7 @@ const instanceV2Controller: AppController = async (c) => { max_expiration: 2629746, }, translation: { - enabled: false, + enabled: true, }, }, nostr: { diff --git a/src/controllers/api/translate.ts b/src/controllers/api/translate.ts new file mode 100644 index 00000000..f2ca9eae --- /dev/null +++ b/src/controllers/api/translate.ts @@ -0,0 +1,140 @@ +import { LanguageCode } from 'iso-639-1'; +import { z } from 'zod'; + +import { AppController } from '@/app.ts'; +import { localeSchema } from '@/schema.ts'; +import { dittoTranslations, dittoTranslationsKey, MastodonTranslation } from '@/translators/translator.ts'; +import { parseBody } from '@/utils/api.ts'; +import { getEvent } from '@/queries.ts'; +import { renderStatus } from '@/views/mastodon/statuses.ts'; + +const translateSchema = z.object({ + lang: localeSchema, +}); + +const translateController: AppController = async (c) => { + const result = translateSchema.safeParse(await parseBody(c.req.raw)); + const { signal } = c.req.raw; + + if (!result.success) { + return c.json({ error: 'Bad request.', schema: result.error }, 422); + } + + const translator = c.get('translator'); + if (!translator) { + return c.json({ error: 'No translator configured.' }, 500); + } + + const lang = result.data.lang.language.slice(0, 2) as LanguageCode; + + const id = c.req.param('id'); + + const event = await getEvent(id, { signal }); + if (!event) { + return c.json({ error: 'Record not found' }, 400); + } + + const viewerPubkey = await c.get('signer')?.getPublicKey(); + + if (lang.toLowerCase() === event?.language?.toLowerCase()) { + return c.json({ error: 'Source and target languages are the same. No translation needed.' }, 400); + } + + const status = await renderStatus(event, { viewerPubkey }); + if (!status?.content) { + return c.json({ error: 'Bad request.', schema: result.error }, 400); + } + + const translatedId = `${lang}-${id}` as dittoTranslationsKey; + const translationCache = dittoTranslations.get(translatedId); + + if (translationCache) { + return c.json(translationCache.data, 200); + } + + const mediaAttachments = status?.media_attachments.map((value) => { + return { + id: value.id, + description: value.description ?? '', + }; + }) ?? []; + + try { + const texts: string[] = []; + + const mastodonTranslation: MastodonTranslation = { + content: '', + spoiler_text: '', + media_attachments: [], + poll: null, + detected_source_language: event.language ?? 'en', + provider: translator.getProvider(), + }; + + if ((status?.poll as MastodonTranslation['poll'])?.options) { + mastodonTranslation.poll = { id: (status?.poll as MastodonTranslation['poll'])?.id!, options: [] }; + } + + type TranslationIndex = { + [key: number]: 'content' | 'spoilerText' | 'poll' | { type: 'media'; id: string }; + }; + const translationIndex: TranslationIndex = {}; + let index = 0; + + // Content + translationIndex[index] = 'content'; + texts.push(status.content); + index++; + + // Spoiler text + if (status.spoiler_text) { + translationIndex[index] = 'spoilerText'; + texts.push(status.spoiler_text); + index++; + } + + // Media description + for (const [mediaIndex, value] of mediaAttachments.entries()) { + translationIndex[index + mediaIndex] = { type: 'media', id: value.id }; + texts.push(mediaAttachments[mediaIndex].description); + index += mediaIndex; + } + + // Poll title + if (status?.poll) { + for (const [pollIndex] of (status?.poll as MastodonTranslation['poll'])!.options.entries()) { + translationIndex[index + pollIndex] = 'poll'; + texts.push((status.poll as MastodonTranslation['poll'])!.options[pollIndex].title); + index += pollIndex; + } + } + + const data = await translator.translate(texts, event.language, lang, { signal }); + const translatedTexts = data.results; + + for (let i = 0; i < texts.length; i++) { + if (translationIndex[i] === 'content') { + mastodonTranslation.content = translatedTexts[i]; + } else if (translationIndex[i] === 'spoilerText') { + mastodonTranslation.spoiler_text = translatedTexts[i]; + } else if (translationIndex[i] === 'poll') { + mastodonTranslation.poll?.options.push({ title: translatedTexts[i] }); + } else { + const media = translationIndex[i] as { type: 'media'; id: string }; + mastodonTranslation.media_attachments.push({ + id: media.id, + description: translatedTexts[i], + }); + } + } + + mastodonTranslation.detected_source_language = data.source_lang; + + dittoTranslations.set(translatedId, { data: mastodonTranslation }); + return c.json(mastodonTranslation, 200); + } catch { + return c.json({ error: 'Service Unavailable' }, 503); + } +}; + +export { translateController }; diff --git a/src/controllers/nostr/relay.ts b/src/controllers/nostr/relay.ts index 2a38e751..f62ad76b 100644 --- a/src/controllers/nostr/relay.ts +++ b/src/controllers/nostr/relay.ts @@ -18,6 +18,7 @@ import * as pipeline from '@/pipeline.ts'; import { RelayError } from '@/RelayError.ts'; import { Storages } from '@/storages.ts'; import { Time } from '@/utils/time.ts'; +import { purifyEvent } from '@/utils/purify.ts'; /** Limit of initial events returned for a subscription. */ const FILTER_LIMIT = 100; @@ -105,7 +106,7 @@ function connectStream(socket: WebSocket, ip: string | undefined) { try { for (const event of await store.query(filters, { limit: FILTER_LIMIT, timeout: Conf.db.timeouts.relay })) { - send(['EVENT', subId, event]); + send(['EVENT', subId, purifyEvent(event)]); } } catch (e: any) { if (e instanceof RelayError) { @@ -137,7 +138,7 @@ function connectStream(socket: WebSocket, ip: string | undefined) { relayEventsCounter.inc({ kind: event.kind.toString() }); try { // This will store it (if eligible) and run other side-effects. - await pipeline.handleEvent(event, AbortSignal.timeout(1000)); + await pipeline.handleEvent(purifyEvent(event), AbortSignal.timeout(1000)); send(['OK', event.id, true, '']); } catch (e) { if (e instanceof RelayError) { diff --git a/src/db/DittoTables.ts b/src/db/DittoTables.ts index b6fa93f4..46eeeab9 100644 --- a/src/db/DittoTables.ts +++ b/src/db/DittoTables.ts @@ -1,5 +1,3 @@ -import { Nullable } from 'kysely'; - import { NPostgresSchema } from '@nostrify/db'; export interface DittoTables extends NPostgresSchema { @@ -12,7 +10,7 @@ export interface DittoTables extends NPostgresSchema { } type NostrEventsRow = NPostgresSchema['nostr_events'] & { - language: Nullable; + language: string | null; }; interface AuthorStatsRow { diff --git a/src/interfaces/DittoEvent.ts b/src/interfaces/DittoEvent.ts index dcaec6ae..cca7c0ca 100644 --- a/src/interfaces/DittoEvent.ts +++ b/src/interfaces/DittoEvent.ts @@ -1,4 +1,5 @@ import { NostrEvent } from '@nostrify/nostrify'; +import { LanguageCode } from 'iso-639-1'; /** Ditto internal stats for the event's author. */ export interface AuthorStats { @@ -43,4 +44,6 @@ export interface DittoEvent extends NostrEvent { zap_sender?: DittoEvent | string; zap_amount?: number; zap_message?: string; + /** Language of the event (kind 1s are more accurate). */ + language?: LanguageCode; } diff --git a/src/middleware/translatorMiddleware.ts b/src/middleware/translatorMiddleware.ts new file mode 100644 index 00000000..b8a07686 --- /dev/null +++ b/src/middleware/translatorMiddleware.ts @@ -0,0 +1,39 @@ +import { AppMiddleware } from '@/app.ts'; +import { Conf } from '@/config.ts'; +import { fetchWorker } from '@/workers/fetch.ts'; +import { DeepLTranslator } from '@/translators/DeepLTranslator.ts'; +import { LibreTranslateTranslator } from '@/translators/LibreTranslateTranslator.ts'; + +/** Set the translator used for translating posts. */ +export const translatorMiddleware: AppMiddleware = async (c, next) => { + const deepLendpoint = Conf.deepLendpoint; + const deepLapiKey = Conf.deepLapiKey; + const libreTranslateEndpoint = Conf.libreTranslateEndpoint; + const libreTranslateApiKey = Conf.libreTranslateApiKey; + const translationProvider = Conf.translationProvider; + + switch (translationProvider) { + case 'deepl': + if (deepLapiKey) { + c.set( + 'translator', + new DeepLTranslator({ endpoint: deepLendpoint, apiKey: deepLapiKey, fetch: fetchWorker }), + ); + } + break; + case 'libretranslate': + if (libreTranslateApiKey) { + c.set( + 'translator', + new LibreTranslateTranslator({ + endpoint: libreTranslateEndpoint, + apiKey: libreTranslateApiKey, + fetch: fetchWorker, + }), + ); + } + break; + } + + await next(); +}; diff --git a/src/schema.ts b/src/schema.ts index 5efa7769..a9dd56e3 100644 --- a/src/schema.ts +++ b/src/schema.ts @@ -1,4 +1,4 @@ -import ISO6391 from 'iso-639-1'; +import ISO6391, { LanguageCode } from 'iso-639-1'; import { z } from 'zod'; /** Validates individual items in an array, dropping any that aren't valid. */ @@ -41,7 +41,8 @@ const fileSchema = z.custom((value) => value instanceof File); const percentageSchema = z.coerce.number().int().gte(1).lte(100); -const languageSchema = z.string().transform((val, ctx) => { +const languageSchema = z.string().transform((val, ctx) => { + val = val.toLowerCase(); if (!ISO6391.validate(val)) { ctx.addIssue({ code: z.ZodIssueCode.custom, @@ -49,7 +50,19 @@ const languageSchema = z.string().transform((val, ctx) => { }); return z.NEVER; } - return val; + return val as LanguageCode; +}); + +const localeSchema = z.string().transform((val, ctx) => { + try { + return new Intl.Locale(val); + } catch { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'Invalid locale', + }); + return z.NEVER; + } }); export { @@ -59,6 +72,7 @@ export { filteredArray, hashtagSchema, languageSchema, + localeSchema, percentageSchema, safeUrlSchema, }; diff --git a/src/storages/EventsDB.test.ts b/src/storages/EventsDB.test.ts index b24032aa..e19fe775 100644 --- a/src/storages/EventsDB.test.ts +++ b/src/storages/EventsDB.test.ts @@ -7,7 +7,7 @@ import { Conf } from '@/config.ts'; import { createTestDB } from '@/test.ts'; Deno.test('count filters', async () => { - await using db = await createTestDB(); + await using db = await createTestDB({ pure: true }); const { store } = db; const event1 = await eventFixture('event-1'); @@ -18,7 +18,7 @@ Deno.test('count filters', async () => { }); Deno.test('insert and filter events', async () => { - await using db = await createTestDB(); + await using db = await createTestDB({ pure: true }); const { store } = db; const event1 = await eventFixture('event-1'); @@ -35,7 +35,7 @@ Deno.test('insert and filter events', async () => { }); Deno.test('query events with domain search filter', async () => { - await using db = await createTestDB(); + await using db = await createTestDB({ pure: true }); const { store, kysely } = db; const event1 = await eventFixture('event-1'); @@ -55,7 +55,7 @@ Deno.test('query events with domain search filter', async () => { }); Deno.test('query events with language search filter', async () => { - await using db = await createTestDB(); + await using db = await createTestDB({ pure: true }); const { store, kysely } = db; const en = genEvent({ kind: 1, content: 'hello world!' }); @@ -72,7 +72,7 @@ Deno.test('query events with language search filter', async () => { }); Deno.test('delete events', async () => { - await using db = await createTestDB(); + await using db = await createTestDB({ pure: true }); const { store } = db; const sk = generateSecretKey(); @@ -96,7 +96,7 @@ Deno.test('delete events', async () => { }); Deno.test("user cannot delete another user's event", async () => { - await using db = await createTestDB(); + await using db = await createTestDB({ pure: true }); const { store } = db; const event = genEvent({ kind: 1, content: 'hello world', created_at: 1 }); @@ -113,7 +113,7 @@ Deno.test("user cannot delete another user's event", async () => { }); Deno.test('admin can delete any event', async () => { - await using db = await createTestDB(); + await using db = await createTestDB({ pure: true }); const { store } = db; const sk = generateSecretKey(); @@ -137,7 +137,7 @@ Deno.test('admin can delete any event', async () => { }); Deno.test('throws a RelayError when inserting an event deleted by the admin', async () => { - await using db = await createTestDB(); + await using db = await createTestDB({ pure: true }); const { store } = db; const event = genEvent(); @@ -154,7 +154,7 @@ Deno.test('throws a RelayError when inserting an event deleted by the admin', as }); Deno.test('throws a RelayError when inserting an event deleted by a user', async () => { - await using db = await createTestDB(); + await using db = await createTestDB({ pure: true }); const { store } = db; const sk = generateSecretKey(); @@ -173,7 +173,7 @@ Deno.test('throws a RelayError when inserting an event deleted by a user', async }); Deno.test('inserting replaceable events', async () => { - await using db = await createTestDB(); + await using db = await createTestDB({ pure: true }); const { store } = db; const sk = generateSecretKey(); @@ -190,7 +190,7 @@ Deno.test('inserting replaceable events', async () => { }); Deno.test("throws a RelayError when querying an event with a large 'since'", async () => { - await using db = await createTestDB(); + await using db = await createTestDB({ pure: true }); const { store } = db; await assertRejects( @@ -201,7 +201,7 @@ Deno.test("throws a RelayError when querying an event with a large 'since'", asy }); Deno.test("throws a RelayError when querying an event with a large 'until'", async () => { - await using db = await createTestDB(); + await using db = await createTestDB({ pure: true }); const { store } = db; await assertRejects( @@ -212,7 +212,7 @@ Deno.test("throws a RelayError when querying an event with a large 'until'", asy }); Deno.test("throws a RelayError when querying an event with a large 'kind'", async () => { - await using db = await createTestDB(); + await using db = await createTestDB({ pure: true }); const { store } = db; await assertRejects( diff --git a/src/storages/EventsDB.ts b/src/storages/EventsDB.ts index 1bf3cd86..b303dad0 100644 --- a/src/storages/EventsDB.ts +++ b/src/storages/EventsDB.ts @@ -1,5 +1,6 @@ // deno-lint-ignore-file require-await +import { LanguageCode } from 'iso-639-1'; import { NPostgres, NPostgresSchema } from '@nostrify/db'; import { NIP50, NKinds, NostrEvent, NostrFilter, NSchema as n } from '@nostrify/nostrify'; import { Stickynotes } from '@soapbox/stickynotes'; @@ -12,6 +13,7 @@ import { RelayError } from '@/RelayError.ts'; import { isNostrId, isURL } from '@/utils.ts'; import { abortError } from '@/utils/abort.ts'; import { purifyEvent } from '@/utils/purify.ts'; +import { DittoEvent } from '@/interfaces/DittoEvent.ts'; /** Function to decide whether or not to index a tag. */ type TagCondition = ({ event, count, value }: { @@ -28,6 +30,8 @@ interface EventsDBOpts { pubkey: string; /** Timeout in milliseconds for database queries. */ timeout: number; + /** Whether the event returned should be a Nostr event or a Ditto event. Defaults to false. */ + pure?: boolean; } /** SQL database storage adapter for Nostr events. */ @@ -151,7 +155,7 @@ class EventsDB extends NPostgres { let query = super.getFilterQuery(trx, { ...filter, search: tokens.filter((t) => typeof t === 'string').join(' '), - }) as SelectQueryBuilder>; + }) as SelectQueryBuilder; const languages = new Set(); @@ -175,7 +179,7 @@ class EventsDB extends NPostgres { override async query( filters: NostrFilter[], opts: { signal?: AbortSignal; timeout?: number; limit?: number } = {}, - ): Promise { + ): Promise { filters = await this.expandFilters(filters); for (const filter of filters) { @@ -199,6 +203,29 @@ class EventsDB extends NPostgres { return super.query(filters, { ...opts, timeout: opts.timeout ?? this.opts.timeout }); } + /** Parse an event row from the database. */ + protected override parseEventRow(row: DittoTables['nostr_events']): DittoEvent { + const event: DittoEvent = { + id: row.id, + kind: row.kind, + pubkey: row.pubkey, + content: row.content, + created_at: Number(row.created_at), + tags: row.tags, + sig: row.sig, + }; + + if (this.opts.pure) { + return event; + } + + if (row.language) { + event.language = row.language as LanguageCode; + } + + return event; + } + /** Delete events based on filters from the database. */ override async remove(filters: NostrFilter[], opts: { signal?: AbortSignal; timeout?: number } = {}): Promise { this.console.debug('DELETE', JSON.stringify(filters)); diff --git a/src/test.ts b/src/test.ts index f4e720e1..3f2d1c38 100644 --- a/src/test.ts +++ b/src/test.ts @@ -1,3 +1,5 @@ +import ISO6391, { LanguageCode } from 'iso-639-1'; +import lande from 'lande'; import { NostrEvent } from '@nostrify/nostrify'; import { finalizeEvent, generateSecretKey } from 'nostr-tools'; @@ -33,7 +35,7 @@ export function genEvent(t: Partial = {}, sk: Uint8Array = generateS } /** Create a database for testing. It uses `TEST_DATABASE_URL`, or creates an in-memory database by default. */ -export async function createTestDB() { +export async function createTestDB(opts?: { pure?: boolean }) { const { testDatabaseUrl } = Conf; const { kysely } = DittoDB.create(testDatabaseUrl, { poolSize: 1 }); @@ -43,6 +45,7 @@ export async function createTestDB() { kysely, timeout: Conf.db.timeouts.default, pubkey: Conf.pubkey, + pure: opts?.pure ?? false, }); return { @@ -65,3 +68,15 @@ export async function createTestDB() { export function sleep(ms: number): Promise { return new Promise((resolve) => setTimeout(resolve, ms)); } + +export function getLanguage(text: string): LanguageCode | undefined { + const [topResult] = lande(text); + if (topResult) { + const [iso6393] = topResult; + const locale = new Intl.Locale(iso6393); + if (ISO6391.validate(locale.language)) { + return locale.language as LanguageCode; + } + } + return; +} diff --git a/src/translators/DeepLTranslator.test.ts b/src/translators/DeepLTranslator.test.ts new file mode 100644 index 00000000..385c10fc --- /dev/null +++ b/src/translators/DeepLTranslator.test.ts @@ -0,0 +1,52 @@ +import { assertEquals } from '@std/assert'; + +import { Conf } from '@/config.ts'; +import { DeepLTranslator } from '@/translators/DeepLTranslator.ts'; +import { getLanguage } from '@/test.ts'; + +const endpoint = Conf.deepLendpoint; +const apiKey = Conf.deepLapiKey; +const translationProvider = Conf.translationProvider; +const deepL = 'deepl'; + +Deno.test('DeepL translation with source language omitted', { + ignore: !(translationProvider === deepL && apiKey), +}, async () => { + const translator = new DeepLTranslator({ fetch: fetch, endpoint, apiKey: apiKey as string }); + + const data = await translator.translate( + [ + 'Bom dia amigos', + 'Meu nome é Patrick', + 'Eu irei morar na America, eu prometo. Mas antes, eu devo mencionar que o lande está interpretando este texto como italiano, que estranho.', + ], + undefined, + 'en', + ); + + assertEquals(data.source_lang, 'pt'); + assertEquals(getLanguage(data.results[0]), 'en'); + assertEquals(getLanguage(data.results[1]), 'en'); + assertEquals(getLanguage(data.results[2]), 'en'); +}); + +Deno.test('DeepL translation with source language set', { + ignore: !(translationProvider === deepL && apiKey), +}, async () => { + const translator = new DeepLTranslator({ fetch: fetch, endpoint, apiKey: apiKey as string }); + + const data = await translator.translate( + [ + 'Bom dia amigos', + 'Meu nome é Patrick', + 'Eu irei morar na America, eu prometo. Mas antes, eu devo mencionar que o lande está interpretando este texto como italiano, que estranho.', + ], + 'pt', + 'en', + ); + + assertEquals(data.source_lang, 'pt'); + assertEquals(getLanguage(data.results[0]), 'en'); + assertEquals(getLanguage(data.results[1]), 'en'); + assertEquals(getLanguage(data.results[2]), 'en'); +}); diff --git a/src/translators/DeepLTranslator.ts b/src/translators/DeepLTranslator.ts new file mode 100644 index 00000000..d97c59a1 --- /dev/null +++ b/src/translators/DeepLTranslator.ts @@ -0,0 +1,93 @@ +import { z } from 'zod'; + +import { DittoTranslator, Provider, SourceLanguage, TargetLanguage } from '@/translators/translator.ts'; +import { languageSchema } from '@/schema.ts'; + +interface DeepLTranslatorOpts { + /** DeepL endpoint to use. Default: 'https://api.deepl.com' */ + endpoint?: string; + /** DeepL API key. */ + apiKey: string; + /** Custom fetch implementation. */ + fetch?: typeof fetch; +} + +export class DeepLTranslator implements DittoTranslator { + private readonly endpoint: string; + private readonly apiKey: string; + private readonly fetch: typeof fetch; + private static provider: Provider = 'DeepL.com'; + + constructor(opts: DeepLTranslatorOpts) { + this.endpoint = opts.endpoint ?? 'https://api.deepl.com'; + this.fetch = opts.fetch ?? globalThis.fetch; + this.apiKey = opts.apiKey; + } + + async translate( + texts: string[], + source: SourceLanguage | undefined, + dest: TargetLanguage, + opts?: { signal?: AbortSignal }, + ) { + const data = (await this.translateMany(texts, source, dest, opts)).translations; + + return { + results: data.map((value) => value.text), + source_lang: data[0].detected_source_language, + }; + } + + /** DeepL translate request. */ + private async translateMany( + texts: string[], + source: SourceLanguage | undefined, + targetLanguage: TargetLanguage, + opts?: { signal?: AbortSignal }, + ) { + const body: any = { + text: texts, + target_lang: targetLanguage.toUpperCase(), + tag_handling: 'html', + split_sentences: '1', + }; + if (source) { + body.source_lang = source.toUpperCase(); + } + + const headers = new Headers(); + headers.append('Authorization', 'DeepL-Auth-Key' + ' ' + this.apiKey); + headers.append('Content-Type', 'application/json'); + + const request = new Request(this.endpoint + '/v2/translate', { + method: 'POST', + body: JSON.stringify(body), + headers, + signal: opts?.signal, + }); + + const response = await this.fetch(request); + const json = await response.json(); + const data = DeepLTranslator.schema().parse(json); + + return data; + } + + /** DeepL response schema. + * https://developers.deepl.com/docs/api-reference/translate/openapi-spec-for-text-translation */ + private static schema() { + return z.object({ + translations: z.array( + z.object({ + detected_source_language: languageSchema, + text: z.string(), + }), + ), + }); + } + + /** DeepL provider. */ + getProvider(): Provider { + return DeepLTranslator.provider; + } +} diff --git a/src/translators/LibreTranslateTranslator.test.ts b/src/translators/LibreTranslateTranslator.test.ts new file mode 100644 index 00000000..6b87cc91 --- /dev/null +++ b/src/translators/LibreTranslateTranslator.test.ts @@ -0,0 +1,52 @@ +import { assertEquals } from '@std/assert'; + +import { Conf } from '@/config.ts'; +import { LibreTranslateTranslator } from '@/translators/LibreTranslateTranslator.ts'; +import { getLanguage } from '@/test.ts'; + +const endpoint = Conf.libreTranslateEndpoint; +const apiKey = Conf.libreTranslateApiKey; +const translationProvider = Conf.translationProvider; +const libreTranslate = 'libretranslate'; + +Deno.test('LibreTranslate translation with source language omitted', { + ignore: !(translationProvider === libreTranslate && apiKey), +}, async () => { + const translator = new LibreTranslateTranslator({ fetch: fetch, endpoint, apiKey: apiKey as string }); + + const data = await translator.translate( + [ + 'Bom dia amigos', + 'Meu nome é Patrick, um nome belo ou feio? A questão é mais profunda do que parece.', + 'A respiração é mais importante do que comer e tomar agua.', + ], + undefined, + 'ca', + ); + + assertEquals(data.source_lang, 'pt'); + assertEquals(getLanguage(data.results[0]), 'ca'); + assertEquals(getLanguage(data.results[1]), 'ca'); + assertEquals(getLanguage(data.results[2]), 'ca'); +}); + +Deno.test('LibreTranslate translation with source language set', { + ignore: !(translationProvider === libreTranslate && apiKey), +}, async () => { + const translator = new LibreTranslateTranslator({ fetch: fetch, endpoint, apiKey: apiKey as string }); + + const data = await translator.translate( + [ + 'Bom dia amigos', + 'Meu nome é Patrick, um nome belo ou feio? A questão é mais profunda do que parece.', + 'A respiração é mais importante do que comer e tomar agua.', + ], + 'pt', + 'ca', + ); + + assertEquals(data.source_lang, 'pt'); + assertEquals(getLanguage(data.results[0]), 'ca'); + assertEquals(getLanguage(data.results[1]), 'ca'); + assertEquals(getLanguage(data.results[2]), 'ca'); +}); diff --git a/src/translators/LibreTranslateTranslator.ts b/src/translators/LibreTranslateTranslator.ts new file mode 100644 index 00000000..d632c71e --- /dev/null +++ b/src/translators/LibreTranslateTranslator.ts @@ -0,0 +1,92 @@ +import { LanguageCode } from 'iso-639-1'; +import { z } from 'zod'; + +import { DittoTranslator, Provider, SourceLanguage, TargetLanguage } from '@/translators/translator.ts'; +import { languageSchema } from '@/schema.ts'; + +interface LibreTranslateTranslatorOpts { + /** Libretranslate endpoint to use. Default: 'https://libretranslate.com' */ + endpoint?: string; + /** Libretranslate API key. */ + apiKey: string; + /** Custom fetch implementation. */ + fetch?: typeof fetch; +} + +export class LibreTranslateTranslator implements DittoTranslator { + private readonly endpoint: string; + private readonly apiKey: string; + private readonly fetch: typeof fetch; + private static provider: Provider = 'libretranslate.com'; + + constructor(opts: LibreTranslateTranslatorOpts) { + this.endpoint = opts.endpoint ?? 'https://libretranslate.com'; + this.fetch = opts.fetch ?? globalThis.fetch; + this.apiKey = opts.apiKey; + } + + async translate( + texts: string[], + source: SourceLanguage | undefined, + dest: TargetLanguage, + opts?: { signal?: AbortSignal }, + ) { + const translations = await Promise.all( + texts.map((text) => this.translateOne(text, source, dest, 'html', { signal: opts?.signal })), + ); + + return { + results: translations.map((value) => value.translatedText), + source_lang: translations[0]?.detectedLanguage?.language ?? source as LanguageCode, // cast is ok + }; + } + + private async translateOne( + q: string, + sourceLanguage: string | undefined, + targetLanguage: string, + format: 'html' | 'text', + opts?: { signal?: AbortSignal }, + ) { + const body = { + q, + source: sourceLanguage?.toLowerCase() ?? 'auto', + target: targetLanguage.toLowerCase(), + format, + api_key: this.apiKey, + }; + + const headers = new Headers(); + headers.append('Content-Type', 'application/json'); + + const request = new Request(this.endpoint + '/translate', { + method: 'POST', + body: JSON.stringify(body), + headers, + signal: opts?.signal, + }); + + const response = await this.fetch(request); + const json = await response.json(); + const data = LibreTranslateTranslator.schema().parse(json); + + return data; + } + + /** Libretranslate response schema. + * https://libretranslate.com/docs/#/translate/post_translate */ + private static schema() { + return z.object({ + translatedText: z.string(), + /** This field is only available if the 'source' is set to 'auto' */ + detectedLanguage: z.object({ + language: languageSchema, + }).optional(), + }); + } + + /** LibreTranslate provider. */ + getProvider(): Provider { + return LibreTranslateTranslator.provider; + } +} diff --git a/src/translators/translator.ts b/src/translators/translator.ts new file mode 100644 index 00000000..29874964 --- /dev/null +++ b/src/translators/translator.ts @@ -0,0 +1,59 @@ +import { LanguageCode } from 'iso-639-1'; +import { LRUCache } from 'lru-cache'; + +import { Time } from '@/utils/time.ts'; + +/** Supported providers. */ +export type Provider = 'DeepL.com' | 'libretranslate.com'; + +/** Original language of the post */ +export type SourceLanguage = LanguageCode; + +/** Content will be translated to this language */ +export type TargetLanguage = LanguageCode; + +/** Entity returned by DittoTranslator and LRUCache */ +type DittoTranslation = { + data: MastodonTranslation; +}; + +export type MastodonTranslation = { + /** HTML-encoded translated content of the status. */ + content: string; + /** The translated spoiler warning of the status. */ + spoiler_text: string; + /** The translated media descriptions of the status. */ + media_attachments: { id: string; description: string }[]; + /** The translated poll of the status. */ + poll: { id: string; options: { title: string }[] } | null; + //** The language of the source text, as auto-detected by the machine translation provider. */ + detected_source_language: SourceLanguage; + /** The service that provided the machine translation. */ + provider: Provider; +}; + +/** DittoTranslator class, used for status translation. */ +export interface DittoTranslator { + /** Translate the 'content' into 'targetLanguage'. */ + translate( + texts: string[], + /** The language of the source text/status. */ + sourceLanguage: SourceLanguage | undefined, + /** The status content will be translated into this language. */ + targetLanguage: TargetLanguage, + /** Custom options. */ + opts?: { signal?: AbortSignal }, + ): Promise<{ results: string[]; source_lang: SourceLanguage }>; + getProvider(): Provider; +} + +/** Includes the TARGET language and the status id. + * Example: en-390f5b01b49a8ee6e13fe917420c023d889b3da8e983a14c9e84587e43d12c15 + * The example above means: + * I want the status 390f5b01b49a8ee6e13fe917420c023d889b3da8e983a14c9e84587e43d12c15 translated to english (if it exists in the LRUCache). */ +export type dittoTranslationsKey = `${TargetLanguage}-${string}`; + +export const dittoTranslations = new LRUCache({ + max: 1000, + ttl: Time.hours(6), +}); diff --git a/src/views/mastodon/statuses.ts b/src/views/mastodon/statuses.ts index e21c9e1c..48d8e099 100644 --- a/src/views/mastodon/statuses.ts +++ b/src/views/mastodon/statuses.ts @@ -113,7 +113,7 @@ async function renderStatus(event: DittoEvent, opts: RenderStatusOpts): Promise< sensitive: !!cw, spoiler_text: (cw ? cw[1] : subject?.[1]) || '', visibility: 'public', - language: event.tags.find((tag) => tag[0] === 'l' && tag[2] === 'ISO-639-1')?.[1] || null, + language: event.language ?? null, replies_count: event.event_stats?.replies_count ?? 0, reblogs_count: event.event_stats?.reposts_count ?? 0, favourites_count: event.event_stats?.reactions['+'] ?? 0,