Merge branch 'translate-status' into 'main'

Feat: Support Machine Translations

Closes #201

See merge request soapbox-pub/ditto!537
This commit is contained in:
Alex Gleason 2024-10-09 18:59:04 +00:00
commit 91b82c36a8
20 changed files with 650 additions and 33 deletions

View file

@ -41,7 +41,7 @@
"@isaacs/ttlcache": "npm:@isaacs/ttlcache@^1.4.1", "@isaacs/ttlcache": "npm:@isaacs/ttlcache@^1.4.1",
"@lambdalisue/async": "jsr:@lambdalisue/async@^2.1.1", "@lambdalisue/async": "jsr:@lambdalisue/async@^2.1.1",
"@noble/secp256k1": "npm:@noble/secp256k1@^2.0.0", "@noble/secp256k1": "npm:@noble/secp256k1@^2.0.0",
"@nostrify/db": "jsr:@nostrify/db@^0.35.0", "@nostrify/db": "jsr:@nostrify/db@^0.36.1",
"@nostrify/nostrify": "jsr:@nostrify/nostrify@^0.36.0", "@nostrify/nostrify": "jsr:@nostrify/nostrify@^0.36.0",
"@nostrify/policies": "jsr:@nostrify/policies@^0.35.0", "@nostrify/policies": "jsr:@nostrify/policies@^0.35.0",
"@scure/base": "npm:@scure/base@^1.1.6", "@scure/base": "npm:@scure/base@^1.1.6",

10
deno.lock generated
View file

@ -22,7 +22,7 @@
"jsr:@gleasonator/policy@0.8.0": "0.8.0", "jsr:@gleasonator/policy@0.8.0": "0.8.0",
"jsr:@hono/hono@^4.4.6": "4.6.2", "jsr:@hono/hono@^4.4.6": "4.6.2",
"jsr:@lambdalisue/async@^2.1.1": "2.1.1", "jsr:@lambdalisue/async@^2.1.1": "2.1.1",
"jsr:@nostrify/db@0.35": "0.35.0", "jsr:@nostrify/db@~0.36.1": "0.36.1",
"jsr:@nostrify/nostrify@0.31": "0.31.0", "jsr:@nostrify/nostrify@0.31": "0.31.0",
"jsr:@nostrify/nostrify@0.32": "0.32.0", "jsr:@nostrify/nostrify@0.32": "0.32.0",
"jsr:@nostrify/nostrify@0.35": "0.35.0", "jsr:@nostrify/nostrify@0.35": "0.35.0",
@ -270,10 +270,10 @@
"@lambdalisue/async@2.1.1": { "@lambdalisue/async@2.1.1": {
"integrity": "1fc9bc6f4ed50215cd2f7217842b18cea80f81c25744f88f8c5eb4be5a1c9ab4" "integrity": "1fc9bc6f4ed50215cd2f7217842b18cea80f81c25744f88f8c5eb4be5a1c9ab4"
}, },
"@nostrify/db@0.35.0": { "@nostrify/db@0.36.1": {
"integrity": "637191c41812544e361b7997dc44ea098f8bd7efebb28f37a8a7142a0ecada8d", "integrity": "b65b89ca6fe98d9dbcc0402b5c9c07b8430c2c91f84ba4128ff2eeed70c3d49f",
"dependencies": [ "dependencies": [
"jsr:@nostrify/nostrify@0.35", "jsr:@nostrify/nostrify@0.36",
"jsr:@nostrify/types@0.35", "jsr:@nostrify/types@0.35",
"npm:kysely@~0.27.3", "npm:kysely@~0.27.3",
"npm:nostr-tools@^2.7.0" "npm:nostr-tools@^2.7.0"
@ -2048,7 +2048,7 @@
"jsr:@gfx/canvas-wasm@~0.4.2", "jsr:@gfx/canvas-wasm@~0.4.2",
"jsr:@hono/hono@^4.4.6", "jsr:@hono/hono@^4.4.6",
"jsr:@lambdalisue/async@^2.1.1", "jsr:@lambdalisue/async@^2.1.1",
"jsr:@nostrify/db@0.35", "jsr:@nostrify/db@~0.36.1",
"jsr:@nostrify/nostrify@0.36", "jsr:@nostrify/nostrify@0.36",
"jsr:@nostrify/policies@0.35", "jsr:@nostrify/policies@0.35",
"jsr:@soapbox/kysely-pglite@1", "jsr:@soapbox/kysely-pglite@1",

View file

@ -1,4 +1,4 @@
import { Context, Env as HonoEnv, Handler, Hono, Input as HonoInput, MiddlewareHandler } from '@hono/hono'; import { type Context, Env as HonoEnv, Handler, Hono, Input as HonoInput, MiddlewareHandler } from '@hono/hono';
import { cors } from '@hono/hono/cors'; import { cors } from '@hono/hono/cors';
import { serveStatic } from '@hono/hono/deno'; import { serveStatic } from '@hono/hono/deno';
import { logger } from '@hono/hono/logger'; import { logger } from '@hono/hono/logger';
@ -112,6 +112,7 @@ import {
trendingStatusesController, trendingStatusesController,
trendingTagsController, trendingTagsController,
} from '@/controllers/api/trends.ts'; } from '@/controllers/api/trends.ts';
import { translateController } from '@/controllers/api/translate.ts';
import { errorHandler } from '@/controllers/error.ts'; import { errorHandler } from '@/controllers/error.ts';
import { frontendController } from '@/controllers/frontend.ts'; import { frontendController } from '@/controllers/frontend.ts';
import { metricsController } from '@/controllers/metrics.ts'; import { metricsController } from '@/controllers/metrics.ts';
@ -128,6 +129,8 @@ import { requireSigner } from '@/middleware/requireSigner.ts';
import { signerMiddleware } from '@/middleware/signerMiddleware.ts'; import { signerMiddleware } from '@/middleware/signerMiddleware.ts';
import { storeMiddleware } from '@/middleware/storeMiddleware.ts'; import { storeMiddleware } from '@/middleware/storeMiddleware.ts';
import { uploaderMiddleware } from '@/middleware/uploaderMiddleware.ts'; import { uploaderMiddleware } from '@/middleware/uploaderMiddleware.ts';
import { DittoTranslator } from '@/translators/translator.ts';
import { translatorMiddleware } from '@/middleware/translatorMiddleware.ts';
interface AppEnv extends HonoEnv { interface AppEnv extends HonoEnv {
Variables: { Variables: {
@ -143,6 +146,8 @@ interface AppEnv extends HonoEnv {
pagination: { since?: number; until?: number; limit: number }; pagination: { since?: number; until?: number; limit: number };
/** Normalized list pagination params. */ /** Normalized list pagination params. */
listPagination: { offset: number; limit: number }; listPagination: { offset: number; limit: number };
/** Translation service. */
translator?: DittoTranslator;
}; };
} }
@ -222,6 +227,13 @@ app.post('/api/v1/statuses/:id{[0-9a-f]{64}}/bookmark', requireSigner, bookmarkC
app.post('/api/v1/statuses/:id{[0-9a-f]{64}}/unbookmark', requireSigner, unbookmarkController); app.post('/api/v1/statuses/:id{[0-9a-f]{64}}/unbookmark', requireSigner, unbookmarkController);
app.post('/api/v1/statuses/:id{[0-9a-f]{64}}/pin', requireSigner, pinController); app.post('/api/v1/statuses/:id{[0-9a-f]{64}}/pin', requireSigner, pinController);
app.post('/api/v1/statuses/:id{[0-9a-f]{64}}/unpin', requireSigner, unpinController); app.post('/api/v1/statuses/:id{[0-9a-f]{64}}/unpin', requireSigner, unpinController);
// Machine-translate a status. The handler chain requires a signer, applies a
// rate limit (presumably 30 requests per minute — confirm against
// `rateLimitMiddleware`), attaches the configured translator, then translates.
app.post(
'/api/v1/statuses/:id{[0-9a-f]{64}}/translate',
requireSigner,
rateLimitMiddleware(30, Time.minutes(1)),
translatorMiddleware,
translateController,
);
app.post('/api/v1/statuses/:id{[0-9a-f]{64}}/reblog', requireSigner, reblogStatusController); app.post('/api/v1/statuses/:id{[0-9a-f]{64}}/reblog', requireSigner, reblogStatusController);
app.post('/api/v1/statuses/:id{[0-9a-f]{64}}/unreblog', requireSigner, unreblogStatusController); app.post('/api/v1/statuses/:id{[0-9a-f]{64}}/unreblog', requireSigner, unreblogStatusController);
app.post('/api/v1/statuses', requireSigner, createStatusController); app.post('/api/v1/statuses', requireSigner, createStatusController);

View file

@ -271,6 +271,26 @@ class Conf {
static get preferredLanguages(): LanguageCode[] | undefined { static get preferredLanguages(): LanguageCode[] | undefined {
return Deno.env.get('DITTO_LANGUAGES')?.split(',')?.filter(ISO6391.validate) as LanguageCode[]; return Deno.env.get('DITTO_LANGUAGES')?.split(',')?.filter(ISO6391.validate) as LanguageCode[];
} }
/** Name of the provider used to translate posts, read from the `TRANSLATION_PROVIDER` environment variable. */
static get translationProvider(): string | undefined {
  const provider = Deno.env.get('TRANSLATION_PROVIDER');
  return provider;
}
/** Base URL of the DeepL API, read from the `DEEPL_ENDPOINT` environment variable. */
static get deepLendpoint(): string | undefined {
  const endpoint = Deno.env.get('DEEPL_ENDPOINT');
  return endpoint;
}
/** DeepL API key, read from the `DEEPL_API_KEY` environment variable. */
static get deepLapiKey(): string | undefined {
  const key = Deno.env.get('DEEPL_API_KEY');
  return key;
}
/** Base URL of the LibreTranslate API, read from the `LIBRETRANSLATE_ENDPOINT` environment variable. */
static get libreTranslateEndpoint(): string | undefined {
  const endpoint = Deno.env.get('LIBRETRANSLATE_ENDPOINT');
  return endpoint;
}
/** LibreTranslate API key, read from the `LIBRETRANSLATE_API_KEY` environment variable. */
static get libreTranslateApiKey(): string | undefined {
  const key = Deno.env.get('LIBRETRANSLATE_API_KEY');
  return key;
}
/** Cache settings. */ /** Cache settings. */
static caches = { static caches = {
/** NIP-05 cache settings. */ /** NIP-05 cache settings. */

View file

@ -129,7 +129,7 @@ const instanceV2Controller: AppController = async (c) => {
max_expiration: 2629746, max_expiration: 2629746,
}, },
translation: { translation: {
enabled: false, enabled: true,
}, },
}, },
nostr: { nostr: {

View file

@ -0,0 +1,140 @@
import { LanguageCode } from 'iso-639-1';
import { z } from 'zod';
import { AppController } from '@/app.ts';
import { localeSchema } from '@/schema.ts';
import { dittoTranslations, dittoTranslationsKey, MastodonTranslation } from '@/translators/translator.ts';
import { parseBody } from '@/utils/api.ts';
import { getEvent } from '@/queries.ts';
import { renderStatus } from '@/views/mastodon/statuses.ts';
/** Request body of the translate endpoint: `lang` is the target locale. */
const translateSchema = z.object({ lang: localeSchema });
/**
 * Translate a status into the language requested in the body's `lang` field.
 *
 * Responses:
 * - 422 when the request body fails validation.
 * - 500 when no translator was attached to the context.
 * - 400 when the event is not found, when the target language equals the
 *   event's language, or when the rendered status has no content.
 * - 200 with the translation, served from `dittoTranslations` when a cached
 *   entry exists for `${lang}-${id}`.
 * - 503 when the translation itself fails for any reason.
 *
 * The content, spoiler text, media descriptions and poll option titles are
 * sent to the translator in a single batch.
 */
const translateController: AppController = async (c) => {
  const result = translateSchema.safeParse(await parseBody(c.req.raw));
  const { signal } = c.req.raw;

  if (!result.success) {
    return c.json({ error: 'Bad request.', schema: result.error }, 422);
  }

  const translator = c.get('translator');
  if (!translator) {
    return c.json({ error: 'No translator configured.' }, 500);
  }

  // Only the first two characters of the locale's language subtag are used.
  const lang = result.data.lang.language.slice(0, 2) as LanguageCode;
  const id = c.req.param('id');

  const event = await getEvent(id, { signal });
  if (!event) {
    return c.json({ error: 'Record not found' }, 400);
  }

  const viewerPubkey = await c.get('signer')?.getPublicKey();

  if (lang.toLowerCase() === event.language?.toLowerCase()) {
    return c.json({ error: 'Source and target languages are the same. No translation needed.' }, 400);
  }

  const status = await renderStatus(event, { viewerPubkey });
  if (!status?.content) {
    // Validation already succeeded here, so there is no schema error to report.
    return c.json({ error: 'Bad request.' }, 400);
  }

  const translatedId = `${lang}-${id}` as dittoTranslationsKey;
  const translationCache = dittoTranslations.get(translatedId);
  if (translationCache) {
    return c.json(translationCache.data, 200);
  }

  const mediaAttachments = status?.media_attachments.map((value) => {
    return {
      id: value.id,
      description: value.description ?? '',
    };
  }) ?? [];

  try {
    /** Describes which part of the status a given entry of `texts` belongs to. */
    type TranslationSlot = 'content' | 'spoilerText' | 'poll' | { type: 'media'; id: string };

    // `texts[i]` is always described by `slots[i]`. Pushing to both arrays
    // together keeps them aligned no matter how many media attachments or
    // poll options the status has.
    const texts: string[] = [];
    const slots: TranslationSlot[] = [];
    const enqueue = (slot: TranslationSlot, text: string): void => {
      slots.push(slot);
      texts.push(text);
    };

    const poll = status.poll as MastodonTranslation['poll'];

    const mastodonTranslation: MastodonTranslation = {
      content: '',
      spoiler_text: '',
      media_attachments: [],
      poll: poll?.options ? { id: poll.id, options: [] } : null,
      detected_source_language: event.language ?? 'en',
      provider: translator.getProvider(),
    };

    // Content
    enqueue('content', status.content);

    // Spoiler text
    if (status.spoiler_text) {
      enqueue('spoilerText', status.spoiler_text);
    }

    // Media descriptions
    for (const attachment of mediaAttachments) {
      enqueue({ type: 'media', id: attachment.id }, attachment.description);
    }

    // Poll option titles
    if (poll?.options) {
      for (const option of poll.options) {
        enqueue('poll', option.title);
      }
    }

    const data = await translator.translate(texts, event.language, lang, { signal });
    const translatedTexts = data.results;

    // Refuse to build (and cache) a translation with missing parts.
    if (translatedTexts.length !== texts.length) {
      throw new Error(`Expected ${texts.length} translated texts, got ${translatedTexts.length}`);
    }

    for (const [i, slot] of slots.entries()) {
      const translated = translatedTexts[i];

      if (slot === 'content') {
        mastodonTranslation.content = translated;
      } else if (slot === 'spoilerText') {
        mastodonTranslation.spoiler_text = translated;
      } else if (slot === 'poll') {
        mastodonTranslation.poll?.options.push({ title: translated });
      } else {
        mastodonTranslation.media_attachments.push({
          id: slot.id,
          description: translated,
        });
      }
    }

    mastodonTranslation.detected_source_language = data.source_lang;

    dittoTranslations.set(translatedId, { data: mastodonTranslation });
    return c.json(mastodonTranslation, 200);
  } catch {
    // Any failure while translating is reported as the service being unavailable.
    return c.json({ error: 'Service Unavailable' }, 503);
  }
};
export { translateController };

View file

@ -18,6 +18,7 @@ import * as pipeline from '@/pipeline.ts';
import { RelayError } from '@/RelayError.ts'; import { RelayError } from '@/RelayError.ts';
import { Storages } from '@/storages.ts'; import { Storages } from '@/storages.ts';
import { Time } from '@/utils/time.ts'; import { Time } from '@/utils/time.ts';
import { purifyEvent } from '@/utils/purify.ts';
/** Limit of initial events returned for a subscription. */ /** Limit of initial events returned for a subscription. */
const FILTER_LIMIT = 100; const FILTER_LIMIT = 100;
@ -105,7 +106,7 @@ function connectStream(socket: WebSocket, ip: string | undefined) {
try { try {
for (const event of await store.query(filters, { limit: FILTER_LIMIT, timeout: Conf.db.timeouts.relay })) { for (const event of await store.query(filters, { limit: FILTER_LIMIT, timeout: Conf.db.timeouts.relay })) {
send(['EVENT', subId, event]); send(['EVENT', subId, purifyEvent(event)]);
} }
} catch (e: any) { } catch (e: any) {
if (e instanceof RelayError) { if (e instanceof RelayError) {
@ -137,7 +138,7 @@ function connectStream(socket: WebSocket, ip: string | undefined) {
relayEventsCounter.inc({ kind: event.kind.toString() }); relayEventsCounter.inc({ kind: event.kind.toString() });
try { try {
// This will store it (if eligible) and run other side-effects. // This will store it (if eligible) and run other side-effects.
await pipeline.handleEvent(event, AbortSignal.timeout(1000)); await pipeline.handleEvent(purifyEvent(event), AbortSignal.timeout(1000));
send(['OK', event.id, true, '']); send(['OK', event.id, true, '']);
} catch (e) { } catch (e) {
if (e instanceof RelayError) { if (e instanceof RelayError) {

View file

@ -1,5 +1,3 @@
import { Nullable } from 'kysely';
import { NPostgresSchema } from '@nostrify/db'; import { NPostgresSchema } from '@nostrify/db';
export interface DittoTables extends NPostgresSchema { export interface DittoTables extends NPostgresSchema {
@ -12,7 +10,7 @@ export interface DittoTables extends NPostgresSchema {
} }
type NostrEventsRow = NPostgresSchema['nostr_events'] & { type NostrEventsRow = NPostgresSchema['nostr_events'] & {
language: Nullable<string>; language: string | null;
}; };
interface AuthorStatsRow { interface AuthorStatsRow {

View file

@ -1,4 +1,5 @@
import { NostrEvent } from '@nostrify/nostrify'; import { NostrEvent } from '@nostrify/nostrify';
import { LanguageCode } from 'iso-639-1';
/** Ditto internal stats for the event's author. */ /** Ditto internal stats for the event's author. */
export interface AuthorStats { export interface AuthorStats {
@ -43,4 +44,6 @@ export interface DittoEvent extends NostrEvent {
zap_sender?: DittoEvent | string; zap_sender?: DittoEvent | string;
zap_amount?: number; zap_amount?: number;
zap_message?: string; zap_message?: string;
/** Language of the event (kind 1s are more accurate). */
language?: LanguageCode;
} }

View file

@ -0,0 +1,39 @@
import { AppMiddleware } from '@/app.ts';
import { Conf } from '@/config.ts';
import { fetchWorker } from '@/workers/fetch.ts';
import { DeepLTranslator } from '@/translators/DeepLTranslator.ts';
import { LibreTranslateTranslator } from '@/translators/LibreTranslateTranslator.ts';
/**
 * Attach a translator to the request context under the `translator` key.
 *
 * The translator is chosen by `Conf.translationProvider` (`deepl` or
 * `libretranslate`) and is only set when the matching API key is configured.
 * In every other case the context is left untouched and the request proceeds.
 */
export const translatorMiddleware: AppMiddleware = async (c, next) => {
  const {
    deepLendpoint,
    deepLapiKey,
    libreTranslateEndpoint,
    libreTranslateApiKey,
    translationProvider,
  } = Conf;

  if (translationProvider === 'deepl') {
    if (deepLapiKey) {
      const translator = new DeepLTranslator({
        endpoint: deepLendpoint,
        apiKey: deepLapiKey,
        fetch: fetchWorker,
      });
      c.set('translator', translator);
    }
  } else if (translationProvider === 'libretranslate') {
    if (libreTranslateApiKey) {
      const translator = new LibreTranslateTranslator({
        endpoint: libreTranslateEndpoint,
        apiKey: libreTranslateApiKey,
        fetch: fetchWorker,
      });
      c.set('translator', translator);
    }
  }

  await next();
};

View file

@ -1,4 +1,4 @@
import ISO6391 from 'iso-639-1'; import ISO6391, { LanguageCode } from 'iso-639-1';
import { z } from 'zod'; import { z } from 'zod';
/** Validates individual items in an array, dropping any that aren't valid. */ /** Validates individual items in an array, dropping any that aren't valid. */
@ -41,7 +41,8 @@ const fileSchema = z.custom<File>((value) => value instanceof File);
const percentageSchema = z.coerce.number().int().gte(1).lte(100); const percentageSchema = z.coerce.number().int().gte(1).lte(100);
const languageSchema = z.string().transform((val, ctx) => { const languageSchema = z.string().transform<LanguageCode>((val, ctx) => {
val = val.toLowerCase();
if (!ISO6391.validate(val)) { if (!ISO6391.validate(val)) {
ctx.addIssue({ ctx.addIssue({
code: z.ZodIssueCode.custom, code: z.ZodIssueCode.custom,
@ -49,7 +50,19 @@ const languageSchema = z.string().transform((val, ctx) => {
}); });
return z.NEVER; return z.NEVER;
} }
return val; return val as LanguageCode;
});
const localeSchema = z.string().transform<Intl.Locale>((val, ctx) => {
try {
return new Intl.Locale(val);
} catch {
ctx.addIssue({
code: z.ZodIssueCode.custom,
message: 'Invalid locale',
});
return z.NEVER;
}
}); });
export { export {
@ -59,6 +72,7 @@ export {
filteredArray, filteredArray,
hashtagSchema, hashtagSchema,
languageSchema, languageSchema,
localeSchema,
percentageSchema, percentageSchema,
safeUrlSchema, safeUrlSchema,
}; };

View file

@ -7,7 +7,7 @@ import { Conf } from '@/config.ts';
import { createTestDB } from '@/test.ts'; import { createTestDB } from '@/test.ts';
Deno.test('count filters', async () => { Deno.test('count filters', async () => {
await using db = await createTestDB(); await using db = await createTestDB({ pure: true });
const { store } = db; const { store } = db;
const event1 = await eventFixture('event-1'); const event1 = await eventFixture('event-1');
@ -18,7 +18,7 @@ Deno.test('count filters', async () => {
}); });
Deno.test('insert and filter events', async () => { Deno.test('insert and filter events', async () => {
await using db = await createTestDB(); await using db = await createTestDB({ pure: true });
const { store } = db; const { store } = db;
const event1 = await eventFixture('event-1'); const event1 = await eventFixture('event-1');
@ -35,7 +35,7 @@ Deno.test('insert and filter events', async () => {
}); });
Deno.test('query events with domain search filter', async () => { Deno.test('query events with domain search filter', async () => {
await using db = await createTestDB(); await using db = await createTestDB({ pure: true });
const { store, kysely } = db; const { store, kysely } = db;
const event1 = await eventFixture('event-1'); const event1 = await eventFixture('event-1');
@ -55,7 +55,7 @@ Deno.test('query events with domain search filter', async () => {
}); });
Deno.test('query events with language search filter', async () => { Deno.test('query events with language search filter', async () => {
await using db = await createTestDB(); await using db = await createTestDB({ pure: true });
const { store, kysely } = db; const { store, kysely } = db;
const en = genEvent({ kind: 1, content: 'hello world!' }); const en = genEvent({ kind: 1, content: 'hello world!' });
@ -72,7 +72,7 @@ Deno.test('query events with language search filter', async () => {
}); });
Deno.test('delete events', async () => { Deno.test('delete events', async () => {
await using db = await createTestDB(); await using db = await createTestDB({ pure: true });
const { store } = db; const { store } = db;
const sk = generateSecretKey(); const sk = generateSecretKey();
@ -96,7 +96,7 @@ Deno.test('delete events', async () => {
}); });
Deno.test("user cannot delete another user's event", async () => { Deno.test("user cannot delete another user's event", async () => {
await using db = await createTestDB(); await using db = await createTestDB({ pure: true });
const { store } = db; const { store } = db;
const event = genEvent({ kind: 1, content: 'hello world', created_at: 1 }); const event = genEvent({ kind: 1, content: 'hello world', created_at: 1 });
@ -113,7 +113,7 @@ Deno.test("user cannot delete another user's event", async () => {
}); });
Deno.test('admin can delete any event', async () => { Deno.test('admin can delete any event', async () => {
await using db = await createTestDB(); await using db = await createTestDB({ pure: true });
const { store } = db; const { store } = db;
const sk = generateSecretKey(); const sk = generateSecretKey();
@ -137,7 +137,7 @@ Deno.test('admin can delete any event', async () => {
}); });
Deno.test('throws a RelayError when inserting an event deleted by the admin', async () => { Deno.test('throws a RelayError when inserting an event deleted by the admin', async () => {
await using db = await createTestDB(); await using db = await createTestDB({ pure: true });
const { store } = db; const { store } = db;
const event = genEvent(); const event = genEvent();
@ -154,7 +154,7 @@ Deno.test('throws a RelayError when inserting an event deleted by the admin', as
}); });
Deno.test('throws a RelayError when inserting an event deleted by a user', async () => { Deno.test('throws a RelayError when inserting an event deleted by a user', async () => {
await using db = await createTestDB(); await using db = await createTestDB({ pure: true });
const { store } = db; const { store } = db;
const sk = generateSecretKey(); const sk = generateSecretKey();
@ -173,7 +173,7 @@ Deno.test('throws a RelayError when inserting an event deleted by a user', async
}); });
Deno.test('inserting replaceable events', async () => { Deno.test('inserting replaceable events', async () => {
await using db = await createTestDB(); await using db = await createTestDB({ pure: true });
const { store } = db; const { store } = db;
const sk = generateSecretKey(); const sk = generateSecretKey();
@ -190,7 +190,7 @@ Deno.test('inserting replaceable events', async () => {
}); });
Deno.test("throws a RelayError when querying an event with a large 'since'", async () => { Deno.test("throws a RelayError when querying an event with a large 'since'", async () => {
await using db = await createTestDB(); await using db = await createTestDB({ pure: true });
const { store } = db; const { store } = db;
await assertRejects( await assertRejects(
@ -201,7 +201,7 @@ Deno.test("throws a RelayError when querying an event with a large 'since'", asy
}); });
Deno.test("throws a RelayError when querying an event with a large 'until'", async () => { Deno.test("throws a RelayError when querying an event with a large 'until'", async () => {
await using db = await createTestDB(); await using db = await createTestDB({ pure: true });
const { store } = db; const { store } = db;
await assertRejects( await assertRejects(
@ -212,7 +212,7 @@ Deno.test("throws a RelayError when querying an event with a large 'until'", asy
}); });
Deno.test("throws a RelayError when querying an event with a large 'kind'", async () => { Deno.test("throws a RelayError when querying an event with a large 'kind'", async () => {
await using db = await createTestDB(); await using db = await createTestDB({ pure: true });
const { store } = db; const { store } = db;
await assertRejects( await assertRejects(

View file

@ -1,5 +1,6 @@
// deno-lint-ignore-file require-await // deno-lint-ignore-file require-await
import { LanguageCode } from 'iso-639-1';
import { NPostgres, NPostgresSchema } from '@nostrify/db'; import { NPostgres, NPostgresSchema } from '@nostrify/db';
import { NIP50, NKinds, NostrEvent, NostrFilter, NSchema as n } from '@nostrify/nostrify'; import { NIP50, NKinds, NostrEvent, NostrFilter, NSchema as n } from '@nostrify/nostrify';
import { Stickynotes } from '@soapbox/stickynotes'; import { Stickynotes } from '@soapbox/stickynotes';
@ -12,6 +13,7 @@ import { RelayError } from '@/RelayError.ts';
import { isNostrId, isURL } from '@/utils.ts'; import { isNostrId, isURL } from '@/utils.ts';
import { abortError } from '@/utils/abort.ts'; import { abortError } from '@/utils/abort.ts';
import { purifyEvent } from '@/utils/purify.ts'; import { purifyEvent } from '@/utils/purify.ts';
import { DittoEvent } from '@/interfaces/DittoEvent.ts';
/** Function to decide whether or not to index a tag. */ /** Function to decide whether or not to index a tag. */
type TagCondition = ({ event, count, value }: { type TagCondition = ({ event, count, value }: {
@ -28,6 +30,8 @@ interface EventsDBOpts {
pubkey: string; pubkey: string;
/** Timeout in milliseconds for database queries. */ /** Timeout in milliseconds for database queries. */
timeout: number; timeout: number;
/** Whether the event returned should be a Nostr event or a Ditto event. Defaults to false. */
pure?: boolean;
} }
/** SQL database storage adapter for Nostr events. */ /** SQL database storage adapter for Nostr events. */
@ -151,7 +155,7 @@ class EventsDB extends NPostgres {
let query = super.getFilterQuery(trx, { let query = super.getFilterQuery(trx, {
...filter, ...filter,
search: tokens.filter((t) => typeof t === 'string').join(' '), search: tokens.filter((t) => typeof t === 'string').join(' '),
}) as SelectQueryBuilder<DittoTables, 'nostr_events', Pick<DittoTables['nostr_events'], keyof NostrEvent>>; }) as SelectQueryBuilder<DittoTables, 'nostr_events', DittoTables['nostr_events']>;
const languages = new Set<string>(); const languages = new Set<string>();
@ -175,7 +179,7 @@ class EventsDB extends NPostgres {
override async query( override async query(
filters: NostrFilter[], filters: NostrFilter[],
opts: { signal?: AbortSignal; timeout?: number; limit?: number } = {}, opts: { signal?: AbortSignal; timeout?: number; limit?: number } = {},
): Promise<NostrEvent[]> { ): Promise<DittoEvent[]> {
filters = await this.expandFilters(filters); filters = await this.expandFilters(filters);
for (const filter of filters) { for (const filter of filters) {
@ -199,6 +203,29 @@ class EventsDB extends NPostgres {
return super.query(filters, { ...opts, timeout: opts.timeout ?? this.opts.timeout }); return super.query(filters, { ...opts, timeout: opts.timeout ?? this.opts.timeout });
} }
/**
 * Build an event from a `nostr_events` row.
 *
 * When the store is not in `pure` mode and the row has a language, the
 * language is attached to the event; otherwise only the base fields are set.
 */
protected override parseEventRow(row: DittoTables['nostr_events']): DittoEvent {
  const { id, kind, pubkey, content, tags, sig } = row;

  const parsed: DittoEvent = {
    id,
    kind,
    pubkey,
    content,
    created_at: Number(row.created_at),
    tags,
    sig,
  };

  if (!this.opts.pure && row.language) {
    parsed.language = row.language as LanguageCode;
  }

  return parsed;
}
/** Delete events based on filters from the database. */ /** Delete events based on filters from the database. */
override async remove(filters: NostrFilter[], opts: { signal?: AbortSignal; timeout?: number } = {}): Promise<void> { override async remove(filters: NostrFilter[], opts: { signal?: AbortSignal; timeout?: number } = {}): Promise<void> {
this.console.debug('DELETE', JSON.stringify(filters)); this.console.debug('DELETE', JSON.stringify(filters));

View file

@ -1,3 +1,5 @@
import ISO6391, { LanguageCode } from 'iso-639-1';
import lande from 'lande';
import { NostrEvent } from '@nostrify/nostrify'; import { NostrEvent } from '@nostrify/nostrify';
import { finalizeEvent, generateSecretKey } from 'nostr-tools'; import { finalizeEvent, generateSecretKey } from 'nostr-tools';
@ -33,7 +35,7 @@ export function genEvent(t: Partial<NostrEvent> = {}, sk: Uint8Array = generateS
} }
/** Create a database for testing. It uses `TEST_DATABASE_URL`, or creates an in-memory database by default. */ /** Create a database for testing. It uses `TEST_DATABASE_URL`, or creates an in-memory database by default. */
export async function createTestDB() { export async function createTestDB(opts?: { pure?: boolean }) {
const { testDatabaseUrl } = Conf; const { testDatabaseUrl } = Conf;
const { kysely } = DittoDB.create(testDatabaseUrl, { poolSize: 1 }); const { kysely } = DittoDB.create(testDatabaseUrl, { poolSize: 1 });
@ -43,6 +45,7 @@ export async function createTestDB() {
kysely, kysely,
timeout: Conf.db.timeouts.default, timeout: Conf.db.timeouts.default,
pubkey: Conf.pubkey, pubkey: Conf.pubkey,
pure: opts?.pure ?? false,
}); });
return { return {
@ -65,3 +68,15 @@ export async function createTestDB() {
export function sleep(ms: number): Promise<void> { export function sleep(ms: number): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, ms)); return new Promise((resolve) => setTimeout(resolve, ms));
} }
/**
 * Detect the language of `text` using `lande`.
 *
 * Takes the top-ranked detection result, passes its code through `Intl.Locale`,
 * and returns the resulting language when it is a valid ISO 639-1 code.
 * Returns `undefined` when nothing was detected or the code is not valid.
 */
export function getLanguage(text: string): LanguageCode | undefined {
  const [best] = lande(text);
  if (!best) {
    return;
  }

  const [code] = best;
  const { language } = new Intl.Locale(code);

  return ISO6391.validate(language) ? (language as LanguageCode) : undefined;
}

View file

@ -0,0 +1,52 @@
import { assertEquals } from '@std/assert';
import { Conf } from '@/config.ts';
import { DeepLTranslator } from '@/translators/DeepLTranslator.ts';
import { getLanguage } from '@/test.ts';
const { deepLendpoint: endpoint, deepLapiKey: apiKey, translationProvider } = Conf;

/** These tests call the real DeepL API, so they only run when DeepL is the configured provider and a key is set. */
const skipDeepL = translationProvider !== 'deepl' || !apiKey;

/** Portuguese sample texts translated by both tests. */
const portugueseTexts = [
  'Bom dia amigos',
  'Meu nome é Patrick',
  'Eu irei morar na America, eu prometo. Mas antes, eu devo mencionar que o lande está interpretando este texto como italiano, que estranho.',
];

/** Translate the sample texts to English and check the reported source language and every result. */
async function assertPortugueseToEnglish(source: 'pt' | undefined): Promise<void> {
  const translator = new DeepLTranslator({ fetch: fetch, endpoint, apiKey: apiKey as string });

  const { source_lang, results } = await translator.translate(portugueseTexts, source, 'en');

  assertEquals(source_lang, 'pt');
  for (const position of [0, 1, 2]) {
    assertEquals(getLanguage(results[position]), 'en');
  }
}

Deno.test(
  'DeepL translation with source language omitted',
  { ignore: skipDeepL },
  async () => {
    await assertPortugueseToEnglish(undefined);
  },
);

Deno.test(
  'DeepL translation with source language set',
  { ignore: skipDeepL },
  async () => {
    await assertPortugueseToEnglish('pt');
  },
);

View file

@ -0,0 +1,93 @@
import { z } from 'zod';
import { DittoTranslator, Provider, SourceLanguage, TargetLanguage } from '@/translators/translator.ts';
import { languageSchema } from '@/schema.ts';
/** Options accepted by the `DeepLTranslator` constructor. */
interface DeepLTranslatorOpts {
/** Base URL of the DeepL API, without the `/v2/translate` path. Default: 'https://api.deepl.com' */
endpoint?: string;
/** DeepL API key, sent in the `Authorization` header of every request. */
apiKey: string;
/** Custom fetch implementation. Default: `globalThis.fetch` */
fetch?: typeof fetch;
}
/** Translator backed by the DeepL HTTP API (`POST {endpoint}/v2/translate`). */
export class DeepLTranslator implements DittoTranslator {
  private readonly endpoint: string;
  private readonly apiKey: string;
  private readonly fetch: typeof fetch;
  private static provider: Provider = 'DeepL.com';

  constructor(opts: DeepLTranslatorOpts) {
    this.endpoint = opts.endpoint ?? 'https://api.deepl.com';
    this.fetch = opts.fetch ?? globalThis.fetch;
    this.apiKey = opts.apiKey;
  }

  /**
   * Translate `texts` into `dest`.
   *
   * @param texts Texts to translate, sent in a single request.
   * @param source Source language; omitted from the request when `undefined`.
   * @param dest Target language.
   * @param opts Optional abort signal for the HTTP request.
   * @returns The translated texts, plus the source language reported for the first text.
   * @throws When the request fails, the response is not 2xx, the response body
   * does not match the expected schema, or no translations are returned.
   */
  async translate(
    texts: string[],
    source: SourceLanguage | undefined,
    dest: TargetLanguage,
    opts?: { signal?: AbortSignal },
  ) {
    const { translations } = await this.translateMany(texts, source, dest, opts);

    // Without at least one translation there is no source language to report.
    const [first] = translations;
    if (!first) {
      throw new Error('DeepL returned no translations');
    }

    return {
      results: translations.map(({ text }) => text),
      source_lang: first.detected_source_language,
    };
  }

  /** DeepL translate request. */
  private async translateMany(
    texts: string[],
    source: SourceLanguage | undefined,
    targetLanguage: TargetLanguage,
    opts?: { signal?: AbortSignal },
  ) {
    const body: Record<string, unknown> = {
      text: texts,
      target_lang: targetLanguage.toUpperCase(),
      tag_handling: 'html',
      split_sentences: '1',
    };
    if (source) {
      body.source_lang = source.toUpperCase();
    }

    const headers = new Headers();
    headers.append('Authorization', 'DeepL-Auth-Key' + ' ' + this.apiKey);
    headers.append('Content-Type', 'application/json');

    const request = new Request(this.endpoint + '/v2/translate', {
      method: 'POST',
      body: JSON.stringify(body),
      headers,
      signal: opts?.signal,
    });

    const response = await this.fetch(request);

    // Report HTTP failures (bad key, quota exceeded, ...) directly instead of
    // letting them surface later as an unrelated schema validation error.
    if (!response.ok) {
      // Release the unread body before bailing out.
      await response.body?.cancel();
      throw new Error(`DeepL request failed: ${response.status} ${response.statusText}`);
    }

    const json: unknown = await response.json();
    return DeepLTranslator.schema().parse(json);
  }

  /** DeepL response schema.
   * https://developers.deepl.com/docs/api-reference/translate/openapi-spec-for-text-translation */
  private static schema() {
    return z.object({
      translations: z.array(
        z.object({
          detected_source_language: languageSchema,
          text: z.string(),
        }),
      ),
    });
  }

  /** DeepL provider. */
  getProvider(): Provider {
    return DeepLTranslator.provider;
  }
}

View file

@ -0,0 +1,52 @@
import { assertEquals } from '@std/assert';
import { Conf } from '@/config.ts';
import { LibreTranslateTranslator } from '@/translators/LibreTranslateTranslator.ts';
import { getLanguage } from '@/test.ts';
// Settings read from the app configuration; the tests below are skipped
// unless LibreTranslate is the selected provider and an API key is set.
const {
  libreTranslateEndpoint: endpoint,
  libreTranslateApiKey: apiKey,
  translationProvider,
} = Conf;
// Value of `translationProvider` that enables these tests.
const libreTranslate = 'libretranslate';
// Translates three Portuguese sentences to Catalan without passing a source
// language, then checks the reported source and the language of each result.
Deno.test(
  'LibreTranslate translation with source language omitted',
  { ignore: translationProvider !== libreTranslate || !apiKey },
  async () => {
    const client = new LibreTranslateTranslator({ fetch: fetch, endpoint, apiKey: apiKey as string });
    const sentences = [
      'Bom dia amigos',
      'Meu nome é Patrick, um nome belo ou feio? A questão é mais profunda do que parece.',
      'A respiração é mais importante do que comer e tomar agua.',
    ];

    const { source_lang, results } = await client.translate(sentences, undefined, 'ca');

    assertEquals(source_lang, 'pt');
    for (const index of [0, 1, 2]) {
      assertEquals(getLanguage(results[index]), 'ca');
    }
  },
);
// Translates three Portuguese sentences to Catalan with the source language
// given explicitly, then checks the reported source and each result's language.
Deno.test(
  'LibreTranslate translation with source language set',
  { ignore: translationProvider !== libreTranslate || !apiKey },
  async () => {
    const client = new LibreTranslateTranslator({ fetch: fetch, endpoint, apiKey: apiKey as string });
    const sentences = [
      'Bom dia amigos',
      'Meu nome é Patrick, um nome belo ou feio? A questão é mais profunda do que parece.',
      'A respiração é mais importante do que comer e tomar agua.',
    ];

    const { source_lang, results } = await client.translate(sentences, 'pt', 'ca');

    assertEquals(source_lang, 'pt');
    for (const index of [0, 1, 2]) {
      assertEquals(getLanguage(results[index]), 'ca');
    }
  },
);

View file

@ -0,0 +1,92 @@
import { LanguageCode } from 'iso-639-1';
import { z } from 'zod';
import { DittoTranslator, Provider, SourceLanguage, TargetLanguage } from '@/translators/translator.ts';
import { languageSchema } from '@/schema.ts';
/** Construction options accepted by `LibreTranslateTranslator`. */
interface LibreTranslateTranslatorOpts {
  /** LibreTranslate API key sent with each request. */
  apiKey: string;
  /** Base URL of the LibreTranslate instance. Default: 'https://libretranslate.com' */
  endpoint?: string;
  /** Alternative `fetch` implementation to issue requests with. */
  fetch?: typeof fetch;
}
/**
 * Translator backed by a LibreTranslate instance.
 *
 * Each text of a `translate` call is sent as its own
 * `POST {endpoint}/translate` request; the requests run concurrently.
 */
export class LibreTranslateTranslator implements DittoTranslator {
  private readonly endpoint: string;
  private readonly apiKey: string;
  private readonly fetch: typeof fetch;
  private static readonly provider: Provider = 'libretranslate.com';

  /**
   * @param opts API key plus optional endpoint and `fetch` overrides.
   *   The endpoint defaults to 'https://libretranslate.com' and `fetch` to the global one.
   */
  constructor(opts: LibreTranslateTranslatorOpts) {
    this.endpoint = opts.endpoint ?? 'https://libretranslate.com';
    this.fetch = opts.fetch ?? globalThis.fetch;
    this.apiKey = opts.apiKey;
  }

  /**
   * Translate `texts` into `dest`.
   *
   * @param texts Texts to translate; each one is sent with `format: 'html'`.
   * @param source Language of the texts, or `undefined` to request 'auto' detection.
   * @param dest Language to translate into.
   * @param opts Optional abort signal forwarded to every HTTP request.
   * @returns The translated texts and the source language: the language detected
   *   for the first text when the response includes one, otherwise `source`.
   * @throws Error when any of the HTTP requests fails.
   */
  async translate(
    texts: string[],
    source: SourceLanguage | undefined,
    dest: TargetLanguage,
    opts?: { signal?: AbortSignal },
  ) {
    const translations = await Promise.all(
      texts.map((text) => this.translateOne(text, source, dest, 'html', { signal: opts?.signal })),
    );

    return {
      results: translations.map((value) => value.translatedText),
      // The cast holds when `texts` is non-empty: either a detected language is present
      // or `source` was supplied. With empty `texts` and no `source` this is `undefined`.
      source_lang: translations[0]?.detectedLanguage?.language ?? source as LanguageCode,
    };
  }

  /** LibreTranslate request for a single text. Throws when the response status is not successful. */
  private async translateOne(
    q: string,
    sourceLanguage: string | undefined,
    targetLanguage: string,
    format: 'html' | 'text',
    opts?: { signal?: AbortSignal },
  ) {
    const body = {
      q,
      source: sourceLanguage?.toLowerCase() ?? 'auto',
      target: targetLanguage.toLowerCase(),
      format,
      api_key: this.apiKey,
    };

    const headers = new Headers();
    headers.append('Content-Type', 'application/json');

    const request = new Request(this.endpoint + '/translate', {
      method: 'POST',
      body: JSON.stringify(body),
      headers,
      signal: opts?.signal,
    });

    const response = await this.fetch(request);

    // Fail with the HTTP status instead of letting an error payload reach the schema parser.
    if (!response.ok) {
      // Release the unread body so the connection/resource is not left dangling.
      await response.body?.cancel();
      throw new Error(`LibreTranslate request failed: ${response.status} ${response.statusText}`);
    }

    const json: unknown = await response.json();
    return LibreTranslateTranslator.schema().parse(json);
  }

  /** Libretranslate response schema.
   * https://libretranslate.com/docs/#/translate/post_translate */
  private static schema() {
    return z.object({
      translatedText: z.string(),
      /** This field is only available if the 'source' is set to 'auto' */
      detectedLanguage: z.object({
        language: languageSchema,
      }).optional(),
    });
  }

  /** LibreTranslate provider. */
  getProvider(): Provider {
    return LibreTranslateTranslator.provider;
  }
}

View file

@ -0,0 +1,59 @@
import { LanguageCode } from 'iso-639-1';
import { LRUCache } from 'lru-cache';
import { Time } from '@/utils/time.ts';
/** Names of the supported machine-translation providers. */
export type Provider =
  | 'DeepL.com'
  | 'libretranslate.com';

/** Language the post was originally written in. */
export type SourceLanguage = LanguageCode;

/** Language the content is translated into. */
export type TargetLanguage = LanguageCode;
/** Value stored in the translations LRUCache; wraps the translation payload. */
type DittoTranslation = {
  data: MastodonTranslation;
};

/** Translation of a status and its related fields. */
export type MastodonTranslation = {
  /** HTML-encoded translated content of the status. */
  content: string;

  /** The translated spoiler warning of the status. */
  spoiler_text: string;

  /** The translated media descriptions of the status. */
  media_attachments: Array<{ id: string; description: string }>;

  /** The translated poll of the status. */
  poll: { id: string; options: Array<{ title: string }> } | null;

  /** The language of the source text, as auto-detected by the machine translation provider. */
  detected_source_language: SourceLanguage;

  /** The service that provided the machine translation. */
  provider: Provider;
};
/** Contract implemented by the translators used for status translation. */
export interface DittoTranslator {
  /**
   * Translate `texts` into `targetLanguage`.
   * Resolves with the translated strings and the language of the source text.
   */
  translate(
    /** Texts to translate. */
    texts: string[],
    /** Language of the source text/status, or `undefined` when not specified. */
    sourceLanguage: SourceLanguage | undefined,
    /** Language the texts will be translated into. */
    targetLanguage: TargetLanguage,
    /** Per-call options, such as an abort signal. */
    opts?: { signal?: AbortSignal },
  ): Promise<{ results: string[]; source_lang: SourceLanguage }>;

  /** Provider this translator reports for its translations. */
  getProvider(): Provider;
}
/**
 * Cache key made of the TARGET language, a dash, and the status id.
 *
 * Example: en-390f5b01b49a8ee6e13fe917420c023d889b3da8e983a14c9e84587e43d12c15
 * is the key for status 390f5b01b49a8ee6e13fe917420c023d889b3da8e983a14c9e84587e43d12c15
 * translated to English (if that translation exists in the LRUCache).
 */
export type dittoTranslationsKey = `${TargetLanguage}-${string}`;

/** Cache of status translations, configured with `max: 1000` and a TTL of `Time.hours(6)`. */
export const dittoTranslations = new LRUCache<dittoTranslationsKey, DittoTranslation>(
  {
    max: 1000,
    ttl: Time.hours(6),
  },
);

View file

@ -113,7 +113,7 @@ async function renderStatus(event: DittoEvent, opts: RenderStatusOpts): Promise<
sensitive: !!cw, sensitive: !!cw,
spoiler_text: (cw ? cw[1] : subject?.[1]) || '', spoiler_text: (cw ? cw[1] : subject?.[1]) || '',
visibility: 'public', visibility: 'public',
language: event.tags.find((tag) => tag[0] === 'l' && tag[2] === 'ISO-639-1')?.[1] || null, language: event.language ?? null,
replies_count: event.event_stats?.replies_count ?? 0, replies_count: event.event_stats?.replies_count ?? 0,
reblogs_count: event.event_stats?.reposts_count ?? 0, reblogs_count: event.event_stats?.reposts_count ?? 0,
favourites_count: event.event_stats?.reactions['+'] ?? 0, favourites_count: event.event_stats?.reactions['+'] ?? 0,