mirror of
https://gitlab.com/soapbox-pub/ditto.git
synced 2025-12-06 11:29:46 +00:00
Merge branch 'translate-status' into 'main'
Feat: Support Machine Translations Closes #201 See merge request soapbox-pub/ditto!537
This commit is contained in:
commit
91b82c36a8
20 changed files with 650 additions and 33 deletions
|
|
@ -41,7 +41,7 @@
|
|||
"@isaacs/ttlcache": "npm:@isaacs/ttlcache@^1.4.1",
|
||||
"@lambdalisue/async": "jsr:@lambdalisue/async@^2.1.1",
|
||||
"@noble/secp256k1": "npm:@noble/secp256k1@^2.0.0",
|
||||
"@nostrify/db": "jsr:@nostrify/db@^0.35.0",
|
||||
"@nostrify/db": "jsr:@nostrify/db@^0.36.1",
|
||||
"@nostrify/nostrify": "jsr:@nostrify/nostrify@^0.36.0",
|
||||
"@nostrify/policies": "jsr:@nostrify/policies@^0.35.0",
|
||||
"@scure/base": "npm:@scure/base@^1.1.6",
|
||||
|
|
|
|||
10
deno.lock
generated
10
deno.lock
generated
|
|
@ -22,7 +22,7 @@
|
|||
"jsr:@gleasonator/policy@0.8.0": "0.8.0",
|
||||
"jsr:@hono/hono@^4.4.6": "4.6.2",
|
||||
"jsr:@lambdalisue/async@^2.1.1": "2.1.1",
|
||||
"jsr:@nostrify/db@0.35": "0.35.0",
|
||||
"jsr:@nostrify/db@~0.36.1": "0.36.1",
|
||||
"jsr:@nostrify/nostrify@0.31": "0.31.0",
|
||||
"jsr:@nostrify/nostrify@0.32": "0.32.0",
|
||||
"jsr:@nostrify/nostrify@0.35": "0.35.0",
|
||||
|
|
@ -270,10 +270,10 @@
|
|||
"@lambdalisue/async@2.1.1": {
|
||||
"integrity": "1fc9bc6f4ed50215cd2f7217842b18cea80f81c25744f88f8c5eb4be5a1c9ab4"
|
||||
},
|
||||
"@nostrify/db@0.35.0": {
|
||||
"integrity": "637191c41812544e361b7997dc44ea098f8bd7efebb28f37a8a7142a0ecada8d",
|
||||
"@nostrify/db@0.36.1": {
|
||||
"integrity": "b65b89ca6fe98d9dbcc0402b5c9c07b8430c2c91f84ba4128ff2eeed70c3d49f",
|
||||
"dependencies": [
|
||||
"jsr:@nostrify/nostrify@0.35",
|
||||
"jsr:@nostrify/nostrify@0.36",
|
||||
"jsr:@nostrify/types@0.35",
|
||||
"npm:kysely@~0.27.3",
|
||||
"npm:nostr-tools@^2.7.0"
|
||||
|
|
@ -2048,7 +2048,7 @@
|
|||
"jsr:@gfx/canvas-wasm@~0.4.2",
|
||||
"jsr:@hono/hono@^4.4.6",
|
||||
"jsr:@lambdalisue/async@^2.1.1",
|
||||
"jsr:@nostrify/db@0.35",
|
||||
"jsr:@nostrify/db@~0.36.1",
|
||||
"jsr:@nostrify/nostrify@0.36",
|
||||
"jsr:@nostrify/policies@0.35",
|
||||
"jsr:@soapbox/kysely-pglite@1",
|
||||
|
|
|
|||
14
src/app.ts
14
src/app.ts
|
|
@ -1,4 +1,4 @@
|
|||
import { Context, Env as HonoEnv, Handler, Hono, Input as HonoInput, MiddlewareHandler } from '@hono/hono';
|
||||
import { type Context, Env as HonoEnv, Handler, Hono, Input as HonoInput, MiddlewareHandler } from '@hono/hono';
|
||||
import { cors } from '@hono/hono/cors';
|
||||
import { serveStatic } from '@hono/hono/deno';
|
||||
import { logger } from '@hono/hono/logger';
|
||||
|
|
@ -112,6 +112,7 @@ import {
|
|||
trendingStatusesController,
|
||||
trendingTagsController,
|
||||
} from '@/controllers/api/trends.ts';
|
||||
import { translateController } from '@/controllers/api/translate.ts';
|
||||
import { errorHandler } from '@/controllers/error.ts';
|
||||
import { frontendController } from '@/controllers/frontend.ts';
|
||||
import { metricsController } from '@/controllers/metrics.ts';
|
||||
|
|
@ -128,6 +129,8 @@ import { requireSigner } from '@/middleware/requireSigner.ts';
|
|||
import { signerMiddleware } from '@/middleware/signerMiddleware.ts';
|
||||
import { storeMiddleware } from '@/middleware/storeMiddleware.ts';
|
||||
import { uploaderMiddleware } from '@/middleware/uploaderMiddleware.ts';
|
||||
import { DittoTranslator } from '@/translators/translator.ts';
|
||||
import { translatorMiddleware } from '@/middleware/translatorMiddleware.ts';
|
||||
|
||||
interface AppEnv extends HonoEnv {
|
||||
Variables: {
|
||||
|
|
@ -143,6 +146,8 @@ interface AppEnv extends HonoEnv {
|
|||
pagination: { since?: number; until?: number; limit: number };
|
||||
/** Normalized list pagination params. */
|
||||
listPagination: { offset: number; limit: number };
|
||||
/** Translation service. */
|
||||
translator?: DittoTranslator;
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -222,6 +227,13 @@ app.post('/api/v1/statuses/:id{[0-9a-f]{64}}/bookmark', requireSigner, bookmarkC
|
|||
app.post('/api/v1/statuses/:id{[0-9a-f]{64}}/unbookmark', requireSigner, unbookmarkController);
|
||||
app.post('/api/v1/statuses/:id{[0-9a-f]{64}}/pin', requireSigner, pinController);
|
||||
app.post('/api/v1/statuses/:id{[0-9a-f]{64}}/unpin', requireSigner, unpinController);
|
||||
app.post(
|
||||
'/api/v1/statuses/:id{[0-9a-f]{64}}/translate',
|
||||
requireSigner,
|
||||
rateLimitMiddleware(30, Time.minutes(1)),
|
||||
translatorMiddleware,
|
||||
translateController,
|
||||
);
|
||||
app.post('/api/v1/statuses/:id{[0-9a-f]{64}}/reblog', requireSigner, reblogStatusController);
|
||||
app.post('/api/v1/statuses/:id{[0-9a-f]{64}}/unreblog', requireSigner, unreblogStatusController);
|
||||
app.post('/api/v1/statuses', requireSigner, createStatusController);
|
||||
|
|
|
|||
|
|
@ -271,6 +271,26 @@ class Conf {
|
|||
static get preferredLanguages(): LanguageCode[] | undefined {
|
||||
return Deno.env.get('DITTO_LANGUAGES')?.split(',')?.filter(ISO6391.validate) as LanguageCode[];
|
||||
}
|
||||
/**
 * Translation provider used to translate posts, read from the `TRANSLATION_PROVIDER`
 * environment variable. `translatorMiddleware` only acts on the values `deepl` and
 * `libretranslate`; with any other value no translator is set.
 */
|
||||
static get translationProvider(): string | undefined {
|
||||
return Deno.env.get('TRANSLATION_PROVIDER');
|
||||
}
|
||||
/**
 * DeepL URL endpoint, read from the `DEEPL_ENDPOINT` environment variable.
 * When this is undefined, `DeepLTranslator` falls back to `https://api.deepl.com`.
 */
|
||||
static get deepLendpoint(): string | undefined {
|
||||
return Deno.env.get('DEEPL_ENDPOINT');
|
||||
}
|
||||
/**
 * DeepL API key, read from the `DEEPL_API_KEY` environment variable.
 * `translatorMiddleware` only creates a DeepL translator when this is set.
 */
|
||||
static get deepLapiKey(): string | undefined {
|
||||
return Deno.env.get('DEEPL_API_KEY');
|
||||
}
|
||||
/**
 * LibreTranslate URL endpoint, read from the `LIBRETRANSLATE_ENDPOINT` environment variable.
 * NOTE(review): the translator's option docs mention a default of `https://libretranslate.com`
 * when this is undefined — confirm against `LibreTranslateTranslator`.
 */
|
||||
static get libreTranslateEndpoint(): string | undefined {
|
||||
return Deno.env.get('LIBRETRANSLATE_ENDPOINT');
|
||||
}
|
||||
/**
 * LibreTranslate API key, read from the `LIBRETRANSLATE_API_KEY` environment variable.
 * `translatorMiddleware` only creates a LibreTranslate translator when this is set.
 */
|
||||
static get libreTranslateApiKey(): string | undefined {
|
||||
return Deno.env.get('LIBRETRANSLATE_API_KEY');
|
||||
}
|
||||
/** Cache settings. */
|
||||
static caches = {
|
||||
/** NIP-05 cache settings. */
|
||||
|
|
|
|||
|
|
@ -129,7 +129,7 @@ const instanceV2Controller: AppController = async (c) => {
|
|||
max_expiration: 2629746,
|
||||
},
|
||||
translation: {
|
||||
enabled: false,
|
||||
enabled: true,
|
||||
},
|
||||
},
|
||||
nostr: {
|
||||
|
|
|
|||
140
src/controllers/api/translate.ts
Normal file
140
src/controllers/api/translate.ts
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
import { LanguageCode } from 'iso-639-1';
|
||||
import { z } from 'zod';
|
||||
|
||||
import { AppController } from '@/app.ts';
|
||||
import { localeSchema } from '@/schema.ts';
|
||||
import { dittoTranslations, dittoTranslationsKey, MastodonTranslation } from '@/translators/translator.ts';
|
||||
import { parseBody } from '@/utils/api.ts';
|
||||
import { getEvent } from '@/queries.ts';
|
||||
import { renderStatus } from '@/views/mastodon/statuses.ts';
|
||||
|
||||
/** Request body of the translate endpoint: `lang` is the target locale, parsed into an `Intl.Locale`. */
const translateSchema = z.object({ lang: localeSchema });
|
||||
|
||||
/**
 * Translate a status (content, spoiler text, media descriptions and poll options) into the
 * language given by the request body's `lang` field, and respond with a `MastodonTranslation`.
 *
 * Responses:
 * - 422 when the body fails `translateSchema`, or the locale's language subtag is not a
 *   two-letter code.
 * - 500 when no translator was set on the context.
 * - 400 when the event is not found, is already in the target language, or renders without content.
 * - 503 when the translator throws or returns a result count that does not match the input.
 * - 200 with the translation, which is also stored in `dittoTranslations` under `${lang}-${id}`.
 */
const translateController: AppController = async (c) => {
  const result = translateSchema.safeParse(await parseBody(c.req.raw));
  const { signal } = c.req.raw;

  if (!result.success) {
    return c.json({ error: 'Bad request.', schema: result.error }, 422);
  }

  const translator = c.get('translator');
  if (!translator) {
    return c.json({ error: 'No translator configured.' }, 500);
  }

  // Only two-letter language subtags are accepted. Truncating a longer subtag would silently
  // select a different language (e.g. `fil` would become `fi`).
  const { language } = result.data.lang;
  if (language.length !== 2) {
    return c.json({ error: 'Unsupported language.' }, 422);
  }
  const lang = language as LanguageCode;

  const id = c.req.param('id');

  const event = await getEvent(id, { signal });
  if (!event) {
    return c.json({ error: 'Record not found' }, 400);
  }

  const viewerPubkey = await c.get('signer')?.getPublicKey();

  if (lang.toLowerCase() === event.language?.toLowerCase()) {
    return c.json({ error: 'Source and target languages are the same. No translation needed.' }, 400);
  }

  const status = await renderStatus(event, { viewerPubkey });
  if (!status?.content) {
    return c.json({ error: 'Bad request.' }, 400);
  }

  const translatedId = `${lang}-${id}` as dittoTranslationsKey;
  const cached = dittoTranslations.get(translatedId);
  if (cached) {
    return c.json(cached.data, 200);
  }

  const poll = status.poll as MastodonTranslation['poll'];

  try {
    const mastodonTranslation: MastodonTranslation = {
      content: '',
      spoiler_text: '',
      media_attachments: [],
      poll: poll?.options ? { id: poll.id, options: [] } : null,
      // Placeholder; replaced below by the language the translator reports.
      detected_source_language: event.language ?? 'en',
      provider: translator.getProvider(),
    };

    /** Where a translated text belongs in the response. */
    type TranslationSlot = 'content' | 'spoilerText' | 'poll' | { type: 'media'; id: string };

    // `slots[i]` always describes `texts[i]`. Both arrays are only ever appended to together,
    // so the positions cannot drift apart no matter how many media or poll entries exist.
    const slots: TranslationSlot[] = [];
    const texts: string[] = [];
    const enqueue = (slot: TranslationSlot, text: string): void => {
      slots.push(slot);
      texts.push(text);
    };

    enqueue('content', status.content);

    if (status.spoiler_text) {
      enqueue('spoilerText', status.spoiler_text);
    }

    for (const media of status.media_attachments) {
      enqueue({ type: 'media', id: media.id }, media.description ?? '');
    }

    if (poll?.options) {
      for (const option of poll.options) {
        enqueue('poll', option.title);
      }
    }

    const data = await translator.translate(texts, event.language, lang, { signal });
    const translatedTexts = data.results;

    // A mismatched result count would misplace or drop texts; fail instead of caching it.
    if (translatedTexts.length !== texts.length) {
      throw new Error('Translator returned an unexpected number of results');
    }

    for (const [i, slot] of slots.entries()) {
      const translated = translatedTexts[i];

      if (slot === 'content') {
        mastodonTranslation.content = translated;
      } else if (slot === 'spoilerText') {
        mastodonTranslation.spoiler_text = translated;
      } else if (slot === 'poll') {
        mastodonTranslation.poll?.options.push({ title: translated });
      } else {
        mastodonTranslation.media_attachments.push({ id: slot.id, description: translated });
      }
    }

    mastodonTranslation.detected_source_language = data.source_lang;

    dittoTranslations.set(translatedId, { data: mastodonTranslation });
    return c.json(mastodonTranslation, 200);
  } catch {
    return c.json({ error: 'Service Unavailable' }, 503);
  }
};
|
||||
|
||||
export { translateController };
|
||||
|
|
@ -18,6 +18,7 @@ import * as pipeline from '@/pipeline.ts';
|
|||
import { RelayError } from '@/RelayError.ts';
|
||||
import { Storages } from '@/storages.ts';
|
||||
import { Time } from '@/utils/time.ts';
|
||||
import { purifyEvent } from '@/utils/purify.ts';
|
||||
|
||||
/** Limit of initial events returned for a subscription. */
|
||||
const FILTER_LIMIT = 100;
|
||||
|
|
@ -105,7 +106,7 @@ function connectStream(socket: WebSocket, ip: string | undefined) {
|
|||
|
||||
try {
|
||||
for (const event of await store.query(filters, { limit: FILTER_LIMIT, timeout: Conf.db.timeouts.relay })) {
|
||||
send(['EVENT', subId, event]);
|
||||
send(['EVENT', subId, purifyEvent(event)]);
|
||||
}
|
||||
} catch (e: any) {
|
||||
if (e instanceof RelayError) {
|
||||
|
|
@ -137,7 +138,7 @@ function connectStream(socket: WebSocket, ip: string | undefined) {
|
|||
relayEventsCounter.inc({ kind: event.kind.toString() });
|
||||
try {
|
||||
// This will store it (if eligible) and run other side-effects.
|
||||
await pipeline.handleEvent(event, AbortSignal.timeout(1000));
|
||||
await pipeline.handleEvent(purifyEvent(event), AbortSignal.timeout(1000));
|
||||
send(['OK', event.id, true, '']);
|
||||
} catch (e) {
|
||||
if (e instanceof RelayError) {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
import { Nullable } from 'kysely';
|
||||
|
||||
import { NPostgresSchema } from '@nostrify/db';
|
||||
|
||||
export interface DittoTables extends NPostgresSchema {
|
||||
|
|
@ -12,7 +10,7 @@ export interface DittoTables extends NPostgresSchema {
|
|||
}
|
||||
|
||||
type NostrEventsRow = NPostgresSchema['nostr_events'] & {
|
||||
language: Nullable<string>;
|
||||
language: string | null;
|
||||
};
|
||||
|
||||
interface AuthorStatsRow {
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import { NostrEvent } from '@nostrify/nostrify';
|
||||
import { LanguageCode } from 'iso-639-1';
|
||||
|
||||
/** Ditto internal stats for the event's author. */
|
||||
export interface AuthorStats {
|
||||
|
|
@ -43,4 +44,6 @@ export interface DittoEvent extends NostrEvent {
|
|||
zap_sender?: DittoEvent | string;
|
||||
zap_amount?: number;
|
||||
zap_message?: string;
|
||||
/** Language of the event (kind 1s are more accurate). */
|
||||
language?: LanguageCode;
|
||||
}
|
||||
|
|
|
|||
39
src/middleware/translatorMiddleware.ts
Normal file
39
src/middleware/translatorMiddleware.ts
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
import { AppMiddleware } from '@/app.ts';
|
||||
import { Conf } from '@/config.ts';
|
||||
import { fetchWorker } from '@/workers/fetch.ts';
|
||||
import { DeepLTranslator } from '@/translators/DeepLTranslator.ts';
|
||||
import { LibreTranslateTranslator } from '@/translators/LibreTranslateTranslator.ts';
|
||||
|
||||
/**
 * Sets the `translator` context variable according to `Conf.translationProvider`.
 * A translator is only set when the selected provider (`deepl` or `libretranslate`) also has
 * an API key configured; otherwise the variable is left unset. `next()` is always called.
 */
export const translatorMiddleware: AppMiddleware = async (c, next) => {
  const {
    deepLendpoint,
    deepLapiKey,
    libreTranslateEndpoint,
    libreTranslateApiKey,
    translationProvider,
  } = Conf;

  if (translationProvider === 'deepl') {
    if (deepLapiKey) {
      const translator = new DeepLTranslator({
        endpoint: deepLendpoint,
        apiKey: deepLapiKey,
        fetch: fetchWorker,
      });
      c.set('translator', translator);
    }
  } else if (translationProvider === 'libretranslate') {
    if (libreTranslateApiKey) {
      const translator = new LibreTranslateTranslator({
        endpoint: libreTranslateEndpoint,
        apiKey: libreTranslateApiKey,
        fetch: fetchWorker,
      });
      c.set('translator', translator);
    }
  }

  await next();
};
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
import ISO6391 from 'iso-639-1';
|
||||
import ISO6391, { LanguageCode } from 'iso-639-1';
|
||||
import { z } from 'zod';
|
||||
|
||||
/** Validates individual items in an array, dropping any that aren't valid. */
|
||||
|
|
@ -41,7 +41,8 @@ const fileSchema = z.custom<File>((value) => value instanceof File);
|
|||
|
||||
const percentageSchema = z.coerce.number().int().gte(1).lte(100);
|
||||
|
||||
const languageSchema = z.string().transform((val, ctx) => {
|
||||
const languageSchema = z.string().transform<LanguageCode>((val, ctx) => {
|
||||
val = val.toLowerCase();
|
||||
if (!ISO6391.validate(val)) {
|
||||
ctx.addIssue({
|
||||
code: z.ZodIssueCode.custom,
|
||||
|
|
@ -49,7 +50,19 @@ const languageSchema = z.string().transform((val, ctx) => {
|
|||
});
|
||||
return z.NEVER;
|
||||
}
|
||||
return val;
|
||||
return val as LanguageCode;
|
||||
});
|
||||
|
||||
/**
 * Parses a string into an `Intl.Locale`. When the `Intl.Locale` constructor throws,
 * an "Invalid locale" issue is reported instead.
 */
const localeSchema = z.string().transform<Intl.Locale>((tag, ctx) => {
  let locale: Intl.Locale;

  try {
    locale = new Intl.Locale(tag);
  } catch {
    ctx.addIssue({ code: z.ZodIssueCode.custom, message: 'Invalid locale' });
    return z.NEVER;
  }

  return locale;
});
|
||||
|
||||
export {
|
||||
|
|
@ -59,6 +72,7 @@ export {
|
|||
filteredArray,
|
||||
hashtagSchema,
|
||||
languageSchema,
|
||||
localeSchema,
|
||||
percentageSchema,
|
||||
safeUrlSchema,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import { Conf } from '@/config.ts';
|
|||
import { createTestDB } from '@/test.ts';
|
||||
|
||||
Deno.test('count filters', async () => {
|
||||
await using db = await createTestDB();
|
||||
await using db = await createTestDB({ pure: true });
|
||||
const { store } = db;
|
||||
|
||||
const event1 = await eventFixture('event-1');
|
||||
|
|
@ -18,7 +18,7 @@ Deno.test('count filters', async () => {
|
|||
});
|
||||
|
||||
Deno.test('insert and filter events', async () => {
|
||||
await using db = await createTestDB();
|
||||
await using db = await createTestDB({ pure: true });
|
||||
const { store } = db;
|
||||
|
||||
const event1 = await eventFixture('event-1');
|
||||
|
|
@ -35,7 +35,7 @@ Deno.test('insert and filter events', async () => {
|
|||
});
|
||||
|
||||
Deno.test('query events with domain search filter', async () => {
|
||||
await using db = await createTestDB();
|
||||
await using db = await createTestDB({ pure: true });
|
||||
const { store, kysely } = db;
|
||||
|
||||
const event1 = await eventFixture('event-1');
|
||||
|
|
@ -55,7 +55,7 @@ Deno.test('query events with domain search filter', async () => {
|
|||
});
|
||||
|
||||
Deno.test('query events with language search filter', async () => {
|
||||
await using db = await createTestDB();
|
||||
await using db = await createTestDB({ pure: true });
|
||||
const { store, kysely } = db;
|
||||
|
||||
const en = genEvent({ kind: 1, content: 'hello world!' });
|
||||
|
|
@ -72,7 +72,7 @@ Deno.test('query events with language search filter', async () => {
|
|||
});
|
||||
|
||||
Deno.test('delete events', async () => {
|
||||
await using db = await createTestDB();
|
||||
await using db = await createTestDB({ pure: true });
|
||||
const { store } = db;
|
||||
|
||||
const sk = generateSecretKey();
|
||||
|
|
@ -96,7 +96,7 @@ Deno.test('delete events', async () => {
|
|||
});
|
||||
|
||||
Deno.test("user cannot delete another user's event", async () => {
|
||||
await using db = await createTestDB();
|
||||
await using db = await createTestDB({ pure: true });
|
||||
const { store } = db;
|
||||
|
||||
const event = genEvent({ kind: 1, content: 'hello world', created_at: 1 });
|
||||
|
|
@ -113,7 +113,7 @@ Deno.test("user cannot delete another user's event", async () => {
|
|||
});
|
||||
|
||||
Deno.test('admin can delete any event', async () => {
|
||||
await using db = await createTestDB();
|
||||
await using db = await createTestDB({ pure: true });
|
||||
const { store } = db;
|
||||
|
||||
const sk = generateSecretKey();
|
||||
|
|
@ -137,7 +137,7 @@ Deno.test('admin can delete any event', async () => {
|
|||
});
|
||||
|
||||
Deno.test('throws a RelayError when inserting an event deleted by the admin', async () => {
|
||||
await using db = await createTestDB();
|
||||
await using db = await createTestDB({ pure: true });
|
||||
const { store } = db;
|
||||
|
||||
const event = genEvent();
|
||||
|
|
@ -154,7 +154,7 @@ Deno.test('throws a RelayError when inserting an event deleted by the admin', as
|
|||
});
|
||||
|
||||
Deno.test('throws a RelayError when inserting an event deleted by a user', async () => {
|
||||
await using db = await createTestDB();
|
||||
await using db = await createTestDB({ pure: true });
|
||||
const { store } = db;
|
||||
|
||||
const sk = generateSecretKey();
|
||||
|
|
@ -173,7 +173,7 @@ Deno.test('throws a RelayError when inserting an event deleted by a user', async
|
|||
});
|
||||
|
||||
Deno.test('inserting replaceable events', async () => {
|
||||
await using db = await createTestDB();
|
||||
await using db = await createTestDB({ pure: true });
|
||||
const { store } = db;
|
||||
|
||||
const sk = generateSecretKey();
|
||||
|
|
@ -190,7 +190,7 @@ Deno.test('inserting replaceable events', async () => {
|
|||
});
|
||||
|
||||
Deno.test("throws a RelayError when querying an event with a large 'since'", async () => {
|
||||
await using db = await createTestDB();
|
||||
await using db = await createTestDB({ pure: true });
|
||||
const { store } = db;
|
||||
|
||||
await assertRejects(
|
||||
|
|
@ -201,7 +201,7 @@ Deno.test("throws a RelayError when querying an event with a large 'since'", asy
|
|||
});
|
||||
|
||||
Deno.test("throws a RelayError when querying an event with a large 'until'", async () => {
|
||||
await using db = await createTestDB();
|
||||
await using db = await createTestDB({ pure: true });
|
||||
const { store } = db;
|
||||
|
||||
await assertRejects(
|
||||
|
|
@ -212,7 +212,7 @@ Deno.test("throws a RelayError when querying an event with a large 'until'", asy
|
|||
});
|
||||
|
||||
Deno.test("throws a RelayError when querying an event with a large 'kind'", async () => {
|
||||
await using db = await createTestDB();
|
||||
await using db = await createTestDB({ pure: true });
|
||||
const { store } = db;
|
||||
|
||||
await assertRejects(
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
// deno-lint-ignore-file require-await
|
||||
|
||||
import { LanguageCode } from 'iso-639-1';
|
||||
import { NPostgres, NPostgresSchema } from '@nostrify/db';
|
||||
import { NIP50, NKinds, NostrEvent, NostrFilter, NSchema as n } from '@nostrify/nostrify';
|
||||
import { Stickynotes } from '@soapbox/stickynotes';
|
||||
|
|
@ -12,6 +13,7 @@ import { RelayError } from '@/RelayError.ts';
|
|||
import { isNostrId, isURL } from '@/utils.ts';
|
||||
import { abortError } from '@/utils/abort.ts';
|
||||
import { purifyEvent } from '@/utils/purify.ts';
|
||||
import { DittoEvent } from '@/interfaces/DittoEvent.ts';
|
||||
|
||||
/** Function to decide whether or not to index a tag. */
|
||||
type TagCondition = ({ event, count, value }: {
|
||||
|
|
@ -28,6 +30,8 @@ interface EventsDBOpts {
|
|||
pubkey: string;
|
||||
/** Timeout in milliseconds for database queries. */
|
||||
timeout: number;
|
||||
/** Whether the event returned should be a Nostr event or a Ditto event. Defaults to false. */
|
||||
pure?: boolean;
|
||||
}
|
||||
|
||||
/** SQL database storage adapter for Nostr events. */
|
||||
|
|
@ -151,7 +155,7 @@ class EventsDB extends NPostgres {
|
|||
let query = super.getFilterQuery(trx, {
|
||||
...filter,
|
||||
search: tokens.filter((t) => typeof t === 'string').join(' '),
|
||||
}) as SelectQueryBuilder<DittoTables, 'nostr_events', Pick<DittoTables['nostr_events'], keyof NostrEvent>>;
|
||||
}) as SelectQueryBuilder<DittoTables, 'nostr_events', DittoTables['nostr_events']>;
|
||||
|
||||
const languages = new Set<string>();
|
||||
|
||||
|
|
@ -175,7 +179,7 @@ class EventsDB extends NPostgres {
|
|||
override async query(
|
||||
filters: NostrFilter[],
|
||||
opts: { signal?: AbortSignal; timeout?: number; limit?: number } = {},
|
||||
): Promise<NostrEvent[]> {
|
||||
): Promise<DittoEvent[]> {
|
||||
filters = await this.expandFilters(filters);
|
||||
|
||||
for (const filter of filters) {
|
||||
|
|
@ -199,6 +203,29 @@ class EventsDB extends NPostgres {
|
|||
return super.query(filters, { ...opts, timeout: opts.timeout ?? this.opts.timeout });
|
||||
}
|
||||
|
||||
/**
 * Build an event from a `nostr_events` row.
 * With the `pure` option only the standard Nostr fields are returned; otherwise a non-empty
 * `language` column is copied onto the event as well.
 */
protected override parseEventRow(row: DittoTables['nostr_events']): DittoEvent {
  const { id, kind, pubkey, content, tags, sig } = row;

  const event: DittoEvent = {
    id,
    kind,
    pubkey,
    content,
    created_at: Number(row.created_at),
    tags,
    sig,
  };

  if (!this.opts.pure && row.language) {
    event.language = row.language as LanguageCode;
  }

  return event;
}
|
||||
|
||||
/** Delete events based on filters from the database. */
|
||||
override async remove(filters: NostrFilter[], opts: { signal?: AbortSignal; timeout?: number } = {}): Promise<void> {
|
||||
this.console.debug('DELETE', JSON.stringify(filters));
|
||||
|
|
|
|||
17
src/test.ts
17
src/test.ts
|
|
@ -1,3 +1,5 @@
|
|||
import ISO6391, { LanguageCode } from 'iso-639-1';
|
||||
import lande from 'lande';
|
||||
import { NostrEvent } from '@nostrify/nostrify';
|
||||
import { finalizeEvent, generateSecretKey } from 'nostr-tools';
|
||||
|
||||
|
|
@ -33,7 +35,7 @@ export function genEvent(t: Partial<NostrEvent> = {}, sk: Uint8Array = generateS
|
|||
}
|
||||
|
||||
/** Create a database for testing. It uses `TEST_DATABASE_URL`, or creates an in-memory database by default. */
|
||||
export async function createTestDB() {
|
||||
export async function createTestDB(opts?: { pure?: boolean }) {
|
||||
const { testDatabaseUrl } = Conf;
|
||||
const { kysely } = DittoDB.create(testDatabaseUrl, { poolSize: 1 });
|
||||
|
||||
|
|
@ -43,6 +45,7 @@ export async function createTestDB() {
|
|||
kysely,
|
||||
timeout: Conf.db.timeouts.default,
|
||||
pubkey: Conf.pubkey,
|
||||
pure: opts?.pure ?? false,
|
||||
});
|
||||
|
||||
return {
|
||||
|
|
@ -65,3 +68,15 @@ export async function createTestDB() {
|
|||
export function sleep(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
/**
 * Detect the language of `text` from the top `lande` result.
 * Returns the locale's language when it is a valid ISO 639-1 code, otherwise `undefined`
 * (also when `lande` yields no result).
 */
export function getLanguage(text: string): LanguageCode | undefined {
  const best = lande(text)[0];
  if (!best) {
    return undefined;
  }

  const { language } = new Intl.Locale(best[0]);
  return ISO6391.validate(language) ? language as LanguageCode : undefined;
}
|
||||
|
|
|
|||
52
src/translators/DeepLTranslator.test.ts
Normal file
52
src/translators/DeepLTranslator.test.ts
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
import { assertEquals } from '@std/assert';
|
||||
|
||||
import { Conf } from '@/config.ts';
|
||||
import { DeepLTranslator } from '@/translators/DeepLTranslator.ts';
|
||||
import { getLanguage } from '@/test.ts';
|
||||
|
||||
const endpoint = Conf.deepLendpoint;
|
||||
const apiKey = Conf.deepLapiKey;
|
||||
const translationProvider = Conf.translationProvider;
|
||||
const deepL = 'deepl';
|
||||
|
||||
// Calls the real DeepL API; skipped unless DeepL is the configured provider and has an API key.
Deno.test('DeepL translation with source language omitted', {
  ignore: translationProvider !== deepL || !apiKey,
}, async () => {
  const texts = [
    'Bom dia amigos',
    'Meu nome é Patrick',
    'Eu irei morar na America, eu prometo. Mas antes, eu devo mencionar que o lande está interpretando este texto como italiano, que estranho.',
  ];

  const translator = new DeepLTranslator({ fetch, endpoint, apiKey: apiKey as string });
  const { source_lang, results } = await translator.translate(texts, undefined, 'en');

  assertEquals(source_lang, 'pt');
  for (const index of texts.keys()) {
    assertEquals(getLanguage(results[index]), 'en');
  }
});
|
||||
|
||||
// Calls the real DeepL API; skipped unless DeepL is the configured provider and has an API key.
Deno.test('DeepL translation with source language set', {
  ignore: translationProvider !== deepL || !apiKey,
}, async () => {
  const texts = [
    'Bom dia amigos',
    'Meu nome é Patrick',
    'Eu irei morar na America, eu prometo. Mas antes, eu devo mencionar que o lande está interpretando este texto como italiano, que estranho.',
  ];

  const translator = new DeepLTranslator({ fetch, endpoint, apiKey: apiKey as string });
  const { source_lang, results } = await translator.translate(texts, 'pt', 'en');

  assertEquals(source_lang, 'pt');
  for (const index of texts.keys()) {
    assertEquals(getLanguage(results[index]), 'en');
  }
});
|
||||
93
src/translators/DeepLTranslator.ts
Normal file
93
src/translators/DeepLTranslator.ts
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
import { z } from 'zod';
|
||||
|
||||
import { DittoTranslator, Provider, SourceLanguage, TargetLanguage } from '@/translators/translator.ts';
|
||||
import { languageSchema } from '@/schema.ts';
|
||||
|
||||
/** Options accepted by the `DeepLTranslator` constructor. */
interface DeepLTranslatorOpts {
|
||||
/** DeepL endpoint to use (base URL; `/v2/translate` is appended). Default: 'https://api.deepl.com' */
|
||||
endpoint?: string;
|
||||
/** DeepL API key, sent in the `Authorization` header as `DeepL-Auth-Key <key>`. */
|
||||
apiKey: string;
|
||||
/** Custom fetch implementation. Default: `globalThis.fetch` */
|
||||
fetch?: typeof fetch;
|
||||
}
|
||||
|
||||
/** `DittoTranslator` implementation backed by the DeepL `/v2/translate` HTTP API. */
export class DeepLTranslator implements DittoTranslator {
  private readonly endpoint: string;
  private readonly apiKey: string;
  private readonly fetch: typeof fetch;
  private static provider: Provider = 'DeepL.com';

  constructor(opts: DeepLTranslatorOpts) {
    this.endpoint = opts.endpoint ?? 'https://api.deepl.com';
    this.fetch = opts.fetch ?? globalThis.fetch;
    this.apiKey = opts.apiKey;
  }

  /**
   * Translate `texts` into `dest`.
   *
   * @param texts Texts to translate; results are returned in the same order.
   * @param source Source language, or `undefined` to omit it from the request.
   * @param dest Target language.
   * @param opts Optional abort signal for the HTTP request.
   * @returns The translated texts and the source language reported for the first text.
   * @throws When the request fails, the response does not match the expected schema,
   *   or DeepL returns no translations.
   */
  async translate(
    texts: string[],
    source: SourceLanguage | undefined,
    dest: TargetLanguage,
    opts?: { signal?: AbortSignal },
  ) {
    const { translations } = await this.translateMany(texts, source, dest, opts);

    // Without this guard an empty array would surface as an opaque TypeError below.
    const [first] = translations;
    if (!first) {
      throw new Error('DeepL returned no translations');
    }

    return {
      results: translations.map((value) => value.text),
      source_lang: first.detected_source_language,
    };
  }

  /** DeepL translate request. */
  private async translateMany(
    texts: string[],
    source: SourceLanguage | undefined,
    targetLanguage: TargetLanguage,
    opts?: { signal?: AbortSignal },
  ) {
    const body: {
      text: string[];
      target_lang: string;
      tag_handling: string;
      split_sentences: string;
      source_lang?: string;
    } = {
      text: texts,
      target_lang: targetLanguage.toUpperCase(),
      tag_handling: 'html',
      split_sentences: '1',
    };
    if (source) {
      body.source_lang = source.toUpperCase();
    }

    const headers = new Headers();
    headers.append('Authorization', 'DeepL-Auth-Key' + ' ' + this.apiKey);
    headers.append('Content-Type', 'application/json');

    const request = new Request(this.endpoint + '/v2/translate', {
      method: 'POST',
      body: JSON.stringify(body),
      headers,
      signal: opts?.signal,
    });

    const response = await this.fetch(request);

    // Report HTTP failures as such rather than as a confusing schema validation error.
    if (!response.ok) {
      await response.body?.cancel();
      throw new Error(`DeepL request failed: ${response.status} ${response.statusText}`);
    }

    const json = await response.json();
    const data = DeepLTranslator.schema().parse(json);

    return data;
  }

  /** DeepL response schema.
   * https://developers.deepl.com/docs/api-reference/translate/openapi-spec-for-text-translation */
  private static schema() {
    return z.object({
      translations: z.array(
        z.object({
          detected_source_language: languageSchema,
          text: z.string(),
        }),
      ),
    });
  }

  /** DeepL provider. */
  getProvider(): Provider {
    return DeepLTranslator.provider;
  }
}
|
||||
52
src/translators/LibreTranslateTranslator.test.ts
Normal file
52
src/translators/LibreTranslateTranslator.test.ts
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
import { assertEquals } from '@std/assert';
|
||||
|
||||
import { Conf } from '@/config.ts';
|
||||
import { LibreTranslateTranslator } from '@/translators/LibreTranslateTranslator.ts';
|
||||
import { getLanguage } from '@/test.ts';
|
||||
|
||||
const endpoint = Conf.libreTranslateEndpoint;
|
||||
const apiKey = Conf.libreTranslateApiKey;
|
||||
const translationProvider = Conf.translationProvider;
|
||||
const libreTranslate = 'libretranslate';
|
||||
|
||||
// Calls a real LibreTranslate server; skipped unless LibreTranslate is the configured provider and has an API key.
Deno.test('LibreTranslate translation with source language omitted', {
  ignore: translationProvider !== libreTranslate || !apiKey,
}, async () => {
  const texts = [
    'Bom dia amigos',
    'Meu nome é Patrick, um nome belo ou feio? A questão é mais profunda do que parece.',
    'A respiração é mais importante do que comer e tomar agua.',
  ];

  const translator = new LibreTranslateTranslator({ fetch, endpoint, apiKey: apiKey as string });
  const { source_lang, results } = await translator.translate(texts, undefined, 'ca');

  assertEquals(source_lang, 'pt');
  for (const index of texts.keys()) {
    assertEquals(getLanguage(results[index]), 'ca');
  }
});
|
||||
|
||||
// Translates three Portuguese sentences to Catalan with the source language given as 'pt',
// then checks the reported source language and the language of every result.
// Ignored unless LibreTranslate is the configured provider and an API key is set.
Deno.test('LibreTranslate translation with source language set', {
  ignore: translationProvider !== libreTranslate || !apiKey,
}, async () => {
  const texts = [
    'Bom dia amigos',
    'Meu nome é Patrick, um nome belo ou feio? A questão é mais profunda do que parece.',
    'A respiração é mais importante do que comer e tomar agua.',
  ];
  const translator = new LibreTranslateTranslator({ fetch, endpoint, apiKey: apiKey as string });

  const { source_lang, results } = await translator.translate(texts, 'pt', 'ca');

  assertEquals(source_lang, 'pt');
  for (const index of [0, 1, 2]) {
    assertEquals(getLanguage(results[index]), 'ca');
  }
});
|
||||
92
src/translators/LibreTranslateTranslator.ts
Normal file
92
src/translators/LibreTranslateTranslator.ts
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
import { LanguageCode } from 'iso-639-1';
|
||||
import { z } from 'zod';
|
||||
|
||||
import { DittoTranslator, Provider, SourceLanguage, TargetLanguage } from '@/translators/translator.ts';
|
||||
import { languageSchema } from '@/schema.ts';
|
||||
|
||||
/** Constructor options for `LibreTranslateTranslator`. */
interface LibreTranslateTranslatorOpts {
|
||||
/** Base URL of the LibreTranslate server; '/translate' is appended to it. Default: 'https://libretranslate.com' */
|
||||
endpoint?: string;
|
||||
/** LibreTranslate API key, sent as `api_key` in the request body. */
|
||||
apiKey: string;
|
||||
/** Custom fetch implementation. Default: `globalThis.fetch` */
|
||||
fetch?: typeof fetch;
|
||||
}
|
||||
|
||||
/**
 * `DittoTranslator` implementation backed by a LibreTranslate server.
 *
 * Each input text is sent as its own POST request to `<endpoint>/translate`,
 * and all requests run concurrently.
 */
export class LibreTranslateTranslator implements DittoTranslator {
  private readonly endpoint: string;
  private readonly apiKey: string;
  private readonly fetch: typeof fetch;
  private static provider: Provider = 'libretranslate.com';

  constructor(opts: LibreTranslateTranslatorOpts) {
    this.endpoint = opts.endpoint ?? 'https://libretranslate.com';
    this.fetch = opts.fetch ?? globalThis.fetch;
    this.apiKey = opts.apiKey;
  }

  /**
   * Translates every entry of `texts` into `dest`.
   *
   * @param texts Texts to translate; they are submitted with format 'html'.
   * @param source Language of the texts, or `undefined` to let the server detect it.
   * @param dest Language to translate into.
   * @param opts Optional abort signal forwarded to every request.
   * @returns The translated texts in input order, plus the source language.
   * @throws If any request fails or any response does not match the expected schema.
   */
  async translate(
    texts: string[],
    source: SourceLanguage | undefined,
    dest: TargetLanguage,
    opts?: { signal?: AbortSignal },
  ) {
    const translations = await Promise.all(
      texts.map((text) => this.translateOne(text, source, dest, 'html', { signal: opts?.signal })),
    );

    return {
      results: translations.map((value) => value.translatedText),
      // Prefer the language detected for the first text; otherwise fall back to the caller's `source`.
      // NOTE(review): with an empty `texts` and no `source`, this is `undefined` at runtime despite the cast.
      source_lang: translations[0]?.detectedLanguage?.language ?? source as LanguageCode, // cast is ok
    };
  }

  /**
   * Sends a single text to the `/translate` endpoint and returns the validated response.
   *
   * @throws Error carrying the server's error message (or HTTP status) on a non-2xx response.
   */
  private async translateOne(
    q: string,
    sourceLanguage: string | undefined,
    targetLanguage: string,
    format: 'html' | 'text',
    opts?: { signal?: AbortSignal },
  ) {
    const body = {
      q,
      // 'auto' asks the server to detect the source language.
      source: sourceLanguage?.toLowerCase() ?? 'auto',
      target: targetLanguage.toLowerCase(),
      format,
      api_key: this.apiKey,
    };

    const headers = new Headers();
    headers.append('Content-Type', 'application/json');

    const request = new Request(this.endpoint + '/translate', {
      method: 'POST',
      body: JSON.stringify(body),
      headers,
      signal: opts?.signal,
    });

    const response = await this.fetch(request);
    const json: unknown = await response.json();

    // Surface API failures explicitly instead of letting them show up as an
    // unrelated schema-validation error on the success schema below.
    if (!response.ok) {
      const failure = LibreTranslateTranslator.errorSchema().safeParse(json);
      const reason = failure.success ? failure.data.error : `${response.statusText} (${response.status})`;
      throw new Error(`LibreTranslate request failed: ${reason}`);
    }

    return LibreTranslateTranslator.schema().parse(json);
  }

  /** Libretranslate response schema.
   * https://libretranslate.com/docs/#/translate/post_translate */
  private static schema() {
    return z.object({
      translatedText: z.string(),
      /** This field is only available if the 'source' is set to 'auto' */
      detectedLanguage: z.object({
        language: languageSchema,
      }).optional(),
    });
  }

  /** Shape of an error reply, which the LibreTranslate API docs describe as `{ error: string }`. */
  private static errorSchema() {
    return z.object({ error: z.string() });
  }

  /** LibreTranslate provider. */
  getProvider(): Provider {
    return LibreTranslateTranslator.provider;
  }
}
|
||||
59
src/translators/translator.ts
Normal file
59
src/translators/translator.ts
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
import { LanguageCode } from 'iso-639-1';
|
||||
import { LRUCache } from 'lru-cache';
|
||||
|
||||
import { Time } from '@/utils/time.ts';
|
||||
|
||||
/** Identifiers of the supported machine-translation providers. */
|
||||
export type Provider = 'DeepL.com' | 'libretranslate.com';
|
||||
|
||||
/** Original language of the post, as a `LanguageCode` from the 'iso-639-1' package. */
|
||||
export type SourceLanguage = LanguageCode;
|
||||
|
||||
/** Language the content will be translated to, as a `LanguageCode` from the 'iso-639-1' package. */
|
||||
export type TargetLanguage = LanguageCode;
|
||||
|
||||
/** Value type of the `dittoTranslations` LRUCache: wraps a `MastodonTranslation` under `data`. */
|
||||
type DittoTranslation = {
|
||||
data: MastodonTranslation;
|
||||
};
|
||||
|
||||
/** Translated parts of a status. NOTE(review): presumably mirrors Mastodon's Translation API entity — confirm. */
export type MastodonTranslation = {
|
||||
/** HTML-encoded translated content of the status. */
|
||||
content: string;
|
||||
/** The translated spoiler warning of the status. */
|
||||
spoiler_text: string;
|
||||
/** The translated media descriptions of the status. */
|
||||
media_attachments: { id: string; description: string }[];
|
||||
/** The translated poll of the status. */
|
||||
poll: { id: string; options: { title: string }[] } | null;
|
||||
/** The language of the source text, as auto-detected by the machine translation provider. */
|
||||
detected_source_language: SourceLanguage;
|
||||
/** The service that provided the machine translation. */
|
||||
provider: Provider;
|
||||
};
|
||||
|
||||
/** DittoTranslator interface, implemented by each translation provider used for status translation. */
|
||||
export interface DittoTranslator {
|
||||
/** Translate each entry of 'texts' into 'targetLanguage'. Resolves with the translated texts and the source language. */
|
||||
translate(
|
||||
texts: string[],
|
||||
/** The language of the source text/status, or `undefined` when it is not known. */
|
||||
sourceLanguage: SourceLanguage | undefined,
|
||||
/** The status content will be translated into this language. */
|
||||
targetLanguage: TargetLanguage,
|
||||
/** Custom options: an optional signal to abort the translation. */
|
||||
opts?: { signal?: AbortSignal },
|
||||
): Promise<{ results: string[]; source_lang: SourceLanguage }>;
|
||||
/** Identifier of the provider behind this translator. */
getProvider(): Provider;
|
||||
}
|
||||
|
||||
/** Includes the TARGET language and the status id, joined by a hyphen.
 * Example: en-390f5b01b49a8ee6e13fe917420c023d889b3da8e983a14c9e84587e43d12c15
 * The example above means:
 * I want the status 390f5b01b49a8ee6e13fe917420c023d889b3da8e983a14c9e84587e43d12c15 translated to English (if it exists in the LRUCache). */
export type dittoTranslationsKey = `${TargetLanguage}-${string}`;
|
||||
|
||||
/** Cache of translations keyed by `dittoTranslationsKey`: capped at 1000 entries, TTL of `Time.hours(6)`. */
const dittoTranslationsOptions = { max: 1000, ttl: Time.hours(6) };

export const dittoTranslations = new LRUCache<dittoTranslationsKey, DittoTranslation>(dittoTranslationsOptions);
|
||||
|
|
@ -113,7 +113,7 @@ async function renderStatus(event: DittoEvent, opts: RenderStatusOpts): Promise<
|
|||
sensitive: !!cw,
|
||||
spoiler_text: (cw ? cw[1] : subject?.[1]) || '',
|
||||
visibility: 'public',
|
||||
language: event.tags.find((tag) => tag[0] === 'l' && tag[2] === 'ISO-639-1')?.[1] || null,
|
||||
language: event.language ?? null,
|
||||
replies_count: event.event_stats?.replies_count ?? 0,
|
||||
reblogs_count: event.event_stats?.reposts_count ?? 0,
|
||||
favourites_count: event.event_stats?.reactions['+'] ?? 0,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue