Merge branch 'translator-env' into 'main'

Translation refactors

See merge request soapbox-pub/ditto!542
This commit is contained in:
Alex Gleason 2024-10-10 19:22:12 +00:00
commit 522a283af1
16 changed files with 149 additions and 268 deletions

View file

@ -120,6 +120,7 @@ import { indexController } from '@/controllers/site.ts';
import { manifestController } from '@/controllers/manifest.ts';
import { nodeInfoController, nodeInfoSchemaController } from '@/controllers/well-known/nodeinfo.ts';
import { nostrController } from '@/controllers/well-known/nostr.ts';
import { DittoTranslator } from '@/interfaces/DittoTranslator.ts';
import { auth98Middleware, requireProof, requireRole } from '@/middleware/auth98Middleware.ts';
import { cspMiddleware } from '@/middleware/cspMiddleware.ts';
import { metricsMiddleware } from '@/middleware/metricsMiddleware.ts';
@ -129,7 +130,6 @@ import { requireSigner } from '@/middleware/requireSigner.ts';
import { signerMiddleware } from '@/middleware/signerMiddleware.ts';
import { storeMiddleware } from '@/middleware/storeMiddleware.ts';
import { uploaderMiddleware } from '@/middleware/uploaderMiddleware.ts';
import { DittoTranslator } from '@/translators/translator.ts';
import { translatorMiddleware } from '@/middleware/translatorMiddleware.ts';
interface AppEnv extends HonoEnv {

View file

@ -0,0 +1,11 @@
import { LanguageCode } from 'iso-639-1';
import { LRUCache } from 'lru-cache';
import { Conf } from '@/config.ts';
import { MastodonTranslation } from '@/entities/MastodonTranslation.ts';
/** Cache key: the target language code and the status ID, joined by a hyphen. */
type TranslationCacheKey = `${LanguageCode}-${string}`;

/** Size limit and entry lifetime taken from the translation cache configuration. */
const { max, ttl } = Conf.caches.translation;

/** In-memory LRU cache of translated statuses. */
export const translationCache = new LRUCache<TranslationCacheKey, MastodonTranslation>({ max, ttl });

View file

@ -276,19 +276,19 @@ class Conf {
return Deno.env.get('TRANSLATION_PROVIDER');
}
/** DeepL URL endpoint. */
static get deepLendpoint(): string | undefined {
return Deno.env.get('DEEPL_ENDPOINT');
static get deeplBaseUrl(): string | undefined {
return Deno.env.get('DEEPL_BASE_URL');
}
/** DeepL API KEY. */
static get deepLapiKey(): string | undefined {
static get deeplApiKey(): string | undefined {
return Deno.env.get('DEEPL_API_KEY');
}
/** LibreTranslate URL endpoint. */
static get libreTranslateEndpoint(): string | undefined {
return Deno.env.get('LIBRETRANSLATE_ENDPOINT');
static get libretranslateBaseUrl(): string | undefined {
return Deno.env.get('LIBRETRANSLATE_BASE_URL');
}
/** LibreTranslate API KEY. */
static get libreTranslateApiKey(): string | undefined {
static get libretranslateApiKey(): string | undefined {
return Deno.env.get('LIBRETRANSLATE_API_KEY');
}
/** Cache settings. */
@ -314,6 +314,13 @@ class Conf {
ttl: Number(Deno.env.get('DITTO_CACHE_LINK_PREVIEW_TTL') || 12 * 60 * 60 * 1000),
};
},
/** Translation cache settings. */
get translation(): { max: number; ttl: number } {
return {
max: Number(Deno.env.get('DITTO_CACHE_TRANSLATION_MAX') || 1000),
ttl: Number(Deno.env.get('DITTO_CACHE_TRANSLATION_TTL') || 6 * 60 * 60 * 1000),
};
},
};
}

View file

@ -2,10 +2,12 @@ import { LanguageCode } from 'iso-639-1';
import { z } from 'zod';
import { AppController } from '@/app.ts';
import { localeSchema } from '@/schema.ts';
import { dittoTranslations, dittoTranslationsKey, MastodonTranslation } from '@/translators/translator.ts';
import { parseBody } from '@/utils/api.ts';
import { translationCache } from '@/caches/translationCache.ts';
import { MastodonTranslation } from '@/entities/MastodonTranslation.ts';
import { cachedTranslationsSizeGauge } from '@/metrics.ts';
import { getEvent } from '@/queries.ts';
import { localeSchema } from '@/schema.ts';
import { parseBody } from '@/utils/api.ts';
import { renderStatus } from '@/views/mastodon/statuses.ts';
const translateSchema = z.object({
@ -45,11 +47,11 @@ const translateController: AppController = async (c) => {
return c.json({ error: 'Bad request.', schema: result.error }, 400);
}
const translatedId = `${lang}-${id}` as dittoTranslationsKey;
const translationCache = dittoTranslations.get(translatedId);
const cacheKey: `${LanguageCode}-${string}` = `${lang}-${id}`;
const cached = translationCache.get(cacheKey);
if (translationCache) {
return c.json(translationCache.data, 200);
if (cached) {
return c.json(cached, 200);
}
const mediaAttachments = status?.media_attachments.map((value) => {
@ -68,7 +70,7 @@ const translateController: AppController = async (c) => {
media_attachments: [],
poll: null,
detected_source_language: event.language ?? 'en',
provider: translator.getProvider(),
provider: translator.provider,
};
if ((status?.poll as MastodonTranslation['poll'])?.options) {
@ -130,10 +132,12 @@ const translateController: AppController = async (c) => {
mastodonTranslation.detected_source_language = data.source_lang;
dittoTranslations.set(translatedId, { data: mastodonTranslation });
translationCache.set(cacheKey, mastodonTranslation);
cachedTranslationsSizeGauge.set(translationCache.size);
return c.json(mastodonTranslation, 200);
} catch (e) {
if (e instanceof Error && e.message?.includes('not supported')) {
if (e instanceof Error && e.message.includes('not supported')) {
return c.json({ error: `Translation of source language '${event.language}' not supported` }, 422);
}
return c.json({ error: 'Service Unavailable' }, 503);

View file

@ -0,0 +1,17 @@
import { LanguageCode } from 'iso-639-1';
/**
 * Translation entity of the Mastodon API.
 * https://docs.joinmastodon.org/entities/Translation/
 */
export interface MastodonTranslation {
/** HTML-encoded translated content of the status. */
content: string;
/** The translated spoiler warning of the status. */
spoiler_text: string;
/** The translated media descriptions of the status. */
media_attachments: { id: string; description: string }[];
/** The translated poll of the status, or `null` when there is none. */
poll: { id: string; options: { title: string }[] } | null;
/** The language of the source text, as auto-detected by the machine translation provider. */
detected_source_language: LanguageCode;
/** The service that provided the machine translation. */
provider: string;
}

View file

@ -0,0 +1,18 @@
import type { LanguageCode } from 'iso-639-1';
/** Contract implemented by translation providers; used for status translation. */
export interface DittoTranslator {
/** Provider name, e.g. `DeepL.com`. */
provider: string;
/** Translate `texts` from `sourceLanguage` into `targetLanguage`. */
translate(
/** Texts to translate. */
texts: string[],
/** The language of the source texts; may be `undefined` (source language omitted). */
sourceLanguage: LanguageCode | undefined,
/** The texts will be translated into this language. */
targetLanguage: LanguageCode,
/** Custom options, e.g. an `AbortSignal` for the request. */
opts?: { signal?: AbortSignal },
): Promise<{ results: string[]; source_lang: LanguageCode }>;
}

View file

@ -121,6 +121,11 @@ export const cachedLinkPreviewSizeGauge = new Gauge({
help: 'Number of link previews in cache',
});
/** Gauge reporting the number of translated statuses held in the translation cache. */
export const cachedTranslationsSizeGauge = new Gauge({
name: 'ditto_cached_translations_size',
help: 'Number of translated statuses in cache',
});
export const internalSubscriptionsSizeGauge = new Gauge({
name: 'ditto_internal_subscriptions_size',
help: "Number of active subscriptions to Ditto's internal relay",

View file

@ -1,28 +0,0 @@
import Debug from '@soapbox/stickynotes/debug';
import { type MiddlewareHandler } from 'hono';
import ExpiringCache from '@/utils/expiring-cache.ts';
const debug = Debug('ditto:middleware:cache');

/**
 * Build a middleware that serves whole responses from a named Cache Storage
 * cache, keyed by the request URL rewritten to `https://`.
 *
 * On a miss the downstream handlers run and their response is stored unless
 * its status is 500 or above. `options.expires` is handed to
 * `ExpiringCache.putExpiring` as the lifetime and defaults to `0`.
 */
export const cacheMiddleware = (options: {
  cacheName: string;
  expires?: number;
}): MiddlewareHandler => {
  return async (c, next) => {
    // Normalise the scheme so http and https requests share one cache entry.
    const key = c.req.url.replace('http://', 'https://');
    const cache = new ExpiringCache(await caches.open(options.cacheName));

    const cached = await cache.match(key);
    if (cached) {
      debug('Serving page from cache', c.req.url);
      return cached;
    }

    debug('Building cache for page', c.req.url);
    await next();

    // Store a clone so the response sent to the client keeps its own body.
    const fresh = c.res.clone();
    if (fresh.status < 500) {
      await cache.putExpiring(key, fresh, options.expires ?? 0);
    }
  };
};

View file

@ -6,33 +6,22 @@ import { LibreTranslateTranslator } from '@/translators/LibreTranslateTranslator
/** Set the translator used for translating posts. */
export const translatorMiddleware: AppMiddleware = async (c, next) => {
const deepLendpoint = Conf.deepLendpoint;
const deepLapiKey = Conf.deepLapiKey;
const libreTranslateEndpoint = Conf.libreTranslateEndpoint;
const libreTranslateApiKey = Conf.libreTranslateApiKey;
const translationProvider = Conf.translationProvider;
switch (Conf.translationProvider) {
case 'deepl': {
const { deeplApiKey: apiKey, deeplBaseUrl: baseUrl } = Conf;
if (apiKey) {
c.set('translator', new DeepLTranslator({ baseUrl, apiKey, fetch: fetchWorker }));
}
break;
}
switch (translationProvider) {
case 'deepl':
if (deepLapiKey) {
c.set(
'translator',
new DeepLTranslator({ endpoint: deepLendpoint, apiKey: deepLapiKey, fetch: fetchWorker }),
);
}
break;
case 'libretranslate':
if (libreTranslateApiKey) {
c.set(
'translator',
new LibreTranslateTranslator({
endpoint: libreTranslateEndpoint,
apiKey: libreTranslateApiKey,
fetch: fetchWorker,
}),
);
case 'libretranslate': {
const { libretranslateApiKey: apiKey, libretranslateBaseUrl: baseUrl } = Conf;
if (apiKey) {
c.set('translator', new LibreTranslateTranslator({ baseUrl, apiKey, fetch: fetchWorker }));
}
break;
}
}
await next();

View file

@ -4,15 +4,18 @@ import { Conf } from '@/config.ts';
import { DeepLTranslator } from '@/translators/DeepLTranslator.ts';
import { getLanguage } from '@/test.ts';
const endpoint = Conf.deepLendpoint;
const apiKey = Conf.deepLapiKey;
const translationProvider = Conf.translationProvider;
const deepL = 'deepl';
const {
deeplBaseUrl: baseUrl,
deeplApiKey: apiKey,
translationProvider,
} = Conf;
const deepl = 'deepl';
Deno.test('DeepL translation with source language omitted', {
ignore: !(translationProvider === deepL && apiKey),
ignore: !(translationProvider === deepl && apiKey),
}, async () => {
const translator = new DeepLTranslator({ fetch: fetch, endpoint, apiKey: apiKey as string });
const translator = new DeepLTranslator({ fetch: fetch, baseUrl, apiKey: apiKey! });
const data = await translator.translate(
[
@ -31,9 +34,9 @@ Deno.test('DeepL translation with source language omitted', {
});
Deno.test('DeepL translation with source language set', {
ignore: !(translationProvider === deepL && apiKey),
ignore: !(translationProvider === deepl && apiKey),
}, async () => {
const translator = new DeepLTranslator({ fetch: fetch, endpoint, apiKey: apiKey as string });
const translator = new DeepLTranslator({ fetch: fetch, baseUrl, apiKey: apiKey as string });
const data = await translator.translate(
[

View file

@ -1,12 +1,12 @@
import { LanguageCode } from 'iso-639-1';
import { z } from 'zod';
import { DittoTranslator, SourceLanguage, TargetLanguage } from '@/translators/translator.ts';
import { DittoTranslator } from '@/interfaces/DittoTranslator.ts';
import { languageSchema } from '@/schema.ts';
interface DeepLTranslatorOpts {
/** DeepL endpoint to use. Default: 'https://api.deepl.com' */
endpoint?: string;
/** DeepL base URL to use. Default: 'https://api.deepl.com' */
baseUrl?: string;
/** DeepL API key. */
apiKey: string;
/** Custom fetch implementation. */
@ -14,36 +14,37 @@ interface DeepLTranslatorOpts {
}
export class DeepLTranslator implements DittoTranslator {
private readonly endpoint: string;
private readonly baseUrl: string;
private readonly apiKey: string;
private readonly fetch: typeof fetch;
private static provider = 'DeepL.com';
readonly provider = 'DeepL.com';
constructor(opts: DeepLTranslatorOpts) {
this.endpoint = opts.endpoint ?? 'https://api.deepl.com';
this.baseUrl = opts.baseUrl ?? 'https://api.deepl.com';
this.fetch = opts.fetch ?? globalThis.fetch;
this.apiKey = opts.apiKey;
}
async translate(
texts: string[],
source: SourceLanguage | undefined,
dest: TargetLanguage,
source: LanguageCode | undefined,
dest: LanguageCode,
opts?: { signal?: AbortSignal },
) {
const data = (await this.translateMany(texts, source, dest, opts)).translations;
const { translations } = await this.translateMany(texts, source, dest, opts);
return {
results: data.map((value) => value.text),
source_lang: data[0].detected_source_language as LanguageCode,
results: translations.map((value) => value.text),
source_lang: translations[0]?.detected_source_language as LanguageCode,
};
}
/** DeepL translate request. */
private async translateMany(
texts: string[],
source: SourceLanguage | undefined,
targetLanguage: TargetLanguage,
source: LanguageCode | undefined,
targetLanguage: LanguageCode,
opts?: { signal?: AbortSignal },
) {
const body: any = {
@ -56,25 +57,26 @@ export class DeepLTranslator implements DittoTranslator {
body.source_lang = source.toUpperCase();
}
const headers = new Headers();
headers.append('Authorization', 'DeepL-Auth-Key' + ' ' + this.apiKey);
headers.append('Content-Type', 'application/json');
const url = new URL('/v2/translate', this.baseUrl);
const request = new Request(this.endpoint + '/v2/translate', {
const request = new Request(url, {
method: 'POST',
body: JSON.stringify(body),
headers,
headers: {
'Authorization': `DeepL-Auth-Key ${this.apiKey}`,
'Content-Type': 'application/json',
},
signal: opts?.signal,
});
const response = await this.fetch(request);
const json = await response.json();
if (!response.ok) {
throw new Error(json['message']);
}
const data = DeepLTranslator.schema().parse(json);
return data;
return DeepLTranslator.schema().parse(json);
}
/** DeepL response schema.
@ -89,9 +91,4 @@ export class DeepLTranslator implements DittoTranslator {
),
});
}
/** DeepL provider. */
getProvider(): string {
return DeepLTranslator.provider;
}
}

View file

@ -4,15 +4,18 @@ import { Conf } from '@/config.ts';
import { LibreTranslateTranslator } from '@/translators/LibreTranslateTranslator.ts';
import { getLanguage } from '@/test.ts';
const endpoint = Conf.libreTranslateEndpoint;
const apiKey = Conf.libreTranslateApiKey;
const translationProvider = Conf.translationProvider;
const libreTranslate = 'libretranslate';
const {
libretranslateBaseUrl: baseUrl,
libretranslateApiKey: apiKey,
translationProvider,
} = Conf;
const libretranslate = 'libretranslate';
Deno.test('LibreTranslate translation with source language omitted', {
ignore: !(translationProvider === libreTranslate && apiKey),
ignore: !(translationProvider === libretranslate && apiKey),
}, async () => {
const translator = new LibreTranslateTranslator({ fetch: fetch, endpoint, apiKey: apiKey as string });
const translator = new LibreTranslateTranslator({ fetch: fetch, baseUrl, apiKey: apiKey! });
const data = await translator.translate(
[
@ -31,9 +34,9 @@ Deno.test('LibreTranslate translation with source language omitted', {
});
Deno.test('LibreTranslate translation with source language set', {
ignore: !(translationProvider === libreTranslate && apiKey),
ignore: !(translationProvider === libretranslate && apiKey),
}, async () => {
const translator = new LibreTranslateTranslator({ fetch: fetch, endpoint, apiKey: apiKey as string });
const translator = new LibreTranslateTranslator({ fetch: fetch, baseUrl, apiKey: apiKey! });
const data = await translator.translate(
[

View file

@ -1,12 +1,12 @@
import { LanguageCode } from 'iso-639-1';
import { z } from 'zod';
import { DittoTranslator, SourceLanguage, TargetLanguage } from '@/translators/translator.ts';
import { DittoTranslator } from '@/interfaces/DittoTranslator.ts';
import { languageSchema } from '@/schema.ts';
interface LibreTranslateTranslatorOpts {
/** Libretranslate endpoint to use. Default: 'https://libretranslate.com' */
endpoint?: string;
baseUrl?: string;
/** Libretranslate API key. */
apiKey: string;
/** Custom fetch implementation. */
@ -14,21 +14,22 @@ interface LibreTranslateTranslatorOpts {
}
export class LibreTranslateTranslator implements DittoTranslator {
private readonly endpoint: string;
private readonly baseUrl: string;
private readonly apiKey: string;
private readonly fetch: typeof fetch;
private static provider = 'libretranslate.com';
readonly provider = 'libretranslate.com';
constructor(opts: LibreTranslateTranslatorOpts) {
this.endpoint = opts.endpoint ?? 'https://libretranslate.com';
this.baseUrl = opts.baseUrl ?? 'https://libretranslate.com';
this.fetch = opts.fetch ?? globalThis.fetch;
this.apiKey = opts.apiKey;
}
async translate(
texts: string[],
source: SourceLanguage | undefined,
dest: TargetLanguage,
source: LanguageCode | undefined,
dest: LanguageCode,
opts?: { signal?: AbortSignal },
) {
const translations = await Promise.all(
@ -56,13 +57,14 @@ export class LibreTranslateTranslator implements DittoTranslator {
api_key: this.apiKey,
};
const headers = new Headers();
headers.append('Content-Type', 'application/json');
const url = new URL('/translate', this.baseUrl);
const request = new Request(this.endpoint + '/translate', {
const request = new Request(url, {
method: 'POST',
body: JSON.stringify(body),
headers,
headers: {
'Content-Type': 'application/json',
},
signal: opts?.signal,
});
@ -87,9 +89,4 @@ export class LibreTranslateTranslator implements DittoTranslator {
}).optional(),
});
}
/** LibreTranslate provider. */
getProvider(): string {
return LibreTranslateTranslator.provider;
}
}

View file

@ -1,56 +0,0 @@
import { LanguageCode } from 'iso-639-1';
import { LRUCache } from 'lru-cache';
import { Time } from '@/utils/time.ts';
/** Original language of the post. */
export type SourceLanguage = LanguageCode;
/** Language the content will be translated into. */
export type TargetLanguage = LanguageCode;
/** Value stored in the `dittoTranslations` LRU cache; wraps the translation in a `data` field. */
type DittoTranslation = {
data: MastodonTranslation;
};
/** Translated parts of a status. */
export type MastodonTranslation = {
/** HTML-encoded translated content of the status. */
content: string;
/** The translated spoiler warning of the status. */
spoiler_text: string;
/** The translated media descriptions of the status. */
media_attachments: { id: string; description: string }[];
/** The translated poll of the status, or `null` when there is none. */
poll: { id: string; options: { title: string }[] } | null;
/** The language of the source text, as auto-detected by the machine translation provider. */
detected_source_language: SourceLanguage;
/** The service that provided the machine translation. */
provider: string;
};
/** Contract implemented by translation providers; used for status translation. */
export interface DittoTranslator {
/** Translate `texts` from `sourceLanguage` into `targetLanguage`. */
translate(
/** Texts to translate. */
texts: string[],
/** The language of the source text/status; may be `undefined` (source language omitted). */
sourceLanguage: SourceLanguage | undefined,
/** The status content will be translated into this language. */
targetLanguage: TargetLanguage,
/** Custom options, e.g. an `AbortSignal` for the request. */
opts?: { signal?: AbortSignal },
): Promise<{ results: string[]; source_lang: SourceLanguage }>;
/** Name of the translation provider. */
getProvider(): string;
}
/**
 * Cache key made of the TARGET language and the status id, joined by a hyphen.
 * Example: en-390f5b01b49a8ee6e13fe917420c023d889b3da8e983a14c9e84587e43d12c15
 * The example above means: the status
 * 390f5b01b49a8ee6e13fe917420c023d889b3da8e983a14c9e84587e43d12c15 translated to English
 * (if it exists in the LRUCache).
 */
export type dittoTranslationsKey = `${TargetLanguage}-${string}`;
/** LRU cache of translations, configured with `max: 1000` and a TTL of `Time.hours(6)`. */
export const dittoTranslations = new LRUCache<dittoTranslationsKey, DittoTranslation>({
max: 1000,
ttl: Time.hours(6),
});

View file

@ -1,18 +0,0 @@
import { assert } from '@std/assert';
import ExpiringCache from './expiring-cache.ts';
Deno.test('ExpiringCache', async () => {
  const store = await caches.open('test');
  const cache = new ExpiringCache(store);

  // One entry with a positive lifetime, one whose lifetime is negative.
  await cache.putExpiring('http://mostr.local/1', new Response('hello world'), 300);
  await cache.putExpiring('http://mostr.local/2', new Response('hello world'), -1);

  // Checks on the first entry are currently disabled:
  // const live = await cache.match('http://mostr.local/1');
  // assert(live!.headers.get('Expires'));
  // await live!.text();

  // The entry stored with a negative lifetime must not be served.
  const expired = await cache.match('http://mostr.local/2');
  assert(!expired);
});

View file

@ -1,68 +0,0 @@
/**
 * A `Cache` wrapper that adds time-based expiry.
 *
 * `putExpiring` stamps the stored response with an `expires` header and
 * `match` refuses (and evicts) entries whose `Expires` date is not in the
 * future. Every other method is forwarded to the wrapped cache unchanged.
 */
class ExpiringCache implements Cache {
  /** The wrapped cache that actually stores the responses. */
  #cache: Cache;

  constructor(cache: Cache) {
    this.#cache = cache;
  }

  /** Forwarded to the wrapped cache. */
  add(request: RequestInfo | URL): Promise<void> {
    return this.#cache.add(request);
  }

  /** Forwarded to the wrapped cache. */
  addAll(requests: RequestInfo[]): Promise<void> {
    return this.#cache.addAll(requests);
  }

  /** Forwarded to the wrapped cache. */
  keys(request?: RequestInfo | URL | undefined, options?: CacheQueryOptions | undefined): Promise<readonly Request[]> {
    return this.#cache.keys(request, options);
  }

  /** Forwarded to the wrapped cache; no expiry check is applied here. */
  matchAll(
    request?: RequestInfo | URL | undefined,
    options?: CacheQueryOptions | undefined,
  ): Promise<readonly Response[]> {
    return this.#cache.matchAll(request, options);
  }

  /** Forwarded to the wrapped cache; the response is stored as given. */
  put(request: RequestInfo | URL, response: Response): Promise<void> {
    return this.#cache.put(request, response);
  }

  /**
   * Store a copy of `response` carrying an `expires` header set to
   * `Date.now() + expiresIn` (milliseconds), formatted with `toUTCString()`.
   *
   * Only the body, status and headers are copied. An `expires` header already
   * present on `response` takes precedence over the computed one.
   */
  putExpiring(request: RequestInfo | URL, response: Response, expiresIn: number): Promise<void> {
    const expiresAt = new Date(Date.now() + expiresIn).toUTCString();
    const ownHeaders = Object.fromEntries(response.headers.entries());

    const stamped = new Response(response.body, {
      status: response.status,
      // Spread last so the response's own headers win over the default.
      headers: { expires: expiresAt, ...ownHeaders },
    });

    return this.#cache.put(request, stamped);
  }

  /**
   * Look up `request`, returning `undefined` on a miss or when the stored
   * entry has expired. Expired entries are deleted from the cache.
   * Entries without an `Expires` header are returned as they are.
   */
  async match(request: RequestInfo | URL, options?: CacheQueryOptions | undefined): Promise<Response | undefined> {
    const hit = await this.#cache.match(request, options);
    if (!hit) {
      return undefined;
    }

    const expires = hit.headers.get('Expires');
    if (!expires) {
      return hit;
    }

    if (new Date(expires).getTime() > Date.now()) {
      return hit;
    }

    await Promise.all([
      this.delete(request),
      hit.text(), // Prevent memory leaks
    ]);
    return undefined;
  }

  /** Forwarded to the wrapped cache. */
  delete(request: RequestInfo | URL, options?: CacheQueryOptions | undefined): Promise<boolean> {
    return this.#cache.delete(request, options);
  }
}
export default ExpiringCache;