refactor: simplify DittoTranslator interface and classes that implement it

This commit is contained in:
P. Reis 2024-10-09 15:03:11 -03:00
parent c1c25d7c08
commit 4f8c8fd1de
6 changed files with 183 additions and 401 deletions

View file

@ -1,14 +1,15 @@
import { LanguageCode } from 'iso-639-1';
import { z } from 'zod';
import { AppController } from '@/app.ts';
import { languageSchema } from '@/schema.ts';
import { dittoTranslations, dittoTranslationsKey } from '@/translators/translator.ts';
import { localeSchema } from '@/schema.ts';
import { dittoTranslations, dittoTranslationsKey, MastodonTranslation } from '@/translators/translator.ts';
import { parseBody } from '@/utils/api.ts';
import { getEvent } from '@/queries.ts';
import { renderStatus } from '@/views/mastodon/statuses.ts';
const translateSchema = z.object({
lang: languageSchema,
lang: localeSchema,
});
const translateController: AppController = async (c) => {
@ -24,7 +25,8 @@ const translateController: AppController = async (c) => {
return c.json({ error: 'No translator configured.' }, 500);
}
const { lang } = result.data;
const lang = result.data.lang.language.slice(0, 2) as LanguageCode;
const id = c.req.param('id');
const event = await getEvent(id, { signal });
@ -39,6 +41,9 @@ const translateController: AppController = async (c) => {
}
const status = await renderStatus(event, { viewerPubkey });
if (!status?.content) {
return c.json({ error: 'Bad request.', schema: result.error }, 400);
}
const translatedId = `${lang}-${id}` as dittoTranslationsKey;
const translationCache = dittoTranslations.get(translatedId);
@ -55,18 +60,79 @@ const translateController: AppController = async (c) => {
}) ?? [];
try {
const translation = await translator.translate(
status?.content ?? '',
status?.spoiler_text ?? '',
mediaAttachments,
null,
event.language,
lang,
{ signal },
);
dittoTranslations.set(translatedId, translation);
return c.json(translation.data, 200);
} catch (_) {
const texts: string[] = [];
const mastodonTranslation: MastodonTranslation = {
content: '',
spoiler_text: '',
media_attachments: [],
poll: null,
detected_source_language: event.language ?? 'en',
provider: translator.getProvider(),
};
if ((status?.poll as MastodonTranslation['poll'])?.options) {
mastodonTranslation.poll = { id: (status?.poll as MastodonTranslation['poll'])?.id!, options: [] };
}
type TranslationIndex = {
[key: number]: 'content' | 'spoilerText' | 'poll' | { type: 'media'; id: string };
};
const translationIndex: TranslationIndex = {};
let index = 0;
// Content
translationIndex[index] = 'content';
texts.push(status.content);
index++;
// Spoiler text
if (status.spoiler_text) {
translationIndex[index] = 'spoilerText';
texts.push(status.spoiler_text);
index++;
}
// Media description
// `index` must always equal the position the next pushed text will occupy in
// `texts`, so each pushed entry records its slot at `index` and then advances
// it by exactly one. (Adding the loop counter to `index` made slots collide or
// be skipped as soon as there was a poll after a single attachment, or three
// or more attachments.)
for (const media of mediaAttachments) {
  translationIndex[index] = { type: 'media', id: media.id };
  texts.push(media.description);
  index++;
}
// Poll title
if (status?.poll) {
  // One text per poll option, in option order, so the translated titles can be
  // pushed back into the translated poll in the same order.
  for (const option of (status.poll as MastodonTranslation['poll'])!.options) {
    translationIndex[index] = 'poll';
    texts.push(option.title);
    index++;
  }
}
const data = await translator.translate(texts, event.language, lang, { signal });
const translatedTexts = data.results;
for (let i = 0; i < texts.length; i++) {
if (translationIndex[i] === 'content') {
mastodonTranslation.content = translatedTexts[i];
} else if (translationIndex[i] === 'spoilerText') {
mastodonTranslation.spoiler_text = translatedTexts[i];
} else if (translationIndex[i] === 'poll') {
mastodonTranslation.poll?.options.push({ title: translatedTexts[i] });
} else {
const media = translationIndex[i] as { type: 'media'; id: string };
mastodonTranslation.media_attachments.push({
id: media.id,
description: translatedTexts[i],
});
}
}
mastodonTranslation.detected_source_language = data.source_lang;
dittoTranslations.set(translatedId, { data: mastodonTranslation });
return c.json(mastodonTranslation, 200);
} catch {
return c.json({ error: 'Service Unavailable' }, 503);
}
};

View file

@ -9,131 +9,44 @@ const apiKey = Conf.deepLapiKey;
const translationProvider = Conf.translationProvider;
const deepL = 'deepl';
Deno.test('Translate status with EMPTY media_attachments and WITHOUT poll', {
Deno.test('DeepL translation with source language omitted', {
ignore: !(translationProvider === deepL && apiKey),
}, async () => {
const translator = new DeepLTranslator({ fetch: fetch, endpoint, apiKey: apiKey as string });
const mastodonTranslation = await translator.translate(
'Bom dia amigos do Element, meu nome é Patrick',
'',
[],
null,
'pt',
'en',
);
assertEquals(getLanguage(mastodonTranslation.data.content), 'en');
assertEquals(mastodonTranslation.data.spoiler_text, '');
assertEquals(mastodonTranslation.data.media_attachments, []);
assertEquals(mastodonTranslation.data.poll, null);
assertEquals(mastodonTranslation.data.provider, 'DeepL.com');
});
Deno.test('Translate status WITH auto detect and with EMPTY media_attachments and WITHOUT poll', {
ignore: !(translationProvider === deepL && apiKey),
}, async () => {
const translator = new DeepLTranslator({ fetch: fetch, endpoint, apiKey: apiKey as string });
const mastodonTranslation = await translator.translate(
'Bom dia amigos do Element, meu nome é Patrick',
'',
[],
null,
const data = await translator.translate(
[
'Bom dia amigos',
'Meu nome é Patrick',
'Eu irei morar na America, eu prometo. Mas antes, eu devo mencionar que o lande está interpretando este texto como italiano, que estranho.',
],
undefined,
'en',
);
assertEquals(getLanguage(mastodonTranslation.data.content), 'en');
assertEquals(mastodonTranslation.data.spoiler_text, '');
assertEquals(mastodonTranslation.data.media_attachments, []);
assertEquals(mastodonTranslation.data.poll, null);
assertEquals(mastodonTranslation.data.provider, 'DeepL.com');
assertEquals(data.source_lang, 'pt');
assertEquals(getLanguage(data.results[0]), 'en');
assertEquals(getLanguage(data.results[1]), 'en');
assertEquals(getLanguage(data.results[2]), 'en');
});
Deno.test('Translate status WITH media_attachments and WITHOUT poll', {
Deno.test('DeepL translation with source language set', {
ignore: !(translationProvider === deepL && apiKey),
}, async () => {
const translator = new DeepLTranslator({ fetch: fetch, endpoint, apiKey: apiKey as string });
const mastodonTranslation = await translator.translate(
'Hello my friends, my name is Alex and I am american.',
"That is spoiler isn't it",
[{ id: 'game', description: 'I should be playing Miles Edgeworth with my wife' }],
null,
'en',
'pt',
);
assertEquals(getLanguage(mastodonTranslation.data.content), 'pt');
assertEquals(getLanguage(mastodonTranslation.data.spoiler_text), 'pt');
assertEquals(mastodonTranslation.data.media_attachments.map((value) => getLanguage(value.description)), ['pt']);
assertEquals(mastodonTranslation.data.poll, null);
assertEquals(mastodonTranslation.data.provider, 'DeepL.com');
});
Deno.test('Translate status WITHOUT media_attachments and WITH poll', {
ignore: !(translationProvider === deepL && apiKey),
}, async () => {
const translator = new DeepLTranslator({ fetch: fetch, endpoint, apiKey: apiKey as string });
const poll = {
'id': '34858',
'options': [
{
'title': 'Kill him right now',
},
{
'title': 'Save him right now',
},
const data = await translator.translate(
[
'Bom dia amigos',
'Meu nome é Patrick',
'Eu irei morar na America, eu prometo. Mas antes, eu devo mencionar que o lande está interpretando este texto como italiano, que estranho.',
],
};
const mastodonTranslation = await translator.translate(
'Hello my friends, my name is Alex and I am american.',
'',
[],
poll,
'en',
'pt',
'en',
);
assertEquals(getLanguage(mastodonTranslation.data.content), 'pt');
assertEquals(mastodonTranslation.data.spoiler_text, '');
assertEquals(mastodonTranslation.data.media_attachments, []);
assertEquals(mastodonTranslation.data.poll?.options.map((value) => getLanguage(value.title)), ['pt', 'pt']);
assertEquals(mastodonTranslation.data.provider, 'DeepL.com');
});
Deno.test('Translate status WITH media_attachments and WITH poll', {
ignore: !(translationProvider === deepL && apiKey),
}, async () => {
const translator = new DeepLTranslator({ fetch: fetch, endpoint, apiKey: apiKey as string });
const poll = {
'id': '34858',
'options': [
{
'title': 'Kill him right now',
},
{
'title': 'Save him right now',
},
],
};
const mastodonTranslation = await translator.translate(
'Hello my friends, my name is Alex and I am american.',
'',
[{ id: 'game', description: 'I should be playing Miles Edgeworth with my wife' }],
poll,
'en',
'pt',
);
assertEquals(getLanguage(mastodonTranslation.data.content), 'pt');
assertEquals(mastodonTranslation.data.spoiler_text, '');
assertEquals(mastodonTranslation.data.media_attachments.map((value) => getLanguage(value.description)), ['pt']);
assertEquals(mastodonTranslation.data.poll?.options.map((value) => getLanguage(value.title)), ['pt', 'pt']);
assertEquals(mastodonTranslation.data.provider, 'DeepL.com');
assertEquals(data.source_lang, 'pt');
assertEquals(getLanguage(data.results[0]), 'en');
assertEquals(getLanguage(data.results[1]), 'en');
assertEquals(getLanguage(data.results[2]), 'en');
});

View file

@ -1,12 +1,6 @@
import { z } from 'zod';
import {
DittoTranslator,
MastodonTranslation,
Provider,
SourceLanguage,
TargetLanguage,
} from '@/translators/translator.ts';
import { DittoTranslator, Provider, SourceLanguage, TargetLanguage } from '@/translators/translator.ts';
import { languageSchema } from '@/schema.ts';
interface DeepLTranslatorOpts {
@ -22,45 +16,43 @@ export class DeepLTranslator implements DittoTranslator {
private readonly endpoint: string;
private readonly apiKey: string;
private readonly fetch: typeof fetch;
private readonly provider: Provider;
private static provider: Provider = 'DeepL.com';
constructor(opts: DeepLTranslatorOpts) {
this.endpoint = opts.endpoint ?? 'https://api.deepl.com';
this.fetch = opts.fetch ?? globalThis.fetch;
this.provider = 'DeepL.com';
this.apiKey = opts.apiKey;
}
async translate(
contentHTMLencoded: string,
spoilerText: string,
mediaAttachments: { id: string; description: string }[],
poll: { id: string; options: { title: string }[] } | null,
sourceLanguage: SourceLanguage | undefined,
texts: string[],
source: SourceLanguage | undefined,
dest: TargetLanguage,
opts?: { signal?: AbortSignal },
) {
const data = (await this.translateMany(texts, source, dest, opts)).translations;
return {
results: data.map((value) => value.text),
source_lang: data[0].detected_source_language,
};
}
/** DeepL translate request. */
private async translateMany(
texts: string[],
source: SourceLanguage | undefined,
targetLanguage: TargetLanguage,
opts?: { signal?: AbortSignal },
) {
// --------------------- START explanation
// Order of texts:
// 1 - contentHTMLencoded
// 2 - spoilerText
// 3 - mediaAttachments descriptions
// 4 - poll title options
const medias = mediaAttachments.map((value) => value.description);
const polls = poll?.options.map((value) => value.title) ?? [];
const text = [contentHTMLencoded, spoilerText].concat(medias, polls);
// --------------------- END explanation
const body: any = {
text,
text: texts,
target_lang: targetLanguage.toUpperCase(),
tag_handling: 'html',
split_sentences: '1',
};
if (sourceLanguage) {
body.source_lang = sourceLanguage.toUpperCase();
if (source) {
body.source_lang = source.toUpperCase();
}
const headers = new Headers();
@ -76,55 +68,9 @@ export class DeepLTranslator implements DittoTranslator {
const response = await this.fetch(request);
const json = await response.json();
const data = DeepLTranslator.schema().parse(json).translations;
const data = DeepLTranslator.schema().parse(json);
const mastodonTranslation: MastodonTranslation = {
content: '',
spoiler_text: '',
media_attachments: [],
poll: null,
detected_source_language: 'en',
provider: this.provider,
};
/** Used to keep track of the offset. When slicing, should be used as the start value. */
let startIndex = 0;
mastodonTranslation.content = data[0].text;
startIndex++;
mastodonTranslation.spoiler_text = data[1].text;
startIndex++;
if (medias.length) {
const mediasTranslated = data.slice(startIndex, startIndex + medias.length);
for (let i = 0; i < mediasTranslated.length; i++) {
mastodonTranslation.media_attachments.push({
id: mediaAttachments[i].id,
description: mediasTranslated[i].text,
});
}
startIndex += mediasTranslated.length;
}
if (polls.length && poll) {
const pollsTranslated = data.slice(startIndex);
mastodonTranslation.poll = {
id: poll.id,
options: [],
};
for (let i = 0; i < pollsTranslated.length; i++) {
mastodonTranslation.poll.options.push({
title: pollsTranslated[i].text,
});
}
startIndex += pollsTranslated.length;
}
mastodonTranslation.detected_source_language = data[0].detected_source_language;
return {
data: mastodonTranslation,
};
return data;
}
/** DeepL response schema.
@ -139,4 +85,9 @@ export class DeepLTranslator implements DittoTranslator {
),
});
}
/**
 * DeepL provider.
 *
 * @returns The value of the class-level static `provider` field, so it is the
 * same for every instance.
 */
getProvider(): Provider {
return DeepLTranslator.provider;
}
}

View file

@ -9,131 +9,44 @@ const apiKey = Conf.libreTranslateApiKey;
const translationProvider = Conf.translationProvider;
const libreTranslate = 'libretranslate';
Deno.test('Translate status with EMPTY media_attachments and WITHOUT poll', {
Deno.test('LibreTranslate translation with source language omitted', {
ignore: !(translationProvider === libreTranslate && apiKey),
}, async () => {
const translator = new LibreTranslateTranslator({ fetch: fetch, endpoint, apiKey: apiKey as string });
const mastodonTranslation = await translator.translate(
'Bom dia amigos do Element, meu nome é Patrick',
'',
[],
null,
'pt',
'en',
);
assertEquals(getLanguage(mastodonTranslation.data.content), 'en');
assertEquals(mastodonTranslation.data.spoiler_text, '');
assertEquals(mastodonTranslation.data.media_attachments, []);
assertEquals(mastodonTranslation.data.poll, null);
assertEquals(mastodonTranslation.data.provider, 'libretranslate.com');
});
Deno.test('Translate status WITH auto detect and with EMPTY media_attachments and WITHOUT poll', {
ignore: !(translationProvider === libreTranslate && apiKey),
}, async () => {
const translator = new LibreTranslateTranslator({ fetch: fetch, endpoint, apiKey: apiKey as string });
const mastodonTranslation = await translator.translate(
'Bom dia amigos do Element, meu nome é Patrick',
'',
[],
null,
const data = await translator.translate(
[
'Bom dia amigos',
'Meu nome é Patrick, um nome belo ou feio? A questão é mais profunda do que parece.',
'A respiração é mais importante do que comer e tomar agua.',
],
undefined,
'en',
'ca',
);
assertEquals(getLanguage(mastodonTranslation.data.content), 'en');
assertEquals(mastodonTranslation.data.spoiler_text, '');
assertEquals(mastodonTranslation.data.media_attachments, []);
assertEquals(mastodonTranslation.data.poll, null);
assertEquals(mastodonTranslation.data.provider, 'libretranslate.com');
assertEquals(data.source_lang, 'pt');
assertEquals(getLanguage(data.results[0]), 'ca');
assertEquals(getLanguage(data.results[1]), 'ca');
assertEquals(getLanguage(data.results[2]), 'ca');
});
Deno.test('Translate status WITH media_attachments and WITHOUT poll', {
Deno.test('LibreTranslate translation with source language set', {
ignore: !(translationProvider === libreTranslate && apiKey),
}, async () => {
const translator = new LibreTranslateTranslator({ fetch: fetch, endpoint, apiKey: apiKey as string });
const mastodonTranslation = await translator.translate(
'Hello my friends, my name is Alex and I am american.',
"That is spoiler isn't it",
[{ id: 'game', description: 'I should be playing Miles Edgeworth with my wife' }],
null,
'en',
'pt',
);
assertEquals(getLanguage(mastodonTranslation.data.content), 'pt');
assertEquals(getLanguage(mastodonTranslation.data.spoiler_text), 'pt');
assertEquals(mastodonTranslation.data.media_attachments.map((value) => getLanguage(value.description)), ['pt']);
assertEquals(mastodonTranslation.data.poll, null);
assertEquals(mastodonTranslation.data.provider, 'libretranslate.com');
});
Deno.test('Translate status WITHOUT media_attachments and WITH poll', {
ignore: !(translationProvider === libreTranslate && apiKey),
}, async () => {
const translator = new LibreTranslateTranslator({ fetch: fetch, endpoint, apiKey: apiKey as string });
const poll = {
'id': '34858',
'options': [
{
'title': 'Kill him right now',
},
{
'title': 'Save him right now',
},
const data = await translator.translate(
[
'Bom dia amigos',
'Meu nome é Patrick, um nome belo ou feio? A questão é mais profunda do que parece.',
'A respiração é mais importante do que comer e tomar agua.',
],
};
const mastodonTranslation = await translator.translate(
'Hello my friends, my name is Alex and I am american.',
'',
[],
poll,
'en',
'pt',
'ca',
);
assertEquals(getLanguage(mastodonTranslation.data.content), 'pt');
assertEquals(mastodonTranslation.data.spoiler_text, '');
assertEquals(mastodonTranslation.data.media_attachments, []);
assertEquals(mastodonTranslation.data.poll?.options.map((value) => getLanguage(value.title)), ['pt', 'pt']);
assertEquals(mastodonTranslation.data.provider, 'libretranslate.com');
});
Deno.test('Translate status WITH media_attachments and WITH poll', {
ignore: !(translationProvider === libreTranslate && apiKey),
}, async () => {
const translator = new LibreTranslateTranslator({ fetch: fetch, endpoint, apiKey: apiKey as string });
const poll = {
'id': '34858',
'options': [
{
'title': 'Kill him right now',
},
{
'title': 'Save him right now',
},
],
};
const mastodonTranslation = await translator.translate(
'Hello my friends, my name is Alex and I am american.',
'',
[{ id: 'game', description: 'I should be playing Miles Edgeworth with my wife' }],
poll,
'en',
'pt',
);
assertEquals(getLanguage(mastodonTranslation.data.content), 'pt');
assertEquals(mastodonTranslation.data.spoiler_text, '');
assertEquals(mastodonTranslation.data.media_attachments.map((value) => getLanguage(value.description)), ['pt']);
assertEquals(mastodonTranslation.data.poll?.options.map((value) => getLanguage(value.title)), ['pt', 'pt']);
assertEquals(mastodonTranslation.data.provider, 'libretranslate.com');
assertEquals(data.source_lang, 'pt');
assertEquals(getLanguage(data.results[0]), 'ca');
assertEquals(getLanguage(data.results[1]), 'ca');
assertEquals(getLanguage(data.results[2]), 'ca');
});

View file

@ -1,12 +1,8 @@
import { LanguageCode } from 'iso-639-1';
import { z } from 'zod';
import {
DittoTranslator,
MastodonTranslation,
Provider,
SourceLanguage,
TargetLanguage,
} from '@/translators/translator.ts';
import { DittoTranslator, Provider, SourceLanguage, TargetLanguage } from '@/translators/translator.ts';
import { languageSchema } from '@/schema.ts';
interface LibreTranslateTranslatorOpts {
/** Libretranslate endpoint to use. Default: 'https://libretranslate.com' */
@ -21,97 +17,37 @@ export class LibreTranslateTranslator implements DittoTranslator {
private readonly endpoint: string;
private readonly apiKey: string;
private readonly fetch: typeof fetch;
private readonly provider: Provider;
private static provider: Provider = 'libretranslate.com';
constructor(opts: LibreTranslateTranslatorOpts) {
this.endpoint = opts.endpoint ?? 'https://libretranslate.com';
this.fetch = opts.fetch ?? globalThis.fetch;
this.provider = 'libretranslate.com';
this.apiKey = opts.apiKey;
}
async translate(
contentHTMLencoded: string,
spoilerText: string,
mediaAttachments: { id: string; description: string }[],
poll: { id: string; options: { title: string }[] } | null,
sourceLanguage: SourceLanguage | undefined,
targetLanguage: TargetLanguage,
texts: string[],
source: SourceLanguage | undefined,
dest: TargetLanguage,
opts?: { signal?: AbortSignal },
) {
const mastodonTranslation: MastodonTranslation = {
content: '',
spoiler_text: '',
media_attachments: [],
poll: null,
detected_source_language: 'en',
provider: this.provider,
};
const translatedContent = await this.makeRequest(contentHTMLencoded, sourceLanguage, targetLanguage, 'html', {
signal: opts?.signal,
});
mastodonTranslation.content = translatedContent;
if (spoilerText.length) {
const translatedSpoilerText = await this.makeRequest(spoilerText, sourceLanguage, targetLanguage, 'text', {
signal: opts?.signal,
});
mastodonTranslation.spoiler_text = translatedSpoilerText;
}
if (mediaAttachments) {
for (const media of mediaAttachments) {
const translatedDescription = await this.makeRequest(
media.description,
sourceLanguage,
targetLanguage,
'text',
{
signal: opts?.signal,
},
);
mastodonTranslation.media_attachments.push({
id: media.id,
description: translatedDescription,
});
}
}
if (poll) {
mastodonTranslation.poll = {
id: poll.id,
options: [],
};
for (const option of poll.options) {
const translatedTitle = await this.makeRequest(
option.title,
sourceLanguage,
targetLanguage,
'text',
{
signal: opts?.signal,
},
);
mastodonTranslation.poll.options.push({
title: translatedTitle,
});
}
}
const translations = await Promise.all(
texts.map((text) => this.translateOne(text, source, dest, 'html', { signal: opts?.signal })),
);
return {
data: mastodonTranslation,
results: translations.map((value) => value.translatedText),
source_lang: translations[0]?.detectedLanguage?.language ?? source as LanguageCode, // cast is ok
};
}
private async makeRequest(
private async translateOne(
q: string,
sourceLanguage: string | undefined,
targetLanguage: string,
format: 'html' | 'text',
opts?: { signal?: AbortSignal },
): Promise<string> {
) {
const body = {
q,
source: sourceLanguage?.toLowerCase() ?? 'auto',
@ -132,7 +68,7 @@ export class LibreTranslateTranslator implements DittoTranslator {
const response = await this.fetch(request);
const json = await response.json();
const data = LibreTranslateTranslator.schema().parse(json).translatedText;
const data = LibreTranslateTranslator.schema().parse(json);
return data;
}
@ -142,6 +78,15 @@ export class LibreTranslateTranslator implements DittoTranslator {
private static schema() {
return z.object({
translatedText: z.string(),
/** This field is only available if the 'source' is set to 'auto' */
detectedLanguage: z.object({
language: languageSchema,
}).optional(),
});
}
/**
 * LibreTranslate provider.
 *
 * @returns The value of the class-level static `provider` field, so it is the
 * same for every instance.
 */
getProvider(): Provider {
return LibreTranslateTranslator.provider;
}
}

View file

@ -36,21 +36,15 @@ export type MastodonTranslation = {
export interface DittoTranslator {
/** Translate each of the 'texts' into 'targetLanguage'; results are returned in the same order as the input. */
translate(
/** HTML-encoded content of the status. */
content: string,
/** Spoiler warning of the status. */
spoilerText: string,
/** Media descriptions of the status. */
mediaAttachments: { id: string; description: string }[],
/** Poll of the status. */
poll: { id: string; options: { title: string }[] } | null,
texts: string[],
/** The language of the source text/status. */
sourceLanguage: SourceLanguage | undefined,
/** The status content will be translated into this language. */
targetLanguage: TargetLanguage,
/** Custom options. */
opts?: { signal?: AbortSignal },
): Promise<DittoTranslation>;
): Promise<{ results: string[]; source_lang: SourceLanguage }>;
getProvider(): Provider;
}
/** Includes the TARGET language and the status id.