mirror of
https://gitlab.com/soapbox-pub/ditto.git
synced 2025-12-06 11:29:46 +00:00
feat: improve setLanguage() function, remove links and emojis from event.content before using lande
This commit is contained in:
parent
522a283af1
commit
1cb13b141a
1 changed file with 11 additions and 1 deletion
|
|
@@ -3,6 +3,7 @@ import { Stickynotes } from '@soapbox/stickynotes';
|
||||||
import ISO6391 from 'iso-639-1';
|
import ISO6391 from 'iso-639-1';
|
||||||
import { Kysely, sql } from 'kysely';
|
import { Kysely, sql } from 'kysely';
|
||||||
import lande from 'lande';
|
import lande from 'lande';
|
||||||
|
import linkify from 'linkifyjs';
|
||||||
import { LRUCache } from 'lru-cache';
|
import { LRUCache } from 'lru-cache';
|
||||||
import { z } from 'zod';
|
import { z } from 'zod';
|
||||||
|
|
||||||
|
|
@@ -200,7 +201,16 @@ async function parseMetadata(event: NostrEvent, signal: AbortSignal): Promise<vo
|
||||||
|
|
||||||
/** Update the event in the database and set its language. */
|
/** Update the event in the database and set its language. */
|
||||||
async function setLanguage(event: NostrEvent): Promise<void> {
|
async function setLanguage(event: NostrEvent): Promise<void> {
|
||||||
const [topResult] = lande(event.content);
|
const contentWithoutEmoji = event.content.replace(
|
||||||
|
/[\p{Emoji}\p{Emoji_Modifier}\p{Emoji_Component}\p{Emoji_Modifier_Base}\p{Emoji_Presentation}]/gu,
|
||||||
|
'',
|
||||||
|
);
|
||||||
|
const contentWithoutLinks = linkify.tokenize(contentWithoutEmoji).reduce((accumulator, current) => {
|
||||||
|
if (current.t === 'text') return accumulator + current.v;
|
||||||
|
return accumulator;
|
||||||
|
}, '');
|
||||||
|
const parsedContent = contentWithoutLinks;
|
||||||
|
const [topResult] = lande(parsedContent);
|
||||||
|
|
||||||
if (topResult) {
|
if (topResult) {
|
||||||
const [iso6393, confidence] = topResult;
|
const [iso6393, confidence] = topResult;
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue