mirror of
https://gitlab.com/soapbox-pub/ditto.git
synced 2025-12-06 11:29:46 +00:00
Merge branch 'main' into cashu
This commit is contained in:
commit
2012ef5192
7 changed files with 79 additions and 10 deletions
|
|
@ -22,6 +22,7 @@
|
|||
"trends": "deno run -A --env-file --deny-read=.env scripts/trends.ts",
|
||||
"clean:deps": "deno cache --reload src/app.ts",
|
||||
"db:populate-search": "deno run -A --env-file --deny-read=.env scripts/db-populate-search.ts",
|
||||
"db:populate-extensions": "deno run -A --env-file --deny-read=.env scripts/db-populate-extensions.ts",
|
||||
"vapid": "deno run scripts/vapid.ts"
|
||||
},
|
||||
"unstable": [
|
||||
|
|
|
|||
8
deno.lock
generated
8
deno.lock
generated
|
|
@ -26,6 +26,7 @@
|
|||
"jsr:@gleasonator/policy@0.9.1": "0.9.1",
|
||||
"jsr:@gleasonator/policy@0.9.2": "0.9.2",
|
||||
"jsr:@gleasonator/policy@0.9.3": "0.9.3",
|
||||
"jsr:@gleasonator/policy@0.9.4": "0.9.4",
|
||||
"jsr:@hono/hono@^4.4.6": "4.6.15",
|
||||
"jsr:@lambdalisue/async@^2.1.1": "2.1.1",
|
||||
"jsr:@negrel/http-ece@0.6.0": "0.6.0",
|
||||
|
|
@ -298,6 +299,13 @@
|
|||
"jsr:@nostrify/policies@~0.36.1"
|
||||
]
|
||||
},
|
||||
"@gleasonator/policy@0.9.4": {
|
||||
"integrity": "5d5b8a585b8e3cd6e6b7daed2cfa61cd1a3e5945691f092eb98f8671384c3657",
|
||||
"dependencies": [
|
||||
"jsr:@nostrify/nostrify@0.36",
|
||||
"jsr:@nostrify/policies@~0.36.1"
|
||||
]
|
||||
},
|
||||
"@hono/hono@4.4.6": {
|
||||
"integrity": "aa557ca9930787ee86b9ca1730691f1ce1c379174c2cb244d5934db2b6314453"
|
||||
},
|
||||
|
|
|
|||
26
scripts/db-populate-extensions.ts
Normal file
26
scripts/db-populate-extensions.ts
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
import { Storages } from '@/storages.ts';
|
||||
import { EventsDB } from '@/storages/EventsDB.ts';
|
||||
|
||||
// Backfill script: recompute the `search_ext` column of `nostr_events` for
// events returned by the store.
//
// Events are requested with an empty filter (`{}`), which presumably matches
// every stored event (verify against the store's `req` implementation). For
// each one, the extensions are recomputed with `EventsDB.indexExtensions` and
// written back to the row whose `id` matches the event.
const store = await Storages.db();
const kysely = await Storages.kysely();

// Number of rows whose update threw; reported once the loop finishes.
let failedUpdates = 0;

for await (const msg of store.req([{}])) {
  // Anything other than an EVENT message ends the run.
  if (msg[0] !== 'EVENT') {
    break;
  }

  const event = msg[2];
  const ext = EventsDB.indexExtensions(event);

  try {
    await kysely.updateTable('nostr_events')
      .set('search_ext', ext)
      .where('id', '=', event.id)
      .execute();
  } catch (error) {
    // Still best-effort: one failing row must not abort the whole backfill.
    // The failure is logged instead of being swallowed silently, so the
    // operator can see which events were skipped and why.
    failedUpdates++;
    console.warn(`Failed to update search_ext for event ${event.id}:`, error);
  }
}

if (failedUpdates > 0) {
  console.warn(`db-populate-extensions: ${failedUpdates} event(s) could not be updated.`);
}

// Exit explicitly (default exit code, as before) once the loop is done.
Deno.exit();
|
||||
|
|
@ -59,7 +59,8 @@ class EventsDB extends NPostgres {
|
|||
'proxy': ({ count, value }) => count === 0 && value.length < 256,
|
||||
'q': ({ event, count, value }) => count === 0 && event.kind === 1 && isNostrId(value),
|
||||
'r': ({ event, count }) => (event.kind === 1985 ? count < 20 : count < 3),
|
||||
't': ({ event, count, value }) => (event.kind === 1985 ? count < 20 : count < 5) && value.length < 50,
|
||||
't': ({ event, count, value }) =>
|
||||
(value === value.toLowerCase()) && (event.kind === 1985 ? count < 20 : count < 5) && value.length < 50,
|
||||
};
|
||||
|
||||
static indexExtensions(event: NostrEvent): Record<string, string> {
|
||||
|
|
|
|||
|
|
@ -26,3 +26,18 @@ Deno.test('Detect English language', () => {
|
|||
'en',
|
||||
);
|
||||
});
|
||||
|
||||
Deno.test('Detects definitive texts', () => {
  // A min confidence of `1` is passed on every call so that only the
  // definitive script patterns are exercised by this test.
  const minConfidence = 1;

  // Samples expected to resolve to exactly one language.
  const unambiguous = [
    ['안녕하세요.', 'ko'],
    ['Γειά σου!', 'el'],
    ['שלום!', 'he'],
    ['こんにちは。', 'ja'],
  ] as const;

  for (const [sample, expected] of unambiguous) {
    assertEquals(detectLanguage(sample, minConfidence), expected);
  }

  // Samples expected to yield no result at this confidence.
  const ambiguous = ['你好', 'Привет', 'Hello'];

  for (const sample of ambiguous) {
    assertEquals(detectLanguage(sample, minConfidence), undefined);
  }
});
|
||||
|
|
|
|||
|
|
@ -4,8 +4,9 @@ import linkify from 'linkifyjs';
|
|||
|
||||
linkify.registerCustomProtocol('nostr', true);
|
||||
|
||||
/** Returns the detected language if the confidence is greater or equal than 'minConfidence'
|
||||
* 'minConfidence' must be a number between 0 and 1, such as 0.95
|
||||
/**
|
||||
* Returns the detected language if the confidence is greater than or equal to 'minConfidence'.
|
||||
* 'minConfidence' must be a number between 0 and 1, such as 0.95.
|
||||
*/
|
||||
export function detectLanguage(text: string, minConfidence: number): LanguageCode | undefined {
|
||||
// It's better to remove the emojis first
|
||||
|
|
@ -15,13 +16,31 @@ export function detectLanguage(text: string, minConfidence: number): LanguageCod
|
|||
.replaceAll(/[\s\uFEFF\u00A0\u200B-\u200D\u{0FE0E}]+/gu, ' '),
|
||||
).reduce((acc, { t, v }) => t === 'text' ? acc + v : acc, '').trim();
|
||||
|
||||
// Definite patterns for some languages.
|
||||
// Text which matches MUST unambiguously be in the given language.
|
||||
// This is only possible for some languages.
|
||||
// All patterns match the full text, so mixed scripts would fail these tests.
|
||||
const languagePatterns: Partial<Record<LanguageCode, RegExp>> = {
|
||||
ko: /^[\p{Script=Hangul}\s]+$/u, // Korean (Hangul only)
|
||||
el: /^[\p{Script=Greek}\s]+$/u, // Greek
|
||||
he: /^[\p{Script=Hebrew}\s]+$/u, // Hebrew
|
||||
ja: /^(?=.*[\p{Script=Hiragana}\p{Script=Katakana}])[\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Han}\s]+$/u, // Japanese (requires at least one Kana)
|
||||
// zh: not possible to detect unambiguously
|
||||
};
|
||||
|
||||
// If any pattern matches, the language is known.
|
||||
for (const [lang, pattern] of Object.entries(languagePatterns) as [LanguageCode, RegExp][]) {
|
||||
if (pattern.test(text.replace(/[\p{P}\p{S}]/gu, ''))) { // strip punctuation and symbols before checking
|
||||
return lang;
|
||||
}
|
||||
}
|
||||
|
||||
if (sanitizedText.length < 10) { // heuristics
|
||||
return;
|
||||
}
|
||||
|
||||
const [topResult] = lande(
|
||||
sanitizedText,
|
||||
);
|
||||
const [topResult] = lande(sanitizedText);
|
||||
|
||||
if (topResult) {
|
||||
const [iso6393, confidence] = topResult;
|
||||
const locale = new Intl.Locale(iso6393);
|
||||
|
|
@ -30,5 +49,4 @@ export function detectLanguage(text: string, minConfidence: number): LanguageCod
|
|||
return locale.language as LanguageCode;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import { MastodonMention } from '@/entities/MastodonMention.ts';
|
|||
import { MastodonStatus } from '@/entities/MastodonStatus.ts';
|
||||
import { type DittoEvent } from '@/interfaces/DittoEvent.ts';
|
||||
import { Storages } from '@/storages.ts';
|
||||
import { nostrDate } from '@/utils.ts';
|
||||
import { isNostrId, nostrDate } from '@/utils.ts';
|
||||
import { getMediaLinks, parseNoteContent, stripimeta } from '@/utils/note.ts';
|
||||
import { findReplyTag } from '@/utils/tags.ts';
|
||||
import { unfurlCardCached } from '@/utils/unfurl.ts';
|
||||
|
|
@ -41,8 +41,8 @@ async function renderStatus(event: DittoEvent, opts: RenderStatusOpts): Promise<
|
|||
const mentionedPubkeys = [
|
||||
...new Set(
|
||||
event.tags
|
||||
.filter((tag) => tag[0] === 'p')
|
||||
.map((tag) => tag[1]),
|
||||
.filter(([name, value]) => name === 'p' && isNostrId(value))
|
||||
.map(([, value]) => value),
|
||||
),
|
||||
];
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue