mirror of
https://gitlab.com/soapbox-pub/ditto.git
synced 2025-12-06 11:29:46 +00:00
Merge branch 'main' into cashu
This commit is contained in:
commit
2012ef5192
7 changed files with 79 additions and 10 deletions
|
|
@ -22,6 +22,7 @@
|
||||||
"trends": "deno run -A --env-file --deny-read=.env scripts/trends.ts",
|
"trends": "deno run -A --env-file --deny-read=.env scripts/trends.ts",
|
||||||
"clean:deps": "deno cache --reload src/app.ts",
|
"clean:deps": "deno cache --reload src/app.ts",
|
||||||
"db:populate-search": "deno run -A --env-file --deny-read=.env scripts/db-populate-search.ts",
|
"db:populate-search": "deno run -A --env-file --deny-read=.env scripts/db-populate-search.ts",
|
||||||
|
"db:populate-extensions": "deno run -A --env-file --deny-read=.env scripts/db-populate-extensions.ts",
|
||||||
"vapid": "deno run scripts/vapid.ts"
|
"vapid": "deno run scripts/vapid.ts"
|
||||||
},
|
},
|
||||||
"unstable": [
|
"unstable": [
|
||||||
|
|
|
||||||
8
deno.lock
generated
8
deno.lock
generated
|
|
@ -26,6 +26,7 @@
|
||||||
"jsr:@gleasonator/policy@0.9.1": "0.9.1",
|
"jsr:@gleasonator/policy@0.9.1": "0.9.1",
|
||||||
"jsr:@gleasonator/policy@0.9.2": "0.9.2",
|
"jsr:@gleasonator/policy@0.9.2": "0.9.2",
|
||||||
"jsr:@gleasonator/policy@0.9.3": "0.9.3",
|
"jsr:@gleasonator/policy@0.9.3": "0.9.3",
|
||||||
|
"jsr:@gleasonator/policy@0.9.4": "0.9.4",
|
||||||
"jsr:@hono/hono@^4.4.6": "4.6.15",
|
"jsr:@hono/hono@^4.4.6": "4.6.15",
|
||||||
"jsr:@lambdalisue/async@^2.1.1": "2.1.1",
|
"jsr:@lambdalisue/async@^2.1.1": "2.1.1",
|
||||||
"jsr:@negrel/http-ece@0.6.0": "0.6.0",
|
"jsr:@negrel/http-ece@0.6.0": "0.6.0",
|
||||||
|
|
@ -298,6 +299,13 @@
|
||||||
"jsr:@nostrify/policies@~0.36.1"
|
"jsr:@nostrify/policies@~0.36.1"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"@gleasonator/policy@0.9.4": {
|
||||||
|
"integrity": "5d5b8a585b8e3cd6e6b7daed2cfa61cd1a3e5945691f092eb98f8671384c3657",
|
||||||
|
"dependencies": [
|
||||||
|
"jsr:@nostrify/nostrify@0.36",
|
||||||
|
"jsr:@nostrify/policies@~0.36.1"
|
||||||
|
]
|
||||||
|
},
|
||||||
"@hono/hono@4.4.6": {
|
"@hono/hono@4.4.6": {
|
||||||
"integrity": "aa557ca9930787ee86b9ca1730691f1ce1c379174c2cb244d5934db2b6314453"
|
"integrity": "aa557ca9930787ee86b9ca1730691f1ce1c379174c2cb244d5934db2b6314453"
|
||||||
},
|
},
|
||||||
|
|
|
||||||
26
scripts/db-populate-extensions.ts
Normal file
26
scripts/db-populate-extensions.ts
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
import { Storages } from '@/storages.ts';
|
||||||
|
import { EventsDB } from '@/storages/EventsDB.ts';
|
||||||
|
|
||||||
|
const store = await Storages.db();
|
||||||
|
const kysely = await Storages.kysely();
|
||||||
|
|
||||||
|
for await (const msg of store.req([{}])) {
|
||||||
|
if (msg[0] === 'EVENT') {
|
||||||
|
const event = msg[2];
|
||||||
|
|
||||||
|
const ext = EventsDB.indexExtensions(event);
|
||||||
|
|
||||||
|
try {
|
||||||
|
await kysely.updateTable('nostr_events')
|
||||||
|
.set('search_ext', ext)
|
||||||
|
.where('id', '=', event.id)
|
||||||
|
.execute();
|
||||||
|
} catch {
|
||||||
|
// do nothing
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Deno.exit();
|
||||||
|
|
@ -59,7 +59,8 @@ class EventsDB extends NPostgres {
|
||||||
'proxy': ({ count, value }) => count === 0 && value.length < 256,
|
'proxy': ({ count, value }) => count === 0 && value.length < 256,
|
||||||
'q': ({ event, count, value }) => count === 0 && event.kind === 1 && isNostrId(value),
|
'q': ({ event, count, value }) => count === 0 && event.kind === 1 && isNostrId(value),
|
||||||
'r': ({ event, count }) => (event.kind === 1985 ? count < 20 : count < 3),
|
'r': ({ event, count }) => (event.kind === 1985 ? count < 20 : count < 3),
|
||||||
't': ({ event, count, value }) => (event.kind === 1985 ? count < 20 : count < 5) && value.length < 50,
|
't': ({ event, count, value }) =>
|
||||||
|
(value === value.toLowerCase()) && (event.kind === 1985 ? count < 20 : count < 5) && value.length < 50,
|
||||||
};
|
};
|
||||||
|
|
||||||
static indexExtensions(event: NostrEvent): Record<string, string> {
|
static indexExtensions(event: NostrEvent): Record<string, string> {
|
||||||
|
|
|
||||||
|
|
@ -26,3 +26,18 @@ Deno.test('Detect English language', () => {
|
||||||
'en',
|
'en',
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
Deno.test('Detects definitive texts', () => {
|
||||||
|
// NOTE: pass `1` as min confidence to test only the definitive patterns
|
||||||
|
|
||||||
|
// unambiguous
|
||||||
|
assertEquals(detectLanguage('안녕하세요.', 1), 'ko');
|
||||||
|
assertEquals(detectLanguage('Γειά σου!', 1), 'el');
|
||||||
|
assertEquals(detectLanguage('שלום!', 1), 'he');
|
||||||
|
assertEquals(detectLanguage('こんにちは。', 1), 'ja');
|
||||||
|
|
||||||
|
// ambiguous
|
||||||
|
assertEquals(detectLanguage('你好', 1), undefined);
|
||||||
|
assertEquals(detectLanguage('Привет', 1), undefined);
|
||||||
|
assertEquals(detectLanguage('Hello', 1), undefined);
|
||||||
|
});
|
||||||
|
|
|
||||||
|
|
@ -4,8 +4,9 @@ import linkify from 'linkifyjs';
|
||||||
|
|
||||||
linkify.registerCustomProtocol('nostr', true);
|
linkify.registerCustomProtocol('nostr', true);
|
||||||
|
|
||||||
/** Returns the detected language if the confidence is greater or equal than 'minConfidence'
|
/**
|
||||||
* 'minConfidence' must be a number between 0 and 1, such as 0.95
|
* Returns the detected language if the confidence is greater or equal than 'minConfidence'.
|
||||||
|
* 'minConfidence' must be a number between 0 and 1, such as 0.95.
|
||||||
*/
|
*/
|
||||||
export function detectLanguage(text: string, minConfidence: number): LanguageCode | undefined {
|
export function detectLanguage(text: string, minConfidence: number): LanguageCode | undefined {
|
||||||
// It's better to remove the emojis first
|
// It's better to remove the emojis first
|
||||||
|
|
@ -15,13 +16,31 @@ export function detectLanguage(text: string, minConfidence: number): LanguageCod
|
||||||
.replaceAll(/[\s\uFEFF\u00A0\u200B-\u200D\u{0FE0E}]+/gu, ' '),
|
.replaceAll(/[\s\uFEFF\u00A0\u200B-\u200D\u{0FE0E}]+/gu, ' '),
|
||||||
).reduce((acc, { t, v }) => t === 'text' ? acc + v : acc, '').trim();
|
).reduce((acc, { t, v }) => t === 'text' ? acc + v : acc, '').trim();
|
||||||
|
|
||||||
|
// Definite patterns for some languages.
|
||||||
|
// Text which matches MUST unambiguously be in the given language.
|
||||||
|
// This is only possible for some languages.
|
||||||
|
// All patterns match the full text, so mixed scripts would fail these tests.
|
||||||
|
const languagePatterns: Partial<Record<LanguageCode, RegExp>> = {
|
||||||
|
ko: /^[\p{Script=Hangul}\s]+$/u, // Korean (Hangul only)
|
||||||
|
el: /^[\p{Script=Greek}\s]+$/u, // Greek
|
||||||
|
he: /^[\p{Script=Hebrew}\s]+$/u, // Hebrew
|
||||||
|
ja: /^(?=.*[\p{Script=Hiragana}\p{Script=Katakana}])[\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Han}\s]+$/u, // Japanese (requires at least one Kana)
|
||||||
|
// zh: not possible to detect unambiguously
|
||||||
|
};
|
||||||
|
|
||||||
|
// If any pattern matches, the language is known.
|
||||||
|
for (const [lang, pattern] of Object.entries(languagePatterns) as [LanguageCode, RegExp][]) {
|
||||||
|
if (pattern.test(text.replace(/[\p{P}\p{S}]/gu, ''))) { // strip punctuation and symbols before checking
|
||||||
|
return lang;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (sanitizedText.length < 10) { // heuristics
|
if (sanitizedText.length < 10) { // heuristics
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const [topResult] = lande(
|
const [topResult] = lande(sanitizedText);
|
||||||
sanitizedText,
|
|
||||||
);
|
|
||||||
if (topResult) {
|
if (topResult) {
|
||||||
const [iso6393, confidence] = topResult;
|
const [iso6393, confidence] = topResult;
|
||||||
const locale = new Intl.Locale(iso6393);
|
const locale = new Intl.Locale(iso6393);
|
||||||
|
|
@ -30,5 +49,4 @@ export function detectLanguage(text: string, minConfidence: number): LanguageCod
|
||||||
return locale.language as LanguageCode;
|
return locale.language as LanguageCode;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ import { MastodonMention } from '@/entities/MastodonMention.ts';
|
||||||
import { MastodonStatus } from '@/entities/MastodonStatus.ts';
|
import { MastodonStatus } from '@/entities/MastodonStatus.ts';
|
||||||
import { type DittoEvent } from '@/interfaces/DittoEvent.ts';
|
import { type DittoEvent } from '@/interfaces/DittoEvent.ts';
|
||||||
import { Storages } from '@/storages.ts';
|
import { Storages } from '@/storages.ts';
|
||||||
import { nostrDate } from '@/utils.ts';
|
import { isNostrId, nostrDate } from '@/utils.ts';
|
||||||
import { getMediaLinks, parseNoteContent, stripimeta } from '@/utils/note.ts';
|
import { getMediaLinks, parseNoteContent, stripimeta } from '@/utils/note.ts';
|
||||||
import { findReplyTag } from '@/utils/tags.ts';
|
import { findReplyTag } from '@/utils/tags.ts';
|
||||||
import { unfurlCardCached } from '@/utils/unfurl.ts';
|
import { unfurlCardCached } from '@/utils/unfurl.ts';
|
||||||
|
|
@ -41,8 +41,8 @@ async function renderStatus(event: DittoEvent, opts: RenderStatusOpts): Promise<
|
||||||
const mentionedPubkeys = [
|
const mentionedPubkeys = [
|
||||||
...new Set(
|
...new Set(
|
||||||
event.tags
|
event.tags
|
||||||
.filter((tag) => tag[0] === 'p')
|
.filter(([name, value]) => name === 'p' && isNostrId(value))
|
||||||
.map((tag) => tag[1]),
|
.map(([, value]) => value),
|
||||||
),
|
),
|
||||||
];
|
];
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue