Merge branch 'main' into cashu

This commit is contained in:
P. Reis 2025-02-04 21:35:41 -03:00
commit 2012ef5192
7 changed files with 79 additions and 10 deletions

View file

@ -22,6 +22,7 @@
"trends": "deno run -A --env-file --deny-read=.env scripts/trends.ts", "trends": "deno run -A --env-file --deny-read=.env scripts/trends.ts",
"clean:deps": "deno cache --reload src/app.ts", "clean:deps": "deno cache --reload src/app.ts",
"db:populate-search": "deno run -A --env-file --deny-read=.env scripts/db-populate-search.ts", "db:populate-search": "deno run -A --env-file --deny-read=.env scripts/db-populate-search.ts",
"db:populate-extensions": "deno run -A --env-file --deny-read=.env scripts/db-populate-extensions.ts",
"vapid": "deno run scripts/vapid.ts" "vapid": "deno run scripts/vapid.ts"
}, },
"unstable": [ "unstable": [

8
deno.lock generated
View file

@ -26,6 +26,7 @@
"jsr:@gleasonator/policy@0.9.1": "0.9.1", "jsr:@gleasonator/policy@0.9.1": "0.9.1",
"jsr:@gleasonator/policy@0.9.2": "0.9.2", "jsr:@gleasonator/policy@0.9.2": "0.9.2",
"jsr:@gleasonator/policy@0.9.3": "0.9.3", "jsr:@gleasonator/policy@0.9.3": "0.9.3",
"jsr:@gleasonator/policy@0.9.4": "0.9.4",
"jsr:@hono/hono@^4.4.6": "4.6.15", "jsr:@hono/hono@^4.4.6": "4.6.15",
"jsr:@lambdalisue/async@^2.1.1": "2.1.1", "jsr:@lambdalisue/async@^2.1.1": "2.1.1",
"jsr:@negrel/http-ece@0.6.0": "0.6.0", "jsr:@negrel/http-ece@0.6.0": "0.6.0",
@ -298,6 +299,13 @@
"jsr:@nostrify/policies@~0.36.1" "jsr:@nostrify/policies@~0.36.1"
] ]
}, },
"@gleasonator/policy@0.9.4": {
"integrity": "5d5b8a585b8e3cd6e6b7daed2cfa61cd1a3e5945691f092eb98f8671384c3657",
"dependencies": [
"jsr:@nostrify/nostrify@0.36",
"jsr:@nostrify/policies@~0.36.1"
]
},
"@hono/hono@4.4.6": { "@hono/hono@4.4.6": {
"integrity": "aa557ca9930787ee86b9ca1730691f1ce1c379174c2cb244d5934db2b6314453" "integrity": "aa557ca9930787ee86b9ca1730691f1ce1c379174c2cb244d5934db2b6314453"
}, },

View file

@ -0,0 +1,26 @@
import { Storages } from '@/storages.ts';
import { EventsDB } from '@/storages/EventsDB.ts';
// One-off maintenance script: recomputes the `search_ext` column of `nostr_events`
// for every event yielded by the store, using `EventsDB.indexExtensions`.
const store = await Storages.db();
const kysely = await Storages.kysely();

for await (const msg of store.req([{}])) {
  // The first non-EVENT message ends the scan.
  if (msg[0] !== 'EVENT') {
    break;
  }

  const event = msg[2];
  const ext = EventsDB.indexExtensions(event);

  try {
    await kysely.updateTable('nostr_events')
      .set('search_ext', ext)
      .where('id', '=', event.id)
      .execute();
  } catch (e: unknown) {
    // Best-effort: a single failed row must not abort the whole run,
    // but report it so that an incomplete backfill is visible to the operator.
    const reason = e instanceof Error ? e.message : String(e);
    console.warn(`Failed to update search_ext for event ${event.id}: ${reason}`);
  }
}

// Exit explicitly once the scan is finished.
Deno.exit();

View file

@ -59,7 +59,8 @@ class EventsDB extends NPostgres {
'proxy': ({ count, value }) => count === 0 && value.length < 256, 'proxy': ({ count, value }) => count === 0 && value.length < 256,
'q': ({ event, count, value }) => count === 0 && event.kind === 1 && isNostrId(value), 'q': ({ event, count, value }) => count === 0 && event.kind === 1 && isNostrId(value),
'r': ({ event, count }) => (event.kind === 1985 ? count < 20 : count < 3), 'r': ({ event, count }) => (event.kind === 1985 ? count < 20 : count < 3),
't': ({ event, count, value }) => (event.kind === 1985 ? count < 20 : count < 5) && value.length < 50, 't': ({ event, count, value }) =>
(value === value.toLowerCase()) && (event.kind === 1985 ? count < 20 : count < 5) && value.length < 50,
}; };
static indexExtensions(event: NostrEvent): Record<string, string> { static indexExtensions(event: NostrEvent): Record<string, string> {

View file

@ -26,3 +26,18 @@ Deno.test('Detect English language', () => {
'en', 'en',
); );
}); });
Deno.test('Detects definitive texts', () => {
  // NOTE: `1` is passed as the min confidence on every call so that only the definitive patterns are tested.
  const minConfidence = 1;

  // Unambiguous samples: each text must resolve to exactly the paired language code.
  const unambiguous = [
    ['안녕하세요.', 'ko'],
    ['Γειά σου!', 'el'],
    ['שלום!', 'he'],
    ['こんにちは。', 'ja'],
  ] as const;

  for (const [text, expected] of unambiguous) {
    assertEquals(detectLanguage(text, minConfidence), expected);
  }

  // Ambiguous samples: no language may be reported for any of these.
  const ambiguous = ['你好', 'Привет', 'Hello'];

  for (const text of ambiguous) {
    assertEquals(detectLanguage(text, minConfidence), undefined);
  }
});

View file

@ -4,8 +4,9 @@ import linkify from 'linkifyjs';
linkify.registerCustomProtocol('nostr', true); linkify.registerCustomProtocol('nostr', true);
/** Returns the detected language if the confidence is greater or equal than 'minConfidence' /**
* 'minConfidence' must be a number between 0 and 1, such as 0.95 * Returns the detected language if the confidence is greater or equal than 'minConfidence'.
* 'minConfidence' must be a number between 0 and 1, such as 0.95.
*/ */
export function detectLanguage(text: string, minConfidence: number): LanguageCode | undefined { export function detectLanguage(text: string, minConfidence: number): LanguageCode | undefined {
// It's better to remove the emojis first // It's better to remove the emojis first
@ -15,13 +16,31 @@ export function detectLanguage(text: string, minConfidence: number): LanguageCod
.replaceAll(/[\s\uFEFF\u00A0\u200B-\u200D\u{0FE0E}]+/gu, ' '), .replaceAll(/[\s\uFEFF\u00A0\u200B-\u200D\u{0FE0E}]+/gu, ' '),
).reduce((acc, { t, v }) => t === 'text' ? acc + v : acc, '').trim(); ).reduce((acc, { t, v }) => t === 'text' ? acc + v : acc, '').trim();
// Definite patterns for some languages.
// Text which matches MUST unambiguously be in the given language.
// This is only possible for some languages.
// All patterns match the full text, so mixed scripts would fail these tests.
const languagePatterns: Partial<Record<LanguageCode, RegExp>> = {
ko: /^[\p{Script=Hangul}\s]+$/u, // Korean (Hangul only)
el: /^[\p{Script=Greek}\s]+$/u, // Greek
he: /^[\p{Script=Hebrew}\s]+$/u, // Hebrew
ja: /^(?=.*[\p{Script=Hiragana}\p{Script=Katakana}])[\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Han}\s]+$/u, // Japanese (requires at least one Kana)
// zh: not possible to detect unambiguously
};
// If any pattern matches, the language is known.
for (const [lang, pattern] of Object.entries(languagePatterns) as [LanguageCode, RegExp][]) {
if (pattern.test(text.replace(/[\p{P}\p{S}]/gu, ''))) { // strip punctuation and symbols before checking
return lang;
}
}
if (sanitizedText.length < 10) { // heuristics if (sanitizedText.length < 10) { // heuristics
return; return;
} }
const [topResult] = lande( const [topResult] = lande(sanitizedText);
sanitizedText,
);
if (topResult) { if (topResult) {
const [iso6393, confidence] = topResult; const [iso6393, confidence] = topResult;
const locale = new Intl.Locale(iso6393); const locale = new Intl.Locale(iso6393);
@ -30,5 +49,4 @@ export function detectLanguage(text: string, minConfidence: number): LanguageCod
return locale.language as LanguageCode; return locale.language as LanguageCode;
} }
} }
return;
} }

View file

@ -7,7 +7,7 @@ import { MastodonMention } from '@/entities/MastodonMention.ts';
import { MastodonStatus } from '@/entities/MastodonStatus.ts'; import { MastodonStatus } from '@/entities/MastodonStatus.ts';
import { type DittoEvent } from '@/interfaces/DittoEvent.ts'; import { type DittoEvent } from '@/interfaces/DittoEvent.ts';
import { Storages } from '@/storages.ts'; import { Storages } from '@/storages.ts';
import { nostrDate } from '@/utils.ts'; import { isNostrId, nostrDate } from '@/utils.ts';
import { getMediaLinks, parseNoteContent, stripimeta } from '@/utils/note.ts'; import { getMediaLinks, parseNoteContent, stripimeta } from '@/utils/note.ts';
import { findReplyTag } from '@/utils/tags.ts'; import { findReplyTag } from '@/utils/tags.ts';
import { unfurlCardCached } from '@/utils/unfurl.ts'; import { unfurlCardCached } from '@/utils/unfurl.ts';
@ -41,8 +41,8 @@ async function renderStatus(event: DittoEvent, opts: RenderStatusOpts): Promise<
const mentionedPubkeys = [ const mentionedPubkeys = [
...new Set( ...new Set(
event.tags event.tags
.filter((tag) => tag[0] === 'p') .filter(([name, value]) => name === 'p' && isNostrId(value))
.map((tag) => tag[1]), .map(([, value]) => value),
), ),
]; ];