diff --git a/packages/ditto/storages/DittoRelayStore.ts b/packages/ditto/storages/DittoRelayStore.ts index ba6e8569..40478eac 100644 --- a/packages/ditto/storages/DittoRelayStore.ts +++ b/packages/ditto/storages/DittoRelayStore.ts @@ -39,7 +39,7 @@ import { PolicyWorker } from '@/workers/policy.ts'; import { verifyEventWorker } from '@/workers/verify.ts'; import { fetchFavicon, insertFavicon, queryFavicon } from '@/utils/favicon.ts'; import { lookupNip05 } from '@/utils/nip05.ts'; -import { parseNoteContent, stripimeta } from '@/utils/note.ts'; +import { getCardUrl } from '@/utils/note.ts'; import { SimpleLRU } from '@/utils/SimpleLRU.ts'; import { unfurlCard } from '@/utils/unfurl.ts'; import { renderWebPushNotification } from '@/views/mastodon/push.ts'; @@ -221,7 +221,7 @@ export class DittoRelayStore implements NRelay { Promise.allSettled([ this.handleZaps(event), this.updateAuthorData(event, signal), - this.warmLinkPreview(event, signal), + this.generateLinkPreview(event, signal), this.generateSetEvents(event), ]) .then(() => @@ -429,14 +429,14 @@ export class DittoRelayStore implements NRelay { } } - private async warmLinkPreview(event: NostrEvent, signal?: AbortSignal): Promise { + private async generateLinkPreview(event: NostrEvent, signal?: AbortSignal): Promise { const { db, conf } = this.opts; if (event.kind === 1) { - const { firstUrl } = parseNoteContent(stripimeta(event.content, event.tags), [], this.opts); + const cardUrl = getCardUrl(event); - if (firstUrl) { - const linkPreview = await unfurlCard(firstUrl, { conf, signal }); + if (cardUrl) { + const linkPreview = await unfurlCard(cardUrl, { conf, signal }); if (linkPreview) { await db.kysely.insertInto('event_stats') diff --git a/packages/ditto/utils/note.test.ts b/packages/ditto/utils/note.test.ts index cdf29314..ca4b282b 100644 --- a/packages/ditto/utils/note.test.ts +++ b/packages/ditto/utils/note.test.ts @@ -2,35 +2,34 @@ import { DittoConf } from '@ditto/conf'; import { assertEquals } from '@std/assert'; import { eventFixture } from '@/test.ts'; -import { getMediaLinks, parseNoteContent, stripimeta } from '@/utils/note.ts'; +import { contentToHtml, getCardUrl, getMediaLinks, stripMediaUrls } from '@/utils/note.ts'; +import { genEvent } from '@nostrify/nostrify/test'; -Deno.test('parseNoteContent', () => { +Deno.test('contentToHtml', () => { const conf = new DittoConf(new Map()); - const { html, links, firstUrl } = parseNoteContent('Hello, world!', [], { conf }); + const html = contentToHtml('Hello, world!', [], { conf }); assertEquals(html, 'Hello, world!'); - assertEquals(links, []); - assertEquals(firstUrl, undefined); }); -Deno.test('parseNoteContent parses URLs', () => { +Deno.test('contentToHtml parses URLs', () => { const conf = new DittoConf(new Map()); - const { html } = parseNoteContent('check out my website: https://alexgleason.me', [], { conf }); + const html = contentToHtml('check out my website: https://alexgleason.me', [], { conf }); assertEquals(html, 'check out my website: https://alexgleason.me'); }); -Deno.test('parseNoteContent parses bare URLs', () => { +Deno.test('contentToHtml parses bare URLs', () => { const conf = new DittoConf(new Map()); - const { html } = parseNoteContent('have you seen ditto.pub?', [], { conf }); + const html = contentToHtml('have you seen ditto.pub?', [], { conf }); assertEquals(html, 'have you seen ditto.pub?'); }); -Deno.test('parseNoteContent parses mentions with apostrophes', () => { +Deno.test('contentToHtml parses mentions with apostrophes', () => { const conf = new DittoConf(new Map()); - const { html } = parseNoteContent( + const html = contentToHtml( `did you see nostr:nprofile1qqsqgc0uhmxycvm5gwvn944c7yfxnnxm0nyh8tt62zhrvtd3xkj8fhgprdmhxue69uhkwmr9v9ek7mnpw3hhytnyv4mz7un9d3shjqgcwaehxw309ahx7umywf5hvefwv9c8qtmjv4kxz7gpzemhxue69uhhyetvv9ujumt0wd68ytnsw43z7s3al0v's speech?`, [{ id: '0461fcbecc4c3374439932d6b8f11269ccdb7cc973ad7a50ae362db135a474dd', @@ -47,10 +46,10 @@ Deno.test('parseNoteContent parses mentions with apostrophes', () => { ); }); -Deno.test('parseNoteContent parses mentions with commas', () => { +Deno.test('contentToHtml parses mentions with commas', () => { const conf = new DittoConf(new Map()); - const { html } = parseNoteContent( + const html = contentToHtml( `Sim. Hi nostr:npub1q3sle0kvfsehgsuexttt3ugjd8xdklxfwwkh559wxckmzddywnws6cd26p and nostr:npub1gujeqakgt7fyp6zjggxhyy7ft623qtcaay5lkc8n8gkry4cvnrzqd3f67z, any chance to have Cobrafuma as PWA?`, [{ id: '0461fcbecc4c3374439932d6b8f11269ccdb7cc973ad7a50ae362db135a474dd', @@ -72,16 +71,16 @@ Deno.test('parseNoteContent parses mentions with commas', () => { ); }); -Deno.test("parseNoteContent doesn't parse invalid nostr URIs", () => { +Deno.test("contentToHtml doesn't parse invalid nostr URIs", () => { const conf = new DittoConf(new Map()); - const { html } = parseNoteContent('nip19 has URIs like nostr:npub and nostr:nevent, etc.', [], { conf }); + const html = contentToHtml('nip19 has URIs like nostr:npub and nostr:nevent, etc.', [], { conf }); assertEquals(html, 'nip19 has URIs like nostr:npub and nostr:nevent, etc.'); }); -Deno.test('parseNoteContent renders empty for non-profile nostr URIs', () => { +Deno.test('contentToHtml renders empty for non-profile nostr URIs', () => { const conf = new DittoConf(new Map()); - const { html } = parseNoteContent( + const html = contentToHtml( 'nostr:nevent1qgsr9cvzwc652r4m83d86ykplrnm9dg5gwdvzzn8ameanlvut35wy3gpz3mhxue69uhhztnnwashymtnw3ezucm0d5qzqru8mkz2q4gzsxg99q7pdneyx7n8p5u0afe3ntapj4sryxxmg4gpcdvgce', [], { conf }, @@ -90,10 +89,10 @@ Deno.test('parseNoteContent renders empty for non-profile nostr URIs', () => { assertEquals(html, ''); }); -Deno.test("parseNoteContent doesn't fuck up links to my own post", () => { +Deno.test("contentToHtml doesn't fuck up links to my own post", () => { const conf = new DittoConf(new Map()); - const { html } = parseNoteContent( + const html = contentToHtml( 'Check this post: https://gleasonator.dev/@alex@gleasonator.dev/posts/a8badb480d88f9e7b6a090342279ef47ed0e0a3989ed85f898dfedc6be94225f', [{ id: '0461fcbecc4c3374439932d6b8f11269ccdb7cc973ad7a50ae362db135a474dd', @@ -117,19 +116,76 @@ Deno.test('getMediaLinks', () => { { href: 'https://example.com/yolo' }, { href: 'https://example.com/' }, ]; + const mediaLinks = getMediaLinks(links); + assertEquals(mediaLinks, [[ ['url', 'https://example.com/image.png'], ['m', 'image/png'], ]]); }); -Deno.test('stripimeta', async () => { +Deno.test('stripMediaUrls', async () => { const { content, tags } = await eventFixture('event-imeta'); - const stripped = stripimeta(content, tags); + const media: string[][][] = tags + .filter(([name]) => name === 'imeta') + .map(([_, ...entries]) => + entries.map((entry) => { + const split = entry.split(' '); + return [split[0], split.splice(1).join(' ')]; + }) + ); + + const stripped = stripMediaUrls(content, media); + const expected = `Today we were made aware of multiple Fediverse blog posts incorrectly attributing “vote Trump” spam on Bluesky to the Mostr.pub Bridge. \n\nThis spam is NOT coming from Mostr. From the screenshots used in these blogs, it's clear the spam is coming from an entirely different bridge called momostr.pink. This bridge is not affiliated with Mostr, and is not even a fork of Mostr. We appreciate that the authors of these posts responded quickly to us and have since corrected the blogs. \n\nMostr.pub uses stirfry policies for anti-spam filtering. This includes an anti-duplication policy that prevents spam like the recent “vote Trump” posts we’ve seen repeated over and over. \n\nIt is important to note WHY there are multiple bridges, though. \n\nWhen Mostr.pub launched, multiple major servers immediately blocked Mostr, including Mastodon.social. The moderators of Mastodon.social claimed that this was because Nostr was unregulated, and suggested to one user that if they want to bridge their account they should host their own bridge.\n\nThat is exactly what momostr.pink, the source of this spam, has done. \n\nThe obvious response to the censorship of the Mostr Bridge is to build more bridges. \n\nWhile we have opted for pro-social policies that aim to reduce spam and build better connections between decentralized platforms, other bridges built to get around censorship of the Mostr Bridge may not — as we’re already seeing.\n\nThere will inevitably be multiple bridges, and we’re working on creating solutions to the problems that arise from that. In the meantime, if the Fediverse could do itself a favor and chill with the censorship for two seconds, we might not have so many problems. `; assertEquals(stripped, expected); }); + +Deno.test('getCardUrl', async (t) => { + await t.step('returns undefined for an event with no URLs', () => { + const result = getCardUrl(genEvent({ kind: 1, content: 'Hello, world!' })); + assertEquals(result, undefined); + }); + + await t.step('returns the first URL for an event with a URL', () => { + const result = getCardUrl(genEvent({ kind: 1, content: 'https://soapbox.pub' })); + assertEquals(result, 'https://soapbox.pub'); + }); + + await t.step('returns the first URL for an event with multiple URLs', () => { + const result = getCardUrl(genEvent({ kind: 1, content: 'https://ditto.pub https://soapbox.pub' })); + assertEquals(result, 'https://ditto.pub'); + }); + + await t.step('returns the first non-media URL (by file extension) in an event without imeta tags', () => { + const result = getCardUrl(genEvent({ kind: 1, content: 'https://i.nostr.build/video.mp4 https://ditto.pub' })); + assertEquals(result, 'https://ditto.pub'); + }); + + await t.step('returns the first non-media URL in an event with imeta tags', () => { + const result = getCardUrl(genEvent({ + kind: 1, + content: 'https://i.nostr.build/video https://ditto.pub', + tags: [['imeta', 'url https://i.nostr.build/video']], + })); + + assertEquals(result, 'https://ditto.pub'); + }); + + await t.step('returns undefined in an event with multiple imeta tags and no other URLs', () => { + const result = getCardUrl(genEvent({ + kind: 1, + content: 'https://i.nostr.build/video https://ditto.pub', + tags: [ + ['imeta', 'url https://i.nostr.build/video'], + ['imeta', 'url https://ditto.pub'], + ], + })); + + assertEquals(result, undefined); + }); +}); diff --git a/packages/ditto/utils/note.ts b/packages/ditto/utils/note.ts index c51595f1..987bdd72 100644 --- a/packages/ditto/utils/note.ts +++ b/packages/ditto/utils/note.ts @@ -8,31 +8,22 @@ import { getUrlMediaType, isPermittedMediaType } from '@/utils/media.ts'; import type { DittoConf } from '@ditto/conf'; import type { MastodonMention } from '@ditto/mastoapi/types'; +import type { NostrEvent } from '@nostrify/nostrify'; linkify.registerCustomProtocol('nostr', true); linkify.registerCustomProtocol('wss'); type Link = ReturnType[0]; -interface ParsedNoteContent { - html: string; - links: Link[]; - /** First non-media URL - eligible for a preview card. */ - firstUrl: string | undefined; -} - interface ParseNoteContentOpts { conf: DittoConf; } -/** Convert Nostr content to Mastodon API HTML. Also return parsed data. */ -function parseNoteContent(content: string, mentions: MastodonMention[], opts: ParseNoteContentOpts): ParsedNoteContent { +/** Convert Nostr content to Mastodon API HTML. */ +export function contentToHtml(content: string, mentions: MastodonMention[], opts: ParseNoteContentOpts): string { const { conf } = opts; - const links = linkify.find(content).filter(({ type }) => type === 'url'); - const firstUrl = links.find(isNonMediaLink)?.href; - - const result = linkifyStr(content, { + return linkifyStr(content, { render: { hashtag: ({ content }) => { const tag = content.replace(/^#/, ''); @@ -76,25 +67,24 @@ function parseNoteContent(content: string, mentions: MastodonMention[], opts: Pa }, }, }).replace(/\n+$/, ''); - - return { - html: result, - links, - firstUrl, - }; } -/** Remove imeta links. */ -function stripimeta(content: string, tags: string[][]): string { - const imeta = tags.filter(([name]) => name === 'imeta'); - - if (!imeta.length) { +/** Remove media URLs from content. */ +export function stripMediaUrls(content: string, media: string[][][]): string { + if (!media.length) { return content; } - const urls = new Set( - imeta.map(([, ...values]) => values.map((v) => v.split(' ')).find(([name]) => name === 'url')?.[1]), - ); + const urls = new Set(); + + for (const tags of media) { + for (const [name, value] of tags) { + if (name === 'url') { + urls.add(value); + break; + } + } + } const lines = content.split('\n').reverse(); @@ -109,8 +99,12 @@ function stripimeta(content: string, tags: string[][]): string { return lines.reverse().join('\n'); } -/** Returns a matrix of tags. Each item is a list of NIP-94 tags representing a file. */ -function getMediaLinks(links: Pick[]): string[][][] { +export function getLinks(content: string) { + return linkify.find(content).filter(({ type }) => type === 'url'); +} + +/** Legacy media URL finder. Should be used only as a fallback when no imeta tags are in the event. */ +export function getMediaLinks(links: Pick[]): string[][][] { return links.reduce((acc, link) => { const mediaType = getUrlMediaType(link.href); if (!mediaType) return acc; @@ -126,8 +120,36 @@ function getMediaLinks(links: Pick[]): string[][][] { }, []); } -function isNonMediaLink({ href }: Link): boolean { - return /^https?:\/\//.test(href) && !getUrlMediaType(href); +/** Get the first non-media URL from an event. */ +export function getCardUrl(event: NostrEvent): string | undefined { + const links = getLinks(event.content); + + const imeta: string[][][] = event.tags + .filter(([name]) => name === 'imeta') + .map(([_, ...entries]) => + entries.map((entry) => { + const split = entry.split(' '); + return [split[0], split.splice(1).join(' ')]; + }) + ); + + const media = imeta.length ? imeta : getMediaLinks(links); + const mediaUrls = new Set(); + + for (const tags of media) { + for (const [name, value] of tags) { + if (name === 'url') { + mediaUrls.add(value); + break; + } + } + } + + for (const link of links) { + if (link.type === 'url' && !mediaUrls.has(link.href)) { + return link.href; + } + } } /** Get pubkey from decoded bech32 entity, or undefined if not applicable. */ @@ -141,7 +163,7 @@ function getDecodedPubkey(decoded: nip19.DecodeResult): string | undefined { } /** Find a quote in the content. */ -function findQuoteInContent(content: string): string | undefined { +export function findQuoteInContent(content: string): string | undefined { try { for (const { decoded } of nip27.matchAll(content)) { switch (decoded.type) { @@ -155,5 +177,3 @@ function findQuoteInContent(content: string): string | undefined { // do nothing } } - -export { findQuoteInContent, getMediaLinks, parseNoteContent, stripimeta }; diff --git a/packages/ditto/views/mastodon/accounts.ts b/packages/ditto/views/mastodon/accounts.ts index 4639ade3..7f390d1a 100644 --- a/packages/ditto/views/mastodon/accounts.ts +++ b/packages/ditto/views/mastodon/accounts.ts @@ -5,7 +5,7 @@ import { Conf } from '@/config.ts'; import { type DittoEvent } from '@/interfaces/DittoEvent.ts'; import { metadataSchema } from '@/schemas/nostr.ts'; import { getLnurl } from '@/utils/lnurl.ts'; -import { parseNoteContent } from '@/utils/note.ts'; +import { contentToHtml } from '@/utils/note.ts'; import { getTagSet } from '@/utils/tags.ts'; import { nostrDate, nostrNow, parseNip05 } from '@/utils.ts'; import { renderEmojis } from '@/views/mastodon/emojis.ts'; @@ -48,7 +48,7 @@ function renderAccount(event: Omit, opts: ToAccountOpt const parsed05 = stats?.nip05 ? parseNip05(stats.nip05) : undefined; const acct = parsed05?.handle || npub; - const { html } = parseNoteContent(about || '', [], { conf: Conf }); + const html = contentToHtml(about || '', [], { conf: Conf }); const fields = _fields ?.slice(0, Conf.profileFields.maxFields) @@ -84,7 +84,7 @@ function renderAccount(event: Omit, opts: ToAccountOpt discoverable: true, display_name: name ?? '', emojis: renderEmojis(event), - fields: fields.map((field) => ({ ...field, value: parseNoteContent(field.value, [], { conf: Conf }).html })), + fields: fields.map((field) => ({ ...field, value: contentToHtml(field.value, [], { conf: Conf }) })), follow_requests_count: 0, followers_count: stats?.followers_count ?? 0, following_count: stats?.following_count ?? 0, diff --git a/packages/ditto/views/mastodon/statuses.ts b/packages/ditto/views/mastodon/statuses.ts index e4084123..a82953f0 100644 --- a/packages/ditto/views/mastodon/statuses.ts +++ b/packages/ditto/views/mastodon/statuses.ts @@ -4,7 +4,7 @@ import { nip19 } from 'nostr-tools'; import { Conf } from '@/config.ts'; import { type DittoEvent } from '@/interfaces/DittoEvent.ts'; import { nostrDate } from '@/utils.ts'; -import { getMediaLinks, parseNoteContent, stripimeta } from '@/utils/note.ts'; +import { contentToHtml, getLinks, getMediaLinks, stripMediaUrls } from '@/utils/note.ts'; import { findReplyTag } from '@/utils/tags.ts'; import { accountFromPubkey, renderAccount } from '@/views/mastodon/accounts.ts'; import { renderAttachment } from '@/views/mastodon/attachments.ts'; @@ -39,9 +39,21 @@ async function renderStatus( const replyId = findReplyTag(event.tags)?.[1]; + const links = getLinks(event.content); const mentions = event.mentions?.map((event) => renderMention(event)) ?? []; - const { html, links } = parseNoteContent(stripimeta(event.content, event.tags), mentions, { conf: Conf }); + const imeta: string[][][] = event.tags + .filter(([name]) => name === 'imeta') + .map(([_, ...entries]) => + entries.map((entry) => { + const split = entry.split(' '); + return [split[0], split.splice(1).join(' ')]; + }) + ); + + const media = imeta.length ? imeta : getMediaLinks(links); + + const html = contentToHtml(stripMediaUrls(event.content, media), mentions, { conf: Conf }); const relatedEvents = viewerPubkey ? await store.query([ @@ -68,17 +80,6 @@ async function renderStatus( const cw = event.tags.find(([name]) => name === 'content-warning'); const subject = event.tags.find(([name]) => name === 'subject'); - const imeta: string[][][] = event.tags - .filter(([name]) => name === 'imeta') - .map(([_, ...entries]) => - entries.map((entry) => { - const split = entry.split(' '); - return [split[0], split.splice(1).join(' ')]; - }) - ); - - const media = imeta.length ? imeta : getMediaLinks(links); - /** Pleroma emoji reactions object. */ const reactions = Object.entries(event.event_stats?.reactions ?? {}).reduce((acc, [emoji, count]) => { if (['+', '-'].includes(emoji)) return acc;