Strip media URLs regardless of whitespace delimiter

This commit is contained in:
Alex Gleason 2025-03-12 16:46:32 -05:00
parent 2e4456dba2
commit bd71b45a8d
No known key found for this signature in database
GPG key ID: 7211D1F99744FBB7
3 changed files with 64 additions and 40 deletions

View file

@@ -1,8 +1,7 @@
import { DittoConf } from '@ditto/conf';
import { assertEquals } from '@std/assert';
import { eventFixture } from '@/test.ts';
import { contentToHtml, getCardUrl, getMediaLinks, stripMediaUrls } from '@/utils/note.ts';
import { contentToHtml, getCardUrl, getMediaLinks, removeTrailingUrls } from '@/utils/note.ts';
import { genEvent } from '@nostrify/nostrify/test';
Deno.test('contentToHtml', () => {
@@ -125,24 +124,53 @@ Deno.test('getMediaLinks', () => {
]]);
});
Deno.test('stripMediaUrls', async () => {
const { content, tags } = await eventFixture('event-imeta');
Deno.test('removeTrailingUrls with spaces', () => {
const urls = new Set<string>([
'https://ditto.pub/a.png',
'https://ditto.pub/b.jpg',
]);
const media: string[][][] = tags
.filter(([name]) => name === 'imeta')
.map(([_, ...entries]) =>
entries.map((entry) => {
const split = entry.split(' ');
return [split[0], split.splice(1).join(' ')];
})
);
const result = removeTrailingUrls(
'hey!\n\nthis is cool https://ditto.pub/a.png https://ditto.pub/b.jpg',
urls,
);
const stripped = stripMediaUrls(content, media);
assertEquals(result, 'hey!\n\nthis is cool');
});
const expected =
`Today we were made aware of multiple Fediverse blog posts incorrectly attributing “vote Trump” spam on Bluesky to the Mostr.pub Bridge. \n\nThis spam is NOT coming from Mostr. From the screenshots used in these blogs, it's clear the spam is coming from an entirely different bridge called momostr.pink. This bridge is not affiliated with Mostr, and is not even a fork of Mostr. We appreciate that the authors of these posts responded quickly to us and have since corrected the blogs. \n\nMostr.pub uses stirfry policies for anti-spam filtering. This includes an anti-duplication policy that prevents spam like the recent “vote Trump” posts weve seen repeated over and over. \n\nIt is important to note WHY there are multiple bridges, though. \n\nWhen Mostr.pub launched, multiple major servers immediately blocked Mostr, including Mastodon.social. The moderators of Mastodon.social claimed that this was because Nostr was unregulated, and suggested to one user that if they want to bridge their account they should host their own bridge.\n\nThat is exactly what momostr.pink, the source of this spam, has done. \n\nThe obvious response to the censorship of the Mostr Bridge is to build more bridges. \n\nWhile we have opted for pro-social policies that aim to reduce spam and build better connections between decentralized platforms, other bridges built to get around censorship of the Mostr Bridge may not — as were already seeing.\n\nThere will inevitably be multiple bridges, and were working on creating solutions to the problems that arise from that. In the meantime, if the Fediverse could do itself a favor and chill with the censorship for two seconds, we might not have so many problems. `;
Deno.test('removeTrailingUrls with newlines', () => {
const urls = new Set<string>([
'https://ditto.pub/a.png',
'https://ditto.pub/b.jpg',
]);
assertEquals(stripped, expected);
const result = removeTrailingUrls(
'Hey!\n\nthis is cool \n\nhttps://ditto.pub/a.png\nhttps://ditto.pub/b.jpg',
urls,
);
assertEquals(result, 'Hey!\n\nthis is cool');
});
// Content consisting solely of URLs from the set (space-separated here) must be reduced to an empty string.
Deno.test('removeTrailingUrls with only URLs', () => {
const urls = new Set<string>([
'https://ditto.pub/a.png',
'https://ditto.pub/b.jpg',
]);
const result = removeTrailingUrls(
'https://ditto.pub/a.png https://ditto.pub/b.jpg',
urls,
);
// Both URLs are in the set, so nothing is expected to remain.
assertEquals(result, '');
});
// A single URL with no surrounding whitespace at all must still be recognised and removed.
Deno.test('removeTrailingUrls with just one URL', () => {
const urls = new Set<string>(['https://ditto.pub/a.png']);
const result = removeTrailingUrls('https://ditto.pub/a.png', urls);
assertEquals(result, '');
});
Deno.test('getCardUrl', async (t) => {

View file

@@ -69,34 +69,20 @@ export function contentToHtml(content: string, mentions: MastodonMention[], opts
}).replace(/\n+$/, '');
}
/** Remove media URLs from content. */
export function stripMediaUrls(content: string, media: string[][][]): string {
if (!media.length) {
return content;
}
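/**
 * Remove whitespace-delimited tokens that are contained in `urls` from the _end_ of the text,
 * together with any whitespace that precedes them. Stops at the first trailing token not in `urls`.
 *
 * NOTE: at most one whitespace character after the final token is tolerated; text that ends in
 * two or more whitespace characters is returned unchanged.
 */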
export function removeTrailingUrls(text: string, urls: Set<string>): string {
let trimmedText = text;
const urls = new Set<string>();
for (const tags of media) {
for (const [name, value] of tags) {
if (name === 'url') {
urls.add(value);
break;
}
}
}
const lines = content.split('\n').reverse();
for (const line of [...lines]) {
if (line === '' || urls.has(line)) {
lines.splice(0, 1);
while (true) {
const match = trimmedText.match(/\s?([^\s]+)\s?$/);
if (match && urls.has(match[1])) {
trimmedText = trimmedText.slice(0, match.index).replace(/\s+$/, '');
} else {
break;
}
}
return lines.reverse().join('\n');
return trimmedText;
}
export function getLinks(content: string) {

View file

@@ -4,7 +4,7 @@ import { nip19 } from 'nostr-tools';
import { Conf } from '@/config.ts';
import { type DittoEvent } from '@/interfaces/DittoEvent.ts';
import { nostrDate } from '@/utils.ts';
import { contentToHtml, getLinks, getMediaLinks, stripMediaUrls } from '@/utils/note.ts';
import { contentToHtml, getLinks, getMediaLinks, removeTrailingUrls } from '@/utils/note.ts';
import { findReplyTag } from '@/utils/tags.ts';
import { accountFromPubkey, renderAccount } from '@/views/mastodon/accounts.ts';
import { renderAttachment } from '@/views/mastodon/attachments.ts';
@@ -52,8 +52,18 @@ async function renderStatus(
);
const media = imeta.length ? imeta : getMediaLinks(links);
const mediaUrls = new Set<string>();
const html = contentToHtml(stripMediaUrls(event.content, media), mentions, { conf: Conf });
for (const tags of media) {
for (const [name, value] of tags) {
if (name === 'url') {
mediaUrls.add(value);
break;
}
}
}
const html = contentToHtml(removeTrailingUrls(event.content, mediaUrls), mentions, { conf: Conf });
const relatedEvents = viewerPubkey
? await store.query([