Merge branch 'link-preview-db' into 'main'

Store link previews in the database

Closes #301

See merge request soapbox-pub/ditto!715
This commit is contained in:
Alex Gleason 2025-03-09 01:39:56 +00:00
commit 3cac9b6e26
9 changed files with 64 additions and 53 deletions

View file

@ -422,7 +422,6 @@ export class DittoConf {
get caches(): { get caches(): {
nip05: { max: number; ttl: number }; nip05: { max: number; ttl: number };
favicon: { max: number; ttl: number }; favicon: { max: number; ttl: number };
linkPreview: { max: number; ttl: number };
translation: { max: number; ttl: number }; translation: { max: number; ttl: number };
} { } {
const env = this.env; const env = this.env;
@ -442,13 +441,6 @@ export class DittoConf {
ttl: Number(env.get('DITTO_CACHE_FAVICON_TTL') || 1 * 60 * 60 * 1000), ttl: Number(env.get('DITTO_CACHE_FAVICON_TTL') || 1 * 60 * 60 * 1000),
}; };
}, },
/** Link preview cache settings. */
get linkPreview(): { max: number; ttl: number } {
return {
max: Number(env.get('DITTO_CACHE_LINK_PREVIEW_MAX') || 3000),
ttl: Number(env.get('DITTO_CACHE_LINK_PREVIEW_TTL') || 12 * 60 * 60 * 1000),
};
},
/** Translation cache settings. */ /** Translation cache settings. */
get translation(): { max: number; ttl: number } { get translation(): { max: number; ttl: number } {
return { return {

View file

@ -1,6 +1,8 @@
import type { NPostgresSchema } from '@nostrify/db'; import type { NPostgresSchema } from '@nostrify/db';
import type { Generated } from 'kysely'; import type { Generated } from 'kysely';
import type { MastodonPreviewCard } from '@ditto/mastoapi/types';
export interface DittoTables extends NPostgresSchema { export interface DittoTables extends NPostgresSchema {
auth_tokens: AuthTokenRow; auth_tokens: AuthTokenRow;
author_stats: AuthorStatsRow; author_stats: AuthorStatsRow;
@ -34,6 +36,7 @@ interface EventStatsRow {
quotes_count: number; quotes_count: number;
reactions: string; reactions: string;
zaps_amount: number; zaps_amount: number;
link_preview?: MastodonPreviewCard;
} }
interface AuthTokenRow { interface AuthTokenRow {

View file

@ -0,0 +1,9 @@
import type { Kysely } from 'kysely';
/** Adds a `link_preview` column of type `jsonb` to the `event_stats` table. */
export async function up(db: Kysely<unknown>): Promise<void> {
  const addLinkPreview = db.schema
    .alterTable('event_stats')
    .addColumn('link_preview', 'jsonb');

  await addLinkPreview.execute();
}
/** Reverts `up` by dropping the `link_preview` column from the `event_stats` table. */
export async function down(db: Kysely<unknown>): Promise<void> {
  const dropLinkPreview = db.schema
    .alterTable('event_stats')
    .dropColumn('link_preview');

  await dropLinkPreview.execute();
}

View file

@ -7,7 +7,6 @@ import { z } from 'zod';
import { AppController } from '@/app.ts'; import { AppController } from '@/app.ts';
import { hydrateEvents } from '@/storages/hydrate.ts'; import { hydrateEvents } from '@/storages/hydrate.ts';
import { generateDateRange, Time } from '@/utils/time.ts'; import { generateDateRange, Time } from '@/utils/time.ts';
import { unfurlCardCached } from '@/utils/unfurl.ts';
import { errorJson } from '@/utils/log.ts'; import { errorJson } from '@/utils/log.ts';
import { renderStatus } from '@/views/mastodon/statuses.ts'; import { renderStatus } from '@/views/mastodon/statuses.ts';
@ -94,9 +93,8 @@ const trendingLinksController: AppController = async (c) => {
async function getTrendingLinks(conf: DittoConf, relay: NStore): Promise<TrendingLink[]> { async function getTrendingLinks(conf: DittoConf, relay: NStore): Promise<TrendingLink[]> {
const trends = await getTrendingTags(relay, 'r', await conf.signer.getPublicKey()); const trends = await getTrendingTags(relay, 'r', await conf.signer.getPublicKey());
return Promise.all(trends.map(async (trend) => { return Promise.all(trends.map((trend) => {
const link = trend.value; const link = trend.value;
const card = await unfurlCardCached(link);
const history = trend.history.map(({ day, authors, uses }) => ({ const history = trend.history.map(({ day, authors, uses }) => ({
day: String(day), day: String(day),
@ -119,7 +117,6 @@ async function getTrendingLinks(conf: DittoConf, relay: NStore): Promise<Trendin
image: null, image: null,
embed_url: '', embed_url: '',
blurhash: null, blurhash: null,
...card,
history, history,
}; };
})); }));

View file

@ -1,6 +1,8 @@
import { NostrEvent } from '@nostrify/nostrify'; import { NostrEvent } from '@nostrify/nostrify';
import { LanguageCode } from 'iso-639-1'; import { LanguageCode } from 'iso-639-1';
import type { MastodonPreviewCard } from '@ditto/mastoapi/types';
/** Ditto internal stats for the event's author. */ /** Ditto internal stats for the event's author. */
export interface AuthorStats { export interface AuthorStats {
followers_count: number; followers_count: number;
@ -22,6 +24,7 @@ export interface EventStats {
quotes_count: number; quotes_count: number;
reactions: Record<string, number>; reactions: Record<string, number>;
zaps_amount: number; zaps_amount: number;
link_preview?: MastodonPreviewCard;
} }
/** Internal Event representation used by Ditto, including extra keys. */ /** Internal Event representation used by Ditto, including extra keys. */

View file

@ -41,7 +41,7 @@ import { fetchFavicon, insertFavicon, queryFavicon } from '@/utils/favicon.ts';
import { lookupNip05 } from '@/utils/nip05.ts'; import { lookupNip05 } from '@/utils/nip05.ts';
import { parseNoteContent, stripimeta } from '@/utils/note.ts'; import { parseNoteContent, stripimeta } from '@/utils/note.ts';
import { SimpleLRU } from '@/utils/SimpleLRU.ts'; import { SimpleLRU } from '@/utils/SimpleLRU.ts';
import { unfurlCardCached } from '@/utils/unfurl.ts'; import { unfurlCard } from '@/utils/unfurl.ts';
import { renderWebPushNotification } from '@/views/mastodon/push.ts'; import { renderWebPushNotification } from '@/views/mastodon/push.ts';
interface DittoRelayStoreOpts { interface DittoRelayStoreOpts {
@ -217,10 +217,11 @@ export class DittoRelayStore implements NRelay {
await relay.event(purifyEvent(event), { signal }); await relay.event(purifyEvent(event), { signal });
} finally { } finally {
// This needs to run in steps, and should not block the API from responding. // This needs to run in steps, and should not block the API from responding.
const signal = AbortSignal.timeout(3000);
Promise.allSettled([ Promise.allSettled([
this.handleZaps(event), this.handleZaps(event),
this.updateAuthorData(event, signal), this.updateAuthorData(event, signal),
this.prewarmLinkPreview(event, signal), this.warmLinkPreview(event, signal),
this.generateSetEvents(event), this.generateSetEvents(event),
]) ])
.then(() => .then(() =>
@ -428,12 +429,34 @@ export class DittoRelayStore implements NRelay {
} }
} }
/**
 * Unfurl the first URL of a kind 1 note and persist the resulting preview card
 * in the `link_preview` column of the event's `event_stats` row.
 *
 * Does nothing for other kinds, for notes without a URL, or when unfurling
 * returns no card.
 *
 * @param event Event whose content is scanned for a URL.
 * @param signal Optional abort signal forwarded to the unfurl request.
 */
private async warmLinkPreview(event: NostrEvent, signal?: AbortSignal): Promise<void> {
  const { db, conf } = this.opts;

  if (event.kind !== 1) {
    return;
  }

  const { firstUrl } = parseNoteContent(stripimeta(event.content, event.tags), [], this.opts);
  if (!firstUrl) {
    return;
  }

  const linkPreview = await unfurlCard(firstUrl, { conf, signal });
  if (!linkPreview) {
    return;
  }

  // The zeroed counters only take effect when no stats row exists yet for this event.
  // On conflict, only `link_preview` is overwritten so existing counters are left untouched.
  await db.kysely.insertInto('event_stats')
    .values({
      event_id: event.id,
      replies_count: 0,
      reposts_count: 0,
      reactions_count: 0,
      quotes_count: 0,
      reactions: '{}',
      zaps_amount: 0,
      link_preview: linkPreview,
    })
    .onConflict((oc) => oc.column('event_id').doUpdateSet({ link_preview: linkPreview }))
    .execute();
}

View file

@ -411,6 +411,7 @@ async function gatherEventStats(
quotes_count: Math.max(0, row.quotes_count), quotes_count: Math.max(0, row.quotes_count),
reactions: row.reactions, reactions: row.reactions,
zaps_amount: Math.max(0, row.zaps_amount), zaps_amount: Math.max(0, row.zaps_amount),
link_preview: row.link_preview,
})); }));
} }

View file

@ -1,23 +1,27 @@
import { cachedLinkPreviewSizeGauge } from '@ditto/metrics';
import TTLCache from '@isaacs/ttlcache';
import { logi } from '@soapbox/logi'; import { logi } from '@soapbox/logi';
import { safeFetch } from '@soapbox/safe-fetch'; import { safeFetch } from '@soapbox/safe-fetch';
import DOMPurify from 'isomorphic-dompurify'; import DOMPurify from 'isomorphic-dompurify';
import { unfurl } from 'unfurl.js'; import { unfurl } from 'unfurl.js';
import { Conf } from '@/config.ts';
import { errorJson } from '@/utils/log.ts'; import { errorJson } from '@/utils/log.ts';
import type { DittoConf } from '@ditto/conf';
import type { MastodonPreviewCard } from '@ditto/mastoapi/types'; import type { MastodonPreviewCard } from '@ditto/mastoapi/types';
async function unfurlCard(url: string, signal: AbortSignal): Promise<MastodonPreviewCard | null> { interface UnfurlCardOpts {
conf: DittoConf;
signal?: AbortSignal;
}
export async function unfurlCard(url: string, opts: UnfurlCardOpts): Promise<MastodonPreviewCard | null> {
const { conf, signal } = opts;
try { try {
const result = await unfurl(url, { const result = await unfurl(url, {
fetch: (url) => fetch: (url) =>
safeFetch(url, { safeFetch(url, {
headers: { headers: {
'Accept': 'text/html, application/xhtml+xml', 'Accept': 'text/html, application/xhtml+xml',
'User-Agent': Conf.fetchUserAgent, 'User-Agent': conf.fetchUserAgent,
}, },
signal, signal,
}), }),
@ -54,19 +58,3 @@ async function unfurlCard(url: string, signal: AbortSignal): Promise<MastodonPre
return null; return null;
} }
} }
/**
 * TTL cache for preview cards, keyed by URL.
 * Values are the promises returned by the unfurl call, not resolved cards.
 * The cache is constructed with the `Conf.caches.linkPreview` settings.
 */
const previewCardCache = new TTLCache<string, Promise<MastodonPreviewCard | null>>(Conf.caches.linkPreview);
/**
 * Return the preview card promise for `url`, reusing a cached entry when one exists.
 * On a miss, a new unfurl is started, its promise is stored in the cache, and the
 * cache-size gauge is updated before the promise is returned.
 */
export function unfurlCardCached(url: string, signal = AbortSignal.timeout(1000)): Promise<MastodonPreviewCard | null> {
  const hit = previewCardCache.get(url);
  if (hit !== undefined) {
    return hit;
  }

  const pending = unfurlCard(url, signal);
  previewCardCache.set(url, pending);
  cachedLinkPreviewSizeGauge.set(previewCardCache.size);
  return pending;
}

View file

@ -6,7 +6,6 @@ import { type DittoEvent } from '@/interfaces/DittoEvent.ts';
import { nostrDate } from '@/utils.ts'; import { nostrDate } from '@/utils.ts';
import { getMediaLinks, parseNoteContent, stripimeta } from '@/utils/note.ts'; import { getMediaLinks, parseNoteContent, stripimeta } from '@/utils/note.ts';
import { findReplyTag } from '@/utils/tags.ts'; import { findReplyTag } from '@/utils/tags.ts';
import { unfurlCardCached } from '@/utils/unfurl.ts';
import { accountFromPubkey, renderAccount } from '@/views/mastodon/accounts.ts'; import { accountFromPubkey, renderAccount } from '@/views/mastodon/accounts.ts';
import { renderAttachment } from '@/views/mastodon/attachments.ts'; import { renderAttachment } from '@/views/mastodon/attachments.ts';
import { renderEmojis } from '@/views/mastodon/emojis.ts'; import { renderEmojis } from '@/views/mastodon/emojis.ts';
@ -42,21 +41,17 @@ async function renderStatus(
const mentions = event.mentions?.map((event) => renderMention(event)) ?? []; const mentions = event.mentions?.map((event) => renderMention(event)) ?? [];
const { html, links, firstUrl } = parseNoteContent(stripimeta(event.content, event.tags), mentions, { conf: Conf }); const { html, links } = parseNoteContent(stripimeta(event.content, event.tags), mentions, { conf: Conf });
const [card, relatedEvents] = await Promise const relatedEvents = viewerPubkey
.all([ ? await store.query([
firstUrl ? unfurlCardCached(firstUrl, AbortSignal.timeout(500)) : null, { kinds: [6], '#e': [event.id], authors: [viewerPubkey], limit: 1 },
viewerPubkey { kinds: [7], '#e': [event.id], authors: [viewerPubkey], limit: 1 },
? await store.query([ { kinds: [9734], '#e': [event.id], authors: [viewerPubkey], limit: 1 },
{ kinds: [6], '#e': [event.id], authors: [viewerPubkey], limit: 1 }, { kinds: [10001], '#e': [event.id], authors: [viewerPubkey], limit: 1 },
{ kinds: [7], '#e': [event.id], authors: [viewerPubkey], limit: 1 }, { kinds: [10003], '#e': [event.id], authors: [viewerPubkey], limit: 1 },
{ kinds: [9734], '#e': [event.id], authors: [viewerPubkey], limit: 1 }, ])
{ kinds: [10001], '#e': [event.id], authors: [viewerPubkey], limit: 1 }, : [];
{ kinds: [10003], '#e': [event.id], authors: [viewerPubkey], limit: 1 },
])
: [],
]);
const reactionEvent = relatedEvents.find((event) => event.kind === 7); const reactionEvent = relatedEvents.find((event) => event.kind === 7);
const repostEvent = relatedEvents.find((event) => event.kind === 6); const repostEvent = relatedEvents.find((event) => event.kind === 6);
@ -96,7 +91,7 @@ async function renderStatus(
return { return {
id: event.id, id: event.id,
account, account,
card, card: event.event_stats?.link_preview ?? null,
content: compatMentions + html, content: compatMentions + html,
created_at: nostrDate(event.created_at).toISOString(), created_at: nostrDate(event.created_at).toISOString(),
in_reply_to_id: replyId ?? null, in_reply_to_id: replyId ?? null,