mirror of
https://gitlab.com/soapbox-pub/ditto.git
synced 2025-12-06 03:19:46 +00:00
Optimize top_authors materialized view for better performance
- Limit materialized view to authors with followers_count > 0 and top 10k records - Add proper indexes (search GIN, pubkey B-tree, followers_count DESC) - Use CONCURRENTLY for non-blocking refreshes in cron job - Reduces sorting overhead and memory usage during refresh The original materialized view was expensive because it sorted all authors regardless of follower count. This optimization reduces the dataset to a manageable size while maintaining search functionality. Fixes expensive query issue in top_authors materialized view.
This commit is contained in:
parent
370deac1af
commit
baa7968305
2 changed files with 44 additions and 1 deletions
43
packages/db/migrations/055_optimize_top_authors_view.ts
Normal file
43
packages/db/migrations/055_optimize_top_authors_view.ts
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
import { type Kysely, sql } from 'kysely';
|
||||
|
||||
// deno-lint-ignore no-explicit-any
|
||||
export async function up(db: Kysely<any>): Promise<void> {
|
||||
// Drop the existing materialized view and its indexes
|
||||
await sql`DROP MATERIALIZED VIEW IF EXISTS top_authors`.execute(db);
|
||||
|
||||
// Create a more optimized materialized view that limits the number of authors
|
||||
// This reduces the sorting cost and memory usage during refresh
|
||||
await sql`
|
||||
CREATE MATERIALIZED VIEW top_authors AS
|
||||
SELECT pubkey, followers_count, search
|
||||
FROM author_stats
|
||||
WHERE followers_count > 0 -- Only include authors with followers
|
||||
ORDER BY followers_count DESC
|
||||
LIMIT 10000 -- Limit to top 10k authors for search functionality
|
||||
`.execute(db);
|
||||
|
||||
// Create indexes on the materialized view for better query performance
|
||||
await sql`CREATE INDEX CONCURRENTLY IF NOT EXISTS top_authors_search_idx ON top_authors USING GIN (search gin_trgm_ops)`
|
||||
.execute(db);
|
||||
await sql`CREATE INDEX CONCURRENTLY IF NOT EXISTS top_authors_pubkey_idx ON top_authors (pubkey)`.execute(db);
|
||||
await sql`CREATE INDEX CONCURRENTLY IF NOT EXISTS top_authors_followers_count_idx ON top_authors (followers_count DESC)`
|
||||
.execute(db);
|
||||
|
||||
// Ensure the materialized view is populated with data immediately
|
||||
await sql`REFRESH MATERIALIZED VIEW CONCURRENTLY top_authors`.execute(db);
|
||||
}
|
||||
|
||||
export async function down(db: Kysely<unknown>): Promise<void> {
|
||||
// Drop the optimized view and recreate the original one
|
||||
await sql`DROP MATERIALIZED VIEW IF EXISTS top_authors`.execute(db);
|
||||
|
||||
await sql`
|
||||
CREATE MATERIALIZED VIEW top_authors AS
|
||||
SELECT pubkey, followers_count, search
|
||||
FROM author_stats
|
||||
ORDER BY followers_count DESC
|
||||
`.execute(db);
|
||||
|
||||
await sql`CREATE INDEX top_authors_search_idx ON top_authors USING GIN (search gin_trgm_ops)`.execute(db);
|
||||
await sql`CREATE INDEX top_authors_pubkey_idx ON top_authors (pubkey)`.execute(db);
|
||||
}
|
||||
|
|
@ -19,6 +19,6 @@ export function cron(ctx: TrendsCtx) {
|
|||
|
||||
Deno.cron('refresh top authors', '20 * * * *', async () => {
|
||||
const { kysely } = ctx.db;
|
||||
await sql`refresh materialized view top_authors`.execute(kysely);
|
||||
await sql`refresh materialized view CONCURRENTLY top_authors`.execute(kysely);
|
||||
});
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue