Merge branch 'db-export-filters' into 'main'

Allow filtering items exported by `deno task db:export`

Closes #186

See merge request soapbox-pub/ditto!463
This commit is contained in:
Alex Gleason 2024-08-26 22:14:23 +00:00
commit e286ecc47d
4 changed files with 255 additions and 20 deletions

View file

@ -48,6 +48,7 @@
"@std/streams": "jsr:@std/streams@^0.223.0", "@std/streams": "jsr:@std/streams@^0.223.0",
"comlink": "npm:comlink@^4.4.1", "comlink": "npm:comlink@^4.4.1",
"comlink-async-generator": "npm:comlink-async-generator@^0.0.1", "comlink-async-generator": "npm:comlink-async-generator@^0.0.1",
"commander": "npm:commander@12.1.0",
"deno-safe-fetch/load": "https://gitlab.com/soapbox-pub/deno-safe-fetch/-/raw/v1.0.0/load.ts", "deno-safe-fetch/load": "https://gitlab.com/soapbox-pub/deno-safe-fetch/-/raw/v1.0.0/load.ts",
"deno.json": "./deno.json", "deno.json": "./deno.json",
"entities": "npm:entities@^4.5.0", "entities": "npm:entities@^4.5.0",

6
deno.lock generated
View file

@ -60,6 +60,7 @@
"npm:comlink-async-generator": "npm:comlink-async-generator@0.0.1", "npm:comlink-async-generator": "npm:comlink-async-generator@0.0.1",
"npm:comlink-async-generator@^0.0.1": "npm:comlink-async-generator@0.0.1", "npm:comlink-async-generator@^0.0.1": "npm:comlink-async-generator@0.0.1",
"npm:comlink@^4.4.1": "npm:comlink@4.4.1", "npm:comlink@^4.4.1": "npm:comlink@4.4.1",
"npm:commander@12.1.0": "npm:commander@12.1.0",
"npm:entities@^4.5.0": "npm:entities@4.5.0", "npm:entities@^4.5.0": "npm:entities@4.5.0",
"npm:fast-stable-stringify@^1.0.0": "npm:fast-stable-stringify@1.0.0", "npm:fast-stable-stringify@^1.0.0": "npm:fast-stable-stringify@1.0.0",
"npm:formdata-helper@^0.3.0": "npm:formdata-helper@0.3.0", "npm:formdata-helper@^0.3.0": "npm:formdata-helper@0.3.0",
@ -543,6 +544,10 @@
"integrity": "sha512-yPVavfyCcRhmorC7rWlkHn15b4wDVgVmBA7kV4QVBsF7kv/9TKJAbAXVTxvTnwP8HHKjRCJDClKbciiYS7p0DQ==", "integrity": "sha512-yPVavfyCcRhmorC7rWlkHn15b4wDVgVmBA7kV4QVBsF7kv/9TKJAbAXVTxvTnwP8HHKjRCJDClKbciiYS7p0DQ==",
"dependencies": {} "dependencies": {}
}, },
"commander@12.1.0": {
"integrity": "sha512-Vw8qHK3bZM9y/P10u3Vib8o/DdkvA2OtPtZvD871QKjy74Wj1WSKFILMPRPSdUSx5RFK1arlJzEtA4PkFgnbuA==",
"dependencies": {}
},
"cross-spawn@7.0.3": { "cross-spawn@7.0.3": {
"integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
"dependencies": { "dependencies": {
@ -1904,6 +1909,7 @@
"npm:@scure/base@^1.1.6", "npm:@scure/base@^1.1.6",
"npm:comlink-async-generator@^0.0.1", "npm:comlink-async-generator@^0.0.1",
"npm:comlink@^4.4.1", "npm:comlink@^4.4.1",
"npm:commander@12.1.0",
"npm:entities@^4.5.0", "npm:entities@^4.5.0",
"npm:fast-stable-stringify@^1.0.0", "npm:fast-stable-stringify@^1.0.0",
"npm:formdata-helper@^0.3.0", "npm:formdata-helper@^0.3.0",

94
scripts/db-export.test.ts Normal file
View file

@ -0,0 +1,94 @@
import { assertEquals, assertThrows } from '@std/assert';
import { buildFilter } from './db-export.ts';
// With no options at all, the resulting filter must have no keys.
Deno.test('buildFilter should return an empty filter when no arguments are provided', () => {
  const keys = Object.keys(buildFilter({}));
  assertEquals(keys.length, 0);
});
// A 64-character hex pubkey is passed through to `filter.authors` untouched.
Deno.test('buildFilter should correctly handle valid authors', () => {
  const pubkey = 'a'.repeat(64);
  const { authors } = buildFilter({ authors: [pubkey] });
  assertEquals(authors, [pubkey]);
});
// A value that is not a hex pubkey must be rejected with a message naming it.
Deno.test('buildFilter throws on invalid author pubkey', () => {
  const build = () => {
    buildFilter({ authors: ['invalid_pubkey'] });
  };
  assertThrows(build, Error, 'ERROR: Invalid pubkey invalid_pubkey supplied.');
});
// A 64-character hex event ID is passed through to `filter.ids` untouched.
Deno.test('buildFilter should correctly handle valid ids', () => {
  const eventId = 'b'.repeat(64);
  const { ids } = buildFilter({ ids: [eventId] });
  assertEquals(ids, [eventId]);
});
// A value that is not a hex event ID must be rejected with a message naming it.
Deno.test('buildFilter should throw on invalid event IDs', () => {
  const build = () => {
    buildFilter({ ids: ['invalid_id'] });
  };
  assertThrows(build, Error, 'ERROR: Invalid event ID invalid_id supplied.');
});
// The `d`, `e` and `p` shortcuts each become a single-value `#<name>` tag filter.
Deno.test('buildFilter should correctly handle tag shortcuts', () => {
  const eventId = 'a'.repeat(64);
  const pubkey = 'b'.repeat(64);

  const filter = buildFilter({ d: 'value1', e: eventId, p: pubkey });

  assertEquals(filter['#d'], ['value1']);
  assertEquals(filter['#e'], [eventId]);
  assertEquals(filter['#p'], [pubkey]);
});
// Time bounds are copied onto the filter as-is.
Deno.test('buildFilter should correctly handle since and until args', () => {
  const { since, until } = buildFilter({ since: 1000, until: 2000 });
  assertEquals(since, 1000);
  assertEquals(until, 2000);
});
// The search string is copied onto the filter as-is.
Deno.test('buildFilter should correctly handle search field', () => {
  const { search } = buildFilter({ search: 'search_term' });
  assertEquals(search, 'search_term');
});
// Each `key=value` pair becomes a `#key` tag filter holding that value.
Deno.test('buildFilter should correctly handle tag k-v pairs', () => {
  const pairs = ['tag1=value1', 'tag2=value2'];

  const filter = buildFilter({ tags: pairs });

  assertEquals(filter['#tag1'], ['value1']);
  assertEquals(filter['#tag2'], ['value2']);
});
// The limit is copied onto the filter as-is.
Deno.test('buildFilter should correctly handle limit specifier', () => {
  const { limit } = buildFilter({ limit: 10 });
  assertEquals(limit, 10);
});

View file

@ -1,24 +1,158 @@
import { Storages } from '@/storages.ts'; import { Storages } from '@/storages.ts';
import { NostrFilter } from '@nostrify/nostrify';
import { Command, InvalidOptionArgumentError } from 'commander';
const store = await Storages.db(); interface ExportFilter {
authors?: string[];
console.warn('Exporting events...'); ids?: string[];
kinds?: number[];
let count = 0; limit?: number;
search?: string;
for await (const msg of store.req([{}])) { /**
if (msg[0] === 'EOSE') { * Array of `key=value` pairs.
break; */
} tags?: string[];
if (msg[0] === 'EVENT') { since?: number;
console.log(JSON.stringify(msg[2])); until?: number;
count++; /**
} * shortcut for `--tag d=<value>`
if (msg[0] === 'CLOSED') { */
console.error('Database closed unexpectedly'); d?: string;
break; /**
} * shortcut for `--tag e=<value>`
*/
e?: string;
/**
* shortcut for `--tag p=<value>`
*/
p?: string;
} }
console.warn(`Exported ${count} events`); function safeParseInt(s: string) {
Deno.exit(); const n = parseInt(s);
if (isNaN(n)) throw new InvalidOptionArgumentError('Not a number.');
return n;
}
/**
 * Returns the first entry of `arr` that the predicate reports as invalid,
 * or `undefined` when every entry passes.
 *
 * The default predicate treats a value as invalid unless it is exactly 64
 * lowercase hex characters. The pattern is anchored so that longer strings,
 * or strings that merely contain a 64-character hex run, are rejected too.
 *
 * @param arr - Values to check.
 * @param predicate - Returns `true` for a value that should be reported as invalid.
 * @returns The first invalid value, or `undefined` if there is none.
 */
function findInvalid(arr: string[], predicate = (v: string) => !/^[a-f0-9]{64}$/.test(v)) {
  return arr.find(predicate);
}
/**
 * Prints the given values to stderr and terminates the process.
 *
 * @param code - Exit code passed to `Deno.exit`.
 * @param args - Values forwarded unchanged to `console.error`.
 */
function die(code: number, ...args: unknown[]) {
  console.error(...args);
  Deno.exit(code);
}
/**
 * Validates the value given to a single-letter tag shortcut and wraps it in
 * the array form used for tag filters.
 *
 * @param name - Tag letter, used only in the error message.
 * @param value - Tag value; checked with `findInvalid`'s default predicate.
 * @returns A one-element array containing `value`.
 * @throws Error when `value` fails validation.
 */
function tagFilterShortcut(name: 'd' | 'e' | 'p', value: string) {
  const val = [value];
  // Compare against `undefined`: an invalid empty string is returned as `''`,
  // which a plain truthiness check would mistake for "nothing invalid".
  if (findInvalid(val) !== undefined) {
    throw new Error(`ERROR: Invalid value supplied for ${name}-tag.`);
  }
  return val;
}
/**
 * Builds a Nostr filter from the export options.
 *
 * Options that were not supplied (and empty `authors`/`ids`/`kinds` lists)
 * add nothing to the filter, so an empty `args` object yields `{}`.
 * Tag values given through `d`/`e`/`p` and through `tags` accumulate under
 * the same `#<name>` key rather than overwriting each other.
 *
 * @param args - Parsed export options.
 * @returns The filter to query the event store with.
 * @throws Error when an author pubkey, an event ID, or an `e`/`p` shortcut
 *   value fails validation.
 */
export function buildFilter(args: ExportFilter) {
  const filter: NostrFilter = {};
  const { authors, ids, kinds, d, e, limit, p, search, since, until, tags } = args;

  // Appends values to a tag filter, keeping anything already stored for that tag.
  const addTagValues = (name: string, values: string[]) => {
    const key: `#${string}` = `#${name}`;
    filter[key] = [...(filter[key] ?? []), ...values];
  };

  // Numeric options are compared against `undefined` so an explicit 0 is kept.
  if (since !== undefined) {
    filter.since = since;
  }
  if (until !== undefined) {
    filter.until = until;
  }
  if (limit !== undefined) {
    filter.limit = limit;
  }

  if (authors && authors.length) {
    const invalid = findInvalid(authors);
    // `!== undefined` so that an invalid empty string is reported as well.
    if (invalid !== undefined) throw new Error(`ERROR: Invalid pubkey ${invalid} supplied.`);
    filter.authors = authors;
  }

  // An empty list means "no restriction"; it must not become `ids: []`.
  if (ids && ids.length) {
    const invalid = findInvalid(ids);
    if (invalid !== undefined) throw new Error(`ERROR: Invalid event ID ${invalid} supplied.`);
    filter.ids = ids;
  }

  if (kinds && kinds.length) {
    filter.kinds = kinds;
  }

  // An explicitly supplied empty `d` value is kept as a tag value.
  if (d !== undefined) {
    addTagValues('d', [d]);
  }
  if (e) {
    addTagValues('e', tagFilterShortcut('e', e));
  }
  if (p) {
    addTagValues('p', tagFilterShortcut('p', p));
  }

  if (search) {
    filter.search = search;
  }

  if (tags) {
    for (const pair of tags) {
      // Split on the first `=` only, so values may themselves contain `=`.
      // A pair without `=` is treated as a tag name with an empty value.
      const eq = pair.indexOf('=');
      const name = eq === -1 ? pair : pair.slice(0, eq);
      const value = eq === -1 ? '' : pair.slice(eq + 1);
      addTagValues(name, [value]);
    }
  }

  return filter;
}
/**
 * Queries the database with a filter built from `args` and prints every
 * matching event to stdout as one JSON document per line. The final count
 * goes to stderr via `console.warn`.
 *
 * Invalid options terminate the process with exit code 1 before the
 * database is opened.
 *
 * @param args - Parsed export options.
 */
async function exportEvents(args: ExportFilter) {
  let filter: NostrFilter;
  try {
    filter = buildFilter(args);
  } catch (e) {
    // The catch variable is not guaranteed to be an Error; narrow before reading `.message`.
    die(1, e instanceof Error && e.message ? e.message : String(e));
    // `die` exits the process; the return keeps control flow explicit.
    return;
  }

  const store = await Storages.db();

  let count = 0;
  for await (const msg of store.req([filter])) {
    if (msg[0] === 'EOSE') {
      break;
    }
    if (msg[0] === 'EVENT') {
      console.log(JSON.stringify(msg[2]));
      count++;
    }
    if (msg[0] === 'CLOSED') {
      console.error('Database closed unexpectedly');
      break;
    }
  }

  console.warn(`Exported ${count} events`);
}
// CLI entry point: only runs when this module is executed directly, so that
// importing `buildFilter` (e.g. from tests) does not parse arguments.
if (import.meta.main) {
  const exporter = new Command()
    .name('db:export')
    .description('Export the specified set of events from the Ditto database, in JSONL format.')
    .version('0.1.0')
    .showHelpAfterError();

  exporter
    .option('-a, --authors <authors...>', 'Pubkeys of authors whose events you want to export.', [])
    .option('-i, --ids <ids...>', 'IDs of events you want to export.', [])
    .option(
      '-k --kinds <kinds...>',
      'Event kinds you want to export.',
      // Each occurrence is parsed as an integer and appended to the values collected so far.
      (v: string, arr: number[]) => arr.concat([safeParseInt(v)]),
      [],
    )
    .option(
      '-t --tags <tag pairs...>',
      'A list of key=value pairs of tags to search for events using. For tag values with spaces etc, simply quote the entire item, like `deno task db:export -t "name=A string with spaces in it"`.',
      [],
    )
    .option('--search <search string>', 'A string to full-text search the db for.')
    .option('-s --since <number>', 'The oldest time an exported event should be from.', safeParseInt)
    .option('-u --until <number>', 'The newest time an exported event should be from.', safeParseInt)
    .option('--limit <number>', 'Maximum number of events to export.', safeParseInt)
    // The help text names the real long option, `--tags`.
    .option('-d <string>', 'Shortcut for `--tags d=<value>`.')
    .option('-e <string>', 'Shortcut for `--tags e=<value>`.')
    .option('-p <string>', 'Shortcut for `--tags p=<value>`.')
    .action(exportEvents);

  // `Deno.args` holds only the user-supplied arguments, hence `from: 'user'`.
  await exporter.parseAsync(Deno.args, { from: 'user' });
}