Skip to content

Just for fun, I decided to import a subset of my MF2-JSON data into Postgres and see if I can maybe improve Kittybox with it.

CREATE DATABASE mf2_test;
CREATE TABLE vika.mf2_json (url TEXT NOT NULL PRIMARY KEY, mf2 JSONB NOT NULL);
for post in $(ssh root@primrose "ls /persist/containers/kittybox/kittybox/data/fireburn.ru/posts/*.json"); do
    json="$(ssh root@primrose cat "$post")"
    echo "INSERT INTO vika.mf2_json (url, mf2) VALUES ('$(echo "$json" | jq -r ".properties.uid[]")', '$(echo "$json" | sed -e "s/'/''/g")') ON CONFLICT DO NOTHING;"
done | psql mf2_test --single-transaction
-- See which categories I am using for posts
SELECT DISTINCT jsonb_strip_nulls(jsonb_path_query(mf2['properties']['category'], '$[*]'))  #>> '{}' AS tag FROM vika.mf2_json ORDER BY tag ASC;

-- Index the post corpus for full-text search
CREATE INDEX fulltext ON vika.mf2_json USING GIN (to_tsvector('english', mf2['properties']['content'][]));

-- Run a full-text search -- takes 90ms w/o index, 2ms with index!
SELECT url, mf2['properties']['content'][]['value'] FROM mf2_json WHERE to_tsvector('english', mf2['properties']['content'][]) @@ to_tsquery('Kittybox') ORDER BY mf2['properties']['published'][] DESC;

This makes it possible to run a lot of data analysis on posts. Maybe I'll finally have some stuff to populate my widget on the right of the main page with.