# Origin: commit 3f5133e419d82a373fccfafb1fa390546977b86e by Mathieu Virbel
# (Fri, 6 Oct 2023 19:55:19 +0200), subject "server: add a tool to export a
# reflector sqlite3 database to csv"; adds server/reflector/tools/exportdb.py.
"""Export the transcripts of a reflector sqlite3 database to a CSV file.

Each transcript is flattened into rows that all start with the transcript id:

* ``(transcript_id, key, value)`` for the scalar transcript attributes;
* ``(transcript_id, "topic", topic_id, key, value)`` for each topic field;
* ``(transcript_id, "event_transcript", event_index, key, value)`` for the
  text (and translation, when present) of each ``TRANSCRIPT`` event.
"""

import csv
import pathlib

# Scalar transcript attributes, in the order they are written to the CSV.
TRANSCRIPT_FIELDS = (
    "title",
    "name",
    "created_at",
    "long_summary",
    "short_summary",
    "source_language",
    "target_language",
    "user_id",
    "status",
)

# Keys read from every topic dict, in the order they are written to the CSV.
TOPIC_FIELDS = ("title", "summary", "timestamp", "transcript")

# Header row; rows are either 3 or 5 columns wide (see module docstring).
CSV_HEADER = ["transcript_id", "key", "value", "key", "value"]


def export_transcript(transcript):
    """Yield the CSV rows (as tuples) describing one transcript.

    Args:
        transcript: object exposing ``id``, every attribute named in
            ``TRANSCRIPT_FIELDS``, plus ``topics`` (iterable of dicts with an
            ``"id"`` key and the ``TOPIC_FIELDS`` keys) and ``events``
            (iterable of dicts with ``"event"`` and ``"data"`` keys).

    Yields:
        3-tuples for scalar attributes and 5-tuples for topics and
        transcript events.
    """
    tid = transcript.id

    for field in TRANSCRIPT_FIELDS:
        yield tid, field, getattr(transcript, field)

    # ``or []``: a transcript whose topics/events are None is exported as
    # having none instead of raising TypeError.
    for topic in transcript.topics or []:
        for field in TOPIC_FIELDS:
            yield tid, "topic", topic["id"], field, topic[field]

    # Only TRANSCRIPT events carry text; the index is the position in the
    # full event list, not among TRANSCRIPT events only.
    for idx, entry in enumerate(transcript.events or []):
        if entry["event"] != "TRANSCRIPT":
            continue
        data = entry["data"]
        yield tid, "event_transcript", idx, "text", data["text"]
        translation = data.get("translation")
        if translation is not None:
            yield tid, "event_transcript", idx, "translation", translation


def export_transcripts(transcripts):
    """Yield the CSV rows of every transcript in *transcripts*, in order."""
    for transcript in transcripts:
        yield from export_transcript(transcript)


async def export_db(filename: str, output: str = "export.csv") -> None:
    """Dump all transcripts of the sqlite3 database *filename* to a CSV file.

    Args:
        filename: path to the reflector sqlite3 database file.
        output: path of the CSV file to write; defaults to ``export.csv`` in
            the current working directory.
    """
    from reflector.settings import settings

    db_path = pathlib.Path(filename).resolve()
    settings.DATABASE_URL = f"sqlite:///{db_path}"

    # Imported only after DATABASE_URL is overridden -- presumably
    # reflector.db reads the setting at import time (TODO confirm).
    from reflector.db import database, transcripts

    await database.connect()
    try:
        records = await database.fetch_all(transcripts.select())
    finally:
        # Release the connection even when the query fails.
        await database.disconnect()

    csv_output = pathlib.Path(output).resolve()
    # newline="" is required by the csv module; the context manager makes
    # sure the file is flushed and closed.
    with open(csv_output, "w", newline="", encoding="utf-8") as fd:
        writer = csv.writer(fd)
        writer.writerow(CSV_HEADER)
        writer.writerows(export_transcripts(records))

    print(f"Exported {len(records)} transcripts to {csv_output}")


if __name__ == "__main__":
    import argparse
    import asyncio

    parser = argparse.ArgumentParser()
    parser.add_argument("database", help="Sqlite Database file")
    parser.add_argument(
        "--output",
        default="export.csv",
        help="CSV file to write (default: export.csv)",
    )
    args = parser.parse_args()

    asyncio.run(export_db(args.database, args.output))