export danswer as text file

This commit is contained in:
2023-10-10 14:57:16 +02:00
committed by Mathieu Virbel
parent 3f5133e419
commit 953e51ef9f
2 changed files with 66 additions and 0 deletions

1
.gitignore vendored
View File

@@ -1,3 +1,4 @@
.DS_Store
server/.env
.env
server/exportdanswer

View File

@@ -0,0 +1,65 @@
import json
import pathlib
async def export_db(filename: str) -> None:
    """Export every transcript stored in a SQLite database as Danswer text files.

    For each row of the ``transcripts`` table this writes, into the
    ``exportdanswer`` directory (relative to the current working directory):

    * one ``<transcript id>-topic-<topic id>.txt`` file per topic, and
    * one ``<transcript id>-summary.txt`` file holding the long summary.

    Every file starts with a ``#DANSWER_METADATA=<json>`` line carrying a link
    back to the transcript on reflector.media and the identifiers of the
    exported item.

    Args:
        filename: Path to the SQLite database file; it is resolved to an
            absolute path before being turned into a database URL.
    """
    from reflector.settings import settings

    db_path = pathlib.Path(filename).resolve()
    settings.DATABASE_URL = f"sqlite:///{db_path}"

    # NOTE(review): this import is deliberately placed after DATABASE_URL is
    # overridden, presumably because reflector.db reads the setting when it is
    # imported — confirm before hoisting it to module level.
    from reflector.db import database, transcripts

    await database.connect()
    try:
        rows = await database.fetch_all(transcripts.select())
    finally:
        # Release the connection even when the query fails.
        await database.disconnect()

    def export_transcript(transcript, output_dir):
        """Write the per-topic files and the summary file for one transcript."""
        for topic in transcript.topics:
            metadata = {
                "link": f"https://reflector.media/transcripts/{transcript.id}#topic:{topic['id']},timestamp:{topic['timestamp']}",
                "rfl_id": transcript.id,
                "rfl_topic_id": topic["id"],
                "rfl_topic_timestamp": topic["timestamp"],
                "rfl_topic_title": topic["title"],
            }
            j_metadata = json.dumps(metadata)

            # export transcript
            output = output_dir / f"{transcript.id}-topic-{topic['id']}.txt"
            with open(output, "w", encoding="utf8") as fd:
                fd.write(f"#DANSWER_METADATA={j_metadata}\n")
                fd.write("\n")
                fd.write(f"# {topic['title']} [{topic['timestamp']}]\n")
                fd.write("\n")
                fd.write(f"{topic['transcript']}\n")

        # export summary
        output = output_dir / f"{transcript.id}-summary.txt"
        metadata = {
            "link": f"https://reflector.media/transcripts/{transcript.id}",
            "rfl_id": transcript.id,
        }
        j_metadata = json.dumps(metadata)
        with open(output, "w", encoding="utf8") as fd:
            fd.write(f"#DANSWER_METADATA={j_metadata}\n")
            fd.write("\n")
            fd.write("# Summary\n")
            fd.write("\n")
            fd.write(f"{transcript.long_summary}\n")

    output_dir = pathlib.Path("exportdanswer")
    # open(..., "w") does not create missing directories, so make sure the
    # destination exists before the first file is written.
    output_dir.mkdir(parents=True, exist_ok=True)
    for transcript in rows:
        export_transcript(transcript, output_dir)
if __name__ == "__main__":
    # Script entry point: takes the SQLite database path as the single
    # positional command-line argument and runs the export to completion.
    import argparse
    import asyncio

    cli = argparse.ArgumentParser()
    cli.add_argument("database", help="Sqlite Database file")
    database_file = cli.parse_args().database
    asyncio.run(export_db(database_file))