mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-21 04:39:06 +00:00
export danswer as text file
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,3 +1,4 @@
|
|||||||
.DS_Store
|
.DS_Store
|
||||||
server/.env
|
server/.env
|
||||||
.env
|
.env
|
||||||
|
server/exportdanswer
|
||||||
|
|||||||
65
server/reflector/tools/exportdanswer.py
Normal file
65
server/reflector/tools/exportdanswer.py
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
import json
|
||||||
|
import pathlib
|
||||||
|
|
||||||
|
|
||||||
|
async def export_db(filename: str) -> None:
    """Export all transcripts of a SQLite database as Danswer text files.

    Every row of the ``transcripts`` table is written into the
    ``exportdanswer`` directory (relative to the current working directory):

    * one ``<transcript id>-topic-<topic id>.txt`` file per topic, and
    * one ``<transcript id>-summary.txt`` file holding the long summary.

    Each file starts with a ``#DANSWER_METADATA=<json>`` line, followed by a
    title line and the text itself.

    Args:
        filename: Path to the SQLite database file to read.

    Raises:
        OSError: If the output directory or one of the files cannot be
            written.
    """
    from reflector.settings import settings

    db_path = pathlib.Path(filename).resolve()
    settings.DATABASE_URL = f"sqlite:///{db_path}"

    # Imported only after DATABASE_URL has been overridden.
    # NOTE(review): presumably reflector.db builds its connection from the
    # settings at import time, which is why this import is deferred - confirm.
    from reflector.db import database, transcripts

    await database.connect()
    try:
        rows = await database.fetch_all(transcripts.select())
    finally:
        # Release the connection even when the query fails.
        await database.disconnect()

    def export_transcript(transcript, output_dir):
        """Write the topic files and the summary file of one transcript."""
        # A transcript without topics (None) only gets a summary file
        # instead of aborting the whole export.
        for topic in transcript.topics or []:
            metadata = {
                "link": f"https://reflector.media/transcripts/{transcript.id}#topic:{topic['id']},timestamp:{topic['timestamp']}",
                "rfl_id": transcript.id,
                "rfl_topic_id": topic["id"],
                "rfl_topic_timestamp": topic["timestamp"],
                "rfl_topic_title": topic["title"],
            }
            j_metadata = json.dumps(metadata)

            # export transcript
            output = output_dir / f"{transcript.id}-topic-{topic['id']}.txt"
            with open(output, "w", encoding="utf8") as fd:
                fd.write(f"#DANSWER_METADATA={j_metadata}\n")
                fd.write("\n")
                fd.write(f"# {topic['title']} [{topic['timestamp']}]\n")
                fd.write("\n")
                fd.write(f"{topic['transcript']}\n")

        # export summary
        output = output_dir / f"{transcript.id}-summary.txt"
        metadata = {
            "link": f"https://reflector.media/transcripts/{transcript.id}",
            "rfl_id": transcript.id,
        }

        j_metadata = json.dumps(metadata)
        with open(output, "w", encoding="utf8") as fd:
            fd.write(f"#DANSWER_METADATA={j_metadata}\n")
            fd.write("\n")
            fd.write("# Summary\n")
            fd.write("\n")
            fd.write(f"{transcript.long_summary}\n")

    output_dir = pathlib.Path("exportdanswer")
    # open(..., "w") does not create missing parent directories, so make sure
    # the target directory exists before writing the first file.
    output_dir.mkdir(parents=True, exist_ok=True)
    for transcript in rows:
        export_transcript(transcript, output_dir)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: takes the path of the SQLite database as its
    # single positional argument and runs the export to completion.
    import argparse
    import asyncio

    cli = argparse.ArgumentParser()
    cli.add_argument("database", help="Sqlite Database file")
    options = cli.parse_args()

    # export_db is a coroutine function; asyncio.run drives it on a fresh
    # event loop.
    export_job = export_db(options.database)
    asyncio.run(export_job)
|
||||||
Reference in New Issue
Block a user