Update export tools for Danswer to match the plugin

This commit is contained in:
2023-10-11 19:10:50 +02:00
committed by Mathieu Virbel
parent 953e51ef9f
commit 35af25d4e8

View File

@@ -1,5 +1,6 @@
import json import json
import pathlib import pathlib
from datetime import timedelta
async def export_db(filename: str) -> None: async def export_db(filename: str) -> None:
@@ -18,10 +19,14 @@ async def export_db(filename: str) -> None:
for topic in transcript.topics: for topic in transcript.topics:
metadata = { metadata = {
"link": f"https://reflector.media/transcripts/{transcript.id}#topic:{topic['id']},timestamp:{topic['timestamp']}", "link": f"https://reflector.media/transcripts/{transcript.id}#topic:{topic['id']},timestamp:{topic['timestamp']}",
"rfl_id": transcript.id, "transcript_id": transcript.id,
"rfl_topic_id": topic["id"], "transcript_created_at": transcript.created_at.isoformat(),
"rfl_topic_timestamp": topic["timestamp"], "topic_id": topic["id"],
"rfl_topic_title": topic["title"], "topic_relative_timestamp": topic["timestamp"],
"topic_created_at": (
transcript.created_at + timedelta(seconds=topic["timestamp"])
).isoformat(),
"topic_title": topic["title"],
} }
j_metadata = json.dumps(metadata) j_metadata = json.dumps(metadata)
@@ -30,24 +35,24 @@ async def export_db(filename: str) -> None:
with open(output, "w", encoding="utf8") as fd: with open(output, "w", encoding="utf8") as fd:
fd.write(f"#DANSWER_METADATA={j_metadata}\n") fd.write(f"#DANSWER_METADATA={j_metadata}\n")
fd.write("\n") fd.write("\n")
fd.write(f"# {topic['title']} [{topic['timestamp']}]\n") fd.write(f"# {topic['title']}\n")
fd.write("\n") fd.write("\n")
fd.write(f"{topic['transcript']}\n") fd.write(f"{topic['transcript']}\n")
# export summary # # export summary
output = output_dir / f"{transcript.id}-summary.txt" # output = output_dir / f"{transcript.id}-summary.txt"
metadata = { # metadata = {
"link": f"https://reflector.media/transcripts/{transcript.id}", # "link": f"https://reflector.media/transcripts/{transcript.id}",
"rfl_id": transcript.id, # "rfl_id": transcript.id,
} # }
#
j_metadata = json.dumps(metadata) # j_metadata = json.dumps(metadata)
with open(output, "w", encoding="utf8") as fd: # with open(output, "w", encoding="utf8") as fd:
fd.write(f"#DANSWER_METADATA={j_metadata}\n") # fd.write(f"#DANSWER_METADATA={j_metadata}\n")
fd.write("\n") # fd.write("\n")
fd.write("# Summary\n") # fd.write("# Summary\n")
fd.write("\n") # fd.write("\n")
fd.write(f"{transcript.long_summary}\n") # fd.write(f"{transcript.long_summary}\n")
output_dir = pathlib.Path("exportdanswer") output_dir = pathlib.Path("exportdanswer")
for transcript in transcripts: for transcript in transcripts: