refactor

2025-12-21 12:49:06 +00:00 · 2023-07-11 11:06:27 +05:30
parent 58c9cdf676
commit b7fbfb2a54
13 changed files with 54 additions and 44 deletions
--- a/reflector-local/whisper_summarizer_bart.py
+++ b/reflector-local/whisper_summarizer_bart.py
@@ -1,15 +1,18 @@
 import argparse
 import os
 import tempfile
+
 import moviepy.editor
+import nltk
+import whisper
 from loguru import logger
 from transformers import BartTokenizer, BartForConditionalGeneration
-import whisper
-import nltk
+
 nltk.download('punkt', quiet=True)

 WHISPER_MODEL_SIZE = "base"

+
 def init_argparse() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(
        usage="%(prog)s [OPTIONS] <LOCATION> <OUTPUT>",
@@ -30,6 +33,7 @@ def init_argparse() -> argparse.ArgumentParser:

    return parser

+
 # NLTK chunking function
 def chunk_text(txt, max_chunk_length=500):
    "Split text into smaller chunks."
@@ -45,6 +49,7 @@ def chunk_text(txt, max_chunk_length=500):
    chunks.append(current_chunk.strip())
    return chunks

+
 # BART summary function
 def summarize_chunks(chunks, tokenizer, model):
    summaries = []
@@ -56,6 +61,7 @@ def summarize_chunks(chunks, tokenizer, model):
        summaries.append(summary)
    return summaries

+
 def main():
    import sys
    sys.setrecursionlimit(10000)
@@ -103,7 +109,7 @@ def main():
    chunks = chunk_text(whisper_result['text'])

    logger.info(
-        f"Transcript broken into {len(chunks)} chunks of at most 500 words") # TODO fix variable
+        f"Transcript broken into {len(chunks)} chunks of at most 500 words")  # TODO fix variable

    logger.info(f"Writing summary text in {args.language} to: {args.output}")
    with open(args.output, 'w') as f:
@@ -114,5 +120,6 @@ def main():

    logger.info("Summarization completed")

+
 if __name__ == "__main__":
    main()