refactor

2025-12-21 04:39:06 +00:00 · 2023-07-11 11:06:27 +05:30
parent 58c9cdf676
commit b7fbfb2a54
13 changed files with 54 additions and 44 deletions
--- a/reflector-local/3-transcript-summarizer.py
+++ b/reflector-local/3-transcript-summarizer.py
@@ -1,11 +1,14 @@
 import argparse
+
 import nltk
+
 nltk.download('stopwords')
 from nltk.corpus import stopwords
 from nltk.tokenize import word_tokenize, sent_tokenize
 from heapq import nlargest
 from loguru import logger

+
 # Function to initialize the argument parser
 def init_argparse():
    parser = argparse.ArgumentParser(
@@ -17,12 +20,14 @@ def init_argparse():
    parser.add_argument("--num_sentences", type=int, default=5, help="Number of sentences to include in the summary")
    return parser

+
 # Function to read the input transcript file
 def read_transcript(file_path):
    with open(file_path, "r") as file:
        transcript = file.read()
    return transcript

+
 # Function to preprocess the text by removing stop words and special characters
 def preprocess_text(text):
    stop_words = set(stopwords.words('english'))
@@ -30,6 +35,7 @@ def preprocess_text(text):
    words = [w.lower() for w in words if w.isalpha() and w.lower() not in stop_words]
    return words

+
 # Function to score each sentence based on the frequency of its words and return the top sentences
 def summarize_text(text, num_sentences):
    # Tokenize the text into sentences
@@ -61,6 +67,7 @@ def summarize_text(text, num_sentences):

    return " ".join(summary)

+
 def main():
    # Initialize the argument parser and parse the arguments
    parser = init_argparse()
@@ -82,5 +89,6 @@ def main():

    logger.info("Summarization completed")

+
 if __name__ == "__main__":
    main()