summary trials

2025-12-21 20:59:05 +00:00 · 2023-07-17 19:54:16 +05:30
parent 0282d4fca5
commit 6f2f608b6a
10 changed files with 380 additions and 46 deletions
--- a/trials/bert.py
+++ b/trials/bert.py
@@ -0,0 +1,66 @@
+import re
+
+import torch
+from transformers import BertTokenizer, BertModel
+from sentence_transformers import SentenceTransformer
+from sklearn.metrics.pairwise import cosine_similarity
+
+# Load the pre-trained BERT model and tokenizer
+model_name = "bert-base-uncased"
+model = BertModel.from_pretrained(model_name)
+tokenizer = BertTokenizer.from_pretrained(model_name)
+
+# Set the device to use
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+
+
+def embed(texts):
+    """Return one mean-pooled BERT embedding per string in *texts*.
+
+    Args:
+        texts: Non-empty list of strings to embed.
+
+    Returns:
+        A 2-D NumPy array with one row per input string.
+    """
+    # Calling the tokenizer (rather than tokenize + convert_tokens_to_ids)
+    # adds [CLS]/[SEP], pads the batch and truncates over-long inputs.
+    encoded = tokenizer(
+        texts, padding=True, truncation=True, return_tensors="pt"
+    ).to(device)
+    with torch.no_grad():
+        hidden_states = model(**encoded)[0]  # Extract the last hidden states
+    # Average over real tokens only so padding does not skew the mean.
+    mask = encoded["attention_mask"].unsqueeze(-1).to(hidden_states.dtype)
+    pooled = (hidden_states * mask).sum(dim=1) / mask.sum(dim=1)
+    return pooled.cpu().numpy()
+
+
+# Define the input text
+text = "Your input text to be summarized goes here."
+
+# Split the text into sentences at whitespace following ., ! or ?
+sentences = [s for s in re.split(r"(?<=[.!?])\s+", text.strip()) if s]
+
+# Choose the top sentences as the summary
+num_summary_sentences = 2  # Adjust as needed
+
+if sentences:
+    # Embed the sentences and the full text with the same model so the
+    # vectors share one space and cosine similarity is meaningful.
+    sentence_embeddings = embed(sentences)
+    input_text_embedding = embed([text])[0]
+
+    # Calculate cosine similarity between the input text and each sentence
+    similarity_scores = cosine_similarity([input_text_embedding], sentence_embeddings)
+
+    # Sort the sentences by similarity scores in descending order
+    ranked = sorted(zip(similarity_scores[0], sentences), key=lambda pair: pair[0], reverse=True)
+    sorted_sentences = [sent for _, sent in ranked]
+
+    # Sentences keep their own end punctuation, so join with a space.
+    summary = " ".join(sorted_sentences[:num_summary_sentences])
+else:
+    summary = ""
+print("Summary:", summary)