restructuring

2025-12-21 12:49:06 +00:00 · 2023-07-25 10:44:37 +05:30
parent 8be41647fe
commit b0b47cca83
15 changed files with 27 additions and 3 deletions
--- a/trials/title_summary/bert.py
+++ b/trials/title_summary/bert.py
@@ -0,0 +1,71 @@
+import re
+
+import torch
+from transformers import BertTokenizer, BertModel
+# NOTE: no longer used below; kept so the file's import set is unchanged.
+from sentence_transformers import SentenceTransformer
+from sklearn.metrics.pairwise import cosine_similarity
+
+# Load the pre-trained BERT model and tokenizer
+model_name = "bert-base-uncased"
+model = BertModel.from_pretrained(model_name)
+tokenizer = BertTokenizer.from_pretrained(model_name)
+
+# Set the device to use
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+
+
+def split_sentences(text):
+    """Split *text* into sentences at whitespace following '.', '!' or '?'."""
+    parts = re.split(r"(?<=[.!?])\s+", text.strip())
+    return [part for part in parts if part]
+
+
+def embed(texts):
+    """Return one mean-pooled BERT embedding per string in *texts*.
+
+    The tokenizer call adds the special tokens, pads the batch and truncates
+    over-long inputs. The result is a 2-D NumPy array with one row per input,
+    which is the layout ``cosine_similarity`` expects.
+    """
+    encoded = tokenizer(
+        texts, padding=True, truncation=True, return_tensors="pt"
+    ).to(device)
+    with torch.no_grad():
+        hidden = model(**encoded)[0]  # Extract the last hidden states
+    # Average over real tokens only so padding does not dilute the embedding.
+    mask = encoded["attention_mask"].unsqueeze(-1).to(hidden.dtype)
+    pooled = (hidden * mask).sum(dim=1) / mask.sum(dim=1)
+    return pooled.cpu().numpy()
+
+
+def summarize(text, num_sentences=2):
+    """Return the *num_sentences* sentences most similar to *text* as a whole.
+
+    Sentences and the full text are embedded with the same BERT model so the
+    cosine similarities are comparable. Returns "" when *text* has no sentences.
+    """
+    sentences = split_sentences(text)
+    if not sentences:
+        return ""
+    sentence_embeddings = embed(sentences)
+    input_text_embedding = embed([text])[0]
+    # Calculate cosine similarity between the input text and each sentence
+    similarity_scores = cosine_similarity(
+        [input_text_embedding], sentence_embeddings
+    )[0]
+    # Rank by score only (descending); ties keep their original order.
+    ranked = sorted(
+        zip(similarity_scores, sentences), key=lambda pair: pair[0], reverse=True
+    )
+    return " ".join(sentence for _, sentence in ranked[:num_sentences])
+
+
+# Define the input text
+text = "Your input text to be summarized goes here."
+
+# Choose the top sentences as the summary
+num_summary_sentences = 2  # Adjust as needed
+summary = summarize(text, num_summary_sentences)
+print("Summary:", summary)