Mirror of https://github.com/Monadical-SAS/reflector.git, synced 2025-12-22 05:09:05 +00:00

Commit: summary trials
@@ -0,0 +1,47 @@
import subprocess

# Legacy OpenAI CLI fine-tuning workflow (pre-v1 CLI), kept for reference.
#
# Prepare the dataset for fine-tuning (list form avoids the shell=True
# pitfall of passing subprocess.run a single command string):
# subprocess.run(["openai", "tools", "fine_tunes.prepare_data",
#                 "-f", "finetuning_dataset.jsonl"], check=True)
#
# Set the API key in the shell first:
# export OPENAI_API_KEY=
#
# Create the fine-tune job:
# openai api fine_tunes.create -t <TRAIN_FILE_ID_OR_PATH> -m <BASE_MODEL>
#
# List existing fine-tune jobs:
# openai api fine_tunes.list
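
# Hypothetical helper (not in the original commit): the same legacy CLI
# steps driven from Python via subprocess. Argument values are placeholders.
def run_legacy_finetune(train_file: str, base_model: str) -> None:
    subprocess.run(["openai", "api", "fine_tunes.create",
                    "-t", train_file, "-m", base_model], check=True)
    subprocess.run(["openai", "api", "fine_tunes.list"], check=True)
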
import torch
from transformers import (
    GPT2LMHeadModel,
    GPT2Tokenizer,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling,
)

# Load the GPT-2 tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

# GPT-2 ships without a padding token; reuse EOS so padding=True works below
tokenizer.pad_token = tokenizer.eos_token
# Load and preprocess your dataset
dataset = [...]  # Your dataset of transcriptions and corresponding titles
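
# Hypothetical illustration (not from the original): the tokenizer call
# below expects a list of strings, so one plausible format is a single
# "transcription -> title" string per example.
example_dataset = [
    "Transcription: the team reviewed the release checklist. Title: Release Checklist Review",
    "Transcription: new-hire onboarding docs were discussed. Title: Onboarding Docs Discussion",
]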
# Tokenize and encode the dataset
encodings = tokenizer(dataset, truncation=True, padding=True)

# A raw BatchEncoding is not indexable per-example the way Trainer needs;
# wrap it in a torch Dataset that yields one dict of tensors per example
class TitleDataset(torch.utils.data.Dataset):
    def __init__(self, encodings):
        self.encodings = encodings

    def __len__(self):
        return len(self.encodings["input_ids"])

    def __getitem__(self, idx):
        return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}

encoded_dataset = TitleDataset(encodings)
# Define the fine-tuning training arguments
training_args = TrainingArguments(
    output_dir="./fine_tuned_model",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    save_steps=1000,
    save_total_limit=2,
    prediction_loss_only=True,
)
# Define the fine-tuning trainer. The collator pads each batch and sets
# labels = input_ids, which is what causal-LM fine-tuning needs; without
# labels, the Trainer cannot compute a loss.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_dataset,
    data_collator=data_collator,
)
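
# Note (an assumption, not in the original): with save_steps=1000 and
# save_total_limit=2, intermediate checkpoints land under output_dir, and
# an interrupted run can be resumed with:
#
#   trainer.train(resume_from_checkpoint=True)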
# Fine-tune the GPT-2 model
trainer.train()

# Save the fine-tuned model; saving the tokenizer too makes the output
# directory loadable with from_pretrained later
trainer.save_model("./fine_tuned_model")
tokenizer.save_pretrained("./fine_tuned_model")
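
# Illustrative follow-up (hypothetical, not part of the original commit):
# load the saved model and generate a title for a new transcription. The
# prompt format mirrors the example_dataset sketch above and is an assumption.
def generate_title(transcription: str) -> str:
    tok = GPT2Tokenizer.from_pretrained("./fine_tuned_model")
    lm = GPT2LMHeadModel.from_pretrained("./fine_tuned_model")
    inputs = tok("Transcription: " + transcription + " Title:", return_tensors="pt")
    output_ids = lm.generate(**inputs, max_new_tokens=20, pad_token_id=tok.eos_token_id)
    new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]
    return tok.decode(new_tokens, skip_special_tokens=True).strip()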