Question: How do I modify this train_lm function so that the loss decreases from epoch to epoch and the model passes the sanity, perplexity, and causal checks?
def train_lm(args, train_text, dev_text, vocab_index):
    """
    :param args: command-line args, passed through here for your convenience
    :param train_text: train text as a sequence of characters
    :param dev_text: dev text as a sequence of characters
    :param vocab_index: an Indexer of the character vocabulary (27 characters)
    :return: a NeuralLanguageModel instance trained on the given data
    """
    print("training text length:", len(train_text))
    # Set default values for missing args if necessary
    lr = getattr(args, 'lr', 0.01)
    epochs = getattr(args, 'epochs', 30)
    batch_size = getattr(args, 'batch_size', 20)
    seq_len = getattr(args, 'seq_len', 20)
    model = TransformerModel(vocab_size=len(vocab_index))  # instantiate the Transformer defined elsewhere in the assignment
    # Set up optimizer, LR scheduler, and loss function
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2)
    loss_function = nn.CrossEntropyLoss()
    # Convert training text to indices
    train_inds = torch.tensor([vocab_index.index_of(c) for c in train_text], dtype=torch.long)
    # Training loop
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        # Train over batches of characters from the training data (size args.batch_size)
        for i in range(0, len(train_inds) - seq_len, batch_size):
            # Ensure the batch fits within the available training data
            if i + batch_size * seq_len > len(train_inds):
                break  # skip the last incomplete batch
            # Input (context) and target (next character, shifted by one) for each batch
            batch_input = train_inds[i:i + batch_size * seq_len]
            batch_target = train_inds[i + 1:i + 1 + batch_size * seq_len]
            # Ensure batch sizes are consistent
            if batch_input.size(0) != batch_size or batch_target.size(0) != batch_size:
                continue  # skip incomplete batches
            # Reshape the batches into (batch_size, seq_len)
            batch_input = batch_input.view(batch_size, seq_len)
            batch_target = batch_target.view(batch_size, seq_len)
            optimizer.zero_grad()
            # Forward pass through the model
            output = model(batch_input)  # shape: (batch_size, seq_len, vocab_size)
            # Loss comparing model output to batch_target
            loss = loss_function(output.reshape(-1, len(vocab_index)), batch_target.reshape(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch + 1}/{epochs}, loss: {total_loss / len(train_text)}")
        scheduler.step(total_loss / len(train_text))
    return NeuralLanguageModel(model, vocab_index)
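
One way to make the per-epoch loss actually decrease is to fix the batching loop. In the code above, the loop index advances by batch_size even though each batch consumes batch_size * seq_len characters, and the consistency check compares a slice of length batch_size * seq_len against batch_size, so with these defaults every batch is skipped, no gradient step is ever taken, and the printed loss stays at 0.0. Dividing by len(train_text) also turns the reported loss into a tiny per-character figure that ReduceLROnPlateau sees as essentially flat. Below is a minimal sketch of a corrected loop, assuming the same model, optimizer, scheduler, loss_function, train_inds, batch_size, seq_len, and epochs defined above, and assuming TransformerModel applies a causal (look-ahead) attention mask internally, which is what the causal check verifies:

chunk = batch_size * seq_len  # characters consumed by one batch
for epoch in range(epochs):
    model.train()
    total_loss = 0.0
    num_batches = 0
    # Step by a full chunk so batches do not overlap, and stop one character
    # early so the target slice (shifted by one) stays in range.
    for i in range(0, len(train_inds) - chunk - 1, chunk):
        batch_input = train_inds[i:i + chunk].view(batch_size, seq_len)
        batch_target = train_inds[i + 1:i + 1 + chunk].view(batch_size, seq_len)
        optimizer.zero_grad()
        output = model(batch_input)  # assumed shape: (batch_size, seq_len, vocab_size)
        loss = loss_function(output.reshape(-1, len(vocab_index)),
                             batch_target.reshape(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1
    avg_loss = total_loss / max(num_batches, 1)  # per-batch average, comparable across epochs
    print(f"Epoch {epoch + 1}/{epochs}, loss: {avg_loss:.4f}")
    scheduler.step(avg_loss)

With this restructuring the reported number is the average cross-entropy per batch, so it should drop noticeably over the first few epochs on the 27-character vocabulary. The perplexity check depends on the NeuralLanguageModel wrapper, which is not shown here: its log-probability method has to score each next character under the same shifted-by-one convention used for batch_target, and the causal check will fail unless TransformerModel masks future positions in its attention layers.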
