revamped training process

This commit is contained in:
Patrick 2024-07-06 18:20:21 +02:00
parent 79817340dd
commit e9d86f3309
1 changed file with 104 additions and 71 deletions

View File

@ -1,119 +1,152 @@
import tkinter as tk
import warnings
from datetime import datetime from datetime import datetime
import numpy.random import numpy.random
import torch.utils.data import torch.utils.data
import torch.cuda import torch.cuda
from tqdm.tk import tqdm
from architecture import MyCNN from architecture import MyCNN
from dataset import ImagesDataset from dataset import ImagesDataset
from AImageDataset import AImagesDataset from AImageDataset import AImagesDataset
model = MyCNN(input_channels=1,
input_size=(100, 100),
hidden_channels=[500, 250, 100, 50],
output_channels=20,
use_batchnorm=True,
kernel_size=[9, 5, 3, 3, 1],
stride=[1, 1, 1, 1, 1],
activation_function=torch.nn.ReLU())
num_epochs = 100 def train_model(accuracies,
losses,
batch_size = 64 progress_epoch,
optimizer = torch.optim.ASGD(model.parameters(), progress_train_data,
lr=0.001, progress_eval_data,
lambd=1e-4, model,
alpha=0.75, num_epochs,
t0=1000000.0, batch_size,
weight_decay=0) optimizer,
loss_function = torch.nn.CrossEntropyLoss() loss_function,
device,
start_time):
if __name__ == '__main__':
torch.random.manual_seed(42) torch.random.manual_seed(42)
numpy.random.seed(42) numpy.random.seed(42)
torch.multiprocessing.set_start_method('spawn', force=True)
start_time = datetime.now()
dataset = ImagesDataset("training_data") dataset = ImagesDataset("training_data")
# dataset = torch.utils.data.Subset(dataset, range(0, 20)) # dataset = torch.utils.data.Subset(dataset, range(0, 1024))
train_data, eval_data = torch.utils.data.random_split(dataset, [0.5, 0.5]) train_data, eval_data = torch.utils.data.random_split(dataset, [0.5, 0.5])
train_loader = torch.utils.data.DataLoader(AImagesDataset(train_data), batch_size=batch_size) augmented_train_data = AImagesDataset(train_data, False)
eval_loader = torch.utils.data.DataLoader(eval_data, batch_size=1) train_loader = torch.utils.data.DataLoader(augmented_train_data,
batch_size=batch_size,
num_workers=3,
pin_memory=True,
shuffle=True)
eval_loader = torch.utils.data.DataLoader(eval_data,
batch_size=batch_size,
num_workers=3,
pin_memory=True)
if torch.cuda.is_available(): for epoch in progress_epoch.range(num_epochs):
# print("GPU available")
model = model.cuda()
else:
warnings.warn("GPU not available")
train_losses = [] train_positives = torch.tensor(0, device=device)
eval_losses = [] eval_positives = torch.tensor(0, device=device)
progress_epoch = tqdm(range(num_epochs), position=0, tk_parent=root_window) train_loss = torch.tensor(0.0, device=device)
progress_epoch.set_description("Epoch") eval_loss = torch.tensor(0.0, device=device)
progress_train_data = tqdm(train_loader, position=1, tk_parent=root_window)
progress_eval_data = tqdm(eval_loader, position=2, tk_parent=root_window)
progress_train_data.set_description("Training progress")
progress_eval_data.set_description("Evaluation progress")
for epoch in progress_epoch:
train_loss = 0
eval_loss = 0
progress_train_data.reset()
progress_eval_data.reset()
# Start training of model # Start training of model
progress_train_data.reset()
model.train() model.train()
for batch_nr, (imageT, transforms, img_index, classIDs, labels, paths) in enumerate(progress_train_data): for batch_nr, (image_t, transforms, img_index, class_ids, labels, paths) \
imageT = imageT.to('cuda') in enumerate(progress_train_data.iter(train_loader)):
classIDs = classIDs.to('cuda') image_t = image_t.to(device)
class_ids = class_ids.to(device)
# progress_train_data.set_postfix_str("Running model...") outputs = model(image_t)
outputs = model(imageT)
optimizer.zero_grad() optimizer.zero_grad(set_to_none=True)
# progress_train_data.set_postfix_str("calculating loss...") loss = loss_function(outputs, class_ids)
loss = loss_function(outputs, classIDs)
# progress_train_data.set_postfix_str("propagating loss...")
loss.backward() loss.backward()
# progress_train_data.set_postfix_str("optimizing...")
optimizer.step() optimizer.step()
train_loss += loss.item() train_loss += loss
mean_loss = train_loss / len(train_loader) outputs.flatten()
train_losses.append(mean_loss) classes = outputs.argmax()
train_positives += torch.sum(torch.eq(classes, class_ids))
accuracies.append('train_acc', train_positives.item() / len(augmented_train_data))
losses.append('train_loss', train_loss.item() / len(augmented_train_data))
print("Train: ", train_positives.item(), "/ ", len(augmented_train_data),
" = ", train_positives.item() / len(augmented_train_data))
# evaluation of model # evaluation of model
progress_eval_data.reset()
model.eval() model.eval()
with torch.no_grad(): with torch.no_grad():
for (imageT, classIDs, labels, paths) in progress_eval_data: for (image_t, class_ids, labels, paths) in progress_eval_data.iter(eval_loader):
imageT = imageT.to('cuda') image_t = image_t.to(device)
classIDs = classIDs.to('cuda') class_ids = class_ids.to(device)
outputs = model(imageT) outputs = model(image_t)
loss = loss_function(outputs, classIDs) outputs.flatten()
eval_loss = loss.item() classes = outputs.argmax()
eval_losses.append(eval_loss) eval_positives += torch.sum(torch.eq(classes, class_ids))
eval_loss += loss_function(outputs, class_ids)
accuracies.append('eval_acc', eval_positives.item() / len(eval_data))
losses.append('eval_loss', eval_loss.item() / len(eval_data))
print("Eval: ", eval_positives.item(), "/ ", len(eval_data), " = ", eval_positives.item() / len(eval_data))
# print epoch summary # print epoch summary
# print(f"Epoch: {epoch} --- Train loss: {train_loss:7.4f} --- Eval loss: {eval_loss:7.4f}") # print(f"Epoch: {epoch} --- Train loss: {train_loss:7.4f} --- Eval loss: {eval_loss:7.4f}")
torch.save(model.state_dict(), f'models/model-{start_time.strftime("%Y%m%d-%H%M%S")}-epoch-{epoch}.pt') if eval_positives.item() / len(eval_data) > 0.3:
torch.save(model.state_dict(), f'models/model-{start_time.strftime("%Y%m%d-%H%M%S")}-epoch-{epoch}.pt')
with open(f'models/model-{start_time.strftime("%Y%m%d-%H%M%S")}.csv', 'a') as file:
file.write(f'{epoch};{len(augmented_train_data)};{len(eval_data)};{train_loss.item()};{eval_loss.item()};'
f'{train_positives};{eval_positives}\n')
def train_worker(p_epoch, p_train, p_eval, plotter_accuracies, plotter_loss):
if not torch.cuda.is_available():
raise RuntimeError("GPU not available")
device = 'cuda'
model = MyCNN(input_channels=1,
input_size=(100, 100)).to(device)
num_epochs = 1000000
batch_size = 64
optimizer = torch.optim.Adam(model.parameters(),
lr=0.00005,
# weight_decay=0.1,
fused=True)
loss_function = torch.nn.CrossEntropyLoss()
start_time = datetime.now()
file_name = f'models/model-{start_time.strftime("%Y%m%d-%H%M%S")}.csv'
with open(file_name.replace(".csv", ".txt"), 'a') as file:
file.write(f"device: {device}\n")
file.write(f"batch_size: {batch_size}\n")
file.write(f"optimizer: {optimizer}\n")
file.write(f"loss_function: {loss_function}\n")
file.write(f"model: {model}")
train_model(plotter_accuracies, plotter_loss, p_epoch, p_train, p_eval,
model,
num_epochs,
batch_size,
optimizer,
loss_function,
device,
start_time)