TensorboardX, pytorch-ignite メモ
argparser
# Minimal argparse usage: declare an option, parse the command line, then
# read the parsed value as an attribute of the returned namespace.
from argparse import ArgumentParser
parser = ArgumentParser()
# Integer option with a default of 256 when the flag is not supplied.
parser.add_argument('--batch_size', type=int, default=256, help='training batch size')
...  # placeholder in the memo for further add_argument calls
args = parser.parse_args()
args.batch_size # 256 (the default, when --batch_size is not passed)
TensorboardX
pytorch 1.1 から本体に統合されたらしい
# SummaryWriter is imported from torch itself (torch.utils.tensorboard).
from torch.utils.tensorboard import SummaryWriter
# Create a writer whose log directory is taken from the parsed CLI arguments.
writer = SummaryWriter(log_dir=args.log_dir)
writer.add_scalar(f'train/ac', y, x)
$ tensorboard --logdir=logs
pytorch-ignite
CNNサンプルコード
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch
from torch.utils.data import DataLoader
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from torch.utils.tensorboard import SummaryWriter
from logging import getLogger
from ignite.engine import Engine, Events, create_supervised_trainer, create_supervised_evaluator
from ignite.metrics import Accuracy, Loss
from ignite.handlers import ModelCheckpoint, EarlyStopping
from argparse import ArgumentParser
class Net(nn.Module):
    """Small convolutional classifier that outputs log-probabilities for 10 classes.

    Architecture: two 5x5 convolutions (1 -> 10 -> 20 channels), each followed
    by 2x2 max-pooling and ReLU, then two fully connected layers (320 -> 50 -> 10).
    ``fc1`` expects 320 = 20 * 4 * 4 features, which is what a 1x28x28 input
    yields after the two conv/pool stages.

    Two different dropout modules are used on purpose:

    * ``dropout`` (``nn.Dropout2d``) zeroes whole channels of the 4-D conv
      feature map.
    * ``fc_dropout`` (``nn.Dropout``) zeroes individual activations of the 2-D
      fully connected output. Feeding a 2-D tensor to ``Dropout2d`` is not its
      intended use and is deprecated in recent PyTorch releases.

    Dropout layers hold no parameters, so ``state_dict`` keys are unaffected.
    """

    def __init__(self):
        super().__init__()
        # Conv2d(in_channels, out_channels, kernel_size)
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        # Channel-wise dropout for conv feature maps (N, C, H, W).
        self.dropout = nn.Dropout2d(0.5)
        # Element-wise dropout for fully connected activations (N, features).
        self.fc_dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, 10)``.

        Args:
            x: Input batch; must flatten to 320 features per sample after the
                convolutional stages (e.g. shape ``(batch, 1, 28, 28)``).

        Returns:
            Tensor of log-softmax scores over the 10 output classes.
        """
        # Convolutional feature extractor.
        x = self.conv1(x)
        x = F.max_pool2d(x, 2)
        x = F.relu(x)
        x = self.conv2(x)
        x = self.dropout(x)
        x = F.max_pool2d(x, 2)
        x = F.relu(x)
        # Flatten (batch, 20, 4, 4) -> (batch, 320) for the linear layers.
        x = x.view(-1, 320)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc_dropout(x)
        x = self.fc2(x)
        # log_softmax pairs with F.nll_loss used by the training code.
        return F.log_softmax(x, dim=1)
def get_data_loaders(data_dir: str, batch_size: int = 64, test_batch_size: int = 1000):
    """Build the MNIST train and test ``DataLoader`` pair.

    Both splits are downloaded into ``data_dir`` if missing and converted with
    ``transforms.ToTensor()``.

    Args:
        data_dir: Root directory passed to the ``MNIST`` dataset.
        batch_size: Batch size of the (shuffled) training loader.
        test_batch_size: Batch size of the (unshuffled) test loader.

    Returns:
        Tuple ``(train_loader, test_loader)``.
    """

    def _mnist_split(is_train: bool):
        # Same dataset arguments for both splits; only the `train` flag differs.
        return MNIST(data_dir, train=is_train, download=True, transform=transforms.ToTensor())

    # Training split: reshuffled by the loader.
    train_loader = DataLoader(_mnist_split(True), batch_size=batch_size, shuffle=True)
    # Test split: kept in dataset order.
    test_loader = DataLoader(_mnist_split(False), batch_size=test_batch_size, shuffle=False)
    return train_loader, test_loader
def write_metrics(metrics, writer, logger, mode: str, epoch: int):
    """Log accuracy and loss for one epoch and record them as scalars.

    Args:
        metrics: Mapping providing the ``'ac'`` and ``'nll'`` entries.
        writer: Object exposing ``add_scalar(tag, value, step)``.
        logger: Object exposing ``info(message)``.
        mode: Prefix used in the log line and in the scalar tags
            (``'<mode>/ac'``, ``'<mode>/nll'``).
        epoch: Epoch number, used as the scalar step.
    """
    logger.info(f'{mode} results - Epoch: {epoch}')
    accuracy = metrics['ac']
    loss = metrics['nll']
    logger.info(f'Ac: {accuracy:.2f} Loss: {loss:.2f}')
    # Emit the scalars in a fixed order: accuracy first, then loss.
    scalars = ((f'{mode}/ac', accuracy), (f'{mode}/nll', loss))
    for tag, value in scalars:
        writer.add_scalar(tag, value, epoch)
def main(args):
    """Train ``Net`` on MNIST with ignite and log metrics to TensorBoard.

    After every epoch the model is evaluated on the training set and on the
    test set with *separate* evaluator engines, the metrics are written via
    ``write_metrics``, a checkpoint is saved (last 3 kept), and early stopping
    is checked against the test-set loss.

    Args:
        args: Namespace providing ``log_dir``, ``name``, ``device``,
            ``data_dir``, ``batch_size``, ``test_batch_size``,
            ``log_interval``, ``model_dir`` and ``epochs``
            (see ``parse_cnn_args``).
    """
    writer = SummaryWriter(log_dir=args.log_dir)
    # NOTE: no handler/level is configured here; INFO records are only shown
    # if the caller has configured logging accordingly.
    logger = getLogger(args.name)
    device = args.device

    model = Net()
    # The ignite factories only move *batches* to `device`; move the model
    # explicitly, and do it before the optimizer captures the parameters.
    model.to(device)
    logger.info(model)

    # Adam with default hyper-parameters.
    optimizer = optim.Adam(model.parameters())

    train_loader, test_loader = get_data_loaders(
        args.data_dir,
        batch_size=args.batch_size,
        test_batch_size=args.test_batch_size,
    )

    trainer = create_supervised_trainer(model, optimizer, F.nll_loss, device=device)

    def build_evaluator():
        # Fresh metric objects per evaluator so train/test state never mixes.
        return create_supervised_evaluator(
            model,
            metrics={'ac': Accuracy(), 'nll': Loss(F.nll_loss)},
            device=device,
        )

    train_evaluator = build_evaluator()
    test_evaluator = build_evaluator()

    desc = 'Epoch {} Iteration - loss: {:.2f}'
    pbar = tqdm(initial=0, leave=False, total=len(train_loader), desc=desc.format(0, 0))

    @trainer.on(Events.ITERATION_COMPLETED(every=args.log_interval))
    def log_training_loss(engine: Engine):
        # engine.state.output is the trainer's per-iteration output (the loss).
        pbar.desc = desc.format(engine.state.epoch, engine.state.output)
        pbar.update(args.log_interval)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(engine: Engine):
        pbar.refresh()
        train_evaluator.run(train_loader)
        write_metrics(train_evaluator.state.metrics, writer, logger, 'train', engine.state.epoch)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_test_results(engine: Engine):
        # Actually evaluate on the test split; previously the metrics left
        # over from the training-set evaluation were re-logged as "test".
        test_evaluator.run(test_loader)
        write_metrics(test_evaluator.state.metrics, writer, logger, 'test', engine.state.epoch)
        # Rewind the progress bar for the next epoch.
        pbar.n = pbar.last_print_n = 0

    # Keep the 3 most recent checkpoints.
    checkpoint_handler = ModelCheckpoint(
        dirname=args.model_dir,
        filename_prefix=args.name,
        n_saved=3,
        create_dir=True,
        require_empty=False,
    )
    trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpoint_handler, {'model': model})

    def score_function(engine):
        # EarlyStopping treats higher scores as better, so negate the loss.
        return -1 * engine.state.metrics['nll']

    # Early stopping is driven by the held-out (test) loss only, so it fires
    # once per epoch and is not influenced by the training-set evaluation.
    early_stopping = EarlyStopping(patience=5, score_function=score_function, trainer=trainer)
    test_evaluator.add_event_handler(Events.COMPLETED, early_stopping)

    try:
        trainer.run(train_loader, max_epochs=args.epochs)
    finally:
        # Release the progress bar and flush/close the event file even when
        # training is interrupted or raises.
        pbar.close()
        writer.close()
    logger.info('Train Completed')
def parse_cnn_args():
    """Parse the command-line options of the CNN training script.

    Returns:
        ``argparse.Namespace`` with ``batch_size``, ``test_batch_size``,
        ``epochs``, ``log_interval``, ``log_dir``, ``model_dir``,
        ``data_dir``, ``device`` and ``name``.
    """
    # (flag, type, default, help) for every supported option, in CLI order.
    option_specs = (
        ('--batch_size', int, 256, 'training batch size'),
        ('--test_batch_size', int, 1000, 'test batch size'),
        ('--epochs', int, 30, 'epochs count'),
        ('--log_interval', int, 10, 'every n batch, update progress'),
        ('--log_dir', str, '../../logs/cnn', 'log dir'),
        ('--model_dir', str, '../../models/cnn', 'model dir'),
        ('--data_dir', str, '../../datasets/cnn', 'dataset dir'),
        ('--device', str, 'cuda', 'cpu or cuda'),
        ('--name', str, 'cnn', 'project name'),
    )
    cli = ArgumentParser()
    for flag, value_type, default_value, help_text in option_specs:
        cli.add_argument(flag, type=value_type, default=default_value, help=help_text)
    return cli.parse_args()
# Script entry point: parse the CLI options, then start training.
if __name__ == '__main__':
    cli_args = parse_cnn_args()
    main(cli_args)