TensorboardX, pytorch-ignite メモ

ml pytorch

argparse

# Minimal argparse usage: declare an option, parse the command line, read the value.
from argparse import ArgumentParser
 
parser = ArgumentParser()
# Registers an integer option `--batch_size`; 256 is used when the flag is omitted.
parser.add_argument('--batch_size', type=int, default=256, help='training batch size')
...
# parse_args() reads the options from the command line (sys.argv) by default.
args = parser.parse_args()
args.batch_size # 256 (the default) unless --batch_size was passed
                     

TensorboardX

PyTorch 1.1 から `torch.utils.tensorboard` として本体に統合されたらしい（利用には別途 `tensorboard` パッケージのインストールが必要）

# Minimal TensorBoard logging with the SummaryWriter bundled in PyTorch.
from torch.utils.tensorboard import SummaryWriter
 
# Event files are written under log_dir; `args` is the parsed command-line namespace.
writer = SummaryWriter(log_dir=args.log_dir)
# add_scalar(tag, scalar_value, global_step): y is the value plotted at step x.
writer.add_scalar(f'train/ac', y, x)
$ tensorboard --logdir=logs

image

pytorch-ignite

CNNサンプルコード

import matplotlib.pyplot as plt
from tqdm import tqdm
import torch
from torch.utils.data import DataLoader
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from torch.utils.tensorboard import SummaryWriter
from logging import getLogger
from ignite.engine import Engine, Events, create_supervised_trainer, create_supervised_evaluator
from ignite.metrics import Accuracy, Loss
from ignite.handlers import ModelCheckpoint, EarlyStopping
from argparse import ArgumentParser
 
 
class Net(nn.Module):
    """Small CNN classifier for single-channel 28x28 images (e.g. MNIST).

    Two 5x5 convolutions, each followed by 2x2 max pooling and ReLU, feed a
    two-layer fully connected head that returns log-probabilities over 10
    classes.  With 28x28 inputs the convolutional stack yields 20 feature maps
    of 4x4, i.e. the 320 features expected by ``fc1``.
    """

    def __init__(self):
        super().__init__()
        # Conv2d(in_channels, out_channels, ...)
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        # Channel-wise dropout for the 4-D convolutional feature maps.
        self.dropout = nn.Dropout2d(0.5)
        # Element-wise dropout for the 2-D fully connected activations.
        # Feeding 2-D input to Dropout2d is deprecated in PyTorch; nn.Dropout
        # is the documented replacement.  It has no parameters, so existing
        # checkpoints still load.
        self.fc_dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return log-probabilities of shape ``(batch, 10)``.

        Args:
            x: Image batch of shape ``(batch, 1, 28, 28)``.

        Raises:
            RuntimeError: If the spatial size does not produce 320 features
                per sample (raised by ``fc1``).
        """
        # Convolutional feature extractor.
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = self.conv2(x)
        x = self.dropout(x)
        x = F.relu(F.max_pool2d(x, 2))

        # Flatten per sample.  Keeping the batch dimension fixed makes a wrongly
        # sized input fail in fc1 instead of being silently re-batched, which
        # is what view(-1, 320) would do.
        x = x.view(x.size(0), -1)

        # Fully connected classifier head.
        x = F.relu(self.fc1(x))
        x = self.fc_dropout(x)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
 
 
def get_data_loaders(data_dir: str, batch_size: int = 64, test_batch_size: int = 1000):
    """Build the MNIST train and test ``DataLoader`` objects.

    The dataset is downloaded into ``data_dir`` when it is not already there,
    and every image is converted to a tensor.

    Args:
        data_dir: Root directory that holds (or will receive) the MNIST files.
        batch_size: Batch size of the shuffled training loader.
        test_batch_size: Batch size of the unshuffled test loader.

    Returns:
        A ``(train_loader, test_loader)`` tuple.
    """
    # Training split, reshuffled on every pass.
    train_set = MNIST(data_dir, train=True, download=True,
                      transform=transforms.ToTensor())
    loader_for_training = DataLoader(train_set, batch_size=batch_size, shuffle=True)

    # Held-out split, iterated in a fixed order.
    test_set = MNIST(data_dir, train=False, download=True,
                     transform=transforms.ToTensor())
    loader_for_testing = DataLoader(test_set, batch_size=test_batch_size, shuffle=False)

    return loader_for_training, loader_for_testing
 
 
def write_metrics(metrics, writer, logger, mode: str, epoch: int):
    """Log accuracy and loss for one epoch and record them as scalars.

    Args:
        metrics: Mapping that provides the ``'ac'`` (accuracy) and ``'nll'``
            (loss) values.
        writer: Object exposing ``add_scalar(tag, value, step)``.
        logger: Object exposing ``info(message)``.
        mode: Tag prefix and log label, e.g. ``'train'`` or ``'test'``.
        epoch: Epoch number used as the scalar step.

    Raises:
        KeyError: If ``metrics`` lacks ``'ac'`` or ``'nll'``.
    """
    logger.info(f'{mode} results - Epoch: {epoch}')

    accuracy = metrics['ac']
    loss = metrics['nll']
    logger.info(f'Ac: {accuracy:.2f} Loss: {loss:.2f}')

    # One scalar per metric, tagged "<mode>/<metric key>".
    for key, value in (('ac', accuracy), ('nll', loss)):
        writer.add_scalar(f'{mode}/{key}', value, epoch)
 
 
def main(args):
    """Train ``Net`` on MNIST with pytorch-ignite and log metrics to TensorBoard.

    After every epoch the model is evaluated on the training set and on the
    test set, both results are logged and written as scalars, and a checkpoint
    is saved.  Training stops early when the test loss has not improved for
    5 consecutive epochs.

    Args:
        args: Parsed command-line namespace.  Reads ``log_dir``, ``name``,
            ``device``, ``data_dir``, ``batch_size``, ``test_batch_size``,
            ``log_interval``, ``model_dir`` and ``epochs``.
    """
    writer = SummaryWriter(log_dir=args.log_dir)
    logger = getLogger(args.name)
    device = args.device
    pbar = None
    try:
        # Move the model explicitly: depending on the pytorch-ignite version,
        # create_supervised_trainer moves only the batches, not the model.
        model = Net().to(device)
        logger.info(model)

        # Adam
        optimizer = optim.Adam(model.parameters())
        # DataLoader
        train_loader, test_loader = get_data_loaders(
            args.data_dir, batch_size=args.batch_size, test_batch_size=args.test_batch_size)
        trainer = create_supervised_trainer(model, optimizer, F.nll_loss, device=device)

        def build_evaluator():
            # Fresh metric instances per evaluator so no state is shared.
            return create_supervised_evaluator(
                model, metrics={'ac': Accuracy(), 'nll': Loss(F.nll_loss)}, device=device)

        # Separate evaluators keep train and test metrics apart and let early
        # stopping observe the held-out loss only.
        train_evaluator = build_evaluator()
        test_evaluator = build_evaluator()

        desc = 'Epoch {} Iteration - loss: {:.2f}'
        pbar = tqdm(initial=0, leave=False, total=len(train_loader), desc=desc.format(0, 0))

        @trainer.on(Events.ITERATION_COMPLETED(every=args.log_interval))
        def log_training_loss(engine: Engine):
            pbar.desc = desc.format(engine.state.epoch, engine.state.output)
            pbar.update(args.log_interval)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_training_results(engine: Engine):
            pbar.refresh()
            train_evaluator.run(train_loader)
            write_metrics(train_evaluator.state.metrics, writer, logger, 'train', engine.state.epoch)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_test_results(engine: Engine):
            # Evaluate on the test set; without this run the "test" scalars
            # would just repeat the training metrics.
            test_evaluator.run(test_loader)
            write_metrics(test_evaluator.state.metrics, writer, logger, 'test', engine.state.epoch)
            # Rewind the progress bar for the next epoch.
            pbar.n = pbar.last_print_n = 0

        # save last 3 models
        checkpoint_handler = ModelCheckpoint(
            dirname=args.model_dir, filename_prefix=args.name, n_saved=3,
            create_dir=True, require_empty=False)
        trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpoint_handler, {'model': model})

        def score_function(engine):
            # EarlyStopping treats higher scores as better, so negate the loss.
            return -1 * engine.state.metrics['nll']

        # early stopping on the test loss
        early_stopping = EarlyStopping(patience=5, score_function=score_function, trainer=trainer)
        test_evaluator.add_event_handler(Events.COMPLETED, early_stopping)

        trainer.run(train_loader, max_epochs=args.epochs)
    finally:
        # Release the progress bar and flush pending TensorBoard events even
        # when training fails.
        if pbar is not None:
            pbar.close()
        writer.close()

    logger.info('Train Completed')
 
 
def parse_cnn_args():
    """Parse the command-line options of the CNN training script.

    Returns:
        The ``argparse.Namespace`` with ``batch_size``, ``test_batch_size``,
        ``epochs``, ``log_interval``, ``log_dir``, ``model_dir``, ``data_dir``,
        ``device`` and ``name``.
    """
    # (flag, type, default, help) for every supported option, in help order.
    option_specs = (
        ('--batch_size', int, 256, 'training batch size'),
        ('--test_batch_size', int, 1000, 'test batch size'),
        ('--epochs', int, 30, 'epochs count'),
        ('--log_interval', int, 10, 'every n batch, update progress'),
        ('--log_dir', str, '../../logs/cnn', 'log dir'),
        ('--model_dir', str, '../../models/cnn', 'model dir'),
        ('--data_dir', str, '../../datasets/cnn', 'dataset dir'),
        ('--device', str, 'cuda', 'cpu or cuda'),
        ('--name', str, 'cnn', 'project name'),
    )

    cli = ArgumentParser()
    for flag, value_type, fallback, description in option_specs:
        cli.add_argument(flag, type=value_type, default=fallback, help=description)
    return cli.parse_args()
 
 
if __name__ == '__main__':
    # Script entry point.  Logging is configured here because, with no handler
    # set up, Python's default WARNING threshold drops every logger.info()
    # record emitted during training (model summary, per-epoch metrics).
    import logging

    logging.basicConfig(level=logging.INFO)
    main(parse_cnn_args())