One-Shot NAS on YOLOv5 backbone¶

Define NAS Model¶

Defining a NAS model is almost the same as defining a PyTorch model. You need to replace the code `import torch.nn as nn` with `import nni.retiarii.nn.pytorch as nn` and add the `@model_wrapper` decorator to the model class.

Define Changeable Modules¶

NASC3 is a variation of the original CSP block (C3 module); it can adjust the number of output channels of cv1 and cv2.

import nni.retiarii.nn.pytorch as nn

class NASC3(nn.Module):
    """Searchable CSP bottleneck block (C3) with three convolutions.

    The constructor builds one ``NASC3sub`` candidate per scale factor and
    exposes them through a ``LayerChoice`` so the search can pick one.
    The ``forward`` method is not part of this excerpt.
    """

    def __init__(self, c1, c2, inputshape=(), id=0, n=1, shortcut=True, g=1, e=0.5):
        """Create the candidate sub-blocks and the reference C3 layers.

        Args:
            c1: Input channels.
            c2: Output channels.
            inputshape: Input shape, forwarded unchanged to every ``NASC3sub``.
            id: Layer index; used to build the ``LayerChoice`` label.
            n: Number of stacked ``Bottleneck`` modules.
            shortcut: Forwarded to ``Bottleneck`` / ``NASC3sub``.
            g: Convolution groups.
            e: Expansion ratio applied to ``c2`` to get the hidden width.
        """
        super().__init__()
        hidden = int(c2 * e)  # changeable output channels

        # One candidate per scale factor; the scale is the last NASC3sub argument.
        candidates = [
            NASC3sub(c1, c2, inputshape, id, n, shortcut, g, e, factor)
            for factor in [1.0, 1.5, 2.0]
        ]
        self.total = LayerChoice(candidates, label=f"c3_{id}")

        # Layers of the plain (unscaled) C3 block.
        # NOTE(review): how these are used next to `self.total` depends on
        # `forward`, which is not shown here.
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(c1, hidden, 1, 1)
        self.cv3 = Conv(2 * hidden, c2, 1)  # optional act=FReLU(c2)
        stacked = [Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
        self.m = nn.Sequential(*stacked)

NASConv is a variation of the original Conv module; it can adjust the kernel size and padding of the convolution and choose between different activations.

import nni.retiarii.nn.pytorch as nn

class NASConv(nn.Module):
    """Searchable standard convolution.

    The constructor offers the search a choice of convolutions (different
    kernel size / padding, same output shape) and a choice of activations.
    The ``forward`` method is not part of this excerpt.
    """

    def __init__(self, c1, c2, inputshape=(), id=0, k=1, s=1, p=None, g=1, act=True):
        """Build the convolution and activation candidates.

        Args:
            c1: Input channels.
            c2: Output channels.
            inputshape: Input shape handed to ``conv_2d_output_shape``.
            id: Layer index; used to build the ``LayerChoice`` labels.
            k: Kernel size of the reference convolution.
            s: Stride.
            p: Padding; ``None`` lets ``autopad`` choose it.
            g: Convolution groups.
            act: Accepted for signature compatibility; not read in this
                constructor.
        """
        super().__init__()

        # `autopad` is assumed to be a pure helper, so its result is computed once.
        base_pad = autopad(k, p)

        # The reference convolution is always the first candidate.
        candidates = [nn.Conv2d(c1, c2, k, s, base_pad, groups=g, bias=False)]
        # conv_2d_output_shape checks the output shape of convolutions
        # (to make sure output size is the same)
        target_shape = conv_2d_output_shape(inputshape, k, s, base_pad)

        pad_offset_start = 0 if p is None else -1 * p
        for kernel_offset in (-2, 2):
            for pad_offset in range(pad_offset_start, 4):
                kernel = k + kernel_offset
                padding = base_pad + pad_offset
                # Keep only variants that leave the output shape unchanged.
                if conv_2d_output_shape(inputshape, kernel, s, padding) == target_shape:
                    candidates.append(
                        nn.Conv2d(c1, c2, kernel, s, padding, groups=g, bias=False)
                    )

        self.conv = LayerChoice(candidates, label=f"nasconv_{id}")
        # Plain convolution with the reference kernel/padding.
        self.shape = nn.Conv2d(c1, c2, k, s, base_pad, groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        activations = [nn.SiLU(), nn.Identity(), nn.ReLU()]  # activation choices
        self.act = LayerChoice(activations, label=f"nasconv_{id}_act")

Construct YOLOv5 backbone that supports NAS components (NASC3, NASConv)¶

full_code

class Backbone(nn.Module):
    # Abbreviated sketch, not the runnable class body: it only indicates that
    # the backbone is assembled via parse_backbone. `d` and `ch` are not
    # defined in this excerpt — presumably the parsed YAML dict and the input
    # channels; see the linked full code for the real implementation.
    parse_backbone(d, ch)

The function that supports parsing NAS components in backbone full_code

def parse_backbone(d, ch):  # model_dict, input_channels(3)
    """Prepare constructor arguments for every layer row in ``d['backbone']``.

    This is an excerpt: ``no``, ``gw``, ``pre_shape``, ``make_divisible`` and
    the stock YOLOv5 module classes are defined outside the shown code, and
    the rest of the loop (module construction, return value) is not shown.

    Args:
        d: Model dict; must provide a ``'backbone'`` sequence of
            ``[from, number, module, args]`` rows.
        ch: Channel counts indexed by the ``from`` value of a row
            (read as ``ch[f]``).
    """

    # YAML module names that must be resolved to the NAS-aware classes.
    component_mapping = {"NASConv":NASConv, "NASC3":NASC3}

    for i, (f, n, m, args) in enumerate(d['backbone']):  # from, number, module, args
        
        # Swap a NAS module name for its class; any other `m` is left untouched.
        if m in component_mapping.keys():
            m = component_mapping[m]

        # Modules whose first two constructor arguments are (ch_in, ch_out).
        if m in (NASConv, Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
                 BottleneckCSP, C3, NASC3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x):
            # c1: channels of the layer this row reads from; c2: first YAML arg.
            c1, c2 = ch[f], args[0]
            # `no` and `gw` come from outside this excerpt — presumably the
            # output count and the width multiple; confirm in the full code.
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, 8)

            # Remaining YAML args follow (c1, c2) positionally.
            args = [c1, c2, *args[1:]]

            # CSP-style blocks take the repeat count as their third argument,
            # so the block itself is created only once (n reset to 1).
            if m in [BottleneckCSP, NASC3, C3, C3TR, C3Ghost, C3x]:
                args.insert(2, n)  # number of repeats
                n = 1

            # NAS modules expect (c1, c2, inputshape, id, ...): insert the
            # input shape and the layer index right after the channel pair.
            if m in [NASConv, NASC3]:
                args.insert(2,pre_shape)
                args.insert(3, i)

Wrap the NAS backbone to a classification model for training¶

from nni.retiarii import model_wrapper

@model_wrapper
class NASBACKBONE(nn.Module):
    """Classification model built on the NAS backbone.

    Data flow in ``forward``: backbone -> global average pooling -> flatten
    -> linear classification head.
    """

    def __init__(self, cfg, nc):
        """Build the backbone and a linear head sized for the model variant.

        Args:
            cfg: Path (string) of the backbone YAML. It must contain
                ``"yolov5"`` immediately followed by the model-size letter
                (one of ``n``, ``s``, ``m``, ``x``).
            nc: Number of output classes of the linear head.

        Raises:
            ValueError: If ``cfg`` does not contain ``"yolov5"`` or the letter
                that follows it is not a supported model size. Previously this
                only printed a message and left ``self.head`` undefined, so
                the failure surfaced later inside ``forward``.
        """
        super().__init__()
        self.backbone = Backbone(cfg=cfg)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        # Input width of the linear head for each supported model-size letter.
        head_in_features = {'n': 256, 's': 512, 'm': 768, 'x': 1280}

        marker = cfg.find("yolov5")
        if marker == -1:
            # str.find returns -1 when absent; indexing with it would silently
            # inspect an unrelated character of the path.
            raise ValueError(
                f"error loading models in backbone: 'yolov5' not found in cfg {cfg!r}"
            )

        # Slicing (instead of indexing) yields '' when cfg ends right after
        # "yolov5", which is then rejected below with a clear message.
        variant = cfg[marker + 6:marker + 7]
        if variant not in head_in_features:
            raise ValueError(
                f"error loading models in backbone: unsupported model size "
                f"{variant!r} in cfg {cfg!r}; expected one of 'n', 's', 'm', 'x'"
            )
        self.head = nn.Linear(head_in_features[variant], nc, bias=True)

    def forward(self, x):
        """Return the class scores for the input batch ``x``."""
        x = self.backbone(x)
        x = self.avgpool(x)
        # Flatten everything but the batch dimension before the linear head.
        x = x.view(x.size(0), -1)
        x = self.head(x)
        return x

Change Yaml File¶

YoloV5’s backbone NAS yaml yolov5sb_nas.yaml

# YOLOv5 v6.0 backbone
# Rows are read in order by parse_backbone; the row position is passed to the
# NAS modules as their `id`.
backbone:
  # [from, number, module, args]
  # args[0] is the requested output channel count; the remaining args are
  # forwarded positionally to the module (for NASConv: kernel, stride, padding).
  [[-1, 1, NASConv, [64, 6, 2, 2]],  # 0-P1/2
   [-1, 1, NASConv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, NASC3, [128]],  # 2
   [-1, 1, NASConv, [256, 3, 2]],  # 3-P3/8
   [-1, 6, NASC3, [256]],  # 4
   [-1, 1, NASConv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, NASC3, [512]],  # 6
   [-1, 1, NASConv, [1024, 3, 2]],  # 7-P5/32
   [-1, 3, NASC3, [1024]],  # 8
   [-1, 1, SPPF, [1024, 5]]  # 9
  ]

Set Up User-defined NAS Model¶

# YAML file describing the NAS-enabled backbone.
nas_backbone_yaml = "./models/yolov5sb_nas.yaml"

# Build the search space with a 200-class head, then move it to `device`
# (`device` is defined outside this excerpt).
model_space = NASBACKBONE(cfg=nas_backbone_yaml, nc=200)
model_space = model_space.to(device=device)

Dataset¶

We use TinyImageNet for our classification training

# `create_tinyimagenet` is a project helper that is not shown on this page.
# Only the two datasets are used by the snippets below: the evaluator builds
# its own loaders from them, so `train_loader` / `test_loader` (and the
# batchsize passed here) are not referenced again in the shown code.
train_dataset, test_dataset, train_loader, test_loader = create_tinyimagenet(batchsize=1024)

Evaluator¶

To begin exploring the model space, one first needs an evaluator that provides the criterion for a “good model”. This program is tested on a classification task, so it can use pl.Classification as the evaluator.

import nni.retiarii.evaluator.pytorch.lightning as pl
# The loaders must be `pl.DataLoader` instances (or
# `torch.utils.data.DataLoader` wrapped with `nni.trace`), not plain
# `torch.utils.data.DataLoader` objects.
train_batches = pl.DataLoader(train_dataset, batch_size=512, num_workers=10)
val_batches = pl.DataLoader(test_dataset, batch_size=512, num_workers=10)

evaluator = pl.Classification(
    train_dataloaders=train_batches,
    val_dataloaders=val_batches,
    # Everything below is handed on to pytorch_lightning.Trainer.
    max_epochs=1,
    gpus=1,
)

Strategy¶

In the experiment, we use DARTS and ENAS to explore our model space. For more one-shot strategies, please check out here.

# Choose exactly ONE one-shot strategy. Assigning both in sequence meant the
# ENAS assignment silently replaced DARTS, even though the experiment
# configured further down is named 'darts'.
exploration_strategy = strategy.DARTS()
# Alternative: use ENAS instead of DARTS by swapping the active line.
# exploration_strategy = strategy.ENAS()

Experiments¶

Set configs¶

from nni.retiarii.experiment.pytorch import RetiariiExperiment, RetiariiExeConfig
# Bind search space, evaluator and strategy into one experiment. The third
# positional argument is an empty list — per NNI's RetiariiExperiment
# signature this is the list of applied mutators (none here); confirm against
# the installed NNI version.
exp = RetiariiExperiment(model_space, evaluator, [], exploration_strategy)
# 'local' selects the training service the experiment runs on.
exp_config = RetiariiExeConfig('local')
exp_config.experiment_name = 'darts'
# One-shot execution engine, matching the one-shot strategy chosen above.
exp_config.execution_engine = 'oneshot'
exp_config.max_trial_number = 4  # spawn 4 trials at most
exp_config.trial_concurrency = 2  # will run two trials concurrently
exp_config.trial_gpu_number = 1
# NOTE(review): presumably allows trials on GPUs that already run other
# processes — verify against the NNI configuration reference.
exp_config.training_service.use_active_gpu = True

Launch experiment¶

# Port handed to `exp.run` — presumably the port of the experiment's web UI;
# confirm against the NNI documentation and pick a free port.
port = 8081
# Start the search with the configuration prepared above.
exp.run(exp_config, port)

Get the best usable model¶

Export the best backbone (model)¶

# Materialise the exported architectures first: an empty result then fails
# with a clear message instead of a NameError on `model_dict` further down.
exported_models = list(exp.export_top_models(formatter='dict'))
if not exported_models:
    raise RuntimeError("exp.export_top_models() returned no architecture to export")

for model_dict in exported_models:
    print(model_dict)

# `model_dict` is the last exported architecture (the only one when
# `export_top_models` returns a single model, as with the arguments used here
# — confirm the default `top_k` for your NNI version).
# `json` and `save_json_path` must be provided by the surrounding script.
with open(save_json_path, 'w') as fp:
    json.dump(model_dict, fp)

# Re-create the model with every choice frozen to the exported architecture.
with fixed_arch(save_json_path):
    backbone = NASBACKBONE(cfg=nas_backbone_yaml, nc=200).to(device=device)

Map the backbone structure back to a YOLO model¶

match_nas is a function that maps the backbone structure back and saves the final detection model full_code

# Placeholder — replace with the path the final detection model is saved to.
save_model_path = "....."
# `Model` and `yolo_yaml` are not defined on this page — presumably the YOLOv5
# detection model class and the path of its YAML config; see the full code.
yolo = Model(yolo_yaml).to(device=device) 
# Copy the searched backbone structure into the detection model and save it.
match_nas(yolo, backbone, save_model_path)

Full Code On Github¶

oneshot_nas.py