# Multi-trial Detection NAS With Efficiency Rewards ## Define NAS Model Defining a model is almost the same as defining a PyTorch model. You need to replace the code `import torch.nn as nn` with `import nni.retiarii.nn.pytorch as nn` and add `@model_wrapper` at the beginning of the model class. ```python from nni.retiarii import model_wrapper @model_wrapper class Model(nn.Module): ``` ### Define Changeable Modules NASC3 is a variation of the original CSP block (C3 module); it can adjust the number of output channels of cv1 and cv2. ```python import nni.retiarii.nn.pytorch as nn class NASC3(nn.Module): # CSP Bottleneck with 3 convolutions def __init__(self, c1, c2, inputshape=(), id=0, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion super().__init__() c_ = int(c2 * e) # changeable output channels choice = [] for scale in [1.0, 1.5, 2.0]: choice.append(NASC3sub(c1, c2, inputshape, id , n, shortcut, g, e, scale)) self.total = LayerChoice(choice, label="c3_{}".format(id)) self.cv1 = Conv(c1, c_, 1, 1) self.cv2 = Conv(c1, c_, 1, 1) self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2) self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n))) ``` NASConv is a variation of the original Conv module; it can adjust the kernel size and padding of convolutions and choose between different activations. 
```python import nni.retiarii.nn.pytorch as nn class NASConv(nn.Module): # Standard convolution def __init__(self, c1, c2, inputshape=(), id=0, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups super().__init__() choice = [nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)] # conv_2d_output_shape checks the output shape of convolutions (to make sure output size is the same) outputshape = conv_2d_output_shape(inputshape, k, s, autopad(k, p)) for offsetk in (-2 , 2): for offsetpad in range(0 if p is None else -1*p,4): if conv_2d_output_shape(inputshape, k+offsetk , s, autopad(k, p)+offsetpad) == outputshape: choice.append(nn.Conv2d(c1, c2, k+offsetk, s, autopad(k, p)+offsetpad, groups=g, bias=False)) self.conv = LayerChoice(choice, label="nasconv_{}".format(id)) self.shape = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) self.bn = nn.BatchNorm2d(c2) act_choice = [nn.SiLU(), nn.Identity(), nn.ReLU()] # activation choices self.act = LayerChoice(act_choice, label="nasconv_{}_act".format(id)) ``` ### Change YAML File YOLOv5s NAS model's YAML (backbone part only) [full yolov5s_nas.yaml](https://github.com/Raychen0617/yolov5_optimization/blob/master/models/yolov5s_nas.yaml) ```yaml # YOLOv5 v6.0 backbone backbone: # [from, number, module, args] [[-1, 1, NASConv, [64, 6, 2, 2]], # 0-P1/2 [-1, 1, NASConv, [128, 3, 2]], # 1-P2/4 [-1, 3, NASC3, [128]], [-1, 1, NASConv, [256, 3, 2]], # 3-P3/8 [-1, 6, NASC3, [256]], [-1, 1, NASConv, [512, 3, 2]], # 5-P4/16 [-1, 9, NASC3, [512]], [-1, 1, NASConv, [1024, 3, 2]], # 7-P5/32 [-1, 3, NASC3, [1024]], [-1, 1, SPPF, [1024, 5]] ] ``` ### Set Up User-defined NAS Model ```python device = "cuda:0" hyp = "data/hyps/hyp.scratch-low.yaml" # hyper-parameters in yolov5 cfg="./models/yolov5s_nas.yaml" # yaml file model_space = Model(cfg=cfg, ch=3, nc=80, anchors=hyp.get('anchors')).to(device) ``` ## Explore The Defined Model Space ### Pick An Exploration Strategy NNI supports many 
[exploration strategies](https://nni.readthedocs.io/en/stable/nas/exploration_strategy.html). Simply choose (i.e., instantiate) an exploration strategy as below.
```python import nni.retiarii.strategy as strategy search_strategy = strategy.Random(dedup=True) # dedup=False if deduplication is not wanted ``` ### Customize A Model Evaluator Set up parameters (e.g., batch size, epochs) for the model ```python def evaluate_model(model_detect): model = model_detect() device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') model.to(device) # Parameters hyp = 'data/hyps/hyp.scratch-low.yaml' if isinstance(hyp, str): with open(hyp, errors='ignore') as f: hyp = yaml.safe_load(f) WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) imgsz = 640 batch_size = 64 single_cls = False from utils.general import colorstr train_path = "/home/raytjchen/Desktop/code/datasets/coco128/images/train2017" gs = 32 nbs = 64 # nominal batch size epochs = 20 # how many epochs to train for a single choice ``` Create the optimizer and scheduler for YOLOv5 ```python # Optimizer accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing hyp['weight_decay'] *= batch_size * accumulate / nbs # scale weight_decay from utils.torch_utils import smart_optimizer optimizer = smart_optimizer(model, 'SGD', hyp['lr0'], hyp['momentum'], hyp['weight_decay']) # Scheduler lf = lambda x: (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear from torch.optim import lr_scheduler scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) ``` Create the training and validation dataloaders on the COCO dataset ```python # Create Trainloader from utils.dataloaders import create_dataloader train_loader, dataset = create_dataloader(train_path, imgsz, batch_size // WORLD_SIZE, gs, single_cls, hyp=hyp, augment=True, cache=None, rect=False, rank=-1, workers=0, image_weights=False, quad=False, prefix=colorstr('train: '), shuffle=True) # Testloader val_path = "/home/raytjchen/Desktop/code/datasets/coco128/images/train2017" val_loader = create_dataloader( val_path, imgsz, batch_size // WORLD_SIZE * 2, gs, single_cls, 
hyp=hyp, cache=None, rect=True, rank=-1, workers=0, pad=0.5, prefix=colorstr('val: '))[0] ``` Specify the model's attributes ```python # Model attributes hyp['obj'] *= (imgsz / 640) ** 2 # scale to image size and layers hyp['label_smoothing'] = 0.0 model.nc = 80 # attach number of classes to model model.hyp = hyp # attach hyperparameters to model from utils.general import labels_to_class_weights model.class_weights = labels_to_class_weights(dataset.labels, 80).to(device) * 80 # attach class weights model.names = "nas_yolov5s" ``` Start training ```python import time start_time = time.time() nb = len(train_loader) nw = max(round(hyp['warmup_epochs'] * nb), 100) last_opt_step = -1 import numpy as np nc = 80 maps = np.zeros(nc) # mAP per class results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) scheduler.last_epoch = - 1 # do not move scaler = torch.cuda.amp.GradScaler(enabled=False) from utils.loss import NASComputeLoss compute_loss = NASComputeLoss(model=model, h=hyp) for epoch in range(epochs): model.train() mloss = torch.zeros(3, device=device) pbar = enumerate(train_loader) from tqdm import tqdm pbar = tqdm(pbar, total=nb, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') optimizer.zero_grad() for batch_idx, (imgs, targets, paths, _) in pbar: ni = batch_idx + nb * epoch # number integrated batches (since train start) imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0 pred = model(imgs) loss, loss_items = compute_loss(pred, targets.to(device)) scaler.scale(loss).backward() # Optimizer step on if ni - last_opt_step >= accumulate: scaler.unscale_(optimizer) # unscale gradients torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0) # clip gradients scaler.step(optimizer) # optimizer.step scaler.update() optimizer.zero_grad() last_opt_step = ni mloss = (mloss * batch_idx + loss_items) / (batch_idx + 1) # update mean losses # end batch 
------------------------------------------------------------------------------------------------------------------------ # Scheduler step scheduler.step() # Validate data_dict = check_dataset('data/coco128.yaml') import val as validate results, maps, _ = validate.run(data_dict, batch_size=batch_size // WORLD_SIZE * 2, imgsz=imgsz, half=False, model=model, single_cls=single_cls, dataloader=val_loader, save_dir="./output/", plots=False, #callbacks=callbacks, compute_loss=compute_loss) nni.report_intermediate_result(int(results[3]) * 1000) # report final test result model_time = start_time - time.time() nni.report_final_result(results[3] * 1000) ``` **Add an efficiency term to our decision function (optional)** ```python # report final test result alpha = 0.03 nni.report_final_result(results[3] * 1000 - model_time * alpha) ``` ### Create The Evaluator ```python from nni.retiarii.evaluator import FunctionalEvaluator evaluator = FunctionalEvaluator(evaluate_model) ``` ## Launch An Experiment After all of the above is prepared, it is time to start an experiment to do the model search. An example is shown below. ```python from nni.retiarii.evaluator import FunctionalEvaluator evaluator = FunctionalEvaluator(evaluate_model) from nni.retiarii.experiment.pytorch import RetiariiExperiment, RetiariiExeConfig exp = RetiariiExperiment(model_space, evaluator, [], search_strategy) exp_config = RetiariiExeConfig('local') exp_config.experiment_name = 'yolov5s_nas_search' ``` The following configurations are useful to control how many trials to run at most / at the same time. ```python exp_config.max_trial_number = 4 # spawn 4 trials at most exp_config.trial_concurrency = 1 # run one trial at a time ``` Remember to set the following config if you want to use GPU. `use_active_gpu` should be set to true if you wish to use an occupied GPU (possibly running a GUI). 
```python exp_config.trial_gpu_number = 1 exp_config.training_service.use_active_gpu = True ``` Launch the experiment ```python exp.run(exp_config, 8083) ``` ## Export Best Model ```python for model_dict in exp.export_top_models(formatter='dict'): print(model_dict) save_json_path = "./yolov5s_nas.json" with open(save_json_path, 'w') as fp: json.dump(model_dict, fp) ``` ## Full Code On GitHub [hello_nas.py](https://github.com/Raychen0617/yolov5_optimization/blob/master/hello_nas.py)