Define Jobs

This page explains how to create a job for training and validation.

Training and validation jobs have to be created in the standardized format described below. Once you have implemented the following structure, zip the files and upload the archive to our platform to start the process.

Structure

File Structure:

input.json - This file contains the job configuration details like name, task, sites, etc.
nn.py - This file contains the neural network code.
nnMetrics.py - This file contains the code for optimizers, loss functions, transformation functions and metrics related to the training and validation process.
dataLoader_{site}.py - This file contains the code for how data is loaded at each data site. The {site} in dataLoader_{site}.py stands for the ID of the site for which the data loader is written. There should be one data loader file for each site. For example, if there are 3 sites with IDs 1, 2 and 3, then there will be 3 data loader files named dataLoader_1.py, dataLoader_2.py and dataLoader_3.py.

File Description

You can find sample contents for each file below. For an in-depth understanding of customisation, please visit the next pages, which describe what each value in the files represents and how to customise it.

1. input.json

{
    "name": "TrainCNN_V1",
    "task": "train",
    "sites": "site-1,site-2",
    "rounds": "2",
    "nnClass": "MobileNetCNN",
    "site-1": {
        "aggregation_epochs": 2,
        "lr": 0.01,
        "batch_size": 8,
        "data_size": 15,
        "train_test_split": 0.3,
        "balanced_class_train": "true"
    },
    "site-2": {
        "aggregation_epochs": 2,
        "lr": 0.01,
        "batch_size": 8,
        "data_size": 15,
        "train_test_split": 0.3,
        "balanced_class_train": "true"
    }
}

2. nn.py

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models.mobilenet import mobilenet_v2

class MobileNetCNN(nn.Module):
    """MobileNetV2 backbone followed by a small fully connected head.

    All children of torchvision's ``mobilenet_v2`` except the last one are
    kept as the feature extractor. Its output is pooled to 1280 values per
    sample and passed through a 1280 -> 512 -> 14 head with dropout (p=0.5)
    before each linear layer and a sigmoid on the final output.
    """

    def __init__(self):
        super().__init__()
        # Backbone is created without pretrained weights.
        backbone = mobilenet_v2(pretrained=False)
        # Drop the last child module of the backbone and chain the rest.
        feature_modules = list(backbone.children())[:-1]
        self.mobilenet_layer = nn.Sequential(*feature_modules)
        self.fc1 = nn.Linear(1280, 512)
        self.fc2 = nn.Linear(512, 14)
        self.dropout = nn.Dropout(0.5)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return per-class sigmoid scores of shape (N, 14) for input ``x``."""
        features = self.mobilenet_layer(x)
        # Global average pooling to 1x1, then flatten to (N, 1280).
        pooled = F.adaptive_avg_pool2d(features, 1)
        flat = pooled.reshape(-1, 1280)
        hidden = self.fc1(self.dropout(flat))
        scores = self.fc2(self.dropout(hidden))
        return self.sigmoid(scores)

def getModel():
    """Build and return a new ``MobileNetCNN`` instance."""
    model = MobileNetCNN()
    return model

3. nnMetrics.py


import numpy as np
def metricSupportFn(outputs, labels):
    """Return the sample count and the summed per-sample accuracy of a batch.

    Each output row is rounded to 0/1 and compared element-wise with the
    matching label row; a sample contributes the fraction of its entries
    that agree.

    Args:
        outputs: torch tensor of per-class scores; assumed to be 2-D
            (samples, classes) -- TODO confirm against the caller.
        labels: iterable of torch tensors (or a 2-D tensor), one row per
            sample, holding 0/1 targets.

    Returns:
        ``(total, correct)`` where ``total`` is the number of samples and
        ``correct`` is the sum of the per-sample match fractions.
    """
    if len(labels) == 0:
        # Nothing to score; avoid comparing arrays of incompatible shapes.
        return 0, 0.0

    # detach()/cpu() keep the conversion working for tensors that live on a
    # GPU or still track gradients; plain CPU tensors are unaffected.
    labels = np.array([t.detach().cpu().numpy() for t in labels])
    outputs = outputs.detach().cpu().numpy()

    matches = np.round(outputs) == labels
    # Fraction of matching entries per sample (row).
    per_sample = matches.mean(axis=1)
    return len(per_sample), per_sample.sum()

def metricSupportFn2(outputs, labels):
    """Count true/false positives and negatives summed over all classes.

    A score strictly greater than 0.5 counts as a positive prediction. Counts
    follow the usual binary confusion-matrix definitions (label 1 = positive,
    label 0 = negative) and are accumulated over every class column.

    Args:
        outputs: 2-D array or torch tensor of per-class scores,
            shape (samples, classes).
        labels: 2-D array or torch tensor of 0/1 targets with one column per
            class. Entries that are neither 0 nor 1 are not counted.

    Returns:
        Tuple ``(tp, fp, tn, fn)`` of plain Python ints.
    """
    def _as_array(values):
        # Torch tensors may live on a GPU or track gradients; bring them to
        # a plain numpy array first. Other array-likes pass straight through.
        if hasattr(values, "detach"):
            values = values.detach().cpu().numpy()
        return np.asarray(values)

    y_true = _as_array(labels)
    y_score = _as_array(outputs)
    if y_true.size == 0:
        # Empty batch: nothing to count.
        return (0, 0, 0, 0)

    # Only the class columns present in the labels are scored.
    classes = y_true.shape[1]
    predicted_pos = y_score[:, :classes] > 0.5
    predicted_neg = ~predicted_pos
    actual_pos = y_true == 1
    actual_neg = y_true == 0

    tp = int(np.count_nonzero(predicted_pos & actual_pos))
    fp = int(np.count_nonzero(predicted_pos & actual_neg))
    tn = int(np.count_nonzero(predicted_neg & actual_neg))
    fn = int(np.count_nonzero(predicted_neg & actual_pos))
    return (tp, fp, tn, fn)

def optimizerFn(model, lr):
    """Return an SGD optimizer (momentum 0.9) over ``model``'s parameters.

    Args:
        model: module whose ``parameters()`` are optimised.
        lr: learning rate passed to the optimizer.
    """
    from torch.optim import SGD

    trainable = model.parameters()
    return SGD(trainable, lr=lr, momentum=0.9)

def criterionFn():
    """Return the loss function used for training.

    The sample network in nn.py ends in a sigmoid and the metric helpers
    threshold each class independently at 0.5, i.e. the task is multi-label.
    Binary cross-entropy on probabilities is the matching loss;
    ``CrossEntropyLoss`` would apply a softmax on top of the sigmoid outputs
    and treat the classes as mutually exclusive.

    Returns:
        A ``torch.nn.BCELoss`` instance. It expects predictions in [0, 1] and
        float targets of the same shape.
    """
    import torch
    return torch.nn.BCELoss()

def transformFn():
    """Return the image preprocessing pipeline.

    The pipeline resizes to 224x224, applies random horizontal and vertical
    flips, and converts the image to a tensor, in that order.
    """
    from torchvision import transforms

    steps = [
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
    ]
    return transforms.Compose(steps)

4. dataLoader_{site}.py - Eg. dataLoader_1.py

from PIL import Image
import cv2
from torchvision import transforms
import pandas as pd
import numpy as np
import random

def reSampler(data, data_size):
    """Draw ``data_size`` rows from ``data``, favouring rows with more labels.

    The weight of a row is the number of ``|``-separated entries in its
    'Finding Labels' value (0 for an empty string) plus 4e-2, normalised so
    the weights sum to 1. Sampling uses a fixed ``random_state`` of 0, so the
    same input always yields the same rows.
    """
    def _label_count(text):
        # An empty string means no findings; otherwise count the '|' parts.
        return len(text.split('|')) if text else 0

    weights = data['Finding Labels'].map(_label_count).values + 4e-2
    weights = weights / weights.sum()
    return data.sample(n=data_size, weights=weights, random_state=0)

def imgReader(streamedFile, transform):
    """Decode an image from a file-like object and return it as a float tensor.

    Args:
        streamedFile: object whose ``read()`` returns the encoded image bytes
            (e.g. PNG or JPEG).
        transform: callable applied to the decoded PIL image; expected to
            return a tensor. If ``None``, the image is only converted with
            ``transforms.ToTensor()``.

    Returns:
        The transformed image cast with ``.float()``.

    Raises:
        ValueError: if the bytes cannot be decoded as an image.
    """
    file_byte_string = streamedFile.read()
    encoded = np.frombuffer(file_byte_string, dtype=np.uint8)
    image = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
    if image is None:
        # imdecode signals failure by returning None instead of raising.
        raise ValueError("imgReader: could not decode image data")

    # imdecode already yields uint8 pixels in 0..255, so no rescaling is
    # needed (multiplying a uint8 array by 255 wraps around modulo 256).
    # OpenCV stores channels as BGR while PIL expects RGB.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = Image.fromarray(image)

    if transform is None:
        # A PIL image has no .float(); fall back to a plain tensor conversion
        # so the return type is the same with or without a transform.
        transform = transforms.ToTensor()
    image = transform(image)
    return image.float()

Please check out the next pages for an in-depth explanation of each file.

Previous: Preview Site Description | Next: Job - input.json

Last updated 2 years ago

On this page:

- Structure
- File Description
- 1. input.json
- 2. nn.py
- 3. nnMetrics.py