Deep Neural Networks with PyTorch - Coursera

From IBM
pytorch
coursera
Published

July 7, 2021

Coursera website: Deep Neural Networks with PyTorch

Course certificate

Week 1 - Tensor and Datasets

Learning Objectives

  • Tensors 1D
  • Two-Dimensional Tensors
  • Data Set
  • Differentiation in PyTorch

notebook

notebook

Tensors 1D

The basics
#initialize
import torch
a=torch.tensor([7,4,3,2,6])

#dtype, type()
a.dtype
a.type()

#convert with type
a=a.type(torch.FloatTensor)

#size, ndimension
a.size()
a.ndimension()

#convert to 2D
a_2D=a.view(-1, 1)

#from_numpy, to numpy
import numpy as np
numpy_array = np.array([0.0, 1.0, 2.0, 3.0, 4.0])
torch_tensor = torch.from_numpy(numpy_array)
back_to_numpy = torch_tensor.numpy()

#from pandas
import pandas as pd
pandas_series = pd.Series([0.1, 2, 0.3, 10.1])
pandas_to_torch = torch.from_numpy(pandas_series.values)

#to list
this_tensor = torch.tensor([0, 1, 2, 3])
torch_to_list = this_tensor.tolist()

#item
new_tensor = torch.tensor([5, 2, 6, 1])
new_tensor[0].item()

#indexing and slicing
c = torch.tensor([20.0, 1.0, 2.0, 3.0, 4.0])
c[3:5] = torch.tensor([300.0, 4.0])
Basic operations
#define two vectors
u = torch.tensor([1.0, 2.0])
v = torch.tensor([3.0, 2.0])

#hadamard product (element-wise)
z = u*v

#dot product (scalar product)
result = torch.dot(u, v)
Universal functions: mean, max, mathematical functions, plot with linspace
#mean
a.mean()

#max
b = torch.tensor([1, -2, 3, 4, 5])
b.max()

#plot y=sin(x)
import matplotlib.pyplot as plt
%matplotlib inline

x = torch.linspace(0, 2 * np.pi, 100)
y = torch.sin(x)
plt.plot(x.numpy(), y.numpy())
Ungraded lab

1.1_1Dtensors_v2.ipynb

Tensors 2D

notebook

Tensor creation in 2D
a = [ [11, 12, 13], [21, 22, 23], [31, 32, 33] ]
A = torch.tensor(a)

A.ndimension()
>> 2

A.shape
>> torch.Size([3, 3])

A.size()
>> torch.Size([3, 3])

#number of elements
A.numel()
>> 9
Indexing and slicing in 2D
A[0, 0:2]
>> tensor([11, 12])

A[1:3, 2]
>> tensor([23, 33])
Basic operations in 2D: Hadamard product, matrix multiplication
X = torch.tensor([[1,0], [0,1]])
Y = torch.tensor([[2,1], [1,2]])

#hadamard product
Z = X*Y
Z
>> tensor([[2, 0],
           [0, 2]])

A = torch.tensor([ [0, 1, 1], [1, 0, 1]])
B = torch.tensor([ [1, 1], [1, 1], [-1, 1]])

#matrix multiplication
C = torch.mm(A, B)
C
>> tensor([[0, 2],
           [0, 2]])
Ungraded lab

1.1_2 Two-Dimensional Tensors_v2.ipynb

Derivatives in PyTorch

Derivatives

using \(y(x)=x^2\)

x = torch.tensor(2., requires_grad=True)
y = x ** 2

#calculate derivative df/dx
y.backward()
#evaluate at x : df/dx(x)
x.grad
>> tensor(4.)

using \(z(x)=x^2+2x+1\)

x = torch.tensor(2., requires_grad=True)
z = x**2 + 2*x + 1
z.backward()
x.grad
>> tensor(6.)

Note: in my version of PyTorch (1.7.1), I cannot use torch.int dtypes for tensors that require gradients.

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-92-979d0f10c1e7> in <module>
----> 3 x = torch.tensor(2, requires_grad=True)
      4 z = x**2 + 2*x + 1
      5 z.backward()
RuntimeError: Only Tensors of floating point and complex dtype can require gradients
Partial derivatives

using \(f(u, v)=uv+u^2\), \(\frac{\partial f(u,v)}{\partial u} = v+2u\), \(\frac{\partial f(u,v)}{\partial v} = u\)

u = torch.tensor(1., requires_grad=True)
v = torch.tensor(2., requires_grad=True)

f = u*v + u**2

#calculate all partial derivatives df/du and df/dv
f.backward()
#evaluate partial derivative with respect to u df/du at u, v : df/du(u, v)
u.grad
>> tensor(4.)
#evaluate partial derivative with respect to v df/dv at u, v : df/dv(u, v)
v.grad
>> tensor(1.)
Ungraded lab

1.2derivativesandGraphsinPytorch_v2.ipynb

The lab includes some explanation about .detach(), pointing to the torch.autograd documentation. That page also links to a walkthrough-of-backprop video.

I will have to come back to .detach()
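
To remember what .detach() does, here is a minimal sketch of my own: it returns a tensor that shares the same data but is excluded from the computation graph, so no gradient flows through it.

import torch

x = torch.tensor(2.0, requires_grad=True)
y = x ** 2
y_detached = y.detach()   # same value as y, but cut off from the graph

z = y_detached * x        # y_detached is treated as a constant (4.0)
z.backward()
x.grad
>> tensor(4.)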

Simple Dataset

Build a Dataset Class and Object
from torch.utils.data import Dataset

class toy_set(Dataset):
    def __init__(self, length=100, transform=None):
        self.x = 2*torch.ones(length, 2)
        self.y = torch.ones(length, 1)
        self.len = length
        self.transform = transform
    def __getitem__(self, index):
        sample=self.x[index], self.y[index]
        if self.transform:
            sample = self.transform(sample)
        return sample
    def __len__(self):
        return self.len
    
dataset = toy_set()
len(dataset)
>> 100
dataset[0]
>> (tensor([2., 2.]), tensor([1.]))
Build a Dataset Transform (e.g. normalize or standardize)
class add_mult(object):
    def __init__(self, addx=1, muly=1):
        self.addx = addx
        self.muly = muly
    def __call__(self, sample):
        x=sample[0]
        y=sample[1]
        x=x+self.addx
        y=y*self.muly
        sample=x, y
        return sample
    
    
# automatically apply the transform
a_m = add_mult()
dataset_ = toy_set(transform=a_m)
dataset_[0]
>> (tensor([3., 3.]), tensor([1.]))
Compose Transforms
class mult(object):
    def __init__(self, mul=100):
        self.mul = mul

    def __call__(self, sample):
        x = sample[0]
        y = sample[1]
        x = x * self.mul
        y = y * self.mul
        sample = x, y
        return sample
    
from torchvision import transforms
data_transform = transforms.Compose([add_mult(), mult()])

# automatically apply the composed transform
dataset_tr = toy_set(transform=data_transform)
dataset_tr[0]
>> (tensor([300., 300.]), tensor([100.]))
Ungraded lab

1.3.1_simple_data_set_v2.ipynb

Dataset

Dataset Class for Images
from PIL import Image
import pandas as pd
import os
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
class Dataset(Dataset):
    def __init__(self, csv_file, data_dir, transform=None):
        self.transform = transform
        self.data_dir = data_dir
        data_dir_csv_file = os.path.join(self.data_dir, csv_file)
        self.data_name = pd.read_csv(data_dir_csv_file)
        self.len = self.data_name.shape[0]
    def __len__(self):
        return self.len
    def __getitem__(self, idx):
        img_name=os.path.join(self.data_dir, self.data_name.iloc[idx, 1])
        image = Image.open(img_name)
        y = self.data_name.iloc[idx, 0]
        if self.transform:
            image = self.transform(image)
        return image, y
    
def show_data(data_sample, shape = (28, 28)):
    plt.imshow(data_sample[0].numpy().reshape(shape), cmap='gray')
    plt.title('y = ' + str(data_sample[1]))
dataset = Dataset(csv_file=csv_file, data_dir=directory)
show_data(dataset[0])
Torch Vision Transforms
import torchvision.transforms as transforms
transforms.CenterCrop(20)
transforms.ToTensor()
croptensor_data_transform = transforms.Compose( [ transforms.CenterCrop(20), transforms.ToTensor() ] )
dataset = Dataset(csv_file=csv_file, data_dir=directory, transform=croptensor_data_transform)
dataset[0][0].shape
>> torch.Size([1, 20, 20])
Torch Vision Datasets

MNIST example

import torchvision.datasets as dsets
dataset = dsets.MNIST(root='./data', train = False, download = True, transform = transforms.ToTensor())
Ungraded lab

1.3.2_Datasets_and_transforms.ipynb

1.3.3_pre-Built Datasets_and_transforms_v2.ipynb

Week 2 - Linear Regression

Learning Objectives

  • Linear Regression Prediction
  • Linear Regression Training
  • Loss
  • Gradient Descent
  • Cost
  • Linear Regression Training PyTorch

notebook

notebook

Linear Regression in 1D - Prediction

Simple linear regression - prediction
import torch
w = torch.tensor(2.0, requires_grad=True)
b = torch.tensor(-1.0, requires_grad=True)
def forward(x):
    y=w*x+b
    return y
x=torch.tensor([1.0])
yhat=forward(x)
yhat
>> tensor([1.], grad_fn=<AddBackward0>)
x=torch.tensor([[1.0],[2.0]])
forward(x)  
>> tensor([[1.],
        [3.]], grad_fn=<AddBackward0>)
PyTorch - Class Linear
from torch.nn import Linear
torch.manual_seed(1)
model = Linear(in_features=1, out_features=1)
list(model.parameters())
>> [Parameter containing:
     tensor([[0.5153]], requires_grad=True),
     Parameter containing:
     tensor([-0.4414], requires_grad=True)]
x=torch.tensor([[1.0],[2.0]])
model(x)
>> tensor([[0.0739],
        [0.5891]], grad_fn=<AddmmBackward>)
PyTorch - Custom Modules
import torch.nn as nn

class LR(nn.Module):
    def __init__(self, in_size, output_size):
        super(LR, self).__init__()
        self.linear = nn.Linear(in_size, output_size)
    def forward(self, x):
        out = self.linear(x)
        return out
model = LR(1, 1)
list(model.parameters())
>> [Parameter containing:
     tensor([[-0.9414]], requires_grad=True),
     Parameter containing:
     tensor([0.5997], requires_grad=True)]
x=torch.tensor([[1.0],[2.0]])
model(x)
>> tensor([[-0.3417],
        [-1.2832]], grad_fn=<AddmmBackward>)

Model state_dict()

state_dict() returns a Python dictionary. We will use it more as our models get more complex. One of its functions is to map each linear layer to its parameters; we can print out the keys and values.

model.state_dict()
>> OrderedDict([('linear.weight', tensor([[-0.9414]])),
             ('linear.bias', tensor([0.5997]))])
Ungraded lab

2.1Prediction1Dregression_v3.ipynb

Linear Regression Training

The loss function presented is the mean squared error:

\(l(w,b)=\frac{1}{N}\displaystyle\sum_{n=1}^{N}(y_n-(wx_n+b))^2\)

Gradient Descent and cost

PyTorch Slope

import torch
w=torch.tensor(-10.0, requires_grad=True)
X=torch.arange(-3,3,0.1).view(-1, 1)
f = -3*X
import matplotlib.pyplot as plt
plt.plot(X.numpy(), f.numpy())
plt.show()
Y = f+0.1*torch.randn(X.size())
plt.plot(X.numpy(), Y.numpy(), 'ro')
plt.show()
def forward(x):
    return w*x

def criterion(yhat, y):
    return torch.mean((yhat-y)**2)
lr = 0.1
for epoch in range(4):
    Yhat = forward(X)
    loss= criterion(Yhat, Y)
    loss.backward()
    w.data = w.data - lr*w.grad.data
    w.grad.data.zero_()
Ungraded lab

2.2_linear_regression_one_parameter_v3.ipynb

Linear Regression Training in PyTorch

Cost surface
# The class for plot the diagram

class plot_error_surfaces(object):
    
    # Constructor
    def __init__(self, w_range, b_range, X, Y, n_samples = 30, go = True):
        W = np.linspace(-w_range, w_range, n_samples)
        B = np.linspace(-b_range, b_range, n_samples)
        w, b = np.meshgrid(W, B)    
        Z = np.zeros((n_samples, n_samples))
        count1 = 0
        self.y = Y.numpy()
        self.x = X.numpy()
        for w1, b1 in zip(w, b):
            count2 = 0
            for w2, b2 in zip(w1, b1):
                Z[count1, count2] = np.mean((self.y - (w2 * self.x + b2)) ** 2)
                count2 += 1
            count1 += 1
        self.Z = Z
        self.w = w
        self.b = b
        self.W = []
        self.B = []
        self.LOSS = []
        self.n = 0
        if go == True:
            plt.figure()
            plt.figure(figsize = (7.5, 5))
            plt.axes(projection='3d').plot_surface(self.w, self.b, self.Z, rstride = 1, cstride = 1,cmap = 'viridis', edgecolor = 'none')
            plt.title('Cost/Total Loss Surface')
            plt.xlabel('w')
            plt.ylabel('b')
            plt.show()
            plt.figure()
            plt.title('Cost/Total Loss Surface Contour')
            plt.xlabel('w')
            plt.ylabel('b')
            plt.contour(self.w, self.b, self.Z)
            plt.show()
    
    # Setter
    def set_para_loss(self, W, B, loss):
        self.n = self.n + 1
        self.W.append(W)
        self.B.append(B)
        self.LOSS.append(loss)
    
    # Plot diagram
    def final_plot(self): 
        ax = plt.axes(projection = '3d')
        ax.plot_wireframe(self.w, self.b, self.Z)
        ax.scatter(self.W,self.B, self.LOSS, c = 'r', marker = 'x', s = 200, alpha = 1)
        plt.figure()
        plt.contour(self.w,self.b, self.Z)
        plt.scatter(self.W, self.B, c = 'r', marker = 'x')
        plt.xlabel('w')
        plt.ylabel('b')
        plt.show()
    
    # Plot diagram
    def plot_ps(self):
        plt.subplot(121)
        plt.plot(self.x, self.y, 'ro', label="training points")
        plt.plot(self.x, self.W[-1] * self.x + self.B[-1], label = "estimated line")
        plt.xlabel('x')
        plt.ylabel('y')
        plt.ylim((-10, 15))
        plt.title('Data Space Iteration: ' + str(self.n))

        plt.subplot(122)
        plt.contour(self.w, self.b, self.Z)
        plt.scatter(self.W, self.B, c = 'r', marker = 'x')
        plt.title('Total Loss Surface Contour Iteration: ' + str(self.n))
        plt.xlabel('w')
        plt.ylabel('b')
        plt.show()
        
get_surface = plot_error_surfaces(15, 15, X, Y, 30)

img

img
PyTorch (hard way)
def forward(x):
    y=w*x+b
    return y
def criterion(yhat, y):
    return torch.mean((yhat-y)**2)

w = torch.tensor(-15.0, requires_grad=True)
b = torch.tensor(-10.0, requires_grad=True)
X = torch.arange(-3, 3, 0.1).view(-1, 1)
f = 1*X-1
Y = f+0.1*torch.rand(X.size())
lr = 0.1
for epoch in range(15):
    Yhat=forward(X)
    loss=criterion(Yhat, Y)
    loss.backward()
    w.data=w.data-lr*w.grad.data
    w.grad.data.zero_()
    b.data=b.data-lr*b.grad.data
    b.grad.data.zero_()
Ungraded lab

2.3_training_slope_and_bias_v3.ipynb

Stochastic Gradient Descent and the Data Loader

Stochastic Gradient Descent in PyTorch
w = torch.tensor(-15.0, requires_grad=True)
b = torch.tensor(-10.0, requires_grad=True)
X = torch.arange(-3, 3, 0.1).view(-1, 1)
f = -3*X
Y=f+0.1*torch.randn(X.size())

def forward(x):
    y=w*x+b
    return y
def criterion(yhat, y):
    return torch.mean((yhat-y)**2)
lr = 0.1
for epoch in range(4):
    for x, y in zip(X, Y):
        yhat=forward(x)
        loss=criterion(yhat, y)
        loss.backward()
        w.data=w.data-lr*w.grad.data
        w.grad.data.zero_()
        b.data=b.data-lr*b.grad.data
        b.grad.data.zero_()
Stochastic Gradient Descent DataLoader

dataset

from torch.utils.data import Dataset

class Data(Dataset):
    def __init__(self):
        self.x = torch.arange(-3, 3, 0.1).view(-1, 1)
        self.y = -3*self.x + 1
        self.len = self.x.shape[0]
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    def __len__(self):
        return self.len
    
dataset = Data()

dataloader

from torch.utils.data import DataLoader

dataset=Data()
trainloader = DataLoader(dataset=dataset, batch_size=1)

stochastic gradient descent

for x, y in trainloader:
    yhat = forward(x)
    loss = criterion(yhat, y)
    loss.backward()
    w.data=w.data-lr*w.grad.data
    b.data=b.data-lr*b.grad.data
    w.grad.data.zero_()
    b.grad.data.zero_()
Ungraded lab

3.1_stochastic_gradient_descent_v3.ipynb

Mini-Batch Gradient Descent

Iterations = \(\frac{\text{training size}}{\text{batch size}}\)
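
A quick sanity check of this formula (my own, reusing the Data class above, which has 60 samples from arange(-3, 3, 0.1)): 60 samples with a batch size of 5 gives 12 iterations per epoch.

from torch.utils.data import DataLoader

dataset = Data()                                  # 60 samples
trainloader = DataLoader(dataset=dataset, batch_size=5)
len(dataset), len(trainloader)
>> (60, 12)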

Mini-Batch Gradient Descent in Pytorch
dataset = Data()
trainloader = DataLoader(dataset=dataset, batch_size=5)

lr=0.1
LOSS = []
for epoch in range(4):
    for x, y in trainloader:
        yhat=forward(x)
        loss = criterion(yhat, y)
        loss.backward()
        w.data=w.data-lr*w.grad.data
        b.data=b.data-lr*b.grad.data
        w.grad.data.zero_()
        b.grad.data.zero_()      
        LOSS.append(loss.item())

Optimization in PyTorch

from torch import nn, optim

criterion = nn.MSELoss()
trainloader = DataLoader(dataset=dataset, batch_size=1)
model = LR(1,1)
optimizer = optim.SGD(model.parameters(), lr = 0.01)
optimizer.state_dict()
>> {'state': {},
 'param_groups': [{'lr': 0.01,
   'momentum': 0,
   'dampening': 0,
   'weight_decay': 0,
   'nesterov': False,
   'params': [0, 1]}]}
for epoch in range(100):
    for x, y in trainloader:
        yhat = model(x)
        loss = criterion(yhat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

image.png
Ungraded lab

3.3_PyTorchway_v3.ipynb

Training, Validation and Test Split

standard explanation about Train, Validation, Test

Training, Validation and Test Split in PyTorch

Dataset to generate train_data and val_data

from torch.utils.data import Dataset, DataLoader

class Data(Dataset):
    def __init__(self, train = True):
        self.x = torch.arange(-3, 3, 0.1).view(-1, 1)
        self.f = -3*self.x+1
        self.y = self.f+0.1*torch.randn(self.x.size())
        self.len = self.x.shape[0]
        if train == True:
            self.y[0] = 0
            self.y[50:55] = 20
        else:
            pass
        
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return self.len
                
        
train_data = Data()
val_data = Data(train=False)

LR model

import torch.nn as nn

class LR(nn.Module):
    def __init__(self, input_size, output_size):
        super(LR, self).__init__()
        self.linear = nn.Linear(input_size, output_size)
    def forward(self, x):
        out=self.linear(x)
        return out
criterion = nn.MSELoss()

trainloader = DataLoader(dataset=train_data, batch_size=1)
epochs = 10
learning_rates = [0.0001, 0.001, 0.01, 0.1, 1]
train_error = torch.zeros(len(learning_rates))
validation_error = torch.zeros(len(learning_rates))
MODELS=[]
from torch import optim
from tqdm import tqdm
for i, learning_rate in tqdm(enumerate(learning_rates)):
    model = LR(1,1)
    optimizer = optim.SGD(model.parameters(), lr = learning_rate)
    
    for epoch in range(epochs):
        for x, y in trainloader:
            yhat = model(x)
            loss = criterion(yhat, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
    yhat=model(train_data.x)
    loss=criterion(yhat, train_data.y)
    train_error[i]=loss.item()

    yhat=model(val_data.x)
    loss=criterion(yhat, val_data.y)
    validation_error[i]=loss.item()
    MODELS.append(model)
import numpy as np
plt.semilogx(np.array(learning_rates), train_error.numpy(), label='training cost/total loss')
plt.semilogx(np.array(learning_rates), validation_error.numpy(), label='validation cost/total loss')
plt.ylabel('Cost Total loss')
plt.xlabel('learning rate')
plt.legend()
plt.show()

img

Week 3 - Multiple Input Output Linear Regression - Logistic Regression for Classification

Learning Objectives

  • Multiple Linear Regression
  • Multiple Linear Regression Training
  • Linear Regression Multiple Outputs
  • Linear Regression Multiple Outputs Training

notebook

notebook

Multiple Input Linear Regression Prediction

Class Linear
import torch
from torch.nn import Linear
torch.manual_seed(1)
model = Linear(in_features=2, out_features=1)
list(model.parameters())
>> [Parameter containing:
 tensor([[ 0.3643, -0.3121]], requires_grad=True),
 Parameter containing:
 tensor([-0.1371], requires_grad=True)]
model.state_dict()
>> OrderedDict([('weight', tensor([[ 0.3643, -0.3121]])),
             ('bias', tensor([-0.1371]))])
#predictions for multiple samples
X = torch.tensor([[1.0, 1.0], [1.0, 2.0], [1.0, 3.0]])
yhat = model(X)
yhat
>> tensor([[-0.0848],
        [-0.3969],
        [-0.7090]], grad_fn=<AddmmBackward>)
Custom Modules
import torch.nn as nn

class LR(nn.Module):
    def __init__(self, input_size, output_size):
        super(LR, self).__init__()
        self.linear = nn.Linear(input_size, output_size)
    def forward(self, x):
        out = self.linear(x)
        return out
Ungraded lab

4.1.multiple_linear_regression_prediction_v2.ipynb

Multiple Input Linear Regression Training

Cost function and Gradient Descent for Multiple Linear Regression

Cost function

\[l(w,b)=\frac{1}{N}\displaystyle\sum_{n=1}^{N}(y_n-(x_nw+b))^2\]

Gradient of loss function with respect to the weights

\[\nabla l(w,b) = \begin{bmatrix}\frac{\partial l(w,b)}{\partial w_1}\\ \vdots \\\frac{\partial l(w,b)}{\partial w_d}\end{bmatrix}\]

Gradient of loss function with respect to the bias

\[\frac{\partial l(w,b)}{\partial b}\]

Update of weights

\[w^{k+1} = w^k-\eta \nabla l(w^k,b^k)\]

\[\begin{bmatrix} w_1^{k+1}\\ \vdots\\ w_d^{k+1}\\\end{bmatrix}=\begin{bmatrix} w_1^{k}\\ \vdots\\ w_d^{k}\\\end{bmatrix}-\eta \begin{bmatrix}\frac{\partial l(w^k,b^k)}{\partial w_1}\\ \vdots \\\frac{\partial l(w^k,b^k)}{\partial w_d}\end{bmatrix}\]

and update of bias

\[b^{k+1}=b^k-\eta \frac{\partial l(w^k,b^k)}{\partial b}\]
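
Before using nn and optim below, here is a minimal sketch of my own that writes this update rule out explicitly with autograd, so the correspondence with the formulas is visible:

import torch

# toy data: X is (N, d), Y is (N, 1); the underlying relation is y = x1 + x2 + 1
X = torch.randn(20, 2)
Y = X @ torch.tensor([[1.0], [1.0]]) + 1.0

w = torch.randn(2, 1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)
eta = 0.1

for epoch in range(100):
    yhat = X @ w + b                     # x_n w + b for all samples
    loss = torch.mean((Y - yhat) ** 2)   # l(w, b)
    loss.backward()                      # fills w.grad and b.grad with the gradients above
    with torch.no_grad():
        w -= eta * w.grad                # w^{k+1} = w^k - eta * grad_w l(w^k, b^k)
        b -= eta * b.grad                # b^{k+1} = b^k - eta * dl/db
    w.grad.zero_()
    b.grad.zero_()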

Train the model in PyTorch
from torch import nn, optim
import torch

class LR(nn.Module):
    def __init__(self, input_size, output_size):
        super(LR, self).__init__()
        self.linear = nn.Linear(input_size, output_size)
    def forward(self, x):
        out = self.linear(x)
        return out
from torch.utils.data import Dataset, DataLoader

class Data2D(Dataset):
    def __init__(self):
        self.x = torch.zeros(20,2)
        self.x[:, 0] = torch.arange(-1,1,0.1)
        self.x[:, 1] = torch.arange(-1,1,0.1)
        self.w = torch.tensor([ [1.0], [1.0]])
        self.b = 1
        self.f = torch.mm(self.x, self.w)+self.b
        self.y = self.f + 0.1*torch.randn((self.x.shape[0], 1))
        self.len = self.x.shape[0]
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    def __len__(self):
        return self.len
data_set = Data2D()
criterion = nn.MSELoss()
trainloader = DataLoader(dataset=data_set, batch_size=2)
model = LR(input_size=2, output_size=1)
optimizer = optim.SGD(model.parameters(), lr=0.1)
for epoch in range(100):
    for x, y in trainloader:
        yhat = model(x)
        loss = criterion(yhat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()   
Ungraded lab

4.2.multiple_linear_regression_training_v2.ipynb

Multiple Output Linear Regression

Linear regression with multiple outputs

image.png
Custom Modules
import torch.nn as nn
import torch
class LR(nn.Module):
    def __init__(self, input_size, output_size):
        super(LR, self).__init__()
        self.linear = nn.Linear(input_size, output_size)
    def forward(self, x):
        out = self.linear(x)
        return out
    
torch.manual_seed(1)
model = LR(input_size=2, output_size=2)

list(model.parameters())
>> [Parameter containing:
 tensor([[ 0.3643, -0.3121],
         [-0.1371,  0.3319]], requires_grad=True),
 Parameter containing:
 tensor([-0.6657,  0.4241], requires_grad=True)]

#with 2 columns and 3 rows
X=torch.tensor([[1.0, 1.0], [1.0,2.0], [1.0, 3.0]])
Yhat = model(X)
Yhat
>> tensor([[-0.6135,  0.6189],
        [-0.9256,  0.9508],
        [-1.2377,  1.2827]], grad_fn=<AddmmBackward>)
Ungraded lab

4.3.multi-target_linear_regression.ipynb

Multiple Output Linear Regression Training

Training in PyTorch

Training is the same; what changes is the Dataset:

from torch.utils.data import Dataset, DataLoader

class Data2D(Dataset):
    def __init__(self):
        self.x = torch.zeros(20,2)
        self.x[:, 0] = torch.arange(-1,1,0.1)
        self.x[:, 1] = torch.arange(-1,1,0.1)
        self.w = torch.tensor([ [1.0, -1.0], [1.0, -1.0]])
        self.b = torch.tensor([[1.0, -1.0]])
        self.f = torch.mm(self.x, self.w)+self.b
        self.y = self.f + 0.1*torch.randn((self.x.shape[0], 1))
        self.len = self.x.shape[0]
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    def __len__(self):
        return self.len

and model instantiation

from torch import nn, optim

data_set = Data2D()
criterion = nn.MSELoss()
trainloader = DataLoader(dataset=data_set, batch_size=1)
model = LR(input_size=2, output_size=2)
optimizer = optim.SGD(model.parameters(), lr=0.001)

Training:

for epoch in range(100):
    for x, y in trainloader:
        yhat = model(x)
        loss = criterion(yhat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Ungraded lab

4.4.training_multiple_output_linear_regression.ipynb

Linear Classifier and Logistic Regression

\[\sigma(z)=\frac{1}{1+e^{-z}}\]

The sigmoid is used as the threshold function in logistic regression, squashing the linear output into (0, 1).

Logistic Regression: Prediction

logistic function in PyTorch

as a function: torch.sigmoid

import torch
import matplotlib.pyplot as plt

z = torch.arange(-100, 100, 0.1).view(-1, 1)
yhat = torch.sigmoid(z)
plt.plot(z.numpy(), yhat.numpy())

img

as a class: nn.Sigmoid()

import torch
import torch.nn as nn
import matplotlib.pyplot as plt

z = torch.arange(-100, 100, 0.1).view(-1, 1)
sig = nn.Sigmoid()
yhat = sig(z)
plt.plot(z.numpy(), yhat.numpy())

torch.nn.Sigmoid vs torch.sigmoid - PyTorch Forums

torch.nn.Sigmoid (note the capital “S”) is a class. When you
instantiate it, you get a function object, that is, an object that you
can call like a function. In contrast, torch.sigmoid is a function.

nn.Sequential

image.png
sequential_model = nn.Sequential(nn.Linear(1,1), nn.Sigmoid())
nn.Module
import torch.nn as nn

class logistic_regression(nn.Module):
    def __init__(self, in_size):
        super(logistic_regression, self).__init__()
        self.linear = nn.Linear(in_size, 1)
    def forward(self, x):
        z = torch.sigmoid(self.linear(x))
        return z
    
custom_model = logistic_regression(1)
Making a prediction
x=torch.tensor([[1.0], [2.0]])
custom_model(x)
>> tensor([[0.4129],
        [0.3936]], grad_fn=<SigmoidBackward>)
sequential_model(x)
>> tensor([[0.2848],
        [0.2115]], grad_fn=<SigmoidBackward>)
Multidimensional Logistic Regression
custom_2D_model = logistic_regression(2)
sequential_2D_model = nn.Sequential(nn.Linear(2, 1), nn.Sigmoid())

x=torch.tensor([[1.0, 2.0]])
yhat = sequential_2D_model(x)
yhat
>> tensor([[0.7587]], grad_fn=<SigmoidBackward>)
Ungraded lab

5.1logistic_regression_prediction_v2.ipynb

Bernoulli Distribution and Maximum Likelihood Estimation

To find the parameter value of the Bernoulli distribution, we do not maximize the likelihood function directly but the log of the likelihood function (the log likelihood), which is given by

\[l(\theta) = \ln(p(Y|\theta))=\displaystyle\sum_{n=1}^{N}y_n \ln(\theta)+(1-y_n) \ln(1-\theta)\]

Note: We want to get

\[\hat\theta = argmax_\theta(P(Y|\theta))\]

where

\[P(Y|\theta) = \displaystyle\prod_{n=1}^{N}\theta^{y_n}(1-\theta)^{1-y_n}\]
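
As a quick numeric sketch of my own: for Bernoulli data, the theta that maximizes the log likelihood is simply the sample mean, which can be checked by scanning candidate values of theta:

import torch

Y = torch.tensor([1., 0., 1., 1., 0., 1., 1., 1.])   # 6 ones out of 8 samples
theta = torch.linspace(0.01, 0.99, 99)
log_lik = Y.sum() * torch.log(theta) + (len(Y) - Y.sum()) * torch.log(1 - theta)
theta[log_lik.argmax()], Y.mean()
>> (tensor(0.7500), tensor(0.7500))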

Logistic Regression Cross Entropy Loss

The mean squared error loss \(l(w,b)=\frac{1}{N}\displaystyle\sum_{n=1}^{N}(y_n-\sigma(wx_n+b))^2\) works poorly here: combined with the sigmoid it produces flat regions and local minima, which motivates the cross entropy loss.

Cross entropy loss

\[l(\theta)=-\frac{1}{N}\displaystyle\sum_{n=1}^{N}y_n \ln(\sigma(wx_n+b))+(1-y_n)\ln(1-\sigma(wx_n+b))\]

def criterion(yhat, y):
    out = -1 * torch.mean(y * torch.log(yhat) + (1-y) * torch.log(1-yhat))
    return out
Logistic Regression in PyTorch

Create a model (using Sequential)

model = nn.Sequential(nn.Linear(1, 1), nn.Sigmoid())

or create a custom one

import torch.nn as nn

class logistic_regression(nn.Module):
    def __init__(self, in_size):
        super(logistic_regression, self).__init__()
        self.linear = nn.Linear(in_size, 1)
    def forward(self, x):
        z = torch.sigmoid(self.linear(x))
        return z

Then define our loss function

def criterion(yhat, y):
    out = -1 * torch.mean(y * torch.log(yhat) + (1-y) * torch.log(1-yhat))
    return out

or simply BCE (binary cross entropy)

criterion = nn.BCELoss()

Putting all pieces together:

#dataset

import torch
from torch.utils.data import Dataset

class Data(Dataset):
    def __init__(self):
        self.x = torch.arange(-1, 1, 0.1).view(-1, 1)
        self.y = torch.zeros(self.x.shape[0], 1)
        self.y[self.x[:, 0] > 0.2] = 1
        self.len = self.x.shape[0]
    def __getitem__(self, index):      
        return self.x[index], self.y[index]
    def __len__(self):
        return self.len
    
dataset = Data()

# dataloader

from torch.utils.data import DataLoader
trainloader = DataLoader(dataset=dataset, batch_size=1)

# model

import torch.nn as nn
model = nn.Sequential(nn.Linear(1, 1), nn.Sigmoid())

# optimizer

from torch import optim
optimizer = optim.SGD(model.parameters(), lr = 0.01)

# loss

criterion = nn.BCELoss()

# training

for epoch in range(100):
    for x, y in trainloader:
        yhat = model(x)
        loss = criterion(yhat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Ungraded lab

5.2.2bad_inshilization_logistic_regression_with_mean_square_error_v2.ipynb

Week 4 - Softmax regression

Learning Objectives

  • Using Lines to Classify Data
  • Softmax Prediction in PyTorch
  • Softmax Pytorch MNIST

notebook

notebook

Softmax Prediction

Softmax is a combination of logistic regression and argmax: each class gets its own linear function, and the predicted class is the argmax of those outputs.

image.png

Softmax function

Custom module using nn.module
import torch.nn as nn

class Softmax(nn.Module):
    def __init__(self, in_size, out_size):
        super(Softmax, self).__init__()
        self.linear = nn.Linear(in_size, out_size)
    def forward(self, x):
        out = self.linear(x)
        return out
import torch
torch.manual_seed(1)
# 2 dimensions input samples and 3 output classes
model = Softmax(2,3)

x = torch.tensor([[1.0, 2.0]])
z = model(x)
z
>> tensor([[-0.4053,  0.8864,  0.2807]], grad_fn=<AddmmBackward>)

_, yhat = z.max(1)
yhat
>> tensor([1])

and with multiple samples

X=torch.tensor([[1.0, 1.0],[1.0, 2.0],[1.0, -3.0]])
z = model(X)
z
>> tensor([[-0.0932,  0.5545, -0.1433],
        [-0.4053,  0.8864,  0.2807],
        [ 1.1552, -0.7730, -1.8396]], grad_fn=<AddmmBackward>)

_, yhat = z.max(1)
yhat
>> tensor([1, 1, 0])
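
Note that z contains raw scores (logits), not probabilities. If probabilities are needed, softmax can be applied explicitly; taking the argmax of the probabilities gives the same classes (a small sketch of my own):

import torch.nn.functional as F

probs = F.softmax(z, dim=1)   # each row now sums to 1
probs.argmax(dim=1)
>> tensor([1, 1, 0])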

Softmax PyTorch

Load Data
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets

train_dataset = dsets.MNIST(root='./data', train = True, download = True, transform=transforms.ToTensor())

validation_dataset = dsets.MNIST(root='./data', train = False, download = True, transform=transforms.ToTensor())

train_dataset[0] is a tuple with the image and the class:
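
For example (a quick check of my own; the first MNIST training image is a handwritten 5, and ToTensor gives a 1x28x28 tensor):

image, label = train_dataset[0]
image.shape
>> torch.Size([1, 28, 28])
label
>> 5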

Create Model
import torch.nn as nn

class Softmax(nn.Module):
    def __init__(self, in_size, out_size):
        super(Softmax, self).__init__()
        self.linear = nn.Linear(in_size, out_size)
    def forward(self, x):
        out = self.linear(x)
        return out
    
input_dim = 28 * 28
output_dim = 10
model = Softmax(input_dim, output_dim)
criterion = nn.CrossEntropyLoss()

import torch.optim as optim
optimizer = optim.SGD(model.parameters(), lr=0.01)

n_epochs = 100
accuracy_list = []

train_loader = torch.utils.data.DataLoader(dataset = train_dataset, batch_size = 100)
validation_loader = torch.utils.data.DataLoader(dataset=validation_dataset, batch_size=5000)
Train Model
from tqdm import tqdm

for epoch in tqdm(range(n_epochs)):
    for x, y in train_loader:
        optimizer.zero_grad()
        z = model(x.view(-1, 28 * 28))
        loss = criterion(z, y)
        loss.backward()
        optimizer.step()
    correct = 0
    for x_test, y_test in validation_loader:
        z = model(x_test.view(-1, 28 * 28))
        _, yhat = torch.max(z.data, 1)
        correct = correct+(yhat == y_test).sum().item()
    accuracy = correct / len(validation_dataset)
    accuracy_list.append(accuracy)
Ungraded lab

5.4softmax_in_one_dimension_v2.ipynb

Ungraded lab

6.2lab_predicting _MNIST_using_Softmax_v2.ipynb

# The function to plot parameters

def PlotParameters(model): 
    W = model.state_dict()['linear.weight'].data
    w_min = W.min().item()
    w_max = W.max().item()
    fig, axes = plt.subplots(2, 5)
    fig.subplots_adjust(hspace=0.01, wspace=0.1)
    for i, ax in enumerate(axes.flat):
        if i < 10:
            
            # Set the label for the sub-plot.
            ax.set_xlabel("class: {0}".format(i))

            # Plot the image.
            ax.imshow(W[i, :].view(28, 28), vmin=w_min, vmax=w_max, cmap='seismic')

            ax.set_xticks([])
            ax.set_yticks([])

        # Ensure the plot is shown correctly with multiple plots
        # in a single Notebook cell.
    plt.show()
    
# Plot the parameters

PlotParameters(model)

img

Week 4 - Shallow neural networks

Learning Objectives

  • Simple Neural Networks
  • More Hidden Neurons
  • Neural Networks with Multiple Dimensional Input
  • Multi-Class Neural Networks
  • Backpropagation
  • Activation Functions

notebook

notebook

Neural networks in One Dimension

using nn.Module
import torch
import torch.nn as nn
from torch import sigmoid

class Net(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)
    def forward(self, x):
        x=sigmoid(self.linear1(x))
        x=sigmoid(self.linear2(x))
        return x
model = Net(1, 2, 1)
x = torch.tensor([0.0])
yhat = model(x)
yhat
>> tensor([0.5972], grad_fn=<SigmoidBackward>)

# multiple samples
x = torch.tensor([[0.0], [2.0], [3.0]])
yhat = model(x)
yhat
>> tensor([[0.5972],
        [0.5925],
        [0.5894]], grad_fn=<SigmoidBackward>)

# to get a discrete value we apply a threshold
yhat = yhat < 0.59
yhat
>> tensor([[False],
           [False],
           [ True]])
model.state_dict()
>> OrderedDict([('linear1.weight',
              tensor([[0.3820],
                      [0.4019]])),
             ('linear1.bias', tensor([-0.7746, -0.3389])),
             ('linear2.weight', tensor([[-0.3466,  0.2201]])),
             ('linear2.bias', tensor([0.4115]))])
using nn.Sequential
model = nn.Sequential(nn.Linear(1, 2), nn.Sigmoid(), nn.Linear(2, 1), nn.Sigmoid())
train the model

we create the data

X = torch.arange(-20, 20, 1).view(-1, 1).type(torch.FloatTensor)
Y = torch.zeros(X.shape[0])
Y[(X[:, 0]>-4) & (X[:, 0] <4)] = 1.0

we create a training function

from tqdm import tqdm

def train(Y, X, model, optimizer, criterion, epochs=1000):
    cost = []
    total = 0
    for epoch in tqdm(range(epochs)):
        total = 0
        for x, y in zip(X, Y):
            yhat = model(x)
            loss = criterion(yhat, y.view(-1))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total+=loss.item()
        cost.append(total)
    return cost

and the training process is now

#loss
criterion = nn.BCELoss()

#data
X = torch.arange(-20, 20, 1).view(-1, 1).type(torch.FloatTensor)
Y = torch.zeros(X.shape[0])
Y[(X[:, 0]>-4) & (X[:, 0] <4)] = 1.0

#model
model = Net(1, 2, 1)

#optimizer
optimizer = torch.optim.SGD(model.parameters(), lr = 0.01)

#train the model
cost = train(Y, X, model, optimizer, criterion, epochs=1000)
>> 100%|██████████| 1000/1000 [00:12<00:00, 76.96it/s]
Ungraded lab

7.1_simple1hiddenlayer.ipynb

I like how the lab displays intermediate representations of learning performance:

img
# The function for plotting the model

def PlotStuff(X, Y, model, epoch, leg=True):
    
    plt.plot(X.numpy(), model(X).detach().numpy(), label=('epoch ' + str(epoch)))
    plt.plot(X.numpy(), Y.numpy(), 'r')
    plt.xlabel('x')
    if leg == True:
        plt.legend()
    else:
        pass

Activation values are plotted from within the training loop, using model attributes (model.a1), which seems like bad practice.

img
plt.scatter(model.a1.detach().numpy()[:, 0], model.a1.detach().numpy()[:, 1], c=Y.numpy().reshape(-1))
plt.title('activations')
plt.show()

and final loss curve

img

Neural Networks More Hidden Neurons

using nn.Module
import torch
import torch.nn as nn
from torch import sigmoid
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from tqdm import tqdm

class to get our dataset

class Data(Dataset):
    def __init__(self):
        self.x = torch.linspace(-20, 20, 100).view(-1, 1)
        self.y = torch.zeros(self.x.shape[0])
        self.y[(self.x[:, 0]>-10) & (self.x[:, 0]<-5)] = 1
        self.y[(self.x[:, 0]>5) & (self.x[:, 0]<10)] = 1
        self.y = self.y.view(-1, 1)
        self.len = self.x.shape[0]
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    def __len__(self):
        return self.len

class for creating our model

class Net(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)
    def forward(self, x):
        x=sigmoid(self.linear1(x))
        x=sigmoid(self.linear2(x))
        return x

and the function to train our model

# The function for plotting the model
def PlotStuff(X, Y, model):  
    plt.plot(X.numpy(), model(X).detach().numpy())
    plt.plot(X.numpy(), Y.numpy(), 'r')
    plt.xlabel('x')
    
def train(data_set, model, criterion, train_loader, optimizer, epochs=5):
    cost = []
    total=0
    for epoch in tqdm(range(epochs)):
        total=0
        for x, y in train_loader:
            optimizer.zero_grad()
            yhat = model(x)
            loss = criterion(yhat, y)
            loss.backward()
            optimizer.step()
            total+=loss.item() 
            PlotStuff(data_set.x, data_set.y, model)
        cost.append(total)
    return cost

process for training is identical to logistic regression

criterion = nn.BCELoss()
data_set = Data()
train_loader = DataLoader(dataset=data_set, batch_size=100)
model = Net(1, 6, 1)
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
train(data_set, model, criterion, train_loader, optimizer, epochs=1000)
using nn.Sequential
model = nn.Sequential(
    nn.Linear(1, 7),
    nn.Sigmoid(),
    nn.Linear(7, 1),
    nn.Sigmoid()
)
Ungraded lab

7.2multiple_neurons.ipynb

Neural Networks with Multiple Dimensional Input

implementation
import torch
import torch.nn as nn
from torch import sigmoid
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt

we create a dataset class

class XOR_Data(Dataset):
    def __init__(self, N_s=100):
        self.x = torch.zeros((N_s, 2))
        self.y = torch.zeros((N_s, 1))
        for i in range(N_s // 4):
            self.x[i, :] = torch.Tensor([0.0, 0.0])
            self.y[i, 0] = torch.Tensor([0.0])
            self.x[i + N_s // 4, :] = torch.Tensor([0.0, 1.0])
            self.y[i + N_s // 4, 0] = torch.Tensor([1.0])
            self.x[i + N_s // 2, :] = torch.Tensor([1.0, 0.0])
            self.y[i + N_s // 2, 0] = torch.Tensor([1.0])
            self.x[i + 3 * N_s // 4, :] = torch.Tensor([1.0, 1.0])
            self.y[i + 3 * N_s // 4, 0] = torch.Tensor([0.0])
        # add a small amount of noise to the points (once, outside the loop)
        self.x = self.x + 0.01 * torch.randn((N_s, 2))
        self.len = N_s
            
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    def __len__(self):
        return self.len      
    # Plot the data
    def plot_stuff(self):
        plt.plot(self.x[self.y[:, 0] == 0, 0].numpy(), self.x[self.y[:, 0] == 0, 1].numpy(), 'o', label="y=0")
        plt.plot(self.x[self.y[:, 0] == 1, 0].numpy(), self.x[self.y[:, 0] == 1, 1].numpy(), 'ro', label="y=1")
        plt.legend()

img

We create a class for creating our model

class Net(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)
    def forward(self, x):
        x=sigmoid(self.linear1(x))
        x=sigmoid(self.linear2(x))
        return x

We create a function to train our model

# Calculate the accuracy

def accuracy(model, data_set):
    return np.mean(data_set.y.view(-1).numpy() == (model(data_set.x)[:, 0] > 0.5).numpy())

def train(data_set, model, criterion, train_loader, optimizer, epochs=5):
    COST = []
    ACC = []
    for epoch in tqdm(range(epochs)):
        total=0
        for x, y in train_loader:
            optimizer.zero_grad()
            yhat = model(x)
            loss = criterion(yhat, y)
            loss.backward()
            optimizer.step()
            #cumulative loss 
            total+=loss.item()
        ACC.append(accuracy(model, data_set))
        COST.append(total)
        
    return COST

process for training is identical to logistic regression

criterion = nn.BCELoss()
data_set = XOR_Data()
train_loader = DataLoader(dataset=data_set, batch_size=1)
model = Net(2, 4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr = 0.01)
train(data_set, model, criterion, train_loader, optimizer, epochs=500)
overfitting and underfitting

Solution:

  • use validation data to determine optimum number of neurons
  • get more data
  • regularization: for example dropout
Ungraded lab

7.3xor_v2.ipynb

Multi-Class Neural Networks

using nn.Module

we do not apply a sigmoid to the output layer (nn.CrossEntropyLoss expects the raw logits), and D_out is our number of classes

class Net(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)
    def forward(self, x):
        x=sigmoid(self.linear1(x))
        x=(self.linear2(x))
        return x
using nn.Sequential
input_dim = 2
hidden_dim = 6
output_dim = 3
model = nn.Sequential(
    nn.Linear(input_dim, hidden_dim),
    nn.Sigmoid(),
    nn.Linear(hidden_dim, output_dim)
)
training

we create a validation and training dataset

import torchvision.datasets as dsets
import torchvision.transforms as transforms
train_dataset = dsets.MNIST(root='./data', train = True, download = True, transform=transforms.ToTensor())
validation_dataset = dsets.MNIST(root='./data', train = False, download = True, transform=transforms.ToTensor())

we create a validation and training loader

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=2000)
validation_loader = torch.utils.data.DataLoader(dataset=validation_dataset, batch_size=2000)
criterion = nn.CrossEntropyLoss()

we create the training function

from tqdm import tqdm
def train(model, criterion, train_loader, validation_loader, optimizer, epochs=100):
    i = 0
    useful_stuff = {'training_loss': [],'validation_accuracy': []}  
    for epoch in tqdm(range(epochs)):
        for i, (x, y) in enumerate(train_loader): 
            optimizer.zero_grad()
            z = model(x.view(-1, 28 * 28))
            loss = criterion(z, y)
            loss.backward()
            optimizer.step()
             #loss for every iteration
            useful_stuff['training_loss'].append(loss.data.item())
        correct = 0
        for x, y in validation_loader:
            #validation 
            z = model(x.view(-1, 28 * 28))
            _, label = torch.max(z, 1)
            correct += (label == y).sum().item()
        accuracy = 100 * (correct / len(validation_dataset))
        useful_stuff['validation_accuracy'].append(accuracy)
    return useful_stuff

We instantiate and train the model

input_dim = 28 * 28
hidden_dim = 100
output_dim = 10

model = Net(input_dim, hidden_dim, output_dim)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
training_results = train(model, criterion, train_loader, validation_loader, optimizer, epochs=30)

To plot accuracy and loss

# Define a function to plot accuracy and loss

def plot_accuracy_loss(training_results): 
    plt.subplot(2, 1, 1)
    plt.plot(training_results['training_loss'], 'r')
    plt.ylabel('loss')
    plt.title('training loss iterations')
    plt.subplot(2, 1, 2)
    plt.plot(training_results['validation_accuracy'])
    plt.ylabel('accuracy')
    plt.xlabel('epochs')   
    plt.show()
    
plot_accuracy_loss(training_results)

img

To plot misclassified items

count = 0
for x, y in validation_dataset:
    z = model(x.reshape(-1, 28 * 28))
    _,yhat = torch.max(z, 1)
    if yhat != y:
        show_data(x)
        count += 1
    if count >= 5:
        break

img
Ungraded lab

7.4one_layer_neural_network_MNIST.ipynb

Backpropagation

Following the chain rule in gradient calculation, the gradients can get closer and closer to 0 (the vanishing gradient problem), so the parameters of the earlier layers stop improving.

One way to deal with this is to change the activation function.
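
A tiny illustration of my own: stacking sigmoids shrinks the gradient that reaches the input, because each sigmoid contributes a derivative of at most 0.25 to the chain rule product.

import torch

x = torch.tensor(0.5, requires_grad=True)
y = x
for _ in range(10):        # 10 "layers" of sigmoid
    y = torch.sigmoid(y)
y.backward()
x.grad                     # very small, on the order of 1e-7: almost no gradient reaches x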

Activation functions

sigmoid, tanh, relu

img
sigmoid, tanh, relu in PyTorch
class Net_sigmoid(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(Net_sigmoid, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)
    def forward(self, x):
        x=sigmoid(self.linear1(x))
        x=(self.linear2(x))
        return x
class Net_tanh(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(Net_tanh, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)
    def forward(self, x):
        x=torch.tanh(self.linear1(x))
        x=(self.linear2(x))
        return x
class Net_relu(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(Net_relu, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)
    def forward(self, x):
        x=torch.relu(self.linear1(x))
        x=(self.linear2(x))
        return x

using nn.Sequential

model_tanh = nn.Sequential(
    nn.Linear(input_dim, hidden_dim),
    nn.Tanh(),
    nn.Linear(hidden_dim, output_dim)
)

model_relu = nn.Sequential(
    nn.Linear(input_dim, hidden_dim),
    nn.ReLU(),
    nn.Linear(hidden_dim, output_dim)
)
Ungraded lab

7.5.1activationfuction_v2.ipynb

Ungraded lab

7.5.2mist1layer_v2.ipynb

To monitor GPU usage: nvidia-smi -l 1

image.png

Week 5 - Deep neural networks

Learning Objectives

  • building deep networks
  • Dropout
  • Neural Network initialization weights
  • Gradient Descent with Momentum

notebook

notebook

Deep Neural Networks

Following this course's definition, a network is deep when the number of hidden layers is greater than 1.

using nn.Module
import torch
import torch.nn as nn
from torch import sigmoid
class Net(nn.Module):
    def __init__(self, D_in, H1, H2, D_out):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(D_in, H1)
        self.linear2 = nn.Linear(H1, H2)
        self.linear3 = nn.Linear(H2, D_out)
    def forward(self, x):
        x=sigmoid(self.linear1(x))       
        x=sigmoid(self.linear2(x))
        x=self.linear3(x)
        return x
using nn.Sequential
input_dim = 2
hidden_dim1 = 6
hidden_dim2 = 4
output_dim = 3
model = nn.Sequential(
    nn.Linear(input_dim, hidden_dim1),
    nn.Sigmoid(),
    nn.Linear(hidden_dim1, hidden_dim2),
    nn.Sigmoid(),    
    nn.Linear(hidden_dim2, output_dim)
)
training

there is no change compared to the other networks

we create a validation and training dataset

import torchvision.datasets as dsets
import torchvision.transforms as transforms
train_dataset = dsets.MNIST(root='./data', train = True, download = True, transform=transforms.ToTensor())
validation_dataset = dsets.MNIST(root='./data', train = False, download = True, transform=transforms.ToTensor())

we create a validation and training loader

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=2000)
validation_loader = torch.utils.data.DataLoader(dataset=validation_dataset, batch_size=2000)
criterion = nn.CrossEntropyLoss()

we create the training function

from tqdm import tqdm
def train(model, criterion, train_loader, validation_loader, optimizer, epochs=100):
    i = 0
    useful_stuff = {'training_loss': [],'validation_accuracy': []}  
    for epoch in tqdm(range(epochs)):
        for i, (x, y) in enumerate(train_loader): 
            optimizer.zero_grad()
            z = model(x.view(-1, 28 * 28))
            loss = criterion(z, y)
            loss.backward()
            optimizer.step()
             #loss for every iteration
            useful_stuff['training_loss'].append(loss.data.item())
        correct = 0
        for x, y in validation_loader:
            #validation 
            z = model(x.view(-1, 28 * 28))
            _, label = torch.max(z, 1)
            correct += (label == y).sum().item()
        accuracy = 100 * (correct / len(validation_dataset))
        useful_stuff['validation_accuracy'].append(accuracy)
    return useful_stuff

We instantiate and train the model

input_dim = 28 * 28
hidden_dim1 = 50
hidden_dim2 = 50
output_dim = 10

model = Net(input_dim, hidden_dim1, hidden_dim2, output_dim)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
training_results = train(model, criterion, train_loader, validation_loader, optimizer, epochs=30)
Ungraded lab - deep neural networks

8.1.1mist2layer_v2.ipynb

img

Deep Neural Networks : nn.ModuleList()

jdc

this is a nice library that allows breaking down a class definition across separate notebook cells

Installation is as simple as pip install jdc

and usage is

import jdc

and start a cell with %%add_to <your class name>

python implementation
import torch
import torch.nn as nn
from torch import sigmoid
import jdc
class Net(nn.Module):
    def __init__(self, Layers):
        super(Net, self).__init__()
        self.hidden = nn.ModuleList()
        for input_size, output_size in zip(Layers, Layers[1:]):
            self.hidden.append(nn.Linear(input_size, output_size))
Layers = [2, 3, 4, 3]
model = Net(Layers)
%%add_to Net

def forward(self, x):
    L = len(self.hidden)
    for (l, linear_transform) in zip(range(L), self.hidden):
        if (l < L-1):
            x = torch.relu(linear_transform(x))
        else:
            x = linear_transform(x)
    return x
Ungraded lab - nn.ModuleList()

8.1.2mulitclassspiralrulu_v2.ipynb

img

Dropout

using nn.Module
class Net(nn.Module):
    def __init__(self, in_size, n_hidden, out_size, p=0):
        super(Net, self).__init__()
        self.drop = nn.Dropout(p=p)
        self.linear1 = nn.Linear(in_size, n_hidden)
        self.linear2 = nn.Linear(n_hidden, n_hidden)
        self.linear3 = nn.Linear(n_hidden, out_size)
    def forward(self, x):
        x=torch.relu(self.linear1(x))       
        x=self.drop(x)
        x=torch.relu(self.linear2(x))
        x=self.drop(x)
        x=self.linear3(x)
        return x
using nn.Sequential
model = nn.Sequential(
    nn.Linear(1, 10),
    nn.Dropout(0.5),
    nn.ReLU(),
    nn.Linear(10, 12),
    nn.Dropout(0.5),
    nn.ReLU(),
    nn.Linear(12, 1),
)
training

create data

from torch.utils.data import Dataset, DataLoader 
import numpy as np
# Create data class for creating dataset object

class Data(Dataset):
    
    # Constructor
    def __init__(self, N_SAMPLES=1000, noise_std=0.15, train=True):
        a = np.matrix([-1, 1, 2, 1, 1, -3, 1]).T
        self.x = np.matrix(np.random.rand(N_SAMPLES, 2))
        self.f = np.array(a[0] + (self.x) * a[1:3] + np.multiply(self.x[:, 0], self.x[:, 1]) * a[4] + np.multiply(self.x, self.x) * a[5:7]).flatten()
        self.a = a
       
        self.y = np.zeros(N_SAMPLES)
        self.y[self.f > 0] = 1
        self.y = torch.from_numpy(self.y).type(torch.LongTensor)
        self.x = torch.from_numpy(self.x).type(torch.FloatTensor)
        self.x = self.x + noise_std * torch.randn(self.x.size())
        self.f = torch.from_numpy(self.f)
        self.a = a
        if train == True:
            torch.manual_seed(1)
            self.x = self.x + noise_std * torch.randn(self.x.size())
            torch.manual_seed(0)
        self.len = N_SAMPLES
        
    # Getter        
    def __getitem__(self, index):    
        return self.x[index], self.y[index]
    
    # Get Length
    def __len__(self):
        return self.len
    
    # Plot the diagram
    def plot(self):
        X = data_set.x.numpy()
        y = data_set.y.numpy()
        h = .02
        x_min, x_max = X[:, 0].min(), X[:, 0].max()
        y_min, y_max = X[:, 1].min(), X[:, 1].max() 
        xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
        Z = data_set.multi_dim_poly(np.c_[xx.ravel(), yy.ravel()]).flatten()
        f = np.zeros(Z.shape)
        f[Z > 0] = 1
        f = f.reshape(xx.shape)
        
        plt.title('True decision boundary  and sample points with noise ')
        plt.plot(self.x[self.y == 0, 0].numpy(), self.x[self.y == 0,1].numpy(), 'bo', label='y=0') 
        plt.plot(self.x[self.y == 1, 0].numpy(), self.x[self.y == 1,1].numpy(), 'ro', label='y=1')
        plt.contour(xx, yy, f,cmap=plt.cm.Paired)
        plt.xlim(0,1)
        plt.ylim(0,1)
        plt.legend()
    
    # Make a multidimension ploynomial function
    def multi_dim_poly(self, x):
        x = np.matrix(x)
        out = np.array(self.a[0] + (x) * self.a[1:3] + np.multiply(x[:, 0], x[:, 1]) * self.a[4] + np.multiply(x, x) * self.a[5:7])
        out = np.array(out)
        return out

img

instantiate the model

model_drop = Net(2, 300, 2, p=0.5)

The train method tells the model we are in the training phase, which enables dropout; later we use the eval method to tell the model it is in the evaluation phase, which turns dropout off.

model_drop.train()
optimizer = torch.optim.Adam(model_drop.parameters(), lr = 0.01)
criterion = nn.CrossEntropyLoss()
data_set = Data()
validation_set = Data(train=False)
# Initialize the LOSS dictionary to store the loss

LOSS = {}
LOSS['training data dropout'] = []
LOSS['validation data dropout'] = []

train the model

# Train the model
from tqdm import tqdm

epochs = 500

def train_model(epochs):
    
    for epoch in tqdm(range(epochs)):
        #all the samples are used for training 
        yhat_drop = model_drop(data_set.x)
        loss_drop = criterion(yhat_drop, data_set.y)

        #store the loss for both the training and validation data for both models 
        LOSS['training data dropout'].append(loss_drop.item())
        model_drop.eval()
        LOSS['validation data dropout'].append(criterion(model_drop(validation_set.x), validation_set.y).item())
        model_drop.train()

        optimizer.zero_grad()
        loss_drop.backward()
        optimizer.step()
        
train_model(epochs)
# The function for calculating accuracy

def accuracy(model, data_set):
    _, yhat = torch.max(model(data_set.x), 1)
    return (yhat == data_set.y).numpy().mean()

# Print out the accuracy of the model with dropout

print("The accuracy of the model with dropout: ", accuracy(model_drop, validation_set))
>> The accuracy of the model with dropout:  0.866
Ungraded lab - dropout classification

8.2.1dropoutPredictin_v2.ipynb

Ungraded lab - dropout regression

8.2.2dropoutRegression_v2.ipynb

Neural Network initialization weights

Different methods exist:

  • uniform distribution for parameters: the lower bound of the range of the distribution is the negative of the inverse of the square root of \(L_{in}\) (the number of input neurons), and the upper bound is the positive of the same value. See this paper for more details: LeCun, Yann A., et al. “Efficient backprop.” Neural networks: Tricks of the trade. Springer, Berlin, Heidelberg, 2012. 9-48.
linear=nn.Linear(input_size,output_size)
linear.weight.data.uniform_(0, 1)
  • Xavier method: Xavier initialization is another popular method, used in conjunction with the tanh activation. It takes into account the number of input neurons \(L_{in}\) as well as the number of neurons in the next layer \(L_{out}\). See this paper for more details: Glorot, Xavier, and Yoshua Bengio. “Understanding the difficulty of training deep feedforward neural networks.” 2010.
linear=nn.Linear(input_size,output_size)
torch.nn.init.xavier_uniform_(linear.weight)
  • He method: for ReLU we use the He initialization method. After creating a linear object, we initialize the weights as shown below; a sketch that applies it to a whole model follows after this list. For more details see He, Kaiming, et al. “Delving deep into rectifiers: Surpassing human-level performance on ImageNet classification.”
linear = nn.Linear(input_size, output_size)
torch.nn.init.kaiming_uniform_(linear.weight, nonlinearity='relu')
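
To apply one of these schemes to every linear layer of an existing model, a common pattern (my own sketch, not from the course) is model.apply with a small init function:

import torch
import torch.nn as nn

def init_weights(m):
    # He (Kaiming) initialization for every Linear layer of a ReLU network
    if isinstance(m, nn.Linear):
        torch.nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
        m.bias.data.fill_(0.0)

model = nn.Sequential(nn.Linear(2, 6), nn.ReLU(), nn.Linear(6, 1))
model.apply(init_weights)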
Ungraded lab - initialization

8.3.1.initializationsame.ipynb

Ungraded lab - Xavier initialization

8.3.2Xaviermist1layer_v2.ipynb

Ungraded lab - He initialization

8.3.3.He_Initialization_v2.ipynb

Gradient Descent with Momentum

PyTorch implementation

In PyTorch, momentum is simply specified as an argument of the optimizer:

optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum = 0.4)
Ungraded lab - momentum with different polynomial

8.4.1_MomentumwithPolynomialFunctions_v2.ipynb

Ungraded lab - Neural Network momentum

8.4.2_NeuralNetworkswithMomentum_v2.ipynb

Batch Normalization

image.png

The 𝛾, 𝛽 parameters are actually scale and shift parameters, which we are going to learn via training.
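
For reference, the standard batch-normalization equations behind the scale and shift (my transcription of the usual formulation, not copied from the slide): for a mini-batch of activations \(z_1,\dots,z_m\),

\[\mu_B = \frac{1}{m}\displaystyle\sum_{i=1}^{m}z_i, \qquad \sigma_B^2 = \frac{1}{m}\displaystyle\sum_{i=1}^{m}(z_i-\mu_B)^2\]

\[\hat{z}_i = \frac{z_i-\mu_B}{\sqrt{\sigma_B^2+\epsilon}}, \qquad \tilde{z}_i = \gamma\,\hat{z}_i+\beta\]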

using nn.Module
class Net_BatchNorm(nn.Module):
    def __init__(self, in_size, n_hidden1, n_hidden2, out_size):
        super(Net_BatchNorm, self).__init__()

        self.linear1 = nn.Linear(in_size, n_hidden1)
        self.linear2 = nn.Linear(n_hidden1, n_hidden2)
        self.linear3 = nn.Linear(n_hidden2, out_size)
        
        self.bn1 = nn.BatchNorm1d(n_hidden1)
        self.bn2 = nn.BatchNorm1d(n_hidden2)
        
    def forward(self, x):
        x=torch.sigmoid(self.bn1(self.linear1(x)))
        x=torch.sigmoid(self.bn2(self.linear2(x)))
        x=self.linear3(x)
        return x
Ungraded lab - Batch normalization

8.5.1BachNorm_v2.ipynb

Comparing training loss for each iteration and accuracy on validation data, with and without batch normalization.

img

img

Week 6 - Convolutional neural networks

Learning Objectives

  • Convolution
  • Activation Functions
  • Max Pooling
  • Convolution: Multiple Channels
  • Convolutional Neural Network
  • TORCH-VISION MODELS

notebook

notebook

Convolution

convolution explanation from the Stanford course CS231n

convolution
conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3)
image = torch.zeros(1,1,5,5)
image[0,0,:,2] = 1
image
>> tensor([[[[0., 0., 1., 0., 0.],
          [0., 0., 1., 0., 0.],
          [0., 0., 1., 0., 0.],
          [0., 0., 1., 0., 0.],
          [0., 0., 1., 0., 0.]]]])
z=conv(image)
>> tensor([[[[ 0.6065,  0.0728, -0.7915],
          [ 0.6065,  0.0728, -0.7915],
          [ 0.6065,  0.0728, -0.7915]]]], grad_fn=<ThnnConv2DBackward>)
conv.state_dict()
>> OrderedDict([('weight',
              tensor([[[[ 0.1132, -0.0418,  0.3140],
                        [-0.2261, -0.1528, -0.3270],
                        [-0.2140, -0.1900,  0.2127]]]])),
             ('bias', tensor([0.0423]))])
stride
conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, stride = 2)
zeros padding
conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, stride = 2, padding = 1)
size of activation map

Feature size = ((Image size + 2 * Padding size − Kernel size) / Stride)+1
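A quick check of this formula against the examples above (my own arithmetic): for the 5×5 image with kernel size 3, stride 1 and no padding, feature size = ((5 + 2·0 − 3) / 1) + 1 = 3, matching the 3×3 activation map shown earlier; with stride 2 and padding 1, feature size = ((5 + 2·1 − 3) / 2) + 1 = 3.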

Ungraded lab - What’s convolution

9.1What_is_Convolution.ipynb

Activation Functions and Max Pooling

Activation function using nn.Module
import torch
image = torch.zeros(1,1,5,5)
image[0,0,:,2] = 1
conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3)
z=conv(image)
A = torch.relu(z)
Activation function using nn.Sequential
relu = nn.ReLU()
A = relu(z)
Max pooling
maxpool = nn.MaxPool2d(2, stride=1)
maxpool(image)
torch.max_pool2d(image, stride=1, kernel_size=2)
Ungraded lab - Activation Functions and Max Pooling

9.2Activation_max_pooling.ipynb

Multiple Input and Output Channels

Ungraded lab - Multiple Channel Convolution

9.3Multiple Channel Convolution.ipynb

Convolutional Neural Network

using nn.Module
class CNN(nn.Module):
    def __init__(self,out_1=2,out_2=1):
        
        super(CNN,self).__init__()
        #first Convolutional layers 
        self.cnn1=nn.Conv2d(in_channels=1,out_channels=out_1,kernel_size=2,padding=0)
        self.maxpool1=nn.MaxPool2d(kernel_size=2 ,stride=1)

        #second Convolutional layers
        self.cnn2=nn.Conv2d(in_channels=out_1,out_channels=out_2,kernel_size=2,stride=1,padding=0)
        self.maxpool2=nn.MaxPool2d(kernel_size=2 ,stride=1)
        #max pooling 

        #fully connected layer 
        self.fc1=nn.Linear(out_2*7*7,2)
        
    def forward(self,x):
        #first Convolutional layers
        x=self.cnn1(x)
        #activation function 
        x=torch.relu(x)
        #max pooling 
        x=self.maxpool1(x)
        #first Convolutional layers
        x=self.cnn2(x)
        #activation function
        x=torch.relu(x)
        #max pooling
        x=self.maxpool2(x)
        #flatten output 
        x=x.view(x.size(0),-1)
        #fully connected layer
        x=self.fc1(x)
        return x
training
n_epochs=10
cost_list=[]
accuracy_list=[]
N_test=len(validation_dataset)
cost=0
#n_epochs
for epoch in range(n_epochs):
    cost=0    
    for x, y in train_loader:
        #clear gradient 
        optimizer.zero_grad()
        #make a prediction 
        z=model(x)
        # calculate loss 
        loss=criterion(z,y)
        # calculate gradients of parameters 
        loss.backward()
        # update parameters 
        optimizer.step()
        cost+=loss.item()
    cost_list.append(cost)
    correct=0
    #perform a prediction on the validation  data  
    for x_test, y_test in validation_loader:
        z=model(x_test)
        _,yhat=torch.max(z.data,1)
        correct+=(yhat==y_test).sum().item()
    accuracy=correct/N_test
    accuracy_list.append(accuracy)
Ungraded lab - Convolutional Neural Network Simple example

9.4.1ConvolutionalNeralNetworkSimple example.ipynb

def conv_output_shape(h_w, kernel_size=1, stride=1, pad=0, dilation=1):
    #by Duane Nielsen
    from math import floor
    if type(kernel_size) is not tuple:
        kernel_size = (kernel_size, kernel_size)
    h = floor( ((h_w[0] + (2 * pad) - ( dilation * (kernel_size[0] - 1) ) - 1 )/ stride) + 1)
    w = floor( ((h_w[1] + (2 * pad) - ( dilation * (kernel_size[1] - 1) ) - 1 )/ stride) + 1)
    return h, w
out=conv_output_shape((11,11), kernel_size=2, stride=1, pad=0, dilation=1)
print(out)
out1=conv_output_shape(out, kernel_size=2, stride=1, pad=0, dilation=1)
print(out1)
out2=conv_output_shape(out1, kernel_size=2, stride=1, pad=0, dilation=1)
print(out2)

out3=conv_output_shape(out2, kernel_size=2, stride=1, pad=0, dilation=1)
print(out3)
>> (10, 10)
(9, 9)
(8, 8)
(7, 7)
Ungraded lab - Convolutional Neural Network MNIST

9.4.2CNN_Small_Image.ipynb

Ungraded lab - Convolutional Neural Networks with Batch Norm

9.4.3CNN_Small_Image_batch.ipynb

GPU in PyTorch
torch.cuda.is_available()
>> True

device = torch.device('cuda:0')

torch.tensor([1,2,32,4]).to(device)
>> tensor([ 1,  2, 32,  4], device='cuda:0')

model = CNN()
model.to(device)
Training on GPU
for epoch in range(num_epochs):
    for features, labels in train_loader:
        features, labels = features.to(device), labels.to(device)
        optimizer.zero_grad()
        predictions = model(features)
        loss = criterion(predictions, labels)
        loss.backward()
        optimizer.step()

TORCH-VISION MODELS

load resnet18 with pretrained parameters

import torch
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn as nn
torch.manual_seed(0)

model = models.resnet18(pretrained=True)

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

composed = transforms.Compose([transforms.Resize(224),
                              transforms.ToTensor(),
                              transforms.Normalize(mean, std)])

train_dataset = Dataset(transform=composed, train = True)
validation_dataset = Dataset(transform=composed)

freeze parameters and add a final layer to be trained

for param in model.parameters():
    param.requires_grad=False
model.fc = nn.Linear(512, 7)
train_loader = DataLoader(dataset=train_dataset, batch_size=15)
validation_loader = DataLoader(dataset=validation_dataset, batch_size=10)

provide only the parameters to be trained to the optimizer

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam([parameters for parameters in model.parameters() if parameters.requires_grad], lr = 0.003)

N_EPOCHS = 20
loss_list = []
accuracy_list = []
correct = 0
n_test = len(validation_dataset)

train the model, switching between model.train() and model.eval()

for epoch in range(N_EPOCHS):
    loss_sublist = []
    for x, y in train_loader:
        model.train()
        optimizer.zero_grad()
        z = model(x)
        loss = criterion(z, y)
        loss_sublist.append(loss.data.item())
        loss.backward()
        optimizer.step()
    loss_list.append(np.mean(loss_sublist))
    correct = 0
    for x_test, y_test in validation_loader:
        model.eval()
        z = model(x_test)
        _, yhat = torch.max(z.data, 1)
        correct += (yhat == y_test).sum().item()
    accuracy = correct / n_test
    accuracy_list.append(accuracy)

Week 7 - Fashion MNIST

Learning Objectives

  • Apply all you have learned to train a Convolutional Neural Network

notebook

notebook