Intro ANN

Intro ANN#

Review numpy and pytorch
Perceptron with numpy and pytorch
Training a basic network with pytorch

import numpy as np
import matplotlib.pyplot as plt
import torch
from tqdm import tqdm

---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[1], line 3
      1 import numpy as np
      2 import matplotlib.pyplot as plt
----> 3 import torch
      4 from tqdm import tqdm

ModuleNotFoundError: No module named 'torch'

Derivatives and Gradients for Optimization#

Consider:

\[f(x) = x^2\]

The big idea is that we can use the tangent line at a point to approximate the function itself. Thus, we walk on the tangent line in the negative direction of the slope. If we take small enough steps and readjust each iteration we should find ourselves at the bottom of the graph.

\[x_n = x_{n-1} - \alpha f'(x_{n-1})\]

Start at the point \((1, 1)\) and note that the slope of the tangent line is \(f'(1) = 2*1 = 2\).

def f(x): return x**2
x = np.linspace(-2, 2, 100)
plt.plot(x, f(x))
plt.plot(1, 1, 'ro')

[<matplotlib.lines.Line2D at 0x7ce687367880>]

_images/ea46f88f4addd8df1130ea193559680a3affcfcead2e45d044b862651d190a0a.png

#start - step * slope
1 - .1*(2*1)

0.8

#next step
.8 - .1*(2*.8)

0.64

PROBLEM

Complete the loop below, updating the list xs with each iteration of gradient descent.

xs = [1]
for i in range(10):
  pass

xs = np.array(xs)
plt.plot(x, f(x))
plt.plot(xs, f(xs), 'ro')
plt.title('Did we find the minimum?')
plt.grid();

_images/673acac393a19ca95aa1c377655f2b241bd90bdfe847f312814001e639b4b9a2.png

Introduction to Artificial Neural Networks#

For the examples in our class, we will use the pytorch library for modeling with neural networks. The important object here is the tensor object, similar to the numpy.array but with some extra gradient functionality.

Playground

import torch
import torch.nn as nn
import torch.optim as optim

xt = torch.tensor(x, dtype = torch.float32)
yt = torch.tensor(f(x), dtype = torch.float32)

loss_fn = nn.MSELoss()

def model(x, w, b):
  return w*x**2 + b

w = torch.ones((), requires_grad=True)
b = torch.ones((), requires_grad=True)

model(xt, w, b)

tensor([5.0000, 4.8400, 4.6833, 4.5298, 4.3797, 4.2327, 4.0891, 3.9487, 3.8115,
        3.6777, 3.5471, 3.4198, 3.2957, 3.1749, 3.0573, 2.9431, 2.8321, 2.7243,
        2.6198, 2.5186, 2.4207, 2.3260, 2.2346, 2.1464, 2.0615, 1.9799, 1.9015,
        1.8264, 1.7546, 1.6861, 1.6208, 1.5587, 1.4999, 1.4444, 1.3922, 1.3432,
        1.2975, 1.2551, 1.2159, 1.1800, 1.1473, 1.1179, 1.0918, 1.0690, 1.0494,
        1.0331, 1.0200, 1.0102, 1.0037, 1.0004, 1.0004, 1.0037, 1.0102, 1.0200,
        1.0331, 1.0494, 1.0690, 1.0918, 1.1179, 1.1473, 1.1800, 1.2159, 1.2551,
        1.2975, 1.3432, 1.3922, 1.4444, 1.4999, 1.5587, 1.6208, 1.6861, 1.7546,
        1.8264, 1.9015, 1.9799, 2.0615, 2.1464, 2.2346, 2.3260, 2.4207, 2.5186,
        2.6198, 2.7243, 2.8321, 2.9431, 3.0573, 3.1749, 3.2957, 3.4198, 3.5471,
        3.6777, 3.8115, 3.9487, 4.0891, 4.2327, 4.3797, 4.5298, 4.6833, 4.8400,
        5.0000], grad_fn=<AddBackward0>)

yhat = model(xt, w, b)

loss = loss_fn(yt, yhat)

loss.backward()

w.grad

tensor(2.7205)

b.grad

tensor(2.)

#loss.zero_()

optimizer = optim.SGD([w, b], lr = 0.1)

for epoch in tqdm(range(20)):
  yhat = model(xt, w, b)
  loss = loss_fn(yt, yhat)
  loss.backward()
  optimizer.step()
  optimizer.zero_grad()

100%|██████████| 20/20 [00:00<00:00, 2071.62it/s]

tensor(0.9263, requires_grad=True)

tensor(0.1602, requires_grad=True)

def soln(x): return w.detach().numpy()*x**2 + b.detach().numpy()
plt.plot(x, f(x))
plt.plot(x, soln(x), '--r')

[<matplotlib.lines.Line2D at 0x7ce6874dfd00>]

_images/90b9eb0ad323093f5cc161c7ca8dc75ddf6a7cf3a7b86864feb3d18741352174.png

Pytorch and Regression#

np.random.seed(11)
x = np.random.random_integers(low = 1, high = 30, size = 15)
y = 3*x + 4 + np.random.normal(size = len(x), scale = 3)

<ipython-input-616-75502d753c78>:2: DeprecationWarning: This function is deprecated. Please call randint(1, 30 + 1) instead
  x = np.random.random_integers(low = 1, high = 30, size = 15)

plt.scatter(x, y)

<matplotlib.collections.PathCollection at 0x7ce685bd5150>

_images/6ca7e60eac77985752edc623d58ff78fcbd5eb8a7e943aecd8d1c92985331730.png

#rule 1 -- turn everything into a tensor
xt = torch.tensor(x, dtype = torch.float32)
yt = torch.tensor(y, dtype = torch.float32)

xt

tensor([26., 17., 28., 18., 24., 14., 13.,  2.,  8., 19., 25., 14., 29., 17.,
        30.])

yt

tensor([80.3901, 55.9462, 89.2632, 54.8032, 73.3413, 44.5728, 45.0690, 11.6836,
        24.0834, 57.6416, 81.2105, 50.7239, 90.9068, 52.9497, 97.2869])

### define the model
model = nn.Linear(in_features=1, out_features=1)

list(model.parameters())

[Parameter containing:
 tensor([[-0.2880]], requires_grad=True),
 Parameter containing:
 tensor([-0.1145], requires_grad=True)]

loss_fn = nn.MSELoss()

optimizer = optim.SGD(model.parameters(), lr = 0.01)

model(xt.reshape(-1, 1))

tensor([[-7.6035],
        [-5.0112],
        [-8.1796],
        [-5.2992],
        [-7.0275],
        [-4.1471],
        [-3.8590],
        [-0.6906],
        [-2.4188],
        [-5.5873],
        [-7.3155],
        [-4.1471],
        [-8.4676],
        [-5.0112],
        [-8.7557]], grad_fn=<AddmmBackward0>)

yhat = model(xt.reshape(-1, 1))

loss_fn(yt.reshape(-1, 1), yhat )

tensor(5064.9062, grad_fn=<MseLossBackward0>)

loss = loss_fn(yhat, yt.reshape(-1, 1))

loss.backward()

optimizer.step()

list(model.parameters())

[Parameter containing:
 tensor([[28.8443]], requires_grad=True),
 Parameter containing:
 tensor([1.2100], requires_grad=True)]

model = nn.Linear(in_features=1, out_features=1)
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr = 0.1)

losses = []
for epoch in tqdm(range(100)):
  yhat = model(xt.reshape(-1, 1))
  loss = loss_fn( yhat, yt.reshape(-1, 1))
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()
  losses.append(loss.item())

100%|██████████| 100/100 [00:00<00:00, 1651.63it/s]

fig, ax = plt.subplots(1, 2, figsize = (15, 5))
ax[0].plot(losses, '--r')
ax[0].set_title('Training Loss')
ax[0].grid();

ax[1].scatter(x, y)
ax[1].plot(x, model(xt.reshape(-1, 1)).detach().numpy(), '-r')
ax[1].set_title('Trained Model')
ax[1].grid();

_images/1da02e4c4abd51ba168addf2167f710e3bd384eaf5e47d68d6a7ca455703ebac.png

import pandas as pd

Binary Classification#

Same process but we add a Sigmoid to the end of the network to interpret the output as probabilities.

Loss will use BCELoss or Binary Cross Entropy – a measure associated with the quality of predictions in binary classification.

\[\text{BCE Loss} = -\frac{1}{n} \sum_{i = 1}^n y_i \log(p_i) + (1 - y_i)\log(1 - p_i)\]

from sklearn.datasets import make_blobs

### make a basic classification dataset
X, y = make_blobs(centers = 2, center_box=(-3, 3), random_state = 22)

X.shape

(100, 2)

plt.scatter(X[:, 0], X[:, 1], c = y)
plt.grid()
plt.title('Binary Classification Data');

_images/99bc0bec019d85e38039be1439769d4f7979ad5b90b3413de85c6d15dc70e1fe.png

model = nn.Sequential(nn.Linear(in_features=2, out_features=1), nn.Sigmoid())
# WE NEED BINARY CLASSIFICATION LOSS
loss = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr = 0.1)

X = torch.tensor(X, dtype = torch.float32)
y = torch.tensor(y, dtype = torch.float32)

losses = []
for epoch in tqdm(range(100)):
  #make some predictions
  yhat = model(X)
  #evaluate the predictions
  loss_val = loss(yhat, y.unsqueeze(1))
  #use those predictions to update the parameters
  optimizer.zero_grad() #clearing out any prior gradient info
  loss_val.backward() #computes derivatives/gradients of loss function
  optimizer.step() #steps towards minimum values
  #keep track of how we are doing
  losses.append(loss_val.item())

100%|██████████| 100/100 [00:00<00:00, 2096.67it/s]

yhat.dtype

torch.float32

#the model returns probabilities
model(X)[:10]

tensor([[0.7954],
        [0.0077],
        [0.0995],
        [0.0091],
        [0.0183],
        [0.8861],
        [0.9653],
        [0.0081],
        [0.1589],
        [0.9119]], grad_fn=<SliceBackward0>)

torch.where(model(X) > 0.5, 1, 0).flatten()

tensor([1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0,
        0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1,
        1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0,
        1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0,
        1, 1, 0, 0])

tensor([1., 0., 0., 0., 0., 1., 1., 0., 0., 1., 0., 0., 1., 0., 0., 1., 1., 1.,
        1., 1., 1., 0., 1., 0., 0., 1., 0., 0., 0., 1., 1., 1., 0., 0., 1., 0.,
        1., 1., 0., 0., 0., 0., 1., 1., 0., 0., 1., 1., 1., 1., 0., 1., 0., 0.,
        1., 0., 1., 1., 1., 0., 1., 0., 0., 1., 0., 0., 1., 0., 1., 0., 0., 0.,
        1., 0., 0., 1., 0., 0., 1., 1., 1., 1., 1., 1., 1., 0., 0., 1., 0., 0.,
        1., 1., 1., 1., 0., 0., 1., 1., 0., 0.])

torch.where(model(X) > 0.5, 1, 0).flatten() == y

tensor([ True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True, False,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True])

(torch.where(model(X) > 0.5, 1, 0).flatten() == y).sum()

tensor(99)

X.shape

torch.Size([100, 2])

yhat = torch.where(model(X) > 0, 1, 0)

yhat.shape

torch.Size([100, 1])

y == yhat.flatten()

tensor([ True, False, False, False, False,  True,  True, False, False,  True,
        False, False,  True, False, False,  True,  True,  True,  True,  True,
         True, False,  True, False, False,  True, False, False, False,  True,
         True,  True, False, False,  True, False,  True,  True, False, False,
        False, False,  True,  True, False, False,  True,  True,  True,  True,
        False,  True, False, False,  True, False,  True,  True,  True, False,
         True, False, False,  True, False, False,  True, False,  True, False,
        False, False,  True, False, False,  True, False, False,  True,  True,
         True,  True,  True,  True,  True, False, False,  True, False, False,
         True,  True,  True,  True, False, False,  True,  True, False, False])

Image Classification#

from torchvision.datasets import FashionMNIST
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor

train = FashionMNIST(root = './data', train = True, download = True, transform = ToTensor())
test = FashionMNIST(root = './data', train = False, download = True, transform=ToTensor())

trainloader = DataLoader(train, batch_size = 10, shuffle = True)
testloader = DataLoader(test, batch_size = 10, shuffle = False)

plt.imshow(train[0][0].squeeze(0), cmap = 'gray')

<matplotlib.image.AxesImage at 0x7ce685789810>

_images/88678bdacd7b14a8341296df31458f32781cef8476500ceb3484b38bddfb6da2.png

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = nn.Sequential(nn.Flatten(),
                      nn.Linear(in_features=28*28, out_features=10))
model = model.to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.1)

device

device(type='cuda')

for epoch in tqdm(range(10)):
  for x, y in trainloader:
    x = x.to(device)
    y = y.to(device)
    yhat = model(x)
    loss = loss_fn(yhat, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

100%|██████████| 10/10 [02:11<00:00, 13.19s/it]

model(x).argmax(dim = 1)

tensor([7, 5, 4, 4, 3, 4, 8, 3, 7, 4], device='cuda:0')

tensor([7, 5, 4, 4, 3, 4, 8, 3, 7, 4], device='cuda:0')

(model(x).argmax(dim = 1) == y).sum()

tensor(10, device='cuda:0')

correct = 0
total = 0
for x, y in testloader:
    x = x.to(device)
    y = y.to(device)
    yhat = model(x)
    correct += (yhat.argmax(dim = 1) == y).sum()
    total += len(y)

print(f'Accuracy: {correct/total}')

Accuracy: 0.8136999607086182