
Mxnet-Pytorch-part1---LR-Minist

Posted on 2020-06-12, edited on 2020-12-06, in Pytorch

LR

  • $y = \mathbf{w}^\top \mathbf{x} + b + \epsilon \text{ where } \epsilon \sim \mathcal{N}(0, \sigma^2).$
  • $p(y|\mathbf{x}) = \frac{1}{\sqrt{2 \pi \sigma^2}} \exp\left(-\frac{1}{2 \sigma^2} (y - \mathbf{w}^\top \mathbf{x} - b)^2\right).$
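Taking the negative log of the Gaussian likelihood above shows why squared error is the natural loss for this model (the remaining term is a constant that does not depend on $\mathbf{w}$ or $b$):

$-\log p(y|\mathbf{x}) = \frac{1}{2 \sigma^2} (y - \mathbf{w}^\top \mathbf{x} - b)^2 + \frac{1}{2} \log (2 \pi \sigma^2),$

so maximizing the likelihood over the training data is equivalent to minimizing the squared error.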
%matplotlib inline
import torch
import random
from matplotlib import pyplot as plt
from IPython import display
def synthetic_data(w, b, num_examples):
    """Generate y = X w + b + noise."""
    X = torch.zeros(size=(num_examples, len(w))).normal_()
    y = torch.matmul(X, w) + b
    y += torch.zeros(size=y.shape).normal_(std=0.01)
    return X, y
true_w = torch.tensor([2, -3.4])
true_b = 4.444
features, labels = synthetic_data(true_w, true_b, 1000)
features[0], labels[0]
(tensor([-0.9447, -1.9361]), tensor(9.1399))
# Plot settings
def use_svg_display():
    """Use the svg format to display a plot in Jupyter."""
    display.set_matplotlib_formats('svg')


def set_figsize(figsize=(6, 8)):
    """Set the figure size for matplotlib."""
    use_svg_display()
    plt.rcParams['figure.figsize'] = figsize
set_figsize(figsize=(4, 4))
# Visualize the last feature dimension against the labels;
# the third argument is the marker size.
plt.scatter(features[:, 1].numpy(), labels.numpy(), 1)
<matplotlib.collections.PathCollection at 0x129ad8690>

[Figure: scatter plot of features[:, 1] vs. labels]

# Canonical example of mini-batch data access
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        j = torch.tensor(indices[i: min(i + batch_size, num_examples)])
        yield features[j], labels[j]
batch_size = 10
for X, y in data_iter(batch_size, features, labels):
    print(X, '\n', y)
    break
tensor([[-0.0140,  0.4955],
        [ 0.3740,  1.4491],
        [ 1.3240,  0.6199],
        [-0.0944, -0.1557],
        [-0.2420,  0.2464],
        [-0.6962, -1.4067],
        [ 0.6464, -0.7040],
        [-0.6650,  0.1285],
        [ 0.6528,  0.4227],
        [-1.4647,  0.0225]]) 
 tensor([2.7209, 0.2724, 4.9975, 4.7869, 3.1399, 7.8283, 8.1079, 2.6803, 4.3185,
        1.4501])
#Model parameters
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)
# Model
def linreg(X, w, b):
    return torch.matmul(X, w) + b

# Loss
def squared_error(y_hat, y):
    return (y_hat - y.reshape(y_hat.shape))**2 / 2
"""
Because our loss is calculated as a sum over the batch of examples,
we normalize our step size by the batch size (batch_size), so that the magnitude of a
typical step size does not depend heavily on our choice of the batch size.
"""
# optimizer
def sgd(params, lr, batch_size):
for param in params:
param.data.sub_(lr * param.grad / batch_size) #该梯度是所有batch的和,所有除以batch正则化
param.grad.data.zero_()
# Training
lr = 0.03
num_epochs = 5
for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        loss = squared_error(linreg(X, w, b), y)  # minibatch loss
        loss.mean().backward()  # compute the gradient of the loss w.r.t. [w, b]
        sgd([w, b], lr, batch_size)  # update the parameters with SGD using their gradients
    with torch.no_grad():
        training_loss = squared_error(linreg(X, w, b), y)
        print(f"epoch {epoch + 1}, loss {float(training_loss.mean())}")
epoch 1, loss 0.03302072733640671
epoch 2, loss 0.0073621841147542
epoch 3, loss 0.005785355810075998
epoch 4, loss 0.002183828502893448
epoch 5, loss 0.002760048722848296

Building LR with the framework's built-in data utilities

from torch.utils import data
def load_array(data_array, batch_size, is_train=True):
    dataset = data.TensorDataset(*data_array)
    return data.DataLoader(dataset, batch_size, shuffle=is_train)

batch_size = 10
data_iter = load_array((features, labels), batch_size)
next(iter(data_iter))
[tensor([[-0.6236,  0.8897],
         [-0.3073,  0.9034],
         [ 0.9847, -0.2340],
         [-0.6958,  0.5574],
         [-0.5059, -0.5852],
         [-1.0757, -0.4725],
         [-1.6130, -0.9148],
         [-1.2579,  0.1319],
         [-0.8099, -0.4857],
         [-0.2726,  1.0848]]),
 tensor([0.1864, 0.7726, 7.2163, 1.1565, 5.4282, 3.9169, 4.3345, 1.4896, 4.4729,
         0.2074])]
from torch import nn
net = nn.Sequential(nn.Linear(2, 1))
net[0].weight.data.uniform_(0.0, 0.01)
net[0].bias.data.fill_(0)
loss = nn.MSELoss()
trainer = torch.optim.SGD(net.parameters(), lr=0.03)
num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        l = loss(net(X), y)
        trainer.zero_grad()  # PyTorch requires clearing the gradients before each update
        l.backward()  # compute gradients
        trainer.step()  # update the parameters
    l = loss(net(features), labels)  # loss on the full dataset
    print("epoch {}, loss {}".format(epoch, l))
epoch 1, loss 16.28912353515625
epoch 2, loss 16.194189071655273
epoch 3, loss 16.209455490112305
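The loss here stays around 16 instead of dropping toward the ~0.002 of the from-scratch version. This is most likely because net(X) has shape (batch_size, 1) while y has shape (batch_size,), so nn.MSELoss broadcasts them into a (batch_size, batch_size) matrix and the model effectively ends up fitting the mean of the labels. A minimal sketch of the fix (assuming the same net, loss, trainer, and data_iter as above) is to reshape the labels so the shapes match:

num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        # Reshape y to (batch_size, 1) so it matches the output of nn.Linear(2, 1).
        l = loss(net(X), y.reshape(-1, 1))
        trainer.zero_grad()
        l.backward()
        trainer.step()
    l = loss(net(features), labels.reshape(-1, 1))
    print("epoch {}, loss {}".format(epoch, l))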

Multi-class image classification on Fashion-MNIST

import torchvision
from torchvision import transforms
import sys
# By default pytorch torchvision datasets are of type PIL.
# Change the PIL image to Tensor format.
trans = transforms.ToTensor()
# parameter `transform` takes in a PIL image and returns a transformed version
mnist_train = torchvision.datasets.FashionMNIST(
    root='fashion-mnist/data', train=True, transform=trans, download=True)
mnist_test = torchvision.datasets.FashionMNIST(
    root="fashion-mnist/data", train=False, transform=trans, download=True)
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to fashion-mnist/data/FashionMNIST/raw/train-images-idx3-ubyte.gz
Extracting fashion-mnist/data/FashionMNIST/raw/train-images-idx3-ubyte.gz to fashion-mnist/data/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to fashion-mnist/data/FashionMNIST/raw/train-labels-idx1-ubyte.gz
Extracting fashion-mnist/data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to fashion-mnist/data/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to fashion-mnist/data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz
Extracting fashion-mnist/data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to fashion-mnist/data/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to fashion-mnist/data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting fashion-mnist/data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to fashion-mnist/data/FashionMNIST/raw
Processing...
Done!


/Users/distiller/project/conda/conda-bld/pytorch_1587428061935/work/torch/csrc/utils/tensor_numpy.cpp:141: UserWarning: The given NumPy array is not writeable, and PyTorch does not support non-writeable tensors. This means you can write to the underlying (supposedly non-writeable) NumPy array using the tensor. You may want to copy the array to protect its data or make it writeable before converting it to a tensor. This type of warning will be suppressed for the rest of this program.
def get_fashion_mnist_labels(labels):  #@save
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]
def show_images(imgs, num_rows, num_cols, title=None, scale=1.5):
    fig_size = (num_rows * scale, num_cols * scale)
    _, axes = plt.subplots(num_rows, num_cols, figsize=fig_size)
    axes = axes.flatten()
    for i, (ax, img) in enumerate(zip(axes, imgs)):
        if 'numpy' in dir(img):
            img = img.numpy()
        ax.imshow(img)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        if title:
            ax.set_title(title[i])
    return axes
X, y = next(iter(data.DataLoader(mnist_train, batch_size=5)))
show_images(X.reshape(5, 28, 28), 1, 5, title=get_fashion_mnist_labels(y), scale=10)
array([<matplotlib.axes._subplots.AxesSubplot object at 0x131bad250>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x131be9910>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x131c2a150>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x131c5e950>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x131c9e190>],
      dtype=object)

[Figure: five Fashion-MNIST sample images with their text labels]

batch_size = 256
# num_workers: number of worker processes for loading data
mnist_train = torchvision.datasets.FashionMNIST(
    root="../data", train=True, transform=trans, download=True)
mnist_test = torchvision.datasets.FashionMNIST(
    root="../data", train=False, transform=trans, download=True)
train_iter = data.DataLoader(mnist_train, batch_size, shuffle=True, num_workers=4)
test_iter = data.DataLoader(mnist_test, batch_size, shuffle=False, num_workers=4)
def load_data_fashion_mnist(batch_size, resize=None):  #@save
    """Download the Fashion-MNIST dataset and then load into memory."""
    trans = [transforms.Resize(resize)] if resize else []
    trans.append(transforms.ToTensor())
    trans = transforms.Compose(trans)

    mnist_train = torchvision.datasets.FashionMNIST(
        root="fashion-mnist/data", train=True, transform=trans, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root="fashion-mnist/data", train=False, transform=trans, download=True)
    return (data.DataLoader(mnist_train, batch_size, shuffle=True,
                            num_workers=4),
            data.DataLoader(mnist_test, batch_size, shuffle=False,
                            num_workers=4))

train_iter, test_iter = load_data_fashion_mnist(batch_size)

Model

num_inputs = 784
num_outputs = 10
W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)
b = torch.zeros(num_outputs, requires_grad=True)
X = torch.tensor([[1., 2., 3.], [4., 5., 6.]])
torch.sum(X, dim=0, keepdim=True), torch.sum(X, dim=1, keepdim=True)
(tensor([[5., 7., 9.]]),
 tensor([[ 6.],
         [15.]]))
def softmax(x):
    X_exp = torch.exp(x)
    denominator = torch.sum(X_exp, dim=1, keepdim=True)
    return X_exp / denominator  # [*, k] / [*, 1]
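This direct implementation can overflow when the logits are large, since torch.exp is applied to the raw scores. A numerically safer variant (a sketch, not part of the original post; stable_softmax is a hypothetical name) subtracts the row-wise maximum before exponentiating, which leaves the result mathematically unchanged:

def stable_softmax(x):
    # Subtracting the row-wise max does not change the ratios,
    # but keeps torch.exp from overflowing for large logits.
    x_shifted = x - x.max(dim=1, keepdim=True).values
    x_exp = torch.exp(x_shifted)
    return x_exp / torch.sum(x_exp, dim=1, keepdim=True)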
def Net(X):
    return softmax(torch.matmul(X.reshape((-1, num_inputs)), W) + b)

cross-entropy

  • cross-entropy takes the negative log likelihood of the predicted probability assigned to the true label.
  • $-\log P(y \mid x)$
def crossEntropy(hat_Y, Y):
    return -torch.log(hat_Y[range(len(hat_Y)), Y])
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y_hat[[0, 1], [0, 2]]
tensor([0.1000, 0.5000])
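Applying crossEntropy to this y_hat with true labels [0, 2] picks out exactly those two probabilities and takes their negative logs, i.e. roughly -log 0.1 ≈ 2.30 and -log 0.5 ≈ 0.69 (a small usage sketch; this output is not recorded in the original post):

y = torch.tensor([0, 2])
crossEntropy(y_hat, y)  # expected: tensor([2.3026, 0.6931]) up to rounding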
def accuracy(hat_Y, Y):
    # Distinguish whether the predictions are per-class scores or already class indices.
    if len(hat_Y.shape) > 1 and hat_Y.shape[1] > 1:
        # The argmax over the class dimension is the predicted class.
        hat_Y = hat_Y.argmax(axis=1)
    return float((hat_Y.type(torch.float32) == Y.type(torch.float32)).sum())
y = torch.tensor([0, 2])
accuracy(y_hat, y) / len(y)
0.5
class Accumulator:
    """Sum a list of numbers over time."""

    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]
def evaluate_accuracy(net, data_iter):  #@save
    metric = Accumulator(2)  # number of correct predictions, number of examples
    for X, y in data_iter:
        metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]
def net(X):
    return softmax(torch.matmul(X.reshape((-1, num_inputs)), W) + b)
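With the model, loss, optimizer, and evaluation helpers defined, a minimal training sketch could look like the following, reusing the sgd, crossEntropy, and evaluate_accuracy defined earlier (lr = 0.1 and num_epochs = 5 are assumed values, not taken from the original post):

lr = 0.1
num_epochs = 5
for epoch in range(num_epochs):
    for X, y in train_iter:
        l = crossEntropy(net(X), y)
        # crossEntropy returns one loss per example; sum them so that
        # sgd's division by the batch size yields the average gradient.
        l.sum().backward()
        sgd([W, b], lr, X.shape[0])
    with torch.no_grad():
        test_acc = evaluate_accuracy(net, test_iter)
        print(f"epoch {epoch + 1}, test acc {test_acc:.3f}")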

