Principle

For a classification problem we want the model to output a label indicating which class an example belongs to, so the model should output one probability per label, and the label with the largest probability is the prediction. The outputs are normalized with softmax:
$$
\hat{\pmb y}=\mathrm{softmax}(\pmb o),\quad \hat y_j=\frac{e^{o_j}}{\sum_{k}e^{o_k}}
$$
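
For intuition, here is a minimal sketch (the logits are arbitrary) showing that softmax turns any real vector into a probability distribution:

import tensorflow as tf

o = tf.constant([2.0, 1.0, 0.1])              # arbitrary logits
y_hat = tf.exp(o) / tf.reduce_sum(tf.exp(o))  # softmax by hand
print(y_hat.numpy())                          # approx. [0.659 0.242 0.099]
print(float(tf.reduce_sum(y_hat)))            # 1.0: a valid distribution
print(int(tf.argmax(y_hat)))                  # 0: the predicted label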
The vectorized expression for softmax regression over a minibatch is therefore:
$$
\pmb O=\pmb X\pmb W+\pmb b,\quad \hat{\pmb Y}=\mathrm{softmax}(\pmb O)
$$
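
A quick shape check of this expression, assuming a toy minibatch of 2 examples with 4 features and 3 classes (all sizes are made up for illustration):

X = tf.random.normal((2, 4))                  # minibatch: 2 examples, 4 features
W = tf.random.normal((4, 3), stddev=0.01)     # weights: 4 inputs -> 3 classes
b = tf.zeros(3)
O = tf.matmul(X, W) + b                       # logits, shape (2, 3)
Y_hat = tf.nn.softmax(O)                      # softmax applied row by row
print(Y_hat.shape, tf.reduce_sum(Y_hat, axis=1).numpy())  # (2, 3), each row sums to 1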
Define the loss function as the cross-entropy:
$$
l(\pmb y,\hat{\pmb y})=-\sum_{j} y_j \log \hat{y}_j
$$
The parameters are then updated using the gradient at each step.
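
Below is a minimal sketch of one such update, reusing the toy X, W, b shapes from above; the labels and the learning rate are arbitrary:

y = tf.constant([0, 2])                       # true class indices for the 2 examples
W = tf.Variable(tf.random.normal((4, 3), stddev=0.01))
b = tf.Variable(tf.zeros(3))
with tf.GradientTape() as tape:
    Y_hat = tf.nn.softmax(tf.matmul(X, W) + b)
    # cross-entropy: -log of the probability assigned to the true class
    l = -tf.reduce_mean(tf.math.log(tf.gather(Y_hat, y, axis=1, batch_dims=1)))
grads = tape.gradient(l, [W, b])
W.assign_sub(0.1 * grads[0])                  # one SGD step with lr = 0.1
b.assign_sub(0.1 * grads[1])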

Implementing softmax regression (TensorFlow's high-level API)

  1. Read the dataset;
  2. Define the model;
  3. Define the loss function and the optimizer;
  4. Set the hyperparameters and train.

Loading the dataset

import tensorflow as tf
from d2l import tensorflow as d2l
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
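
To see what the iterators yield, peek at a single batch; the exact shapes depend on the d2l version, but the loader returns image tensors paired with integer class labels:

for X, y in train_iter:
    print(X.shape, X.dtype)                   # e.g. (256, 28, 28, 1), float32
    print(y.shape, y.dtype)                   # (256,), int32
    break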

Defining the model, loss function, and optimizer

net = tf.keras.models.Sequential()
### Add a Flatten input layer: the input is declared as a 28*28 matrix and is flattened into 784 inputs at run time
net.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
weight_initializer = tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.01)
### Add a fully connected layer with 10 nodes (the outputs) and initialize its weights
net.add(tf.keras.layers.Dense(10, kernel_initializer=weight_initializer))

### Define the loss function
### To avoid overflow or underflow in the softmax, the softmax and the cross-entropy are computed in a single step, similar to the LogSumExp trick
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

### Define the optimization algorithm
trainer = tf.keras.optimizers.SGD(learning_rate=0.1)
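
The LogSumExp comment is easy to verify: exponentiating large logits overflows float32, so a hand-rolled softmax yields nan, while the fused loss defined above stays finite (the logit values are arbitrary):

big = tf.constant([[1000.0, 1000.0]])         # exp(1000.) overflows float32
y0 = tf.constant([0])
exp_o = tf.exp(big)                           # [[inf inf]]
print((exp_o / tf.reduce_sum(exp_o)).numpy()) # [[nan nan]] -- inf / inf
print(float(loss(y0, big)))                   # approx. 0.693 = log(2), computed stably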

Defining a classification accuracy function

### Count the number of correct predictions
def accuracy(y_hat, y):
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = tf.argmax(y_hat, axis=1)
    cmp = tf.cast(y_hat, y.dtype) == y
    return float(tf.reduce_sum(tf.cast(cmp, y.dtype)))

### Accumulate sums over n variables
class Accumulator:
    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

### Compute the accuracy of the model on the given dataset
def evaluate_accuracy(net, data_iter):
    metric = Accumulator(2)  # number of correct predictions, number of predictions
    for X, y in data_iter:
        metric.add(accuracy(net(X), y), d2l.size(y))
    return metric[0] / metric[1]
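
Since the weights were initialized randomly, the untrained model should score close to random guessing (about 0.1 with 10 classes), which makes a quick sanity check for the functions above:

print(evaluate_accuracy(net, test_iter))      # roughly 0.1 before training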

Plotting

from IPython import display

### Plot data incrementally in an animation
class Animator:
    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
                 figsize=(3.5, 2.5)):
        # Draw multiple lines incrementally
        if legend is None:
            legend = []
        d2l.use_svg_display()
        self.fig, self.axes = d2l.plt.subplots(nrows, ncols, figsize=figsize)
        if nrows * ncols == 1:
            self.axes = [self.axes, ]
        # Use a lambda to capture the axis-configuration arguments
        self.config_axes = lambda: d2l.set_axes(
            self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale, legend)
        self.X, self.Y, self.fmts = None, None, fmts

    def add(self, x, y):
        # Add multiple data points to the figure
        if not hasattr(y, "__len__"):
            y = [y]
        n = len(y)
        if not hasattr(x, "__len__"):
            x = [x] * n
        if not self.X:
            self.X = [[] for _ in range(n)]
        if not self.Y:
            self.Y = [[] for _ in range(n)]
        for i, (a, b) in enumerate(zip(x, y)):
            if a is not None and b is not None:
                self.X[i].append(a)
                self.Y[i].append(b)
        self.axes[0].cla()
        for x, y, fmt in zip(self.X, self.Y, self.fmts):
            self.axes[0].plot(x, y, fmt)
        self.config_axes()
        display.display(self.fig)
        display.clear_output(wait=True)
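
A quick usage sketch with invented values: each call to add appends one point per curve and redraws the figure, which is how the training loop below produces a live plot in a notebook:

animator = Animator(xlabel='epoch', ylabel='value',
                    xlim=[1, 3], legend=['a', 'b'])
for epoch in range(1, 4):
    animator.add(epoch, (1.0 / epoch, 0.5 * epoch))  # two curves, one point each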

Training

def train_epoch_ch3(net, train_iter, loss, updater):
    # Sum of training loss, sum of training accuracy, number of examples
    metric = Accumulator(3)
    for X, y in train_iter:
        # Compute gradients and update the parameters
        with tf.GradientTape() as tape:
            y_hat = net(X)
            l = loss(y, y_hat)

        params = net.trainable_variables
        grads = tape.gradient(l, params)
        updater.apply_gradients(zip(grads, params))
        # Keras losses return the average loss over a batch by default
        l_sum = l * float(tf.size(y))
        metric.add(l_sum, accuracy(y_hat, y), tf.size(y))
    # Return the training loss and training accuracy
    return metric[0] / metric[2], metric[1] / metric[2]

def train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer):
    animator = Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0.3, 0.9],
                        legend=['train loss', 'train acc', 'test acc'])
    for epoch in range(num_epochs):
        train_metrics = train_epoch_ch3(net, train_iter, loss, trainer)
        test_acc = evaluate_accuracy(net, test_iter)
        animator.add(epoch + 1, train_metrics + (test_acc,))
    train_loss, train_acc = train_metrics

    ### Stop with an error if the results fail these sanity checks
    assert train_loss < 0.5, train_loss
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc

num_epochs = 10
train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)

Prediction

### Show only the first 6 test images with true and predicted labels
def predict_ch3(net, test_iter, n=6):
    for X, y in test_iter:
        break
    trues = d2l.get_fashion_mnist_labels(y)
    preds = d2l.get_fashion_mnist_labels(tf.argmax(net(X), axis=1))
    titles = [true + '\n' + pred for true, pred in zip(trues, preds)]
    d2l.show_images(
        tf.reshape(X[0:n], (n, 28, 28)), 1, n, titles=titles[0:n])

predict_ch3(net, test_iter)