1. Replace the raw w1/w2 weight matrices with a Linear class (a sketch of the pre-refactor version follows below, for contrast)
  2. Replace x @ w with a forward method
  3. Package the sigmoid and softmax forward passes into classes
  4. Package the sigmoid and softmax backward passes into those classes
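For contrast, here is a minimal sketch of what the pre-refactor version presumably looked like: raw w1/w2 weight matrices and inline math. Only the names w1 and w2 come from the step list above; the rest is a hypothetical reconstruction, not the original code.

```python
import numpy as np

# Hypothetical pre-refactor forward pass: everything the four steps above
# are about to package into Linear / Sigmoid / Softmax classes.
w1 = np.random.normal(0, 1, size=(784, 256))
w2 = np.random.normal(0, 1, size=(256, 10))

def forward_raw(x):
    h = x @ w1                     # steps 1-2 turn this into Linear.forward
    sig_h = 1 / (1 + np.exp(-h))   # step 3 packages this into a Sigmoid class
    p = sig_h @ w2
    ex = np.exp(p)                 # inline softmax, packaged by step 3 as well
    return ex / ex.sum(axis=1, keepdims=True)
```

With that baseline in mind, here is the refactored version: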
```python
import matplotlib.pyplot as plt
import numpy as np
import struct

def load_labels(file):  # load the MNIST label file
    with open(file, "rb") as f:
        data = f.read()
    return np.asanyarray(bytearray(data[8:]), dtype=np.int32)

def load_images(file):  # load the MNIST image file
    with open(file, "rb") as f:
        data = f.read()
    magic_number, num_items, rows, cols = struct.unpack(">iiii", data[:16])
    return np.asanyarray(bytearray(data[16:]), dtype=np.uint8).reshape(num_items, -1)

# Turn the labels into a one-hot matrix (60000 x 10)
def make_one_hot(labels, class_num=10):
    result = np.zeros((len(labels), class_num))
    for index, lab in enumerate(labels):  # enumerate yields (index, value) pairs over any iterable
        result[index][lab] = 1
    return result

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def softmax(x):
    ex = np.exp(x)  # element-wise exponential over the whole matrix
    sum_ex = np.sum(ex, axis=1, keepdims=True)  # row-wise sum; keepdims=True preserves the 2-D shape
    return ex / sum_ex

def get_datas():
    train_datas = load_images("data/train-images.idx3-ubyte") / 255  # (60000, 784)
    train_label = make_one_hot(load_labels("data/train-labels.idx1-ubyte"), 10)  # (60000,) -> (60000, 10)

    test_datas = load_images("data/t10k-images.idx3-ubyte") / 255
    test_label = load_labels("data/t10k-labels.idx1-ubyte")

    return train_datas, train_label, test_datas, test_label

class Linear:
    def __init__(self, in_num, out_num):
        self.weight = np.random.normal(0, 1, size=(in_num, out_num))

    def forward(self, x):
        self.x = x
        return self.x @ self.weight

    def backward(self, G):
        delta_weight = self.x.T @ G
        delta_x = G @ self.weight.T
        self.weight -= lr * delta_weight  # the optimizer step: plain gradient descent (SGD)
        return delta_x

class Sigmoid:
    def forward(self, x):
        self.r = sigmoid(x)
        return self.r

    def backward(self, G):
        return G * self.r * (1 - self.r)

class Softmax:
    def forward(self, x):
        self.r = softmax(x)
        return self.r

    def backward(self, G):  # G here is the one-hot label
        return (self.r - G) / self.r.shape[0]  # shape[0] is the batch size

if __name__ == "__main__":
    train_datas, train_label, test_datas, test_label = get_datas()

    # hyperparameters
    epoch = 100
    batch_size = 600  # how many images to process at once
    lr = 0.01
    hidden_num = 256  # hidden-layer size

    linear1_layer = Linear(784, hidden_num)
    sigmoid_layer = Sigmoid()
    linear2_layer = Linear(hidden_num, 10)
    softmax_layer = Softmax()

    batch_times = int(np.ceil(len(train_datas) / batch_size))  # np.ceil rounds up

    for e in range(epoch):
        for batch_index in range(batch_times):

            batch_x = train_datas[batch_index * batch_size : (batch_index + 1) * batch_size]  # take batch_size rows at a time
            batch_label = train_label[batch_index * batch_size : (batch_index + 1) * batch_size]

            # forward
            h = linear1_layer.forward(batch_x)
            sig_h = sigmoid_layer.forward(h)
            p = linear2_layer.forward(sig_h)
            pre = softmax_layer.forward(p)

            # compute the loss
            loss = -np.sum(batch_label * np.log(pre)) / batch_size  # average cross-entropy per sample

            # backward && update the weights
            G2 = softmax_layer.backward(batch_label)
            delta_sig_h = linear2_layer.backward(G2)
            delta_h = sigmoid_layer.backward(delta_sig_h)
            linear1_layer.backward(delta_h)

        # measure accuracy on the test set
        h = linear1_layer.forward(test_datas)
        sig_h = sigmoid_layer.forward(h)
        p = linear2_layer.forward(sig_h)
        pre = softmax_layer.forward(p)  # pre is (10000, 10): one row of class probabilities per image
        pre = np.argmax(pre, axis=1)  # index of each row's maximum, i.e. a length-10000 vector of predicted digits

        acc = np.sum(pre == test_label) / 10000

        print(acc)

    # plotting
    # t = train_datas[1107]
    # plt.imshow(t.reshape(28, 28))
    # plt.show()
    # print(train_label[1037])
```
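Why does `Softmax.backward` return simply `(self.r - G) / batch_size`? Because the softmax is fused with the cross-entropy loss: for `L = -sum(y * log(p)) / batch`, the gradient with respect to the pre-softmax logits collapses to `(p - y) / batch`. A quick finite-difference check (my own sketch, not part of the original code) confirms the identity:

```python
import numpy as np

def softmax(x):
    ex = np.exp(x - x.max(axis=1, keepdims=True))
    return ex / ex.sum(axis=1, keepdims=True)

def ce_loss(z, y):  # mean cross-entropy over the batch
    return -np.sum(y * np.log(softmax(z))) / z.shape[0]

rng = np.random.default_rng(0)
z = rng.normal(size=(4, 10))             # 4 samples, 10 classes
y = np.eye(10)[rng.integers(0, 10, 4)]   # random one-hot labels

analytic = (softmax(z) - y) / z.shape[0]  # what Softmax.backward computes

numeric = np.zeros_like(z)
eps = 1e-6
for i in range(z.shape[0]):
    for j in range(z.shape[1]):
        zp, zm = z.copy(), z.copy()
        zp[i, j] += eps
        zm[i, j] -= eps
        numeric[i, j] = (ce_loss(zp, y) - ce_loss(zm, y)) / (2 * eps)

print(np.abs(analytic - numeric).max())  # on the order of 1e-10
```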
  1. Simplify the variables
```python
import matplotlib.pyplot as plt
import numpy as np
import struct

def load_labels(file):  # load the MNIST label file
    with open(file, "rb") as f:
        data = f.read()
    return np.asanyarray(bytearray(data[8:]), dtype=np.int32)

def load_images(file):  # load the MNIST image file
    with open(file, "rb") as f:
        data = f.read()
    magic_number, num_items, rows, cols = struct.unpack(">iiii", data[:16])
    return np.asanyarray(bytearray(data[16:]), dtype=np.uint8).reshape(num_items, -1)

# Turn the labels into a one-hot matrix (60000 x 10)
def make_one_hot(labels, class_num=10):
    result = np.zeros((len(labels), class_num))
    for index, lab in enumerate(labels):
        result[index][lab] = 1
    return result

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def softmax(x):
    ex = np.exp(x)  # element-wise exponential over the whole matrix
    sum_ex = np.sum(ex, axis=1, keepdims=True)  # row-wise sum; keepdims=True preserves the 2-D shape
    return ex / sum_ex

def get_datas():
    train_datas = load_images("data/train-images.idx3-ubyte") / 255  # (60000, 784)
    train_label = make_one_hot(load_labels("data/train-labels.idx1-ubyte"), 10)  # (60000,) -> (60000, 10)

    test_datas = load_images("data/t10k-images.idx3-ubyte") / 255
    test_label = load_labels("data/t10k-labels.idx1-ubyte")

    return train_datas, train_label, test_datas, test_label

class Linear:
    def __init__(self, in_num, out_num):
        self.weight = np.random.normal(0, 1, size=(in_num, out_num))

    def forward(self, x):
        self.x = x
        return self.x @ self.weight

    def backward(self, G):
        delta_weight = self.x.T @ G
        delta_x = G @ self.weight.T
        self.weight -= lr * delta_weight  # the optimizer step: plain gradient descent (SGD)
        return delta_x

class Sigmoid:
    def forward(self, x):
        self.r = sigmoid(x)
        return self.r

    def backward(self, G):
        return G * self.r * (1 - self.r)

class Softmax:
    def forward(self, x):
        self.r = softmax(x)
        return self.r

    def backward(self, G):  # G here is the one-hot label
        return (self.r - G) / self.r.shape[0]  # shape[0] is the batch size

if __name__ == "__main__":
    train_datas, train_label, test_datas, test_label = get_datas()

    # hyperparameters
    epoch = 100
    batch_size = 600  # how many images to process at once
    lr = 0.01
    hidden_num = 256  # hidden-layer size

    linear1_layer = Linear(784, hidden_num)
    sigmoid_layer = Sigmoid()
    linear2_layer = Linear(hidden_num, 10)
    softmax_layer = Softmax()

    batch_times = int(np.ceil(len(train_datas) / batch_size))  # np.ceil rounds up

    for e in range(epoch):
        for batch_index in range(batch_times):

            batch_x = train_datas[batch_index * batch_size : (batch_index + 1) * batch_size]  # take batch_size rows at a time
            batch_label = train_label[batch_index * batch_size : (batch_index + 1) * batch_size]

            # forward: thread a single x through every layer
            x = batch_x
            x = linear1_layer.forward(x)
            x = sigmoid_layer.forward(x)
            x = linear2_layer.forward(x)
            x = softmax_layer.forward(x)

            # compute the loss
            loss = -np.sum(batch_label * np.log(x)) / batch_size  # average cross-entropy per sample

            # backward && update the weights: thread a single G back through
            G = batch_label
            G = softmax_layer.backward(G)
            G = linear2_layer.backward(G)
            G = sigmoid_layer.backward(G)
            linear1_layer.backward(G)

        # measure accuracy on the test set
        x = test_datas
        x = linear1_layer.forward(x)
        x = sigmoid_layer.forward(x)
        x = linear2_layer.forward(x)
        x = softmax_layer.forward(x)

        pre = np.argmax(x, axis=1)  # index of each row's maximum, i.e. a length-10000 vector of predicted digits

        acc = np.sum(pre == test_label) / 10000

        print(acc)

    # plotting
    # t = train_datas[1107]
    # plt.imshow(t.reshape(28, 28))
    # plt.show()
    # print(train_label[1037])
```
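One caveat before moving on: `softmax` above exponentiates the raw logits, and `np.exp` overflows once a value exceeds roughly 709. A standard remedy (my addition, not in the original code) subtracts each row's maximum first; softmax is shift-invariant within a row, so the output is unchanged:

```python
import numpy as np

def softmax_stable(x):
    ex = np.exp(x - x.max(axis=1, keepdims=True))  # shift each row so its max is 0
    return ex / ex.sum(axis=1, keepdims=True)

z = np.array([[1000.0, 1001.0, 1002.0]])
print(softmax_stable(z))  # [[0.09003057 0.24472847 0.66524096]]; the naive version overflows to nan
```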
  1. Encapsulate the layers in a list
```python
import matplotlib.pyplot as plt
import numpy as np
import struct

def load_labels(file):  # load the MNIST label file
    with open(file, "rb") as f:
        data = f.read()
    return np.asanyarray(bytearray(data[8:]), dtype=np.int32)

def load_images(file):  # load the MNIST image file
    with open(file, "rb") as f:
        data = f.read()
    magic_number, num_items, rows, cols = struct.unpack(">iiii", data[:16])
    return np.asanyarray(bytearray(data[16:]), dtype=np.uint8).reshape(num_items, -1)

# Turn the labels into a one-hot matrix (60000 x 10)
def make_one_hot(labels, class_num=10):
    result = np.zeros((len(labels), class_num))
    for index, lab in enumerate(labels):
        result[index][lab] = 1
    return result

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def softmax(x):
    ex = np.exp(x)  # element-wise exponential over the whole matrix
    sum_ex = np.sum(ex, axis=1, keepdims=True)  # row-wise sum; keepdims=True preserves the 2-D shape
    return ex / sum_ex

def get_datas():
    train_datas = load_images("data/train-images.idx3-ubyte") / 255  # (60000, 784)
    train_label = make_one_hot(load_labels("data/train-labels.idx1-ubyte"), 10)  # (60000,) -> (60000, 10)

    test_datas = load_images("data/t10k-images.idx3-ubyte") / 255
    test_label = load_labels("data/t10k-labels.idx1-ubyte")

    return train_datas, train_label, test_datas, test_label

class Linear:
    def __init__(self, in_num, out_num):
        self.weight = np.random.normal(0, 1, size=(in_num, out_num))

    def forward(self, x):
        self.x = x
        return self.x @ self.weight

    def backward(self, G):
        delta_weight = self.x.T @ G
        delta_x = G @ self.weight.T
        self.weight -= lr * delta_weight  # the optimizer step: plain gradient descent (SGD)
        return delta_x

class Sigmoid:
    def forward(self, x):
        self.r = sigmoid(x)
        return self.r

    def backward(self, G):
        return G * self.r * (1 - self.r)

class Softmax:
    def forward(self, x):
        self.r = softmax(x)
        return self.r

    def backward(self, G):  # G here is the one-hot label
        return (self.r - G) / self.r.shape[0]  # shape[0] is the batch size

if __name__ == "__main__":
    train_datas, train_label, test_datas, test_label = get_datas()

    # hyperparameters
    epoch = 100
    batch_size = 600  # how many images to process at once
    lr = 0.01
    hidden_num = 256  # hidden-layer size

    layers = [
        Linear(784, hidden_num),
        Sigmoid(),
        Linear(hidden_num, 10),
        Softmax()
    ]

    batch_times = int(np.ceil(len(train_datas) / batch_size))  # np.ceil rounds up

    for e in range(epoch):
        for batch_index in range(batch_times):

            x = train_datas[batch_index * batch_size : (batch_index + 1) * batch_size]  # take batch_size rows at a time
            batch_label = train_label[batch_index * batch_size : (batch_index + 1) * batch_size]

            # forward
            for layer in layers:
                x = layer.forward(x)

            # compute the loss
            loss = -np.sum(batch_label * np.log(x)) / batch_size  # average cross-entropy per sample

            # backward && update the weights
            G = batch_label
            for layer in layers[::-1]:  # iterate from the last layer back to the first
                G = layer.backward(G)

        # measure accuracy on the test set
        x = test_datas
        for layer in layers:
            x = layer.forward(x)

        pre = np.argmax(x, axis=1)  # index of each row's maximum, i.e. a length-10000 vector of predicted digits
        acc = np.sum(pre == test_label) / 10000

        print(acc)
```
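The payoff of the list-based design is that changing the architecture now means editing one list; the training loop stays untouched. A hedged sketch reusing the Linear/Sigmoid/Softmax classes defined above (the extra hidden size of 128 is my own choice, not from the original):

```python
layers = [
    Linear(784, 256),
    Sigmoid(),
    Linear(256, 128),  # an extra hidden layer, slotted in with two lines
    Sigmoid(),
    Linear(128, 10),
    Softmax()
]
# The forward loop `for layer in layers: x = layer.forward(x)` and the
# backward loop over layers[::-1] work unchanged, because every layer
# exposes the same forward/backward interface.
```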
  1. Final code (fully encapsulated)
```python
import matplotlib.pyplot as plt
import numpy as np
import struct

def load_labels(file):  # load the MNIST label file
    with open(file, "rb") as f:
        data = f.read()
    return np.asanyarray(bytearray(data[8:]), dtype=np.int32)

def load_images(file):  # load the MNIST image file
    with open(file, "rb") as f:
        data = f.read()
    magic_number, num_items, rows, cols = struct.unpack(">iiii", data[:16])
    return np.asanyarray(bytearray(data[16:]), dtype=np.uint8).reshape(num_items, -1)

# Turn the labels into a one-hot matrix (60000 x 10)
def make_one_hot(labels, class_num=10):
    result = np.zeros((len(labels), class_num))
    for index, lab in enumerate(labels):
        result[index][lab] = 1
    return result

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def softmax(x):
    ex = np.exp(x)  # element-wise exponential over the whole matrix
    sum_ex = np.sum(ex, axis=1, keepdims=True)  # row-wise sum; keepdims=True preserves the 2-D shape
    return ex / sum_ex

def get_datas():
    train_datas = load_images("data/train-images.idx3-ubyte") / 255  # (60000, 784)
    train_label = make_one_hot(load_labels("data/train-labels.idx1-ubyte"), 10)  # (60000,) -> (60000, 10)

    test_datas = load_images("data/t10k-images.idx3-ubyte") / 255
    test_label = load_labels("data/t10k-labels.idx1-ubyte")

    return train_datas, train_label, test_datas, test_label

class Linear:
    def __init__(self, in_num, out_num):
        self.weight = np.random.normal(0, 1, size=(in_num, out_num))

    def forward(self, x):
        self.x = x
        return self.x @ self.weight

    def backward(self, G):
        delta_weight = self.x.T @ G
        delta_x = G @ self.weight.T
        self.weight -= lr * delta_weight  # the optimizer step: plain gradient descent (SGD)
        return delta_x

    def __call__(self, x):  # lets the instance be called like a function
        return self.forward(x)

class Sigmoid:
    def forward(self, x):
        self.r = sigmoid(x)
        return self.r

    def backward(self, G):
        return G * self.r * (1 - self.r)

    def __call__(self, x):
        return self.forward(x)

class Softmax:
    def forward(self, x):
        self.r = softmax(x)
        return self.r

    def backward(self, G):  # G here is the one-hot label
        return (self.r - G) / self.r.shape[0]  # shape[0] is the batch size

    def __call__(self, x):
        return self.forward(x)

class MyModel:
    def __init__(self, layers):
        self.layers = layers

    def forward(self, x, label=None):
        for layer in self.layers:
            x = layer(x)
        self.x = x
        if label is not None:
            self.label = label
            loss = -np.sum(label * np.log(x)) / x.shape[0]  # average cross-entropy per sample
            return loss

    def backward(self):
        G = self.label
        for layer in self.layers[::-1]:
            G = layer.backward(G)

    def __call__(self, *args):  # *args passes through however many arguments the caller gives
        return self.forward(*args)


if __name__ == "__main__":
    train_datas, train_label, test_datas, test_label = get_datas()

    # hyperparameters
    epoch = 100
    batch_size = 600  # how many images to process at once
    lr = 0.01
    hidden_num = 256  # hidden-layer size

    model = MyModel([
        Linear(784, hidden_num),
        Sigmoid(),
        Linear(hidden_num, 10),
        Softmax()
    ])

    batch_times = int(np.ceil(len(train_datas) / batch_size))  # np.ceil rounds up

    for e in range(epoch):
        for batch_index in range(batch_times):

            x = train_datas[batch_index * batch_size : (batch_index + 1) * batch_size]  # take batch_size rows at a time
            batch_label = train_label[batch_index * batch_size : (batch_index + 1) * batch_size]

            # forward pass and loss
            loss = model.forward(x, batch_label)
            if batch_index % 100 == 0:
                print(f"loss={loss:.3f}")

            # backward && update the weights
            model.backward()

        # measure accuracy on the test set
        x = test_datas
        model.forward(x)

        pre = np.argmax(model.x, axis=1)  # index of each row's maximum, i.e. the predicted digit per test image
        acc = np.sum(pre == test_label) / 10000

        print(f"acc={acc:.3f}")
```