import struct

import numpy as np


def load_labels(file):
    # idx1 format: 4-byte magic number, 4-byte item count, then one byte per label.
    with open(file, "rb") as f:
        data = f.read()
    return np.frombuffer(data, dtype=np.uint8, offset=8)


def load_images(file):
    # idx3 format: 16-byte header (magic, count, rows, cols), then raw pixel bytes,
    # one unsigned byte per pixel.
    with open(file, "rb") as f:
        data = f.read()
    _magic, num_items, rows, cols = struct.unpack(">iiii", data[:16])
    return np.frombuffer(data, dtype=np.uint8, offset=16).reshape(num_items, rows * cols)
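# Note (an assumption, not stated in the original script): get_datas below expects
# the four raw, decompressed MNIST idx files under ./data/ with exactly these
# names: train-images.idx3-ubyte, train-labels.idx1-ubyte, t10k-images.idx3-ubyte,
# t10k-labels.idx1-ubyte (as distributed at http://yann.lecun.com/exdb/mnist/).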
def make_one_hot(labels, class_num=10):
    # Turn integer labels into one-hot rows, e.g. 3 -> [0,0,0,1,0,0,0,0,0,0].
    result = np.zeros((len(labels), class_num))
    for index, lab in enumerate(labels):
        result[index][lab] = 1
    return result


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def softmax(x):
    # Subtract the row-wise max before exponentiating so large logits don't overflow.
    ex = np.exp(x - np.max(x, axis=1, keepdims=True))
    return ex / np.sum(ex, axis=1, keepdims=True)


def get_datas():
    train_datas = load_images("data/train-images.idx3-ubyte") / 255
    train_label = make_one_hot(load_labels("data/train-labels.idx1-ubyte"), 10)
    test_datas = load_images("data/t10k-images.idx3-ubyte") / 255
    test_label = load_labels("data/t10k-labels.idx1-ubyte")
    return train_datas, train_label, test_datas, test_label


class Linear:
    def __init__(self, in_num, out_num):
        self.weight = np.random.normal(0, 1, size=(in_num, out_num))

    def forward(self, x):
        self.x = x
        return self.x @ self.weight

    def backward(self, G):
        # G is dLoss/dOutput: update the weights in place and return the
        # gradient w.r.t. the input so earlier layers can keep backpropagating.
        delta_weight = self.x.T @ G
        delta_x = G @ self.weight.T
        self.weight -= lr * delta_weight  # lr is the module-level learning rate
        return delta_x

    def __call__(self, x):
        return self.forward(x)


class Sigmoid:
    def forward(self, x):
        self.r = sigmoid(x)
        return self.r

    def backward(self, G):
        # sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))
        return G * self.r * (1 - self.r)

    def __call__(self, x):
        return self.forward(x)


class Softmax:
    def forward(self, x):
        self.r = softmax(x)
        return self.r

    def backward(self, G):
        # G here is the one-hot label: the fused softmax + cross-entropy
        # gradient w.r.t. the logits is (softmax(x) - label) / batch_size.
        return (self.r - G) / self.r.shape[0]

    def __call__(self, x):
        return self.forward(x)


class MyModel:
    def __init__(self, layers):
        self.layers = layers

    def forward(self, x, label=None):
        for layer in self.layers:
            x = layer(x)
        self.x = x
        if label is not None:
            self.label = label
            # Mean cross-entropy: sum over classes, averaged over the batch.
            loss = -np.sum(label * np.log(x)) / x.shape[0]
            return loss

    def backward(self):
        # Start from the one-hot label; Softmax.backward turns it into the
        # fused softmax + cross-entropy gradient, then each layer passes
        # its input gradient upstream.
        G = self.label
        for layer in self.layers[::-1]:
            G = layer.backward(G)

    def __call__(self, *args):
        return self.forward(*args)


if __name__ == "__main__":
    train_datas, train_label, test_datas, test_label = get_datas()

    epoch = 100
    batch_size = 600
    lr = 0.01
    hidden_num = 256

    model = MyModel([
        Linear(784, hidden_num),
        Sigmoid(),
        Linear(hidden_num, 10),
        Softmax(),
    ])

    batch_times = int(np.ceil(len(train_datas) / batch_size))
    for e in range(epoch):
        for batch_index in range(batch_times):
            x = train_datas[batch_index * batch_size : (batch_index + 1) * batch_size]
            batch_label = train_label[batch_index * batch_size : (batch_index + 1) * batch_size]
            loss = model.forward(x, batch_label)
            if batch_index % 100 == 0:
                print(f"loss={loss:.3f}")
            model.backward()  # one weight update per batch

    model.forward(test_datas)
    pre = np.argmax(model.x, axis=1)
    acc = np.sum(pre == test_label) / len(test_label)
    print(f"acc={acc:.3f}")
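
# --- Optional sanity check (a sketch added for illustration, not part of the
# original script): verifies the fused softmax + cross-entropy gradient used
# by Softmax.backward against a finite-difference estimate on a tiny random
# batch. The helper name check_softmax_ce_grad is made up; call it manually.
def check_softmax_ce_grad(eps=1e-5):
    rng = np.random.default_rng(0)
    logits = rng.normal(size=(4, 10))
    label = make_one_hot(rng.integers(0, 10, size=4), 10)

    def ce_loss(z):
        # Same loss as MyModel.forward: mean cross-entropy over the batch.
        return -np.sum(label * np.log(softmax(z))) / z.shape[0]

    analytic = (softmax(logits) - label) / logits.shape[0]
    numeric = np.zeros_like(logits)
    for i in range(logits.shape[0]):
        for j in range(logits.shape[1]):
            step = np.zeros_like(logits)
            step[i, j] = eps
            numeric[i, j] = (ce_loss(logits + step) - ce_loss(logits - step)) / (2 * eps)

    # The two estimates should agree closely if the analytic gradient is right.
    assert np.allclose(analytic, numeric, atol=1e-6)
    print("softmax + cross-entropy gradient check passed")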