# Neural Network Backpropagation Algorithm


The algorithm alternates between two phases on every minibatch, as sketched below:

• Backpropagation phase: propagate the output error backwards through the network, computing each layer's delta from the next layer's delta and the derivative of that layer's activation.

• Weight update phase: turn the deltas into minibatch-averaged, L2-regularized gradients, then update the weights and biases using momentum and a decaying learning rate.
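
In symbols, and matching what the implementation below actually computes (a sketch; here $m$ is the minibatch size, $\eta_t$ the decayed learning rate, $\lambda$ the regularization strength, $\mu$ the momentum, and $a^{(l)} = W^{(l)} x^{(l)} + b^{(l)}$ the pre-activation of layer $l$):

$$
\delta^{(L)} = (y - \hat{y}) \odot f'\!\left(a^{(L)}\right), \qquad
\delta^{(l)} = \left(W^{(l+1)}\right)^{\mathsf{T}} \delta^{(l+1)} \odot f'\!\left(a^{(l)}\right)
$$

$$
\Delta W^{(l)} \leftarrow \mu\, \Delta W^{(l)} - \eta_t \left( -\tfrac{1}{m}\, \delta^{(l)} \left(x^{(l)}\right)^{\mathsf{T}} + \lambda W^{(l)} \right), \qquad
W^{(l)} \leftarrow W^{(l)} + \Delta W^{(l)}
$$

with $\eta_t = \eta / t^{\,p}$, where $t$ is the iteration count and $p$ the decay exponent (`decay_power`). The bias update is analogous.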

The complete implementation, using only NumPy:

```python
import numpy as np


class DNN:

    def __init__(self, input_shape, shape, activations, eta=0.1, threshold=1e-5, softmax=False, max_epochs=1000,
                 regularization=0.001, minibatch_size=5, momentum=0.9, decay_power=0.5, verbose=False):
        if not len(shape) == len(activations):
            raise Exception("number of activations must equal the number of layers.")

        self.depth = len(shape)
        self.activity_levels = [np.mat([0])] * self.depth  # pre-activations a = Wx + b, per layer
        self.outputs = [np.mat([0])] * (self.depth + 1)    # layer inputs/outputs (index 0 is the network input)
        self.deltas = [np.mat([0])] * self.depth           # back-propagated errors, per layer

        self.eta = float(eta)
        self.effective_eta = self.eta
        self.threshold = float(threshold)
        self.max_epochs = int(max_epochs)
        self.regularization = float(regularization)
        self.is_softmax = bool(softmax)
        self.verbose = bool(verbose)
        self.minibatch_size = int(minibatch_size)
        self.momentum = float(momentum)
        self.decay_power = float(decay_power)
        self.iterations = 0
        self.epochs = 0

        self.activations = activations
        self.activation_func = []
        self.activation_func_diff = []
        for f in activations:
            if f == "sigmoid":
                self.activation_func.append(np.vectorize(self.sigmoid))
                self.activation_func_diff.append(np.vectorize(self.sigmoid_diff))
            elif f == "identity":
                self.activation_func.append(np.vectorize(self.identity))
                self.activation_func_diff.append(np.vectorize(self.identity_diff))
            elif f == "relu":
                self.activation_func.append(np.vectorize(self.relu))
                self.activation_func_diff.append(np.vectorize(self.relu_diff))
            else:
                raise Exception("unknown activation function {:s}".format(f))

        # initialize weights and biases with small random values
        self.weights = [np.mat([0])] * self.depth
        self.biases = [np.mat([0])] * self.depth
        self.acc_weights_delta = [np.mat([0])] * self.depth  # momentum accumulators
        self.acc_biases_delta = [np.mat([0])] * self.depth

        self.weights[0] = np.mat(np.random.random((shape[0], input_shape)) / 100)
        self.biases[0] = np.mat(np.random.random((shape[0], 1)) / 100)
        for idx in np.arange(1, len(shape)):
            self.weights[idx] = np.mat(np.random.random((shape[idx], shape[idx - 1])) / 100)
            self.biases[idx] = np.mat(np.random.random((shape[idx], 1)) / 100)

    def compute(self, x):
        # forward pass: x holds one column per sample
        result = x
        for idx in np.arange(0, self.depth):
            self.outputs[idx] = result
            al = self.weights[idx] * result + self.biases[idx]
            self.activity_levels[idx] = al
            result = self.activation_func[idx](al)
        self.outputs[self.depth] = result
        return self.softmax(result) if self.is_softmax else result

    def predict(self, x):
        return self.compute(np.mat(x).T).T.A

    def bp(self, d):
        # backpropagation phase: walk the layers from output to input,
        # turning the output error d into per-layer deltas
        tmp = d.T
        for idx in np.arange(0, self.depth)[::-1]:
            delta = np.multiply(tmp, self.activation_func_diff[idx](self.activity_levels[idx]).T)
            self.deltas[idx] = delta
            tmp = delta * self.weights[idx]

    def update(self):
        # weight update phase: momentum SGD with L2 regularization
        # and a polynomially decaying learning rate
        self.effective_eta = self.eta / np.power(self.iterations, self.decay_power)
        for idx in np.arange(0, self.depth):
            # minibatch-averaged gradient plus the regularization term
            weights_grad = -self.deltas[idx].T * self.outputs[idx].T / self.deltas[idx].shape[0] + \
                           self.regularization * self.weights[idx]
            biases_grad = -np.mean(self.deltas[idx].T, axis=1) + self.regularization * self.biases[idx]

            # accumulated delta (momentum)
            self.acc_weights_delta[idx] = self.acc_weights_delta[idx] * self.momentum \
                                          - self.effective_eta * weights_grad
            self.acc_biases_delta[idx] = self.acc_biases_delta[idx] * self.momentum \
                                         - self.effective_eta * biases_grad
            self.weights[idx] = self.weights[idx] + self.acc_weights_delta[idx]
            self.biases[idx] = self.biases[idx] + self.acc_biases_delta[idx]

    def fit(self, x, y):
        x = np.mat(x)
        y = np.mat(y)
        loss = []
        self.iterations = 0
        self.epochs = 0
        start = 0
        train_set_size = x.shape[0]

        while True:
            # slice out the next minibatch (one column per sample)
            end = start + self.minibatch_size
            minibatch_x = x[start:end].T
            minibatch_y = y[start:end].T

            yp = self.compute(minibatch_x)
            d = minibatch_y - yp

            if self.is_softmax:
                # cross-entropy loss; the small epsilon guards against log(0)
                loss.append(np.mean(-np.sum(np.multiply(minibatch_y, np.log(yp + 1e-100)), axis=0)))
            else:
                loss.append(np.mean(np.sqrt(np.sum(np.power(d, 2), axis=0))))

            self.iterations += 1
            start = (start + self.minibatch_size) % train_set_size

            if self.iterations % train_set_size == 0:
                self.epochs += 1
                mean_e = np.mean(loss)
                loss = []
                if self.verbose:
                    print("epoch: {:d}. mean loss: {:.6f}. learning rate: {:.8f}".format(
                        self.epochs, mean_e, self.effective_eta))
                if self.epochs >= self.max_epochs or mean_e < self.threshold:
                    break

            self.bp(d)
            self.update()

    @staticmethod
    def sigmoid(x):
        # clamp the exponent to avoid overflow for large negative x
        return 1.0 / (1.0 + np.power(np.e, min(-x, 1e2)))

    @staticmethod
    def sigmoid_diff(x):
        return np.power(np.e, min(-x, 1e2)) / (1.0 + np.power(np.e, min(-x, 1e2))) ** 2

    @staticmethod
    def relu(x):
        return x if x > 0 else 0.0

    @staticmethod
    def relu_diff(x):
        return 1.0 if x > 0 else 0.0

    @staticmethod
    def identity(x):
        return x

    @staticmethod
    def identity_diff(x):
        return 1.0

    @staticmethod
    def softmax(x):
        # clip to avoid overflow in exp
        x[x > 1e2] = 1e2
        ep = np.power(np.e, x)
        return ep / np.sum(ep, axis=0)
```
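
Hand-derived activation derivatives are an easy place for bugs to hide in a from-scratch implementation like this, so it is worth verifying them numerically. A minimal sketch (not part of the original article; the tolerance `1e-8` is an arbitrary choice) that checks the sigmoid derivative against a central finite difference:

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_diff(x):
    # equivalent to the e^(-x) / (1 + e^(-x))^2 form used in the class
    return sigmoid(x) * (1.0 - sigmoid(x))

eps = 1e-6
for x in (-2.0, 0.0, 3.0):
    numeric = (sigmoid(x + eps) - sigmoid(x - eps)) / (2.0 * eps)
    assert abs(numeric - sigmoid_diff(x)) < 1e-8, (x, numeric)
```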

The following script uses the class above to fit three two-variable functions with networks of different hidden-layer widths, plotting the true surface next to each fitted surface:

```python
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from mentat.classification_model import DNN
from mpl_toolkits.mplot3d import Axes3D

np.random.seed(42)

hidden_layer_size = [3, 5, 8]  # neurons per hidden layer (all hidden layers use the same count)
hidden_layers = 1  # number of hidden layers
hidden_layer_activation_func = "sigmoid"  # hidden-layer activation function
learning_rate = 0.4  # learning rate
max_epochs = 200  # number of training epochs
regularization_strength = 0.0001  # regularization strength
minibatch_size = 40  # samples per minibatch
momentum = 0.6  # momentum
decay_power = 0.2  # learning-rate decay exponent


# target functions to fit
def f1(x):
    return (x[:, 0] ** 2 + x[:, 1] ** 2).reshape((len(x), 1))


def f2(x):
    return (x[:, 0] ** 2 - x[:, 1] ** 2).reshape((len(x), 1))


def f3(x):
    return (np.cos(1.2 * x[:, 0]) * np.cos(1.2 * x[:, 1])).reshape((len(x), 1))


funcs = [f1, f2, f3]

X = np.random.uniform(low=-2.0, high=2.0, size=(100, 2))
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, .02), np.arange(y_min, y_max, .02))

# models
names = ["{:d} neurons per layer".format(hs) for hs in hidden_layer_size]
classifiers = [
    DNN(input_shape=2, shape=[hs] * hidden_layers + [1],
        activations=[hidden_layer_activation_func] * hidden_layers + ["identity"], eta=learning_rate, threshold=0.001,
        softmax=False, max_epochs=max_epochs, regularization=regularization_strength, verbose=True,
        minibatch_size=minibatch_size, momentum=momentum, decay_power=decay_power) for hs in hidden_layer_size
]

figure = plt.figure(figsize=(5 * len(classifiers) + 2, 4 * len(funcs)))
cm = plt.cm.PuOr
cm_bright = ListedColormap(["#DB9019", "#00343F"])
i = 1

for cnt, f in enumerate(funcs):
    zz = f(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    z = f(X)

    # first column: the ground-truth surface
    ax = figure.add_subplot(len(funcs), len(classifiers) + 1, i, projection="3d")
    if cnt == 0:
        ax.set_title("data")
    ax.plot_surface(xx, yy, zz, rstride=1, cstride=1, alpha=0.6, cmap=cm)
    ax.contourf(xx, yy, zz, zdir='z', offset=zz.min(), alpha=0.6, cmap=cm)
    ax.scatter(X[:, 0], X[:, 1], z.ravel(), cmap=cm_bright, edgecolors='k')
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_zlim(zz.min(), zz.max())
    i += 1

    # remaining columns: each model's fitted surface
    for name, clf in zip(names, classifiers):
        print("model: {:s} training.".format(name))
        clf.fit(X, z)
        predict = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

        ax = figure.add_subplot(len(funcs), len(classifiers) + 1, i, projection="3d")
        if cnt == 0:
            ax.set_title(name)
        ax.plot_surface(xx, yy, predict, rstride=1, cstride=1, alpha=0.6, cmap=cm)
        ax.contourf(xx, yy, predict, zdir='z', offset=zz.min(), alpha=0.6, cmap=cm)
        ax.scatter(X[:, 0], X[:, 1], z.ravel(), cmap=cm_bright, edgecolors='k')
        ax.set_xlim(xx.min(), xx.max())
        ax.set_ylim(yy.min(), yy.max())
        ax.set_zlim(zz.min(), zz.max())
        i += 1
        print("model: {:s} train finished.".format(name))

plt.tight_layout()
plt.savefig(
    "pic/dnn_fitting_{:d}_{:.6f}_{:d}_{:.6f}_{:.3f}_{:.3f}.png".format(hidden_layers, learning_rate, max_epochs,
                                                                       regularization_strength, momentum, decay_power))
```
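
One practical note (an assumption on my part, not stated in the original): `plt.savefig` fails if the `pic/` directory does not already exist, so it is worth creating it before the save:

```python
import os

os.makedirs("pic", exist_ok=True)  # ensure the output directory exists before saving
```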
