优化算法
tf.keras.optimizers.SGD(learning_rate=0.05) SGD算法
tf.keras.optimizers.SGD(learning_rate=0.004,momentum=0.9) 动量法
tf.keras.optimizers.Adagrad(learning_rate=0.01) AdaGrad算法
tf.keras.optimizers.RMSprop(learning_rate=0.01,rho=0.9) RMSprop算法
tf.keras.optimizers.Adadelta(learning_rate=0.01,rho=0.9) Adadelta算法
keras.optimizers.Adam(learning_rate=0.01) Adam算法
算法实现
%matplotlib inline
import numpy as np
import time
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
# Load the airfoil self-noise dataset.
def get_data():
    """Read the airfoil self-noise dataset and return standardized data.

    Returns:
        (features, labels): float32 tensors holding the first 1500 rows;
        all columns but the last are features, the last column is the label.
        NOTE(review): assumes 'airfoil_self_noise.dat' is tab-separated —
        confirm the file layout.
    """
    raw = np.genfromtxt('airfoil_self_noise.dat', delimiter='\t')
    # Standardize every column to zero mean and unit variance.
    raw = (raw - raw.mean(axis=0)) / raw.std(axis=0)
    feats = tf.convert_to_tensor(raw[:1500, :-1], dtype=tf.float32)
    targets = tf.convert_to_tensor(raw[:1500, -1], dtype=tf.float32)
    return feats, targets

features, labels = get_data()
# Define the training function.
def train_tensorflow(trainer_name, features, labels, batch_size=10, num_epochs=2):
    """Train a one-layer linear model with the given Keras optimizer and
    plot the training loss curve.

    Args:
        trainer_name: a tf.keras.optimizers.Optimizer instance.
        features: float32 tensor of input features, shape (n, d).
        labels: float32 tensor of targets, shape (n,).
        batch_size: mini-batch size (default 10).
        num_epochs: number of passes over the data (default 2).

    Side effects: prints the final loss/time and draws a matplotlib figure.
    """
    # Initialize the model: a single Dense(1) layer (linear regression).
    net = tf.keras.Sequential()
    net.add(tf.keras.layers.Dense(1))
    loss = tf.losses.MeanSquaredError()

    def eval_loss():
        # Keras losses take (y_true, y_pred); the original passed them
        # swapped, which only worked because MSE is symmetric.
        return np.array(tf.reduce_mean(loss(labels, net(features))))

    ls = [eval_loss()]
    # BUG FIX: shuffle individual samples BEFORE batching. Shuffling after
    # .batch() (as the original did) only permutes whole batches.
    data_iter = (tf.data.Dataset.from_tensor_slices((features, labels))
                 .shuffle(buffer_size=1000)
                 .batch(batch_size))
    # Use the optimizer instance to iterate the model parameters.
    for _ in range(num_epochs):
        start = time.time()
        for batch_i, (X, y) in enumerate(data_iter):
            with tf.GradientTape() as tape:
                l = tf.reduce_mean(loss(y, net(X)))  # mean batch loss
            grads = tape.gradient(l, net.trainable_variables)
            trainer_name.apply_gradients(zip(grads, net.trainable_variables))
            if (batch_i + 1) * batch_size % 100 == 0:
                # Record the current training loss every 100 samples.
                ls.append(eval_loss())
    # Print the result and plot the loss curve.
    print('loss: %f, time: %f ' % (ls[-1], time.time() - start))
    plt.figure()
    plt.plot(np.linspace(0, num_epochs, len(ls)), ls)
    plt.xlabel('epoch')
    plt.ylabel('loss')
# Vanilla SGD
trainer1=tf.keras.optimizers.SGD(learning_rate=0.05)
train_tensorflow(trainer1, features, labels)
# Momentum method: enabled via SGD's `momentum` hyperparameter
trainer2 = tf.keras.optimizers.SGD(learning_rate=0.004,momentum=0.9)
train_tensorflow(trainer2,features,labels)
# AdaGrad algorithm
trainer3 = tf.keras.optimizers.Adagrad(learning_rate=0.01)
train_tensorflow(trainer3,features,labels)
# RMSprop algorithm: the decay hyperparameter (gamma) is passed as `rho`
trainer4 = tf.keras.optimizers.RMSprop(learning_rate=0.01,rho=0.9)
train_tensorflow(trainer4,features,labels)
# Adadelta algorithm: its decay hyperparameter is also passed as `rho`
trainer5 = tf.keras.optimizers.Adadelta(learning_rate=0.01,rho=0.9)
train_tensorflow(trainer5,features,labels)
# Adam algorithm (tracks its own time step internally)
trainer6 = keras.optimizers.Adam(learning_rate=0.01)
train_tensorflow(trainer6,features,labels)
自编码器 Fashion-MNIST VAE实战
import os
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import Sequential, layers
from PIL import Image
from matplotlib import pyplot as plt
def save_images(imgs, name):
    """Tile up to 100 grayscale 28x28 images into a 280x280 grid and save it.

    Args:
        imgs: indexable collection of uint8 arrays of shape (28, 28).
        name: output file path (directory must exist).

    Robustness fix: the original indexed imgs[0..99] unconditionally and
    raised IndexError when fewer than 100 images were supplied; now the
    partially-filled grid is saved instead.
    """
    new_im = Image.new('L', (280, 280))
    index = 0
    for i in range(0, 280, 28):      # grid column (x offset)
        for j in range(0, 280, 28):  # grid row (y offset)
            if index >= len(imgs):
                new_im.save(name)
                return
            im = Image.fromarray(imgs[index], mode='L')
            new_im.paste(im, (i, j))
            index += 1
    new_im.save(name)
# Hidden size, batch size and learning rate for the VAE experiment.
h_dim = 20
batchsz = 512
lr = 1e-3
# Fashion-MNIST pixels, scaled to [0, 1] as float32.
(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()
x_train, x_test = x_train.astype(np.float32) / 255., x_test.astype(np.float32) / 255.
# We do not need labels — the VAE is trained unsupervised.
train_db = tf.data.Dataset.from_tensor_slices(x_train)
train_db = train_db.shuffle(batchsz * 5).batch(batchsz)
test_db = tf.data.Dataset.from_tensor_slices(x_test)
test_db = test_db.batch(batchsz)
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)
# Latent dimensionality of the VAE.
z_dim = 10
class VAE(keras.Model):
    """Variational autoencoder for flattened 28x28 images (784 pixels).

    Encoder: 784 -> 128 -> (mu, log_var), each `z_dim` wide.
    Decoder: z_dim -> 128 -> 784 pixel logits.
    """

    def __init__(self):
        super(VAE, self).__init__()
        # Encoder layers.
        self.fc1 = layers.Dense(128)
        self.fc2 = layers.Dense(z_dim)  # mean head
        self.fc3 = layers.Dense(z_dim)  # log-variance head
        # Decoder layers.
        self.fc4 = layers.Dense(128)
        self.fc5 = layers.Dense(784)

    def encoder(self, x):
        """Map inputs to the mean and log-variance of q(z|x)."""
        hidden = tf.nn.relu(self.fc1(x))
        return self.fc2(hidden), self.fc3(hidden)

    def decoder(self, z):
        """Map latent codes back to 784-dimensional pixel logits."""
        return self.fc5(tf.nn.relu(self.fc4(z)))

    def reparameterize(self, mu, log_var):
        """Sample z = mu + sigma * eps with eps ~ N(0, I).

        The reparameterization trick keeps the sampling step differentiable
        with respect to mu and log_var.
        """
        eps = tf.random.normal(log_var.shape)
        return mu + tf.exp(log_var * 0.5) * eps

    def call(self, inputs, training=None):
        # [b, 784] => ([b, z_dim], [b, z_dim])
        mu, log_var = self.encoder(inputs)
        z = self.reparameterize(mu, log_var)
        return self.decoder(z), mu, log_var
model = VAE()
model.build(input_shape=(4, 784))
optimizer = tf.optimizers.Adam(lr)
# BUG FIX: the output directory must exist before save_images() writes into
# it; the original never created 'vae_images/' (os was imported but unused).
os.makedirs('vae_images', exist_ok=True)

for epoch in range(1000):
    for step, x in enumerate(train_db):
        x = tf.reshape(x, [-1, 784])
        with tf.GradientTape() as tape:
            x_rec_logits, mu, log_var = model(x)
            # Per-pixel Bernoulli reconstruction loss, averaged over the batch.
            rec_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=x, logits=x_rec_logits)
            rec_loss = tf.reduce_sum(rec_loss) / x.shape[0]
            # KL divergence between q(z|x) = N(mu, var) and the prior N(0, 1):
            # https://stats.stackexchange.com/questions/7440/kl-divergence-between-two-univariate-gaussians
            kl_div = -0.5 * (log_var + 1 - mu**2 - tf.exp(log_var))
            kl_div = tf.reduce_sum(kl_div) / x.shape[0]
            loss = rec_loss + 1. * kl_div  # ELBO with KL weight 1
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        if step % 100 == 0:
            print(epoch, step, 'kl div:', float(kl_div), 'rec loss:', float(rec_loss))

    # Evaluation 1: decode random latent samples drawn from the prior.
    z = tf.random.normal((batchsz, z_dim))
    logits = model.decoder(z)
    x_hat = tf.sigmoid(logits)
    x_hat = tf.reshape(x_hat, [-1, 28, 28]).numpy() * 255.
    x_hat = x_hat.astype(np.uint8)
    save_images(x_hat, 'vae_images/sampled_epoch%d.png' % epoch)

    # Evaluation 2: reconstruct one batch from the test set.
    x = next(iter(test_db))
    x = tf.reshape(x, [-1, 784])
    x_hat_logits, _, _ = model(x)
    x_hat = tf.sigmoid(x_hat_logits)
    x_hat = tf.reshape(x_hat, [-1, 28, 28]).numpy() * 255.
    x_hat = x_hat.astype(np.uint8)
    save_images(x_hat, 'vae_images/rec_epoch%d.png' % epoch)
扫一扫 获得更多内容
(公众号:Python工厂)