diff --git a/examples/stateful_lstm.py b/examples/stateful_lstm.py
index 1277b95eb..458d7a113 100644
--- a/examples/stateful_lstm.py
+++ b/examples/stateful_lstm.py
@@ -59,7 +59,7 @@ model.add(LSTM(50,
                return_sequences=False,
                stateful=True))
 model.add(Dense(1))
-model.compile(loss='rmse', optimizer='rmsprop')
+model.compile(loss='mse', optimizer='rmsprop')
 
 print('Training')
 for i in range(epochs):
diff --git a/keras/layers/embeddings.py b/keras/layers/embeddings.py
index b669040dc..5200fed41 100644
--- a/keras/layers/embeddings.py
+++ b/keras/layers/embeddings.py
@@ -1,11 +1,8 @@
 from __future__ import absolute_import
-import numpy as np
 
 from .. import backend as K
-from .. import activations, initializations, regularizers, constraints
-from ..layers.core import Layer, MaskedLayer
-
-from ..constraints import unitnorm
+from .. import initializations, regularizers, constraints
+from ..layers.core import Layer
 
 
 class Embedding(Layer):
@@ -108,7 +105,8 @@ class Embedding(Layer):
             B = K.random_binomial((self.input_dim,), p=retain_p)
         else:
             B = K.ones((self.input_dim)) * retain_p
-        out = K.gather(self.W * K.expand_dims(B), X) # we zero-out rows of W at random
+        # we zero-out rows of W at random
+        out = K.gather(self.W * K.expand_dims(B), X)
         return out
 
     def get_config(self):
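Aside on the embeddings.py hunk above: the dropout scheme itself is untouched, only the comment moves onto its own line. A minimal NumPy sketch of that row-dropout scheme, for illustration only (`W`, `X`, and `retain_p` mirror the names in the hunk; `np.random.binomial` stands in for the backend-specific `K.random_binomial`):

    import numpy as np

    W = np.random.uniform(-0.05, 0.05, (10, 4))  # embedding matrix (input_dim, output_dim)
    X = np.array([1, 3, 3, 7])                   # integer word indices
    retain_p = 0.8                               # 1. - dropout

    B = np.random.binomial(1, retain_p, (10,))   # one keep/drop flag per row of W
    out = (W * B[:, None])[X]                    # zero out rows of W at random, then gather
    print(out.shape)                             # (4, 4): one embedding vector per index

Masking whole rows before the gather means every occurrence of a given word index in the batch sees the same mask, which is the point of dropping embeddings rather than activations.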
diff --git a/keras/layers/recurrent.py b/keras/layers/recurrent.py
index 98adef788..37aaca1f2 100644
--- a/keras/layers/recurrent.py
+++ b/keras/layers/recurrent.py
@@ -5,7 +5,6 @@ import numpy as np
 from .. import backend as K
 from .. import activations, initializations, regularizers
 from ..layers.core import MaskedLayer
-from ..backend.common import _FLOATX
 
 
 class Recurrent(MaskedLayer):
@@ -208,7 +207,7 @@ class SimpleRNN(Recurrent):
     '''
     def __init__(self, output_dim,
                  init='glorot_uniform', inner_init='orthogonal',
-                 activation='sigmoid',
+                 activation='tanh',
                  W_regularizer=None, U_regularizer=None, b_regularizer=None,
                  dropout_W=0., dropout_U=0., **kwargs):
         self.output_dim = output_dim
@@ -265,8 +264,9 @@ class SimpleRNN(Recurrent):
             self.states = [K.zeros((input_shape[0], self.output_dim))]
 
     def step(self, x, states):
-        # states only contains the previous output.
-        assert len(states) == 3 # 1 state and 2 constants
+        # states contains the previous output,
+        # and the two dropout matrices from self.get_constants()
+        assert len(states) == 3  # 1 state and 2 constants
         prev_output = states[0]
         B_W = states[1]
         B_U = states[2]
@@ -287,8 +287,8 @@ class SimpleRNN(Recurrent):
             B_W = K.random_binomial((nb_samples, self.input_dim), p=retain_p_W)
             B_U = K.random_binomial((nb_samples, self.output_dim), p=retain_p_U)
         else:
-            B_W = np.ones(1, dtype=_FLOATX) * retain_p_W
-            B_U = np.ones(1, dtype=_FLOATX) * retain_p_U
+            B_W = np.ones(1, dtype=K.floatx()) * retain_p_W
+            B_U = np.ones(1, dtype=K.floatx()) * retain_p_U
         return [B_W, B_U]
 
     def get_config(self):
@@ -334,7 +334,7 @@ class GRU(Recurrent):
     '''
    def __init__(self, output_dim,
                  init='glorot_uniform', inner_init='orthogonal',
-                 activation='sigmoid', inner_activation='hard_sigmoid',
+                 activation='tanh', inner_activation='hard_sigmoid',
                  W_regularizer=None, U_regularizer=None, b_regularizer=None,
                  dropout_W=0., dropout_U=0., **kwargs):
         self.output_dim = output_dim
@@ -406,10 +406,10 @@ class GRU(Recurrent):
             self.states = [K.zeros((input_shape[0], self.output_dim))]
 
     def step(self, x, states):
-        assert len(states) == 3 # 1 state and 2 constants
-        h_tm1 = states[0]
-        B_W = states[1]
-        B_U = states[2]
+        assert len(states) == 3  # 1 state and 2 constants
+        h_tm1 = states[0]  # previous memory
+        B_W = states[1]  # dropout matrix for input units
+        B_U = states[2]  # dropout matrix for recurrent units
 
         x_z = K.dot(x * B_W[0], self.W_z) + self.b_z
         x_r = K.dot(x * B_W[1], self.W_r) + self.b_r
@@ -435,8 +435,8 @@ class GRU(Recurrent):
             B_W = [K.random_binomial((nb_samples, self.input_dim), p=retain_p_W) for _ in range(3)]
             B_U = [K.random_binomial((nb_samples, self.output_dim), p=retain_p_U) for _ in range(3)]
         else:
-            B_W = np.ones(3, dtype=_FLOATX) * retain_p_W
-            B_U = np.ones(3, dtype=_FLOATX) * retain_p_U
+            B_W = np.ones(3, dtype=K.floatx()) * retain_p_W
+            B_U = np.ones(3, dtype=K.floatx()) * retain_p_U
         return [B_W, B_U]
 
     def get_config(self):
@@ -573,7 +573,7 @@ class LSTM(Recurrent):
                            K.zeros((input_shape[0], self.output_dim))]
 
     def step(self, x, states):
-        assert len(states) == 4 # 2 states and 2 constants
+        assert len(states) == 4  # 2 states and 2 constants
         h_tm1 = states[0]
         c_tm1 = states[1]
         B_W = states[2]
@@ -604,8 +604,8 @@ class LSTM(Recurrent):
             B_W = [K.random_binomial((nb_samples, self.input_dim), p=retain_p_W) for _ in range(4)]
             B_U = [K.random_binomial((nb_samples, self.output_dim), p=retain_p_U) for _ in range(4)]
         else:
-            B_W = np.ones(4, dtype=_FLOATX) * retain_p_W
-            B_U = np.ones(4, dtype=_FLOATX) * retain_p_U
+            B_W = np.ones(4, dtype=K.floatx()) * retain_p_W
+            B_U = np.ones(4, dtype=K.floatx()) * retain_p_U
         return [B_W, B_U]
 
     def get_config(self):
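Aside on the recurrent.py hunks above: the new 'tanh' default for SimpleRNN and GRU matches the standard formulation of those layers, and `K.floatx()` is the public accessor for the same default float type the private `_FLOATX` constant exposed. With `dropout_W == 0.` the `get_constants()` fallback reduces to multiplying by one. A rough sketch of that fallback, for illustration only (`floatx` here is a local stand-in for `keras.backend.floatx`):

    import numpy as np

    def floatx():                # stand-in for K.floatx(); Keras reads the real
        return 'float32'         # value from its config (e.g. ~/.keras/keras.json)

    dropout_W = 0.               # as in SimpleRNN(..., dropout_W=0.)
    retain_p_W = 1. - dropout_W
    B_W = np.ones(1, dtype=floatx()) * retain_p_W
    assert B_W.dtype == np.float32
    assert B_W[0] == 1.          # so x * B_W[0] inside step() is a no-op here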
diff --git a/keras/utils/generic_utils.py b/keras/utils/generic_utils.py
index 552463bcf..2d180fb67 100644
--- a/keras/utils/generic_utils.py
+++ b/keras/utils/generic_utils.py
@@ -63,15 +63,15 @@ class Progbar(object):
             numdigits = int(np.floor(np.log10(self.target))) + 1
             barstr = '%%%dd/%%%dd [' % (numdigits, numdigits)
             bar = barstr % (current, self.target)
-            prog = float(current)/self.target
-            prog_width = int(self.width*prog)
+            prog = float(current) / self.target
+            prog_width = int(self.width * prog)
             if prog_width > 0:
-                bar += ('='*(prog_width-1))
+                bar += ('=' * (prog_width - 1))
                 if current < self.target:
                     bar += '>'
                 else:
                     bar += '='
-            bar += ('.'*(self.width-prog_width))
+            bar += ('.' * (self.width - prog_width))
             bar += ']'
             sys.stdout.write(bar)
             self.total_width = len(bar)
@@ -80,7 +80,7 @@ class Progbar(object):
                 time_per_unit = (now - self.start) / current
             else:
                 time_per_unit = 0
-            eta = time_per_unit*(self.target - current)
+            eta = time_per_unit * (self.target - current)
             info = ''
             if current < self.target:
                 info += ' - ETA: %ds' % eta
@@ -99,7 +99,7 @@ class Progbar(object):
             self.total_width += len(info)
 
             if prev_total_width > self.total_width:
-                info += ((prev_total_width-self.total_width) * " ")
+                info += ((prev_total_width - self.total_width) * " ")
 
             sys.stdout.write(info)
             sys.stdout.flush()
@@ -120,4 +120,4 @@ class Progbar(object):
             sys.stdout.write(info + "\n")
 
     def add(self, n, values=[]):
-        self.update(self.seen_so_far+n, values)
+        self.update(self.seen_so_far + n, values)
diff --git a/tests/keras/backend/test_backends.py b/tests/keras/backend/test_backends.py
index 4c532a7dd..02cb55a8e 100644
--- a/tests/keras/backend/test_backends.py
+++ b/tests/keras/backend/test_backends.py
@@ -273,7 +273,7 @@ class TestBackend(object):
         check_single_tensor_operation('tanh', (4, 2))
 
         # dropout
-        val = np.random.random((20, 20))
+        val = np.random.random((100, 100))
         xth = KTH.variable(val)
         xtf = KTF.variable(val)
         zth = KTH.eval(KTH.dropout(xth, level=0.2))
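Aside on the test_backends.py hunk above: the dropout test presumably compares statistics of the dropped tensor against `level=0.2`, and the standard error of the observed drop rate shrinks with sample size, so 100x100 = 10,000 elements gives a much less flaky assertion than 20x20 = 400. A quick NumPy illustration of the effect, under the assumption that the test asserts on the observed fraction of zeroed entries:

    import numpy as np

    level = 0.2
    for n in (20, 100):
        mask = np.random.binomial(1, 1. - level, (n, n))  # 1 = kept, 0 = dropped
        drop_rate = 1. - mask.mean()
        print(n * n, abs(drop_rate - level))              # error shrinks roughly as 1/n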