add in predict_generator and tests

* add in predict_generator and tests

* fix PEP8 details

* Pre-allocate predictions

* make predictions return list if necessary

* reset batch_size for other tests, make less wonky generator
This commit is contained in:
Michael Oliver 2016-04-06 09:03:37 -07:00 committed by François Chollet
parent 6911fa2cba
commit b4bdc5a0fa
3 changed files with 106 additions and 13 deletions

@ -1403,3 +1403,74 @@ class Model(Container):
averages.append(np.average([out[i] for out in all_outs],
weights=weights))
return averages
def predict_generator(self, generator, val_samples):
    '''Generate predictions for the input samples from a data generator.

    The generator should return the same kind of data as accepted by
    `predict_on_batch`.

    # Arguments
        generator: generator yielding batches of input samples
            (either bare inputs, or `(x, y)` / `(x, y, sample_weight)`
            tuples; targets and sample weights are ignored).
        val_samples: total number of samples to generate from
            `generator` before returning.

    # Returns
        A Numpy array of predictions (a list of arrays for a
        multi-output model).
    '''
    processed_samples = 0
    wait_time = 0.05  # polling interval while the queue is empty
    all_outs = []
    data_gen_queue, _stop = generator_queue(generator)

    while processed_samples < val_samples:
        # Poll until the producer thread puts a batch on the queue
        # (or signals a stop).
        generator_output = None
        while not _stop.is_set():
            if not data_gen_queue.empty():
                generator_output = data_gen_queue.get()
                break
            else:
                time.sleep(wait_time)

        # Accept bare inputs or (x, y) / (x, y, sample_weight) tuples;
        # only the inputs are needed for prediction.
        if isinstance(generator_output, tuple):
            if len(generator_output) == 2:
                x, _ = generator_output
            elif len(generator_output) == 3:
                x, _, _ = generator_output
            else:
                _stop.set()
                raise ValueError('output of generator should be a tuple '
                                 '(x, y, sample_weight) '
                                 'or (x, y). Found: ' +
                                 str(generator_output))
        else:
            x = generator_output

        x = standardize_input_data(x, self.input_names)
        try:
            outs = self.predict_on_batch(x)
        except:
            # Stop the producer thread before propagating; bare `raise`
            # preserves the original traceback.
            _stop.set()
            raise
        if not isinstance(outs, list):
            outs = [outs]
        nb_samples = len(x[0])

        # Pre-allocate the full output arrays on the first batch.
        if not all_outs:
            for out in outs:
                shape = (val_samples,) + out.shape[1:]
                all_outs.append(np.zeros(shape))

        for i, out in enumerate(outs):
            all_outs[i][processed_samples:(processed_samples + nb_samples)] = out
        processed_samples += nb_samples

    _stop.set()
    if len(all_outs) == 1:
        return all_outs[0]
    return all_outs

@ -659,6 +659,24 @@ class Sequential(Model):
return self.model.evaluate_generator(generator,
val_samples)
def predict_generator(self, generator, val_samples):
    '''Generate predictions for input samples drawn from a data generator.

    Delegates to the underlying `Model` instance; the generator should
    yield the same kind of data as accepted by `predict_on_batch`.

    # Arguments
        generator: generator yielding input samples.
        val_samples: total number of samples to draw from `generator`
            before returning.

    # Returns
        A Numpy array of predictions.
    '''
    inner_model = self.model
    return inner_model.predict_generator(generator, val_samples)
def get_config(self):
'''Returns the model configuration
as a Python dictionary.

@ -11,6 +11,8 @@ from keras.layers.core import Dense, Activation, Merge, Lambda
from keras.utils import np_utils
from keras.utils.test_utils import get_test_data
from keras.models import model_from_json, model_from_yaml
from keras import objectives
from keras.engine.training import make_batches
input_dim = 16
@ -72,19 +74,15 @@ def test_sequential():
(X_train, y_train), (X_test, y_test) = _get_test_data()
# TODO: factor out
def data_generator(x, y, batch_size=50):
    '''Yield successive `(x_batch, y_batch)` tuples, cycling forever.

    Batches are taken in order from the start of `x`/`y`; the last
    batch of each pass may be smaller than `batch_size`.

    # Arguments
        x: array of input samples, indexable along axis 0.
        y: array of targets, same leading length as `x`.
        batch_size: number of samples per yielded batch.
    '''
    # Fix: batch over the *argument* length, not the enclosing test's
    # X_test (the original used len(X_test) regardless of what was passed).
    num_samples = len(x)
    index_array = np.arange(num_samples)
    while 1:
        for batch_start in range(0, num_samples, batch_size):
            batch_end = min(batch_start + batch_size, num_samples)
            batch_ids = index_array[batch_start:batch_end]
            x_batch = x[batch_ids]
            y_batch = y[batch_ids]
            yield (x_batch, y_batch)
model = Sequential()
model.add(Dense(nb_hidden, input_shape=(input_dim,)))
@ -100,9 +98,15 @@ def test_sequential():
model.train_on_batch(X_train[:32], y_train[:32])
gen_loss = model.evaluate_generator(data_generator(True), 256)
loss = model.evaluate(X_test, y_test)
prediction = model.predict_generator(data_generator(X_test, y_test), X_test.shape[0])
gen_loss = model.evaluate_generator(data_generator(X_test, y_test, 50), X_test.shape[0])
pred_loss = K.eval(K.mean(objectives.get(model.loss)(K.variable(y_test), K.variable(prediction))))
assert(np.isclose(pred_loss, loss))
assert(np.isclose(gen_loss, loss))
model.predict(X_test, verbose=0)
model.predict_classes(X_test, verbose=0)
model.predict_proba(X_test, verbose=0)