Optionally load weights by name (#3488)

* Adding feature to load_weights by name Squashed commit of the following: commit fd47e763855c34ed78d26ee441d83e0e63f08119 Author: Arel Cordero <arel@ditto.us.com> Date: Thu Aug 18 16:02:14 2016 +0000 typo commit d0b06c03080131c55ab4777064a196ff339ad7df Author: Arel Cordero <arel@ditto.us.com> Date: Thu Aug 18 15:52:35 2016 +0000 update documentation for "load_weights" commit 844cfc2e8c9c6f267799a22ed54ac4d75807c5ab Author: Arel Cordero <arel@ditto.us.com> Date: Thu Aug 18 02:42:10 2016 +0000 batch updating weights commit f361a70da4b40b961f1af9c8f1c3cd26273d0cad Author: Arel Cordero <arel@ditto.us.com> Date: Thu Aug 18 02:29:17 2016 +0000 removing pudb line commit 738de4c371503626b4c9dbae6428fb279b368a76 Author: Arel Cordero <arel@ditto.us.com> Date: Wed Aug 17 19:56:51 2016 +0000 adding unit tests for loading weights by name commit cb0971b3cfe62452ab445e4034098cab2be3031b Author: Arel Cordero <arel@ditto.us.com> Date: Tue Aug 16 23:45:32 2016 +0000 cleaning up code based on comments commit ef08fd2c9f5d3c65359cbdf5b090e08733a518de Author: Arel Cordero <arel@ditto.us.com> Date: Tue Aug 16 04:50:46 2016 +0000 debugging commit 0d74f0e997960886b1044c26001de6cd6ad90bb9 Author: Arel Cordero <arel@ditto.us.com> Date: Tue Aug 16 04:15:43 2016 +0000 optionally load model by name * changed random file names to use tempfile module * clean up documentation strings * clarifying documentation
2016-09-06 14:42:31 -04:00 · 2016-09-06 14:42:31 -04:00 · 607635d2ce
commit 607635d2ce
parent b8fddc862e
4 changed files with 206 additions and 9 deletions
--- a/docs/templates/getting-started/faq.md
+++ b/docs/templates/getting-started/faq.md
@ -113,12 +113,39 @@ Note that you will first need to install HDF5 and the Python library h5py, which
 model.save_weights('my_model_weights.h5')
 ```

-Assuming you have code for instantiating your model, you can then load the weights you saved into a model with the same architecture:
+Assuming you have code for instantiating your model, you can then load the weights you saved into a model with the *same* architecture:

 ```python
 model.load_weights('my_model_weights.h5')
 ```

+If you need to load weights into a *different* architecture (with some layers in common), for instance for fine-tuning or transfer-learning, you can load weights by *layer name*:
+
+```python
+model.load_weights('my_model_weights.h5', by_name=True)
+```
+
+For example:
+
+```python
+"""
+Assume original model looks like this:
+    model = Sequential()
+    model.add(Dense(2, input_dim=3, name="dense_1"))
+    model.add(Dense(3, name="dense_2"))
+    ...
+    model.save_weights(fname)
+"""
+
+# new model
+model = Sequential()
+model.add(Dense(2, input_dim=3, name="dense_1"))  # will be loaded
+model.add(Dense(10, name="new_dense"))  # will not be loaded
+
+# load weights from first model; will only affect the first layer, dense_1.
+model.load_weights(fname, by_name=True)
+```
+
 ---

 ### Why is the training loss much higher than the testing loss?
--- a/docs/templates/models/about-keras-models.md
+++ b/docs/templates/models/about-keras-models.md
@ -30,4 +30,4 @@ yaml_string = model.to_yaml()
 model = model_from_yaml(yaml_string)
 ```
 - `model.save_weights(filepath)`: saves the weights of the model as a HDF5 file.
- `model.load_weights(filepath)`: loads the weights of the model from a HDF5 file (created by `save_weights`).
+- `model.load_weights(filepath, by_name=False)`: loads the weights of the model from a HDF5 file (created by `save_weights`). By default, the architecture is expected to be unchanged. To load weights into a different architecture (with some layers in common), use `by_name=True` to load only those layers with the same name.
--- a/keras/engine/topology.py
+++ b/keras/engine/topology.py
@ -2469,14 +2469,30 @@ class Container(Layer):
                else:
                    param_dset[:] = val

-    def load_weights(self, filepath):
+    def load_weights(self, filepath, by_name=False):
        '''Load all layer weights from a HDF5 save file.
+
+        If `by_name` is False (default) weights are loaded
+        based on the network's topology, meaning the architecture
+        should be the same as when the weights were saved.
+        Note that layers that don't have weights are not taken
+        into account in the topological ordering, so adding or
+        removing layers is fine as long as they don't have weights.
+
+        If `by_name` is True, weights are loaded into layers
+        only if they share the same name. This is useful
+        for fine-tuning or transfer-learning models where
+        some of the layers have changed.
        '''
        import h5py
        f = h5py.File(filepath, mode='r')
        if 'layer_names' not in f.attrs and 'model_weights' in f:
            f = f['model_weights']
-        self.load_weights_from_hdf5_group(f)
+        if by_name:
+            self.load_weights_from_hdf5_group_by_name(f)
+        else:
+            self.load_weights_from_hdf5_group(f)
+
        if hasattr(f, 'close'):
            f.close()

@ -2552,6 +2568,54 @@ class Container(Layer):
                weight_value_tuples += zip(symbolic_weights, weight_values)
            K.batch_set_value(weight_value_tuples)

+    def load_weights_from_hdf5_group_by_name(self, f):
+        ''' Name-based weight loading
+        (instead of topological weight loading).
+        Layers that have no matching name are skipped.
+        '''
+        if hasattr(self, 'flattened_layers'):
+            # support for legacy Sequential/Merge behavior
+            flattened_layers = self.flattened_layers
+        else:
+            flattened_layers = self.layers
+
+        if 'nb_layers' in f.attrs:
+                raise Exception('The weight file you are trying to load is' +
+                                ' in a legacy format that does not support' +
+                                ' name-based weight loading.')
+        else:
+            # new file format
+            layer_names = [n.decode('utf8') for n in f.attrs['layer_names']]
+
+            # Reverse index of layer name to list of layers with name.
+            index = {}
+            for layer in flattened_layers:
+                if layer.name:
+                    index.setdefault(layer.name, []).append(layer)
+
+            # we batch weight value assignments in a single backend call
+            # which provides a speedup in TensorFlow.
+            weight_value_tuples = []
+            for k, name in enumerate(layer_names):
+                g = f[name]
+                weight_names = [n.decode('utf8') for n in g.attrs['weight_names']]
+                weight_values = [g[weight_name] for weight_name in weight_names]
+
+                for layer in index.get(name, []):
+                    symbolic_weights = layer.weights
+                    if len(weight_values) != len(symbolic_weights):
+                        raise Exception('Layer #' + str(k) +
+                                        ' (named "' + layer.name +
+                                        '") expects ' +
+                                        str(len(symbolic_weights)) +
+                                        ' weight(s), but the saved weights' +
+                                        ' have ' + str(len(weight_values)) +
+                                        ' element(s).')
+                    # set values
+                    for i in range(len(weight_values)):
+                        weight_value_tuples.append((symbolic_weights[i], weight_values[i]))
+            K.batch_set_value(weight_value_tuples)
+
    def _updated_config(self):
        '''shared between different serialization methods'''
        from keras import __version__ as keras_version
--- a/tests/test_model_saving.py
+++ b/tests/test_model_saving.py
@ -1,5 +1,6 @@
 import pytest
 import os
+import tempfile
 import numpy as np
 from numpy.testing import assert_allclose

@ -28,7 +29,7 @@ def test_sequential_model_saving():
    model.train_on_batch(x, y)

    out = model.predict(x)
-    fname = 'tmp_' + str(np.random.randint(10000)) + '.h5'
+    _, fname = tempfile.mkstemp('.h5')
    save_model(model, fname)

    new_model = load_model(fname)
@ -62,7 +63,7 @@ def test_sequential_model_saving_2():
    model.train_on_batch(x, y)

    out = model.predict(x)
-    fname = 'tmp_' + str(np.random.randint(10000)) + '.h5'
+    _, fname = tempfile.mkstemp('.h5')
    save_model(model, fname)

    model = load_model(fname,
@ -89,7 +90,7 @@ def test_fuctional_model_saving():
    model.train_on_batch(x, y)

    out = model.predict(x)
-    fname = 'tmp_' + str(np.random.randint(10000)) + '.h5'
+    _, fname = tempfile.mkstemp('.h5')
    save_model(model, fname)

    model = load_model(fname)
@ -106,7 +107,7 @@ def test_saving_without_compilation():
    model.add(Dense(3))
    model.compile(loss='mse', optimizer='sgd', metrics=['acc'])

-    fname = 'tmp_' + str(np.random.randint(10000)) + '.h5'
+    _, fname = tempfile.mkstemp('.h5')
    save_model(model, fname)
    model = load_model(fname)
    os.remove(fname)
@ -120,11 +121,116 @@ def test_saving_right_after_compilation():
    model.compile(loss='mse', optimizer='sgd', metrics=['acc'])
    model.model._make_train_function()

-    fname = 'tmp_' + str(np.random.randint(10000)) + '.h5'
+    _, fname = tempfile.mkstemp('.h5')
    save_model(model, fname)
    model = load_model(fname)
    os.remove(fname)


+@keras_test
+def test_loading_weights_by_name():
+    """
+    test loading model weights by name on:
+        - sequential model
+    """
+
+    # test with custom optimizer, loss
+    custom_opt = optimizers.rmsprop
+    custom_loss = objectives.mse
+
+    # sequential model
+    model = Sequential()
+    model.add(Dense(2, input_dim=3, name="rick"))
+    model.add(Dense(3, name="morty"))
+    model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc'])
+
+    x = np.random.random((1, 3))
+    y = np.random.random((1, 3))
+    model.train_on_batch(x, y)
+
+    out = model.predict(x)
+    old_weights = [layer.get_weights() for layer in model.layers]
+    _, fname = tempfile.mkstemp('.h5')
+
+    model.save_weights(fname)
+
+    # delete and recreate model
+    del(model)
+    model = Sequential()
+    model.add(Dense(2, input_dim=3, name="rick"))
+    model.add(Dense(3, name="morty"))
+    model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc'])
+
+    # load weights from first model
+    model.load_weights(fname, by_name=True)
+    os.remove(fname)
+
+    out2 = model.predict(x)
+    assert_allclose(out, out2, atol=1e-05)
+    for i in range(len(model.layers)):
+        new_weights = model.layers[i].get_weights()
+        for j in range(len(new_weights)):
+            assert_allclose(old_weights[i][j], new_weights[j], atol=1e-05)
+
+
+@keras_test
+def test_loading_weights_by_name_2():
+    """
+    test loading model weights by name on:
+        - both sequential and functional api models
+        - different architecture with shared names
+    """
+
+    # test with custom optimizer, loss
+    custom_opt = optimizers.rmsprop
+    custom_loss = objectives.mse
+
+    # sequential model
+    model = Sequential()
+    model.add(Dense(2, input_dim=3, name="rick"))
+    model.add(Dense(3, name="morty"))
+    model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc'])
+
+    x = np.random.random((1, 3))
+    y = np.random.random((1, 3))
+    model.train_on_batch(x, y)
+
+    out = model.predict(x)
+    old_weights = [layer.get_weights() for layer in model.layers]
+    _, fname = tempfile.mkstemp('.h5')
+
+    model.save_weights(fname)
+
+    # delete and recreate model using Functional API
+    del(model)
+    data = Input(shape=(3,))
+    rick = Dense(2, name="rick")(data)
+    jerry = Dense(3, name="jerry")(rick)  # add 2 layers (but maintain shapes)
+    jessica = Dense(2, name="jessica")(jerry)
+    morty = Dense(3, name="morty")(jessica)
+
+    model = Model(input=[data], output=[morty])
+    model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc'])
+
+    # load weights from first model
+    model.load_weights(fname, by_name=True)
+    os.remove(fname)
+
+    out2 = model.predict(x)
+    assert np.max(np.abs(out - out2)) > 1e-05
+
+    rick = model.layers[1].get_weights()
+    jerry = model.layers[2].get_weights()
+    jessica = model.layers[3].get_weights()
+    morty = model.layers[4].get_weights()
+
+    assert_allclose(old_weights[0][0], rick[0], atol=1e-05)
+    assert_allclose(old_weights[0][1], rick[1], atol=1e-05)
+    assert_allclose(old_weights[1][0], morty[0], atol=1e-05)
+    assert_allclose(old_weights[1][1], morty[1], atol=1e-05)
+    assert_allclose(np.zeros_like(jerry[1]), jerry[1])  # biases init to 0
+    assert_allclose(np.zeros_like(jessica[1]), jessica[1])  # biases init to 0
+
+
 if __name__ == '__main__':
    pytest.main([__file__])