Initial Sparse Matrix Support (#3695)
* Minimal SparseTensor support for TensorFlow
* Basic Theano support for Sparse dot product
* Sparse Input for Both + Sparse Concat for TF
* Fixed issue with _keras_shape for sparse Inputs
* pep8
* Cleanup + Theano concat (untested)
* Bug fix & pep8
* Fix Theano concat
* Bugfix & simplification
* Next step: Unit tests
* Basic unit test for sparse dot; TF works, TH fails
* Fix KTH is_sparse
* pep8
* more tests, sparse KTH.eval, pep8
* sparse model test
* address code review comments
* make sparse boolean in K.placeholder
* skip sparse tests when TH.sparse import fails
* pep8
* pep8
* fixed flakey test, auto-dense in KTH.eval
* fixed some more len/shape issues for fit_generator
* fixed some more len/shape issues for prediction
* Added better exceptions when theano.sparse fails to import
* betterer
* pep8
parent: 6675776640
commit: 79edae58d5
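Illustrative usage (not part of the diff): a minimal sketch of the user-facing path this commit enables, assuming the Keras 1.x API shown in the hunks below and scipy installed. The new tests/keras/test_sparse.py at the bottom of the diff exercises the same flow.

    import numpy as np
    import scipy.sparse as sparse
    from keras.models import Model
    from keras.layers import Dense, Input

    # Declare the input as sparse; InputLayer forwards sparse=True to K.placeholder.
    x_in = Input(batch_shape=(None, 16), sparse=True)
    hidden = Dense(8, activation='relu')(x_in)
    preds = Dense(4, activation='sigmoid')(hidden)
    model = Model(input=[x_in], output=preds)
    model.compile(loss='mse', optimizer='sgd')

    # Feed a scipy CSR matrix directly; the backend Function converts it at call time.
    x = sparse.rand(32, 16, density=0.1, format='csr')
    y = np.random.random((32, 4))
    model.fit(x, y, nb_epoch=1)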
@@ -9,6 +9,7 @@ import os
 import copy
 import warnings
 from .common import _FLOATX, _EPSILON, _IMAGE_DIM_ORDERING, reset_uids
+py_all = all
 
 # INTERNAL UTILS
 
@@ -117,6 +118,17 @@ def _to_tensor(x, dtype):
     return x
 
 
+def is_sparse(tensor):
+    return isinstance(tensor, tf.SparseTensor)
+
+
+def to_dense(tensor):
+    if is_sparse(tensor):
+        return tf.sparse_tensor_to_dense(tensor)
+    else:
+        return tensor
+
+
 def variable(value, dtype=_FLOATX, name=None):
     '''Instantiates a tensor.
 
@@ -128,6 +140,12 @@ def variable(value, dtype=_FLOATX, name=None):
     # Returns
         Tensor variable instance.
     '''
+    if hasattr(value, 'tocoo'):
+        sparse_coo = value.tocoo()
+        indices = np.concatenate((np.expand_dims(sparse_coo.row, 1), np.expand_dims(sparse_coo.col, 1)), 1)
+        # SparseTensor doesn't need initialization
+        return tf.SparseTensor(indices=indices, values=value.data, shape=value.shape)
+
     v = tf.Variable(value, dtype=_convert_string_dtype(dtype), name=name)
     if _MANUAL_VAR_INIT:
         return v
@@ -148,7 +166,7 @@ def variable(value, dtype=_FLOATX, name=None):
     return v
 
 
-def placeholder(shape=None, ndim=None, dtype=_FLOATX, name=None):
+def placeholder(shape=None, ndim=None, dtype=_FLOATX, sparse=False, name=None):
     '''Instantiates a placeholder.
 
     # Arguments
@@ -166,7 +184,11 @@ def placeholder(shape=None, ndim=None, dtype=_FLOATX, name=None):
     if not shape:
         if ndim:
             shape = tuple([None for _ in range(ndim)])
-    x = tf.placeholder(dtype, shape=shape, name=name)
+    if sparse:
+        tf_shape = tf.constant(np.array(list([0 for _ in range(len(shape))]), dtype=np.int64))
+        x = tf.sparse_placeholder(dtype, shape=tf_shape, name=name)
+    else:
+        x = tf.placeholder(dtype, shape=shape, name=name)
     x._keras_shape = shape
     x._uses_learning_phase = False
     return x
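A rough sketch of what the sparse branch above gives you on the TensorFlow backend (illustrative only; the behaviour follows from this hunk and the ndim() change in the next one):

    from keras import backend as K  # assumes the TensorFlow backend is active

    x = K.placeholder(shape=(None, 10), sparse=True)
    # x is created via tf.sparse_placeholder rather than tf.placeholder
    print(K.is_sparse(x))   # True
    print(K.ndim(x))        # 2, read back from the sparse tensor's shape tensor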
@@ -190,6 +212,9 @@ def int_shape(x):
 def ndim(x):
     '''Returns the number of axes in a tensor, as an integer.
     '''
+    if is_sparse(x):
+        return int(x.shape.get_shape()[0])
+
     dims = x.get_shape()._dims
     if dims is not None:
         return len(dims)
@@ -206,7 +231,7 @@ def eval(x):
     '''Evaluates the value of a tensor.
     Returns a Numpy array.
     '''
-    return x.eval(session=get_session())
+    return to_dense(x).eval(session=get_session())
 
 
 def zeros(shape, dtype=_FLOATX, name=None):
@@ -318,7 +343,10 @@ def dot(x, y):
         xt = tf.reshape(x, [-1, x_shape[-1]])
         yt = tf.reshape(tf.transpose(y, perm=y_permute_dim), [y_shape[-2], -1])
         return tf.reshape(tf.matmul(xt, yt), x_shape[:-1] + y_shape[:-2] + y_shape[-1:])
-    out = tf.matmul(x, y)
+    if is_sparse(x):
+        out = tf.sparse_tensor_dense_matmul(x, y)
+    else:
+        out = tf.matmul(x, y)
     return out
 
 
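Note that the sparse branch covers the sparse-times-dense case only: tf.sparse_tensor_dense_matmul expects the SparseTensor on the left and a dense tensor on the right. A small sketch of the intended call pattern, mirroring the test_sparse_dot unit test added further down (assumes the TensorFlow backend is importable):

    import numpy as np
    import scipy.sparse as sparse
    from keras.backend import tensorflow_backend as KTF

    x_sp = sparse.rand(4, 5, density=0.3, format='csr')   # sparse left operand
    w = np.random.random((5, 3))                           # dense right operand

    out = KTF.dot(KTF.variable(x_sp), KTF.variable(w))     # sparse_tensor_dense_matmul path
    print(KTF.eval(out).shape)                             # (4, 3)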
@@ -676,11 +704,16 @@ def concatenate(tensors, axis=-1):
     '''Concantes a list of tensors alongside the specified axis.
     '''
     if axis < 0:
-        if len(tensors[0].get_shape()):
-            axis = axis % len(tensors[0].get_shape())
+        dims = ndim(tensors[0])
+        if dims:
+            axis = axis % dims
         else:
             axis = 0
-    return tf.concat(axis, tensors)
+
+    if py_all([is_sparse(x) for x in tensors]):
+        return tf.sparse_concat(axis, tensors)
+    else:
+        return tf.concat(axis, [to_dense(x) for x in tensors])
 
 
 def reshape(x, shape):
@@ -969,8 +1002,13 @@ class Function(object):
 
     def __call__(self, inputs):
         assert type(inputs) in {list, tuple}
-        names = [getattr(v, 'name', None) for v in self.inputs]
-        feed_dict = dict(zip(names, inputs))
+        feed_dict = {}
+        for tensor, value in zip(self.inputs, inputs):
+            if is_sparse(tensor):
+                sparse_coo = value.tocoo()
+                indices = np.concatenate((np.expand_dims(sparse_coo.row, 1), np.expand_dims(sparse_coo.col, 1)), 1)
+                value = (indices, value.data, value.shape)
+            feed_dict[tensor] = value
         session = get_session()
         updated = session.run(self.outputs + [self.updates_op], feed_dict=feed_dict)
         return updated[:len(self.outputs)]
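At call time a scipy sparse feed value is turned into the (indices, values, shape) triple that TensorFlow accepts for a sparse placeholder. The same conversion in isolation (standalone sketch, pure numpy/scipy, not part of the diff):

    import numpy as np
    import scipy.sparse as sparse

    x = sparse.rand(4, 5, density=0.2, format='csr')
    coo = x.tocoo()
    # Stack row/col indices into an (nnz, 2) array, exactly as the backend does above.
    indices = np.concatenate((np.expand_dims(coo.row, 1),
                              np.expand_dims(coo.col, 1)), 1)
    feed_value = (indices, x.data, x.shape)   # fed to the tf.sparse_placeholder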
@@ -4,6 +4,10 @@ from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
 from theano.tensor.signal import pool
 from theano.tensor.nnet import conv3d2d
 from theano.printing import Print
+try:
+    import theano.sparse as th_sparse_module
+except ImportError:
+    th_sparse_module = None
 try:
     from theano.tensor.nnet.nnet import softsign as T_softsign
 except ImportError:
@@ -11,6 +15,7 @@ except ImportError:
 import inspect
 import numpy as np
 from .common import _FLOATX, _EPSILON, _IMAGE_DIM_ORDERING
+py_all = all
 
 
 # INTERNAL UTILS
@@ -30,17 +35,38 @@ def set_learning_phase(value):
                          '0 or 1.')
     _LEARNING_PHASE = value
 
 
 # VARIABLE MANIPULATION
 
 
+def _assert_sparse_module():
+    if not th_sparse_module:
+        raise ImportError("Failed to import theano.sparse\n"
+                          "You probably need to pip install nose-parameterized")
+
+
+def is_sparse(tensor):
+    return th_sparse_module and isinstance(tensor.type, th_sparse_module.SparseType)
+
+
+def to_dense(tensor):
+    if is_sparse(tensor):
+        return th_sparse_module.dense_from_sparse(tensor)
+    else:
+        return tensor
+
+
 def variable(value, dtype=_FLOATX, name=None):
     '''Instantiate a tensor variable.
     '''
-    value = np.asarray(value, dtype=dtype)
-    return theano.shared(value=value, name=name, strict=False)
+    if hasattr(value, 'tocoo'):
+        _assert_sparse_module()
+        return th_sparse_module.as_sparse_variable(value)
+    else:
+        value = np.asarray(value, dtype=dtype)
+        return theano.shared(value=value, name=name, strict=False)
 
 
-def placeholder(shape=None, ndim=None, dtype=_FLOATX, name=None):
+def placeholder(shape=None, ndim=None, dtype=_FLOATX, sparse=False, name=None):
     '''Instantiate an input data placeholder variable.
     '''
     if shape is None and ndim is None:
@@ -51,7 +77,11 @@ def placeholder(shape=None, ndim=None, dtype=_FLOATX, name=None):
         shape = tuple([None for _ in range(ndim)])
 
     broadcast = (False,) * ndim
-    x = T.TensorType(dtype, broadcast)(name)
+    if sparse:
+        _assert_sparse_module()
+        x = th_sparse_module.csr_matrix(name=name, dtype=dtype)
+    else:
+        x = T.TensorType(dtype, broadcast)(name)
     x._keras_shape = shape
     x._uses_learning_phase = False
     return x
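On the Theano backend the same entry points route through theano.sparse, guarded by the optional import at the top of the file. A small sketch of the variable/eval path, skipped when theano.sparse is unavailable in the same way the tests below skip it:

    import scipy.sparse as sparse
    from keras.backend import theano_backend as KTH

    if KTH.th_sparse_module:                  # theano.sparse imported successfully
        x_sp = sparse.rand(4, 5, density=0.2, format='csr')
        v = KTH.variable(x_sp)                # as_sparse_variable, not theano.shared
        print(KTH.is_sparse(v))               # True
        print(KTH.eval(v).shape)              # (4, 5); eval() densifies via dense_from_sparse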
@@ -77,7 +107,7 @@ def dtype(x):
 def eval(x):
     '''Run a graph.
     '''
-    return x.eval()
+    return to_dense(x).eval()
 
 
 def zeros(shape, dtype=_FLOATX, name=None):
@@ -156,7 +186,10 @@ Assumed overridden:
 
 
 def dot(x, y):
-    return T.dot(x, y)
+    if is_sparse(x):
+        return th_sparse_module.basic.structured_dot(x, y)
+    else:
+        return T.dot(x, y)
 
 
 def batch_dot(x, y, axes=None):
@@ -402,7 +435,16 @@ def batch_normalization(x, mean, var, beta, gamma, epsilon=0.0001):
 # SHAPE OPERATIONS
 
 def concatenate(tensors, axis=-1):
-    return T.concatenate(tensors, axis=axis)
+    if py_all([is_sparse(x) for x in tensors]):
+        axis = axis % ndim(tensors[0])
+        if axis == 0:
+            return th_sparse_module.basic.vstack(tensors, format='csr')
+        elif axis == 1:
+            return th_sparse_module.basic.hstack(tensors, format='csr')
+        else:
+            raise Exception('Invalid concat axis for sparse matrix: ' + axis)
+    else:
+        return T.concatenate([to_dense(x) for x in tensors], axis=axis)
 
 
 def reshape(x, shape):
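For sparse tensors, Theano concatenation is mapped onto stacking: axis 0 becomes a vertical stack and axis 1 a horizontal stack, both returned in CSR format. The scipy equivalent of that mapping, for orientation only:

    import scipy.sparse as sparse

    a = sparse.rand(4, 5, density=0.2, format='csr')
    b = sparse.rand(4, 5, density=0.2, format='csr')

    print(sparse.vstack([a, b]).shape)   # (8, 5)  -> concatenate(..., axis=0)
    print(sparse.hstack([a, b]).shape)   # (4, 10) -> concatenate(..., axis=1)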
@@ -947,7 +947,7 @@ class InputLayer(Layer):
     '''TODO: dosctring
     '''
     def __init__(self, input_shape=None, batch_input_shape=None,
-                 input_dtype=None, input_tensor=None, name=None):
+                 input_dtype=None, input_tensor=None, sparse=False, name=None):
         self.input_spec = None
         self.supports_masking = False
         self.uses_learning_phase = False
@@ -964,6 +964,8 @@ class InputLayer(Layer):
         self.regularizers = []
         self.constraints = {}
 
+        self.sparse = sparse
+
         if not name:
             prefix = 'input'
             name = prefix + '_' + str(K.get_uid(prefix))
@@ -1004,6 +1006,7 @@ class InputLayer(Layer):
         if input_tensor is None:
             input_tensor = K.placeholder(shape=batch_input_shape,
                                          dtype=input_dtype,
+                                         sparse=self.sparse,
                                          name=self.name)
         else:
             input_tensor._keras_shape = batch_input_shape
@@ -1025,12 +1028,13 @@ class InputLayer(Layer):
     def get_config(self):
         config = {'batch_input_shape': self.batch_input_shape,
                   'input_dtype': self.input_dtype,
+                  'sparse': self.sparse,
                   'name': self.name}
         return config
 
 
 def Input(shape=None, batch_shape=None,
-          name=None, dtype=K.floatx(),
+          name=None, dtype=K.floatx(), sparse=False,
           tensor=None):
     '''`Input()` is used to instantiate a Keras tensor.
     A Keras tensor is a tensor object from the underlying backend
@@ -1063,6 +1067,7 @@ def Input(shape=None, batch_shape=None,
             It will be autogenerated if it isn't provided.
         dtype: The data type expected by the input, as a string
             (`float32`, `float64`, `int32`...)
+        sparse: a boolean specifying whether this will be a sparse tensor
 
     # Example usage
 
@@ -1082,6 +1087,7 @@ def Input(shape=None, batch_shape=None,
         batch_shape = (None,) + tuple(shape)
     input_layer = InputLayer(batch_input_shape=batch_shape,
                              name=name, input_dtype=dtype,
+                             sparse=sparse,
                              input_tensor=tensor)
     # return tensor including _keras_shape and _keras_history
     # note that in this case train_output and test_output are the same pointer.
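With the flag plumbed through InputLayer and Input(), a sparse Keras input can be declared directly (sketch; requires a backend with sparse support):

    from keras import backend as K
    from keras.layers import Input

    x = Input(shape=(16,), sparse=True)   # InputLayer passes sparse=True to K.placeholder
    print(K.is_sparse(x))                 # True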
@@ -763,9 +763,9 @@ class Model(Container):
             do_validation = True
             if verbose:
                 print('Train on %d samples, validate on %d samples' %
-                      (len(ins[0]), len(val_ins[0])))
+                      (ins[0].shape[0], val_ins[0].shape[0]))
 
-        nb_train_sample = len(ins[0])
+        nb_train_sample = ins[0].shape[0]
        index_array = np.arange(nb_train_sample)
 
         self.history = cbks.History()
@@ -859,7 +859,7 @@ class Model(Container):
             or list of arrays of predictions
             (if the model has multiple outputs).
         '''
-        nb_sample = len(ins[0])
+        nb_sample = ins[0].shape[0]
         outs = []
         if verbose == 1:
             progbar = Progbar(target=nb_sample)
@@ -904,7 +904,7 @@ class Model(Container):
             and/or metrics). The attribute `model.metrics_names` will give you
             the display labels for the scalar outputs.
         '''
-        nb_sample = len(ins[0])
+        nb_sample = ins[0].shape[0]
         outs = []
         if verbose == 1:
             progbar = Progbar(target=nb_sample)
@@ -1426,11 +1426,11 @@ class Model(Container):
                 # build batch logs
                 batch_logs = {}
                 if type(x) is list:
-                    batch_size = len(x[0])
+                    batch_size = x[0].shape[0]
                 elif type(x) is dict:
-                    batch_size = len(list(x.values())[0])
+                    batch_size = list(x.values())[0].shape[0]
                 else:
-                    batch_size = len(x)
+                    batch_size = x.shape[0]
                 batch_logs['batch'] = batch_index
                 batch_logs['size'] = batch_size
                 callbacks.on_batch_begin(batch_index, batch_logs)
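The len() to .shape[0] changes in this file are needed because scipy sparse matrices deliberately do not implement len(); .shape[0] returns the sample count for dense arrays and sparse matrices alike. For illustration:

    import numpy as np
    import scipy.sparse as sparse

    x_dense = np.zeros((32, 16))
    x_sp = sparse.rand(32, 16, density=0.1, format='csr')

    print(x_dense.shape[0], x_sp.shape[0])   # 32 32
    # len(x_sp) raises TypeError: sparse matrix length is ambiguous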
@@ -2,6 +2,7 @@ import sys
 import pytest
 from numpy.testing import assert_allclose
 import numpy as np
+import scipy.sparse as sparse
 
 from keras.backend import theano_backend as KTH
 from keras.backend import tensorflow_backend as KTF
@@ -780,6 +781,61 @@ class TestBackend(object):
         koh = K.eval(K.one_hot(K.variable(indices, dtype='int32'), nb_classes))
         assert np.all(koh == oh)
 
+    def test_sparse_dot(self):
+        x_d = np.array([0, 7, 2, 3], dtype=np.float32)
+        x_r = np.array([0, 2, 2, 3], dtype=np.int64)
+        x_c = np.array([4, 3, 2, 3], dtype=np.int64)
+
+        x_sparse = sparse.csr_matrix((x_d, (x_r, x_c)), shape=(4, 5))
+        x_dense = x_sparse.toarray()
+
+        W = np.random.random((5, 4))
+
+        backends = [KTF]
+        if KTH.th_sparse_module:
+            # Theano has some dependency issues for sparse
+            backends.append(KTH)
+
+        for K in backends:
+            t_W = K.variable(W)
+            k_s = K.eval(K.dot(K.variable(x_sparse), t_W))
+            k_d = K.eval(K.dot(K.variable(x_dense), t_W))
+
+            assert k_s.shape == k_d.shape
+            assert_allclose(k_s, k_d, atol=1e-05)
+
+    def test_sparse_concat(self):
+        x_d = np.array([0, 7, 2, 3], dtype=np.float32)
+        x_r = np.array([0, 2, 2, 3], dtype=np.int64)
+        x_c = np.array([4, 3, 2, 3], dtype=np.int64)
+
+        x_sparse_1 = sparse.csr_matrix((x_d, (x_r, x_c)), shape=(4, 5))
+
+        x_d = np.array([0, 7, 2, 3], dtype=np.float32)
+        x_r = np.array([0, 2, 2, 3], dtype=np.int64)
+        x_c = np.array([4, 3, 2, 3], dtype=np.int64)
+
+        x_sparse_2 = sparse.csr_matrix((x_d, (x_r, x_c)), shape=(4, 5))
+
+        x_dense_1 = x_sparse_1.toarray()
+        x_dense_2 = x_sparse_2.toarray()
+
+        backends = [KTF]
+        if KTH.th_sparse_module:
+            # Theano has some dependency issues for sparse
+            backends.append(KTH)
+
+        for K in backends:
+            k_s = K.concatenate([K.variable(x_sparse_1), K.variable(x_sparse_2)])
+            assert K.is_sparse(k_s)
+
+            k_s_d = K.eval(k_s)
+
+            k_d = K.eval(K.concatenate([K.variable(x_dense_1), K.variable(x_dense_2)]))
+
+            assert k_s_d.shape == k_d.shape
+            assert_allclose(k_s_d, k_d, atol=1e-05)
+
 
 if __name__ == '__main__':
     pytest.main([__file__])
tests/keras/test_sparse.py (new file, 41 lines)
@@ -0,0 +1,41 @@
+from __future__ import absolute_import
+from __future__ import print_function
+import pytest
+
+from keras.models import Model
+from keras.layers import Dense, Input
+from keras.utils.test_utils import keras_test
+from keras import backend as K
+from keras.backend import theano_backend as KTH
+from keras.backend import tensorflow_backend as KTF
+
+import scipy.sparse as sparse
+import numpy as np
+np.random.seed(1337)
+
+
+input_dim = 16
+nb_hidden = 8
+nb_class = 4
+batch_size = 32
+nb_epoch = 1
+
+
+def do_sparse():
+    return K == KTF or KTH.th_sparse_module
+
+
+@keras_test
+def test_sparse_mlp():
+    if not do_sparse():
+        return
+
+    input = Input(batch_shape=(None, input_dim), sparse=True)
+    hidden = Dense(nb_hidden, activation='relu')(input)
+    hidden = Dense(nb_hidden, activation='relu')(hidden)
+    predictions = Dense(nb_class, activation='sigmoid')(hidden)
+    model = Model(input=[input], output=predictions)
+    model.compile(loss='mse', optimizer='sgd')
+    x = sparse.rand(batch_size, input_dim, density=0.1, format='csr')
+    y = np.random.random((batch_size, nb_class))
+    model.fit(x, y, nb_epoch=1)