Bundle current master of ceres-solver

Thins brings up some speed improvements: SPARSE_SCHUR is approx 1.3-1.5x times faster ITERATIVE_SCHUR is approx 1.2x times faster For blender this means camera solution go a bit faster now. Would not have affect on tracking speed.
2013-04-05 09:22:54 +00:00 · 2013-04-05 09:22:54 +00:00 · 43b61fb8bd
commit 43b61fb8bd
parent a00b72bc75
14 changed files with 911 additions and 403 deletions
--- a/extern/libmv/third_party/ceres/CMakeLists.txt
+++ b/extern/libmv/third_party/ceres/CMakeLists.txt
@ -141,6 +141,7 @@ set(SRC
 	include/ceres/solver.h
 	include/ceres/types.h
 	internal/ceres/array_utils.h
 	internal/ceres/blas.h
 	internal/ceres/block_evaluate_preparer.h
 	internal/ceres/block_jacobian_writer.h
 	internal/ceres/block_jacobi_preconditioner.h
--- a/extern/libmv/third_party/ceres/ChangeLog
+++ b/extern/libmv/third_party/ceres/ChangeLog
@ -1,3 +1,185 @@
 commit 520d35ef22dbcb05e344451c03ae64304e524a06
 Author: Sameer Agarwal <sameeragarwal@google.com>
 Date:   Thu Apr 4 08:16:02 2013 -0700
    Further BLAS improvements.
    1. Switch to Eigen's implementation when all dimensions are fixed.
    2. Use lazyProduct for eigen matrix-vector product. This brings
       eigen's performance on iterative_schur closer to what it used
       to be before the last commit. There is however still an
       improvement to be had by using the naive implementation when
       the matrix and vector have dynamic dimensions.
    BENCHMARK
                                          HEAD                                       CHANGE
    problem-16-22106-pre.txt
    gcc-eigen       sparse_schur         0.859    gcc-eigen       sparse_schur        0.853
    clang-eigen     sparse_schur         0.848    clang-eigen     sparse_schur        0.850
    gcc-blas        sparse_schur         0.956    gcc-blas        sparse_schur        0.865
    clang-blas      sparse_schur         0.954    clang-blas      sparse_schur        0.858
    gcc-eigen       iterative_schur      4.656    gcc-eigen       iterative_schur     3.271
    clang-eigen     iterative_schur      4.664    clang-eigen     iterative_schur     3.307
    gcc-blas        iterative_schur      2.598    gcc-blas        iterative_schur     2.620
    clang-blas      iterative_schur      2.554    clang-blas      iterative_schur     2.567
    problem-49-7776-pre.txt
    gcc-eigen       sparse_schur         0.477    gcc-eigen       sparse_schur        0.472
    clang-eigen     sparse_schur         0.475    clang-eigen     sparse_schur        0.479
    gcc-blas        sparse_schur         0.521    gcc-blas        sparse_schur        0.469
    clang-blas      sparse_schur         0.508    clang-blas      sparse_schur        0.471
    gcc-eigen       iterative_schur      3.172    gcc-eigen       iterative_schur     2.088
    clang-eigen     iterative_schur      3.161    clang-eigen     iterative_schur     2.079
    gcc-blas        iterative_schur      1.701    gcc-blas        iterative_schur     1.720
    clang-blas      iterative_schur      1.708    clang-blas      iterative_schur     1.694
    problem-245-198739-pre.txt
    gcc-eigen       sparse_schur        28.092    gcc-eigen       sparse_schur       28.233
    clang-eigen     sparse_schur        28.148    clang-eigen     sparse_schur       28.400
    gcc-blas        sparse_schur        30.919    gcc-blas        sparse_schur       28.110
    clang-blas      sparse_schur        31.001    clang-blas      sparse_schur       28.407
    gcc-eigen       iterative_schur     63.095    gcc-eigen       iterative_schur    43.694
    clang-eigen     iterative_schur     63.412    clang-eigen     iterative_schur    43.473
    gcc-blas        iterative_schur     33.353    gcc-blas        iterative_schur    33.321
    clang-blas      iterative_schur     33.276    clang-blas      iterative_schur    33.278
    problem-257-65132-pre.txt
    gcc-eigen       sparse_schur         3.687    gcc-eigen       sparse_schur        3.629
    clang-eigen     sparse_schur         3.669    clang-eigen     sparse_schur        3.652
    gcc-blas        sparse_schur         3.947    gcc-blas        sparse_schur        3.673
    clang-blas      sparse_schur         3.952    clang-blas      sparse_schur        3.678
    gcc-eigen       iterative_schur    121.512    gcc-eigen       iterative_schur    76.833
    clang-eigen     iterative_schur    123.547    clang-eigen     iterative_schur    78.763
    gcc-blas        iterative_schur     68.334    gcc-blas        iterative_schur    68.612
    clang-blas      iterative_schur     67.793    clang-blas      iterative_schur    68.266
    Notes:
    1. Naive BLAS was a bit worse than eigen on fixed sized matrices. We did not see this
       before because of the different inlining thresholds. Fixing this boosted eigen's
       performance. Also the disparity between gcc and clang has gone away.
    2. SPARSE_SCHUR performance remains the same, since it is only testing static sized
       matrices.
    3. ITERATIVE_SCHUR performance goes up substantially due to the lazyProduct change,
       but even there, since most of the products are dynamic sized, the naive implementation
       wins handily.
    Change-Id: Idc17f35b9c68aaebb1b2e131adf3af8374a85a4c
 commit 25ac54807eedf16fd6c34efc390901ee549a7d14
 Author: Sameer Agarwal <sameeragarwal@google.com>
 Date:   Wed Apr 3 18:51:27 2013 -0700
    Speed up Jets.
    Change-Id: I101bac1b1a1cf72ca49ffcf843b73c0ef5a6dfcb
 commit 3d6eceb45cf27024865908f0c10a5c2b0f8719cf
 Author: Sameer Agarwal <sameeragarwal@google.com>
 Date:   Tue Apr 2 21:45:48 2013 -0700
    Replace more instances of Eigen GEMV with Ceres BLAS.
    With this ITERATIVE_SCHUR with JACOBI preconditioner went down from
    280 seconds to 150 seconds on problem-744-543562-pre.txt.
    Change-Id: I4f319c1108421e8d59f58654a4c0576ad65df609
 commit 296fa9b1279ee1900c8ae32d70e97cd10fc0b46b
 Author: Sameer Agarwal <sameeragarwal@google.com>
 Date:   Tue Apr 2 09:44:15 2013 -0700
    Replace Eigen block operations with small GEMM and GEMV loops.
    1. Add Matrix-Matrix and Matrix-Vector multiply functions.
    2. Replace Eigen usage in SchurEliminator with these custom
       matrix operations.
    3. Save on some memory allocations in ChunkOuterProduct.
    4. Replace LDLT with LLT.
    As a result on problem-16-22106-pre.txt, the linear solver time
    goes down from 1.2s to 0.64s.
    Change-Id: I2daa667960e0a1e8834489965a30be31f37fd87f
 commit 222ca20e8facf706582fe696b7f41247391eac12
 Author: Sameer Agarwal <sameeragarwal@google.com>
 Date:   Mon Apr 1 09:11:07 2013 -0700
    SuiteSparse cleanup.
    1. Silence CHOLMOD's indefiniteness warnings.
    2. Add a comment about how the error handling in suitesparse.cc
       needs to be improved.
    3. Move the analysis logging into suitesparse.cc and out of the
       three callsites.
    Change-Id: Idd396b8ea4bf59fc1ffc7f9fcbbc7b38ed71643c
 commit b7ba93459b7f584eedb1a9f42f3d06bccabd33dc
 Author: Petter Strandmark <petter.strandmark@gmail.com>
 Date:   Tue Feb 19 12:52:58 2013 +0100
    Enable larger tuple sizes for Visual Studio 2012.
    Visual Studio 2012 does not have variadic templates and implements
    tuples differently. By default, only sizes up to 5 are supported,
    which conflicts with Gtest.
    Change-Id: Ieb8d59e4329863cbfa2729d8a76db0714c08d259
 commit 564a83fcc690ac8383bf52a782c45757ae7fa2ad
 Author: Sameer Agarwal <sameeragarwal@google.com>
 Date:   Tue Mar 26 11:11:43 2013 -0700
    Lint cleanup from William Rucklidge.
    Change-Id: I8d4a0aa3e264775d20e99a6b5265f3023de92560
 commit cbe64827becbbaab5b435a71bf00353b4ddd026b
 Author: Sameer Agarwal <sameeragarwal@google.com>
 Date:   Mon Mar 25 17:39:53 2013 -0700
    Update documentation
    Change-Id: Iea3c4b5409e593b1fb070a491ba8a479db32ca58
 commit 802639c89603c9541e624766349d1989a1f641c0
 Author: Pablo Speciale <pablo.speciale@gmail.com>
 Date:   Mon Mar 25 20:53:45 2013 -0700
    ceresproblem label was incorrect
    Change-Id: I3e210375adba4fa50ff3c25398b20a65d241cb35
 commit 6bcb8d9c304a3b218f8788018dfdfe368bb7d60c
 Author: Pablo Speciale <pablo.speciale@gmail.com>
 Date:   Mon Mar 25 16:40:26 2013 -0700
    Compiling against static or shared library
    Change-Id: I3fb35e9a49f90b8894f59dde49c90a7c2dd74b0a
 commit 619cfe692020c078275b68eac2167232fafdfffb
 Author: Sameer Agarwal <sameeragarwal@google.com>
 Date:   Mon Mar 25 14:03:41 2013 -0700
    Update version history
    Change-Id: I1d036efad1507efd19d8581f147b38170b1f0543
 commit 6ae34757850a5fa8213e0d1a540d9d75d6840a08
 Author: Pablo Speciale <pablo.speciale@gmail.com>
 Date:   Sun Mar 24 22:30:52 2013 -0700
    Added documentation regarding the use of Ceres with cmake (once installed)
    Commets about the flag ``BUILD_DOCUMENTATION=ON``
    Change-Id: I8814927e60af190c8043bfc36e77fe76bfe6f562
 commit f46de9e697eb5b8756084615e29ded48600a4d39
 Author: Sergey Sharybin <sergey.vfx@gmail.com>
 Date:   Thu Mar 21 15:31:35 2013 +0600
@ -437,171 +619,3 @@ Date:   Sun Feb 24 14:15:45 2013 -0800
    Minor release script fixes.
    Change-Id: Ifd0a7f4f584c85d4d9574eca46094b372a8d7aff
 commit b53c9667f508c125b8aa833e7a063fa44ef8a98e
 Author: Sergey Sharybin <sergey.vfx@gmail.com>
 Date:   Mon Feb 25 01:14:26 2013 +0600
    Solve No Previous Prototype GCC warning
    In some cases there were missing includes of own
    header files from implementation files.
    In other cases moved function which are only used
    within single file into an anonymous namespace.
    Change-Id: I2c6b411bcfbc521e2a5f21265dc8e009a548b1c8
 commit 267ccc45a3e875bf87832a8ad615be690b4926d3
 Author: Sergey Sharybin <sergey.vfx@gmail.com>
 Date:   Mon Feb 25 01:04:16 2013 +0600
    Fix for MinGW build on Windows
    GG_LONGLONG and GG_ULONGLONG shall use LL and ULL suffixes,
    since MinGW is actuall a GCC compiler.
    Solved by checking whether compilation happens with MinGW
    or not using standard MinGW's __MINGW32__ and __MINGW64__
    definitions.
    Change-Id: I789b34f6342a56ba42f4b280f7242700022ab7a1
 commit 509f68cfe3fd13b794c4e67ff38c761407c858cf
 Author: Sameer Agarwal <sameeragarwal@google.com>
 Date:   Wed Feb 20 01:39:03 2013 -0800
    Problem::Evaluate implementation.
    1. Add Problem::Evaluate and tests.
    2. Remove Solver::Summary::initial/final_*
    3. Remove Solver::Options::return_* members.
    4. Various cpplint cleanups.
    Change-Id: I4266de53489896f72d9c6798c5efde6748d68a47
 commit d4a0bf86d688d1b68e00ff302858de5a4e0d9727
 Author: Keir Mierle <mierle@gmail.com>
 Date:   Sun Feb 24 10:35:44 2013 -0800
    Fix threading build on Windows.
    On Windows, including the "windows.h" header defines an enormous number of
    symbols; some of which are macros with common names. In particular, "ERROR" and
    "min" and "max" get defined. This causes clashes when user code references
    these names in a context other than the intended use in windows.h.
    To deal with this, the Microsoft engineers added the ability to control the
    definition of these symbols by adding extra defines. In particular, including
    windows.h in the following way
      #define NOGDI
      #define NOMINMAX
    will reduce the number of macros defined. This way they will not conflict with
    other uses in Ceres. For example, numeric_limits<double>::max() is impossible
    to call without defining NOMINMAX.
    Change-Id: I166f5d3bb6dc0e2e4b2ebf800fb19e49206f7874
 commit beb4505311011130a7e54632137b0fbb5824cc9b
 Author: Sameer Agarwal <sameeragarwal@google.com>
 Date:   Fri Feb 22 13:37:01 2013 -0800
    Minor fixes
    Based on William Rucklidge's review, including
    a nasty bug in parameter block removal.
    Change-Id: I3a692e589f600ff560ecae9fa85bb0b76063d403
 commit 9a88bd7c4b40e2a1e0cd9b0dc09a3517c467e04e
 Author: Sameer Agarwal <sameeragarwal@google.com>
 Date:   Tue Feb 19 13:09:12 2013 -0800
    Minor bug fixes
    Change-Id: I94e4521adf76a6c77db954c4a8955168e9d37b55
 commit 956ed7e8f2054623615e6ae3601055d613897f26
 Author: Sameer Agarwal <sameeragarwal@google.com>
 Date:   Tue Feb 19 07:06:15 2013 -0800
    Various minor fixes.
    1. Unused variable warnings and fixes.
    2. Minor documentation update.
    Change-Id: I815588a5806df1030a7c8750f4fb594c503f8998
 commit 3e2c4ef9ad35e94198f4f3367b99fd91e26996a1
 Author: Keir Mierle <mierle@gmail.com>
 Date:   Sun Feb 17 12:37:55 2013 -0800
    Add adapters for column/row-major matrices to rotation.h
    This patch introduces a matrix wrapper (MatrixAdapter) that allows to
    transparently pass pointers to row-major or column-major matrices
    to the conversion functions.
    Change-Id: I7f1683a8722088cffcc542f593ce7eb46fca109b
 commit 04938efe4bedec112083c5ceb227ba004f96bd01
 Author: Keir Mierle <mierle@gmail.com>
 Date:   Sun Feb 17 12:37:55 2013 -0800
    Add support for removing parameter and residual blocks.
    This adds support for removing parameter and residual blocks.
    There are two modes of operation: in the first, removals of
    paremeter blocks are expensive, since each remove requires
    scanning all residual blocks to find ones that depend on the
    removed parameter. In the other, extra memory is sacrificed to
    maintain a list of the residuals a parameter block depends on,
    removing the need to scan. In both cases, removing residual blocks
    is fast.
    As a caveat, any removals destroys the ordering of the parameters,
    so the residuals or jacobian returned from Solver::Solve() is
    meaningless. There is some debate on the best way to handle this;
    the details remain for a future change.
    This also adds some overhead, even in the case that fast removals
    are not requested:
    - 1 int32 to each residual, to track its position in the program.
    - 1 pointer to each parameter, to store the dependent residuals.
    Change-Id: I71dcac8656679329a15ee7fc12c0df07030c12af
 commit fa21df8cd969bb257b87c9ef7c0147d8d5ea8725
 Author: Sameer Agarwal <sameeragarwal@google.com>
 Date:   Mon Feb 18 08:48:52 2013 -0800
    Add script for building documentation.
    Update make_release
    Minor documentation fixes.
    Change-Id: I1248ec3f58be66b5929aee6f2aa392c15d53ed83
 commit 290b975d1d4eba44205bbeb0fa6b3ce8a6fa4a0c
 Author: Sameer Agarwal <sameeragarwal@google.com>
 Date:   Sun Feb 17 16:50:37 2013 -0800
    Preconditioner refactoring.
    1. Added a Preconditioner interface.
    2. SCHUR_JACOBI is now its own class and is independent of
    SuiteSparse.
    Change-Id: Id912ab19cf3736e61d1b90ddaf5bfba33e877ec4
 commit d010de543530001fa917501a13ba02879c8ea52f
 Author: Sameer Agarwal <sameeragarwal@google.com>
 Date:   Fri Feb 15 14:26:56 2013 -0800
    Solver::Summary::FullReport() supports line search now.
    Change-Id: Ib08d300198b85d9732cfb5785af4235ca4bd5226
--- a/extern/libmv/third_party/ceres/files.txt
+++ b/extern/libmv/third_party/ceres/files.txt
@ -31,6 +31,7 @@ include/ceres/solver.h
 include/ceres/types.h
 internal/ceres/array_utils.cc
 internal/ceres/array_utils.h
 internal/ceres/blas.h
 internal/ceres/block_evaluate_preparer.cc
 internal/ceres/block_evaluate_preparer.h
 internal/ceres/block_jacobian_writer.cc
--- a/extern/libmv/third_party/ceres/include/ceres/jet.h
+++ b/extern/libmv/third_party/ceres/include/ceres/jet.h
@ -349,7 +349,11 @@ Jet<T, N> operator/(const Jet<T, N>& f,
  //
  // which holds because v*v = 0.
  h.a = f.a / g.a;
-  h.v = (f.v - f.a / g.a * g.v) / g.a;
+  const T g_a_inverse = 1.0 / g.a;
  const T f_a_by_g_a = f.a * g_a_inverse;
  for (int i = 0; i < N; ++i) {
    h.v[i] = (f.v[i] - f_a_by_g_a * g.v[i]) * g_a_inverse;
  }
  return h;
 }
@ -358,7 +362,8 @@ template<typename T, int N> inline
 Jet<T, N> operator/(T s, const Jet<T, N>& g) {
  Jet<T, N> h;
  h.a = s / g.a;
-  h.v = - s * g.v / (g.a * g.a);
+  const T minus_s_g_a_inverse2 = -s / (g.a * g.a);
  h.v = g.v * minus_s_g_a_inverse2;
  return h;
 }
@ -366,8 +371,9 @@ Jet<T, N> operator/(T s, const Jet<T, N>& g) {
 template<typename T, int N> inline
 Jet<T, N> operator/(const Jet<T, N>& f, T s) {
  Jet<T, N> h;
-  h.a = f.a / s;
+  const T s_inverse = 1.0 / s;
-  h.v = f.v / s;
+  h.a = f.a * s_inverse;
  h.v = f.v * s_inverse;
  return h;
 }
@ -425,7 +431,8 @@ template <typename T, int N> inline
 Jet<T, N> log(const Jet<T, N>& f) {
  Jet<T, N> g;
  g.a = log(f.a);
-  g.v = f.v / f.a;
+  const T a_inverse = T(1.0) / f.a;
  g.v = f.v * a_inverse;
  return g;
 }
@ -443,7 +450,8 @@ template <typename T, int N> inline
 Jet<T, N> sqrt(const Jet<T, N>& f) {
  Jet<T, N> g;
  g.a = sqrt(f.a);
-  g.v = f.v / (T(2.0) * g.a);
+  const T two_a_inverse = 1.0 / (T(2.0) * g.a);
  g.v = f.v * two_a_inverse;
  return g;
 }
@ -452,7 +460,7 @@ template <typename T, int N> inline
 Jet<T, N> cos(const Jet<T, N>& f) {
  Jet<T, N> g;
  g.a = cos(f.a);
-  T sin_a = sin(f.a);
+  const T sin_a = sin(f.a);
  g.v = - sin_a * f.v;
  return g;
 }
@ -462,7 +470,8 @@ template <typename T, int N> inline
 Jet<T, N> acos(const Jet<T, N>& f) {
  Jet<T, N> g;
  g.a = acos(f.a);
-  g.v = - T(1.0) / sqrt(T(1.0) - f.a * f.a) * f.v;
+  const T tmp = - T(1.0) / sqrt(T(1.0) - f.a * f.a);
  g.v = tmp * f.v;
  return g;
 }
@ -471,7 +480,7 @@ template <typename T, int N> inline
 Jet<T, N> sin(const Jet<T, N>& f) {
  Jet<T, N> g;
  g.a = sin(f.a);
-  T cos_a = cos(f.a);
+  const T cos_a = cos(f.a);
  g.v = cos_a * f.v;
  return g;
 }
@ -481,7 +490,8 @@ template <typename T, int N> inline
 Jet<T, N> asin(const Jet<T, N>& f) {
  Jet<T, N> g;
  g.a = asin(f.a);
-  g.v = T(1.0) / sqrt(T(1.0) - f.a * f.a) * f.v;
+  const T tmp = T(1.0) / sqrt(T(1.0) - f.a * f.a);
  g.v = tmp * f.v;
  return g;
 }
--- a/extern/libmv/third_party/ceres/internal/ceres/blas.h
+++ b/extern/libmv/third_party/ceres/internal/ceres/blas.h
@ -0,0 +1,496 @@
 // Ceres Solver - A fast non-linear least squares minimizer
 // Copyright 2013 Google Inc. All rights reserved.
 // http://code.google.com/p/ceres-solver/
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are met:
 //
 // * Redistributions of source code must retain the above copyright notice,
 //   this list of conditions and the following disclaimer.
 // * Redistributions in binary form must reproduce the above copyright notice,
 //   this list of conditions and the following disclaimer in the documentation
 //   and/or other materials provided with the distribution.
 // * Neither the name of Google Inc. nor the names of its contributors may be
 //   used to endorse or promote products derived from this software without
 //   specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 // POSSIBILITY OF SUCH DAMAGE.
 //
 // Author: sameeragarwal@google.com (Sameer Agarwal)
 //
 // Simple blas functions for use in the Schur Eliminator. These are
 // fairly basic implementations which already yield a significant
 // speedup in the eliminator performance.
 #ifndef CERES_INTERNAL_BLAS_H_
 #define CERES_INTERNAL_BLAS_H_
 #include "ceres/internal/eigen.h"
 #include "glog/logging.h"
 namespace ceres {
 namespace internal {
 // Remove the ".noalias()" annotation from the matrix matrix
 // mutliplies to produce a correct build with the Android NDK,
 // including versions 6, 7, 8, and 8b, when built with STLPort and the
 // non-standalone toolchain (i.e. ndk-build). This appears to be a
 // compiler bug; if the workaround is not in place, the line
 //
 //   block.noalias() -= A * B;
 //
 // gets compiled to
 //
 //   block.noalias() += A * B;
 //
 // which breaks schur elimination. Introducing a temporary by removing the
 // .noalias() annotation causes the issue to disappear. Tracking this
 // issue down was tricky, since the test suite doesn't run when built with
 // the non-standalone toolchain.
 //
 // TODO(keir): Make a reproduction case for this and send it upstream.
 #ifdef CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG
 #define CERES_MAYBE_NOALIAS
 #else
 #define CERES_MAYBE_NOALIAS .noalias()
 #endif
 // For the matrix-matrix functions below, there are three functions
 // for each functionality. Foo, FooNaive and FooEigen. Foo is the one
 // to be called by the user. FooNaive is a basic loop based
 // implementation and FooEigen uses Eigen's implementation. Foo
 // chooses between FooNaive and FooEigen depending on how many of the
 // template arguments are fixed at compile time. Currently, FooEigen
 // is called if all matrix dimenions are compile time
 // constants. FooNaive is called otherwise. This leads to the best
 // performance currently.
 //
 // TODO(sameeragarwal): Benchmark and simplify the matrix-vector
 // functions.
 // C op A * B;
 //
 // where op can be +=, -=, or =.
 //
 // The template parameters (kRowA, kColA, kRowB, kColB) allow
 // specialization of the loop at compile time. If this information is
 // not available, then Eigen::Dynamic should be used as the template
 // argument.
 //
 //   kOperation =  1  -> C += A * B
 //   kOperation = -1  -> C -= A * B
 //   kOperation =  0  -> C  = A * B
 //
 // The function can write into matrices C which are larger than the
 // matrix A * B. This is done by specifying the true size of C via
 // row_stride_c and col_stride_c, and then indicating where A * B
 // should be written into by start_row_c and start_col_c.
 //
 // Graphically if row_stride_c = 10, col_stride_c = 12, start_row_c =
 // 4 and start_col_c = 5, then if A = 3x2 and B = 2x4, we get
 //
 //   ------------
 //   ------------
 //   ------------
 //   ------------
 //   -----xxxx---
 //   -----xxxx---
 //   -----xxxx---
 //   ------------
 //   ------------
 //   ------------
 //
 template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>
 inline void MatrixMatrixMultiplyEigen(const double* A,
                                      const int num_row_a,
                                      const int num_col_a,
                                      const double* B,
                                      const int num_row_b,
                                      const int num_col_b,
                                      double* C,
                                      const int start_row_c,
                                      const int start_col_c,
                                      const int row_stride_c,
                                      const int col_stride_c) {
  const typename EigenTypes<kRowA, kColA>::ConstMatrixRef Aref(A, num_row_a, num_col_a);
  const typename EigenTypes<kRowB, kColB>::ConstMatrixRef Bref(B, num_row_b, num_col_b);
  MatrixRef Cref(C, row_stride_c, col_stride_c);
  Eigen::Block<MatrixRef, kRowA, kColB> block(Cref,
                                              start_row_c, start_col_c,
                                              num_row_a, num_col_b);
  if (kOperation > 0) {
    block CERES_MAYBE_NOALIAS += Aref * Bref;
  } else if (kOperation < 0) {
    block CERES_MAYBE_NOALIAS -= Aref * Bref;
  } else {
    block CERES_MAYBE_NOALIAS = Aref * Bref;
  }
 }
 template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>
 inline void MatrixMatrixMultiplyNaive(const double* A,
                                      const int num_row_a,
                                      const int num_col_a,
                                      const double* B,
                                      const int num_row_b,
                                      const int num_col_b,
                                      double* C,
                                      const int start_row_c,
                                      const int start_col_c,
                                      const int row_stride_c,
                                      const int col_stride_c) {
  DCHECK_GT(num_row_a, 0);
  DCHECK_GT(num_col_a, 0);
  DCHECK_GT(num_row_b, 0);
  DCHECK_GT(num_col_b, 0);
  DCHECK_GE(start_row_c, 0);
  DCHECK_GE(start_col_c, 0);
  DCHECK_GT(row_stride_c, 0);
  DCHECK_GT(col_stride_c, 0);
  DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
  DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
  DCHECK((kRowB == Eigen::Dynamic) || (kRowB == num_row_b));
  DCHECK((kColB == Eigen::Dynamic) || (kColB == num_col_b));
  const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
  const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
  const int NUM_ROW_B = (kColB != Eigen::Dynamic ? kRowB : num_row_b);
  const int NUM_COL_B = (kColB != Eigen::Dynamic ? kColB : num_col_b);
  DCHECK_EQ(NUM_COL_A, NUM_ROW_B);
  const int NUM_ROW_C = NUM_ROW_A;
  const int NUM_COL_C = NUM_COL_B;
  DCHECK_LT(start_row_c + NUM_ROW_C, row_stride_c);
  DCHECK_LT(start_col_c + NUM_COL_C, col_stride_c);
  for (int row = 0; row < NUM_ROW_C; ++row) {
    for (int col = 0; col < NUM_COL_C; ++col) {
      double tmp = 0.0;
      for (int k = 0; k < NUM_COL_A; ++k) {
        tmp += A[row * NUM_COL_A + k] * B[k * NUM_COL_B + col];
      }
      const int index = (row + start_row_c) * col_stride_c + start_col_c + col;
      if (kOperation > 0) {
        C[index] += tmp;
      } else if (kOperation < 0) {
        C[index] -= tmp;
      } else {
        C[index] = tmp;
      }
    }
  }
 }
 template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>
 inline void MatrixMatrixMultiply(const double* A,
                                 const int num_row_a,
                                 const int num_col_a,
                                 const double* B,
                                 const int num_row_b,
                                 const int num_col_b,
                                 double* C,
                                 const int start_row_c,
                                 const int start_col_c,
                                 const int row_stride_c,
                                 const int col_stride_c) {
 #ifdef CERES_NO_CUSTOM_BLAS
  MatrixMatrixMultiplyEigen<kRowA, kColA, kRowB, kColB, kOperation>(
      A, num_row_a, num_col_a,
      B, num_row_b, num_col_b,
      C, start_row_c, start_col_c, row_stride_c, col_stride_c);
  return;
 #else
  if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic &&
      kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) {
    MatrixMatrixMultiplyEigen<kRowA, kColA, kRowB, kColB, kOperation>(
        A, num_row_a, num_col_a,
        B, num_row_b, num_col_b,
        C, start_row_c, start_col_c, row_stride_c, col_stride_c);
  } else {
    MatrixMatrixMultiplyNaive<kRowA, kColA, kRowB, kColB, kOperation>(
        A, num_row_a, num_col_a,
        B, num_row_b, num_col_b,
        C, start_row_c, start_col_c, row_stride_c, col_stride_c);
  }
 #endif
 }
 // C op A' * B;
 //
 // where op can be +=, -=, or =.
 //
 // The template parameters (kRowA, kColA, kRowB, kColB) allow
 // specialization of the loop at compile time. If this information is
 // not available, then Eigen::Dynamic should be used as the template
 // argument.
 //
 //   kOperation =  1  -> C += A' * B
 //   kOperation = -1  -> C -= A' * B
 //   kOperation =  0  -> C  = A' * B
 //
 // The function can write into matrices C which are larger than the
 // matrix A' * B. This is done by specifying the true size of C via
 // row_stride_c and col_stride_c, and then indicating where A * B
 // should be written into by start_row_c and start_col_c.
 //
 // Graphically if row_stride_c = 10, col_stride_c = 12, start_row_c =
 // 4 and start_col_c = 5, then if A = 2x3 and B = 2x4, we get
 //
 //   ------------
 //   ------------
 //   ------------
 //   ------------
 //   -----xxxx---
 //   -----xxxx---
 //   -----xxxx---
 //   ------------
 //   ------------
 //   ------------
 //
 template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>
 inline void MatrixTransposeMatrixMultiplyEigen(const double* A,
                                               const int num_row_a,
                                               const int num_col_a,
                                               const double* B,
                                               const int num_row_b,
                                               const int num_col_b,
                                               double* C,
                                               const int start_row_c,
                                               const int start_col_c,
                                               const int row_stride_c,
                                               const int col_stride_c) {
  const typename EigenTypes<kRowA, kColA>::ConstMatrixRef Aref(A, num_row_a, num_col_a);
  const typename EigenTypes<kRowB, kColB>::ConstMatrixRef Bref(B, num_row_b, num_col_b);
  MatrixRef Cref(C, row_stride_c, col_stride_c);
  Eigen::Block<MatrixRef, kColA, kColB> block(Cref,
                                              start_row_c, start_col_c,
                                              num_col_a, num_col_b);
  if (kOperation > 0) {
    block CERES_MAYBE_NOALIAS += Aref.transpose() * Bref;
  } else if (kOperation < 0) {
    block CERES_MAYBE_NOALIAS -= Aref.transpose() * Bref;
  } else {
    block CERES_MAYBE_NOALIAS = Aref.transpose() * Bref;
  }
 }
 template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>
 inline void MatrixTransposeMatrixMultiplyNaive(const double* A,
                                               const int num_row_a,
                                               const int num_col_a,
                                               const double* B,
                                               const int num_row_b,
                                               const int num_col_b,
                                               double* C,
                                               const int start_row_c,
                                               const int start_col_c,
                                               const int row_stride_c,
                                               const int col_stride_c) {
  DCHECK_GT(num_row_a, 0);
  DCHECK_GT(num_col_a, 0);
  DCHECK_GT(num_row_b, 0);
  DCHECK_GT(num_col_b, 0);
  DCHECK_GE(start_row_c, 0);
  DCHECK_GE(start_col_c, 0);
  DCHECK_GT(row_stride_c, 0);
  DCHECK_GT(col_stride_c, 0);
  DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
  DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
  DCHECK((kRowB == Eigen::Dynamic) || (kRowB == num_row_b));
  DCHECK((kColB == Eigen::Dynamic) || (kColB == num_col_b));
  const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
  const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
  const int NUM_ROW_B = (kColB != Eigen::Dynamic ? kRowB : num_row_b);
  const int NUM_COL_B = (kColB != Eigen::Dynamic ? kColB : num_col_b);
  DCHECK_EQ(NUM_ROW_A, NUM_ROW_B);
  const int NUM_ROW_C = NUM_COL_A;
  const int NUM_COL_C = NUM_COL_B;
  DCHECK_LT(start_row_c + NUM_ROW_C, row_stride_c);
  DCHECK_LT(start_col_c + NUM_COL_C, col_stride_c);
  for (int row = 0; row < NUM_ROW_C; ++row) {
    for (int col = 0; col < NUM_COL_C; ++col) {
      double tmp = 0.0;
      for (int k = 0; k < NUM_ROW_A; ++k) {
        tmp += A[k * NUM_COL_A + row] * B[k * NUM_COL_B + col];
      }
      const int index = (row + start_row_c) * col_stride_c + start_col_c + col;
      if (kOperation > 0) {
        C[index]+= tmp;
      } else if (kOperation < 0) {
        C[index]-= tmp;
      } else {
        C[index]= tmp;
      }
    }
  }
 }
 template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>
 inline void MatrixTransposeMatrixMultiply(const double* A,
                                          const int num_row_a,
                                          const int num_col_a,
                                          const double* B,
                                          const int num_row_b,
                                          const int num_col_b,
                                          double* C,
                                          const int start_row_c,
                                          const int start_col_c,
                                          const int row_stride_c,
                                          const int col_stride_c) {
 #ifdef CERES_NO_CUSTOM_BLAS
  MatrixTransposeMatrixMultiplyEigen<kRowA, kColA, kRowB, kColB, kOperation>(
      A, num_row_a, num_col_a,
      B, num_row_b, num_col_b,
      C, start_row_c, start_col_c, row_stride_c, col_stride_c);
  return;
 #else
  if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic &&
      kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) {
    MatrixTransposeMatrixMultiplyEigen<kRowA, kColA, kRowB, kColB, kOperation>(
        A, num_row_a, num_col_a,
        B, num_row_b, num_col_b,
        C, start_row_c, start_col_c, row_stride_c, col_stride_c);
  } else {
    MatrixTransposeMatrixMultiplyNaive<kRowA, kColA, kRowB, kColB, kOperation>(
        A, num_row_a, num_col_a,
        B, num_row_b, num_col_b,
        C, start_row_c, start_col_c, row_stride_c, col_stride_c);
  }
 #endif
 }
 template<int kRowA, int kColA, int kOperation>
 inline void MatrixVectorMultiply(const double* A,
                                 const int num_row_a,
                                 const int num_col_a,
                                 const double* b,
                                 double* c) {
 #ifdef CERES_NO_CUSTOM_BLAS
  const typename EigenTypes<kRowA, kColA>::ConstMatrixRef Aref(A, num_row_a, num_col_a);
  const typename EigenTypes<kColA>::ConstVectorRef bref(b, num_col_a);
  typename EigenTypes<kRowA>::VectorRef cref(c, num_row_a);
  // lazyProduct works better than .noalias() for matrix-vector
  // products.
  if (kOperation > 0) {
    cref += Aref.lazyProduct(bref);
  } else if (kOperation < 0) {
    cref -= Aref.lazyProduct(bref);
  } else {
    cref -= Aref.lazyProduct(bref);
  }
 #else
  DCHECK_GT(num_row_a, 0);
  DCHECK_GT(num_col_a, 0);
  DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
  DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
  const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
  const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
  for (int row = 0; row < NUM_ROW_A; ++row) {
    double tmp = 0.0;
    for (int col = 0; col < NUM_COL_A; ++col) {
      tmp += A[row * NUM_COL_A + col] * b[col];
    }
    if (kOperation > 0) {
      c[row] += tmp;
    } else if (kOperation < 0) {
      c[row] -= tmp;
    } else {
      c[row] = tmp;
    }
  }
 #endif  // CERES_NO_CUSTOM_BLAS
 }
 // c op A' * b;
 //
 // where op can be +=, -=, or =.
 //
 // The template parameters (kRowA, kColA) allow specialization of the
 // loop at compile time. If this information is not available, then
 // Eigen::Dynamic should be used as the template argument.
 //
 // kOperation =  1  -> c += A' * b
 // kOperation = -1  -> c -= A' * b
 // kOperation =  0  -> c  = A' * b
 template<int kRowA, int kColA, int kOperation>
 inline void MatrixTransposeVectorMultiply(const double* A,
                                          const int num_row_a,
                                          const int num_col_a,
                                          const double* b,
                                          double* c) {
 #ifdef CERES_NO_CUSTOM_BLAS
  const typename EigenTypes<kRowA, kColA>::ConstMatrixRef Aref(A, num_row_a, num_col_a);
  const typename EigenTypes<kRowA>::ConstVectorRef bref(b, num_row_a);
  typename EigenTypes<kColA>::VectorRef cref(c, num_col_a);
  // lazyProduct works better than .noalias() for matrix-vector
  // products.
  if (kOperation > 0) {
    cref += Aref.transpose().lazyProduct(bref);
  } else if (kOperation < 0) {
    cref -= Aref.transpose().lazyProduct(bref);
  } else {
    cref -= Aref.transpose().lazyProduct(bref);
  }
 #else
  DCHECK_GT(num_row_a, 0);
  DCHECK_GT(num_col_a, 0);
  DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
  DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
  const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
  const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
  for (int row = 0; row < NUM_COL_A; ++row) {
    double tmp = 0.0;
    for (int col = 0; col < NUM_ROW_A; ++col) {
      tmp += A[col * NUM_COL_A + row] * b[col];
    }
    if (kOperation > 0) {
      c[row] += tmp;
    } else if (kOperation < 0) {
      c[row] -= tmp;
    } else {
      c[row] = tmp;
    }
  }
 #endif  // CERES_NO_CUSTOM_BLAS
 }
 #undef CERES_MAYBE_NOALIAS
 }  // namespace internal
 }  // namespace ceres
 #endif  // CERES_INTERNAL_BLAS_H_
--- a/extern/libmv/third_party/ceres/internal/ceres/block_sparse_matrix.cc
+++ b/extern/libmv/third_party/ceres/internal/ceres/block_sparse_matrix.cc
@ -33,6 +33,7 @@
 #include <cstddef>
 #include <algorithm>
 #include <vector>
 #include "ceres/blas.h"
 #include "ceres/block_structure.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/matrix_proto.h"
@ -117,16 +118,15 @@ void BlockSparseMatrix::RightMultiply(const double* x,  double* y) const {
  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_pos = block_structure_->rows[i].block.position;
    int row_block_size = block_structure_->rows[i].block.size;
    VectorRef yref(y + row_block_pos, row_block_size);
    const vector<Cell>& cells = block_structure_->rows[i].cells;
    for (int j = 0; j < cells.size(); ++j) {
      int col_block_id = cells[j].block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
-      ConstVectorRef xref(x + col_block_pos, col_block_size);
+      MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
-      MatrixRef m(values_.get() + cells[j].position,
+          values_.get() + cells[j].position, row_block_size, col_block_size,
-                  row_block_size, col_block_size);
+          x + col_block_pos,
-      yref += m.lazyProduct(xref);
+          y + row_block_pos);
    }
  }
 }
@ -138,16 +138,16 @@ void BlockSparseMatrix::LeftMultiply(const double* x, double* y) const {
  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_pos = block_structure_->rows[i].block.position;
    int row_block_size = block_structure_->rows[i].block.size;
    const ConstVectorRef xref(x + row_block_pos, row_block_size);
    const vector<Cell>& cells = block_structure_->rows[i].cells;
    for (int j = 0; j < cells.size(); ++j) {
      int col_block_id = cells[j].block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
-      VectorRef yref(y + col_block_pos, col_block_size);
+      MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
-      MatrixRef m(values_.get() + cells[j].position,
+          values_.get() + cells[j].position, row_block_size, col_block_size,
-                  row_block_size, col_block_size);
+          x + row_block_pos,
-      yref += m.transpose().lazyProduct(xref);
+          y + col_block_pos);
    }
  }
 }
--- a/extern/libmv/third_party/ceres/internal/ceres/partitioned_matrix_view.cc
+++ b/extern/libmv/third_party/ceres/internal/ceres/partitioned_matrix_view.cc
@ -35,6 +35,7 @@
 #include <algorithm>
 #include <cstring>
 #include <vector>
 #include "ceres/blas.h"
 #include "ceres/block_sparse_matrix.h"
 #include "ceres/block_structure.h"
 #include "ceres/internal/eigen.h"
@ -103,13 +104,10 @@ void PartitionedMatrixView::RightMultiplyE(const double* x, double* y) const {
    const int col_block_id = cell.block_id;
    const int col_block_pos = bs->cols[col_block_id].position;
    const int col_block_size = bs->cols[col_block_id].size;
-
+    MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
-    ConstVectorRef xref(x + col_block_pos, col_block_size);
+        row_values + cell.position, row_block_size, col_block_size,
-    VectorRef yref(y + row_block_pos, row_block_size);
+        x + col_block_pos,
-    ConstMatrixRef m(row_values + cell.position,
+        y + row_block_pos);
                     row_block_size,
                     col_block_size);
    yref += m.lazyProduct(xref);
  }
 }
@ -124,20 +122,16 @@ void PartitionedMatrixView::RightMultiplyF(const double* x, double* y) const {
  for (int r = 0; r < bs->rows.size(); ++r) {
    const int row_block_pos = bs->rows[r].block.position;
    const int row_block_size = bs->rows[r].block.size;
    VectorRef yref(y + row_block_pos, row_block_size);
    const vector<Cell>& cells = bs->rows[r].cells;
    for (int c = (r < num_row_blocks_e_) ? 1 : 0; c < cells.size(); ++c) {
      const double* row_values = matrix_.RowBlockValues(r);
      const int col_block_id = cells[c].block_id;
      const int col_block_pos = bs->cols[col_block_id].position;
      const int col_block_size = bs->cols[col_block_id].size;
-
+      MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
-      ConstVectorRef xref(x + col_block_pos - num_cols_e(),
+          row_values + cells[c].position, row_block_size, col_block_size,
-                          col_block_size);
+          x + col_block_pos - num_cols_e(),
-      ConstMatrixRef m(row_values + cells[c].position,
+          y + row_block_pos);
                       row_block_size,
                       col_block_size);
      yref += m.lazyProduct(xref);
    }
  }
 }
@ -155,13 +149,10 @@ void PartitionedMatrixView::LeftMultiplyE(const double* x, double* y) const {
    const int col_block_id = cell.block_id;
    const int col_block_pos = bs->cols[col_block_id].position;
    const int col_block_size = bs->cols[col_block_id].size;
-
+    MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
-    ConstVectorRef xref(x + row_block_pos, row_block_size);
+        row_values + cell.position, row_block_size, col_block_size,
-    VectorRef yref(y + col_block_pos, col_block_size);
+        x + row_block_pos,
-    ConstMatrixRef m(row_values + cell.position,
+        y + col_block_pos);
                     row_block_size,
                     col_block_size);
    yref += m.transpose().lazyProduct(xref);
  }
 }
@ -176,19 +167,16 @@ void PartitionedMatrixView::LeftMultiplyF(const double* x, double* y) const {
  for (int r = 0; r < bs->rows.size(); ++r) {
    const int row_block_pos = bs->rows[r].block.position;
    const int row_block_size = bs->rows[r].block.size;
    ConstVectorRef xref(x + row_block_pos, row_block_size);
    const vector<Cell>& cells = bs->rows[r].cells;
    for (int c = (r < num_row_blocks_e_) ? 1 : 0; c < cells.size(); ++c) {
      const double* row_values = matrix_.RowBlockValues(r);
      const int col_block_id = cells[c].block_id;
      const int col_block_pos = bs->cols[col_block_id].position;
      const int col_block_size = bs->cols[col_block_id].size;
-
+      MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
-      VectorRef yref(y + col_block_pos - num_cols_e(), col_block_size);
+        row_values + cells[c].position, row_block_size, col_block_size,
-      ConstMatrixRef m(row_values + cells[c].position,
+        x + row_block_pos,
-                       row_block_size,
+        y + col_block_pos - num_cols_e());
                       col_block_size);
      yref += m.transpose().lazyProduct(xref);
    }
  }
 }
@ -267,15 +255,15 @@ void PartitionedMatrixView::UpdateBlockDiagonalEtE(
    const int row_block_size = bs->rows[r].block.size;
    const int block_id = cell.block_id;
    const int col_block_size = bs->cols[block_id].size;
    ConstMatrixRef m(row_values + cell.position,
                           row_block_size,
                           col_block_size);
    const int cell_position =
        block_diagonal_structure->rows[block_id].cells[0].position;
-    MatrixRef(block_diagonal->mutable_values() + cell_position,
+    MatrixTransposeMatrixMultiply
-              col_block_size, col_block_size).noalias() += m.transpose() * m;
+        <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
            row_values + cell.position, row_block_size, col_block_size,
            row_values + cell.position, row_block_size, col_block_size,
            block_diagonal->mutable_values() + cell_position,
            0, 0, col_block_size, col_block_size);
  }
 }
@ -298,15 +286,16 @@ void PartitionedMatrixView::UpdateBlockDiagonalFtF(
    for (int c = (r < num_row_blocks_e_) ? 1 : 0; c < cells.size(); ++c) {
      const int col_block_id = cells[c].block_id;
      const int col_block_size = bs->cols[col_block_id].size;
      ConstMatrixRef m(row_values + cells[c].position,
                       row_block_size,
                       col_block_size);
      const int diagonal_block_id = col_block_id - num_col_blocks_e_;
      const int cell_position =
          block_diagonal_structure->rows[diagonal_block_id].cells[0].position;
-      MatrixRef(block_diagonal->mutable_values() + cell_position,
+      MatrixTransposeMatrixMultiply
-                col_block_size, col_block_size).noalias() += m.transpose() * m;
+          <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
              row_values + cells[c].position, row_block_size, col_block_size,
              row_values + cells[c].position, row_block_size, col_block_size,
              block_diagonal->mutable_values() + cell_position,
              0, 0, col_block_size, col_block_size);
    }
  }
 }
--- a/extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.cc
+++ b/extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.cc
@ -291,14 +291,8 @@ bool SparseSchurComplementSolver::SolveReducedLinearSystemUsingSuiteSparse(
    } else {
      factor_ = ss_.AnalyzeCholesky(cholmod_lhs);
    }
    if (VLOG_IS_ON(2)) {
      cholmod_print_common(const_cast<char*>("Symbolic Analysis"), ss_.mutable_cc());
    }
  }
  CHECK_NOTNULL(factor_);
  cholmod_dense* cholmod_solution =
      ss_.SolveCholesky(cholmod_lhs, factor_, cholmod_rhs);
--- a/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator.h
+++ b/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator.h
@ -321,9 +321,25 @@ class SchurEliminator : public SchurEliminatorBase {
  // see the documentation of the Chunk object above.
  vector<Chunk> chunks_;
  // TODO(sameeragarwal): The following two arrays contain per-thread
  // storage. They should be refactored into a per thread struct.
  // Buffer to store the products of the y and z blocks generated
-  // during the elimination phase.
+  // during the elimination phase. buffer_ is of size num_threads *
  // buffer_size_. Each thread accesses the chunk
  //
  //   [thread_id * buffer_size_ , (thread_id + 1) * buffer_size_]
  //
  scoped_array<double> buffer_;
  // Buffer to store per thread matrix matrix products used by
  // ChunkOuterProduct. Like buffer_ it is of size num_threads *
  // buffer_size_. Each thread accesses the chunk
  //
  //   [thread_id * buffer_size_ , (thread_id + 1) * buffer_size_]
  //
  scoped_array<double> chunk_outer_product_buffer_;
  int buffer_size_;
  int num_threads_;
  int uneliminated_row_begins_;
--- a/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator_impl.h
+++ b/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator_impl.h
@ -34,10 +34,6 @@
 #ifndef CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_
 #define CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_
 #ifdef CERES_USE_OPENMP
 #include <omp.h>
 #endif
 // Eigen has an internal threshold switching between different matrix
 // multiplication algorithms. In particular for matrices larger than
 // EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD it uses a cache friendly
@ -46,19 +42,25 @@
 // are thin and long, the default choice may not be optimal. This is
 // the case for us, as the default choice causes a 30% performance
 // regression when we moved from Eigen2 to Eigen3.
 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 10
 #ifdef CERES_USE_OPENMP
 #include <omp.h>
 #endif
 #include <algorithm>
 #include <map>
 #include "Eigen/Dense"
 #include "ceres/blas.h"
 #include "ceres/block_random_access_matrix.h"
 #include "ceres/block_sparse_matrix.h"
 #include "ceres/block_structure.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/map_util.h"
 #include "ceres/schur_eliminator.h"
 #include "ceres/stl_util.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "glog/logging.h"
 namespace ceres {
@ -149,13 +151,16 @@ Init(int num_eliminate_blocks, const CompressedRowBlockStructure* bs) {
  buffer_.reset(new double[buffer_size_ * num_threads_]);
  // chunk_outer_product_buffer_ only needs to store e_block_size *
  // f_block_size, which is always less than buffer_size_, so we just
  // allocate buffer_size_ per thread.
  chunk_outer_product_buffer_.reset(new double[buffer_size_ * num_threads_]);
  STLDeleteElements(&rhs_locks_);
  rhs_locks_.resize(num_col_blocks - num_eliminate_blocks_);
  for (int i = 0; i < num_col_blocks - num_eliminate_blocks_; ++i) {
    rhs_locks_[i] = new Mutex;
  }
  VLOG(1) << "Eliminator threads: " << num_threads_;
 }
 template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
@ -261,7 +266,7 @@ Eliminate(const BlockSparseMatrixBase* A,
    typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix inverse_ete =
        ete
        .template selfadjointView<Eigen::Upper>()
-        .ldlt()
+        .llt()
        .solve(Matrix::Identity(e_block_size, e_block_size));
    // For the current chunk compute and update the rhs of the reduced
@ -294,8 +299,8 @@ BackSubstitute(const BlockSparseMatrixBase* A,
    const int e_block_id = bs->rows[chunk.start].cells.front().block_id;
    const int e_block_size = bs->cols[e_block_id].size;
-    typename EigenTypes<kEBlockSize>::VectorRef y_block(
+    double* y_ptr = y +  bs->cols[e_block_id].position;
-        y +  bs->cols[e_block_id].position, e_block_size);
+    typename EigenTypes<kEBlockSize>::VectorRef y_block(y_ptr, e_block_size);
    typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix
        ete(e_block_size, e_block_size);
@ -312,40 +317,35 @@ BackSubstitute(const BlockSparseMatrixBase* A,
      const double* row_values = A->RowBlockValues(chunk.start + j);
      const Cell& e_cell = row.cells.front();
      DCHECK_EQ(e_block_id, e_cell.block_id);
      const typename EigenTypes<kRowBlockSize, kEBlockSize>::ConstMatrixRef
          e_block(row_values + e_cell.position,
                  row.block.size,
                  e_block_size);
-      typename EigenTypes<kRowBlockSize>::Vector
+      typename EigenTypes<kRowBlockSize>::Vector sj =
          sj =
          typename EigenTypes<kRowBlockSize>::ConstVectorRef
-          (b + bs->rows[chunk.start + j].block.position,
+          (b + bs->rows[chunk.start + j].block.position, row.block.size);
           row.block.size);
      for (int c = 1; c < row.cells.size(); ++c) {
        const int f_block_id = row.cells[c].block_id;
        const int f_block_size = bs->cols[f_block_id].size;
        const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
            f_block(row_values + row.cells[c].position,
                    row.block.size, f_block_size);
        const int r_block = f_block_id - num_eliminate_blocks_;
-        sj -= f_block *
+        MatrixVectorMultiply<kRowBlockSize, kFBlockSize, -1>(
-            typename EigenTypes<kFBlockSize>::ConstVectorRef
+            row_values + row.cells[c].position, row.block.size, f_block_size,
-            (z + lhs_row_layout_[r_block], f_block_size);
+            z + lhs_row_layout_[r_block],
            sj.data());
      }
-      y_block += e_block.transpose() * sj;
+      MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
-      ete.template selfadjointView<Eigen::Upper>()
+          row_values + e_cell.position, row.block.size, e_block_size,
-          .rankUpdate(e_block.transpose(), 1.0);
+          sj.data(),
          y_ptr);
      MatrixTransposeMatrixMultiply
          <kRowBlockSize, kEBlockSize,kRowBlockSize, kEBlockSize, 1>(
              row_values + e_cell.position, row.block.size, e_block_size,
              row_values + e_cell.position, row.block.size, e_block_size,
              ete.data(), 0, 0, e_block_size, e_block_size);
    }
-    y_block =
+    ete.llt().solveInPlace(y_block);
        ete
        .template selfadjointView<Eigen::Upper>()
        .ldlt()
        .solve(y_block);
  }
 }
@ -382,15 +382,12 @@ UpdateRhs(const Chunk& chunk,
    for (int c = 1; c < row.cells.size(); ++c) {
      const int block_id = row.cells[c].block_id;
      const int block_size = bs->cols[block_id].size;
      const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
          b(row_values + row.cells[c].position,
            row.block.size, block_size);
      const int block = block_id - num_eliminate_blocks_;
      CeresMutexLock l(rhs_locks_[block]);
-      typename EigenTypes<kFBlockSize>::VectorRef
+      MatrixTransposeVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
-          (rhs + lhs_row_layout_[block], block_size).noalias()
+          row_values + row.cells[c].position,
-          += b.transpose() * sj;
+          row.block.size, block_size,
          sj.data(), rhs + lhs_row_layout_[block]);
    }
    b_pos += row.block.size;
  }
@ -446,34 +443,31 @@ ChunkDiagonalBlockAndGradient(
    // Extract the e_block, ETE += E_i' E_i
    const Cell& e_cell = row.cells.front();
-    const typename EigenTypes<kRowBlockSize, kEBlockSize>::ConstMatrixRef
+    MatrixTransposeMatrixMultiply
-        e_block(row_values + e_cell.position,
+        <kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>(
-                row.block.size,
+            row_values + e_cell.position, row.block.size, e_block_size,
-                e_block_size);
+            row_values + e_cell.position, row.block.size, e_block_size,
-
+            ete->data(), 0, 0, e_block_size, e_block_size);
    ete->template selfadjointView<Eigen::Upper>()
        .rankUpdate(e_block.transpose(), 1.0);
    // g += E_i' b_i
-    g->noalias() += e_block.transpose() *
+    MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
-        typename EigenTypes<kRowBlockSize>::ConstVectorRef
+        row_values + e_cell.position, row.block.size, e_block_size,
-        (b + b_pos, row.block.size);
+        b + b_pos,
        g->data());
    // buffer = E'F. This computation is done by iterating over the
    // f_blocks for each row in the chunk.
    for (int c = 1; c < row.cells.size(); ++c) {
      const int f_block_id = row.cells[c].block_id;
      const int f_block_size = bs->cols[f_block_id].size;
      const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
          f_block(row_values + row.cells[c].position,
                  row.block.size, f_block_size);
      double* buffer_ptr =
          buffer +  FindOrDie(chunk.buffer_layout, f_block_id);
-
+      MatrixTransposeMatrixMultiply
-      typename EigenTypes<kEBlockSize, kFBlockSize>::MatrixRef
+          <kRowBlockSize, kEBlockSize, kRowBlockSize, kFBlockSize, 1>(
-          (buffer_ptr,  e_block_size, f_block_size).noalias()
+          row_values + e_cell.position, row.block.size, e_block_size,
-          += e_block.transpose() * f_block;
+          row_values + row.cells[c].position, row.block.size, f_block_size,
          buffer_ptr, 0, 0, e_block_size, f_block_size);
    }
    b_pos += row.block.size;
  }
@ -497,15 +491,24 @@ ChunkOuterProduct(const CompressedRowBlockStructure* bs,
  // references to the left hand side.
  const int e_block_size = inverse_ete.rows();
  BufferLayoutType::const_iterator it1 = buffer_layout.begin();
 #ifdef CERES_USE_OPENMP
  int thread_id = omp_get_thread_num();
 #else
  int thread_id = 0;
 #endif
  double* b1_transpose_inverse_ete =
      chunk_outer_product_buffer_.get() + thread_id * buffer_size_;
  // S(i,j) -= bi' * ete^{-1} b_j
  for (; it1 != buffer_layout.end(); ++it1) {
    const int block1 = it1->first - num_eliminate_blocks_;
    const int block1_size = bs->cols[it1->first].size;
-
+    MatrixTransposeMatrixMultiply
-    const typename EigenTypes<kEBlockSize, kFBlockSize>::ConstMatrixRef
+        <kEBlockSize, kFBlockSize, kEBlockSize, kEBlockSize, 0>(
-        b1(buffer + it1->second, e_block_size, block1_size);
+        buffer + it1->second, e_block_size, block1_size,
-    const typename EigenTypes<kFBlockSize, kEBlockSize>::Matrix
+        inverse_ete.data(), e_block_size, e_block_size,
-        b1_transpose_inverse_ete = b1.transpose() * inverse_ete;
+        b1_transpose_inverse_ete, 0, 0, block1_size, e_block_size);
    BufferLayoutType::const_iterator it2 = it1;
    for (; it2 != buffer_layout.end(); ++it2) {
@ -515,46 +518,15 @@ ChunkOuterProduct(const CompressedRowBlockStructure* bs,
      CellInfo* cell_info = lhs->GetCell(block1, block2,
                                         &r, &c,
                                         &row_stride, &col_stride);
-      if (cell_info == NULL) {
+      if (cell_info != NULL) {
-        continue;
+        const int block2_size = bs->cols[it2->first].size;
        CeresMutexLock l(&cell_info->m);
        MatrixMatrixMultiply
            <kFBlockSize, kEBlockSize, kEBlockSize, kFBlockSize, -1>(
                b1_transpose_inverse_ete, block1_size, e_block_size,
                buffer  + it2->second, e_block_size, block2_size,
                cell_info->values, r, c, row_stride, col_stride);
      }
      const int block2_size = bs->cols[it2->first].size;
      const typename EigenTypes<kEBlockSize, kFBlockSize>::ConstMatrixRef
          b2(buffer + it2->second, e_block_size, block2_size);
      CeresMutexLock l(&cell_info->m);
      MatrixRef m(cell_info->values, row_stride, col_stride);
      // We explicitly construct a block object here instead of using
      // m.block(), as m.block() variant of the constructor does not
      // allow mixing of template sizing and runtime sizing parameters
      // like the Matrix class does.
      Eigen::Block<MatrixRef, kFBlockSize, kFBlockSize>
          block(m, r, c,  block1_size, block2_size);
 #ifdef CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG
      // Removing the ".noalias()" annotation on the following statement is
      // necessary to produce a correct build with the Android NDK, including
      // versions 6, 7, 8, and 8b, when built with STLPort and the
      // non-standalone toolchain (i.e. ndk-build). This appears to be a
      // compiler bug; if the workaround is not in place, the line
      //
      //   block.noalias() -= b1_transpose_inverse_ete * b2;
      //
      // gets compiled to
      //
      //   block.noalias() += b1_transpose_inverse_ete * b2;
      //
      // which breaks schur elimination. Introducing a temporary by removing the
      // .noalias() annotation causes the issue to disappear. Tracking this
      // issue down was tricky, since the test suite doesn't run when built with
      // the non-standalone toolchain.
      //
      // TODO(keir): Make a reproduction case for this and send it upstream.
      block -= b1_transpose_inverse_ete * b2;
 #else
      block.noalias() -= b1_transpose_inverse_ete * b2;
 #endif  // CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG
    }
  }
 }
@ -624,10 +596,13 @@ NoEBlockRowOuterProduct(const BlockSparseMatrixBase* A,
                                       &row_stride, &col_stride);
    if (cell_info != NULL) {
      CeresMutexLock l(&cell_info->m);
-      MatrixRef m(cell_info->values, row_stride, col_stride);
+      // This multiply currently ignores the fact that this is a
-      m.block(r, c, block1_size, block1_size)
+      // symmetric outer product.
-          .selfadjointView<Eigen::Upper>()
+      MatrixTransposeMatrixMultiply
-          .rankUpdate(b1.transpose(), 1.0);
+          <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
              row_values + row.cells[i].position, row.block.size, block1_size,
              row_values + row.cells[i].position, row.block.size, block1_size,
              cell_info->values, r, c, row_stride, col_stride);
    }
    for (int j = i + 1; j < row.cells.size(); ++j) {
@ -638,17 +613,15 @@ NoEBlockRowOuterProduct(const BlockSparseMatrixBase* A,
      CellInfo* cell_info = lhs->GetCell(block1, block2,
                                         &r, &c,
                                         &row_stride, &col_stride);
-      if (cell_info == NULL) {
+      if (cell_info != NULL) {
-        continue;
+        const int block2_size = bs->cols[row.cells[j].block_id].size;
        CeresMutexLock l(&cell_info->m);
        MatrixTransposeMatrixMultiply
            <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
                row_values + row.cells[i].position, row.block.size, block1_size,
                row_values + row.cells[j].position, row.block.size, block2_size,
                cell_info->values, r, c, row_stride, col_stride);
      }
      const int block2_size = bs->cols[row.cells[j].block_id].size;
      CeresMutexLock l(&cell_info->m);
      MatrixRef m(cell_info->values, row_stride, col_stride);
      m.block(r, c, block1_size, block2_size).noalias() +=
          b1.transpose() * ConstMatrixRef(row_values + row.cells[j].position,
                                          row.block.size,
                                          block2_size);
    }
  }
 }
@ -670,25 +643,18 @@ EBlockRowOuterProduct(const BlockSparseMatrixBase* A,
    DCHECK_GE(block1, 0);
    const int block1_size = bs->cols[row.cells[i].block_id].size;
-    const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
+    int r, c, row_stride, col_stride;
-        b1(row_values + row.cells[i].position,
+    CellInfo* cell_info = lhs->GetCell(block1, block1,
-           row.block.size, block1_size);
+                                       &r, &c,
-    {
+                                       &row_stride, &col_stride);
-      int r, c, row_stride, col_stride;
+    if (cell_info != NULL) {
      CellInfo* cell_info = lhs->GetCell(block1, block1,
                                         &r, &c,
                                         &row_stride, &col_stride);
      if (cell_info == NULL) {
        continue;
      }
      CeresMutexLock l(&cell_info->m);
-      MatrixRef m(cell_info->values, row_stride, col_stride);
+      // block += b1.transpose() * b1;
-
+      MatrixTransposeMatrixMultiply
-      Eigen::Block<MatrixRef, kFBlockSize, kFBlockSize>
+          <kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>(
-          block(m, r, c,  block1_size, block1_size);
+          row_values + row.cells[i].position, row.block.size, block1_size,
-      block.template selfadjointView<Eigen::Upper>()
+          row_values + row.cells[i].position, row.block.size, block1_size,
-          .rankUpdate(b1.transpose(), 1.0);
+          cell_info->values, r, c, row_stride, col_stride);
    }
    for (int j = i + 1; j < row.cells.size(); ++j) {
@ -700,20 +666,14 @@ EBlockRowOuterProduct(const BlockSparseMatrixBase* A,
      CellInfo* cell_info = lhs->GetCell(block1, block2,
                                         &r, &c,
                                         &row_stride, &col_stride);
-      if (cell_info == NULL) {
+      if (cell_info != NULL) {
-        continue;
+        // block += b1.transpose() * b2;
        MatrixTransposeMatrixMultiply
            <kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>(
                row_values + row.cells[i].position, row.block.size, block1_size,
                row_values + row.cells[j].position, row.block.size, block2_size,
                cell_info->values, r, c, row_stride, col_stride);
      }
      const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
          b2(row_values + row.cells[j].position,
             row.block.size,
             block2_size);
      CeresMutexLock l(&cell_info->m);
      MatrixRef m(cell_info->values, row_stride, col_stride);
      Eigen::Block<MatrixRef, kFBlockSize, kFBlockSize>
          block(m, r, c,  block1_size, block2_size);
      block.noalias() += b1.transpose() * b2;
    }
  }
 }
--- a/extern/libmv/third_party/ceres/internal/ceres/sparse_normal_cholesky_solver.cc
+++ b/extern/libmv/third_party/ceres/internal/ceres/sparse_normal_cholesky_solver.cc
@ -213,13 +213,7 @@ LinearSolver::Summary SparseNormalCholeskySolver::SolveImplUsingSuiteSparse(
    } else {
      factor_ = ss_.AnalyzeCholesky(lhs.get());
    }
    if (VLOG_IS_ON(2)) {
      cholmod_print_common(const_cast<char*>("Symbolic Analysis"), ss_.mutable_cc());
    }
  }
  CHECK_NOTNULL(factor_);
  event_logger.AddEvent("Analysis");
  cholmod_dense* sol = ss_.SolveCholesky(lhs.get(), factor_, rhs);
--- a/extern/libmv/third_party/ceres/internal/ceres/suitesparse.cc
+++ b/extern/libmv/third_party/ceres/internal/ceres/suitesparse.cc
@ -35,8 +35,18 @@
 #include "cholmod.h"
 #include "ceres/compressed_row_sparse_matrix.h"
 #include "ceres/triplet_sparse_matrix.h"
 namespace ceres {
 namespace internal {
 SuiteSparse::SuiteSparse() {
  cholmod_start(&cc_);
 }
 SuiteSparse::~SuiteSparse() {
  cholmod_finish(&cc_);
 }
 cholmod_sparse* SuiteSparse::CreateSparseMatrix(TripletSparseMatrix* A) {
  cholmod_triplet triplet;
@ -117,10 +127,16 @@ cholmod_factor* SuiteSparse::AnalyzeCholesky(cholmod_sparse* A) {
  cc_.nmethods = 1;
  cc_.method[0].ordering = CHOLMOD_AMD;
  cc_.supernodal = CHOLMOD_AUTO;
  cholmod_factor* factor = cholmod_analyze(A, &cc_);
  CHECK_EQ(cc_.status, CHOLMOD_OK)
      << "Cholmod symbolic analysis failed " << cc_.status;
  CHECK_NOTNULL(factor);
  if (VLOG_IS_ON(2)) {
    cholmod_print_common(const_cast<char*>("Symbolic Analysis"), &cc_);
  }
  return factor;
 }
@ -139,13 +155,20 @@ cholmod_factor* SuiteSparse::AnalyzeCholeskyWithUserOrdering(
    cholmod_sparse* A,
    const vector<int>& ordering) {
  CHECK_EQ(ordering.size(), A->nrow);
  cc_.nmethods = 1;
  cc_.method[0].ordering = CHOLMOD_GIVEN;
  cholmod_factor* factor  =
      cholmod_analyze_p(A, const_cast<int*>(&ordering[0]), NULL, 0, &cc_);
  CHECK_EQ(cc_.status, CHOLMOD_OK)
      << "Cholmod symbolic analysis failed " << cc_.status;
  CHECK_NOTNULL(factor);
  if (VLOG_IS_ON(2)) {
    cholmod_print_common(const_cast<char*>("Symbolic Analysis"), &cc_);
  }
  return factor;
 }
@ -276,36 +299,52 @@ bool SuiteSparse::Cholesky(cholmod_sparse* A, cholmod_factor* L) {
  CHECK_NOTNULL(A);
  CHECK_NOTNULL(L);
  // Save the current print level and silence CHOLMOD, otherwise
  // CHOLMOD is prone to dumping stuff to stderr, which can be
  // distracting when the error (matrix is indefinite) is not a fatal
  // failure.
  const int old_print_level = cc_.print;
  cc_.print = 0;
  cc_.quick_return_if_not_posdef = 1;
  int status = cholmod_factorize(A, L, &cc_);
  cc_.print = old_print_level;
  // TODO(sameeragarwal): This switch statement is not consistent. It
  // treats all kinds of CHOLMOD failures as warnings. Some of these
  // like out of memory are definitely not warnings. The problem is
  // that the return value Cholesky is two valued, but the state of
  // the linear solver is really three valued. SUCCESS,
  // NON_FATAL_FAILURE (e.g., indefinite matrix) and FATAL_FAILURE
  // (e.g. out of memory).
  switch (cc_.status) {
    case CHOLMOD_NOT_INSTALLED:
-      LOG(WARNING) << "Cholmod failure: method not installed.";
+      LOG(WARNING) << "CHOLMOD failure: Method not installed.";
      return false;
    case CHOLMOD_OUT_OF_MEMORY:
-      LOG(WARNING) << "Cholmod failure: out of memory.";
+      LOG(WARNING) << "CHOLMOD failure: Out of memory.";
      return false;
    case CHOLMOD_TOO_LARGE:
-      LOG(WARNING) << "Cholmod failure: integer overflow occured.";
+      LOG(WARNING) << "CHOLMOD failure: Integer overflow occured.";
      return false;
    case CHOLMOD_INVALID:
-      LOG(WARNING) << "Cholmod failure: invalid input.";
+      LOG(WARNING) << "CHOLMOD failure: Invalid input.";
      return false;
    case CHOLMOD_NOT_POSDEF:
      // TODO(sameeragarwal): These two warnings require more
      // sophisticated handling going forward. For now we will be
      // strict and treat them as failures.
-      LOG(WARNING) << "Cholmod warning: matrix not positive definite.";
+      LOG(WARNING) << "CHOLMOD warning: Matrix not positive definite.";
      return false;
    case CHOLMOD_DSMALL:
-      LOG(WARNING) << "Cholmod warning: D for LDL' or diag(L) or "
+      LOG(WARNING) << "CHOLMOD warning: D for LDL' or diag(L) or "
                   << "LL' has tiny absolute value.";
      return false;
    case CHOLMOD_OK:
      if (status != 0) {
        return true;
      }
-      LOG(WARNING) << "Cholmod failure: cholmod_factorize returned zero "
+      LOG(WARNING) << "CHOLMOD failure: cholmod_factorize returned zero "
                   << "but cholmod_common::status is CHOLMOD_OK."
                   << "Please report this to ceres-solver@googlegroups.com.";
      return false;
--- a/extern/libmv/third_party/ceres/internal/ceres/suitesparse.h
+++ b/extern/libmv/third_party/ceres/internal/ceres/suitesparse.h
@ -56,8 +56,8 @@ class TripletSparseMatrix;
 // for all cholmod function calls.
 class SuiteSparse {
 public:
-  SuiteSparse()  { cholmod_start(&cc_);  }
+  SuiteSparse();
-  ~SuiteSparse() { cholmod_finish(&cc_); }
+  ~SuiteSparse();
  // Functions for building cholmod_sparse objects from sparse
  // matrices stored in triplet form. The matrix A is not
--- a/extern/libmv/third_party/ceres/internal/ceres/visibility_based_preconditioner.cc
+++ b/extern/libmv/third_party/ceres/internal/ceres/visibility_based_preconditioner.cc
@ -426,14 +426,8 @@ bool VisibilityBasedPreconditioner::Factorize() {
    } else {
      factor_ = ss_.AnalyzeCholesky(lhs);
    }
    if (VLOG_IS_ON(2)) {
      cholmod_print_common(const_cast<char*>("Symbolic Analysis"), ss_.mutable_cc());
    }
  }
  CHECK_NOTNULL(factor_);
  bool status = ss_.Cholesky(lhs, factor_);
  ss_.Free(lhs);
  return status;