forked from bartvdbraak/blender
Bundle current master of ceres-solver
Thins brings up some speed improvements: SPARSE_SCHUR is approx 1.3-1.5x times faster ITERATIVE_SCHUR is approx 1.2x times faster For blender this means camera solution go a bit faster now. Would not have affect on tracking speed.
This commit is contained in:
parent
a00b72bc75
commit
43b61fb8bd
@ -141,6 +141,7 @@ set(SRC
|
||||
include/ceres/solver.h
|
||||
include/ceres/types.h
|
||||
internal/ceres/array_utils.h
|
||||
internal/ceres/blas.h
|
||||
internal/ceres/block_evaluate_preparer.h
|
||||
internal/ceres/block_jacobian_writer.h
|
||||
internal/ceres/block_jacobi_preconditioner.h
|
||||
|
350
extern/libmv/third_party/ceres/ChangeLog
vendored
350
extern/libmv/third_party/ceres/ChangeLog
vendored
@ -1,3 +1,185 @@
|
||||
commit 520d35ef22dbcb05e344451c03ae64304e524a06
|
||||
Author: Sameer Agarwal <sameeragarwal@google.com>
|
||||
Date: Thu Apr 4 08:16:02 2013 -0700
|
||||
|
||||
Further BLAS improvements.
|
||||
|
||||
1. Switch to Eigen's implementation when all dimensions are fixed.
|
||||
2. Use lazyProduct for eigen matrix-vector product. This brings
|
||||
eigen's performance on iterative_schur closer to what it used
|
||||
to be before the last commit. There is however still an
|
||||
improvement to be had by using the naive implementation when
|
||||
the matrix and vector have dynamic dimensions.
|
||||
|
||||
BENCHMARK
|
||||
HEAD CHANGE
|
||||
|
||||
problem-16-22106-pre.txt
|
||||
gcc-eigen sparse_schur 0.859 gcc-eigen sparse_schur 0.853
|
||||
clang-eigen sparse_schur 0.848 clang-eigen sparse_schur 0.850
|
||||
gcc-blas sparse_schur 0.956 gcc-blas sparse_schur 0.865
|
||||
clang-blas sparse_schur 0.954 clang-blas sparse_schur 0.858
|
||||
gcc-eigen iterative_schur 4.656 gcc-eigen iterative_schur 3.271
|
||||
clang-eigen iterative_schur 4.664 clang-eigen iterative_schur 3.307
|
||||
gcc-blas iterative_schur 2.598 gcc-blas iterative_schur 2.620
|
||||
clang-blas iterative_schur 2.554 clang-blas iterative_schur 2.567
|
||||
|
||||
problem-49-7776-pre.txt
|
||||
gcc-eigen sparse_schur 0.477 gcc-eigen sparse_schur 0.472
|
||||
clang-eigen sparse_schur 0.475 clang-eigen sparse_schur 0.479
|
||||
gcc-blas sparse_schur 0.521 gcc-blas sparse_schur 0.469
|
||||
clang-blas sparse_schur 0.508 clang-blas sparse_schur 0.471
|
||||
gcc-eigen iterative_schur 3.172 gcc-eigen iterative_schur 2.088
|
||||
clang-eigen iterative_schur 3.161 clang-eigen iterative_schur 2.079
|
||||
gcc-blas iterative_schur 1.701 gcc-blas iterative_schur 1.720
|
||||
clang-blas iterative_schur 1.708 clang-blas iterative_schur 1.694
|
||||
|
||||
problem-245-198739-pre.txt
|
||||
gcc-eigen sparse_schur 28.092 gcc-eigen sparse_schur 28.233
|
||||
clang-eigen sparse_schur 28.148 clang-eigen sparse_schur 28.400
|
||||
gcc-blas sparse_schur 30.919 gcc-blas sparse_schur 28.110
|
||||
clang-blas sparse_schur 31.001 clang-blas sparse_schur 28.407
|
||||
gcc-eigen iterative_schur 63.095 gcc-eigen iterative_schur 43.694
|
||||
clang-eigen iterative_schur 63.412 clang-eigen iterative_schur 43.473
|
||||
gcc-blas iterative_schur 33.353 gcc-blas iterative_schur 33.321
|
||||
clang-blas iterative_schur 33.276 clang-blas iterative_schur 33.278
|
||||
|
||||
problem-257-65132-pre.txt
|
||||
gcc-eigen sparse_schur 3.687 gcc-eigen sparse_schur 3.629
|
||||
clang-eigen sparse_schur 3.669 clang-eigen sparse_schur 3.652
|
||||
gcc-blas sparse_schur 3.947 gcc-blas sparse_schur 3.673
|
||||
clang-blas sparse_schur 3.952 clang-blas sparse_schur 3.678
|
||||
gcc-eigen iterative_schur 121.512 gcc-eigen iterative_schur 76.833
|
||||
clang-eigen iterative_schur 123.547 clang-eigen iterative_schur 78.763
|
||||
gcc-blas iterative_schur 68.334 gcc-blas iterative_schur 68.612
|
||||
clang-blas iterative_schur 67.793 clang-blas iterative_schur 68.266
|
||||
|
||||
Notes:
|
||||
|
||||
1. Naive BLAS was a bit worse than eigen on fixed sized matrices. We did not see this
|
||||
before because of the different inlining thresholds. Fixing this boosted eigen's
|
||||
performance. Also the disparity between gcc and clang has gone away.
|
||||
|
||||
2. SPARSE_SCHUR performance remains the same, since it is only testing static sized
|
||||
matrices.
|
||||
|
||||
3. ITERATIVE_SCHUR performance goes up substantially due to the lazyProduct change,
|
||||
but even there, since most of the products are dynamic sized, the naive implementation
|
||||
wins handily.
|
||||
|
||||
Change-Id: Idc17f35b9c68aaebb1b2e131adf3af8374a85a4c
|
||||
|
||||
commit 25ac54807eedf16fd6c34efc390901ee549a7d14
|
||||
Author: Sameer Agarwal <sameeragarwal@google.com>
|
||||
Date: Wed Apr 3 18:51:27 2013 -0700
|
||||
|
||||
Speed up Jets.
|
||||
|
||||
Change-Id: I101bac1b1a1cf72ca49ffcf843b73c0ef5a6dfcb
|
||||
|
||||
commit 3d6eceb45cf27024865908f0c10a5c2b0f8719cf
|
||||
Author: Sameer Agarwal <sameeragarwal@google.com>
|
||||
Date: Tue Apr 2 21:45:48 2013 -0700
|
||||
|
||||
Replace more instances of Eigen GEMV with Ceres BLAS.
|
||||
|
||||
With this ITERATIVE_SCHUR with JACOBI preconditioner went down from
|
||||
280 seconds to 150 seconds on problem-744-543562-pre.txt.
|
||||
|
||||
Change-Id: I4f319c1108421e8d59f58654a4c0576ad65df609
|
||||
|
||||
commit 296fa9b1279ee1900c8ae32d70e97cd10fc0b46b
|
||||
Author: Sameer Agarwal <sameeragarwal@google.com>
|
||||
Date: Tue Apr 2 09:44:15 2013 -0700
|
||||
|
||||
Replace Eigen block operations with small GEMM and GEMV loops.
|
||||
|
||||
1. Add Matrix-Matrix and Matrix-Vector multiply functions.
|
||||
2. Replace Eigen usage in SchurEliminator with these custom
|
||||
matrix operations.
|
||||
3. Save on some memory allocations in ChunkOuterProduct.
|
||||
4. Replace LDLT with LLT.
|
||||
|
||||
As a result on problem-16-22106-pre.txt, the linear solver time
|
||||
goes down from 1.2s to 0.64s.
|
||||
|
||||
Change-Id: I2daa667960e0a1e8834489965a30be31f37fd87f
|
||||
|
||||
commit 222ca20e8facf706582fe696b7f41247391eac12
|
||||
Author: Sameer Agarwal <sameeragarwal@google.com>
|
||||
Date: Mon Apr 1 09:11:07 2013 -0700
|
||||
|
||||
SuiteSparse cleanup.
|
||||
|
||||
1. Silence CHOLMOD's indefiniteness warnings.
|
||||
2. Add a comment about how the error handling in suitesparse.cc
|
||||
needs to be improved.
|
||||
3. Move the analysis logging into suitesparse.cc and out of the
|
||||
three callsites.
|
||||
|
||||
Change-Id: Idd396b8ea4bf59fc1ffc7f9fcbbc7b38ed71643c
|
||||
|
||||
commit b7ba93459b7f584eedb1a9f42f3d06bccabd33dc
|
||||
Author: Petter Strandmark <petter.strandmark@gmail.com>
|
||||
Date: Tue Feb 19 12:52:58 2013 +0100
|
||||
|
||||
Enable larger tuple sizes for Visual Studio 2012.
|
||||
|
||||
Visual Studio 2012 does not have variadic templates and implements
|
||||
tuples differently. By default, only sizes up to 5 are supported,
|
||||
which conflicts with Gtest.
|
||||
|
||||
Change-Id: Ieb8d59e4329863cbfa2729d8a76db0714c08d259
|
||||
|
||||
commit 564a83fcc690ac8383bf52a782c45757ae7fa2ad
|
||||
Author: Sameer Agarwal <sameeragarwal@google.com>
|
||||
Date: Tue Mar 26 11:11:43 2013 -0700
|
||||
|
||||
Lint cleanup from William Rucklidge.
|
||||
|
||||
Change-Id: I8d4a0aa3e264775d20e99a6b5265f3023de92560
|
||||
|
||||
commit cbe64827becbbaab5b435a71bf00353b4ddd026b
|
||||
Author: Sameer Agarwal <sameeragarwal@google.com>
|
||||
Date: Mon Mar 25 17:39:53 2013 -0700
|
||||
|
||||
Update documentation
|
||||
|
||||
Change-Id: Iea3c4b5409e593b1fb070a491ba8a479db32ca58
|
||||
|
||||
commit 802639c89603c9541e624766349d1989a1f641c0
|
||||
Author: Pablo Speciale <pablo.speciale@gmail.com>
|
||||
Date: Mon Mar 25 20:53:45 2013 -0700
|
||||
|
||||
ceresproblem label was incorrect
|
||||
|
||||
Change-Id: I3e210375adba4fa50ff3c25398b20a65d241cb35
|
||||
|
||||
commit 6bcb8d9c304a3b218f8788018dfdfe368bb7d60c
|
||||
Author: Pablo Speciale <pablo.speciale@gmail.com>
|
||||
Date: Mon Mar 25 16:40:26 2013 -0700
|
||||
|
||||
Compiling against static or shared library
|
||||
|
||||
Change-Id: I3fb35e9a49f90b8894f59dde49c90a7c2dd74b0a
|
||||
|
||||
commit 619cfe692020c078275b68eac2167232fafdfffb
|
||||
Author: Sameer Agarwal <sameeragarwal@google.com>
|
||||
Date: Mon Mar 25 14:03:41 2013 -0700
|
||||
|
||||
Update version history
|
||||
|
||||
Change-Id: I1d036efad1507efd19d8581f147b38170b1f0543
|
||||
|
||||
commit 6ae34757850a5fa8213e0d1a540d9d75d6840a08
|
||||
Author: Pablo Speciale <pablo.speciale@gmail.com>
|
||||
Date: Sun Mar 24 22:30:52 2013 -0700
|
||||
|
||||
Added documentation regarding the use of Ceres with cmake (once installed)
|
||||
Commets about the flag ``BUILD_DOCUMENTATION=ON``
|
||||
|
||||
Change-Id: I8814927e60af190c8043bfc36e77fe76bfe6f562
|
||||
|
||||
commit f46de9e697eb5b8756084615e29ded48600a4d39
|
||||
Author: Sergey Sharybin <sergey.vfx@gmail.com>
|
||||
Date: Thu Mar 21 15:31:35 2013 +0600
|
||||
@ -437,171 +619,3 @@ Date: Sun Feb 24 14:15:45 2013 -0800
|
||||
Minor release script fixes.
|
||||
|
||||
Change-Id: Ifd0a7f4f584c85d4d9574eca46094b372a8d7aff
|
||||
|
||||
commit b53c9667f508c125b8aa833e7a063fa44ef8a98e
|
||||
Author: Sergey Sharybin <sergey.vfx@gmail.com>
|
||||
Date: Mon Feb 25 01:14:26 2013 +0600
|
||||
|
||||
Solve No Previous Prototype GCC warning
|
||||
|
||||
In some cases there were missing includes of own
|
||||
header files from implementation files.
|
||||
|
||||
In other cases moved function which are only used
|
||||
within single file into an anonymous namespace.
|
||||
|
||||
Change-Id: I2c6b411bcfbc521e2a5f21265dc8e009a548b1c8
|
||||
|
||||
commit 267ccc45a3e875bf87832a8ad615be690b4926d3
|
||||
Author: Sergey Sharybin <sergey.vfx@gmail.com>
|
||||
Date: Mon Feb 25 01:04:16 2013 +0600
|
||||
|
||||
Fix for MinGW build on Windows
|
||||
|
||||
GG_LONGLONG and GG_ULONGLONG shall use LL and ULL suffixes,
|
||||
since MinGW is actuall a GCC compiler.
|
||||
|
||||
Solved by checking whether compilation happens with MinGW
|
||||
or not using standard MinGW's __MINGW32__ and __MINGW64__
|
||||
definitions.
|
||||
|
||||
Change-Id: I789b34f6342a56ba42f4b280f7242700022ab7a1
|
||||
|
||||
commit 509f68cfe3fd13b794c4e67ff38c761407c858cf
|
||||
Author: Sameer Agarwal <sameeragarwal@google.com>
|
||||
Date: Wed Feb 20 01:39:03 2013 -0800
|
||||
|
||||
Problem::Evaluate implementation.
|
||||
|
||||
1. Add Problem::Evaluate and tests.
|
||||
2. Remove Solver::Summary::initial/final_*
|
||||
3. Remove Solver::Options::return_* members.
|
||||
4. Various cpplint cleanups.
|
||||
|
||||
Change-Id: I4266de53489896f72d9c6798c5efde6748d68a47
|
||||
|
||||
commit d4a0bf86d688d1b68e00ff302858de5a4e0d9727
|
||||
Author: Keir Mierle <mierle@gmail.com>
|
||||
Date: Sun Feb 24 10:35:44 2013 -0800
|
||||
|
||||
Fix threading build on Windows.
|
||||
|
||||
On Windows, including the "windows.h" header defines an enormous number of
|
||||
symbols; some of which are macros with common names. In particular, "ERROR" and
|
||||
"min" and "max" get defined. This causes clashes when user code references
|
||||
these names in a context other than the intended use in windows.h.
|
||||
|
||||
To deal with this, the Microsoft engineers added the ability to control the
|
||||
definition of these symbols by adding extra defines. In particular, including
|
||||
windows.h in the following way
|
||||
|
||||
#define NOGDI
|
||||
#define NOMINMAX
|
||||
|
||||
will reduce the number of macros defined. This way they will not conflict with
|
||||
other uses in Ceres. For example, numeric_limits<double>::max() is impossible
|
||||
to call without defining NOMINMAX.
|
||||
|
||||
Change-Id: I166f5d3bb6dc0e2e4b2ebf800fb19e49206f7874
|
||||
|
||||
commit beb4505311011130a7e54632137b0fbb5824cc9b
|
||||
Author: Sameer Agarwal <sameeragarwal@google.com>
|
||||
Date: Fri Feb 22 13:37:01 2013 -0800
|
||||
|
||||
Minor fixes
|
||||
|
||||
Based on William Rucklidge's review, including
|
||||
a nasty bug in parameter block removal.
|
||||
|
||||
Change-Id: I3a692e589f600ff560ecae9fa85bb0b76063d403
|
||||
|
||||
commit 9a88bd7c4b40e2a1e0cd9b0dc09a3517c467e04e
|
||||
Author: Sameer Agarwal <sameeragarwal@google.com>
|
||||
Date: Tue Feb 19 13:09:12 2013 -0800
|
||||
|
||||
Minor bug fixes
|
||||
|
||||
Change-Id: I94e4521adf76a6c77db954c4a8955168e9d37b55
|
||||
|
||||
commit 956ed7e8f2054623615e6ae3601055d613897f26
|
||||
Author: Sameer Agarwal <sameeragarwal@google.com>
|
||||
Date: Tue Feb 19 07:06:15 2013 -0800
|
||||
|
||||
Various minor fixes.
|
||||
|
||||
1. Unused variable warnings and fixes.
|
||||
2. Minor documentation update.
|
||||
|
||||
Change-Id: I815588a5806df1030a7c8750f4fb594c503f8998
|
||||
|
||||
commit 3e2c4ef9ad35e94198f4f3367b99fd91e26996a1
|
||||
Author: Keir Mierle <mierle@gmail.com>
|
||||
Date: Sun Feb 17 12:37:55 2013 -0800
|
||||
|
||||
Add adapters for column/row-major matrices to rotation.h
|
||||
|
||||
This patch introduces a matrix wrapper (MatrixAdapter) that allows to
|
||||
transparently pass pointers to row-major or column-major matrices
|
||||
to the conversion functions.
|
||||
|
||||
Change-Id: I7f1683a8722088cffcc542f593ce7eb46fca109b
|
||||
|
||||
commit 04938efe4bedec112083c5ceb227ba004f96bd01
|
||||
Author: Keir Mierle <mierle@gmail.com>
|
||||
Date: Sun Feb 17 12:37:55 2013 -0800
|
||||
|
||||
Add support for removing parameter and residual blocks.
|
||||
|
||||
This adds support for removing parameter and residual blocks.
|
||||
There are two modes of operation: in the first, removals of
|
||||
paremeter blocks are expensive, since each remove requires
|
||||
scanning all residual blocks to find ones that depend on the
|
||||
removed parameter. In the other, extra memory is sacrificed to
|
||||
maintain a list of the residuals a parameter block depends on,
|
||||
removing the need to scan. In both cases, removing residual blocks
|
||||
is fast.
|
||||
|
||||
As a caveat, any removals destroys the ordering of the parameters,
|
||||
so the residuals or jacobian returned from Solver::Solve() is
|
||||
meaningless. There is some debate on the best way to handle this;
|
||||
the details remain for a future change.
|
||||
|
||||
This also adds some overhead, even in the case that fast removals
|
||||
are not requested:
|
||||
|
||||
- 1 int32 to each residual, to track its position in the program.
|
||||
- 1 pointer to each parameter, to store the dependent residuals.
|
||||
|
||||
Change-Id: I71dcac8656679329a15ee7fc12c0df07030c12af
|
||||
|
||||
commit fa21df8cd969bb257b87c9ef7c0147d8d5ea8725
|
||||
Author: Sameer Agarwal <sameeragarwal@google.com>
|
||||
Date: Mon Feb 18 08:48:52 2013 -0800
|
||||
|
||||
Add script for building documentation.
|
||||
|
||||
Update make_release
|
||||
|
||||
Minor documentation fixes.
|
||||
|
||||
Change-Id: I1248ec3f58be66b5929aee6f2aa392c15d53ed83
|
||||
|
||||
commit 290b975d1d4eba44205bbeb0fa6b3ce8a6fa4a0c
|
||||
Author: Sameer Agarwal <sameeragarwal@google.com>
|
||||
Date: Sun Feb 17 16:50:37 2013 -0800
|
||||
|
||||
Preconditioner refactoring.
|
||||
|
||||
1. Added a Preconditioner interface.
|
||||
2. SCHUR_JACOBI is now its own class and is independent of
|
||||
SuiteSparse.
|
||||
|
||||
Change-Id: Id912ab19cf3736e61d1b90ddaf5bfba33e877ec4
|
||||
|
||||
commit d010de543530001fa917501a13ba02879c8ea52f
|
||||
Author: Sameer Agarwal <sameeragarwal@google.com>
|
||||
Date: Fri Feb 15 14:26:56 2013 -0800
|
||||
|
||||
Solver::Summary::FullReport() supports line search now.
|
||||
|
||||
Change-Id: Ib08d300198b85d9732cfb5785af4235ca4bd5226
|
||||
|
1
extern/libmv/third_party/ceres/files.txt
vendored
1
extern/libmv/third_party/ceres/files.txt
vendored
@ -31,6 +31,7 @@ include/ceres/solver.h
|
||||
include/ceres/types.h
|
||||
internal/ceres/array_utils.cc
|
||||
internal/ceres/array_utils.h
|
||||
internal/ceres/blas.h
|
||||
internal/ceres/block_evaluate_preparer.cc
|
||||
internal/ceres/block_evaluate_preparer.h
|
||||
internal/ceres/block_jacobian_writer.cc
|
||||
|
@ -349,7 +349,11 @@ Jet<T, N> operator/(const Jet<T, N>& f,
|
||||
//
|
||||
// which holds because v*v = 0.
|
||||
h.a = f.a / g.a;
|
||||
h.v = (f.v - f.a / g.a * g.v) / g.a;
|
||||
const T g_a_inverse = 1.0 / g.a;
|
||||
const T f_a_by_g_a = f.a * g_a_inverse;
|
||||
for (int i = 0; i < N; ++i) {
|
||||
h.v[i] = (f.v[i] - f_a_by_g_a * g.v[i]) * g_a_inverse;
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
@ -358,7 +362,8 @@ template<typename T, int N> inline
|
||||
Jet<T, N> operator/(T s, const Jet<T, N>& g) {
|
||||
Jet<T, N> h;
|
||||
h.a = s / g.a;
|
||||
h.v = - s * g.v / (g.a * g.a);
|
||||
const T minus_s_g_a_inverse2 = -s / (g.a * g.a);
|
||||
h.v = g.v * minus_s_g_a_inverse2;
|
||||
return h;
|
||||
}
|
||||
|
||||
@ -366,8 +371,9 @@ Jet<T, N> operator/(T s, const Jet<T, N>& g) {
|
||||
template<typename T, int N> inline
|
||||
Jet<T, N> operator/(const Jet<T, N>& f, T s) {
|
||||
Jet<T, N> h;
|
||||
h.a = f.a / s;
|
||||
h.v = f.v / s;
|
||||
const T s_inverse = 1.0 / s;
|
||||
h.a = f.a * s_inverse;
|
||||
h.v = f.v * s_inverse;
|
||||
return h;
|
||||
}
|
||||
|
||||
@ -425,7 +431,8 @@ template <typename T, int N> inline
|
||||
Jet<T, N> log(const Jet<T, N>& f) {
|
||||
Jet<T, N> g;
|
||||
g.a = log(f.a);
|
||||
g.v = f.v / f.a;
|
||||
const T a_inverse = T(1.0) / f.a;
|
||||
g.v = f.v * a_inverse;
|
||||
return g;
|
||||
}
|
||||
|
||||
@ -443,7 +450,8 @@ template <typename T, int N> inline
|
||||
Jet<T, N> sqrt(const Jet<T, N>& f) {
|
||||
Jet<T, N> g;
|
||||
g.a = sqrt(f.a);
|
||||
g.v = f.v / (T(2.0) * g.a);
|
||||
const T two_a_inverse = 1.0 / (T(2.0) * g.a);
|
||||
g.v = f.v * two_a_inverse;
|
||||
return g;
|
||||
}
|
||||
|
||||
@ -452,7 +460,7 @@ template <typename T, int N> inline
|
||||
Jet<T, N> cos(const Jet<T, N>& f) {
|
||||
Jet<T, N> g;
|
||||
g.a = cos(f.a);
|
||||
T sin_a = sin(f.a);
|
||||
const T sin_a = sin(f.a);
|
||||
g.v = - sin_a * f.v;
|
||||
return g;
|
||||
}
|
||||
@ -462,7 +470,8 @@ template <typename T, int N> inline
|
||||
Jet<T, N> acos(const Jet<T, N>& f) {
|
||||
Jet<T, N> g;
|
||||
g.a = acos(f.a);
|
||||
g.v = - T(1.0) / sqrt(T(1.0) - f.a * f.a) * f.v;
|
||||
const T tmp = - T(1.0) / sqrt(T(1.0) - f.a * f.a);
|
||||
g.v = tmp * f.v;
|
||||
return g;
|
||||
}
|
||||
|
||||
@ -471,7 +480,7 @@ template <typename T, int N> inline
|
||||
Jet<T, N> sin(const Jet<T, N>& f) {
|
||||
Jet<T, N> g;
|
||||
g.a = sin(f.a);
|
||||
T cos_a = cos(f.a);
|
||||
const T cos_a = cos(f.a);
|
||||
g.v = cos_a * f.v;
|
||||
return g;
|
||||
}
|
||||
@ -481,7 +490,8 @@ template <typename T, int N> inline
|
||||
Jet<T, N> asin(const Jet<T, N>& f) {
|
||||
Jet<T, N> g;
|
||||
g.a = asin(f.a);
|
||||
g.v = T(1.0) / sqrt(T(1.0) - f.a * f.a) * f.v;
|
||||
const T tmp = T(1.0) / sqrt(T(1.0) - f.a * f.a);
|
||||
g.v = tmp * f.v;
|
||||
return g;
|
||||
}
|
||||
|
||||
|
496
extern/libmv/third_party/ceres/internal/ceres/blas.h
vendored
Normal file
496
extern/libmv/third_party/ceres/internal/ceres/blas.h
vendored
Normal file
@ -0,0 +1,496 @@
|
||||
// Ceres Solver - A fast non-linear least squares minimizer
|
||||
// Copyright 2013 Google Inc. All rights reserved.
|
||||
// http://code.google.com/p/ceres-solver/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: sameeragarwal@google.com (Sameer Agarwal)
|
||||
//
|
||||
// Simple blas functions for use in the Schur Eliminator. These are
|
||||
// fairly basic implementations which already yield a significant
|
||||
// speedup in the eliminator performance.
|
||||
|
||||
#ifndef CERES_INTERNAL_BLAS_H_
|
||||
#define CERES_INTERNAL_BLAS_H_
|
||||
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
// Remove the ".noalias()" annotation from the matrix matrix
|
||||
// mutliplies to produce a correct build with the Android NDK,
|
||||
// including versions 6, 7, 8, and 8b, when built with STLPort and the
|
||||
// non-standalone toolchain (i.e. ndk-build). This appears to be a
|
||||
// compiler bug; if the workaround is not in place, the line
|
||||
//
|
||||
// block.noalias() -= A * B;
|
||||
//
|
||||
// gets compiled to
|
||||
//
|
||||
// block.noalias() += A * B;
|
||||
//
|
||||
// which breaks schur elimination. Introducing a temporary by removing the
|
||||
// .noalias() annotation causes the issue to disappear. Tracking this
|
||||
// issue down was tricky, since the test suite doesn't run when built with
|
||||
// the non-standalone toolchain.
|
||||
//
|
||||
// TODO(keir): Make a reproduction case for this and send it upstream.
|
||||
#ifdef CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG
|
||||
#define CERES_MAYBE_NOALIAS
|
||||
#else
|
||||
#define CERES_MAYBE_NOALIAS .noalias()
|
||||
#endif
|
||||
|
||||
// For the matrix-matrix functions below, there are three functions
|
||||
// for each functionality. Foo, FooNaive and FooEigen. Foo is the one
|
||||
// to be called by the user. FooNaive is a basic loop based
|
||||
// implementation and FooEigen uses Eigen's implementation. Foo
|
||||
// chooses between FooNaive and FooEigen depending on how many of the
|
||||
// template arguments are fixed at compile time. Currently, FooEigen
|
||||
// is called if all matrix dimenions are compile time
|
||||
// constants. FooNaive is called otherwise. This leads to the best
|
||||
// performance currently.
|
||||
//
|
||||
// TODO(sameeragarwal): Benchmark and simplify the matrix-vector
|
||||
// functions.
|
||||
|
||||
// C op A * B;
|
||||
//
|
||||
// where op can be +=, -=, or =.
|
||||
//
|
||||
// The template parameters (kRowA, kColA, kRowB, kColB) allow
|
||||
// specialization of the loop at compile time. If this information is
|
||||
// not available, then Eigen::Dynamic should be used as the template
|
||||
// argument.
|
||||
//
|
||||
// kOperation = 1 -> C += A * B
|
||||
// kOperation = -1 -> C -= A * B
|
||||
// kOperation = 0 -> C = A * B
|
||||
//
|
||||
// The function can write into matrices C which are larger than the
|
||||
// matrix A * B. This is done by specifying the true size of C via
|
||||
// row_stride_c and col_stride_c, and then indicating where A * B
|
||||
// should be written into by start_row_c and start_col_c.
|
||||
//
|
||||
// Graphically if row_stride_c = 10, col_stride_c = 12, start_row_c =
|
||||
// 4 and start_col_c = 5, then if A = 3x2 and B = 2x4, we get
|
||||
//
|
||||
// ------------
|
||||
// ------------
|
||||
// ------------
|
||||
// ------------
|
||||
// -----xxxx---
|
||||
// -----xxxx---
|
||||
// -----xxxx---
|
||||
// ------------
|
||||
// ------------
|
||||
// ------------
|
||||
//
|
||||
template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>
|
||||
inline void MatrixMatrixMultiplyEigen(const double* A,
|
||||
const int num_row_a,
|
||||
const int num_col_a,
|
||||
const double* B,
|
||||
const int num_row_b,
|
||||
const int num_col_b,
|
||||
double* C,
|
||||
const int start_row_c,
|
||||
const int start_col_c,
|
||||
const int row_stride_c,
|
||||
const int col_stride_c) {
|
||||
const typename EigenTypes<kRowA, kColA>::ConstMatrixRef Aref(A, num_row_a, num_col_a);
|
||||
const typename EigenTypes<kRowB, kColB>::ConstMatrixRef Bref(B, num_row_b, num_col_b);
|
||||
MatrixRef Cref(C, row_stride_c, col_stride_c);
|
||||
Eigen::Block<MatrixRef, kRowA, kColB> block(Cref,
|
||||
start_row_c, start_col_c,
|
||||
num_row_a, num_col_b);
|
||||
if (kOperation > 0) {
|
||||
block CERES_MAYBE_NOALIAS += Aref * Bref;
|
||||
} else if (kOperation < 0) {
|
||||
block CERES_MAYBE_NOALIAS -= Aref * Bref;
|
||||
} else {
|
||||
block CERES_MAYBE_NOALIAS = Aref * Bref;
|
||||
}
|
||||
}
|
||||
|
||||
template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>
|
||||
inline void MatrixMatrixMultiplyNaive(const double* A,
|
||||
const int num_row_a,
|
||||
const int num_col_a,
|
||||
const double* B,
|
||||
const int num_row_b,
|
||||
const int num_col_b,
|
||||
double* C,
|
||||
const int start_row_c,
|
||||
const int start_col_c,
|
||||
const int row_stride_c,
|
||||
const int col_stride_c) {
|
||||
DCHECK_GT(num_row_a, 0);
|
||||
DCHECK_GT(num_col_a, 0);
|
||||
DCHECK_GT(num_row_b, 0);
|
||||
DCHECK_GT(num_col_b, 0);
|
||||
DCHECK_GE(start_row_c, 0);
|
||||
DCHECK_GE(start_col_c, 0);
|
||||
DCHECK_GT(row_stride_c, 0);
|
||||
DCHECK_GT(col_stride_c, 0);
|
||||
|
||||
DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
|
||||
DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
|
||||
DCHECK((kRowB == Eigen::Dynamic) || (kRowB == num_row_b));
|
||||
DCHECK((kColB == Eigen::Dynamic) || (kColB == num_col_b));
|
||||
|
||||
const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
|
||||
const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
|
||||
const int NUM_ROW_B = (kColB != Eigen::Dynamic ? kRowB : num_row_b);
|
||||
const int NUM_COL_B = (kColB != Eigen::Dynamic ? kColB : num_col_b);
|
||||
DCHECK_EQ(NUM_COL_A, NUM_ROW_B);
|
||||
|
||||
const int NUM_ROW_C = NUM_ROW_A;
|
||||
const int NUM_COL_C = NUM_COL_B;
|
||||
DCHECK_LT(start_row_c + NUM_ROW_C, row_stride_c);
|
||||
DCHECK_LT(start_col_c + NUM_COL_C, col_stride_c);
|
||||
|
||||
for (int row = 0; row < NUM_ROW_C; ++row) {
|
||||
for (int col = 0; col < NUM_COL_C; ++col) {
|
||||
double tmp = 0.0;
|
||||
for (int k = 0; k < NUM_COL_A; ++k) {
|
||||
tmp += A[row * NUM_COL_A + k] * B[k * NUM_COL_B + col];
|
||||
}
|
||||
|
||||
const int index = (row + start_row_c) * col_stride_c + start_col_c + col;
|
||||
if (kOperation > 0) {
|
||||
C[index] += tmp;
|
||||
} else if (kOperation < 0) {
|
||||
C[index] -= tmp;
|
||||
} else {
|
||||
C[index] = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>
|
||||
inline void MatrixMatrixMultiply(const double* A,
|
||||
const int num_row_a,
|
||||
const int num_col_a,
|
||||
const double* B,
|
||||
const int num_row_b,
|
||||
const int num_col_b,
|
||||
double* C,
|
||||
const int start_row_c,
|
||||
const int start_col_c,
|
||||
const int row_stride_c,
|
||||
const int col_stride_c) {
|
||||
#ifdef CERES_NO_CUSTOM_BLAS
|
||||
MatrixMatrixMultiplyEigen<kRowA, kColA, kRowB, kColB, kOperation>(
|
||||
A, num_row_a, num_col_a,
|
||||
B, num_row_b, num_col_b,
|
||||
C, start_row_c, start_col_c, row_stride_c, col_stride_c);
|
||||
return;
|
||||
|
||||
#else
|
||||
|
||||
if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic &&
|
||||
kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) {
|
||||
MatrixMatrixMultiplyEigen<kRowA, kColA, kRowB, kColB, kOperation>(
|
||||
A, num_row_a, num_col_a,
|
||||
B, num_row_b, num_col_b,
|
||||
C, start_row_c, start_col_c, row_stride_c, col_stride_c);
|
||||
} else {
|
||||
MatrixMatrixMultiplyNaive<kRowA, kColA, kRowB, kColB, kOperation>(
|
||||
A, num_row_a, num_col_a,
|
||||
B, num_row_b, num_col_b,
|
||||
C, start_row_c, start_col_c, row_stride_c, col_stride_c);
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// C op A' * B;
|
||||
//
|
||||
// where op can be +=, -=, or =.
|
||||
//
|
||||
// The template parameters (kRowA, kColA, kRowB, kColB) allow
|
||||
// specialization of the loop at compile time. If this information is
|
||||
// not available, then Eigen::Dynamic should be used as the template
|
||||
// argument.
|
||||
//
|
||||
// kOperation = 1 -> C += A' * B
|
||||
// kOperation = -1 -> C -= A' * B
|
||||
// kOperation = 0 -> C = A' * B
|
||||
//
|
||||
// The function can write into matrices C which are larger than the
|
||||
// matrix A' * B. This is done by specifying the true size of C via
|
||||
// row_stride_c and col_stride_c, and then indicating where A * B
|
||||
// should be written into by start_row_c and start_col_c.
|
||||
//
|
||||
// Graphically if row_stride_c = 10, col_stride_c = 12, start_row_c =
|
||||
// 4 and start_col_c = 5, then if A = 2x3 and B = 2x4, we get
|
||||
//
|
||||
// ------------
|
||||
// ------------
|
||||
// ------------
|
||||
// ------------
|
||||
// -----xxxx---
|
||||
// -----xxxx---
|
||||
// -----xxxx---
|
||||
// ------------
|
||||
// ------------
|
||||
// ------------
|
||||
//
|
||||
template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>
|
||||
inline void MatrixTransposeMatrixMultiplyEigen(const double* A,
|
||||
const int num_row_a,
|
||||
const int num_col_a,
|
||||
const double* B,
|
||||
const int num_row_b,
|
||||
const int num_col_b,
|
||||
double* C,
|
||||
const int start_row_c,
|
||||
const int start_col_c,
|
||||
const int row_stride_c,
|
||||
const int col_stride_c) {
|
||||
const typename EigenTypes<kRowA, kColA>::ConstMatrixRef Aref(A, num_row_a, num_col_a);
|
||||
const typename EigenTypes<kRowB, kColB>::ConstMatrixRef Bref(B, num_row_b, num_col_b);
|
||||
MatrixRef Cref(C, row_stride_c, col_stride_c);
|
||||
Eigen::Block<MatrixRef, kColA, kColB> block(Cref,
|
||||
start_row_c, start_col_c,
|
||||
num_col_a, num_col_b);
|
||||
if (kOperation > 0) {
|
||||
block CERES_MAYBE_NOALIAS += Aref.transpose() * Bref;
|
||||
} else if (kOperation < 0) {
|
||||
block CERES_MAYBE_NOALIAS -= Aref.transpose() * Bref;
|
||||
} else {
|
||||
block CERES_MAYBE_NOALIAS = Aref.transpose() * Bref;
|
||||
}
|
||||
}
|
||||
|
||||
template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>
|
||||
inline void MatrixTransposeMatrixMultiplyNaive(const double* A,
|
||||
const int num_row_a,
|
||||
const int num_col_a,
|
||||
const double* B,
|
||||
const int num_row_b,
|
||||
const int num_col_b,
|
||||
double* C,
|
||||
const int start_row_c,
|
||||
const int start_col_c,
|
||||
const int row_stride_c,
|
||||
const int col_stride_c) {
|
||||
DCHECK_GT(num_row_a, 0);
|
||||
DCHECK_GT(num_col_a, 0);
|
||||
DCHECK_GT(num_row_b, 0);
|
||||
DCHECK_GT(num_col_b, 0);
|
||||
DCHECK_GE(start_row_c, 0);
|
||||
DCHECK_GE(start_col_c, 0);
|
||||
DCHECK_GT(row_stride_c, 0);
|
||||
DCHECK_GT(col_stride_c, 0);
|
||||
|
||||
DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
|
||||
DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
|
||||
DCHECK((kRowB == Eigen::Dynamic) || (kRowB == num_row_b));
|
||||
DCHECK((kColB == Eigen::Dynamic) || (kColB == num_col_b));
|
||||
|
||||
const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
|
||||
const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
|
||||
const int NUM_ROW_B = (kColB != Eigen::Dynamic ? kRowB : num_row_b);
|
||||
const int NUM_COL_B = (kColB != Eigen::Dynamic ? kColB : num_col_b);
|
||||
DCHECK_EQ(NUM_ROW_A, NUM_ROW_B);
|
||||
|
||||
const int NUM_ROW_C = NUM_COL_A;
|
||||
const int NUM_COL_C = NUM_COL_B;
|
||||
DCHECK_LT(start_row_c + NUM_ROW_C, row_stride_c);
|
||||
DCHECK_LT(start_col_c + NUM_COL_C, col_stride_c);
|
||||
|
||||
for (int row = 0; row < NUM_ROW_C; ++row) {
|
||||
for (int col = 0; col < NUM_COL_C; ++col) {
|
||||
double tmp = 0.0;
|
||||
for (int k = 0; k < NUM_ROW_A; ++k) {
|
||||
tmp += A[k * NUM_COL_A + row] * B[k * NUM_COL_B + col];
|
||||
}
|
||||
|
||||
const int index = (row + start_row_c) * col_stride_c + start_col_c + col;
|
||||
if (kOperation > 0) {
|
||||
C[index]+= tmp;
|
||||
} else if (kOperation < 0) {
|
||||
C[index]-= tmp;
|
||||
} else {
|
||||
C[index]= tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>
|
||||
inline void MatrixTransposeMatrixMultiply(const double* A,
|
||||
const int num_row_a,
|
||||
const int num_col_a,
|
||||
const double* B,
|
||||
const int num_row_b,
|
||||
const int num_col_b,
|
||||
double* C,
|
||||
const int start_row_c,
|
||||
const int start_col_c,
|
||||
const int row_stride_c,
|
||||
const int col_stride_c) {
|
||||
#ifdef CERES_NO_CUSTOM_BLAS
|
||||
MatrixTransposeMatrixMultiplyEigen<kRowA, kColA, kRowB, kColB, kOperation>(
|
||||
A, num_row_a, num_col_a,
|
||||
B, num_row_b, num_col_b,
|
||||
C, start_row_c, start_col_c, row_stride_c, col_stride_c);
|
||||
return;
|
||||
|
||||
#else
|
||||
|
||||
if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic &&
|
||||
kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) {
|
||||
MatrixTransposeMatrixMultiplyEigen<kRowA, kColA, kRowB, kColB, kOperation>(
|
||||
A, num_row_a, num_col_a,
|
||||
B, num_row_b, num_col_b,
|
||||
C, start_row_c, start_col_c, row_stride_c, col_stride_c);
|
||||
} else {
|
||||
MatrixTransposeMatrixMultiplyNaive<kRowA, kColA, kRowB, kColB, kOperation>(
|
||||
A, num_row_a, num_col_a,
|
||||
B, num_row_b, num_col_b,
|
||||
C, start_row_c, start_col_c, row_stride_c, col_stride_c);
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
template<int kRowA, int kColA, int kOperation>
|
||||
inline void MatrixVectorMultiply(const double* A,
|
||||
const int num_row_a,
|
||||
const int num_col_a,
|
||||
const double* b,
|
||||
double* c) {
|
||||
#ifdef CERES_NO_CUSTOM_BLAS
|
||||
const typename EigenTypes<kRowA, kColA>::ConstMatrixRef Aref(A, num_row_a, num_col_a);
|
||||
const typename EigenTypes<kColA>::ConstVectorRef bref(b, num_col_a);
|
||||
typename EigenTypes<kRowA>::VectorRef cref(c, num_row_a);
|
||||
|
||||
// lazyProduct works better than .noalias() for matrix-vector
|
||||
// products.
|
||||
if (kOperation > 0) {
|
||||
cref += Aref.lazyProduct(bref);
|
||||
} else if (kOperation < 0) {
|
||||
cref -= Aref.lazyProduct(bref);
|
||||
} else {
|
||||
cref -= Aref.lazyProduct(bref);
|
||||
}
|
||||
#else
|
||||
|
||||
DCHECK_GT(num_row_a, 0);
|
||||
DCHECK_GT(num_col_a, 0);
|
||||
DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
|
||||
DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
|
||||
|
||||
const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
|
||||
const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
|
||||
|
||||
for (int row = 0; row < NUM_ROW_A; ++row) {
|
||||
double tmp = 0.0;
|
||||
for (int col = 0; col < NUM_COL_A; ++col) {
|
||||
tmp += A[row * NUM_COL_A + col] * b[col];
|
||||
}
|
||||
|
||||
if (kOperation > 0) {
|
||||
c[row] += tmp;
|
||||
} else if (kOperation < 0) {
|
||||
c[row] -= tmp;
|
||||
} else {
|
||||
c[row] = tmp;
|
||||
}
|
||||
}
|
||||
#endif // CERES_NO_CUSTOM_BLAS
|
||||
}
|
||||
|
||||
// c op A' * b;
|
||||
//
|
||||
// where op can be +=, -=, or =.
|
||||
//
|
||||
// The template parameters (kRowA, kColA) allow specialization of the
|
||||
// loop at compile time. If this information is not available, then
|
||||
// Eigen::Dynamic should be used as the template argument.
|
||||
//
|
||||
// kOperation = 1 -> c += A' * b
|
||||
// kOperation = -1 -> c -= A' * b
|
||||
// kOperation = 0 -> c = A' * b
|
||||
template<int kRowA, int kColA, int kOperation>
|
||||
inline void MatrixTransposeVectorMultiply(const double* A,
|
||||
const int num_row_a,
|
||||
const int num_col_a,
|
||||
const double* b,
|
||||
double* c) {
|
||||
#ifdef CERES_NO_CUSTOM_BLAS
|
||||
const typename EigenTypes<kRowA, kColA>::ConstMatrixRef Aref(A, num_row_a, num_col_a);
|
||||
const typename EigenTypes<kRowA>::ConstVectorRef bref(b, num_row_a);
|
||||
typename EigenTypes<kColA>::VectorRef cref(c, num_col_a);
|
||||
|
||||
// lazyProduct works better than .noalias() for matrix-vector
|
||||
// products.
|
||||
if (kOperation > 0) {
|
||||
cref += Aref.transpose().lazyProduct(bref);
|
||||
} else if (kOperation < 0) {
|
||||
cref -= Aref.transpose().lazyProduct(bref);
|
||||
} else {
|
||||
cref -= Aref.transpose().lazyProduct(bref);
|
||||
}
|
||||
#else
|
||||
|
||||
DCHECK_GT(num_row_a, 0);
|
||||
DCHECK_GT(num_col_a, 0);
|
||||
DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
|
||||
DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
|
||||
|
||||
const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
|
||||
const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
|
||||
|
||||
for (int row = 0; row < NUM_COL_A; ++row) {
|
||||
double tmp = 0.0;
|
||||
for (int col = 0; col < NUM_ROW_A; ++col) {
|
||||
tmp += A[col * NUM_COL_A + row] * b[col];
|
||||
}
|
||||
|
||||
if (kOperation > 0) {
|
||||
c[row] += tmp;
|
||||
} else if (kOperation < 0) {
|
||||
c[row] -= tmp;
|
||||
} else {
|
||||
c[row] = tmp;
|
||||
}
|
||||
}
|
||||
#endif // CERES_NO_CUSTOM_BLAS
|
||||
}
|
||||
|
||||
#undef CERES_MAYBE_NOALIAS
|
||||
|
||||
} // namespace internal
|
||||
} // namespace ceres
|
||||
|
||||
#endif // CERES_INTERNAL_BLAS_H_
|
@ -33,6 +33,7 @@
|
||||
#include <cstddef>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include "ceres/blas.h"
|
||||
#include "ceres/block_structure.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/matrix_proto.h"
|
||||
@ -117,16 +118,15 @@ void BlockSparseMatrix::RightMultiply(const double* x, double* y) const {
|
||||
for (int i = 0; i < block_structure_->rows.size(); ++i) {
|
||||
int row_block_pos = block_structure_->rows[i].block.position;
|
||||
int row_block_size = block_structure_->rows[i].block.size;
|
||||
VectorRef yref(y + row_block_pos, row_block_size);
|
||||
const vector<Cell>& cells = block_structure_->rows[i].cells;
|
||||
for (int j = 0; j < cells.size(); ++j) {
|
||||
int col_block_id = cells[j].block_id;
|
||||
int col_block_size = block_structure_->cols[col_block_id].size;
|
||||
int col_block_pos = block_structure_->cols[col_block_id].position;
|
||||
ConstVectorRef xref(x + col_block_pos, col_block_size);
|
||||
MatrixRef m(values_.get() + cells[j].position,
|
||||
row_block_size, col_block_size);
|
||||
yref += m.lazyProduct(xref);
|
||||
MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||
values_.get() + cells[j].position, row_block_size, col_block_size,
|
||||
x + col_block_pos,
|
||||
y + row_block_pos);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -138,16 +138,16 @@ void BlockSparseMatrix::LeftMultiply(const double* x, double* y) const {
|
||||
for (int i = 0; i < block_structure_->rows.size(); ++i) {
|
||||
int row_block_pos = block_structure_->rows[i].block.position;
|
||||
int row_block_size = block_structure_->rows[i].block.size;
|
||||
const ConstVectorRef xref(x + row_block_pos, row_block_size);
|
||||
const vector<Cell>& cells = block_structure_->rows[i].cells;
|
||||
for (int j = 0; j < cells.size(); ++j) {
|
||||
int col_block_id = cells[j].block_id;
|
||||
int col_block_size = block_structure_->cols[col_block_id].size;
|
||||
int col_block_pos = block_structure_->cols[col_block_id].position;
|
||||
VectorRef yref(y + col_block_pos, col_block_size);
|
||||
MatrixRef m(values_.get() + cells[j].position,
|
||||
row_block_size, col_block_size);
|
||||
yref += m.transpose().lazyProduct(xref);
|
||||
MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||
values_.get() + cells[j].position, row_block_size, col_block_size,
|
||||
x + row_block_pos,
|
||||
y + col_block_pos);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
#include "ceres/blas.h"
|
||||
#include "ceres/block_sparse_matrix.h"
|
||||
#include "ceres/block_structure.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
@ -103,13 +104,10 @@ void PartitionedMatrixView::RightMultiplyE(const double* x, double* y) const {
|
||||
const int col_block_id = cell.block_id;
|
||||
const int col_block_pos = bs->cols[col_block_id].position;
|
||||
const int col_block_size = bs->cols[col_block_id].size;
|
||||
|
||||
ConstVectorRef xref(x + col_block_pos, col_block_size);
|
||||
VectorRef yref(y + row_block_pos, row_block_size);
|
||||
ConstMatrixRef m(row_values + cell.position,
|
||||
row_block_size,
|
||||
col_block_size);
|
||||
yref += m.lazyProduct(xref);
|
||||
MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||
row_values + cell.position, row_block_size, col_block_size,
|
||||
x + col_block_pos,
|
||||
y + row_block_pos);
|
||||
}
|
||||
}
|
||||
|
||||
@ -124,20 +122,16 @@ void PartitionedMatrixView::RightMultiplyF(const double* x, double* y) const {
|
||||
for (int r = 0; r < bs->rows.size(); ++r) {
|
||||
const int row_block_pos = bs->rows[r].block.position;
|
||||
const int row_block_size = bs->rows[r].block.size;
|
||||
VectorRef yref(y + row_block_pos, row_block_size);
|
||||
const vector<Cell>& cells = bs->rows[r].cells;
|
||||
for (int c = (r < num_row_blocks_e_) ? 1 : 0; c < cells.size(); ++c) {
|
||||
const double* row_values = matrix_.RowBlockValues(r);
|
||||
const int col_block_id = cells[c].block_id;
|
||||
const int col_block_pos = bs->cols[col_block_id].position;
|
||||
const int col_block_size = bs->cols[col_block_id].size;
|
||||
|
||||
ConstVectorRef xref(x + col_block_pos - num_cols_e(),
|
||||
col_block_size);
|
||||
ConstMatrixRef m(row_values + cells[c].position,
|
||||
row_block_size,
|
||||
col_block_size);
|
||||
yref += m.lazyProduct(xref);
|
||||
MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||
row_values + cells[c].position, row_block_size, col_block_size,
|
||||
x + col_block_pos - num_cols_e(),
|
||||
y + row_block_pos);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -155,13 +149,10 @@ void PartitionedMatrixView::LeftMultiplyE(const double* x, double* y) const {
|
||||
const int col_block_id = cell.block_id;
|
||||
const int col_block_pos = bs->cols[col_block_id].position;
|
||||
const int col_block_size = bs->cols[col_block_id].size;
|
||||
|
||||
ConstVectorRef xref(x + row_block_pos, row_block_size);
|
||||
VectorRef yref(y + col_block_pos, col_block_size);
|
||||
ConstMatrixRef m(row_values + cell.position,
|
||||
row_block_size,
|
||||
col_block_size);
|
||||
yref += m.transpose().lazyProduct(xref);
|
||||
MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||
row_values + cell.position, row_block_size, col_block_size,
|
||||
x + row_block_pos,
|
||||
y + col_block_pos);
|
||||
}
|
||||
}
|
||||
|
||||
@ -176,19 +167,16 @@ void PartitionedMatrixView::LeftMultiplyF(const double* x, double* y) const {
|
||||
for (int r = 0; r < bs->rows.size(); ++r) {
|
||||
const int row_block_pos = bs->rows[r].block.position;
|
||||
const int row_block_size = bs->rows[r].block.size;
|
||||
ConstVectorRef xref(x + row_block_pos, row_block_size);
|
||||
const vector<Cell>& cells = bs->rows[r].cells;
|
||||
for (int c = (r < num_row_blocks_e_) ? 1 : 0; c < cells.size(); ++c) {
|
||||
const double* row_values = matrix_.RowBlockValues(r);
|
||||
const int col_block_id = cells[c].block_id;
|
||||
const int col_block_pos = bs->cols[col_block_id].position;
|
||||
const int col_block_size = bs->cols[col_block_id].size;
|
||||
|
||||
VectorRef yref(y + col_block_pos - num_cols_e(), col_block_size);
|
||||
ConstMatrixRef m(row_values + cells[c].position,
|
||||
row_block_size,
|
||||
col_block_size);
|
||||
yref += m.transpose().lazyProduct(xref);
|
||||
MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||
row_values + cells[c].position, row_block_size, col_block_size,
|
||||
x + row_block_pos,
|
||||
y + col_block_pos - num_cols_e());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -267,15 +255,15 @@ void PartitionedMatrixView::UpdateBlockDiagonalEtE(
|
||||
const int row_block_size = bs->rows[r].block.size;
|
||||
const int block_id = cell.block_id;
|
||||
const int col_block_size = bs->cols[block_id].size;
|
||||
ConstMatrixRef m(row_values + cell.position,
|
||||
row_block_size,
|
||||
col_block_size);
|
||||
|
||||
const int cell_position =
|
||||
block_diagonal_structure->rows[block_id].cells[0].position;
|
||||
|
||||
MatrixRef(block_diagonal->mutable_values() + cell_position,
|
||||
col_block_size, col_block_size).noalias() += m.transpose() * m;
|
||||
MatrixTransposeMatrixMultiply
|
||||
<Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||
row_values + cell.position, row_block_size, col_block_size,
|
||||
row_values + cell.position, row_block_size, col_block_size,
|
||||
block_diagonal->mutable_values() + cell_position,
|
||||
0, 0, col_block_size, col_block_size);
|
||||
}
|
||||
}
|
||||
|
||||
@ -298,15 +286,16 @@ void PartitionedMatrixView::UpdateBlockDiagonalFtF(
|
||||
for (int c = (r < num_row_blocks_e_) ? 1 : 0; c < cells.size(); ++c) {
|
||||
const int col_block_id = cells[c].block_id;
|
||||
const int col_block_size = bs->cols[col_block_id].size;
|
||||
ConstMatrixRef m(row_values + cells[c].position,
|
||||
row_block_size,
|
||||
col_block_size);
|
||||
const int diagonal_block_id = col_block_id - num_col_blocks_e_;
|
||||
const int cell_position =
|
||||
block_diagonal_structure->rows[diagonal_block_id].cells[0].position;
|
||||
|
||||
MatrixRef(block_diagonal->mutable_values() + cell_position,
|
||||
col_block_size, col_block_size).noalias() += m.transpose() * m;
|
||||
MatrixTransposeMatrixMultiply
|
||||
<Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||
row_values + cells[c].position, row_block_size, col_block_size,
|
||||
row_values + cells[c].position, row_block_size, col_block_size,
|
||||
block_diagonal->mutable_values() + cell_position,
|
||||
0, 0, col_block_size, col_block_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -291,14 +291,8 @@ bool SparseSchurComplementSolver::SolveReducedLinearSystemUsingSuiteSparse(
|
||||
} else {
|
||||
factor_ = ss_.AnalyzeCholesky(cholmod_lhs);
|
||||
}
|
||||
|
||||
if (VLOG_IS_ON(2)) {
|
||||
cholmod_print_common(const_cast<char*>("Symbolic Analysis"), ss_.mutable_cc());
|
||||
}
|
||||
}
|
||||
|
||||
CHECK_NOTNULL(factor_);
|
||||
|
||||
cholmod_dense* cholmod_solution =
|
||||
ss_.SolveCholesky(cholmod_lhs, factor_, cholmod_rhs);
|
||||
|
||||
|
@ -321,9 +321,25 @@ class SchurEliminator : public SchurEliminatorBase {
|
||||
// see the documentation of the Chunk object above.
|
||||
vector<Chunk> chunks_;
|
||||
|
||||
// TODO(sameeragarwal): The following two arrays contain per-thread
|
||||
// storage. They should be refactored into a per thread struct.
|
||||
|
||||
// Buffer to store the products of the y and z blocks generated
|
||||
// during the elimination phase.
|
||||
// during the elimination phase. buffer_ is of size num_threads *
|
||||
// buffer_size_. Each thread accesses the chunk
|
||||
//
|
||||
// [thread_id * buffer_size_ , (thread_id + 1) * buffer_size_]
|
||||
//
|
||||
scoped_array<double> buffer_;
|
||||
|
||||
// Buffer to store per thread matrix matrix products used by
|
||||
// ChunkOuterProduct. Like buffer_ it is of size num_threads *
|
||||
// buffer_size_. Each thread accesses the chunk
|
||||
//
|
||||
// [thread_id * buffer_size_ , (thread_id + 1) * buffer_size_]
|
||||
//
|
||||
scoped_array<double> chunk_outer_product_buffer_;
|
||||
|
||||
int buffer_size_;
|
||||
int num_threads_;
|
||||
int uneliminated_row_begins_;
|
||||
|
@ -34,10 +34,6 @@
|
||||
#ifndef CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_
|
||||
#define CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_
|
||||
|
||||
#ifdef CERES_USE_OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
// Eigen has an internal threshold switching between different matrix
|
||||
// multiplication algorithms. In particular for matrices larger than
|
||||
// EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD it uses a cache friendly
|
||||
@ -46,19 +42,25 @@
|
||||
// are thin and long, the default choice may not be optimal. This is
|
||||
// the case for us, as the default choice causes a 30% performance
|
||||
// regression when we moved from Eigen2 to Eigen3.
|
||||
|
||||
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 10
|
||||
|
||||
#ifdef CERES_USE_OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include "Eigen/Dense"
|
||||
#include "ceres/blas.h"
|
||||
#include "ceres/block_random_access_matrix.h"
|
||||
#include "ceres/block_sparse_matrix.h"
|
||||
#include "ceres/block_structure.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/internal/scoped_ptr.h"
|
||||
#include "ceres/map_util.h"
|
||||
#include "ceres/schur_eliminator.h"
|
||||
#include "ceres/stl_util.h"
|
||||
#include "ceres/internal/eigen.h"
|
||||
#include "ceres/internal/scoped_ptr.h"
|
||||
#include "glog/logging.h"
|
||||
|
||||
namespace ceres {
|
||||
@ -149,13 +151,16 @@ Init(int num_eliminate_blocks, const CompressedRowBlockStructure* bs) {
|
||||
|
||||
buffer_.reset(new double[buffer_size_ * num_threads_]);
|
||||
|
||||
// chunk_outer_product_buffer_ only needs to store e_block_size *
|
||||
// f_block_size, which is always less than buffer_size_, so we just
|
||||
// allocate buffer_size_ per thread.
|
||||
chunk_outer_product_buffer_.reset(new double[buffer_size_ * num_threads_]);
|
||||
|
||||
STLDeleteElements(&rhs_locks_);
|
||||
rhs_locks_.resize(num_col_blocks - num_eliminate_blocks_);
|
||||
for (int i = 0; i < num_col_blocks - num_eliminate_blocks_; ++i) {
|
||||
rhs_locks_[i] = new Mutex;
|
||||
}
|
||||
|
||||
VLOG(1) << "Eliminator threads: " << num_threads_;
|
||||
}
|
||||
|
||||
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
|
||||
@ -261,7 +266,7 @@ Eliminate(const BlockSparseMatrixBase* A,
|
||||
typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix inverse_ete =
|
||||
ete
|
||||
.template selfadjointView<Eigen::Upper>()
|
||||
.ldlt()
|
||||
.llt()
|
||||
.solve(Matrix::Identity(e_block_size, e_block_size));
|
||||
|
||||
// For the current chunk compute and update the rhs of the reduced
|
||||
@ -294,8 +299,8 @@ BackSubstitute(const BlockSparseMatrixBase* A,
|
||||
const int e_block_id = bs->rows[chunk.start].cells.front().block_id;
|
||||
const int e_block_size = bs->cols[e_block_id].size;
|
||||
|
||||
typename EigenTypes<kEBlockSize>::VectorRef y_block(
|
||||
y + bs->cols[e_block_id].position, e_block_size);
|
||||
double* y_ptr = y + bs->cols[e_block_id].position;
|
||||
typename EigenTypes<kEBlockSize>::VectorRef y_block(y_ptr, e_block_size);
|
||||
|
||||
typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix
|
||||
ete(e_block_size, e_block_size);
|
||||
@ -312,40 +317,35 @@ BackSubstitute(const BlockSparseMatrixBase* A,
|
||||
const double* row_values = A->RowBlockValues(chunk.start + j);
|
||||
const Cell& e_cell = row.cells.front();
|
||||
DCHECK_EQ(e_block_id, e_cell.block_id);
|
||||
const typename EigenTypes<kRowBlockSize, kEBlockSize>::ConstMatrixRef
|
||||
e_block(row_values + e_cell.position,
|
||||
row.block.size,
|
||||
e_block_size);
|
||||
|
||||
typename EigenTypes<kRowBlockSize>::Vector
|
||||
sj =
|
||||
typename EigenTypes<kRowBlockSize>::Vector sj =
|
||||
typename EigenTypes<kRowBlockSize>::ConstVectorRef
|
||||
(b + bs->rows[chunk.start + j].block.position,
|
||||
row.block.size);
|
||||
(b + bs->rows[chunk.start + j].block.position, row.block.size);
|
||||
|
||||
for (int c = 1; c < row.cells.size(); ++c) {
|
||||
const int f_block_id = row.cells[c].block_id;
|
||||
const int f_block_size = bs->cols[f_block_id].size;
|
||||
const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
|
||||
f_block(row_values + row.cells[c].position,
|
||||
row.block.size, f_block_size);
|
||||
const int r_block = f_block_id - num_eliminate_blocks_;
|
||||
|
||||
sj -= f_block *
|
||||
typename EigenTypes<kFBlockSize>::ConstVectorRef
|
||||
(z + lhs_row_layout_[r_block], f_block_size);
|
||||
MatrixVectorMultiply<kRowBlockSize, kFBlockSize, -1>(
|
||||
row_values + row.cells[c].position, row.block.size, f_block_size,
|
||||
z + lhs_row_layout_[r_block],
|
||||
sj.data());
|
||||
}
|
||||
|
||||
y_block += e_block.transpose() * sj;
|
||||
ete.template selfadjointView<Eigen::Upper>()
|
||||
.rankUpdate(e_block.transpose(), 1.0);
|
||||
MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
|
||||
row_values + e_cell.position, row.block.size, e_block_size,
|
||||
sj.data(),
|
||||
y_ptr);
|
||||
|
||||
MatrixTransposeMatrixMultiply
|
||||
<kRowBlockSize, kEBlockSize,kRowBlockSize, kEBlockSize, 1>(
|
||||
row_values + e_cell.position, row.block.size, e_block_size,
|
||||
row_values + e_cell.position, row.block.size, e_block_size,
|
||||
ete.data(), 0, 0, e_block_size, e_block_size);
|
||||
}
|
||||
|
||||
y_block =
|
||||
ete
|
||||
.template selfadjointView<Eigen::Upper>()
|
||||
.ldlt()
|
||||
.solve(y_block);
|
||||
ete.llt().solveInPlace(y_block);
|
||||
}
|
||||
}
|
||||
|
||||
@ -382,15 +382,12 @@ UpdateRhs(const Chunk& chunk,
|
||||
for (int c = 1; c < row.cells.size(); ++c) {
|
||||
const int block_id = row.cells[c].block_id;
|
||||
const int block_size = bs->cols[block_id].size;
|
||||
const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
|
||||
b(row_values + row.cells[c].position,
|
||||
row.block.size, block_size);
|
||||
|
||||
const int block = block_id - num_eliminate_blocks_;
|
||||
CeresMutexLock l(rhs_locks_[block]);
|
||||
typename EigenTypes<kFBlockSize>::VectorRef
|
||||
(rhs + lhs_row_layout_[block], block_size).noalias()
|
||||
+= b.transpose() * sj;
|
||||
MatrixTransposeVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
|
||||
row_values + row.cells[c].position,
|
||||
row.block.size, block_size,
|
||||
sj.data(), rhs + lhs_row_layout_[block]);
|
||||
}
|
||||
b_pos += row.block.size;
|
||||
}
|
||||
@ -446,34 +443,31 @@ ChunkDiagonalBlockAndGradient(
|
||||
|
||||
// Extract the e_block, ETE += E_i' E_i
|
||||
const Cell& e_cell = row.cells.front();
|
||||
const typename EigenTypes<kRowBlockSize, kEBlockSize>::ConstMatrixRef
|
||||
e_block(row_values + e_cell.position,
|
||||
row.block.size,
|
||||
e_block_size);
|
||||
|
||||
ete->template selfadjointView<Eigen::Upper>()
|
||||
.rankUpdate(e_block.transpose(), 1.0);
|
||||
MatrixTransposeMatrixMultiply
|
||||
<kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>(
|
||||
row_values + e_cell.position, row.block.size, e_block_size,
|
||||
row_values + e_cell.position, row.block.size, e_block_size,
|
||||
ete->data(), 0, 0, e_block_size, e_block_size);
|
||||
|
||||
// g += E_i' b_i
|
||||
g->noalias() += e_block.transpose() *
|
||||
typename EigenTypes<kRowBlockSize>::ConstVectorRef
|
||||
(b + b_pos, row.block.size);
|
||||
MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
|
||||
row_values + e_cell.position, row.block.size, e_block_size,
|
||||
b + b_pos,
|
||||
g->data());
|
||||
|
||||
|
||||
// buffer = E'F. This computation is done by iterating over the
|
||||
// f_blocks for each row in the chunk.
|
||||
for (int c = 1; c < row.cells.size(); ++c) {
|
||||
const int f_block_id = row.cells[c].block_id;
|
||||
const int f_block_size = bs->cols[f_block_id].size;
|
||||
const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
|
||||
f_block(row_values + row.cells[c].position,
|
||||
row.block.size, f_block_size);
|
||||
|
||||
double* buffer_ptr =
|
||||
buffer + FindOrDie(chunk.buffer_layout, f_block_id);
|
||||
|
||||
typename EigenTypes<kEBlockSize, kFBlockSize>::MatrixRef
|
||||
(buffer_ptr, e_block_size, f_block_size).noalias()
|
||||
+= e_block.transpose() * f_block;
|
||||
MatrixTransposeMatrixMultiply
|
||||
<kRowBlockSize, kEBlockSize, kRowBlockSize, kFBlockSize, 1>(
|
||||
row_values + e_cell.position, row.block.size, e_block_size,
|
||||
row_values + row.cells[c].position, row.block.size, f_block_size,
|
||||
buffer_ptr, 0, 0, e_block_size, f_block_size);
|
||||
}
|
||||
b_pos += row.block.size;
|
||||
}
|
||||
@ -497,15 +491,24 @@ ChunkOuterProduct(const CompressedRowBlockStructure* bs,
|
||||
// references to the left hand side.
|
||||
const int e_block_size = inverse_ete.rows();
|
||||
BufferLayoutType::const_iterator it1 = buffer_layout.begin();
|
||||
|
||||
#ifdef CERES_USE_OPENMP
|
||||
int thread_id = omp_get_thread_num();
|
||||
#else
|
||||
int thread_id = 0;
|
||||
#endif
|
||||
double* b1_transpose_inverse_ete =
|
||||
chunk_outer_product_buffer_.get() + thread_id * buffer_size_;
|
||||
|
||||
// S(i,j) -= bi' * ete^{-1} b_j
|
||||
for (; it1 != buffer_layout.end(); ++it1) {
|
||||
const int block1 = it1->first - num_eliminate_blocks_;
|
||||
const int block1_size = bs->cols[it1->first].size;
|
||||
|
||||
const typename EigenTypes<kEBlockSize, kFBlockSize>::ConstMatrixRef
|
||||
b1(buffer + it1->second, e_block_size, block1_size);
|
||||
const typename EigenTypes<kFBlockSize, kEBlockSize>::Matrix
|
||||
b1_transpose_inverse_ete = b1.transpose() * inverse_ete;
|
||||
MatrixTransposeMatrixMultiply
|
||||
<kEBlockSize, kFBlockSize, kEBlockSize, kEBlockSize, 0>(
|
||||
buffer + it1->second, e_block_size, block1_size,
|
||||
inverse_ete.data(), e_block_size, e_block_size,
|
||||
b1_transpose_inverse_ete, 0, 0, block1_size, e_block_size);
|
||||
|
||||
BufferLayoutType::const_iterator it2 = it1;
|
||||
for (; it2 != buffer_layout.end(); ++it2) {
|
||||
@ -515,46 +518,15 @@ ChunkOuterProduct(const CompressedRowBlockStructure* bs,
|
||||
CellInfo* cell_info = lhs->GetCell(block1, block2,
|
||||
&r, &c,
|
||||
&row_stride, &col_stride);
|
||||
if (cell_info == NULL) {
|
||||
continue;
|
||||
if (cell_info != NULL) {
|
||||
const int block2_size = bs->cols[it2->first].size;
|
||||
CeresMutexLock l(&cell_info->m);
|
||||
MatrixMatrixMultiply
|
||||
<kFBlockSize, kEBlockSize, kEBlockSize, kFBlockSize, -1>(
|
||||
b1_transpose_inverse_ete, block1_size, e_block_size,
|
||||
buffer + it2->second, e_block_size, block2_size,
|
||||
cell_info->values, r, c, row_stride, col_stride);
|
||||
}
|
||||
|
||||
const int block2_size = bs->cols[it2->first].size;
|
||||
const typename EigenTypes<kEBlockSize, kFBlockSize>::ConstMatrixRef
|
||||
b2(buffer + it2->second, e_block_size, block2_size);
|
||||
|
||||
CeresMutexLock l(&cell_info->m);
|
||||
MatrixRef m(cell_info->values, row_stride, col_stride);
|
||||
|
||||
// We explicitly construct a block object here instead of using
|
||||
// m.block(), as m.block() variant of the constructor does not
|
||||
// allow mixing of template sizing and runtime sizing parameters
|
||||
// like the Matrix class does.
|
||||
Eigen::Block<MatrixRef, kFBlockSize, kFBlockSize>
|
||||
block(m, r, c, block1_size, block2_size);
|
||||
#ifdef CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG
|
||||
// Removing the ".noalias()" annotation on the following statement is
|
||||
// necessary to produce a correct build with the Android NDK, including
|
||||
// versions 6, 7, 8, and 8b, when built with STLPort and the
|
||||
// non-standalone toolchain (i.e. ndk-build). This appears to be a
|
||||
// compiler bug; if the workaround is not in place, the line
|
||||
//
|
||||
// block.noalias() -= b1_transpose_inverse_ete * b2;
|
||||
//
|
||||
// gets compiled to
|
||||
//
|
||||
// block.noalias() += b1_transpose_inverse_ete * b2;
|
||||
//
|
||||
// which breaks schur elimination. Introducing a temporary by removing the
|
||||
// .noalias() annotation causes the issue to disappear. Tracking this
|
||||
// issue down was tricky, since the test suite doesn't run when built with
|
||||
// the non-standalone toolchain.
|
||||
//
|
||||
// TODO(keir): Make a reproduction case for this and send it upstream.
|
||||
block -= b1_transpose_inverse_ete * b2;
|
||||
#else
|
||||
block.noalias() -= b1_transpose_inverse_ete * b2;
|
||||
#endif // CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -624,10 +596,13 @@ NoEBlockRowOuterProduct(const BlockSparseMatrixBase* A,
|
||||
&row_stride, &col_stride);
|
||||
if (cell_info != NULL) {
|
||||
CeresMutexLock l(&cell_info->m);
|
||||
MatrixRef m(cell_info->values, row_stride, col_stride);
|
||||
m.block(r, c, block1_size, block1_size)
|
||||
.selfadjointView<Eigen::Upper>()
|
||||
.rankUpdate(b1.transpose(), 1.0);
|
||||
// This multiply currently ignores the fact that this is a
|
||||
// symmetric outer product.
|
||||
MatrixTransposeMatrixMultiply
|
||||
<Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||
row_values + row.cells[i].position, row.block.size, block1_size,
|
||||
row_values + row.cells[i].position, row.block.size, block1_size,
|
||||
cell_info->values, r, c, row_stride, col_stride);
|
||||
}
|
||||
|
||||
for (int j = i + 1; j < row.cells.size(); ++j) {
|
||||
@ -638,17 +613,15 @@ NoEBlockRowOuterProduct(const BlockSparseMatrixBase* A,
|
||||
CellInfo* cell_info = lhs->GetCell(block1, block2,
|
||||
&r, &c,
|
||||
&row_stride, &col_stride);
|
||||
if (cell_info == NULL) {
|
||||
continue;
|
||||
if (cell_info != NULL) {
|
||||
const int block2_size = bs->cols[row.cells[j].block_id].size;
|
||||
CeresMutexLock l(&cell_info->m);
|
||||
MatrixTransposeMatrixMultiply
|
||||
<Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||
row_values + row.cells[i].position, row.block.size, block1_size,
|
||||
row_values + row.cells[j].position, row.block.size, block2_size,
|
||||
cell_info->values, r, c, row_stride, col_stride);
|
||||
}
|
||||
|
||||
const int block2_size = bs->cols[row.cells[j].block_id].size;
|
||||
CeresMutexLock l(&cell_info->m);
|
||||
MatrixRef m(cell_info->values, row_stride, col_stride);
|
||||
m.block(r, c, block1_size, block2_size).noalias() +=
|
||||
b1.transpose() * ConstMatrixRef(row_values + row.cells[j].position,
|
||||
row.block.size,
|
||||
block2_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -670,25 +643,18 @@ EBlockRowOuterProduct(const BlockSparseMatrixBase* A,
|
||||
DCHECK_GE(block1, 0);
|
||||
|
||||
const int block1_size = bs->cols[row.cells[i].block_id].size;
|
||||
const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
|
||||
b1(row_values + row.cells[i].position,
|
||||
row.block.size, block1_size);
|
||||
{
|
||||
int r, c, row_stride, col_stride;
|
||||
CellInfo* cell_info = lhs->GetCell(block1, block1,
|
||||
&r, &c,
|
||||
&row_stride, &col_stride);
|
||||
if (cell_info == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
int r, c, row_stride, col_stride;
|
||||
CellInfo* cell_info = lhs->GetCell(block1, block1,
|
||||
&r, &c,
|
||||
&row_stride, &col_stride);
|
||||
if (cell_info != NULL) {
|
||||
CeresMutexLock l(&cell_info->m);
|
||||
MatrixRef m(cell_info->values, row_stride, col_stride);
|
||||
|
||||
Eigen::Block<MatrixRef, kFBlockSize, kFBlockSize>
|
||||
block(m, r, c, block1_size, block1_size);
|
||||
block.template selfadjointView<Eigen::Upper>()
|
||||
.rankUpdate(b1.transpose(), 1.0);
|
||||
// block += b1.transpose() * b1;
|
||||
MatrixTransposeMatrixMultiply
|
||||
<kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>(
|
||||
row_values + row.cells[i].position, row.block.size, block1_size,
|
||||
row_values + row.cells[i].position, row.block.size, block1_size,
|
||||
cell_info->values, r, c, row_stride, col_stride);
|
||||
}
|
||||
|
||||
for (int j = i + 1; j < row.cells.size(); ++j) {
|
||||
@ -700,20 +666,14 @@ EBlockRowOuterProduct(const BlockSparseMatrixBase* A,
|
||||
CellInfo* cell_info = lhs->GetCell(block1, block2,
|
||||
&r, &c,
|
||||
&row_stride, &col_stride);
|
||||
if (cell_info == NULL) {
|
||||
continue;
|
||||
if (cell_info != NULL) {
|
||||
// block += b1.transpose() * b2;
|
||||
MatrixTransposeMatrixMultiply
|
||||
<kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>(
|
||||
row_values + row.cells[i].position, row.block.size, block1_size,
|
||||
row_values + row.cells[j].position, row.block.size, block2_size,
|
||||
cell_info->values, r, c, row_stride, col_stride);
|
||||
}
|
||||
|
||||
const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
|
||||
b2(row_values + row.cells[j].position,
|
||||
row.block.size,
|
||||
block2_size);
|
||||
|
||||
CeresMutexLock l(&cell_info->m);
|
||||
MatrixRef m(cell_info->values, row_stride, col_stride);
|
||||
Eigen::Block<MatrixRef, kFBlockSize, kFBlockSize>
|
||||
block(m, r, c, block1_size, block2_size);
|
||||
block.noalias() += b1.transpose() * b2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -213,13 +213,7 @@ LinearSolver::Summary SparseNormalCholeskySolver::SolveImplUsingSuiteSparse(
|
||||
} else {
|
||||
factor_ = ss_.AnalyzeCholesky(lhs.get());
|
||||
}
|
||||
|
||||
if (VLOG_IS_ON(2)) {
|
||||
cholmod_print_common(const_cast<char*>("Symbolic Analysis"), ss_.mutable_cc());
|
||||
}
|
||||
}
|
||||
|
||||
CHECK_NOTNULL(factor_);
|
||||
event_logger.AddEvent("Analysis");
|
||||
|
||||
cholmod_dense* sol = ss_.SolveCholesky(lhs.get(), factor_, rhs);
|
||||
|
@ -35,8 +35,18 @@
|
||||
#include "cholmod.h"
|
||||
#include "ceres/compressed_row_sparse_matrix.h"
|
||||
#include "ceres/triplet_sparse_matrix.h"
|
||||
|
||||
namespace ceres {
|
||||
namespace internal {
|
||||
|
||||
SuiteSparse::SuiteSparse() {
|
||||
cholmod_start(&cc_);
|
||||
}
|
||||
|
||||
SuiteSparse::~SuiteSparse() {
|
||||
cholmod_finish(&cc_);
|
||||
}
|
||||
|
||||
cholmod_sparse* SuiteSparse::CreateSparseMatrix(TripletSparseMatrix* A) {
|
||||
cholmod_triplet triplet;
|
||||
|
||||
@ -117,10 +127,16 @@ cholmod_factor* SuiteSparse::AnalyzeCholesky(cholmod_sparse* A) {
|
||||
cc_.nmethods = 1;
|
||||
cc_.method[0].ordering = CHOLMOD_AMD;
|
||||
cc_.supernodal = CHOLMOD_AUTO;
|
||||
|
||||
cholmod_factor* factor = cholmod_analyze(A, &cc_);
|
||||
CHECK_EQ(cc_.status, CHOLMOD_OK)
|
||||
<< "Cholmod symbolic analysis failed " << cc_.status;
|
||||
CHECK_NOTNULL(factor);
|
||||
|
||||
if (VLOG_IS_ON(2)) {
|
||||
cholmod_print_common(const_cast<char*>("Symbolic Analysis"), &cc_);
|
||||
}
|
||||
|
||||
return factor;
|
||||
}
|
||||
|
||||
@ -139,13 +155,20 @@ cholmod_factor* SuiteSparse::AnalyzeCholeskyWithUserOrdering(
|
||||
cholmod_sparse* A,
|
||||
const vector<int>& ordering) {
|
||||
CHECK_EQ(ordering.size(), A->nrow);
|
||||
|
||||
cc_.nmethods = 1;
|
||||
cc_.method[0].ordering = CHOLMOD_GIVEN;
|
||||
|
||||
cholmod_factor* factor =
|
||||
cholmod_analyze_p(A, const_cast<int*>(&ordering[0]), NULL, 0, &cc_);
|
||||
CHECK_EQ(cc_.status, CHOLMOD_OK)
|
||||
<< "Cholmod symbolic analysis failed " << cc_.status;
|
||||
CHECK_NOTNULL(factor);
|
||||
|
||||
if (VLOG_IS_ON(2)) {
|
||||
cholmod_print_common(const_cast<char*>("Symbolic Analysis"), &cc_);
|
||||
}
|
||||
|
||||
return factor;
|
||||
}
|
||||
|
||||
@ -276,36 +299,52 @@ bool SuiteSparse::Cholesky(cholmod_sparse* A, cholmod_factor* L) {
|
||||
CHECK_NOTNULL(A);
|
||||
CHECK_NOTNULL(L);
|
||||
|
||||
// Save the current print level and silence CHOLMOD, otherwise
|
||||
// CHOLMOD is prone to dumping stuff to stderr, which can be
|
||||
// distracting when the error (matrix is indefinite) is not a fatal
|
||||
// failure.
|
||||
const int old_print_level = cc_.print;
|
||||
cc_.print = 0;
|
||||
|
||||
cc_.quick_return_if_not_posdef = 1;
|
||||
int status = cholmod_factorize(A, L, &cc_);
|
||||
cc_.print = old_print_level;
|
||||
|
||||
// TODO(sameeragarwal): This switch statement is not consistent. It
|
||||
// treats all kinds of CHOLMOD failures as warnings. Some of these
|
||||
// like out of memory are definitely not warnings. The problem is
|
||||
// that the return value Cholesky is two valued, but the state of
|
||||
// the linear solver is really three valued. SUCCESS,
|
||||
// NON_FATAL_FAILURE (e.g., indefinite matrix) and FATAL_FAILURE
|
||||
// (e.g. out of memory).
|
||||
switch (cc_.status) {
|
||||
case CHOLMOD_NOT_INSTALLED:
|
||||
LOG(WARNING) << "Cholmod failure: method not installed.";
|
||||
LOG(WARNING) << "CHOLMOD failure: Method not installed.";
|
||||
return false;
|
||||
case CHOLMOD_OUT_OF_MEMORY:
|
||||
LOG(WARNING) << "Cholmod failure: out of memory.";
|
||||
LOG(WARNING) << "CHOLMOD failure: Out of memory.";
|
||||
return false;
|
||||
case CHOLMOD_TOO_LARGE:
|
||||
LOG(WARNING) << "Cholmod failure: integer overflow occured.";
|
||||
LOG(WARNING) << "CHOLMOD failure: Integer overflow occured.";
|
||||
return false;
|
||||
case CHOLMOD_INVALID:
|
||||
LOG(WARNING) << "Cholmod failure: invalid input.";
|
||||
LOG(WARNING) << "CHOLMOD failure: Invalid input.";
|
||||
return false;
|
||||
case CHOLMOD_NOT_POSDEF:
|
||||
// TODO(sameeragarwal): These two warnings require more
|
||||
// sophisticated handling going forward. For now we will be
|
||||
// strict and treat them as failures.
|
||||
LOG(WARNING) << "Cholmod warning: matrix not positive definite.";
|
||||
LOG(WARNING) << "CHOLMOD warning: Matrix not positive definite.";
|
||||
return false;
|
||||
case CHOLMOD_DSMALL:
|
||||
LOG(WARNING) << "Cholmod warning: D for LDL' or diag(L) or "
|
||||
LOG(WARNING) << "CHOLMOD warning: D for LDL' or diag(L) or "
|
||||
<< "LL' has tiny absolute value.";
|
||||
return false;
|
||||
case CHOLMOD_OK:
|
||||
if (status != 0) {
|
||||
return true;
|
||||
}
|
||||
LOG(WARNING) << "Cholmod failure: cholmod_factorize returned zero "
|
||||
LOG(WARNING) << "CHOLMOD failure: cholmod_factorize returned zero "
|
||||
<< "but cholmod_common::status is CHOLMOD_OK."
|
||||
<< "Please report this to ceres-solver@googlegroups.com.";
|
||||
return false;
|
||||
|
@ -56,8 +56,8 @@ class TripletSparseMatrix;
|
||||
// for all cholmod function calls.
|
||||
class SuiteSparse {
|
||||
public:
|
||||
SuiteSparse() { cholmod_start(&cc_); }
|
||||
~SuiteSparse() { cholmod_finish(&cc_); }
|
||||
SuiteSparse();
|
||||
~SuiteSparse();
|
||||
|
||||
// Functions for building cholmod_sparse objects from sparse
|
||||
// matrices stored in triplet form. The matrix A is not
|
||||
|
@ -426,14 +426,8 @@ bool VisibilityBasedPreconditioner::Factorize() {
|
||||
} else {
|
||||
factor_ = ss_.AnalyzeCholesky(lhs);
|
||||
}
|
||||
|
||||
if (VLOG_IS_ON(2)) {
|
||||
cholmod_print_common(const_cast<char*>("Symbolic Analysis"), ss_.mutable_cc());
|
||||
}
|
||||
}
|
||||
|
||||
CHECK_NOTNULL(factor_);
|
||||
|
||||
bool status = ss_.Cholesky(lhs, factor_);
|
||||
ss_.Free(lhs);
|
||||
return status;
|
||||
|
Loading…
Reference in New Issue
Block a user