forked from bartvdbraak/blender
Bundle current master of ceres-solver
Thins brings up some speed improvements: SPARSE_SCHUR is approx 1.3-1.5x times faster ITERATIVE_SCHUR is approx 1.2x times faster For blender this means camera solution go a bit faster now. Would not have affect on tracking speed.
This commit is contained in:
parent
a00b72bc75
commit
43b61fb8bd
@ -141,6 +141,7 @@ set(SRC
|
|||||||
include/ceres/solver.h
|
include/ceres/solver.h
|
||||||
include/ceres/types.h
|
include/ceres/types.h
|
||||||
internal/ceres/array_utils.h
|
internal/ceres/array_utils.h
|
||||||
|
internal/ceres/blas.h
|
||||||
internal/ceres/block_evaluate_preparer.h
|
internal/ceres/block_evaluate_preparer.h
|
||||||
internal/ceres/block_jacobian_writer.h
|
internal/ceres/block_jacobian_writer.h
|
||||||
internal/ceres/block_jacobi_preconditioner.h
|
internal/ceres/block_jacobi_preconditioner.h
|
||||||
|
350
extern/libmv/third_party/ceres/ChangeLog
vendored
350
extern/libmv/third_party/ceres/ChangeLog
vendored
@ -1,3 +1,185 @@
|
|||||||
|
commit 520d35ef22dbcb05e344451c03ae64304e524a06
|
||||||
|
Author: Sameer Agarwal <sameeragarwal@google.com>
|
||||||
|
Date: Thu Apr 4 08:16:02 2013 -0700
|
||||||
|
|
||||||
|
Further BLAS improvements.
|
||||||
|
|
||||||
|
1. Switch to Eigen's implementation when all dimensions are fixed.
|
||||||
|
2. Use lazyProduct for eigen matrix-vector product. This brings
|
||||||
|
eigen's performance on iterative_schur closer to what it used
|
||||||
|
to be before the last commit. There is however still an
|
||||||
|
improvement to be had by using the naive implementation when
|
||||||
|
the matrix and vector have dynamic dimensions.
|
||||||
|
|
||||||
|
BENCHMARK
|
||||||
|
HEAD CHANGE
|
||||||
|
|
||||||
|
problem-16-22106-pre.txt
|
||||||
|
gcc-eigen sparse_schur 0.859 gcc-eigen sparse_schur 0.853
|
||||||
|
clang-eigen sparse_schur 0.848 clang-eigen sparse_schur 0.850
|
||||||
|
gcc-blas sparse_schur 0.956 gcc-blas sparse_schur 0.865
|
||||||
|
clang-blas sparse_schur 0.954 clang-blas sparse_schur 0.858
|
||||||
|
gcc-eigen iterative_schur 4.656 gcc-eigen iterative_schur 3.271
|
||||||
|
clang-eigen iterative_schur 4.664 clang-eigen iterative_schur 3.307
|
||||||
|
gcc-blas iterative_schur 2.598 gcc-blas iterative_schur 2.620
|
||||||
|
clang-blas iterative_schur 2.554 clang-blas iterative_schur 2.567
|
||||||
|
|
||||||
|
problem-49-7776-pre.txt
|
||||||
|
gcc-eigen sparse_schur 0.477 gcc-eigen sparse_schur 0.472
|
||||||
|
clang-eigen sparse_schur 0.475 clang-eigen sparse_schur 0.479
|
||||||
|
gcc-blas sparse_schur 0.521 gcc-blas sparse_schur 0.469
|
||||||
|
clang-blas sparse_schur 0.508 clang-blas sparse_schur 0.471
|
||||||
|
gcc-eigen iterative_schur 3.172 gcc-eigen iterative_schur 2.088
|
||||||
|
clang-eigen iterative_schur 3.161 clang-eigen iterative_schur 2.079
|
||||||
|
gcc-blas iterative_schur 1.701 gcc-blas iterative_schur 1.720
|
||||||
|
clang-blas iterative_schur 1.708 clang-blas iterative_schur 1.694
|
||||||
|
|
||||||
|
problem-245-198739-pre.txt
|
||||||
|
gcc-eigen sparse_schur 28.092 gcc-eigen sparse_schur 28.233
|
||||||
|
clang-eigen sparse_schur 28.148 clang-eigen sparse_schur 28.400
|
||||||
|
gcc-blas sparse_schur 30.919 gcc-blas sparse_schur 28.110
|
||||||
|
clang-blas sparse_schur 31.001 clang-blas sparse_schur 28.407
|
||||||
|
gcc-eigen iterative_schur 63.095 gcc-eigen iterative_schur 43.694
|
||||||
|
clang-eigen iterative_schur 63.412 clang-eigen iterative_schur 43.473
|
||||||
|
gcc-blas iterative_schur 33.353 gcc-blas iterative_schur 33.321
|
||||||
|
clang-blas iterative_schur 33.276 clang-blas iterative_schur 33.278
|
||||||
|
|
||||||
|
problem-257-65132-pre.txt
|
||||||
|
gcc-eigen sparse_schur 3.687 gcc-eigen sparse_schur 3.629
|
||||||
|
clang-eigen sparse_schur 3.669 clang-eigen sparse_schur 3.652
|
||||||
|
gcc-blas sparse_schur 3.947 gcc-blas sparse_schur 3.673
|
||||||
|
clang-blas sparse_schur 3.952 clang-blas sparse_schur 3.678
|
||||||
|
gcc-eigen iterative_schur 121.512 gcc-eigen iterative_schur 76.833
|
||||||
|
clang-eigen iterative_schur 123.547 clang-eigen iterative_schur 78.763
|
||||||
|
gcc-blas iterative_schur 68.334 gcc-blas iterative_schur 68.612
|
||||||
|
clang-blas iterative_schur 67.793 clang-blas iterative_schur 68.266
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
|
||||||
|
1. Naive BLAS was a bit worse than eigen on fixed sized matrices. We did not see this
|
||||||
|
before because of the different inlining thresholds. Fixing this boosted eigen's
|
||||||
|
performance. Also the disparity between gcc and clang has gone away.
|
||||||
|
|
||||||
|
2. SPARSE_SCHUR performance remains the same, since it is only testing static sized
|
||||||
|
matrices.
|
||||||
|
|
||||||
|
3. ITERATIVE_SCHUR performance goes up substantially due to the lazyProduct change,
|
||||||
|
but even there, since most of the products are dynamic sized, the naive implementation
|
||||||
|
wins handily.
|
||||||
|
|
||||||
|
Change-Id: Idc17f35b9c68aaebb1b2e131adf3af8374a85a4c
|
||||||
|
|
||||||
|
commit 25ac54807eedf16fd6c34efc390901ee549a7d14
|
||||||
|
Author: Sameer Agarwal <sameeragarwal@google.com>
|
||||||
|
Date: Wed Apr 3 18:51:27 2013 -0700
|
||||||
|
|
||||||
|
Speed up Jets.
|
||||||
|
|
||||||
|
Change-Id: I101bac1b1a1cf72ca49ffcf843b73c0ef5a6dfcb
|
||||||
|
|
||||||
|
commit 3d6eceb45cf27024865908f0c10a5c2b0f8719cf
|
||||||
|
Author: Sameer Agarwal <sameeragarwal@google.com>
|
||||||
|
Date: Tue Apr 2 21:45:48 2013 -0700
|
||||||
|
|
||||||
|
Replace more instances of Eigen GEMV with Ceres BLAS.
|
||||||
|
|
||||||
|
With this ITERATIVE_SCHUR with JACOBI preconditioner went down from
|
||||||
|
280 seconds to 150 seconds on problem-744-543562-pre.txt.
|
||||||
|
|
||||||
|
Change-Id: I4f319c1108421e8d59f58654a4c0576ad65df609
|
||||||
|
|
||||||
|
commit 296fa9b1279ee1900c8ae32d70e97cd10fc0b46b
|
||||||
|
Author: Sameer Agarwal <sameeragarwal@google.com>
|
||||||
|
Date: Tue Apr 2 09:44:15 2013 -0700
|
||||||
|
|
||||||
|
Replace Eigen block operations with small GEMM and GEMV loops.
|
||||||
|
|
||||||
|
1. Add Matrix-Matrix and Matrix-Vector multiply functions.
|
||||||
|
2. Replace Eigen usage in SchurEliminator with these custom
|
||||||
|
matrix operations.
|
||||||
|
3. Save on some memory allocations in ChunkOuterProduct.
|
||||||
|
4. Replace LDLT with LLT.
|
||||||
|
|
||||||
|
As a result on problem-16-22106-pre.txt, the linear solver time
|
||||||
|
goes down from 1.2s to 0.64s.
|
||||||
|
|
||||||
|
Change-Id: I2daa667960e0a1e8834489965a30be31f37fd87f
|
||||||
|
|
||||||
|
commit 222ca20e8facf706582fe696b7f41247391eac12
|
||||||
|
Author: Sameer Agarwal <sameeragarwal@google.com>
|
||||||
|
Date: Mon Apr 1 09:11:07 2013 -0700
|
||||||
|
|
||||||
|
SuiteSparse cleanup.
|
||||||
|
|
||||||
|
1. Silence CHOLMOD's indefiniteness warnings.
|
||||||
|
2. Add a comment about how the error handling in suitesparse.cc
|
||||||
|
needs to be improved.
|
||||||
|
3. Move the analysis logging into suitesparse.cc and out of the
|
||||||
|
three callsites.
|
||||||
|
|
||||||
|
Change-Id: Idd396b8ea4bf59fc1ffc7f9fcbbc7b38ed71643c
|
||||||
|
|
||||||
|
commit b7ba93459b7f584eedb1a9f42f3d06bccabd33dc
|
||||||
|
Author: Petter Strandmark <petter.strandmark@gmail.com>
|
||||||
|
Date: Tue Feb 19 12:52:58 2013 +0100
|
||||||
|
|
||||||
|
Enable larger tuple sizes for Visual Studio 2012.
|
||||||
|
|
||||||
|
Visual Studio 2012 does not have variadic templates and implements
|
||||||
|
tuples differently. By default, only sizes up to 5 are supported,
|
||||||
|
which conflicts with Gtest.
|
||||||
|
|
||||||
|
Change-Id: Ieb8d59e4329863cbfa2729d8a76db0714c08d259
|
||||||
|
|
||||||
|
commit 564a83fcc690ac8383bf52a782c45757ae7fa2ad
|
||||||
|
Author: Sameer Agarwal <sameeragarwal@google.com>
|
||||||
|
Date: Tue Mar 26 11:11:43 2013 -0700
|
||||||
|
|
||||||
|
Lint cleanup from William Rucklidge.
|
||||||
|
|
||||||
|
Change-Id: I8d4a0aa3e264775d20e99a6b5265f3023de92560
|
||||||
|
|
||||||
|
commit cbe64827becbbaab5b435a71bf00353b4ddd026b
|
||||||
|
Author: Sameer Agarwal <sameeragarwal@google.com>
|
||||||
|
Date: Mon Mar 25 17:39:53 2013 -0700
|
||||||
|
|
||||||
|
Update documentation
|
||||||
|
|
||||||
|
Change-Id: Iea3c4b5409e593b1fb070a491ba8a479db32ca58
|
||||||
|
|
||||||
|
commit 802639c89603c9541e624766349d1989a1f641c0
|
||||||
|
Author: Pablo Speciale <pablo.speciale@gmail.com>
|
||||||
|
Date: Mon Mar 25 20:53:45 2013 -0700
|
||||||
|
|
||||||
|
ceresproblem label was incorrect
|
||||||
|
|
||||||
|
Change-Id: I3e210375adba4fa50ff3c25398b20a65d241cb35
|
||||||
|
|
||||||
|
commit 6bcb8d9c304a3b218f8788018dfdfe368bb7d60c
|
||||||
|
Author: Pablo Speciale <pablo.speciale@gmail.com>
|
||||||
|
Date: Mon Mar 25 16:40:26 2013 -0700
|
||||||
|
|
||||||
|
Compiling against static or shared library
|
||||||
|
|
||||||
|
Change-Id: I3fb35e9a49f90b8894f59dde49c90a7c2dd74b0a
|
||||||
|
|
||||||
|
commit 619cfe692020c078275b68eac2167232fafdfffb
|
||||||
|
Author: Sameer Agarwal <sameeragarwal@google.com>
|
||||||
|
Date: Mon Mar 25 14:03:41 2013 -0700
|
||||||
|
|
||||||
|
Update version history
|
||||||
|
|
||||||
|
Change-Id: I1d036efad1507efd19d8581f147b38170b1f0543
|
||||||
|
|
||||||
|
commit 6ae34757850a5fa8213e0d1a540d9d75d6840a08
|
||||||
|
Author: Pablo Speciale <pablo.speciale@gmail.com>
|
||||||
|
Date: Sun Mar 24 22:30:52 2013 -0700
|
||||||
|
|
||||||
|
Added documentation regarding the use of Ceres with cmake (once installed)
|
||||||
|
Commets about the flag ``BUILD_DOCUMENTATION=ON``
|
||||||
|
|
||||||
|
Change-Id: I8814927e60af190c8043bfc36e77fe76bfe6f562
|
||||||
|
|
||||||
commit f46de9e697eb5b8756084615e29ded48600a4d39
|
commit f46de9e697eb5b8756084615e29ded48600a4d39
|
||||||
Author: Sergey Sharybin <sergey.vfx@gmail.com>
|
Author: Sergey Sharybin <sergey.vfx@gmail.com>
|
||||||
Date: Thu Mar 21 15:31:35 2013 +0600
|
Date: Thu Mar 21 15:31:35 2013 +0600
|
||||||
@ -437,171 +619,3 @@ Date: Sun Feb 24 14:15:45 2013 -0800
|
|||||||
Minor release script fixes.
|
Minor release script fixes.
|
||||||
|
|
||||||
Change-Id: Ifd0a7f4f584c85d4d9574eca46094b372a8d7aff
|
Change-Id: Ifd0a7f4f584c85d4d9574eca46094b372a8d7aff
|
||||||
|
|
||||||
commit b53c9667f508c125b8aa833e7a063fa44ef8a98e
|
|
||||||
Author: Sergey Sharybin <sergey.vfx@gmail.com>
|
|
||||||
Date: Mon Feb 25 01:14:26 2013 +0600
|
|
||||||
|
|
||||||
Solve No Previous Prototype GCC warning
|
|
||||||
|
|
||||||
In some cases there were missing includes of own
|
|
||||||
header files from implementation files.
|
|
||||||
|
|
||||||
In other cases moved function which are only used
|
|
||||||
within single file into an anonymous namespace.
|
|
||||||
|
|
||||||
Change-Id: I2c6b411bcfbc521e2a5f21265dc8e009a548b1c8
|
|
||||||
|
|
||||||
commit 267ccc45a3e875bf87832a8ad615be690b4926d3
|
|
||||||
Author: Sergey Sharybin <sergey.vfx@gmail.com>
|
|
||||||
Date: Mon Feb 25 01:04:16 2013 +0600
|
|
||||||
|
|
||||||
Fix for MinGW build on Windows
|
|
||||||
|
|
||||||
GG_LONGLONG and GG_ULONGLONG shall use LL and ULL suffixes,
|
|
||||||
since MinGW is actuall a GCC compiler.
|
|
||||||
|
|
||||||
Solved by checking whether compilation happens with MinGW
|
|
||||||
or not using standard MinGW's __MINGW32__ and __MINGW64__
|
|
||||||
definitions.
|
|
||||||
|
|
||||||
Change-Id: I789b34f6342a56ba42f4b280f7242700022ab7a1
|
|
||||||
|
|
||||||
commit 509f68cfe3fd13b794c4e67ff38c761407c858cf
|
|
||||||
Author: Sameer Agarwal <sameeragarwal@google.com>
|
|
||||||
Date: Wed Feb 20 01:39:03 2013 -0800
|
|
||||||
|
|
||||||
Problem::Evaluate implementation.
|
|
||||||
|
|
||||||
1. Add Problem::Evaluate and tests.
|
|
||||||
2. Remove Solver::Summary::initial/final_*
|
|
||||||
3. Remove Solver::Options::return_* members.
|
|
||||||
4. Various cpplint cleanups.
|
|
||||||
|
|
||||||
Change-Id: I4266de53489896f72d9c6798c5efde6748d68a47
|
|
||||||
|
|
||||||
commit d4a0bf86d688d1b68e00ff302858de5a4e0d9727
|
|
||||||
Author: Keir Mierle <mierle@gmail.com>
|
|
||||||
Date: Sun Feb 24 10:35:44 2013 -0800
|
|
||||||
|
|
||||||
Fix threading build on Windows.
|
|
||||||
|
|
||||||
On Windows, including the "windows.h" header defines an enormous number of
|
|
||||||
symbols; some of which are macros with common names. In particular, "ERROR" and
|
|
||||||
"min" and "max" get defined. This causes clashes when user code references
|
|
||||||
these names in a context other than the intended use in windows.h.
|
|
||||||
|
|
||||||
To deal with this, the Microsoft engineers added the ability to control the
|
|
||||||
definition of these symbols by adding extra defines. In particular, including
|
|
||||||
windows.h in the following way
|
|
||||||
|
|
||||||
#define NOGDI
|
|
||||||
#define NOMINMAX
|
|
||||||
|
|
||||||
will reduce the number of macros defined. This way they will not conflict with
|
|
||||||
other uses in Ceres. For example, numeric_limits<double>::max() is impossible
|
|
||||||
to call without defining NOMINMAX.
|
|
||||||
|
|
||||||
Change-Id: I166f5d3bb6dc0e2e4b2ebf800fb19e49206f7874
|
|
||||||
|
|
||||||
commit beb4505311011130a7e54632137b0fbb5824cc9b
|
|
||||||
Author: Sameer Agarwal <sameeragarwal@google.com>
|
|
||||||
Date: Fri Feb 22 13:37:01 2013 -0800
|
|
||||||
|
|
||||||
Minor fixes
|
|
||||||
|
|
||||||
Based on William Rucklidge's review, including
|
|
||||||
a nasty bug in parameter block removal.
|
|
||||||
|
|
||||||
Change-Id: I3a692e589f600ff560ecae9fa85bb0b76063d403
|
|
||||||
|
|
||||||
commit 9a88bd7c4b40e2a1e0cd9b0dc09a3517c467e04e
|
|
||||||
Author: Sameer Agarwal <sameeragarwal@google.com>
|
|
||||||
Date: Tue Feb 19 13:09:12 2013 -0800
|
|
||||||
|
|
||||||
Minor bug fixes
|
|
||||||
|
|
||||||
Change-Id: I94e4521adf76a6c77db954c4a8955168e9d37b55
|
|
||||||
|
|
||||||
commit 956ed7e8f2054623615e6ae3601055d613897f26
|
|
||||||
Author: Sameer Agarwal <sameeragarwal@google.com>
|
|
||||||
Date: Tue Feb 19 07:06:15 2013 -0800
|
|
||||||
|
|
||||||
Various minor fixes.
|
|
||||||
|
|
||||||
1. Unused variable warnings and fixes.
|
|
||||||
2. Minor documentation update.
|
|
||||||
|
|
||||||
Change-Id: I815588a5806df1030a7c8750f4fb594c503f8998
|
|
||||||
|
|
||||||
commit 3e2c4ef9ad35e94198f4f3367b99fd91e26996a1
|
|
||||||
Author: Keir Mierle <mierle@gmail.com>
|
|
||||||
Date: Sun Feb 17 12:37:55 2013 -0800
|
|
||||||
|
|
||||||
Add adapters for column/row-major matrices to rotation.h
|
|
||||||
|
|
||||||
This patch introduces a matrix wrapper (MatrixAdapter) that allows to
|
|
||||||
transparently pass pointers to row-major or column-major matrices
|
|
||||||
to the conversion functions.
|
|
||||||
|
|
||||||
Change-Id: I7f1683a8722088cffcc542f593ce7eb46fca109b
|
|
||||||
|
|
||||||
commit 04938efe4bedec112083c5ceb227ba004f96bd01
|
|
||||||
Author: Keir Mierle <mierle@gmail.com>
|
|
||||||
Date: Sun Feb 17 12:37:55 2013 -0800
|
|
||||||
|
|
||||||
Add support for removing parameter and residual blocks.
|
|
||||||
|
|
||||||
This adds support for removing parameter and residual blocks.
|
|
||||||
There are two modes of operation: in the first, removals of
|
|
||||||
paremeter blocks are expensive, since each remove requires
|
|
||||||
scanning all residual blocks to find ones that depend on the
|
|
||||||
removed parameter. In the other, extra memory is sacrificed to
|
|
||||||
maintain a list of the residuals a parameter block depends on,
|
|
||||||
removing the need to scan. In both cases, removing residual blocks
|
|
||||||
is fast.
|
|
||||||
|
|
||||||
As a caveat, any removals destroys the ordering of the parameters,
|
|
||||||
so the residuals or jacobian returned from Solver::Solve() is
|
|
||||||
meaningless. There is some debate on the best way to handle this;
|
|
||||||
the details remain for a future change.
|
|
||||||
|
|
||||||
This also adds some overhead, even in the case that fast removals
|
|
||||||
are not requested:
|
|
||||||
|
|
||||||
- 1 int32 to each residual, to track its position in the program.
|
|
||||||
- 1 pointer to each parameter, to store the dependent residuals.
|
|
||||||
|
|
||||||
Change-Id: I71dcac8656679329a15ee7fc12c0df07030c12af
|
|
||||||
|
|
||||||
commit fa21df8cd969bb257b87c9ef7c0147d8d5ea8725
|
|
||||||
Author: Sameer Agarwal <sameeragarwal@google.com>
|
|
||||||
Date: Mon Feb 18 08:48:52 2013 -0800
|
|
||||||
|
|
||||||
Add script for building documentation.
|
|
||||||
|
|
||||||
Update make_release
|
|
||||||
|
|
||||||
Minor documentation fixes.
|
|
||||||
|
|
||||||
Change-Id: I1248ec3f58be66b5929aee6f2aa392c15d53ed83
|
|
||||||
|
|
||||||
commit 290b975d1d4eba44205bbeb0fa6b3ce8a6fa4a0c
|
|
||||||
Author: Sameer Agarwal <sameeragarwal@google.com>
|
|
||||||
Date: Sun Feb 17 16:50:37 2013 -0800
|
|
||||||
|
|
||||||
Preconditioner refactoring.
|
|
||||||
|
|
||||||
1. Added a Preconditioner interface.
|
|
||||||
2. SCHUR_JACOBI is now its own class and is independent of
|
|
||||||
SuiteSparse.
|
|
||||||
|
|
||||||
Change-Id: Id912ab19cf3736e61d1b90ddaf5bfba33e877ec4
|
|
||||||
|
|
||||||
commit d010de543530001fa917501a13ba02879c8ea52f
|
|
||||||
Author: Sameer Agarwal <sameeragarwal@google.com>
|
|
||||||
Date: Fri Feb 15 14:26:56 2013 -0800
|
|
||||||
|
|
||||||
Solver::Summary::FullReport() supports line search now.
|
|
||||||
|
|
||||||
Change-Id: Ib08d300198b85d9732cfb5785af4235ca4bd5226
|
|
||||||
|
1
extern/libmv/third_party/ceres/files.txt
vendored
1
extern/libmv/third_party/ceres/files.txt
vendored
@ -31,6 +31,7 @@ include/ceres/solver.h
|
|||||||
include/ceres/types.h
|
include/ceres/types.h
|
||||||
internal/ceres/array_utils.cc
|
internal/ceres/array_utils.cc
|
||||||
internal/ceres/array_utils.h
|
internal/ceres/array_utils.h
|
||||||
|
internal/ceres/blas.h
|
||||||
internal/ceres/block_evaluate_preparer.cc
|
internal/ceres/block_evaluate_preparer.cc
|
||||||
internal/ceres/block_evaluate_preparer.h
|
internal/ceres/block_evaluate_preparer.h
|
||||||
internal/ceres/block_jacobian_writer.cc
|
internal/ceres/block_jacobian_writer.cc
|
||||||
|
@ -349,7 +349,11 @@ Jet<T, N> operator/(const Jet<T, N>& f,
|
|||||||
//
|
//
|
||||||
// which holds because v*v = 0.
|
// which holds because v*v = 0.
|
||||||
h.a = f.a / g.a;
|
h.a = f.a / g.a;
|
||||||
h.v = (f.v - f.a / g.a * g.v) / g.a;
|
const T g_a_inverse = 1.0 / g.a;
|
||||||
|
const T f_a_by_g_a = f.a * g_a_inverse;
|
||||||
|
for (int i = 0; i < N; ++i) {
|
||||||
|
h.v[i] = (f.v[i] - f_a_by_g_a * g.v[i]) * g_a_inverse;
|
||||||
|
}
|
||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -358,7 +362,8 @@ template<typename T, int N> inline
|
|||||||
Jet<T, N> operator/(T s, const Jet<T, N>& g) {
|
Jet<T, N> operator/(T s, const Jet<T, N>& g) {
|
||||||
Jet<T, N> h;
|
Jet<T, N> h;
|
||||||
h.a = s / g.a;
|
h.a = s / g.a;
|
||||||
h.v = - s * g.v / (g.a * g.a);
|
const T minus_s_g_a_inverse2 = -s / (g.a * g.a);
|
||||||
|
h.v = g.v * minus_s_g_a_inverse2;
|
||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -366,8 +371,9 @@ Jet<T, N> operator/(T s, const Jet<T, N>& g) {
|
|||||||
template<typename T, int N> inline
|
template<typename T, int N> inline
|
||||||
Jet<T, N> operator/(const Jet<T, N>& f, T s) {
|
Jet<T, N> operator/(const Jet<T, N>& f, T s) {
|
||||||
Jet<T, N> h;
|
Jet<T, N> h;
|
||||||
h.a = f.a / s;
|
const T s_inverse = 1.0 / s;
|
||||||
h.v = f.v / s;
|
h.a = f.a * s_inverse;
|
||||||
|
h.v = f.v * s_inverse;
|
||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -425,7 +431,8 @@ template <typename T, int N> inline
|
|||||||
Jet<T, N> log(const Jet<T, N>& f) {
|
Jet<T, N> log(const Jet<T, N>& f) {
|
||||||
Jet<T, N> g;
|
Jet<T, N> g;
|
||||||
g.a = log(f.a);
|
g.a = log(f.a);
|
||||||
g.v = f.v / f.a;
|
const T a_inverse = T(1.0) / f.a;
|
||||||
|
g.v = f.v * a_inverse;
|
||||||
return g;
|
return g;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -443,7 +450,8 @@ template <typename T, int N> inline
|
|||||||
Jet<T, N> sqrt(const Jet<T, N>& f) {
|
Jet<T, N> sqrt(const Jet<T, N>& f) {
|
||||||
Jet<T, N> g;
|
Jet<T, N> g;
|
||||||
g.a = sqrt(f.a);
|
g.a = sqrt(f.a);
|
||||||
g.v = f.v / (T(2.0) * g.a);
|
const T two_a_inverse = 1.0 / (T(2.0) * g.a);
|
||||||
|
g.v = f.v * two_a_inverse;
|
||||||
return g;
|
return g;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -452,7 +460,7 @@ template <typename T, int N> inline
|
|||||||
Jet<T, N> cos(const Jet<T, N>& f) {
|
Jet<T, N> cos(const Jet<T, N>& f) {
|
||||||
Jet<T, N> g;
|
Jet<T, N> g;
|
||||||
g.a = cos(f.a);
|
g.a = cos(f.a);
|
||||||
T sin_a = sin(f.a);
|
const T sin_a = sin(f.a);
|
||||||
g.v = - sin_a * f.v;
|
g.v = - sin_a * f.v;
|
||||||
return g;
|
return g;
|
||||||
}
|
}
|
||||||
@ -462,7 +470,8 @@ template <typename T, int N> inline
|
|||||||
Jet<T, N> acos(const Jet<T, N>& f) {
|
Jet<T, N> acos(const Jet<T, N>& f) {
|
||||||
Jet<T, N> g;
|
Jet<T, N> g;
|
||||||
g.a = acos(f.a);
|
g.a = acos(f.a);
|
||||||
g.v = - T(1.0) / sqrt(T(1.0) - f.a * f.a) * f.v;
|
const T tmp = - T(1.0) / sqrt(T(1.0) - f.a * f.a);
|
||||||
|
g.v = tmp * f.v;
|
||||||
return g;
|
return g;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -471,7 +480,7 @@ template <typename T, int N> inline
|
|||||||
Jet<T, N> sin(const Jet<T, N>& f) {
|
Jet<T, N> sin(const Jet<T, N>& f) {
|
||||||
Jet<T, N> g;
|
Jet<T, N> g;
|
||||||
g.a = sin(f.a);
|
g.a = sin(f.a);
|
||||||
T cos_a = cos(f.a);
|
const T cos_a = cos(f.a);
|
||||||
g.v = cos_a * f.v;
|
g.v = cos_a * f.v;
|
||||||
return g;
|
return g;
|
||||||
}
|
}
|
||||||
@ -481,7 +490,8 @@ template <typename T, int N> inline
|
|||||||
Jet<T, N> asin(const Jet<T, N>& f) {
|
Jet<T, N> asin(const Jet<T, N>& f) {
|
||||||
Jet<T, N> g;
|
Jet<T, N> g;
|
||||||
g.a = asin(f.a);
|
g.a = asin(f.a);
|
||||||
g.v = T(1.0) / sqrt(T(1.0) - f.a * f.a) * f.v;
|
const T tmp = T(1.0) / sqrt(T(1.0) - f.a * f.a);
|
||||||
|
g.v = tmp * f.v;
|
||||||
return g;
|
return g;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
496
extern/libmv/third_party/ceres/internal/ceres/blas.h
vendored
Normal file
496
extern/libmv/third_party/ceres/internal/ceres/blas.h
vendored
Normal file
@ -0,0 +1,496 @@
|
|||||||
|
// Ceres Solver - A fast non-linear least squares minimizer
|
||||||
|
// Copyright 2013 Google Inc. All rights reserved.
|
||||||
|
// http://code.google.com/p/ceres-solver/
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistributions of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
// * Neither the name of Google Inc. nor the names of its contributors may be
|
||||||
|
// used to endorse or promote products derived from this software without
|
||||||
|
// specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
// POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
// Author: sameeragarwal@google.com (Sameer Agarwal)
|
||||||
|
//
|
||||||
|
// Simple blas functions for use in the Schur Eliminator. These are
|
||||||
|
// fairly basic implementations which already yield a significant
|
||||||
|
// speedup in the eliminator performance.
|
||||||
|
|
||||||
|
#ifndef CERES_INTERNAL_BLAS_H_
|
||||||
|
#define CERES_INTERNAL_BLAS_H_
|
||||||
|
|
||||||
|
#include "ceres/internal/eigen.h"
|
||||||
|
#include "glog/logging.h"
|
||||||
|
|
||||||
|
namespace ceres {
|
||||||
|
namespace internal {
|
||||||
|
|
||||||
|
// Remove the ".noalias()" annotation from the matrix matrix
|
||||||
|
// mutliplies to produce a correct build with the Android NDK,
|
||||||
|
// including versions 6, 7, 8, and 8b, when built with STLPort and the
|
||||||
|
// non-standalone toolchain (i.e. ndk-build). This appears to be a
|
||||||
|
// compiler bug; if the workaround is not in place, the line
|
||||||
|
//
|
||||||
|
// block.noalias() -= A * B;
|
||||||
|
//
|
||||||
|
// gets compiled to
|
||||||
|
//
|
||||||
|
// block.noalias() += A * B;
|
||||||
|
//
|
||||||
|
// which breaks schur elimination. Introducing a temporary by removing the
|
||||||
|
// .noalias() annotation causes the issue to disappear. Tracking this
|
||||||
|
// issue down was tricky, since the test suite doesn't run when built with
|
||||||
|
// the non-standalone toolchain.
|
||||||
|
//
|
||||||
|
// TODO(keir): Make a reproduction case for this and send it upstream.
|
||||||
|
#ifdef CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG
|
||||||
|
#define CERES_MAYBE_NOALIAS
|
||||||
|
#else
|
||||||
|
#define CERES_MAYBE_NOALIAS .noalias()
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// For the matrix-matrix functions below, there are three functions
|
||||||
|
// for each functionality. Foo, FooNaive and FooEigen. Foo is the one
|
||||||
|
// to be called by the user. FooNaive is a basic loop based
|
||||||
|
// implementation and FooEigen uses Eigen's implementation. Foo
|
||||||
|
// chooses between FooNaive and FooEigen depending on how many of the
|
||||||
|
// template arguments are fixed at compile time. Currently, FooEigen
|
||||||
|
// is called if all matrix dimenions are compile time
|
||||||
|
// constants. FooNaive is called otherwise. This leads to the best
|
||||||
|
// performance currently.
|
||||||
|
//
|
||||||
|
// TODO(sameeragarwal): Benchmark and simplify the matrix-vector
|
||||||
|
// functions.
|
||||||
|
|
||||||
|
// C op A * B;
|
||||||
|
//
|
||||||
|
// where op can be +=, -=, or =.
|
||||||
|
//
|
||||||
|
// The template parameters (kRowA, kColA, kRowB, kColB) allow
|
||||||
|
// specialization of the loop at compile time. If this information is
|
||||||
|
// not available, then Eigen::Dynamic should be used as the template
|
||||||
|
// argument.
|
||||||
|
//
|
||||||
|
// kOperation = 1 -> C += A * B
|
||||||
|
// kOperation = -1 -> C -= A * B
|
||||||
|
// kOperation = 0 -> C = A * B
|
||||||
|
//
|
||||||
|
// The function can write into matrices C which are larger than the
|
||||||
|
// matrix A * B. This is done by specifying the true size of C via
|
||||||
|
// row_stride_c and col_stride_c, and then indicating where A * B
|
||||||
|
// should be written into by start_row_c and start_col_c.
|
||||||
|
//
|
||||||
|
// Graphically if row_stride_c = 10, col_stride_c = 12, start_row_c =
|
||||||
|
// 4 and start_col_c = 5, then if A = 3x2 and B = 2x4, we get
|
||||||
|
//
|
||||||
|
// ------------
|
||||||
|
// ------------
|
||||||
|
// ------------
|
||||||
|
// ------------
|
||||||
|
// -----xxxx---
|
||||||
|
// -----xxxx---
|
||||||
|
// -----xxxx---
|
||||||
|
// ------------
|
||||||
|
// ------------
|
||||||
|
// ------------
|
||||||
|
//
|
||||||
|
template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>
|
||||||
|
inline void MatrixMatrixMultiplyEigen(const double* A,
|
||||||
|
const int num_row_a,
|
||||||
|
const int num_col_a,
|
||||||
|
const double* B,
|
||||||
|
const int num_row_b,
|
||||||
|
const int num_col_b,
|
||||||
|
double* C,
|
||||||
|
const int start_row_c,
|
||||||
|
const int start_col_c,
|
||||||
|
const int row_stride_c,
|
||||||
|
const int col_stride_c) {
|
||||||
|
const typename EigenTypes<kRowA, kColA>::ConstMatrixRef Aref(A, num_row_a, num_col_a);
|
||||||
|
const typename EigenTypes<kRowB, kColB>::ConstMatrixRef Bref(B, num_row_b, num_col_b);
|
||||||
|
MatrixRef Cref(C, row_stride_c, col_stride_c);
|
||||||
|
Eigen::Block<MatrixRef, kRowA, kColB> block(Cref,
|
||||||
|
start_row_c, start_col_c,
|
||||||
|
num_row_a, num_col_b);
|
||||||
|
if (kOperation > 0) {
|
||||||
|
block CERES_MAYBE_NOALIAS += Aref * Bref;
|
||||||
|
} else if (kOperation < 0) {
|
||||||
|
block CERES_MAYBE_NOALIAS -= Aref * Bref;
|
||||||
|
} else {
|
||||||
|
block CERES_MAYBE_NOALIAS = Aref * Bref;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>
|
||||||
|
inline void MatrixMatrixMultiplyNaive(const double* A,
|
||||||
|
const int num_row_a,
|
||||||
|
const int num_col_a,
|
||||||
|
const double* B,
|
||||||
|
const int num_row_b,
|
||||||
|
const int num_col_b,
|
||||||
|
double* C,
|
||||||
|
const int start_row_c,
|
||||||
|
const int start_col_c,
|
||||||
|
const int row_stride_c,
|
||||||
|
const int col_stride_c) {
|
||||||
|
DCHECK_GT(num_row_a, 0);
|
||||||
|
DCHECK_GT(num_col_a, 0);
|
||||||
|
DCHECK_GT(num_row_b, 0);
|
||||||
|
DCHECK_GT(num_col_b, 0);
|
||||||
|
DCHECK_GE(start_row_c, 0);
|
||||||
|
DCHECK_GE(start_col_c, 0);
|
||||||
|
DCHECK_GT(row_stride_c, 0);
|
||||||
|
DCHECK_GT(col_stride_c, 0);
|
||||||
|
|
||||||
|
DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
|
||||||
|
DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
|
||||||
|
DCHECK((kRowB == Eigen::Dynamic) || (kRowB == num_row_b));
|
||||||
|
DCHECK((kColB == Eigen::Dynamic) || (kColB == num_col_b));
|
||||||
|
|
||||||
|
const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
|
||||||
|
const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
|
||||||
|
const int NUM_ROW_B = (kColB != Eigen::Dynamic ? kRowB : num_row_b);
|
||||||
|
const int NUM_COL_B = (kColB != Eigen::Dynamic ? kColB : num_col_b);
|
||||||
|
DCHECK_EQ(NUM_COL_A, NUM_ROW_B);
|
||||||
|
|
||||||
|
const int NUM_ROW_C = NUM_ROW_A;
|
||||||
|
const int NUM_COL_C = NUM_COL_B;
|
||||||
|
DCHECK_LT(start_row_c + NUM_ROW_C, row_stride_c);
|
||||||
|
DCHECK_LT(start_col_c + NUM_COL_C, col_stride_c);
|
||||||
|
|
||||||
|
for (int row = 0; row < NUM_ROW_C; ++row) {
|
||||||
|
for (int col = 0; col < NUM_COL_C; ++col) {
|
||||||
|
double tmp = 0.0;
|
||||||
|
for (int k = 0; k < NUM_COL_A; ++k) {
|
||||||
|
tmp += A[row * NUM_COL_A + k] * B[k * NUM_COL_B + col];
|
||||||
|
}
|
||||||
|
|
||||||
|
const int index = (row + start_row_c) * col_stride_c + start_col_c + col;
|
||||||
|
if (kOperation > 0) {
|
||||||
|
C[index] += tmp;
|
||||||
|
} else if (kOperation < 0) {
|
||||||
|
C[index] -= tmp;
|
||||||
|
} else {
|
||||||
|
C[index] = tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>
|
||||||
|
inline void MatrixMatrixMultiply(const double* A,
|
||||||
|
const int num_row_a,
|
||||||
|
const int num_col_a,
|
||||||
|
const double* B,
|
||||||
|
const int num_row_b,
|
||||||
|
const int num_col_b,
|
||||||
|
double* C,
|
||||||
|
const int start_row_c,
|
||||||
|
const int start_col_c,
|
||||||
|
const int row_stride_c,
|
||||||
|
const int col_stride_c) {
|
||||||
|
#ifdef CERES_NO_CUSTOM_BLAS
|
||||||
|
MatrixMatrixMultiplyEigen<kRowA, kColA, kRowB, kColB, kOperation>(
|
||||||
|
A, num_row_a, num_col_a,
|
||||||
|
B, num_row_b, num_col_b,
|
||||||
|
C, start_row_c, start_col_c, row_stride_c, col_stride_c);
|
||||||
|
return;
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic &&
|
||||||
|
kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) {
|
||||||
|
MatrixMatrixMultiplyEigen<kRowA, kColA, kRowB, kColB, kOperation>(
|
||||||
|
A, num_row_a, num_col_a,
|
||||||
|
B, num_row_b, num_col_b,
|
||||||
|
C, start_row_c, start_col_c, row_stride_c, col_stride_c);
|
||||||
|
} else {
|
||||||
|
MatrixMatrixMultiplyNaive<kRowA, kColA, kRowB, kColB, kOperation>(
|
||||||
|
A, num_row_a, num_col_a,
|
||||||
|
B, num_row_b, num_col_b,
|
||||||
|
C, start_row_c, start_col_c, row_stride_c, col_stride_c);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// C op A' * B;
//
// where op can be +=, -=, or =.
//
// The template parameters (kRowA, kColA, kRowB, kColB) allow
// specialization of the loop at compile time. If this information is
// not available, then Eigen::Dynamic should be used as the template
// argument.
//
// kOperation = 1  -> C += A' * B
// kOperation = -1 -> C -= A' * B
// kOperation = 0  -> C = A' * B
//
// The function can write into matrices C which are larger than the
// matrix A' * B. This is done by specifying the true size of C via
// row_stride_c and col_stride_c, and then indicating where A' * B
// should be written into by start_row_c and start_col_c.
//
// Graphically if row_stride_c = 10, col_stride_c = 12, start_row_c =
// 4 and start_col_c = 5, then if A = 2x3 and B = 2x4, we get
//
// ------------
// ------------
// ------------
// ------------
// -----xxxx---
// -----xxxx---
// -----xxxx---
// ------------
// ------------
// ------------
//
|
||||||
|
template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>
|
||||||
|
inline void MatrixTransposeMatrixMultiplyEigen(const double* A,
|
||||||
|
const int num_row_a,
|
||||||
|
const int num_col_a,
|
||||||
|
const double* B,
|
||||||
|
const int num_row_b,
|
||||||
|
const int num_col_b,
|
||||||
|
double* C,
|
||||||
|
const int start_row_c,
|
||||||
|
const int start_col_c,
|
||||||
|
const int row_stride_c,
|
||||||
|
const int col_stride_c) {
|
||||||
|
const typename EigenTypes<kRowA, kColA>::ConstMatrixRef Aref(A, num_row_a, num_col_a);
|
||||||
|
const typename EigenTypes<kRowB, kColB>::ConstMatrixRef Bref(B, num_row_b, num_col_b);
|
||||||
|
MatrixRef Cref(C, row_stride_c, col_stride_c);
|
||||||
|
Eigen::Block<MatrixRef, kColA, kColB> block(Cref,
|
||||||
|
start_row_c, start_col_c,
|
||||||
|
num_col_a, num_col_b);
|
||||||
|
if (kOperation > 0) {
|
||||||
|
block CERES_MAYBE_NOALIAS += Aref.transpose() * Bref;
|
||||||
|
} else if (kOperation < 0) {
|
||||||
|
block CERES_MAYBE_NOALIAS -= Aref.transpose() * Bref;
|
||||||
|
} else {
|
||||||
|
block CERES_MAYBE_NOALIAS = Aref.transpose() * Bref;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>
|
||||||
|
inline void MatrixTransposeMatrixMultiplyNaive(const double* A,
|
||||||
|
const int num_row_a,
|
||||||
|
const int num_col_a,
|
||||||
|
const double* B,
|
||||||
|
const int num_row_b,
|
||||||
|
const int num_col_b,
|
||||||
|
double* C,
|
||||||
|
const int start_row_c,
|
||||||
|
const int start_col_c,
|
||||||
|
const int row_stride_c,
|
||||||
|
const int col_stride_c) {
|
||||||
|
DCHECK_GT(num_row_a, 0);
|
||||||
|
DCHECK_GT(num_col_a, 0);
|
||||||
|
DCHECK_GT(num_row_b, 0);
|
||||||
|
DCHECK_GT(num_col_b, 0);
|
||||||
|
DCHECK_GE(start_row_c, 0);
|
||||||
|
DCHECK_GE(start_col_c, 0);
|
||||||
|
DCHECK_GT(row_stride_c, 0);
|
||||||
|
DCHECK_GT(col_stride_c, 0);
|
||||||
|
|
||||||
|
DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
|
||||||
|
DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
|
||||||
|
DCHECK((kRowB == Eigen::Dynamic) || (kRowB == num_row_b));
|
||||||
|
DCHECK((kColB == Eigen::Dynamic) || (kColB == num_col_b));
|
||||||
|
|
||||||
|
const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
|
||||||
|
const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
|
||||||
|
const int NUM_ROW_B = (kColB != Eigen::Dynamic ? kRowB : num_row_b);
|
||||||
|
const int NUM_COL_B = (kColB != Eigen::Dynamic ? kColB : num_col_b);
|
||||||
|
DCHECK_EQ(NUM_ROW_A, NUM_ROW_B);
|
||||||
|
|
||||||
|
const int NUM_ROW_C = NUM_COL_A;
|
||||||
|
const int NUM_COL_C = NUM_COL_B;
|
||||||
|
DCHECK_LT(start_row_c + NUM_ROW_C, row_stride_c);
|
||||||
|
DCHECK_LT(start_col_c + NUM_COL_C, col_stride_c);
|
||||||
|
|
||||||
|
for (int row = 0; row < NUM_ROW_C; ++row) {
|
||||||
|
for (int col = 0; col < NUM_COL_C; ++col) {
|
||||||
|
double tmp = 0.0;
|
||||||
|
for (int k = 0; k < NUM_ROW_A; ++k) {
|
||||||
|
tmp += A[k * NUM_COL_A + row] * B[k * NUM_COL_B + col];
|
||||||
|
}
|
||||||
|
|
||||||
|
const int index = (row + start_row_c) * col_stride_c + start_col_c + col;
|
||||||
|
if (kOperation > 0) {
|
||||||
|
C[index]+= tmp;
|
||||||
|
} else if (kOperation < 0) {
|
||||||
|
C[index]-= tmp;
|
||||||
|
} else {
|
||||||
|
C[index]= tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>
|
||||||
|
inline void MatrixTransposeMatrixMultiply(const double* A,
|
||||||
|
const int num_row_a,
|
||||||
|
const int num_col_a,
|
||||||
|
const double* B,
|
||||||
|
const int num_row_b,
|
||||||
|
const int num_col_b,
|
||||||
|
double* C,
|
||||||
|
const int start_row_c,
|
||||||
|
const int start_col_c,
|
||||||
|
const int row_stride_c,
|
||||||
|
const int col_stride_c) {
|
||||||
|
#ifdef CERES_NO_CUSTOM_BLAS
|
||||||
|
MatrixTransposeMatrixMultiplyEigen<kRowA, kColA, kRowB, kColB, kOperation>(
|
||||||
|
A, num_row_a, num_col_a,
|
||||||
|
B, num_row_b, num_col_b,
|
||||||
|
C, start_row_c, start_col_c, row_stride_c, col_stride_c);
|
||||||
|
return;
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic &&
|
||||||
|
kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) {
|
||||||
|
MatrixTransposeMatrixMultiplyEigen<kRowA, kColA, kRowB, kColB, kOperation>(
|
||||||
|
A, num_row_a, num_col_a,
|
||||||
|
B, num_row_b, num_col_b,
|
||||||
|
C, start_row_c, start_col_c, row_stride_c, col_stride_c);
|
||||||
|
} else {
|
||||||
|
MatrixTransposeMatrixMultiplyNaive<kRowA, kColA, kRowB, kColB, kOperation>(
|
||||||
|
A, num_row_a, num_col_a,
|
||||||
|
B, num_row_b, num_col_b,
|
||||||
|
C, start_row_c, start_col_c, row_stride_c, col_stride_c);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int kRowA, int kColA, int kOperation>
|
||||||
|
inline void MatrixVectorMultiply(const double* A,
|
||||||
|
const int num_row_a,
|
||||||
|
const int num_col_a,
|
||||||
|
const double* b,
|
||||||
|
double* c) {
|
||||||
|
#ifdef CERES_NO_CUSTOM_BLAS
|
||||||
|
const typename EigenTypes<kRowA, kColA>::ConstMatrixRef Aref(A, num_row_a, num_col_a);
|
||||||
|
const typename EigenTypes<kColA>::ConstVectorRef bref(b, num_col_a);
|
||||||
|
typename EigenTypes<kRowA>::VectorRef cref(c, num_row_a);
|
||||||
|
|
||||||
|
// lazyProduct works better than .noalias() for matrix-vector
|
||||||
|
// products.
|
||||||
|
if (kOperation > 0) {
|
||||||
|
cref += Aref.lazyProduct(bref);
|
||||||
|
} else if (kOperation < 0) {
|
||||||
|
cref -= Aref.lazyProduct(bref);
|
||||||
|
} else {
|
||||||
|
cref -= Aref.lazyProduct(bref);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
|
||||||
|
DCHECK_GT(num_row_a, 0);
|
||||||
|
DCHECK_GT(num_col_a, 0);
|
||||||
|
DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
|
||||||
|
DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
|
||||||
|
|
||||||
|
const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
|
||||||
|
const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
|
||||||
|
|
||||||
|
for (int row = 0; row < NUM_ROW_A; ++row) {
|
||||||
|
double tmp = 0.0;
|
||||||
|
for (int col = 0; col < NUM_COL_A; ++col) {
|
||||||
|
tmp += A[row * NUM_COL_A + col] * b[col];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (kOperation > 0) {
|
||||||
|
c[row] += tmp;
|
||||||
|
} else if (kOperation < 0) {
|
||||||
|
c[row] -= tmp;
|
||||||
|
} else {
|
||||||
|
c[row] = tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif // CERES_NO_CUSTOM_BLAS
|
||||||
|
}
|
||||||
|
|
||||||
|
// c op A' * b;
//
// where op can be +=, -=, or =.
//
// The template parameters (kRowA, kColA) allow specialization of the
// loop at compile time. If this information is not available, then
// Eigen::Dynamic should be used as the template argument.
//
// kOperation = 1  -> c += A' * b
// kOperation = -1 -> c -= A' * b
// kOperation = 0  -> c = A' * b
|
||||||
|
template<int kRowA, int kColA, int kOperation>
|
||||||
|
inline void MatrixTransposeVectorMultiply(const double* A,
|
||||||
|
const int num_row_a,
|
||||||
|
const int num_col_a,
|
||||||
|
const double* b,
|
||||||
|
double* c) {
|
||||||
|
#ifdef CERES_NO_CUSTOM_BLAS
|
||||||
|
const typename EigenTypes<kRowA, kColA>::ConstMatrixRef Aref(A, num_row_a, num_col_a);
|
||||||
|
const typename EigenTypes<kRowA>::ConstVectorRef bref(b, num_row_a);
|
||||||
|
typename EigenTypes<kColA>::VectorRef cref(c, num_col_a);
|
||||||
|
|
||||||
|
// lazyProduct works better than .noalias() for matrix-vector
|
||||||
|
// products.
|
||||||
|
if (kOperation > 0) {
|
||||||
|
cref += Aref.transpose().lazyProduct(bref);
|
||||||
|
} else if (kOperation < 0) {
|
||||||
|
cref -= Aref.transpose().lazyProduct(bref);
|
||||||
|
} else {
|
||||||
|
cref -= Aref.transpose().lazyProduct(bref);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
|
||||||
|
DCHECK_GT(num_row_a, 0);
|
||||||
|
DCHECK_GT(num_col_a, 0);
|
||||||
|
DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
|
||||||
|
DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
|
||||||
|
|
||||||
|
const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
|
||||||
|
const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
|
||||||
|
|
||||||
|
for (int row = 0; row < NUM_COL_A; ++row) {
|
||||||
|
double tmp = 0.0;
|
||||||
|
for (int col = 0; col < NUM_ROW_A; ++col) {
|
||||||
|
tmp += A[col * NUM_COL_A + row] * b[col];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (kOperation > 0) {
|
||||||
|
c[row] += tmp;
|
||||||
|
} else if (kOperation < 0) {
|
||||||
|
c[row] -= tmp;
|
||||||
|
} else {
|
||||||
|
c[row] = tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif // CERES_NO_CUSTOM_BLAS
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef CERES_MAYBE_NOALIAS
|
||||||
|
|
||||||
|
} // namespace internal
|
||||||
|
} // namespace ceres
|
||||||
|
|
||||||
|
#endif // CERES_INTERNAL_BLAS_H_
|
@ -33,6 +33,7 @@
|
|||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include "ceres/blas.h"
|
||||||
#include "ceres/block_structure.h"
|
#include "ceres/block_structure.h"
|
||||||
#include "ceres/internal/eigen.h"
|
#include "ceres/internal/eigen.h"
|
||||||
#include "ceres/matrix_proto.h"
|
#include "ceres/matrix_proto.h"
|
||||||
@ -117,16 +118,15 @@ void BlockSparseMatrix::RightMultiply(const double* x, double* y) const {
|
|||||||
for (int i = 0; i < block_structure_->rows.size(); ++i) {
|
for (int i = 0; i < block_structure_->rows.size(); ++i) {
|
||||||
int row_block_pos = block_structure_->rows[i].block.position;
|
int row_block_pos = block_structure_->rows[i].block.position;
|
||||||
int row_block_size = block_structure_->rows[i].block.size;
|
int row_block_size = block_structure_->rows[i].block.size;
|
||||||
VectorRef yref(y + row_block_pos, row_block_size);
|
|
||||||
const vector<Cell>& cells = block_structure_->rows[i].cells;
|
const vector<Cell>& cells = block_structure_->rows[i].cells;
|
||||||
for (int j = 0; j < cells.size(); ++j) {
|
for (int j = 0; j < cells.size(); ++j) {
|
||||||
int col_block_id = cells[j].block_id;
|
int col_block_id = cells[j].block_id;
|
||||||
int col_block_size = block_structure_->cols[col_block_id].size;
|
int col_block_size = block_structure_->cols[col_block_id].size;
|
||||||
int col_block_pos = block_structure_->cols[col_block_id].position;
|
int col_block_pos = block_structure_->cols[col_block_id].position;
|
||||||
ConstVectorRef xref(x + col_block_pos, col_block_size);
|
MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||||
MatrixRef m(values_.get() + cells[j].position,
|
values_.get() + cells[j].position, row_block_size, col_block_size,
|
||||||
row_block_size, col_block_size);
|
x + col_block_pos,
|
||||||
yref += m.lazyProduct(xref);
|
y + row_block_pos);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -138,16 +138,16 @@ void BlockSparseMatrix::LeftMultiply(const double* x, double* y) const {
|
|||||||
for (int i = 0; i < block_structure_->rows.size(); ++i) {
|
for (int i = 0; i < block_structure_->rows.size(); ++i) {
|
||||||
int row_block_pos = block_structure_->rows[i].block.position;
|
int row_block_pos = block_structure_->rows[i].block.position;
|
||||||
int row_block_size = block_structure_->rows[i].block.size;
|
int row_block_size = block_structure_->rows[i].block.size;
|
||||||
const ConstVectorRef xref(x + row_block_pos, row_block_size);
|
|
||||||
const vector<Cell>& cells = block_structure_->rows[i].cells;
|
const vector<Cell>& cells = block_structure_->rows[i].cells;
|
||||||
for (int j = 0; j < cells.size(); ++j) {
|
for (int j = 0; j < cells.size(); ++j) {
|
||||||
int col_block_id = cells[j].block_id;
|
int col_block_id = cells[j].block_id;
|
||||||
int col_block_size = block_structure_->cols[col_block_id].size;
|
int col_block_size = block_structure_->cols[col_block_id].size;
|
||||||
int col_block_pos = block_structure_->cols[col_block_id].position;
|
int col_block_pos = block_structure_->cols[col_block_id].position;
|
||||||
VectorRef yref(y + col_block_pos, col_block_size);
|
MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||||
MatrixRef m(values_.get() + cells[j].position,
|
values_.get() + cells[j].position, row_block_size, col_block_size,
|
||||||
row_block_size, col_block_size);
|
x + row_block_pos,
|
||||||
yref += m.transpose().lazyProduct(xref);
|
y + col_block_pos);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -35,6 +35,7 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include "ceres/blas.h"
|
||||||
#include "ceres/block_sparse_matrix.h"
|
#include "ceres/block_sparse_matrix.h"
|
||||||
#include "ceres/block_structure.h"
|
#include "ceres/block_structure.h"
|
||||||
#include "ceres/internal/eigen.h"
|
#include "ceres/internal/eigen.h"
|
||||||
@ -103,13 +104,10 @@ void PartitionedMatrixView::RightMultiplyE(const double* x, double* y) const {
|
|||||||
const int col_block_id = cell.block_id;
|
const int col_block_id = cell.block_id;
|
||||||
const int col_block_pos = bs->cols[col_block_id].position;
|
const int col_block_pos = bs->cols[col_block_id].position;
|
||||||
const int col_block_size = bs->cols[col_block_id].size;
|
const int col_block_size = bs->cols[col_block_id].size;
|
||||||
|
MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||||
ConstVectorRef xref(x + col_block_pos, col_block_size);
|
row_values + cell.position, row_block_size, col_block_size,
|
||||||
VectorRef yref(y + row_block_pos, row_block_size);
|
x + col_block_pos,
|
||||||
ConstMatrixRef m(row_values + cell.position,
|
y + row_block_pos);
|
||||||
row_block_size,
|
|
||||||
col_block_size);
|
|
||||||
yref += m.lazyProduct(xref);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -124,20 +122,16 @@ void PartitionedMatrixView::RightMultiplyF(const double* x, double* y) const {
|
|||||||
for (int r = 0; r < bs->rows.size(); ++r) {
|
for (int r = 0; r < bs->rows.size(); ++r) {
|
||||||
const int row_block_pos = bs->rows[r].block.position;
|
const int row_block_pos = bs->rows[r].block.position;
|
||||||
const int row_block_size = bs->rows[r].block.size;
|
const int row_block_size = bs->rows[r].block.size;
|
||||||
VectorRef yref(y + row_block_pos, row_block_size);
|
|
||||||
const vector<Cell>& cells = bs->rows[r].cells;
|
const vector<Cell>& cells = bs->rows[r].cells;
|
||||||
for (int c = (r < num_row_blocks_e_) ? 1 : 0; c < cells.size(); ++c) {
|
for (int c = (r < num_row_blocks_e_) ? 1 : 0; c < cells.size(); ++c) {
|
||||||
const double* row_values = matrix_.RowBlockValues(r);
|
const double* row_values = matrix_.RowBlockValues(r);
|
||||||
const int col_block_id = cells[c].block_id;
|
const int col_block_id = cells[c].block_id;
|
||||||
const int col_block_pos = bs->cols[col_block_id].position;
|
const int col_block_pos = bs->cols[col_block_id].position;
|
||||||
const int col_block_size = bs->cols[col_block_id].size;
|
const int col_block_size = bs->cols[col_block_id].size;
|
||||||
|
MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||||
ConstVectorRef xref(x + col_block_pos - num_cols_e(),
|
row_values + cells[c].position, row_block_size, col_block_size,
|
||||||
col_block_size);
|
x + col_block_pos - num_cols_e(),
|
||||||
ConstMatrixRef m(row_values + cells[c].position,
|
y + row_block_pos);
|
||||||
row_block_size,
|
|
||||||
col_block_size);
|
|
||||||
yref += m.lazyProduct(xref);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -155,13 +149,10 @@ void PartitionedMatrixView::LeftMultiplyE(const double* x, double* y) const {
|
|||||||
const int col_block_id = cell.block_id;
|
const int col_block_id = cell.block_id;
|
||||||
const int col_block_pos = bs->cols[col_block_id].position;
|
const int col_block_pos = bs->cols[col_block_id].position;
|
||||||
const int col_block_size = bs->cols[col_block_id].size;
|
const int col_block_size = bs->cols[col_block_id].size;
|
||||||
|
MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||||
ConstVectorRef xref(x + row_block_pos, row_block_size);
|
row_values + cell.position, row_block_size, col_block_size,
|
||||||
VectorRef yref(y + col_block_pos, col_block_size);
|
x + row_block_pos,
|
||||||
ConstMatrixRef m(row_values + cell.position,
|
y + col_block_pos);
|
||||||
row_block_size,
|
|
||||||
col_block_size);
|
|
||||||
yref += m.transpose().lazyProduct(xref);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -176,19 +167,16 @@ void PartitionedMatrixView::LeftMultiplyF(const double* x, double* y) const {
|
|||||||
for (int r = 0; r < bs->rows.size(); ++r) {
|
for (int r = 0; r < bs->rows.size(); ++r) {
|
||||||
const int row_block_pos = bs->rows[r].block.position;
|
const int row_block_pos = bs->rows[r].block.position;
|
||||||
const int row_block_size = bs->rows[r].block.size;
|
const int row_block_size = bs->rows[r].block.size;
|
||||||
ConstVectorRef xref(x + row_block_pos, row_block_size);
|
|
||||||
const vector<Cell>& cells = bs->rows[r].cells;
|
const vector<Cell>& cells = bs->rows[r].cells;
|
||||||
for (int c = (r < num_row_blocks_e_) ? 1 : 0; c < cells.size(); ++c) {
|
for (int c = (r < num_row_blocks_e_) ? 1 : 0; c < cells.size(); ++c) {
|
||||||
const double* row_values = matrix_.RowBlockValues(r);
|
const double* row_values = matrix_.RowBlockValues(r);
|
||||||
const int col_block_id = cells[c].block_id;
|
const int col_block_id = cells[c].block_id;
|
||||||
const int col_block_pos = bs->cols[col_block_id].position;
|
const int col_block_pos = bs->cols[col_block_id].position;
|
||||||
const int col_block_size = bs->cols[col_block_id].size;
|
const int col_block_size = bs->cols[col_block_id].size;
|
||||||
|
MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||||
VectorRef yref(y + col_block_pos - num_cols_e(), col_block_size);
|
row_values + cells[c].position, row_block_size, col_block_size,
|
||||||
ConstMatrixRef m(row_values + cells[c].position,
|
x + row_block_pos,
|
||||||
row_block_size,
|
y + col_block_pos - num_cols_e());
|
||||||
col_block_size);
|
|
||||||
yref += m.transpose().lazyProduct(xref);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -267,15 +255,15 @@ void PartitionedMatrixView::UpdateBlockDiagonalEtE(
|
|||||||
const int row_block_size = bs->rows[r].block.size;
|
const int row_block_size = bs->rows[r].block.size;
|
||||||
const int block_id = cell.block_id;
|
const int block_id = cell.block_id;
|
||||||
const int col_block_size = bs->cols[block_id].size;
|
const int col_block_size = bs->cols[block_id].size;
|
||||||
ConstMatrixRef m(row_values + cell.position,
|
|
||||||
row_block_size,
|
|
||||||
col_block_size);
|
|
||||||
|
|
||||||
const int cell_position =
|
const int cell_position =
|
||||||
block_diagonal_structure->rows[block_id].cells[0].position;
|
block_diagonal_structure->rows[block_id].cells[0].position;
|
||||||
|
|
||||||
MatrixRef(block_diagonal->mutable_values() + cell_position,
|
MatrixTransposeMatrixMultiply
|
||||||
col_block_size, col_block_size).noalias() += m.transpose() * m;
|
<Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||||
|
row_values + cell.position, row_block_size, col_block_size,
|
||||||
|
row_values + cell.position, row_block_size, col_block_size,
|
||||||
|
block_diagonal->mutable_values() + cell_position,
|
||||||
|
0, 0, col_block_size, col_block_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -298,15 +286,16 @@ void PartitionedMatrixView::UpdateBlockDiagonalFtF(
|
|||||||
for (int c = (r < num_row_blocks_e_) ? 1 : 0; c < cells.size(); ++c) {
|
for (int c = (r < num_row_blocks_e_) ? 1 : 0; c < cells.size(); ++c) {
|
||||||
const int col_block_id = cells[c].block_id;
|
const int col_block_id = cells[c].block_id;
|
||||||
const int col_block_size = bs->cols[col_block_id].size;
|
const int col_block_size = bs->cols[col_block_id].size;
|
||||||
ConstMatrixRef m(row_values + cells[c].position,
|
|
||||||
row_block_size,
|
|
||||||
col_block_size);
|
|
||||||
const int diagonal_block_id = col_block_id - num_col_blocks_e_;
|
const int diagonal_block_id = col_block_id - num_col_blocks_e_;
|
||||||
const int cell_position =
|
const int cell_position =
|
||||||
block_diagonal_structure->rows[diagonal_block_id].cells[0].position;
|
block_diagonal_structure->rows[diagonal_block_id].cells[0].position;
|
||||||
|
|
||||||
MatrixRef(block_diagonal->mutable_values() + cell_position,
|
MatrixTransposeMatrixMultiply
|
||||||
col_block_size, col_block_size).noalias() += m.transpose() * m;
|
<Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||||
|
row_values + cells[c].position, row_block_size, col_block_size,
|
||||||
|
row_values + cells[c].position, row_block_size, col_block_size,
|
||||||
|
block_diagonal->mutable_values() + cell_position,
|
||||||
|
0, 0, col_block_size, col_block_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -291,14 +291,8 @@ bool SparseSchurComplementSolver::SolveReducedLinearSystemUsingSuiteSparse(
|
|||||||
} else {
|
} else {
|
||||||
factor_ = ss_.AnalyzeCholesky(cholmod_lhs);
|
factor_ = ss_.AnalyzeCholesky(cholmod_lhs);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (VLOG_IS_ON(2)) {
|
|
||||||
cholmod_print_common(const_cast<char*>("Symbolic Analysis"), ss_.mutable_cc());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
CHECK_NOTNULL(factor_);
|
|
||||||
|
|
||||||
cholmod_dense* cholmod_solution =
|
cholmod_dense* cholmod_solution =
|
||||||
ss_.SolveCholesky(cholmod_lhs, factor_, cholmod_rhs);
|
ss_.SolveCholesky(cholmod_lhs, factor_, cholmod_rhs);
|
||||||
|
|
||||||
|
@ -321,9 +321,25 @@ class SchurEliminator : public SchurEliminatorBase {
|
|||||||
// see the documentation of the Chunk object above.
|
// see the documentation of the Chunk object above.
|
||||||
vector<Chunk> chunks_;
|
vector<Chunk> chunks_;
|
||||||
|
|
||||||
|
// TODO(sameeragarwal): The following two arrays contain per-thread
|
||||||
|
// storage. They should be refactored into a per thread struct.
|
||||||
|
|
||||||
// Buffer to store the products of the y and z blocks generated
|
// Buffer to store the products of the y and z blocks generated
|
||||||
// during the elimination phase.
|
// during the elimination phase. buffer_ is of size num_threads *
|
||||||
|
// buffer_size_. Each thread accesses the chunk
|
||||||
|
//
|
||||||
|
// [thread_id * buffer_size_ , (thread_id + 1) * buffer_size_]
|
||||||
|
//
|
||||||
scoped_array<double> buffer_;
|
scoped_array<double> buffer_;
|
||||||
|
|
||||||
|
// Buffer to store per thread matrix matrix products used by
|
||||||
|
// ChunkOuterProduct. Like buffer_ it is of size num_threads *
|
||||||
|
// buffer_size_. Each thread accesses the chunk
|
||||||
|
//
|
||||||
|
// [thread_id * buffer_size_ , (thread_id + 1) * buffer_size_]
|
||||||
|
//
|
||||||
|
scoped_array<double> chunk_outer_product_buffer_;
|
||||||
|
|
||||||
int buffer_size_;
|
int buffer_size_;
|
||||||
int num_threads_;
|
int num_threads_;
|
||||||
int uneliminated_row_begins_;
|
int uneliminated_row_begins_;
|
||||||
|
@ -34,10 +34,6 @@
|
|||||||
#ifndef CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_
|
#ifndef CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_
|
||||||
#define CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_
|
#define CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_
|
||||||
|
|
||||||
#ifdef CERES_USE_OPENMP
|
|
||||||
#include <omp.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Eigen has an internal threshold switching between different matrix
|
// Eigen has an internal threshold switching between different matrix
|
||||||
// multiplication algorithms. In particular for matrices larger than
|
// multiplication algorithms. In particular for matrices larger than
|
||||||
// EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD it uses a cache friendly
|
// EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD it uses a cache friendly
|
||||||
@ -46,19 +42,25 @@
|
|||||||
// are thin and long, the default choice may not be optimal. This is
|
// are thin and long, the default choice may not be optimal. This is
|
||||||
// the case for us, as the default choice causes a 30% performance
|
// the case for us, as the default choice causes a 30% performance
|
||||||
// regression when we moved from Eigen2 to Eigen3.
|
// regression when we moved from Eigen2 to Eigen3.
|
||||||
|
|
||||||
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 10
|
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 10
|
||||||
|
|
||||||
|
#ifdef CERES_USE_OPENMP
|
||||||
|
#include <omp.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include "Eigen/Dense"
|
#include "Eigen/Dense"
|
||||||
|
#include "ceres/blas.h"
|
||||||
#include "ceres/block_random_access_matrix.h"
|
#include "ceres/block_random_access_matrix.h"
|
||||||
#include "ceres/block_sparse_matrix.h"
|
#include "ceres/block_sparse_matrix.h"
|
||||||
#include "ceres/block_structure.h"
|
#include "ceres/block_structure.h"
|
||||||
|
#include "ceres/internal/eigen.h"
|
||||||
|
#include "ceres/internal/scoped_ptr.h"
|
||||||
#include "ceres/map_util.h"
|
#include "ceres/map_util.h"
|
||||||
#include "ceres/schur_eliminator.h"
|
#include "ceres/schur_eliminator.h"
|
||||||
#include "ceres/stl_util.h"
|
#include "ceres/stl_util.h"
|
||||||
#include "ceres/internal/eigen.h"
|
|
||||||
#include "ceres/internal/scoped_ptr.h"
|
|
||||||
#include "glog/logging.h"
|
#include "glog/logging.h"
|
||||||
|
|
||||||
namespace ceres {
|
namespace ceres {
|
||||||
@ -149,13 +151,16 @@ Init(int num_eliminate_blocks, const CompressedRowBlockStructure* bs) {
|
|||||||
|
|
||||||
buffer_.reset(new double[buffer_size_ * num_threads_]);
|
buffer_.reset(new double[buffer_size_ * num_threads_]);
|
||||||
|
|
||||||
|
// chunk_outer_product_buffer_ only needs to store e_block_size *
|
||||||
|
// f_block_size, which is always less than buffer_size_, so we just
|
||||||
|
// allocate buffer_size_ per thread.
|
||||||
|
chunk_outer_product_buffer_.reset(new double[buffer_size_ * num_threads_]);
|
||||||
|
|
||||||
STLDeleteElements(&rhs_locks_);
|
STLDeleteElements(&rhs_locks_);
|
||||||
rhs_locks_.resize(num_col_blocks - num_eliminate_blocks_);
|
rhs_locks_.resize(num_col_blocks - num_eliminate_blocks_);
|
||||||
for (int i = 0; i < num_col_blocks - num_eliminate_blocks_; ++i) {
|
for (int i = 0; i < num_col_blocks - num_eliminate_blocks_; ++i) {
|
||||||
rhs_locks_[i] = new Mutex;
|
rhs_locks_[i] = new Mutex;
|
||||||
}
|
}
|
||||||
|
|
||||||
VLOG(1) << "Eliminator threads: " << num_threads_;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
|
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
|
||||||
@ -261,7 +266,7 @@ Eliminate(const BlockSparseMatrixBase* A,
|
|||||||
typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix inverse_ete =
|
typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix inverse_ete =
|
||||||
ete
|
ete
|
||||||
.template selfadjointView<Eigen::Upper>()
|
.template selfadjointView<Eigen::Upper>()
|
||||||
.ldlt()
|
.llt()
|
||||||
.solve(Matrix::Identity(e_block_size, e_block_size));
|
.solve(Matrix::Identity(e_block_size, e_block_size));
|
||||||
|
|
||||||
// For the current chunk compute and update the rhs of the reduced
|
// For the current chunk compute and update the rhs of the reduced
|
||||||
@ -294,8 +299,8 @@ BackSubstitute(const BlockSparseMatrixBase* A,
|
|||||||
const int e_block_id = bs->rows[chunk.start].cells.front().block_id;
|
const int e_block_id = bs->rows[chunk.start].cells.front().block_id;
|
||||||
const int e_block_size = bs->cols[e_block_id].size;
|
const int e_block_size = bs->cols[e_block_id].size;
|
||||||
|
|
||||||
typename EigenTypes<kEBlockSize>::VectorRef y_block(
|
double* y_ptr = y + bs->cols[e_block_id].position;
|
||||||
y + bs->cols[e_block_id].position, e_block_size);
|
typename EigenTypes<kEBlockSize>::VectorRef y_block(y_ptr, e_block_size);
|
||||||
|
|
||||||
typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix
|
typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix
|
||||||
ete(e_block_size, e_block_size);
|
ete(e_block_size, e_block_size);
|
||||||
@ -312,40 +317,35 @@ BackSubstitute(const BlockSparseMatrixBase* A,
|
|||||||
const double* row_values = A->RowBlockValues(chunk.start + j);
|
const double* row_values = A->RowBlockValues(chunk.start + j);
|
||||||
const Cell& e_cell = row.cells.front();
|
const Cell& e_cell = row.cells.front();
|
||||||
DCHECK_EQ(e_block_id, e_cell.block_id);
|
DCHECK_EQ(e_block_id, e_cell.block_id);
|
||||||
const typename EigenTypes<kRowBlockSize, kEBlockSize>::ConstMatrixRef
|
|
||||||
e_block(row_values + e_cell.position,
|
|
||||||
row.block.size,
|
|
||||||
e_block_size);
|
|
||||||
|
|
||||||
typename EigenTypes<kRowBlockSize>::Vector
|
typename EigenTypes<kRowBlockSize>::Vector sj =
|
||||||
sj =
|
|
||||||
typename EigenTypes<kRowBlockSize>::ConstVectorRef
|
typename EigenTypes<kRowBlockSize>::ConstVectorRef
|
||||||
(b + bs->rows[chunk.start + j].block.position,
|
(b + bs->rows[chunk.start + j].block.position, row.block.size);
|
||||||
row.block.size);
|
|
||||||
|
|
||||||
for (int c = 1; c < row.cells.size(); ++c) {
|
for (int c = 1; c < row.cells.size(); ++c) {
|
||||||
const int f_block_id = row.cells[c].block_id;
|
const int f_block_id = row.cells[c].block_id;
|
||||||
const int f_block_size = bs->cols[f_block_id].size;
|
const int f_block_size = bs->cols[f_block_id].size;
|
||||||
const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
|
|
||||||
f_block(row_values + row.cells[c].position,
|
|
||||||
row.block.size, f_block_size);
|
|
||||||
const int r_block = f_block_id - num_eliminate_blocks_;
|
const int r_block = f_block_id - num_eliminate_blocks_;
|
||||||
|
|
||||||
sj -= f_block *
|
MatrixVectorMultiply<kRowBlockSize, kFBlockSize, -1>(
|
||||||
typename EigenTypes<kFBlockSize>::ConstVectorRef
|
row_values + row.cells[c].position, row.block.size, f_block_size,
|
||||||
(z + lhs_row_layout_[r_block], f_block_size);
|
z + lhs_row_layout_[r_block],
|
||||||
|
sj.data());
|
||||||
}
|
}
|
||||||
|
|
||||||
y_block += e_block.transpose() * sj;
|
MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
|
||||||
ete.template selfadjointView<Eigen::Upper>()
|
row_values + e_cell.position, row.block.size, e_block_size,
|
||||||
.rankUpdate(e_block.transpose(), 1.0);
|
sj.data(),
|
||||||
|
y_ptr);
|
||||||
|
|
||||||
|
MatrixTransposeMatrixMultiply
|
||||||
|
<kRowBlockSize, kEBlockSize,kRowBlockSize, kEBlockSize, 1>(
|
||||||
|
row_values + e_cell.position, row.block.size, e_block_size,
|
||||||
|
row_values + e_cell.position, row.block.size, e_block_size,
|
||||||
|
ete.data(), 0, 0, e_block_size, e_block_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
y_block =
|
ete.llt().solveInPlace(y_block);
|
||||||
ete
|
|
||||||
.template selfadjointView<Eigen::Upper>()
|
|
||||||
.ldlt()
|
|
||||||
.solve(y_block);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -382,15 +382,12 @@ UpdateRhs(const Chunk& chunk,
|
|||||||
for (int c = 1; c < row.cells.size(); ++c) {
|
for (int c = 1; c < row.cells.size(); ++c) {
|
||||||
const int block_id = row.cells[c].block_id;
|
const int block_id = row.cells[c].block_id;
|
||||||
const int block_size = bs->cols[block_id].size;
|
const int block_size = bs->cols[block_id].size;
|
||||||
const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
|
|
||||||
b(row_values + row.cells[c].position,
|
|
||||||
row.block.size, block_size);
|
|
||||||
|
|
||||||
const int block = block_id - num_eliminate_blocks_;
|
const int block = block_id - num_eliminate_blocks_;
|
||||||
CeresMutexLock l(rhs_locks_[block]);
|
CeresMutexLock l(rhs_locks_[block]);
|
||||||
typename EigenTypes<kFBlockSize>::VectorRef
|
MatrixTransposeVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
|
||||||
(rhs + lhs_row_layout_[block], block_size).noalias()
|
row_values + row.cells[c].position,
|
||||||
+= b.transpose() * sj;
|
row.block.size, block_size,
|
||||||
|
sj.data(), rhs + lhs_row_layout_[block]);
|
||||||
}
|
}
|
||||||
b_pos += row.block.size;
|
b_pos += row.block.size;
|
||||||
}
|
}
|
||||||
@ -446,34 +443,31 @@ ChunkDiagonalBlockAndGradient(
|
|||||||
|
|
||||||
// Extract the e_block, ETE += E_i' E_i
|
// Extract the e_block, ETE += E_i' E_i
|
||||||
const Cell& e_cell = row.cells.front();
|
const Cell& e_cell = row.cells.front();
|
||||||
const typename EigenTypes<kRowBlockSize, kEBlockSize>::ConstMatrixRef
|
MatrixTransposeMatrixMultiply
|
||||||
e_block(row_values + e_cell.position,
|
<kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>(
|
||||||
row.block.size,
|
row_values + e_cell.position, row.block.size, e_block_size,
|
||||||
e_block_size);
|
row_values + e_cell.position, row.block.size, e_block_size,
|
||||||
|
ete->data(), 0, 0, e_block_size, e_block_size);
|
||||||
ete->template selfadjointView<Eigen::Upper>()
|
|
||||||
.rankUpdate(e_block.transpose(), 1.0);
|
|
||||||
|
|
||||||
// g += E_i' b_i
|
// g += E_i' b_i
|
||||||
g->noalias() += e_block.transpose() *
|
MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
|
||||||
typename EigenTypes<kRowBlockSize>::ConstVectorRef
|
row_values + e_cell.position, row.block.size, e_block_size,
|
||||||
(b + b_pos, row.block.size);
|
b + b_pos,
|
||||||
|
g->data());
|
||||||
|
|
||||||
|
|
||||||
// buffer = E'F. This computation is done by iterating over the
|
// buffer = E'F. This computation is done by iterating over the
|
||||||
// f_blocks for each row in the chunk.
|
// f_blocks for each row in the chunk.
|
||||||
for (int c = 1; c < row.cells.size(); ++c) {
|
for (int c = 1; c < row.cells.size(); ++c) {
|
||||||
const int f_block_id = row.cells[c].block_id;
|
const int f_block_id = row.cells[c].block_id;
|
||||||
const int f_block_size = bs->cols[f_block_id].size;
|
const int f_block_size = bs->cols[f_block_id].size;
|
||||||
const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
|
|
||||||
f_block(row_values + row.cells[c].position,
|
|
||||||
row.block.size, f_block_size);
|
|
||||||
|
|
||||||
double* buffer_ptr =
|
double* buffer_ptr =
|
||||||
buffer + FindOrDie(chunk.buffer_layout, f_block_id);
|
buffer + FindOrDie(chunk.buffer_layout, f_block_id);
|
||||||
|
MatrixTransposeMatrixMultiply
|
||||||
typename EigenTypes<kEBlockSize, kFBlockSize>::MatrixRef
|
<kRowBlockSize, kEBlockSize, kRowBlockSize, kFBlockSize, 1>(
|
||||||
(buffer_ptr, e_block_size, f_block_size).noalias()
|
row_values + e_cell.position, row.block.size, e_block_size,
|
||||||
+= e_block.transpose() * f_block;
|
row_values + row.cells[c].position, row.block.size, f_block_size,
|
||||||
|
buffer_ptr, 0, 0, e_block_size, f_block_size);
|
||||||
}
|
}
|
||||||
b_pos += row.block.size;
|
b_pos += row.block.size;
|
||||||
}
|
}
|
||||||
@ -497,15 +491,24 @@ ChunkOuterProduct(const CompressedRowBlockStructure* bs,
|
|||||||
// references to the left hand side.
|
// references to the left hand side.
|
||||||
const int e_block_size = inverse_ete.rows();
|
const int e_block_size = inverse_ete.rows();
|
||||||
BufferLayoutType::const_iterator it1 = buffer_layout.begin();
|
BufferLayoutType::const_iterator it1 = buffer_layout.begin();
|
||||||
|
|
||||||
|
#ifdef CERES_USE_OPENMP
|
||||||
|
int thread_id = omp_get_thread_num();
|
||||||
|
#else
|
||||||
|
int thread_id = 0;
|
||||||
|
#endif
|
||||||
|
double* b1_transpose_inverse_ete =
|
||||||
|
chunk_outer_product_buffer_.get() + thread_id * buffer_size_;
|
||||||
|
|
||||||
// S(i,j) -= bi' * ete^{-1} b_j
|
// S(i,j) -= bi' * ete^{-1} b_j
|
||||||
for (; it1 != buffer_layout.end(); ++it1) {
|
for (; it1 != buffer_layout.end(); ++it1) {
|
||||||
const int block1 = it1->first - num_eliminate_blocks_;
|
const int block1 = it1->first - num_eliminate_blocks_;
|
||||||
const int block1_size = bs->cols[it1->first].size;
|
const int block1_size = bs->cols[it1->first].size;
|
||||||
|
MatrixTransposeMatrixMultiply
|
||||||
const typename EigenTypes<kEBlockSize, kFBlockSize>::ConstMatrixRef
|
<kEBlockSize, kFBlockSize, kEBlockSize, kEBlockSize, 0>(
|
||||||
b1(buffer + it1->second, e_block_size, block1_size);
|
buffer + it1->second, e_block_size, block1_size,
|
||||||
const typename EigenTypes<kFBlockSize, kEBlockSize>::Matrix
|
inverse_ete.data(), e_block_size, e_block_size,
|
||||||
b1_transpose_inverse_ete = b1.transpose() * inverse_ete;
|
b1_transpose_inverse_ete, 0, 0, block1_size, e_block_size);
|
||||||
|
|
||||||
BufferLayoutType::const_iterator it2 = it1;
|
BufferLayoutType::const_iterator it2 = it1;
|
||||||
for (; it2 != buffer_layout.end(); ++it2) {
|
for (; it2 != buffer_layout.end(); ++it2) {
|
||||||
@ -515,46 +518,15 @@ ChunkOuterProduct(const CompressedRowBlockStructure* bs,
|
|||||||
CellInfo* cell_info = lhs->GetCell(block1, block2,
|
CellInfo* cell_info = lhs->GetCell(block1, block2,
|
||||||
&r, &c,
|
&r, &c,
|
||||||
&row_stride, &col_stride);
|
&row_stride, &col_stride);
|
||||||
if (cell_info == NULL) {
|
if (cell_info != NULL) {
|
||||||
continue;
|
const int block2_size = bs->cols[it2->first].size;
|
||||||
|
CeresMutexLock l(&cell_info->m);
|
||||||
|
MatrixMatrixMultiply
|
||||||
|
<kFBlockSize, kEBlockSize, kEBlockSize, kFBlockSize, -1>(
|
||||||
|
b1_transpose_inverse_ete, block1_size, e_block_size,
|
||||||
|
buffer + it2->second, e_block_size, block2_size,
|
||||||
|
cell_info->values, r, c, row_stride, col_stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
const int block2_size = bs->cols[it2->first].size;
|
|
||||||
const typename EigenTypes<kEBlockSize, kFBlockSize>::ConstMatrixRef
|
|
||||||
b2(buffer + it2->second, e_block_size, block2_size);
|
|
||||||
|
|
||||||
CeresMutexLock l(&cell_info->m);
|
|
||||||
MatrixRef m(cell_info->values, row_stride, col_stride);
|
|
||||||
|
|
||||||
// We explicitly construct a block object here instead of using
|
|
||||||
// m.block(), as m.block() variant of the constructor does not
|
|
||||||
// allow mixing of template sizing and runtime sizing parameters
|
|
||||||
// like the Matrix class does.
|
|
||||||
Eigen::Block<MatrixRef, kFBlockSize, kFBlockSize>
|
|
||||||
block(m, r, c, block1_size, block2_size);
|
|
||||||
#ifdef CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG
|
|
||||||
// Removing the ".noalias()" annotation on the following statement is
|
|
||||||
// necessary to produce a correct build with the Android NDK, including
|
|
||||||
// versions 6, 7, 8, and 8b, when built with STLPort and the
|
|
||||||
// non-standalone toolchain (i.e. ndk-build). This appears to be a
|
|
||||||
// compiler bug; if the workaround is not in place, the line
|
|
||||||
//
|
|
||||||
// block.noalias() -= b1_transpose_inverse_ete * b2;
|
|
||||||
//
|
|
||||||
// gets compiled to
|
|
||||||
//
|
|
||||||
// block.noalias() += b1_transpose_inverse_ete * b2;
|
|
||||||
//
|
|
||||||
// which breaks schur elimination. Introducing a temporary by removing the
|
|
||||||
// .noalias() annotation causes the issue to disappear. Tracking this
|
|
||||||
// issue down was tricky, since the test suite doesn't run when built with
|
|
||||||
// the non-standalone toolchain.
|
|
||||||
//
|
|
||||||
// TODO(keir): Make a reproduction case for this and send it upstream.
|
|
||||||
block -= b1_transpose_inverse_ete * b2;
|
|
||||||
#else
|
|
||||||
block.noalias() -= b1_transpose_inverse_ete * b2;
|
|
||||||
#endif // CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -624,10 +596,13 @@ NoEBlockRowOuterProduct(const BlockSparseMatrixBase* A,
|
|||||||
&row_stride, &col_stride);
|
&row_stride, &col_stride);
|
||||||
if (cell_info != NULL) {
|
if (cell_info != NULL) {
|
||||||
CeresMutexLock l(&cell_info->m);
|
CeresMutexLock l(&cell_info->m);
|
||||||
MatrixRef m(cell_info->values, row_stride, col_stride);
|
// This multiply currently ignores the fact that this is a
|
||||||
m.block(r, c, block1_size, block1_size)
|
// symmetric outer product.
|
||||||
.selfadjointView<Eigen::Upper>()
|
MatrixTransposeMatrixMultiply
|
||||||
.rankUpdate(b1.transpose(), 1.0);
|
<Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||||
|
row_values + row.cells[i].position, row.block.size, block1_size,
|
||||||
|
row_values + row.cells[i].position, row.block.size, block1_size,
|
||||||
|
cell_info->values, r, c, row_stride, col_stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int j = i + 1; j < row.cells.size(); ++j) {
|
for (int j = i + 1; j < row.cells.size(); ++j) {
|
||||||
@ -638,17 +613,15 @@ NoEBlockRowOuterProduct(const BlockSparseMatrixBase* A,
|
|||||||
CellInfo* cell_info = lhs->GetCell(block1, block2,
|
CellInfo* cell_info = lhs->GetCell(block1, block2,
|
||||||
&r, &c,
|
&r, &c,
|
||||||
&row_stride, &col_stride);
|
&row_stride, &col_stride);
|
||||||
if (cell_info == NULL) {
|
if (cell_info != NULL) {
|
||||||
continue;
|
const int block2_size = bs->cols[row.cells[j].block_id].size;
|
||||||
|
CeresMutexLock l(&cell_info->m);
|
||||||
|
MatrixTransposeMatrixMultiply
|
||||||
|
<Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
|
||||||
|
row_values + row.cells[i].position, row.block.size, block1_size,
|
||||||
|
row_values + row.cells[j].position, row.block.size, block2_size,
|
||||||
|
cell_info->values, r, c, row_stride, col_stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
const int block2_size = bs->cols[row.cells[j].block_id].size;
|
|
||||||
CeresMutexLock l(&cell_info->m);
|
|
||||||
MatrixRef m(cell_info->values, row_stride, col_stride);
|
|
||||||
m.block(r, c, block1_size, block2_size).noalias() +=
|
|
||||||
b1.transpose() * ConstMatrixRef(row_values + row.cells[j].position,
|
|
||||||
row.block.size,
|
|
||||||
block2_size);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -670,25 +643,18 @@ EBlockRowOuterProduct(const BlockSparseMatrixBase* A,
|
|||||||
DCHECK_GE(block1, 0);
|
DCHECK_GE(block1, 0);
|
||||||
|
|
||||||
const int block1_size = bs->cols[row.cells[i].block_id].size;
|
const int block1_size = bs->cols[row.cells[i].block_id].size;
|
||||||
const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
|
int r, c, row_stride, col_stride;
|
||||||
b1(row_values + row.cells[i].position,
|
CellInfo* cell_info = lhs->GetCell(block1, block1,
|
||||||
row.block.size, block1_size);
|
&r, &c,
|
||||||
{
|
&row_stride, &col_stride);
|
||||||
int r, c, row_stride, col_stride;
|
if (cell_info != NULL) {
|
||||||
CellInfo* cell_info = lhs->GetCell(block1, block1,
|
|
||||||
&r, &c,
|
|
||||||
&row_stride, &col_stride);
|
|
||||||
if (cell_info == NULL) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
CeresMutexLock l(&cell_info->m);
|
CeresMutexLock l(&cell_info->m);
|
||||||
MatrixRef m(cell_info->values, row_stride, col_stride);
|
// block += b1.transpose() * b1;
|
||||||
|
MatrixTransposeMatrixMultiply
|
||||||
Eigen::Block<MatrixRef, kFBlockSize, kFBlockSize>
|
<kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>(
|
||||||
block(m, r, c, block1_size, block1_size);
|
row_values + row.cells[i].position, row.block.size, block1_size,
|
||||||
block.template selfadjointView<Eigen::Upper>()
|
row_values + row.cells[i].position, row.block.size, block1_size,
|
||||||
.rankUpdate(b1.transpose(), 1.0);
|
cell_info->values, r, c, row_stride, col_stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int j = i + 1; j < row.cells.size(); ++j) {
|
for (int j = i + 1; j < row.cells.size(); ++j) {
|
||||||
@ -700,20 +666,14 @@ EBlockRowOuterProduct(const BlockSparseMatrixBase* A,
|
|||||||
CellInfo* cell_info = lhs->GetCell(block1, block2,
|
CellInfo* cell_info = lhs->GetCell(block1, block2,
|
||||||
&r, &c,
|
&r, &c,
|
||||||
&row_stride, &col_stride);
|
&row_stride, &col_stride);
|
||||||
if (cell_info == NULL) {
|
if (cell_info != NULL) {
|
||||||
continue;
|
// block += b1.transpose() * b2;
|
||||||
|
MatrixTransposeMatrixMultiply
|
||||||
|
<kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>(
|
||||||
|
row_values + row.cells[i].position, row.block.size, block1_size,
|
||||||
|
row_values + row.cells[j].position, row.block.size, block2_size,
|
||||||
|
cell_info->values, r, c, row_stride, col_stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
|
|
||||||
b2(row_values + row.cells[j].position,
|
|
||||||
row.block.size,
|
|
||||||
block2_size);
|
|
||||||
|
|
||||||
CeresMutexLock l(&cell_info->m);
|
|
||||||
MatrixRef m(cell_info->values, row_stride, col_stride);
|
|
||||||
Eigen::Block<MatrixRef, kFBlockSize, kFBlockSize>
|
|
||||||
block(m, r, c, block1_size, block2_size);
|
|
||||||
block.noalias() += b1.transpose() * b2;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -213,13 +213,7 @@ LinearSolver::Summary SparseNormalCholeskySolver::SolveImplUsingSuiteSparse(
|
|||||||
} else {
|
} else {
|
||||||
factor_ = ss_.AnalyzeCholesky(lhs.get());
|
factor_ = ss_.AnalyzeCholesky(lhs.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (VLOG_IS_ON(2)) {
|
|
||||||
cholmod_print_common(const_cast<char*>("Symbolic Analysis"), ss_.mutable_cc());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
CHECK_NOTNULL(factor_);
|
|
||||||
event_logger.AddEvent("Analysis");
|
event_logger.AddEvent("Analysis");
|
||||||
|
|
||||||
cholmod_dense* sol = ss_.SolveCholesky(lhs.get(), factor_, rhs);
|
cholmod_dense* sol = ss_.SolveCholesky(lhs.get(), factor_, rhs);
|
||||||
|
@ -35,8 +35,18 @@
|
|||||||
#include "cholmod.h"
|
#include "cholmod.h"
|
||||||
#include "ceres/compressed_row_sparse_matrix.h"
|
#include "ceres/compressed_row_sparse_matrix.h"
|
||||||
#include "ceres/triplet_sparse_matrix.h"
|
#include "ceres/triplet_sparse_matrix.h"
|
||||||
|
|
||||||
namespace ceres {
|
namespace ceres {
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
|
SuiteSparse::SuiteSparse() {
|
||||||
|
cholmod_start(&cc_);
|
||||||
|
}
|
||||||
|
|
||||||
|
SuiteSparse::~SuiteSparse() {
|
||||||
|
cholmod_finish(&cc_);
|
||||||
|
}
|
||||||
|
|
||||||
cholmod_sparse* SuiteSparse::CreateSparseMatrix(TripletSparseMatrix* A) {
|
cholmod_sparse* SuiteSparse::CreateSparseMatrix(TripletSparseMatrix* A) {
|
||||||
cholmod_triplet triplet;
|
cholmod_triplet triplet;
|
||||||
|
|
||||||
@ -117,10 +127,16 @@ cholmod_factor* SuiteSparse::AnalyzeCholesky(cholmod_sparse* A) {
|
|||||||
cc_.nmethods = 1;
|
cc_.nmethods = 1;
|
||||||
cc_.method[0].ordering = CHOLMOD_AMD;
|
cc_.method[0].ordering = CHOLMOD_AMD;
|
||||||
cc_.supernodal = CHOLMOD_AUTO;
|
cc_.supernodal = CHOLMOD_AUTO;
|
||||||
|
|
||||||
cholmod_factor* factor = cholmod_analyze(A, &cc_);
|
cholmod_factor* factor = cholmod_analyze(A, &cc_);
|
||||||
CHECK_EQ(cc_.status, CHOLMOD_OK)
|
CHECK_EQ(cc_.status, CHOLMOD_OK)
|
||||||
<< "Cholmod symbolic analysis failed " << cc_.status;
|
<< "Cholmod symbolic analysis failed " << cc_.status;
|
||||||
CHECK_NOTNULL(factor);
|
CHECK_NOTNULL(factor);
|
||||||
|
|
||||||
|
if (VLOG_IS_ON(2)) {
|
||||||
|
cholmod_print_common(const_cast<char*>("Symbolic Analysis"), &cc_);
|
||||||
|
}
|
||||||
|
|
||||||
return factor;
|
return factor;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -139,13 +155,20 @@ cholmod_factor* SuiteSparse::AnalyzeCholeskyWithUserOrdering(
|
|||||||
cholmod_sparse* A,
|
cholmod_sparse* A,
|
||||||
const vector<int>& ordering) {
|
const vector<int>& ordering) {
|
||||||
CHECK_EQ(ordering.size(), A->nrow);
|
CHECK_EQ(ordering.size(), A->nrow);
|
||||||
|
|
||||||
cc_.nmethods = 1;
|
cc_.nmethods = 1;
|
||||||
cc_.method[0].ordering = CHOLMOD_GIVEN;
|
cc_.method[0].ordering = CHOLMOD_GIVEN;
|
||||||
|
|
||||||
cholmod_factor* factor =
|
cholmod_factor* factor =
|
||||||
cholmod_analyze_p(A, const_cast<int*>(&ordering[0]), NULL, 0, &cc_);
|
cholmod_analyze_p(A, const_cast<int*>(&ordering[0]), NULL, 0, &cc_);
|
||||||
CHECK_EQ(cc_.status, CHOLMOD_OK)
|
CHECK_EQ(cc_.status, CHOLMOD_OK)
|
||||||
<< "Cholmod symbolic analysis failed " << cc_.status;
|
<< "Cholmod symbolic analysis failed " << cc_.status;
|
||||||
CHECK_NOTNULL(factor);
|
CHECK_NOTNULL(factor);
|
||||||
|
|
||||||
|
if (VLOG_IS_ON(2)) {
|
||||||
|
cholmod_print_common(const_cast<char*>("Symbolic Analysis"), &cc_);
|
||||||
|
}
|
||||||
|
|
||||||
return factor;
|
return factor;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -276,36 +299,52 @@ bool SuiteSparse::Cholesky(cholmod_sparse* A, cholmod_factor* L) {
|
|||||||
CHECK_NOTNULL(A);
|
CHECK_NOTNULL(A);
|
||||||
CHECK_NOTNULL(L);
|
CHECK_NOTNULL(L);
|
||||||
|
|
||||||
|
// Save the current print level and silence CHOLMOD, otherwise
|
||||||
|
// CHOLMOD is prone to dumping stuff to stderr, which can be
|
||||||
|
// distracting when the error (matrix is indefinite) is not a fatal
|
||||||
|
// failure.
|
||||||
|
const int old_print_level = cc_.print;
|
||||||
|
cc_.print = 0;
|
||||||
|
|
||||||
cc_.quick_return_if_not_posdef = 1;
|
cc_.quick_return_if_not_posdef = 1;
|
||||||
int status = cholmod_factorize(A, L, &cc_);
|
int status = cholmod_factorize(A, L, &cc_);
|
||||||
|
cc_.print = old_print_level;
|
||||||
|
|
||||||
|
// TODO(sameeragarwal): This switch statement is not consistent. It
|
||||||
|
// treats all kinds of CHOLMOD failures as warnings. Some of these
|
||||||
|
// like out of memory are definitely not warnings. The problem is
|
||||||
|
// that the return value Cholesky is two valued, but the state of
|
||||||
|
// the linear solver is really three valued. SUCCESS,
|
||||||
|
// NON_FATAL_FAILURE (e.g., indefinite matrix) and FATAL_FAILURE
|
||||||
|
// (e.g. out of memory).
|
||||||
switch (cc_.status) {
|
switch (cc_.status) {
|
||||||
case CHOLMOD_NOT_INSTALLED:
|
case CHOLMOD_NOT_INSTALLED:
|
||||||
LOG(WARNING) << "Cholmod failure: method not installed.";
|
LOG(WARNING) << "CHOLMOD failure: Method not installed.";
|
||||||
return false;
|
return false;
|
||||||
case CHOLMOD_OUT_OF_MEMORY:
|
case CHOLMOD_OUT_OF_MEMORY:
|
||||||
LOG(WARNING) << "Cholmod failure: out of memory.";
|
LOG(WARNING) << "CHOLMOD failure: Out of memory.";
|
||||||
return false;
|
return false;
|
||||||
case CHOLMOD_TOO_LARGE:
|
case CHOLMOD_TOO_LARGE:
|
||||||
LOG(WARNING) << "Cholmod failure: integer overflow occured.";
|
LOG(WARNING) << "CHOLMOD failure: Integer overflow occured.";
|
||||||
return false;
|
return false;
|
||||||
case CHOLMOD_INVALID:
|
case CHOLMOD_INVALID:
|
||||||
LOG(WARNING) << "Cholmod failure: invalid input.";
|
LOG(WARNING) << "CHOLMOD failure: Invalid input.";
|
||||||
return false;
|
return false;
|
||||||
case CHOLMOD_NOT_POSDEF:
|
case CHOLMOD_NOT_POSDEF:
|
||||||
// TODO(sameeragarwal): These two warnings require more
|
// TODO(sameeragarwal): These two warnings require more
|
||||||
// sophisticated handling going forward. For now we will be
|
// sophisticated handling going forward. For now we will be
|
||||||
// strict and treat them as failures.
|
// strict and treat them as failures.
|
||||||
LOG(WARNING) << "Cholmod warning: matrix not positive definite.";
|
LOG(WARNING) << "CHOLMOD warning: Matrix not positive definite.";
|
||||||
return false;
|
return false;
|
||||||
case CHOLMOD_DSMALL:
|
case CHOLMOD_DSMALL:
|
||||||
LOG(WARNING) << "Cholmod warning: D for LDL' or diag(L) or "
|
LOG(WARNING) << "CHOLMOD warning: D for LDL' or diag(L) or "
|
||||||
<< "LL' has tiny absolute value.";
|
<< "LL' has tiny absolute value.";
|
||||||
return false;
|
return false;
|
||||||
case CHOLMOD_OK:
|
case CHOLMOD_OK:
|
||||||
if (status != 0) {
|
if (status != 0) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
LOG(WARNING) << "Cholmod failure: cholmod_factorize returned zero "
|
LOG(WARNING) << "CHOLMOD failure: cholmod_factorize returned zero "
|
||||||
<< "but cholmod_common::status is CHOLMOD_OK."
|
<< "but cholmod_common::status is CHOLMOD_OK."
|
||||||
<< "Please report this to ceres-solver@googlegroups.com.";
|
<< "Please report this to ceres-solver@googlegroups.com.";
|
||||||
return false;
|
return false;
|
||||||
|
@ -56,8 +56,8 @@ class TripletSparseMatrix;
|
|||||||
// for all cholmod function calls.
|
// for all cholmod function calls.
|
||||||
class SuiteSparse {
|
class SuiteSparse {
|
||||||
public:
|
public:
|
||||||
SuiteSparse() { cholmod_start(&cc_); }
|
SuiteSparse();
|
||||||
~SuiteSparse() { cholmod_finish(&cc_); }
|
~SuiteSparse();
|
||||||
|
|
||||||
// Functions for building cholmod_sparse objects from sparse
|
// Functions for building cholmod_sparse objects from sparse
|
||||||
// matrices stored in triplet form. The matrix A is not
|
// matrices stored in triplet form. The matrix A is not
|
||||||
|
@ -426,14 +426,8 @@ bool VisibilityBasedPreconditioner::Factorize() {
|
|||||||
} else {
|
} else {
|
||||||
factor_ = ss_.AnalyzeCholesky(lhs);
|
factor_ = ss_.AnalyzeCholesky(lhs);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (VLOG_IS_ON(2)) {
|
|
||||||
cholmod_print_common(const_cast<char*>("Symbolic Analysis"), ss_.mutable_cc());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
CHECK_NOTNULL(factor_);
|
|
||||||
|
|
||||||
bool status = ss_.Cholesky(lhs, factor_);
|
bool status = ss_.Cholesky(lhs, factor_);
|
||||||
ss_.Free(lhs);
|
ss_.Free(lhs);
|
||||||
return status;
|
return status;
|
||||||
|
Loading…
Reference in New Issue
Block a user