Added SuperLU 3.0:

http://crd.lbl.gov/~xiaoye/SuperLU/

This is a library to solve sparse matrix systems (type A*x=B). It is able
to solve large systems very FAST. Only the necessary parts of the library
are included to limit file size and compilation time. This means the example
files, fortran interface, test files, matlab interface, cblas library,
complex number part and build system have been left out. All (gcc) warnings
have been fixed too.

This library will be used for LSCM UV unwrapping. With this library, LSCM
unwrapping can be calculated in a split second, making the unwrapping proces
much more interactive.

Added OpenNL (Open Numerical Libary):
http://www.loria.fr/~levy/OpenNL/

OpenNL is a library to easily construct and solve sparse linear systems. We
use a stripped down version, as an interface to SuperLU.

This library was kindly given to use by Bruno Levy.
This commit is contained in:
Brecht Van Lommel 2004-07-13 11:42:13 +00:00
parent 4a13a5720d
commit 4f1c674ee0
51 changed files with 13799 additions and 3 deletions

@ -962,7 +962,8 @@ def blender_libs(env):
'blender_LOD',
'blender_BSP',
'blender_blenkernel',
'blender_IK'])
'blender_IK',
'blender_ONL'])
def ketsji_libs(env):
"""

@ -35,7 +35,7 @@ SOURCEDIR = intern
# include nan_subdirs.mk
ALLDIRS = string ghost guardedalloc bmfont moto container memutil
ALLDIRS += decimation iksolver bsp SoundSystem
ALLDIRS += decimation iksolver bsp SoundSystem opennl
all::
@for i in $(ALLDIRS); do \

@ -8,7 +8,8 @@ SConscript(['SoundSystem/SConscript',
'container/SConscript',
'memutil/SConscript/',
'decimation/SConscript',
'iksolver/SConscript'])
'iksolver/SConscript',
'opennl/SConscript'])
NEW_CSG='false'

67
intern/opennl/Makefile Normal file

@ -0,0 +1,67 @@
#
# $Id$
#
# ***** BEGIN GPL/BL DUAL LICENSE BLOCK *****
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version. The Blender
# Foundation also sells licenses for use in proprietary software under
# the Blender License. See http://www.blender.org/BL/ for information
# about this.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
# All rights reserved.
#
# The Original Code is: all of this file.
#
# Contributor(s): Hans Lambermont
#
# ***** END GPL/BL DUAL LICENSE BLOCK *****
# opennl main makefile.
#
include nan_definitions.mk
LIBNAME = opennl
LIBNAME_SLU = superlu
SOURCEDIR = intern/$(LIBNAME)
SOURCEDIR_SLU = intern/$(LIBNAME_SLU)
DIR = $(OCGDIR)/$(SOURCEDIR)
DIR_SLU = $(OCGDIR)/$(SOURCEDIR_SLU)
DIRS = intern superlu
include nan_subdirs.mk
install: all debug
@[ -d $(NAN_OPENNL) ] || mkdir $(NAN_OPENNL)
@[ -d $(NAN_OPENNL)/include ] || mkdir $(NAN_OPENNL)/include
@[ -d $(NAN_OPENNL)/lib ] || mkdir $(NAN_OPENNL)/lib
@[ -d $(NAN_OPENNL)/lib/debug ] || mkdir $(NAN_OPENNL)/lib/debug
@../tools/cpifdiff.sh $(DIR)/libopennl.a $(NAN_OPENNL)/lib/
@../tools/cpifdiff.sh $(DIR)/debug/libopennl.a $(NAN_OPENNL)/lib/debug/
ifeq ($(OS),darwin)
ranlib $(NAN_OPENNL)/lib/libopennl.a
ranlib $(NAN_OPENNL)/lib/debug/libopennl.a
endif
@../tools/cpifdiff.sh extern/*.h $(NAN_OPENNL)/include/
@[ -d $(NAN_SUPERLU) ] || mkdir $(NAN_SUPERLU)
@[ -d $(NAN_SUPERLU)/lib ] || mkdir $(NAN_SUPERLU)/lib
@[ -d $(NAN_SUPERLU)/lib/debug ] || mkdir $(NAN_SUPERLU)/lib/debug
@../tools/cpifdiff.sh $(DIR_SLU)/libsuperlu.a $(NAN_SUPERLU)/lib/
@../tools/cpifdiff.sh $(DIR_SLU)/debug/libsuperlu.a $(NAN_SUPERLU)/lib/debug/
ifeq ($(OS),darwin)
ranlib $(NAN_SUPERLU)/lib/libsuperlu.a
ranlib $(NAN_SUPERLU)/lib/debug/libsuperlu.a
endif

43
intern/opennl/SConscript Normal file

@ -0,0 +1,43 @@
Import ('user_options_dict')
Import ('library_env')
opennl_env = library_env.Copy ()
source_files = ['intern/opennl.c',
'superlu/colamd.c',
'superlu/get_perm_c.c',
'superlu/heap_relax_snode.c',
'superlu/lsame.c',
'superlu/memory.c',
'superlu/mmd.c',
'superlu/relax_snode.c',
'superlu/scolumn_bmod.c',
'superlu/scolumn_dfs.c',
'superlu/scopy_to_ucol.c',
'superlu/sgssv.c',
'superlu/sgstrf.c',
'superlu/sgstrs.c',
'superlu/smemory.c',
'superlu/smyblas2.c',
'superlu/sp_coletree.c',
'superlu/sp_ienv.c',
'superlu/sp_preorder.c',
'superlu/spanel_bmod.c',
'superlu/spanel_dfs.c',
'superlu/spivotL.c',
'superlu/spruneL.c',
'superlu/ssnode_bmod.c',
'superlu/ssnode_dfs.c',
'superlu/ssp_blas2.c',
'superlu/ssp_blas3.c',
'superlu/strsv.c',
'superlu/superlu_timer.c',
'superlu/sutil.c',
'superlu/util.c',
'superlu/xerbla.c']
opennl_env.Append (CPPPATH = ['extern',
'superlu'])
opennl_env.Library (target='#'+user_options_dict['BUILD_DIR']+'/lib/blender_ONL', source=source_files)

@ -0,0 +1,341 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
675 Mass Ave, Cambridge, MA 02139, USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Library General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) 19yy <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) 19yy name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Library General
Public License instead of this License.

@ -0,0 +1,13 @@
This is OpenNL, a library to easily construct and solve sparse linear systems.
* OpenNL is supplied with a set of iterative solvers (Conjugate gradient,
BICGSTAB, GMRes) and preconditioners (Jacobi, SSOR).
* OpenNL can also use other solvers (SuperLU 3.0 supported as an OpenNL
extension)
Note that to be compatible with OpenNL, SuperLU 3.0 needs to be compiled with
the following flag (see make.inc in SuperLU3.0):
CDEFS = -DAdd_ (the default is -DAdd__, just remove the second underscore)
OpenNL was modified for Blender to be used only as a wrapper for SuperLU.

@ -0,0 +1,31 @@
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required
approvals from U.S. Dept. of Energy)
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification,
are permitted provided that the following conditions are met:
(1) Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
(2) Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
(3) Neither the name of Lawrence Berkeley National Laboratory, U.S. Dept. of
Energy nor the names of its contributors may be used to endorse or promote
products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -0,0 +1,52 @@
SuperLU (Version 3.0)
=====================
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required
approvals from U.S. Dept. of Energy)
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
(1) Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
(2) Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
(3) Neither the name of Lawrence Berkeley National Laboratory, U.S. Dept. of
Energy nor the names of its contributors may be used to endorse or promote
products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
SuperLU contains a set of subroutines to solve a sparse linear system
A*X=B. It uses Gaussian elimination with partial pivoting (GEPP).
The columns of A may be preordered before factorization; the
preordering for sparsity is completely separate from the factorization.
SuperLU is implemented in ANSI C, and must be compiled with standard
ANSI C compilers. It provides functionality for both real and complex
matrices, in both single and double precision. The file names for the
single-precision real version start with letter "s" (such as sgstrf.c);
the file names for the double-precision real version start with letter "d"
(such as dgstrf.c); the file names for the single-precision complex
version start with letter "c" (such as cgstrf.c); the file names
for the double-precision complex version start with letter "z"
(such as zgstrf.c).
SuperLU was modified for Blender to only include single-precision
functionality.

163
intern/opennl/extern/ONL_opennl.h vendored Normal file

@ -0,0 +1,163 @@
/*
* $Id$
*
* OpenNL: Numerical Library
* Copyright (C) 2004 Bruno Levy
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* If you modify this software, you should include a notice giving the
* name of the person performing the modification, the date of modification,
* and the reason for such modification.
*
* Contact: Bruno Levy
*
* levy@loria.fr
*
* ISA Project
* LORIA, INRIA Lorraine,
* Campus Scientifique, BP 239
* 54506 VANDOEUVRE LES NANCY CEDEX
* FRANCE
*
* Note that the GNU General Public License does not permit incorporating
* the Software into proprietary programs.
*/
/*
#define NL_DEBUG
#define NL_PARANOID
*/
#define NL_USE_SUPERLU
#ifndef nlOPENNL_H
#define nlOPENNL_H
#ifdef __cplusplus
extern "C" {
#endif
#define NL_VERSION_0_0 1
/*
*
* Datatypes
*
*/
typedef unsigned int NLenum;
typedef unsigned char NLboolean;
typedef unsigned int NLbitfield;
typedef void NLvoid;
typedef signed char NLbyte; /* 1-byte signed */
typedef short NLshort; /* 2-byte signed */
typedef int NLint; /* 4-byte signed */
typedef unsigned char NLubyte; /* 1-byte unsigned */
typedef unsigned short NLushort; /* 2-byte unsigned */
typedef unsigned int NLuint; /* 4-byte unsigned */
typedef int NLsizei; /* 4-byte signed */
typedef float NLfloat; /* single precision float */
typedef double NLdouble; /* double precision float */
typedef void* NLContext ;
/*
*
* Constants
*
*/
#define NL_FALSE 0x0
#define NL_TRUE 0x1
/* Primitives */
#define NL_SYSTEM 0x0
#define NL_MATRIX 0x1
#define NL_ROW 0x2
/* Solver Parameters */
#define NL_SOLVER 0x100
#define NL_NB_VARIABLES 0x101
#define NL_LEAST_SQUARES 0x102
#define NL_SYMMETRIC 0x106
#define NL_ERROR 0x108
/* Enable / Disable */
#define NL_NORMALIZE_ROWS 0x400
/* Row parameters */
#define NL_RIGHT_HAND_SIDE 0x500
#define NL_ROW_SCALING 0x501
/*
* Contexts
*/
NLContext nlNewContext() ;
void nlDeleteContext(NLContext context) ;
void nlMakeCurrent(NLContext context) ;
NLContext nlGetCurrent() ;
/*
* State set/get
*/
void nlSolverParameterf(NLenum pname, NLfloat param) ;
void nlSolverParameteri(NLenum pname, NLint param) ;
void nlRowParameterf(NLenum pname, NLfloat param) ;
void nlRowParameteri(NLenum pname, NLint param) ;
void nlGetBooleanv(NLenum pname, NLboolean* params) ;
void nlGetFloatv(NLenum pname, NLfloat* params) ;
void nlGetIntergerv(NLenum pname, NLint* params) ;
void nlEnable(NLenum pname) ;
void nlDisable(NLenum pname) ;
NLboolean nlIsEnabled(NLenum pname) ;
/*
* Variables
*/
void nlSetVariable(NLuint index, NLfloat value) ;
NLfloat nlGetVariable(NLuint index) ;
void nlLockVariable(NLuint index) ;
void nlUnlockVariable(NLuint index) ;
NLboolean nlVariableIsLocked(NLuint index) ;
/*
* Begin/End
*/
void nlBegin(NLenum primitive) ;
void nlEnd(NLenum primitive) ;
void nlCoefficient(NLuint index, NLfloat value) ;
/*
* Solve
*/
NLboolean nlSolve() ;
#ifdef __cplusplus
}
#endif
#endif

@ -0,0 +1,43 @@
#
# $Id$
#
# ***** BEGIN GPL/BL DUAL LICENSE BLOCK *****
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version. The Blender
# Foundation also sells licenses for use in proprietary software under
# the Blender License. See http://www.blender.org/BL/ for information
# about this.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
# All rights reserved.
#
# The Original Code is: all of this file.
#
# Contributor(s): none yet.
#
# ***** END GPL/BL DUAL LICENSE BLOCK *****
# opennl intern Makefile
#
LIBNAME = opennl
DIR = $(OCGDIR)/intern/$(LIBNAME)
include nan_compile.mk
CCFLAGS += $(NAN_LEVEL_2_CPP_WARNINGS)
CPPFLAGS += -I../superlu -I../extern

File diff suppressed because it is too large Load Diff

@ -0,0 +1,281 @@
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* November 1, 1997
*
*/
#ifndef __SUPERLU_CNAMES /* allow multiple inclusions */
#define __SUPERLU_CNAMES
/* We want this flag, safer than putting in build system */
#define Add_
/*
* These macros define how C routines will be called. ADD_ assumes that
* they will be called by fortran, which expects C routines to have an
* underscore postfixed to the name (Suns, and the Intel expect this).
* NOCHANGE indicates that fortran will be calling, and that it expects
* the name called by fortran to be identical to that compiled by the C
* (RS6K's do this). UPCASE says it expects C routines called by fortran
* to be in all upcase (CRAY wants this).
*/
#define ADD_ 0
#define ADD__ 1
#define NOCHANGE 2
#define UPCASE 3
#define C_CALL 4
#ifdef UpCase
#define F77_CALL_C UPCASE
#endif
#ifdef NoChange
#define F77_CALL_C NOCHANGE
#endif
#ifdef Add_
#define F77_CALL_C ADD_
#endif
#ifdef Add__
#define F77_CALL_C ADD__
#endif
/* Default */
#ifndef F77_CALL_C
#define F77_CALL_C ADD_
#endif
#if (F77_CALL_C == ADD_)
/*
* These defines set up the naming scheme required to have a fortran 77
* routine call a C routine
* No redefinition necessary to have following Fortran to C interface:
* FORTRAN CALL C DECLARATION
* call dgemm(...) void dgemm_(...)
*
* This is the default.
*/
#endif
#if (F77_CALL_C == ADD__)
/*
* These defines set up the naming scheme required to have a fortran 77
* routine call a C routine
* for following Fortran to C interface:
* FORTRAN CALL C DECLARATION
* call dgemm(...) void dgemm__(...)
*/
#define sasum_ sasum__
#define isamax_ isamax__
#define scopy_ scopy__
#define sscal_ sscal__
#define sger_ sger__
#define snrm2_ snrm2__
#define ssymv_ ssymv__
#define sdot_ sdot__
#define saxpy_ saxpy__
#define ssyr2_ ssyr2__
#define srot_ srot__
#define sgemv_ sgemv__
#define strsv_ strsv__
#define sgemm_ sgemm__
#define strsm_ strsm__
#define dasum_ dasum__
#define idamax_ idamax__
#define dcopy_ dcopy__
#define dscal_ dscal__
#define dger_ dger__
#define dnrm2_ dnrm2__
#define dsymv_ dsymv__
#define ddot_ ddot__
#define daxpy_ daxpy__
#define dsyr2_ dsyr2__
#define drot_ drot__
#define dgemv_ dgemv__
#define dtrsv_ dtrsv__
#define dgemm_ dgemm__
#define dtrsm_ dtrsm__
#define scasum_ scasum__
#define icamax_ icamax__
#define ccopy_ ccopy__
#define cscal_ cscal__
#define scnrm2_ scnrm2__
#define caxpy_ caxpy__
#define cgemv_ cgemv__
#define ctrsv_ ctrsv__
#define cgemm_ cgemm__
#define ctrsm_ ctrsm__
#define cgerc_ cgerc__
#define chemv_ chemv__
#define cher2_ cher2__
#define dzasum_ dzasum__
#define izamax_ izamax__
#define zcopy_ zcopy__
#define zscal_ zscal__
#define dznrm2_ dznrm2__
#define zaxpy_ zaxpy__
#define zgemv_ zgemv__
#define ztrsv_ ztrsv__
#define zgemm_ zgemm__
#define ztrsm_ ztrsm__
#define zgerc_ zgerc__
#define zhemv_ zhemv__
#define zher2_ zher2__
#define c_bridge_dgssv_ c_bridge_dgssv__
#define c_fortran_dgssv_ c_fortran_dgssv__
#endif
#if (F77_CALL_C == UPCASE)
/*
* These defines set up the naming scheme required to have a fortran 77
* routine call a C routine
* following Fortran to C interface:
* FORTRAN CALL C DECLARATION
* call dgemm(...) void DGEMM(...)
*/
#define sasum_ SASUM
#define isamax_ ISAMAX
#define scopy_ SCOPY
#define sscal_ SSCAL
#define sger_ SGER
#define snrm2_ SNRM2
#define ssymv_ SSYMV
#define sdot_ SDOT
#define saxpy_ SAXPY
#define ssyr2_ SSYR2
#define srot_ SROT
#define sgemv_ SGEMV
#define strsv_ STRSV
#define sgemm_ SGEMM
#define strsm_ STRSM
#define dasum_ SASUM
#define idamax_ ISAMAX
#define dcopy_ SCOPY
#define dscal_ SSCAL
#define dger_ SGER
#define dnrm2_ SNRM2
#define dsymv_ SSYMV
#define ddot_ SDOT
#define daxpy_ SAXPY
#define dsyr2_ SSYR2
#define drot_ SROT
#define dgemv_ SGEMV
#define dtrsv_ STRSV
#define dgemm_ SGEMM
#define dtrsm_ STRSM
#define scasum_ SCASUM
#define icamax_ ICAMAX
#define ccopy_ CCOPY
#define cscal_ CSCAL
#define scnrm2_ SCNRM2
#define caxpy_ CAXPY
#define cgemv_ CGEMV
#define ctrsv_ CTRSV
#define cgemm_ CGEMM
#define ctrsm_ CTRSM
#define cgerc_ CGERC
#define chemv_ CHEMV
#define cher2_ CHER2
#define dzasum_ SCASUM
#define izamax_ ICAMAX
#define zcopy_ CCOPY
#define zscal_ CSCAL
#define dznrm2_ SCNRM2
#define zaxpy_ CAXPY
#define zgemv_ CGEMV
#define ztrsv_ CTRSV
#define zgemm_ CGEMM
#define ztrsm_ CTRSM
#define zgerc_ CGERC
#define zhemv_ CHEMV
#define zher2_ CHER2
#define c_bridge_dgssv_ C_BRIDGE_DGSSV
#define c_fortran_dgssv_ C_FORTRAN_DGSSV
#endif
#if (F77_CALL_C == NOCHANGE)
/*
* These defines set up the naming scheme required to have a fortran 77
* routine call a C routine
* for following Fortran to C interface:
* FORTRAN CALL C DECLARATION
* call dgemm(...) void dgemm(...)
*/
#define sasum_ sasum
#define isamax_ isamax
#define scopy_ scopy
#define sscal_ sscal
#define sger_ sger
#define snrm2_ snrm2
#define ssymv_ ssymv
#define sdot_ sdot
#define saxpy_ saxpy
#define ssyr2_ ssyr2
#define srot_ srot
#define sgemv_ sgemv
#define strsv_ strsv
#define sgemm_ sgemm
#define strsm_ strsm
#define dasum_ dasum
#define idamax_ idamax
#define dcopy_ dcopy
#define dscal_ dscal
#define dger_ dger
#define dnrm2_ dnrm2
#define dsymv_ dsymv
#define ddot_ ddot
#define daxpy_ daxpy
#define dsyr2_ dsyr2
#define drot_ drot
#define dgemv_ dgemv
#define dtrsv_ dtrsv
#define dgemm_ dgemm
#define dtrsm_ dtrsm
#define scasum_ scasum
#define icamax_ icamax
#define ccopy_ ccopy
#define cscal_ cscal
#define scnrm2_ scnrm2
#define caxpy_ caxpy
#define cgemv_ cgemv
#define ctrsv_ ctrsv
#define cgemm_ cgemm
#define ctrsm_ ctrsm
#define cgerc_ cgerc
#define chemv_ chemv
#define cher2_ cher2
#define dzasum_ dzasum
#define izamax_ izamax
#define zcopy_ zcopy
#define zscal_ zscal
#define dznrm2_ dznrm2
#define zaxpy_ zaxpy
#define zgemv_ zgemv
#define ztrsv_ ztrsv
#define zgemm_ zgemm
#define ztrsm_ ztrsm
#define zgerc_ zgerc
#define zhemv_ zhemv
#define zher2_ zher2
#define c_bridge_dgssv_ c_bridge_dgssv
#define c_fortran_dgssv_ c_fortran_dgssv
#endif
#endif /* __SUPERLU_CNAMES */

@ -0,0 +1,40 @@
#
# $Id$
#
# ***** BEGIN GPL/BL DUAL LICENSE BLOCK *****
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version. The Blender
# Foundation also sells licenses for use in proprietary software under
# the Blender License. See http://www.blender.org/BL/ for information
# about this.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
# All rights reserved.
#
# The Original Code is: all of this file.
#
# Contributor(s): none yet.
#
# ***** END GPL/BL DUAL LICENSE BLOCK *****
# opennl intern Makefile
#
LIBNAME = superlu
DIR = $(OCGDIR)/intern/$(LIBNAME)
include nan_compile.mk
CCFLAGS += $(NAN_LEVEL_2_CPP_WARNINGS)

File diff suppressed because it is too large Load Diff

@ -0,0 +1,67 @@
/* ========================================================================== */
/* === colamd prototypes and definitions ==================================== */
/* ========================================================================== */
/*
This is the colamd include file,
http://www.cise.ufl.edu/~davis/colamd/colamd.h
for use in the colamd.c, colamdmex.c, and symamdmex.c files located at
http://www.cise.ufl.edu/~davis/colamd/
See those files for a description of colamd and symamd, and for the
copyright notice, which also applies to this file.
August 3, 1998. Version 1.0.
*/
/* ========================================================================== */
/* === Definitions ========================================================== */
/* ========================================================================== */
/* size of the knobs [ ] array. Only knobs [0..1] are currently used. */
#define COLAMD_KNOBS 20
/* number of output statistics. Only A [0..2] are currently used. */
#define COLAMD_STATS 20
/* knobs [0] and A [0]: dense row knob and output statistic. */
#define COLAMD_DENSE_ROW 0
/* knobs [1] and A [1]: dense column knob and output statistic. */
#define COLAMD_DENSE_COL 1
/* A [2]: memory defragmentation count output statistic */
#define COLAMD_DEFRAG_COUNT 2
/* A [3]: whether or not the input columns were jumbled or had duplicates */
#define COLAMD_JUMBLED_COLS 3
/* ========================================================================== */
/* === Prototypes of user-callable routines ================================= */
/* ========================================================================== */
int colamd_recommended /* returns recommended value of Alen */
(
int nnz, /* nonzeros in A */
int n_row, /* number of rows in A */
int n_col /* number of columns in A */
) ;
void colamd_set_defaults /* sets default parameters */
( /* knobs argument is modified on output */
double knobs [COLAMD_KNOBS] /* parameter settings for colamd */
) ;
int colamd /* returns TRUE if successful, FALSE otherwise*/
( /* A and p arguments are modified on output */
int n_row, /* number of rows in A */
int n_col, /* number of columns in A */
int Alen, /* size of the array A */
int A [], /* row indices of A, of size Alen */
int p [], /* column pointers of A, of size n_col+1 */
double knobs [COLAMD_KNOBS] /* parameter settings for colamd */
) ;

@ -0,0 +1,453 @@
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* November 15, 1997
*
*/
#include "ssp_defs.h"
#include "colamd.h"
extern int genmmd_(int *, int *, int *, int *, int *, int *, int *,
int *, int *, int *, int *, int *);
void
get_colamd(
const int m, /* number of rows in matrix A. */
const int n, /* number of columns in matrix A. */
const int nnz,/* number of nonzeros in matrix A. */
int *colptr, /* column pointer of size n+1 for matrix A. */
int *rowind, /* row indices of size nz for matrix A. */
int *perm_c /* out - the column permutation vector. */
)
{
int Alen, *A, i, info, *p;
double *knobs;
Alen = colamd_recommended(nnz, m, n);
if ( !(knobs = (double *) SUPERLU_MALLOC(COLAMD_KNOBS * sizeof(double))) )
ABORT("Malloc fails for knobs");
colamd_set_defaults(knobs);
if (!(A = (int *) SUPERLU_MALLOC(Alen * sizeof(int))) )
ABORT("Malloc fails for A[]");
if (!(p = (int *) SUPERLU_MALLOC((n+1) * sizeof(int))) )
ABORT("Malloc fails for p[]");
for (i = 0; i <= n; ++i) p[i] = colptr[i];
for (i = 0; i < nnz; ++i) A[i] = rowind[i];
info = colamd(m, n, Alen, A, p, knobs);
if ( info == FALSE ) ABORT("COLAMD failed");
for (i = 0; i < n; ++i) perm_c[p[i]] = i;
SUPERLU_FREE(knobs);
SUPERLU_FREE(A);
SUPERLU_FREE(p);
}
void
getata(
const int m, /* number of rows in matrix A. */
const int n, /* number of columns in matrix A. */
const int nz, /* number of nonzeros in matrix A */
int *colptr, /* column pointer of size n+1 for matrix A. */
int *rowind, /* row indices of size nz for matrix A. */
int *atanz, /* out - on exit, returns the actual number of
nonzeros in matrix A'*A. */
int **ata_colptr, /* out - size n+1 */
int **ata_rowind /* out - size *atanz */
)
/*
* Purpose
* =======
*
* Form the structure of A'*A. A is an m-by-n matrix in column oriented
* format represented by (colptr, rowind). The output A'*A is in column
* oriented format (symmetrically, also row oriented), represented by
* (ata_colptr, ata_rowind).
*
* This routine is modified from GETATA routine by Tim Davis.
* The complexity of this algorithm is: SUM_{i=1,m} r(i)^2,
* i.e., the sum of the square of the row counts.
*
* Questions
* =========
* o Do I need to withhold the *dense* rows?
* o How do I know the number of nonzeros in A'*A?
*
*/
{
register int i, j, k, col, num_nz, ti, trow;
int *marker, *b_colptr, *b_rowind;
int *t_colptr, *t_rowind; /* a column oriented form of T = A' */
if ( !(marker = (int*) SUPERLU_MALLOC((SUPERLU_MAX(m,n)+1)*sizeof(int))) )
ABORT("SUPERLU_MALLOC fails for marker[]");
if ( !(t_colptr = (int*) SUPERLU_MALLOC((m+1) * sizeof(int))) )
ABORT("SUPERLU_MALLOC t_colptr[]");
if ( !(t_rowind = (int*) SUPERLU_MALLOC(nz * sizeof(int))) )
ABORT("SUPERLU_MALLOC fails for t_rowind[]");
/* Get counts of each column of T, and set up column pointers */
for (i = 0; i < m; ++i) marker[i] = 0;
for (j = 0; j < n; ++j) {
for (i = colptr[j]; i < colptr[j+1]; ++i)
++marker[rowind[i]];
}
t_colptr[0] = 0;
for (i = 0; i < m; ++i) {
t_colptr[i+1] = t_colptr[i] + marker[i];
marker[i] = t_colptr[i];
}
/* Transpose the matrix from A to T */
for (j = 0; j < n; ++j)
for (i = colptr[j]; i < colptr[j+1]; ++i) {
col = rowind[i];
t_rowind[marker[col]] = j;
++marker[col];
}
/* ----------------------------------------------------------------
compute B = T * A, where column j of B is:
Struct (B_*j) = UNION ( Struct (T_*k) )
A_kj != 0
do not include the diagonal entry
( Partition A as: A = (A_*1, ..., A_*n)
Then B = T * A = (T * A_*1, ..., T * A_*n), where
T * A_*j = (T_*1, ..., T_*m) * A_*j. )
---------------------------------------------------------------- */
/* Zero the diagonal flag */
for (i = 0; i < n; ++i) marker[i] = -1;
/* First pass determines number of nonzeros in B */
num_nz = 0;
for (j = 0; j < n; ++j) {
/* Flag the diagonal so it's not included in the B matrix */
marker[j] = j;
for (i = colptr[j]; i < colptr[j+1]; ++i) {
/* A_kj is nonzero, add pattern of column T_*k to B_*j */
k = rowind[i];
for (ti = t_colptr[k]; ti < t_colptr[k+1]; ++ti) {
trow = t_rowind[ti];
if ( marker[trow] != j ) {
marker[trow] = j;
num_nz++;
}
}
}
}
*atanz = num_nz;
/* Allocate storage for A'*A */
if ( !(*ata_colptr = (int*) SUPERLU_MALLOC( (n+1) * sizeof(int)) ) )
ABORT("SUPERLU_MALLOC fails for ata_colptr[]");
if ( *atanz ) {
if ( !(*ata_rowind = (int*) SUPERLU_MALLOC( *atanz * sizeof(int)) ) )
ABORT("SUPERLU_MALLOC fails for ata_rowind[]");
}
b_colptr = *ata_colptr; /* aliasing */
b_rowind = *ata_rowind;
/* Zero the diagonal flag */
for (i = 0; i < n; ++i) marker[i] = -1;
/* Compute each column of B, one at a time */
num_nz = 0;
for (j = 0; j < n; ++j) {
b_colptr[j] = num_nz;
/* Flag the diagonal so it's not included in the B matrix */
marker[j] = j;
for (i = colptr[j]; i < colptr[j+1]; ++i) {
/* A_kj is nonzero, add pattern of column T_*k to B_*j */
k = rowind[i];
for (ti = t_colptr[k]; ti < t_colptr[k+1]; ++ti) {
trow = t_rowind[ti];
if ( marker[trow] != j ) {
marker[trow] = j;
b_rowind[num_nz++] = trow;
}
}
}
}
b_colptr[n] = num_nz;
SUPERLU_FREE(marker);
SUPERLU_FREE(t_colptr);
SUPERLU_FREE(t_rowind);
}
void
at_plus_a(
const int n, /* number of columns in matrix A. */
const int nz, /* number of nonzeros in matrix A */
int *colptr, /* column pointer of size n+1 for matrix A. */
int *rowind, /* row indices of size nz for matrix A. */
int *bnz, /* out - on exit, returns the actual number of
nonzeros in matrix A'*A. */
int **b_colptr, /* out - size n+1 */
int **b_rowind /* out - size *bnz */
)
{
/*
* Purpose
* =======
*
* Form the structure of A'+A. A is an n-by-n matrix in column oriented
* format represented by (colptr, rowind). The output A'+A is in column
* oriented format (symmetrically, also row oriented), represented by
* (b_colptr, b_rowind).
*
*/
register int i, j, k, col, num_nz;
int *t_colptr, *t_rowind; /* a column oriented form of T = A' */
int *marker;
if ( !(marker = (int*) SUPERLU_MALLOC( n * sizeof(int)) ) )
ABORT("SUPERLU_MALLOC fails for marker[]");
if ( !(t_colptr = (int*) SUPERLU_MALLOC( (n+1) * sizeof(int)) ) )
ABORT("SUPERLU_MALLOC fails for t_colptr[]");
if ( !(t_rowind = (int*) SUPERLU_MALLOC( nz * sizeof(int)) ) )
ABORT("SUPERLU_MALLOC fails t_rowind[]");
/* Get counts of each column of T, and set up column pointers */
for (i = 0; i < n; ++i) marker[i] = 0;
for (j = 0; j < n; ++j) {
for (i = colptr[j]; i < colptr[j+1]; ++i)
++marker[rowind[i]];
}
t_colptr[0] = 0;
for (i = 0; i < n; ++i) {
t_colptr[i+1] = t_colptr[i] + marker[i];
marker[i] = t_colptr[i];
}
/* Transpose the matrix from A to T */
for (j = 0; j < n; ++j)
for (i = colptr[j]; i < colptr[j+1]; ++i) {
col = rowind[i];
t_rowind[marker[col]] = j;
++marker[col];
}
/* ----------------------------------------------------------------
compute B = A + T, where column j of B is:
Struct (B_*j) = Struct (A_*k) UNION Struct (T_*k)
do not include the diagonal entry
---------------------------------------------------------------- */
/* Zero the diagonal flag */
for (i = 0; i < n; ++i) marker[i] = -1;
/* First pass determines number of nonzeros in B */
num_nz = 0;
for (j = 0; j < n; ++j) {
/* Flag the diagonal so it's not included in the B matrix */
marker[j] = j;
/* Add pattern of column A_*k to B_*j */
for (i = colptr[j]; i < colptr[j+1]; ++i) {
k = rowind[i];
if ( marker[k] != j ) {
marker[k] = j;
++num_nz;
}
}
/* Add pattern of column T_*k to B_*j */
for (i = t_colptr[j]; i < t_colptr[j+1]; ++i) {
k = t_rowind[i];
if ( marker[k] != j ) {
marker[k] = j;
++num_nz;
}
}
}
*bnz = num_nz;
/* Allocate storage for A+A' */
if ( !(*b_colptr = (int*) SUPERLU_MALLOC( (n+1) * sizeof(int)) ) )
ABORT("SUPERLU_MALLOC fails for b_colptr[]");
if ( *bnz) {
if ( !(*b_rowind = (int*) SUPERLU_MALLOC( *bnz * sizeof(int)) ) )
ABORT("SUPERLU_MALLOC fails for b_rowind[]");
}
/* Zero the diagonal flag */
for (i = 0; i < n; ++i) marker[i] = -1;
/* Compute each column of B, one at a time */
num_nz = 0;
for (j = 0; j < n; ++j) {
(*b_colptr)[j] = num_nz;
/* Flag the diagonal so it's not included in the B matrix */
marker[j] = j;
/* Add pattern of column A_*k to B_*j */
for (i = colptr[j]; i < colptr[j+1]; ++i) {
k = rowind[i];
if ( marker[k] != j ) {
marker[k] = j;
(*b_rowind)[num_nz++] = k;
}
}
/* Add pattern of column T_*k to B_*j */
for (i = t_colptr[j]; i < t_colptr[j+1]; ++i) {
k = t_rowind[i];
if ( marker[k] != j ) {
marker[k] = j;
(*b_rowind)[num_nz++] = k;
}
}
}
(*b_colptr)[n] = num_nz;
SUPERLU_FREE(marker);
SUPERLU_FREE(t_colptr);
SUPERLU_FREE(t_rowind);
}
void
get_perm_c(int ispec, SuperMatrix *A, int *perm_c)
/*
* Purpose
* =======
*
* GET_PERM_C obtains a permutation matrix Pc, by applying the multiple
* minimum degree ordering code by Joseph Liu to matrix A'*A or A+A'.
* or using approximate minimum degree column ordering by Davis et. al.
* The LU factorization of A*Pc tends to have less fill than the LU
* factorization of A.
*
* Arguments
* =========
*
* ispec (input) int
* Specifies the type of column ordering to reduce fill:
* = 1: minimum degree on the structure of A^T * A
* = 2: minimum degree on the structure of A^T + A
* = 3: approximate minimum degree for unsymmetric matrices
* If ispec == 0, the natural ordering (i.e., Pc = I) is returned.
*
* A (input) SuperMatrix*
* Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
* of the linear equations is A->nrow. Currently, the type of A
* can be: Stype = NC; Dtype = _D; Mtype = GE. In the future,
* more general A can be handled.
*
* perm_c (output) int*
* Column permutation vector of size A->ncol, which defines the
* permutation matrix Pc; perm_c[i] = j means column i of A is
* in position j in A*Pc.
*
*/
{
NCformat *Astore = A->Store;
int m, n, bnz, *b_colptr, i;
int delta, maxint, nofsub, *invp;
int *b_rowind, *dhead, *qsize, *llist, *marker;
double t, SuperLU_timer_();
m = A->nrow;
n = A->ncol;
t = SuperLU_timer_();
switch ( ispec ) {
case 0: /* Natural ordering */
for (i = 0; i < n; ++i) perm_c[i] = i;
#if ( PRNTlevel>=1 )
printf("Use natural column ordering.\n");
#endif
return;
case 1: /* Minimum degree ordering on A'*A */
getata(m, n, Astore->nnz, Astore->colptr, Astore->rowind,
&bnz, &b_colptr, &b_rowind);
#if ( PRNTlevel>=1 )
printf("Use minimum degree ordering on A'*A.\n");
#endif
t = SuperLU_timer_() - t;
/*printf("Form A'*A time = %8.3f\n", t);*/
break;
case 2: /* Minimum degree ordering on A'+A */
if ( m != n ) ABORT("Matrix is not square");
at_plus_a(n, Astore->nnz, Astore->colptr, Astore->rowind,
&bnz, &b_colptr, &b_rowind);
#if ( PRNTlevel>=1 )
printf("Use minimum degree ordering on A'+A.\n");
#endif
t = SuperLU_timer_() - t;
/*printf("Form A'+A time = %8.3f\n", t);*/
break;
case 3: /* Approximate minimum degree column ordering. */
get_colamd(m, n, Astore->nnz, Astore->colptr, Astore->rowind,
perm_c);
#if ( PRNTlevel>=1 )
printf(".. Use approximate minimum degree column ordering.\n");
#endif
return;
default:
ABORT("Invalid ISPEC");
}
if ( bnz != 0 ) {
t = SuperLU_timer_();
/* Initialize and allocate storage for GENMMD. */
delta = 1; /* DELTA is a parameter to allow the choice of nodes
whose degree <= min-degree + DELTA. */
maxint = 2147483647; /* 2**31 - 1 */
invp = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int));
if ( !invp ) ABORT("SUPERLU_MALLOC fails for invp.");
dhead = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int));
if ( !dhead ) ABORT("SUPERLU_MALLOC fails for dhead.");
qsize = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int));
if ( !qsize ) ABORT("SUPERLU_MALLOC fails for qsize.");
llist = (int *) SUPERLU_MALLOC(n*sizeof(int));
if ( !llist ) ABORT("SUPERLU_MALLOC fails for llist.");
marker = (int *) SUPERLU_MALLOC(n*sizeof(int));
if ( !marker ) ABORT("SUPERLU_MALLOC fails for marker.");
/* Transform adjacency list into 1-based indexing required by GENMMD.*/
for (i = 0; i <= n; ++i) ++b_colptr[i];
for (i = 0; i < bnz; ++i) ++b_rowind[i];
genmmd_(&n, b_colptr, b_rowind, perm_c, invp, &delta, dhead,
qsize, llist, marker, &maxint, &nofsub);
/* Transform perm_c into 0-based indexing. */
for (i = 0; i < n; ++i) --perm_c[i];
SUPERLU_FREE(b_colptr);
SUPERLU_FREE(b_rowind);
SUPERLU_FREE(invp);
SUPERLU_FREE(dhead);
SUPERLU_FREE(qsize);
SUPERLU_FREE(llist);
SUPERLU_FREE(marker);
t = SuperLU_timer_() - t;
/* printf("call GENMMD time = %8.3f\n", t);*/
} else { /* Empty adjacency structure */
for (i = 0; i < n; ++i) perm_c[i] = i;
}
}

@ -0,0 +1,116 @@
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* October 15, 2003
*
*/
/*
Copyright (c) 1994 by Xerox Corporation. All rights reserved.
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
Permission is hereby granted to use or copy this program for any
purpose, provided the above notices are retained on all copies.
Permission to modify the code and to distribute modified code is
granted, provided the above notices are retained, and a notice that
the code was modified is included with the above copyright notice.
*/
#include "ssp_defs.h"
void
heap_relax_snode (
const int n,
int *et, /* column elimination tree */
const int relax_columns, /* max no of columns allowed in a
relaxed snode */
int *descendants, /* no of descendants of each node
in the etree */
int *relax_end /* last column in a supernode */
)
{
/*
* Purpose
* =======
* relax_snode() - Identify the initial relaxed supernodes, assuming that
* the matrix has been reordered according to the postorder of the etree.
*
*/
register int i, j, k, l, parent;
register int snode_start; /* beginning of a snode */
int *et_save, *post, *inv_post, *iwork;
int nsuper_et = 0, nsuper_et_post = 0;
/* The etree may not be postordered, but is heap ordered. */
iwork = (int*) intMalloc(3*n+2);
if ( !iwork ) ABORT("SUPERLU_MALLOC fails for iwork[]");
inv_post = iwork + n+1;
et_save = inv_post + n+1;
/* Post order etree */
post = (int *) TreePostorder(n, et);
for (i = 0; i < n+1; ++i) inv_post[post[i]] = i;
/* Renumber etree in postorder */
for (i = 0; i < n; ++i) {
iwork[post[i]] = post[et[i]];
et_save[i] = et[i]; /* Save the original etree */
}
for (i = 0; i < n; ++i) et[i] = iwork[i];
/* Compute the number of descendants of each node in the etree */
ifill (relax_end, n, EMPTY);
for (j = 0; j < n; j++) descendants[j] = 0;
for (j = 0; j < n; j++) {
parent = et[j];
if ( parent != n ) /* not the dummy root */
descendants[parent] += descendants[j] + 1;
}
/* Identify the relaxed supernodes by postorder traversal of the etree. */
for (j = 0; j < n; ) {
parent = et[j];
snode_start = j;
while ( parent != n && descendants[parent] < relax_columns ) {
j = parent;
parent = et[j];
}
/* Found a supernode in postordered etree; j is the last column. */
++nsuper_et_post;
k = n;
for (i = snode_start; i <= j; ++i)
k = SUPERLU_MIN(k, inv_post[i]);
l = inv_post[j];
if ( (l - k) == (j - snode_start) ) {
/* It's also a supernode in the original etree */
relax_end[k] = l; /* Last column is recorded */
++nsuper_et;
} else {
for (i = snode_start; i <= j; ++i) {
l = inv_post[i];
if ( descendants[i] == 0 ) relax_end[l] = l;
}
}
j++;
/* Search for a new leaf */
while ( descendants[j] != 0 && j < n ) j++;
}
#if ( PRNTlevel>=1 )
printf(".. heap_snode_relax:\n"
"\tNo of relaxed snodes in postordered etree:\t%d\n"
"\tNo of relaxed snodes in original etree:\t%d\n",
nsuper_et_post, nsuper_et);
#endif
/* Recover the original etree */
for (i = 0; i < n; ++i) et[i] = et_save[i];
SUPERLU_FREE(post);
SUPERLU_FREE(iwork);
}

@ -0,0 +1,70 @@
int lsame_(char *ca, char *cb)
{
/* -- LAPACK auxiliary routine (version 2.0) --
Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
Courant Institute, Argonne National Lab, and Rice University
September 30, 1994
Purpose
=======
LSAME returns .TRUE. if CA is the same letter as CB regardless of case.
Arguments
=========
CA (input) CHARACTER*1
CB (input) CHARACTER*1
CA and CB specify the single characters to be compared.
=====================================================================
*/
/* System generated locals */
int ret_val;
/* Local variables */
int inta, intb, zcode;
ret_val = *(unsigned char *)ca == *(unsigned char *)cb;
if (ret_val) {
return ret_val;
}
/* Now test for equivalence if both characters are alphabetic. */
zcode = 'Z';
/* Use 'Z' rather than 'A' so that ASCII can be detected on Prime
machines, on which ICHAR returns a value with bit 8 set.
ICHAR('A') on Prime machines returns 193 which is the same as
ICHAR('A') on an EBCDIC machine. */
inta = *(unsigned char *)ca;
intb = *(unsigned char *)cb;
if (zcode == 90 || zcode == 122) {
/* ASCII is assumed - ZCODE is the ASCII code of either lower or
upper case 'Z'. */
if (inta >= 97 && inta <= 122) inta += -32;
if (intb >= 97 && intb <= 122) intb += -32;
} else if (zcode == 233 || zcode == 169) {
/* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or
upper case 'Z'. */
if ((inta >= 129 && inta <= 137) || (inta >= 145 && inta <= 153) || (inta
>= 162 && inta <= 169))
inta += 64;
if ((intb >= 129 && intb <= 137) || (intb >= 145 && intb <= 153) || (intb
>= 162 && intb <= 169))
intb += 64;
} else if (zcode == 218 || zcode == 250) {
/* ASCII is assumed, on Prime machines - ZCODE is the ASCII code
plus 128 of either lower or upper case 'Z'. */
if (inta >= 225 && inta <= 250) inta += -32;
if (intb >= 225 && intb <= 250) intb += -32;
}
ret_val = inta == intb;
return ret_val;
} /* lsame_ */

@ -0,0 +1,207 @@
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* November 15, 1997
*
*/
/** Precision-independent memory-related routines.
(Shared by [sdcz]memory.c) **/
#include "ssp_defs.h"
#if ( DEBUGlevel>=1 ) /* Debug malloc/free. */
int superlu_malloc_total = 0;
#define PAD_FACTOR 2
#define DWORD (sizeof(double)) /* Be sure it's no smaller than double. */
void *superlu_malloc(size_t size)
{
char *buf;
buf = (char *) malloc(size + DWORD);
if ( !buf ) {
printf("superlu_malloc fails: malloc_total %.0f MB, size %d\n",
superlu_malloc_total*1e-6, size);
ABORT("superlu_malloc: out of memory");
}
((int_t *) buf)[0] = size;
#if 0
superlu_malloc_total += size + DWORD;
#else
superlu_malloc_total += size;
#endif
return (void *) (buf + DWORD);
}
void superlu_free(void *addr)
{
char *p = ((char *) addr) - DWORD;
if ( !addr )
ABORT("superlu_free: tried to free NULL pointer");
if ( !p )
ABORT("superlu_free: tried to free NULL+DWORD pointer");
{
int_t n = ((int_t *) p)[0];
if ( !n )
ABORT("superlu_free: tried to free a freed pointer");
*((int_t *) p) = 0; /* Set to zero to detect duplicate free's. */
#if 0
superlu_malloc_total -= (n + DWORD);
#else
superlu_malloc_total -= n;
#endif
if ( superlu_malloc_total < 0 )
ABORT("superlu_malloc_total went negative!");
/*free (addr);*/
free (p);
}
}
#else /* production mode */
void *superlu_malloc(size_t size)
{
void *buf;
buf = (void *) malloc(size);
return (buf);
}
void superlu_free(void *addr)
{
free (addr);
}
#endif
/*
* Set up pointers for integer working arrays.
*/
void
SetIWork(int m, int n, int panel_size, int *iworkptr, int **segrep,
int **parent, int **xplore, int **repfnz, int **panel_lsub,
int **xprune, int **marker)
{
*segrep = iworkptr;
*parent = iworkptr + m;
*xplore = *parent + m;
*repfnz = *xplore + m;
*panel_lsub = *repfnz + panel_size * m;
*xprune = *panel_lsub + panel_size * m;
*marker = *xprune + n;
ifill (*repfnz, m * panel_size, EMPTY);
ifill (*panel_lsub, m * panel_size, EMPTY);
}
void
copy_mem_int(int howmany, void *old, void *new)
{
register int i;
int *iold = old;
int *inew = new;
for (i = 0; i < howmany; i++) inew[i] = iold[i];
}
void
user_bcopy(char *src, char *dest, int bytes)
{
char *s_ptr, *d_ptr;
s_ptr = src + bytes - 1;
d_ptr = dest + bytes - 1;
for (; d_ptr >= dest; --s_ptr, --d_ptr ) *d_ptr = *s_ptr;
}
int *intMalloc(int n)
{
int *buf;
buf = (int *) SUPERLU_MALLOC(n * sizeof(int));
if ( !buf ) {
ABORT("SUPERLU_MALLOC fails for buf in intMalloc()");
}
return (buf);
}
int *intCalloc(int n)
{
int *buf;
register int i;
buf = (int *) SUPERLU_MALLOC(n * sizeof(int));
if ( !buf ) {
ABORT("SUPERLU_MALLOC fails for buf in intCalloc()");
}
for (i = 0; i < n; ++i) buf[i] = 0;
return (buf);
}
#if 0
check_expanders()
{
int p;
printf("Check expanders:\n");
for (p = 0; p < NO_MEMTYPE; p++) {
printf("type %d, size %d, mem %d\n",
p, expanders[p].size, (int)expanders[p].mem);
}
return 0;
}
StackInfo()
{
printf("Stack: size %d, used %d, top1 %d, top2 %d\n",
stack.size, stack.used, stack.top1, stack.top2);
return 0;
}
PrintStack(char *msg, GlobalLU_t *Glu)
{
int i;
int *xlsub, *lsub, *xusub, *usub;
xlsub = Glu->xlsub;
lsub = Glu->lsub;
xusub = Glu->xusub;
usub = Glu->usub;
printf("%s\n", msg);
/* printf("\nUCOL: ");
for (i = 0; i < xusub[ndim]; ++i)
printf("%f ", ucol[i]);
printf("\nLSUB: ");
for (i = 0; i < xlsub[ndim]; ++i)
printf("%d ", lsub[i]);
printf("\nUSUB: ");
for (i = 0; i < xusub[ndim]; ++i)
printf("%d ", usub[i]);
printf("\n");*/
return 0;
}
#endif

1012
intern/opennl/superlu/mmd.c Normal file

File diff suppressed because it is too large Load Diff

@ -0,0 +1,71 @@
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* November 15, 1997
*
*/
/*
Copyright (c) 1994 by Xerox Corporation. All rights reserved.
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
Permission is hereby granted to use or copy this program for any
purpose, provided the above notices are retained on all copies.
Permission to modify the code and to distribute modified code is
granted, provided the above notices are retained, and a notice that
the code was modified is included with the above copyright notice.
*/
#include "ssp_defs.h"
void
relax_snode (
const int n,
int *et, /* column elimination tree */
const int relax_columns, /* max no of columns allowed in a
relaxed snode */
int *descendants, /* no of descendants of each node
in the etree */
int *relax_end /* last column in a supernode */
)
{
/*
* Purpose
* =======
* relax_snode() - Identify the initial relaxed supernodes, assuming that
* the matrix has been reordered according to the postorder of the etree.
*
*/
register int j, parent;
register int snode_start; /* beginning of a snode */
ifill (relax_end, n, EMPTY);
for (j = 0; j < n; j++) descendants[j] = 0;
/* Compute the number of descendants of each node in the etree */
for (j = 0; j < n; j++) {
parent = et[j];
if ( parent != n ) /* not the dummy root */
descendants[parent] += descendants[j] + 1;
}
/* Identify the relaxed supernodes by postorder traversal of the etree. */
for (j = 0; j < n; ) {
parent = et[j];
snode_start = j;
while ( parent != n && descendants[parent] < relax_columns ) {
j = parent;
parent = et[j];
}
/* Found a supernode with j being the last column. */
relax_end[snode_start] = j; /* Last column is recorded */
j++;
/* Search for a new leaf */
while ( descendants[j] != 0 && j < n ) j++;
}
/*printf("No of relaxed snodes: %d; relaxed columns: %d\n",
nsuper, no_relaxed_col); */
}

@ -0,0 +1,353 @@
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* October 15, 2003
*
*/
/*
Copyright (c) 1994 by Xerox Corporation. All rights reserved.
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
Permission is hereby granted to use or copy this program for any
purpose, provided the above notices are retained on all copies.
Permission to modify the code and to distribute modified code is
granted, provided the above notices are retained, and a notice that
the code was modified is included with the above copyright notice.
*/
#include <stdio.h>
#include <stdlib.h>
#include "ssp_defs.h"
/*
* Function prototypes
*/
void susolve(int, int, float*, float*);
void slsolve(int, int, float*, float*);
void smatvec(int, int, int, float*, float*, float*);
/* Return value: 0 - successful return
* > 0 - number of bytes allocated when run out of space
*/
int
scolumn_bmod (
const int jcol, /* in */
const int nseg, /* in */
float *dense, /* in */
float *tempv, /* working array */
int *segrep, /* in */
int *repfnz, /* in */
int fpanelc, /* in -- first column in the current panel */
GlobalLU_t *Glu, /* modified */
SuperLUStat_t *stat /* output */
)
{
/*
* Purpose:
* ========
* Performs numeric block updates (sup-col) in topological order.
* It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
* Special processing on the supernodal portion of L\U[*,j]
*
*/
#ifdef _CRAY
_fcd ftcs1 = _cptofcd("L", strlen("L")),
ftcs2 = _cptofcd("N", strlen("N")),
ftcs3 = _cptofcd("U", strlen("U"));
#endif
#ifdef USE_VENDOR_BLAS
int incx = 1, incy = 1;
float alpha, beta;
#endif
/* krep = representative of current k-th supernode
* fsupc = first supernodal column
* nsupc = no of columns in supernode
* nsupr = no of rows in supernode (used as leading dimension)
* luptr = location of supernodal LU-block in storage
* kfnz = first nonz in the k-th supernodal segment
* no_zeros = no of leading zeros in a supernodal U-segment
*/
float ukj, ukj1, ukj2;
int luptr, luptr1, luptr2;
int fsupc, nsupc, nsupr, segsze;
int nrow; /* No of rows in the matrix of matrix-vector */
int jcolp1, jsupno, k, ksub, krep, krep_ind, ksupno;
register int lptr, kfnz, isub, irow, i;
register int no_zeros, new_next;
int ufirst, nextlu;
int fst_col; /* First column within small LU update */
int d_fsupc; /* Distance between the first column of the current
panel and the first column of the current snode. */
int *xsup, *supno;
int *lsub, *xlsub;
float *lusup;
int *xlusup;
int nzlumax;
float *tempv1;
float zero = 0.0;
#ifdef USE_VENDOR_BLAS
float one = 1.0;
float none = -1.0;
#endif
int mem_error;
flops_t *ops = stat->ops;
xsup = Glu->xsup;
supno = Glu->supno;
lsub = Glu->lsub;
xlsub = Glu->xlsub;
lusup = Glu->lusup;
xlusup = Glu->xlusup;
nzlumax = Glu->nzlumax;
jcolp1 = jcol + 1;
jsupno = supno[jcol];
/*
* For each nonz supernode segment of U[*,j] in topological order
*/
k = nseg - 1;
for (ksub = 0; ksub < nseg; ksub++) {
krep = segrep[k];
k--;
ksupno = supno[krep];
if ( jsupno != ksupno ) { /* Outside the rectangular supernode */
fsupc = xsup[ksupno];
fst_col = SUPERLU_MAX ( fsupc, fpanelc );
/* Distance from the current supernode to the current panel;
d_fsupc=0 if fsupc > fpanelc. */
d_fsupc = fst_col - fsupc;
luptr = xlusup[fst_col] + d_fsupc;
lptr = xlsub[fsupc] + d_fsupc;
kfnz = repfnz[krep];
kfnz = SUPERLU_MAX ( kfnz, fpanelc );
segsze = krep - kfnz + 1;
nsupc = krep - fst_col + 1;
nsupr = xlsub[fsupc+1] - xlsub[fsupc]; /* Leading dimension */
nrow = nsupr - d_fsupc - nsupc;
krep_ind = lptr + nsupc - 1;
ops[TRSV] += segsze * (segsze - 1);
ops[GEMV] += 2 * nrow * segsze;
/*
* Case 1: Update U-segment of size 1 -- col-col update
*/
if ( segsze == 1 ) {
ukj = dense[lsub[krep_ind]];
luptr += nsupr*(nsupc-1) + nsupc;
for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
irow = lsub[i];
dense[irow] -= ukj*lusup[luptr];
luptr++;
}
} else if ( segsze <= 3 ) {
ukj = dense[lsub[krep_ind]];
luptr += nsupr*(nsupc-1) + nsupc-1;
ukj1 = dense[lsub[krep_ind - 1]];
luptr1 = luptr - nsupr;
if ( segsze == 2 ) { /* Case 2: 2cols-col update */
ukj -= ukj1 * lusup[luptr1];
dense[lsub[krep_ind]] = ukj;
for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
irow = lsub[i];
luptr++;
luptr1++;
dense[irow] -= ( ukj*lusup[luptr]
+ ukj1*lusup[luptr1] );
}
} else { /* Case 3: 3cols-col update */
ukj2 = dense[lsub[krep_ind - 2]];
luptr2 = luptr1 - nsupr;
ukj1 -= ukj2 * lusup[luptr2-1];
ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
dense[lsub[krep_ind]] = ukj;
dense[lsub[krep_ind-1]] = ukj1;
for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
irow = lsub[i];
luptr++;
luptr1++;
luptr2++;
dense[irow] -= ( ukj*lusup[luptr]
+ ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
}
}
} else {
/*
* Case: sup-col update
* Perform a triangular solve and block update,
* then scatter the result of sup-col update to dense
*/
no_zeros = kfnz - fst_col;
/* Copy U[*,j] segment from dense[*] to tempv[*] */
isub = lptr + no_zeros;
for (i = 0; i < segsze; i++) {
irow = lsub[isub];
tempv[i] = dense[irow];
++isub;
}
/* Dense triangular solve -- start effective triangle */
luptr += nsupr * no_zeros + no_zeros;
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr],
&nsupr, tempv, &incx );
#else
strsv_( "L", "N", "U", &segsze, &lusup[luptr],
&nsupr, tempv, &incx );
#endif
luptr += segsze; /* Dense matrix-vector */
tempv1 = &tempv[segsze];
alpha = one;
beta = zero;
#ifdef _CRAY
SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr],
&nsupr, tempv, &incx, &beta, tempv1, &incy );
#else
sgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr],
&nsupr, tempv, &incx, &beta, tempv1, &incy );
#endif
#else
slsolve ( nsupr, segsze, &lusup[luptr], tempv );
luptr += segsze; /* Dense matrix-vector */
tempv1 = &tempv[segsze];
smatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
#endif
/* Scatter tempv[] into SPA dense[] as a temporary storage */
isub = lptr + no_zeros;
for (i = 0; i < segsze; i++) {
irow = lsub[isub];
dense[irow] = tempv[i];
tempv[i] = zero;
++isub;
}
/* Scatter tempv1[] into SPA dense[] */
for (i = 0; i < nrow; i++) {
irow = lsub[isub];
dense[irow] -= tempv1[i];
tempv1[i] = zero;
++isub;
}
}
} /* if jsupno ... */
} /* for each segment... */
/*
* Process the supernodal portion of L\U[*,j]
*/
nextlu = xlusup[jcol];
fsupc = xsup[jsupno];
/* Copy the SPA dense into L\U[*,j] */
new_next = nextlu + xlsub[fsupc+1] - xlsub[fsupc];
while ( new_next > nzlumax ) {
if ((mem_error = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu)))
return (mem_error);
lusup = Glu->lusup;
lsub = Glu->lsub;
}
for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
irow = lsub[isub];
lusup[nextlu] = dense[irow];
dense[irow] = zero;
++nextlu;
}
xlusup[jcolp1] = nextlu; /* Close L\U[*,jcol] */
/* For more updates within the panel (also within the current supernode),
* should start from the first column of the panel, or the first column
* of the supernode, whichever is bigger. There are 2 cases:
* 1) fsupc < fpanelc, then fst_col := fpanelc
* 2) fsupc >= fpanelc, then fst_col := fsupc
*/
fst_col = SUPERLU_MAX ( fsupc, fpanelc );
if ( fst_col < jcol ) {
/* Distance between the current supernode and the current panel.
d_fsupc=0 if fsupc >= fpanelc. */
d_fsupc = fst_col - fsupc;
lptr = xlsub[fsupc] + d_fsupc;
luptr = xlusup[fst_col] + d_fsupc;
nsupr = xlsub[fsupc+1] - xlsub[fsupc]; /* Leading dimension */
nsupc = jcol - fst_col; /* Excluding jcol */
nrow = nsupr - d_fsupc - nsupc;
/* Points to the beginning of jcol in snode L\U(jsupno) */
ufirst = xlusup[jcol] + d_fsupc;
ops[TRSV] += nsupc * (nsupc - 1);
ops[GEMV] += 2 * nrow * nsupc;
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr],
&nsupr, &lusup[ufirst], &incx );
#else
strsv_( "L", "N", "U", &nsupc, &lusup[luptr],
&nsupr, &lusup[ufirst], &incx );
#endif
alpha = none; beta = one; /* y := beta*y + alpha*A*x */
#ifdef _CRAY
SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
&lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#else
sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
&lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#endif
#else
slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
&lusup[ufirst], tempv );
/* Copy updates from tempv[*] into lusup[*] */
isub = ufirst + nsupc;
for (i = 0; i < nrow; i++) {
lusup[isub] -= tempv[i];
tempv[i] = 0.0;
++isub;
}
#endif
} /* if fst_col < jcol ... */
return 0;
}

@ -0,0 +1,270 @@
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* October 15, 2003
*
*/
/*
Copyright (c) 1994 by Xerox Corporation. All rights reserved.
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
Permission is hereby granted to use or copy this program for any
purpose, provided the above notices are retained on all copies.
Permission to modify the code and to distribute modified code is
granted, provided the above notices are retained, and a notice that
the code was modified is included with the above copyright notice.
*/
#include "ssp_defs.h"
/* What type of supernodes we want */
#define T2_SUPER
int
scolumn_dfs(
const int m, /* in - number of rows in the matrix */
const int jcol, /* in */
int *perm_r, /* in */
int *nseg, /* modified - with new segments appended */
int *lsub_col, /* in - defines the RHS vector to start the dfs */
int *segrep, /* modified - with new segments appended */
int *repfnz, /* modified */
int *xprune, /* modified */
int *marker, /* modified */
int *parent, /* working array */
int *xplore, /* working array */
GlobalLU_t *Glu /* modified */
)
{
/*
* Purpose
* =======
* "column_dfs" performs a symbolic factorization on column jcol, and
* decide the supernode boundary.
*
* This routine does not use numeric values, but only use the RHS
* row indices to start the dfs.
*
* A supernode representative is the last column of a supernode.
* The nonzeros in U[*,j] are segments that end at supernodal
* representatives. The routine returns a list of such supernodal
* representatives in topological order of the dfs that generates them.
* The location of the first nonzero in each such supernodal segment
* (supernodal entry location) is also returned.
*
* Local parameters
* ================
* nseg: no of segments in current U[*,j]
* jsuper: jsuper=EMPTY if column j does not belong to the same
* supernode as j-1. Otherwise, jsuper=nsuper.
*
* marker2: A-row --> A-row/col (0/1)
* repfnz: SuperA-col --> PA-row
* parent: SuperA-col --> SuperA-col
* xplore: SuperA-col --> index to L-structure
*
* Return value
* ============
* 0 success;
* > 0 number of bytes allocated when run out of space.
*
*/
int jcolp1, jcolm1, jsuper, nsuper, nextl;
int k, krep, krow, kmark, kperm;
int *marker2; /* Used for small panel LU */
int fsupc; /* First column of a snode */
int myfnz; /* First nonz column of a U-segment */
int chperm, chmark, chrep, kchild;
int xdfs, maxdfs, kpar, oldrep;
int jptr, jm1ptr;
int ito, ifrom, istop; /* Used to compress row subscripts */
int mem_error;
int *xsup, *supno, *lsub, *xlsub;
int nzlmax;
static int first = 1, maxsuper;
xsup = Glu->xsup;
supno = Glu->supno;
lsub = Glu->lsub;
xlsub = Glu->xlsub;
nzlmax = Glu->nzlmax;
if ( first ) {
maxsuper = sp_ienv(3);
first = 0;
}
jcolp1 = jcol + 1;
jcolm1 = jcol - 1;
nsuper = supno[jcol];
jsuper = nsuper;
nextl = xlsub[jcol];
marker2 = &marker[2*m];
/* For each nonzero in A[*,jcol] do dfs */
for (k = 0; lsub_col[k] != EMPTY; k++) {
krow = lsub_col[k];
lsub_col[k] = EMPTY;
kmark = marker2[krow];
/* krow was visited before, go to the next nonz */
if ( kmark == jcol ) continue;
/* For each unmarked nbr krow of jcol
* krow is in L: place it in structure of L[*,jcol]
*/
marker2[krow] = jcol;
kperm = perm_r[krow];
if ( kperm == EMPTY ) {
lsub[nextl++] = krow; /* krow is indexed into A */
if ( nextl >= nzlmax ) {
if ((mem_error = sLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu)))
return (mem_error);
lsub = Glu->lsub;
}
if ( kmark != jcolm1 ) jsuper = EMPTY;/* Row index subset testing */
} else {
/* krow is in U: if its supernode-rep krep
* has been explored, update repfnz[*]
*/
krep = xsup[supno[kperm]+1] - 1;
myfnz = repfnz[krep];
if ( myfnz != EMPTY ) { /* Visited before */
if ( myfnz > kperm ) repfnz[krep] = kperm;
/* continue; */
}
else {
/* Otherwise, perform dfs starting at krep */
oldrep = EMPTY;
parent[krep] = oldrep;
repfnz[krep] = kperm;
xdfs = xlsub[krep];
maxdfs = xprune[krep];
do {
/*
* For each unmarked kchild of krep
*/
while ( xdfs < maxdfs ) {
kchild = lsub[xdfs];
xdfs++;
chmark = marker2[kchild];
if ( chmark != jcol ) { /* Not reached yet */
marker2[kchild] = jcol;
chperm = perm_r[kchild];
/* Case kchild is in L: place it in L[*,k] */
if ( chperm == EMPTY ) {
lsub[nextl++] = kchild;
if ( nextl >= nzlmax ) {
if ((mem_error =
sLUMemXpand(jcol,nextl,LSUB,&nzlmax,Glu)))
return (mem_error);
lsub = Glu->lsub;
}
if ( chmark != jcolm1 ) jsuper = EMPTY;
} else {
/* Case kchild is in U:
* chrep = its supernode-rep. If its rep has
* been explored, update its repfnz[*]
*/
chrep = xsup[supno[chperm]+1] - 1;
myfnz = repfnz[chrep];
if ( myfnz != EMPTY ) { /* Visited before */
if ( myfnz > chperm )
repfnz[chrep] = chperm;
} else {
/* Continue dfs at super-rep of kchild */
xplore[krep] = xdfs;
oldrep = krep;
krep = chrep; /* Go deeper down G(L^t) */
parent[krep] = oldrep;
repfnz[krep] = chperm;
xdfs = xlsub[krep];
maxdfs = xprune[krep];
} /* else */
} /* else */
} /* if */
} /* while */
/* krow has no more unexplored nbrs;
* place supernode-rep krep in postorder DFS.
* backtrack dfs to its parent
*/
segrep[*nseg] = krep;
++(*nseg);
kpar = parent[krep]; /* Pop from stack, mimic recursion */
if ( kpar == EMPTY ) break; /* dfs done */
krep = kpar;
xdfs = xplore[krep];
maxdfs = xprune[krep];
} while ( kpar != EMPTY ); /* Until empty stack */
} /* else */
} /* else */
} /* for each nonzero ... */
/* Check to see if j belongs in the same supernode as j-1 */
if ( jcol == 0 ) { /* Do nothing for column 0 */
nsuper = supno[0] = 0;
} else {
fsupc = xsup[nsuper];
jptr = xlsub[jcol]; /* Not compressed yet */
jm1ptr = xlsub[jcolm1];
#ifdef T2_SUPER
if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = EMPTY;
#endif
/* Make sure the number of columns in a supernode doesn't
exceed threshold. */
if ( jcol - fsupc >= maxsuper ) jsuper = EMPTY;
/* If jcol starts a new supernode, reclaim storage space in
* lsub from the previous supernode. Note we only store
* the subscript set of the first and last columns of
* a supernode. (first for num values, last for pruning)
*/
if ( jsuper == EMPTY ) { /* starts a new supernode */
if ( (fsupc < jcolm1-1) ) { /* >= 3 columns in nsuper */
#ifdef CHK_COMPRESS
printf(" Compress lsub[] at super %d-%d\n", fsupc, jcolm1);
#endif
ito = xlsub[fsupc+1];
xlsub[jcolm1] = ito;
istop = ito + jptr - jm1ptr;
xprune[jcolm1] = istop; /* Initialize xprune[jcol-1] */
xlsub[jcol] = istop;
for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito)
lsub[ito] = lsub[ifrom];
nextl = ito; /* = istop + length(jcol) */
}
nsuper++;
supno[jcol] = nsuper;
} /* if a new supernode */
} /* else: jcol > 0 */
/* Tidy up the pointers before exit */
xsup[nsuper+1] = jcolp1;
supno[jcolp1] = nsuper;
xprune[jcol] = nextl; /* Initialize upper bound for pruning */
xlsub[jcolp1] = nextl;
return 0;
}

@ -0,0 +1,105 @@
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* November 15, 1997
*
*/
/*
Copyright (c) 1994 by Xerox Corporation. All rights reserved.
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
Permission is hereby granted to use or copy this program for any
purpose, provided the above notices are retained on all copies.
Permission to modify the code and to distribute modified code is
granted, provided the above notices are retained, and a notice that
the code was modified is included with the above copyright notice.
*/
#include "ssp_defs.h"
#include "util.h"
int
scopy_to_ucol(
int jcol, /* in */
int nseg, /* in */
int *segrep, /* in */
int *repfnz, /* in */
int *perm_r, /* in */
float *dense, /* modified - reset to zero on return */
GlobalLU_t *Glu /* modified */
)
{
/*
* Gather from SPA dense[*] to global ucol[*].
*/
int ksub, krep, ksupno;
int i, k, kfnz, segsze;
int fsupc, isub, irow;
int jsupno, nextu;
int new_next, mem_error;
int *xsup, *supno;
int *lsub, *xlsub;
float *ucol;
int *usub, *xusub;
int nzumax;
float zero = 0.0;
xsup = Glu->xsup;
supno = Glu->supno;
lsub = Glu->lsub;
xlsub = Glu->xlsub;
ucol = Glu->ucol;
usub = Glu->usub;
xusub = Glu->xusub;
nzumax = Glu->nzumax;
jsupno = supno[jcol];
nextu = xusub[jcol];
k = nseg - 1;
for (ksub = 0; ksub < nseg; ksub++) {
krep = segrep[k--];
ksupno = supno[krep];
if ( ksupno != jsupno ) { /* Should go into ucol[] */
kfnz = repfnz[krep];
if ( kfnz != EMPTY ) { /* Nonzero U-segment */
fsupc = xsup[ksupno];
isub = xlsub[fsupc] + kfnz - fsupc;
segsze = krep - kfnz + 1;
new_next = nextu + segsze;
while ( new_next > nzumax ) {
if ((mem_error = sLUMemXpand(jcol, nextu, UCOL, &nzumax, Glu)))
return (mem_error);
ucol = Glu->ucol;
if ((mem_error = sLUMemXpand(jcol, nextu, USUB, &nzumax, Glu)))
return (mem_error);
usub = Glu->usub;
lsub = Glu->lsub;
}
for (i = 0; i < segsze; i++) {
irow = lsub[isub];
usub[nextu] = perm_r[irow];
ucol[nextu] = dense[irow];
dense[irow] = zero;
nextu++;
isub++;
}
}
}
} /* for each segment... */
xusub[jcol + 1] = nextu; /* Close U[*,jcol] */
return 0;
}

@ -0,0 +1,221 @@
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* October 15, 2003
*
*/
#include "ssp_defs.h"
void
sgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
SuperMatrix *L, SuperMatrix *U, SuperMatrix *B,
SuperLUStat_t *stat, int *info )
{
/*
* Purpose
* =======
*
* SGSSV solves the system of linear equations A*X=B, using the
* LU factorization from SGSTRF. It performs the following steps:
*
* 1. If A is stored column-wise (A->Stype = SLU_NC):
*
* 1.1. Permute the columns of A, forming A*Pc, where Pc
* is a permutation matrix. For more details of this step,
* see sp_preorder.c.
*
* 1.2. Factor A as Pr*A*Pc=L*U with the permutation Pr determined
* by Gaussian elimination with partial pivoting.
* L is unit lower triangular with offdiagonal entries
* bounded by 1 in magnitude, and U is upper triangular.
*
* 1.3. Solve the system of equations A*X=B using the factored
* form of A.
*
* 2. If A is stored row-wise (A->Stype = SLU_NR), apply the
* above algorithm to the transpose of A:
*
* 2.1. Permute columns of transpose(A) (rows of A),
* forming transpose(A)*Pc, where Pc is a permutation matrix.
* For more details of this step, see sp_preorder.c.
*
* 2.2. Factor A as Pr*transpose(A)*Pc=L*U with the permutation Pr
* determined by Gaussian elimination with partial pivoting.
* L is unit lower triangular with offdiagonal entries
* bounded by 1 in magnitude, and U is upper triangular.
*
* 2.3. Solve the system of equations A*X=B using the factored
* form of A.
*
* See supermatrix.h for the definition of 'SuperMatrix' structure.
*
* Arguments
* =========
*
* options (input) superlu_options_t*
* The structure defines the input parameters to control
* how the LU decomposition will be performed and how the
* system will be solved.
*
* A (input) SuperMatrix*
* Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
* of linear equations is A->nrow. Currently, the type of A can be:
* Stype = SLU_NC or SLU_NR; Dtype = SLU_S; Mtype = SLU_GE.
* In the future, more general A may be handled.
*
* perm_c (input/output) int*
* If A->Stype = SLU_NC, column permutation vector of size A->ncol
* which defines the permutation matrix Pc; perm_c[i] = j means
* column i of A is in position j in A*Pc.
* If A->Stype = SLU_NR, column permutation vector of size A->nrow
* which describes permutation of columns of transpose(A)
* (rows of A) as described above.
*
* If options->ColPerm = MY_PERMC or options->Fact = SamePattern or
* options->Fact = SamePattern_SameRowPerm, it is an input argument.
* On exit, perm_c may be overwritten by the product of the input
* perm_c and a permutation that postorders the elimination tree
* of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
* is already in postorder.
* Otherwise, it is an output argument.
*
* perm_r (input/output) int*
* If A->Stype = SLU_NC, row permutation vector of size A->nrow,
* which defines the permutation matrix Pr, and is determined
* by partial pivoting. perm_r[i] = j means row i of A is in
* position j in Pr*A.
* If A->Stype = SLU_NR, permutation vector of size A->ncol, which
* determines permutation of rows of transpose(A)
* (columns of A) as described above.
*
* If options->RowPerm = MY_PERMR or
* options->Fact = SamePattern_SameRowPerm, perm_r is an
* input argument.
* otherwise it is an output argument.
*
* L (output) SuperMatrix*
* The factor L from the factorization
* Pr*A*Pc=L*U (if A->Stype = SLU_NC) or
* Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR).
* Uses compressed row subscripts storage for supernodes, i.e.,
* L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
*
* U (output) SuperMatrix*
* The factor U from the factorization
* Pr*A*Pc=L*U (if A->Stype = SLU_NC) or
* Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR).
* Uses column-wise storage scheme, i.e., U has types:
* Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU.
*
* B (input/output) SuperMatrix*
* B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
* On entry, the right hand side matrix.
* On exit, the solution matrix if info = 0;
*
* stat (output) SuperLUStat_t*
* Record the statistics on runtime and floating-point operation count.
* See util.h for the definition of 'SuperLUStat_t'.
*
* info (output) int*
* = 0: successful exit
* > 0: if info = i, and i is
* <= A->ncol: U(i,i) is exactly zero. The factorization has
* been completed, but the factor U is exactly singular,
* so the solution could not be computed.
* > A->ncol: number of bytes allocated when memory allocation
* failure occurred, plus A->ncol.
*
*/
DNformat *Bstore;
SuperMatrix *AA = NULL;/* A in SLU_NC format used by the factorization routine.*/
SuperMatrix AC; /* Matrix postmultiplied by Pc */
int lwork = 0, *etree, i;
/* Set default values for some parameters */
int panel_size; /* panel size */
int relax; /* no of columns in a relaxed snodes */
int permc_spec;
trans_t trans = NOTRANS;
double *utime;
double t; /* Temporary time */
/* Test the input parameters ... */
*info = 0;
Bstore = B->Store;
if ( options->Fact != DOFACT ) *info = -1;
else if ( A->nrow != A->ncol || A->nrow < 0 ||
(A->Stype != SLU_NC && A->Stype != SLU_NR) ||
A->Dtype != SLU_S || A->Mtype != SLU_GE )
*info = -2;
else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE )
*info = -7;
if ( *info != 0 ) {
i = -(*info);
xerbla_("sgssv", &i);
return;
}
utime = stat->utime;
/* Convert A to SLU_NC format when necessary. */
if ( A->Stype == SLU_NR ) {
NRformat *Astore = A->Store;
AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
sCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz,
Astore->nzval, Astore->colind, Astore->rowptr,
SLU_NC, A->Dtype, A->Mtype);
trans = TRANS;
} else {
if ( A->Stype == SLU_NC ) AA = A;
}
t = SuperLU_timer_();
/*
* Get column permutation vector perm_c[], according to permc_spec:
* permc_spec = NATURAL: natural ordering
* permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
* permc_spec = MMD_ATA: minimum degree on structure of A'*A
* permc_spec = COLAMD: approximate minimum degree column ordering
* permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
*/
permc_spec = options->ColPerm;
if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
get_perm_c(permc_spec, AA, perm_c);
utime[COLPERM] = SuperLU_timer_() - t;
etree = intMalloc(A->ncol);
t = SuperLU_timer_();
sp_preorder(options, AA, perm_c, etree, &AC);
utime[ETREE] = SuperLU_timer_() - t;
panel_size = sp_ienv(1);
relax = sp_ienv(2);
/*printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n",
relax, panel_size, sp_ienv(3), sp_ienv(4));*/
t = SuperLU_timer_();
/* Compute the LU factorization of A. */
sgstrf(options, &AC, relax, panel_size,
etree, NULL, lwork, perm_c, perm_r, L, U, stat, info);
utime[FACT] = SuperLU_timer_() - t;
t = SuperLU_timer_();
if ( *info == 0 ) {
/* Solve the system A*X=B, overwriting B with X. */
sgstrs (trans, L, U, perm_c, perm_r, B, stat, info);
}
utime[SOLVE] = SuperLU_timer_() - t;
SUPERLU_FREE (etree);
Destroy_CompCol_Permuted(&AC);
if ( A->Stype == SLU_NR ) {
Destroy_SuperMatrix_Store(AA);
SUPERLU_FREE(AA);
}
}

@ -0,0 +1,433 @@
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* October 15, 2003
*
*/
/*
Copyright (c) 1994 by Xerox Corporation. All rights reserved.
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
Permission is hereby granted to use or copy this program for any
purpose, provided the above notices are retained on all copies.
Permission to modify the code and to distribute modified code is
granted, provided the above notices are retained, and a notice that
the code was modified is included with the above copyright notice.
*/
#include "ssp_defs.h"
void
sgstrf (superlu_options_t *options, SuperMatrix *A,
int relax, int panel_size, int *etree, void *work, int lwork,
int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U,
SuperLUStat_t *stat, int *info)
{
/*
* Purpose
* =======
*
* SGSTRF computes an LU factorization of a general sparse m-by-n
* matrix A using partial pivoting with row interchanges.
* The factorization has the form
* Pr * A = L * U
* where Pr is a row permutation matrix, L is lower triangular with unit
* diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
* triangular (upper trapezoidal if A->nrow < A->ncol).
*
* See supermatrix.h for the definition of 'SuperMatrix' structure.
*
* Arguments
* =========
*
* options (input) superlu_options_t*
* The structure defines the input parameters to control
* how the LU decomposition will be performed.
*
* A (input) SuperMatrix*
* Original matrix A, permuted by columns, of dimension
* (A->nrow, A->ncol). The type of A can be:
* Stype = SLU_NCP; Dtype = SLU_S; Mtype = SLU_GE.
*
* drop_tol (input) float (NOT IMPLEMENTED)
* Drop tolerance parameter. At step j of the Gaussian elimination,
* if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij.
* 0 <= drop_tol <= 1. The default value of drop_tol is 0.
*
* relax (input) int
* To control degree of relaxing supernodes. If the number
* of nodes (columns) in a subtree of the elimination tree is less
* than relax, this subtree is considered as one supernode,
* regardless of the row structures of those columns.
*
* panel_size (input) int
* A panel consists of at most panel_size consecutive columns.
*
* etree (input) int*, dimension (A->ncol)
* Elimination tree of A'*A.
* Note: etree is a vector of parent pointers for a forest whose
* vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
* On input, the columns of A should be permuted so that the
* etree is in a certain postorder.
*
* work (input/output) void*, size (lwork) (in bytes)
* User-supplied work space and space for the output data structures.
* Not referenced if lwork = 0;
*
* lwork (input) int
* Specifies the size of work array in bytes.
* = 0: allocate space internally by system malloc;
* > 0: use user-supplied work array of length lwork in bytes,
* returns error if space runs out.
* = -1: the routine guesses the amount of space needed without
* performing the factorization, and returns it in
* *info; no other side effects.
*
* perm_c (input) int*, dimension (A->ncol)
* Column permutation vector, which defines the
* permutation matrix Pc; perm_c[i] = j means column i of A is
* in position j in A*Pc.
* When searching for diagonal, perm_c[*] is applied to the
* row subscripts of A, so that diagonal threshold pivoting
* can find the diagonal of A, rather than that of A*Pc.
*
* perm_r (input/output) int*, dimension (A->nrow)
* Row permutation vector which defines the permutation matrix Pr,
* perm_r[i] = j means row i of A is in position j in Pr*A.
* If options->Fact = SamePattern_SameRowPerm, the pivoting routine
* will try to use the input perm_r, unless a certain threshold
* criterion is violated. In that case, perm_r is overwritten by
* a new permutation determined by partial pivoting or diagonal
* threshold pivoting.
* Otherwise, perm_r is output argument;
*
* L (output) SuperMatrix*
* The factor L from the factorization Pr*A=L*U; use compressed row
* subscripts storage for supernodes, i.e., L has type:
* Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
*
* U (output) SuperMatrix*
* The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
* storage scheme, i.e., U has types: Stype = SLU_NC,
* Dtype = SLU_S, Mtype = SLU_TRU.
*
* stat (output) SuperLUStat_t*
* Record the statistics on runtime and floating-point operation count.
* See util.h for the definition of 'SuperLUStat_t'.
*
* info (output) int*
* = 0: successful exit
* < 0: if info = -i, the i-th argument had an illegal value
* > 0: if info = i, and i is
* <= A->ncol: U(i,i) is exactly zero. The factorization has
* been completed, but the factor U is exactly singular,
* and division by zero will occur if it is used to solve a
* system of equations.
* > A->ncol: number of bytes allocated when memory allocation
* failure occurred, plus A->ncol. If lwork = -1, it is
* the estimated amount of space needed, plus A->ncol.
*
* ======================================================================
*
* Local Working Arrays:
* ======================
* m = number of rows in the matrix
* n = number of columns in the matrix
*
* xprune[0:n-1]: xprune[*] points to locations in subscript
* vector lsub[*]. For column i, xprune[i] denotes the point where
* structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need
* to be traversed for symbolic factorization.
*
* marker[0:3*m-1]: marker[i] = j means that node i has been
* reached when working on column j.
* Storage: relative to original row subscripts
* NOTE: There are 3 of them: marker/marker1 are used for panel dfs,
* see spanel_dfs.c; marker2 is used for inner-factorization,
* see scolumn_dfs.c.
*
* parent[0:m-1]: parent vector used during dfs
* Storage: relative to new row subscripts
*
* xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
* unexplored neighbor of i in lsub[*]
*
* segrep[0:nseg-1]: contains the list of supernodal representatives
* in topological order of the dfs. A supernode representative is the
* last column of a supernode.
* The maximum size of segrep[] is n.
*
* repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
* supernodal representative r, repfnz[r] is the location of the first
* nonzero in this segment. It is also used during the dfs: repfnz[r]>0
* indicates the supernode r has been explored.
* NOTE: There are W of them, each used for one column of a panel.
*
* panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
* the panel diagonal. These are filled in during spanel_dfs(), and are
* used later in the inner LU factorization within the panel.
* panel_lsub[]/dense[] pair forms the SPA data structure.
* NOTE: There are W of them.
*
* dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
* NOTE: there are W of them.
*
* tempv[0:*]: real temporary used for dense numeric kernels;
* The size of this array is defined by NUM_TEMPV() in ssp_defs.h.
*
*/
/* Local working arrays */
NCPformat *Astore;
int *iperm_r = NULL; /* inverse of perm_r;
used when options->Fact == SamePattern_SameRowPerm */
int *iperm_c; /* inverse of perm_c */
int *iwork;
float *swork;
int *segrep, *repfnz, *parent, *xplore;
int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
int *xprune;
int *marker;
float *dense, *tempv;
int *relax_end;
float *a;
int *asub;
int *xa_begin, *xa_end;
int *xsup, *supno;
int *xlsub, *xlusup, *xusub;
int nzlumax;
static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
/* Local scalars */
fact_t fact = options->Fact;
double diag_pivot_thresh = options->DiagPivotThresh;
int pivrow; /* pivotal row number in the original matrix A */
int nseg1; /* no of segments in U-column above panel row jcol */
int nseg; /* no of segments in each U-column */
register int jcol;
register int kcol; /* end column of a relaxed snode */
register int icol;
register int i, k, jj, new_next, iinfo;
int m, n, min_mn, jsupno, fsupc, nextlu, nextu;
int w_def; /* upper bound on panel width */
int usepr, iperm_r_allocated = 0;
int nnzL, nnzU;
int *panel_histo = stat->panel_histo;
flops_t *ops = stat->ops;
iinfo = 0;
m = A->nrow;
n = A->ncol;
min_mn = SUPERLU_MIN(m, n);
Astore = A->Store;
a = Astore->nzval;
asub = Astore->rowind;
xa_begin = Astore->colbeg;
xa_end = Astore->colend;
/* Allocate storage common to the factor routines */
*info = sLUMemInit(fact, work, lwork, m, n, Astore->nnz,
panel_size, L, U, &Glu, &iwork, &swork);
if ( *info ) return;
xsup = Glu.xsup;
supno = Glu.supno;
xlsub = Glu.xlsub;
xlusup = Glu.xlusup;
xusub = Glu.xusub;
SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
&repfnz, &panel_lsub, &xprune, &marker);
sSetRWork(m, panel_size, swork, &dense, &tempv);
usepr = (fact == SamePattern_SameRowPerm);
if ( usepr ) {
/* Compute the inverse of perm_r */
iperm_r = (int *) intMalloc(m);
for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
iperm_r_allocated = 1;
}
iperm_c = (int *) intMalloc(n);
for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
/* Identify relaxed snodes */
relax_end = (int *) intMalloc(n);
if ( options->SymmetricMode == YES ) {
heap_relax_snode(n, etree, relax, marker, relax_end);
} else {
relax_snode(n, etree, relax, marker, relax_end);
}
ifill (perm_r, m, EMPTY);
ifill (marker, m * NO_MARKER, EMPTY);
supno[0] = -1;
xsup[0] = xlsub[0] = xusub[0] = xlusup[0] = 0;
w_def = panel_size;
/*
* Work on one "panel" at a time. A panel is one of the following:
* (a) a relaxed supernode at the bottom of the etree, or
* (b) panel_size contiguous columns, defined by the user
*/
for (jcol = 0; jcol < min_mn; ) {
if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
kcol = relax_end[jcol]; /* end of the relaxed snode */
panel_histo[kcol-jcol+1]++;
/* --------------------------------------
* Factorize the relaxed supernode(jcol:kcol)
* -------------------------------------- */
/* Determine the union of the row structure of the snode */
if ( (*info = ssnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
xprune, marker, &Glu)) != 0 )
return;
nextu = xusub[jcol];
nextlu = xlusup[jcol];
jsupno = supno[jcol];
fsupc = xsup[jsupno];
new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
nzlumax = Glu.nzlumax;
while ( new_next > nzlumax ) {
if ( (*info = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)) )
return;
}
for (icol = jcol; icol<= kcol; icol++) {
xusub[icol+1] = nextu;
/* Scatter into SPA dense[*] */
for (k = xa_begin[icol]; k < xa_end[icol]; k++)
dense[asub[k]] = a[k];
/* Numeric update within the snode */
ssnode_bmod(icol, fsupc, dense, tempv, &Glu, stat);
if ( (*info = spivotL(icol, diag_pivot_thresh, &usepr, perm_r,
iperm_r, iperm_c, &pivrow, &Glu, stat)) )
if ( iinfo == 0 ) iinfo = *info;
#ifdef DEBUG
sprint_lu_col("[1]: ", icol, pivrow, xprune, &Glu);
#endif
}
jcol = icol;
} else { /* Work on one panel of panel_size columns */
/* Adjust panel_size so that a panel won't overlap with the next
* relaxed snode.
*/
panel_size = w_def;
for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++)
if ( relax_end[k] != EMPTY ) {
panel_size = k - jcol;
break;
}
if ( k == min_mn ) panel_size = min_mn - jcol;
panel_histo[panel_size]++;
/* symbolic factor on a panel of columns */
spanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
dense, panel_lsub, segrep, repfnz, xprune,
marker, parent, xplore, &Glu);
/* numeric sup-panel updates in topological order */
spanel_bmod(m, panel_size, jcol, nseg1, dense,
tempv, segrep, repfnz, &Glu, stat);
/* Sparse LU within the panel, and below panel diagonal */
for ( jj = jcol; jj < jcol + panel_size; jj++) {
k = (jj - jcol) * m; /* column index for w-wide arrays */
nseg = nseg1; /* Begin after all the panel segments */
if ((*info = scolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k],
segrep, &repfnz[k], xprune, marker,
parent, xplore, &Glu)) != 0) return;
/* Numeric updates */
if ((*info = scolumn_bmod(jj, (nseg - nseg1), &dense[k],
tempv, &segrep[nseg1], &repfnz[k],
jcol, &Glu, stat)) != 0) return;
/* Copy the U-segments to ucol[*] */
if ((*info = scopy_to_ucol(jj, nseg, segrep, &repfnz[k],
perm_r, &dense[k], &Glu)) != 0)
return;
if ( (*info = spivotL(jj, diag_pivot_thresh, &usepr, perm_r,
iperm_r, iperm_c, &pivrow, &Glu, stat)) )
if ( iinfo == 0 ) iinfo = *info;
/* Prune columns (0:jj-1) using column jj */
spruneL(jj, perm_r, pivrow, nseg, segrep,
&repfnz[k], xprune, &Glu);
/* Reset repfnz[] for this column */
resetrep_col (nseg, segrep, &repfnz[k]);
#ifdef DEBUG
sprint_lu_col("[2]: ", jj, pivrow, xprune, &Glu);
#endif
}
jcol += panel_size; /* Move to the next panel */
} /* else */
} /* for */
*info = iinfo;
if ( m > n ) {
k = 0;
for (i = 0; i < m; ++i)
if ( perm_r[i] == EMPTY ) {
perm_r[i] = n + k;
++k;
}
}
countnz(min_mn, xprune, &nnzL, &nnzU, &Glu);
fixupL(min_mn, perm_r, &Glu);
sLUWorkFree(iwork, swork, &Glu); /* Free work space and compress storage */
if ( fact == SamePattern_SameRowPerm ) {
/* L and U structures may have changed due to possibly different
pivoting, even though the storage is available.
There could also be memory expansions, so the array locations
may have changed, */
((SCformat *)L->Store)->nnz = nnzL;
((SCformat *)L->Store)->nsuper = Glu.supno[n];
((SCformat *)L->Store)->nzval = Glu.lusup;
((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
((SCformat *)L->Store)->rowind = Glu.lsub;
((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
((NCformat *)U->Store)->nnz = nnzU;
((NCformat *)U->Store)->nzval = Glu.ucol;
((NCformat *)U->Store)->rowind = Glu.usub;
((NCformat *)U->Store)->colptr = Glu.xusub;
} else {
sCreate_SuperNode_Matrix(L, A->nrow, A->ncol, nnzL, Glu.lusup,
Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
Glu.xsup, SLU_SC, SLU_S, SLU_TRLU);
sCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol,
Glu.usub, Glu.xusub, SLU_NC, SLU_S, SLU_TRU);
}
ops[FACT] += ops[TRSV] + ops[GEMV];
if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
SUPERLU_FREE (iperm_c);
SUPERLU_FREE (relax_end);
}

@ -0,0 +1,331 @@
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* October 15, 2003
*
*/
/*
Copyright (c) 1994 by Xerox Corporation. All rights reserved.
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
Permission is hereby granted to use or copy this program for any
purpose, provided the above notices are retained on all copies.
Permission to modify the code and to distribute modified code is
granted, provided the above notices are retained, and a notice that
the code was modified is included with the above copyright notice.
*/
#include "ssp_defs.h"
/*
* Function prototypes
*/
void susolve(int, int, float*, float*);
void slsolve(int, int, float*, float*);
void smatvec(int, int, int, float*, float*, float*);
void
sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
int *perm_c, int *perm_r, SuperMatrix *B,
SuperLUStat_t *stat, int *info)
{
/*
* Purpose
* =======
*
* SGSTRS solves a system of linear equations A*X=B or A'*X=B
* with A sparse and B dense, using the LU factorization computed by
* SGSTRF.
*
* See supermatrix.h for the definition of 'SuperMatrix' structure.
*
* Arguments
* =========
*
* trans (input) trans_t
* Specifies the form of the system of equations:
* = NOTRANS: A * X = B (No transpose)
* = TRANS: A'* X = B (Transpose)
* = CONJ: A**H * X = B (Conjugate transpose)
*
* L (input) SuperMatrix*
* The factor L from the factorization Pr*A*Pc=L*U as computed by
* sgstrf(). Use compressed row subscripts storage for supernodes,
* i.e., L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
*
* U (input) SuperMatrix*
* The factor U from the factorization Pr*A*Pc=L*U as computed by
* sgstrf(). Use column-wise storage scheme, i.e., U has types:
* Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU.
*
* perm_c (input) int*, dimension (L->ncol)
* Column permutation vector, which defines the
* permutation matrix Pc; perm_c[i] = j means column i of A is
* in position j in A*Pc.
*
* perm_r (input) int*, dimension (L->nrow)
* Row permutation vector, which defines the permutation matrix Pr;
* perm_r[i] = j means row i of A is in position j in Pr*A.
*
* B (input/output) SuperMatrix*
* B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
* On entry, the right hand side matrix.
* On exit, the solution matrix if info = 0;
*
* stat (output) SuperLUStat_t*
* Record the statistics on runtime and floating-point operation count.
* See util.h for the definition of 'SuperLUStat_t'.
*
* info (output) int*
* = 0: successful exit
* < 0: if info = -i, the i-th argument had an illegal value
*
*/
#ifdef _CRAY
_fcd ftcs1, ftcs2, ftcs3, ftcs4;
#endif
#ifdef USE_VENDOR_BLAS
float alpha = 1.0, beta = 1.0;
float *work_col;
#endif
DNformat *Bstore;
float *Bmat;
SCformat *Lstore;
NCformat *Ustore;
float *Lval, *Uval;
int fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
int i, j, k, iptr, jcol, n, ldb, nrhs;
float *work, *rhs_work, *soln;
flops_t solve_ops;
void sprint_soln();
/* Test input parameters ... */
*info = 0;
Bstore = B->Store;
ldb = Bstore->lda;
nrhs = B->ncol;
if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
else if ( L->nrow != L->ncol || L->nrow < 0 ||
L->Stype != SLU_SC || L->Dtype != SLU_S || L->Mtype != SLU_TRLU )
*info = -2;
else if ( U->nrow != U->ncol || U->nrow < 0 ||
U->Stype != SLU_NC || U->Dtype != SLU_S || U->Mtype != SLU_TRU )
*info = -3;
else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE )
*info = -6;
if ( *info ) {
i = -(*info);
xerbla_("sgstrs", &i);
return;
}
n = L->nrow;
work = floatCalloc(n * nrhs);
if ( !work ) ABORT("Malloc fails for local work[].");
soln = floatMalloc(n);
if ( !soln ) ABORT("Malloc fails for local soln[].");
Bmat = Bstore->nzval;
Lstore = L->Store;
Lval = Lstore->nzval;
Ustore = U->Store;
Uval = Ustore->nzval;
solve_ops = 0;
if ( trans == NOTRANS ) {
/* Permute right hand sides to form Pr*B */
for (i = 0; i < nrhs; i++) {
rhs_work = &Bmat[i*ldb];
for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
for (k = 0; k < n; k++) rhs_work[k] = soln[k];
}
/* Forward solve PLy=Pb. */
for (k = 0; k <= Lstore->nsuper; k++) {
fsupc = L_FST_SUPC(k);
istart = L_SUB_START(fsupc);
nsupr = L_SUB_START(fsupc+1) - istart;
nsupc = L_FST_SUPC(k+1) - fsupc;
nrow = nsupr - nsupc;
solve_ops += nsupc * (nsupc - 1) * nrhs;
solve_ops += 2 * nrow * nsupc * nrhs;
if ( nsupc == 1 ) {
for (j = 0; j < nrhs; j++) {
rhs_work = &Bmat[j*ldb];
luptr = L_NZ_START(fsupc);
for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
irow = L_SUB(iptr);
++luptr;
rhs_work[irow] -= rhs_work[fsupc] * Lval[luptr];
}
}
} else {
luptr = L_NZ_START(fsupc);
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
ftcs1 = _cptofcd("L", strlen("L"));
ftcs2 = _cptofcd("N", strlen("N"));
ftcs3 = _cptofcd("U", strlen("U"));
STRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
&Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
SGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha,
&Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb,
&beta, &work[0], &n );
#else
strsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
&Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
sgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha,
&Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb,
&beta, &work[0], &n );
#endif
for (j = 0; j < nrhs; j++) {
rhs_work = &Bmat[j*ldb];
work_col = &work[j*n];
iptr = istart + nsupc;
for (i = 0; i < nrow; i++) {
irow = L_SUB(iptr);
rhs_work[irow] -= work_col[i]; /* Scatter */
work_col[i] = 0.0;
iptr++;
}
}
#else
for (j = 0; j < nrhs; j++) {
rhs_work = &Bmat[j*ldb];
slsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
smatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
&rhs_work[fsupc], &work[0] );
iptr = istart + nsupc;
for (i = 0; i < nrow; i++) {
irow = L_SUB(iptr);
rhs_work[irow] -= work[i];
work[i] = 0.0;
iptr++;
}
}
#endif
} /* else ... */
} /* for L-solve */
#ifdef DEBUG
printf("After L-solve: y=\n");
sprint_soln(n, Bmat);
#endif
/*
* Back solve Ux=y.
*/
for (k = Lstore->nsuper; k >= 0; k--) {
fsupc = L_FST_SUPC(k);
istart = L_SUB_START(fsupc);
nsupr = L_SUB_START(fsupc+1) - istart;
nsupc = L_FST_SUPC(k+1) - fsupc;
luptr = L_NZ_START(fsupc);
solve_ops += nsupc * (nsupc + 1) * nrhs;
if ( nsupc == 1 ) {
rhs_work = &Bmat[0];
for (j = 0; j < nrhs; j++) {
rhs_work[fsupc] /= Lval[luptr];
rhs_work += ldb;
}
} else {
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
ftcs1 = _cptofcd("L", strlen("L"));
ftcs2 = _cptofcd("U", strlen("U"));
ftcs3 = _cptofcd("N", strlen("N"));
STRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
&Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
#else
strsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
&Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
#endif
#else
for (j = 0; j < nrhs; j++)
susolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
#endif
}
for (j = 0; j < nrhs; ++j) {
rhs_work = &Bmat[j*ldb];
for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
irow = U_SUB(i);
rhs_work[irow] -= rhs_work[jcol] * Uval[i];
}
}
}
} /* for U-solve */
#ifdef DEBUG
printf("After U-solve: x=\n");
sprint_soln(n, Bmat);
#endif
/* Compute the final solution X := Pc*X. */
for (i = 0; i < nrhs; i++) {
rhs_work = &Bmat[i*ldb];
for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
for (k = 0; k < n; k++) rhs_work[k] = soln[k];
}
stat->ops[SOLVE] = solve_ops;
} else { /* Solve A'*X=B or CONJ(A)*X=B */
/* Permute right hand sides to form Pc'*B. */
for (i = 0; i < nrhs; i++) {
rhs_work = &Bmat[i*ldb];
for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
for (k = 0; k < n; k++) rhs_work[k] = soln[k];
}
stat->ops[SOLVE] = 0;
for (k = 0; k < nrhs; ++k) {
/* Multiply by inv(U'). */
sp_strsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);
/* Multiply by inv(L'). */
sp_strsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
}
/* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
for (i = 0; i < nrhs; i++) {
rhs_work = &Bmat[i*ldb];
for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
for (k = 0; k < n; k++) rhs_work[k] = soln[k];
}
}
SUPERLU_FREE(work);
SUPERLU_FREE(soln);
}
/*
* Diagnostic print of the solution vector
*/
void
sprint_soln(int n, float *soln)
{
int i;
for (i = 0; i < n; i++)
printf("\t%d: %.4f\n", i, soln[i]);
}

@ -0,0 +1,676 @@
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* October 15, 2003
*
*/
#include "ssp_defs.h"
/* Constants */
#define NO_MEMTYPE 4 /* 0: lusup;
1: ucol;
2: lsub;
3: usub */
#define GluIntArray(n) (5 * (n) + 5)
/* Internal prototypes */
void *sexpand (int *, MemType,int, int, GlobalLU_t *);
int sLUWorkInit (int, int, int, int **, float **, LU_space_t);
void copy_mem_float (int, void *, void *);
void sStackCompress (GlobalLU_t *);
void sSetupSpace (void *, int, LU_space_t *);
void *suser_malloc (int, int);
void suser_free (int, int);
/* External prototypes (in memory.c - prec-indep) */
extern void copy_mem_int (int, void *, void *);
extern void user_bcopy (char *, char *, int);
/* Headers for 4 types of dynamatically managed memory */
typedef struct e_node {
int size; /* length of the memory that has been used */
void *mem; /* pointer to the new malloc'd store */
} ExpHeader;
typedef struct {
int size;
int used;
int top1; /* grow upward, relative to &array[0] */
int top2; /* grow downward */
void *array;
} LU_stack_t;
/* Variables local to this file */
static ExpHeader *expanders = 0; /* Array of pointers to 4 types of memory */
static LU_stack_t stack;
static int no_expand;
/* Macros to manipulate stack */
#define StackFull(x) ( x + stack.used >= stack.size )
#define NotDoubleAlign(addr) ( (long int)addr & 7 )
#define DoubleAlign(addr) ( ((long int)addr + 7) & ~7L )
#define TempSpace(m, w) ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \
(w + 1) * m * sizeof(float) )
#define Reduce(alpha) ((alpha + 1) / 2) /* i.e. (alpha-1)/2 + 1 */
/*
* Setup the memory model to be used for factorization.
* lwork = 0: use system malloc;
* lwork > 0: use user-supplied work[] space.
*/
void sSetupSpace(void *work, int lwork, LU_space_t *MemModel)
{
if ( lwork == 0 ) {
*MemModel = SYSTEM; /* malloc/free */
} else if ( lwork > 0 ) {
*MemModel = USER; /* user provided space */
stack.used = 0;
stack.top1 = 0;
stack.top2 = (lwork/4)*4; /* must be word addressable */
stack.size = stack.top2;
stack.array = (void *) work;
}
}
void *suser_malloc(int bytes, int which_end)
{
void *buf;
if ( StackFull(bytes) ) return (NULL);
if ( which_end == HEAD ) {
buf = (char*) stack.array + stack.top1;
stack.top1 += bytes;
} else {
stack.top2 -= bytes;
buf = (char*) stack.array + stack.top2;
}
stack.used += bytes;
return buf;
}
void suser_free(int bytes, int which_end)
{
if ( which_end == HEAD ) {
stack.top1 -= bytes;
} else {
stack.top2 += bytes;
}
stack.used -= bytes;
}
/*
* mem_usage consists of the following fields:
* - for_lu (float)
* The amount of space used in bytes for the L\U data structures.
* - total_needed (float)
* The amount of space needed in bytes to perform factorization.
* - expansions (int)
* Number of memory expansions during the LU factorization.
*/
int sQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
{
SCformat *Lstore;
NCformat *Ustore;
register int n, iword, dword, panel_size = sp_ienv(1);
Lstore = L->Store;
Ustore = U->Store;
n = L->ncol;
iword = sizeof(int);
dword = sizeof(float);
/* For LU factors */
mem_usage->for_lu = (float)( (4*n + 3) * iword + Lstore->nzval_colptr[n] *
dword + Lstore->rowind_colptr[n] * iword );
mem_usage->for_lu += (float)( (n + 1) * iword +
Ustore->colptr[n] * (dword + iword) );
/* Working storage to support factorization */
mem_usage->total_needed = mem_usage->for_lu +
(float)( (2 * panel_size + 4 + NO_MARKER) * n * iword +
(panel_size + 1) * n * dword );
mem_usage->expansions = --no_expand;
return 0;
} /* sQuerySpace */
/*
* Allocate storage for the data structures common to all factor routines.
* For those unpredictable size, make a guess as FILL * nnz(A).
* Return value:
* If lwork = -1, return the estimated amount of space required, plus n;
* otherwise, return the amount of space actually allocated when
* memory allocation failure occurred.
*/
int
sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
int panel_size, SuperMatrix *L, SuperMatrix *U, GlobalLU_t *Glu,
int **iwork, float **dwork)
{
int info, iword, dword;
SCformat *Lstore;
NCformat *Ustore;
int *xsup, *supno;
int *lsub, *xlsub;
float *lusup;
int *xlusup;
float *ucol;
int *usub, *xusub;
int nzlmax, nzumax, nzlumax;
int FILL = sp_ienv(6);
Glu->n = n;
no_expand = 0;
iword = sizeof(int);
dword = sizeof(float);
if ( !expanders )
expanders = (ExpHeader*)SUPERLU_MALLOC(NO_MEMTYPE * sizeof(ExpHeader));
if ( !expanders ) ABORT("SUPERLU_MALLOC fails for expanders");
if ( fact != SamePattern_SameRowPerm ) {
/* Guess for L\U factors */
nzumax = nzlumax = FILL * annz;
nzlmax = SUPERLU_MAX(1, FILL/4.) * annz;
if ( lwork == -1 ) {
return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
+ (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
} else {
sSetupSpace(work, lwork, &Glu->MemModel);
}
#ifdef DEBUG
printf("sLUMemInit() called: annz %d, MemModel %d\n",
annz, Glu->MemModel);
#endif
/* Integer pointers for L\U factors */
if ( Glu->MemModel == SYSTEM ) {
xsup = intMalloc(n+1);
supno = intMalloc(n+1);
xlsub = intMalloc(n+1);
xlusup = intMalloc(n+1);
xusub = intMalloc(n+1);
} else {
xsup = (int *)suser_malloc((n+1) * iword, HEAD);
supno = (int *)suser_malloc((n+1) * iword, HEAD);
xlsub = (int *)suser_malloc((n+1) * iword, HEAD);
xlusup = (int *)suser_malloc((n+1) * iword, HEAD);
xusub = (int *)suser_malloc((n+1) * iword, HEAD);
}
lusup = (float *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
ucol = (float *) sexpand( &nzumax, UCOL, 0, 0, Glu );
lsub = (int *) sexpand( &nzlmax, LSUB, 0, 0, Glu );
usub = (int *) sexpand( &nzumax, USUB, 0, 1, Glu );
while ( !lusup || !ucol || !lsub || !usub ) {
if ( Glu->MemModel == SYSTEM ) {
SUPERLU_FREE(lusup);
SUPERLU_FREE(ucol);
SUPERLU_FREE(lsub);
SUPERLU_FREE(usub);
} else {
suser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword, HEAD);
}
nzlumax /= 2;
nzumax /= 2;
nzlmax /= 2;
if ( nzlumax < annz ) {
printf("Not enough memory to perform factorization.\n");
return (smemory_usage(nzlmax, nzumax, nzlumax, n) + n);
}
lusup = (float *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
ucol = (float *) sexpand( &nzumax, UCOL, 0, 0, Glu );
lsub = (int *) sexpand( &nzlmax, LSUB, 0, 0, Glu );
usub = (int *) sexpand( &nzumax, USUB, 0, 1, Glu );
}
} else {
/* fact == SamePattern_SameRowPerm */
Lstore = L->Store;
Ustore = U->Store;
xsup = Lstore->sup_to_col;
supno = Lstore->col_to_sup;
xlsub = Lstore->rowind_colptr;
xlusup = Lstore->nzval_colptr;
xusub = Ustore->colptr;
nzlmax = Glu->nzlmax; /* max from previous factorization */
nzumax = Glu->nzumax;
nzlumax = Glu->nzlumax;
if ( lwork == -1 ) {
return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
+ (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
} else if ( lwork == 0 ) {
Glu->MemModel = SYSTEM;
} else {
Glu->MemModel = USER;
stack.top2 = (lwork/4)*4; /* must be word-addressable */
stack.size = stack.top2;
}
lsub = expanders[LSUB].mem = Lstore->rowind;
lusup = expanders[LUSUP].mem = Lstore->nzval;
usub = expanders[USUB].mem = Ustore->rowind;
ucol = expanders[UCOL].mem = Ustore->nzval;;
expanders[LSUB].size = nzlmax;
expanders[LUSUP].size = nzlumax;
expanders[USUB].size = nzumax;
expanders[UCOL].size = nzumax;
}
Glu->xsup = xsup;
Glu->supno = supno;
Glu->lsub = lsub;
Glu->xlsub = xlsub;
Glu->lusup = lusup;
Glu->xlusup = xlusup;
Glu->ucol = ucol;
Glu->usub = usub;
Glu->xusub = xusub;
Glu->nzlmax = nzlmax;
Glu->nzumax = nzumax;
Glu->nzlumax = nzlumax;
info = sLUWorkInit(m, n, panel_size, iwork, dwork, Glu->MemModel);
if ( info )
return ( info + smemory_usage(nzlmax, nzumax, nzlumax, n) + n);
++no_expand;
return 0;
} /* sLUMemInit */
/* Allocate known working storage. Returns 0 if success, otherwise
returns the number of bytes allocated so far when failure occurred. */
int
sLUWorkInit(int m, int n, int panel_size, int **iworkptr,
float **dworkptr, LU_space_t MemModel)
{
int isize, dsize, extra;
float *old_ptr;
int maxsuper = sp_ienv(3),
rowblk = sp_ienv(4);
isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int);
dsize = (m * panel_size +
NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(float);
if ( MemModel == SYSTEM )
*iworkptr = (int *) intCalloc(isize/sizeof(int));
else
*iworkptr = (int *) suser_malloc(isize, TAIL);
if ( ! *iworkptr ) {
fprintf(stderr, "sLUWorkInit: malloc fails for local iworkptr[]\n");
return (isize + n);
}
if ( MemModel == SYSTEM )
*dworkptr = (float *) SUPERLU_MALLOC(dsize);
else {
*dworkptr = (float *) suser_malloc(dsize, TAIL);
if ( NotDoubleAlign(*dworkptr) ) {
old_ptr = *dworkptr;
*dworkptr = (float*) DoubleAlign(*dworkptr);
*dworkptr = (float*) ((double*)*dworkptr - 1);
extra = (char*)old_ptr - (char*)*dworkptr;
#ifdef DEBUG
printf("sLUWorkInit: not aligned, extra %d\n", extra);
#endif
stack.top2 -= extra;
stack.used += extra;
}
}
if ( ! *dworkptr ) {
fprintf(stderr, "malloc fails for local dworkptr[].");
return (isize + dsize + n);
}
return 0;
}
/*
* Set up pointers for real working arrays.
*/
void
sSetRWork(int m, int panel_size, float *dworkptr,
float **dense, float **tempv)
{
float zero = 0.0;
int maxsuper = sp_ienv(3),
rowblk = sp_ienv(4);
*dense = dworkptr;
*tempv = *dense + panel_size*m;
sfill (*dense, m * panel_size, zero);
sfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero);
}
/*
* Free the working storage used by factor routines.
*/
void sLUWorkFree(int *iwork, float *dwork, GlobalLU_t *Glu)
{
if ( Glu->MemModel == SYSTEM ) {
SUPERLU_FREE (iwork);
SUPERLU_FREE (dwork);
} else {
stack.used -= (stack.size - stack.top2);
stack.top2 = stack.size;
/* sStackCompress(Glu); */
}
SUPERLU_FREE (expanders);
expanders = 0;
}
/* Expand the data structures for L and U during the factorization.
* Return value: 0 - successful return
* > 0 - number of bytes allocated when run out of space
*/
int
sLUMemXpand(int jcol,
int next, /* number of elements currently in the factors */
MemType mem_type, /* which type of memory to expand */
int *maxlen, /* modified - maximum length of a data structure */
GlobalLU_t *Glu /* modified - global LU data structures */
)
{
void *new_mem;
#ifdef DEBUG
printf("sLUMemXpand(): jcol %d, next %d, maxlen %d, MemType %d\n",
jcol, next, *maxlen, mem_type);
#endif
if (mem_type == USUB)
new_mem = sexpand(maxlen, mem_type, next, 1, Glu);
else
new_mem = sexpand(maxlen, mem_type, next, 0, Glu);
if ( !new_mem ) {
int nzlmax = Glu->nzlmax;
int nzumax = Glu->nzumax;
int nzlumax = Glu->nzlumax;
fprintf(stderr, "Can't expand MemType %d: jcol %d\n", mem_type, jcol);
return (smemory_usage(nzlmax, nzumax, nzlumax, Glu->n) + Glu->n);
}
switch ( mem_type ) {
case LUSUP:
Glu->lusup = (float *) new_mem;
Glu->nzlumax = *maxlen;
break;
case UCOL:
Glu->ucol = (float *) new_mem;
Glu->nzumax = *maxlen;
break;
case LSUB:
Glu->lsub = (int *) new_mem;
Glu->nzlmax = *maxlen;
break;
case USUB:
Glu->usub = (int *) new_mem;
Glu->nzumax = *maxlen;
break;
}
return 0;
}
void
copy_mem_float(int howmany, void *old, void *new)
{
register int i;
float *dold = old;
float *dnew = new;
for (i = 0; i < howmany; i++) dnew[i] = dold[i];
}
/*
* Expand the existing storage to accommodate more fill-ins.
*/
void
*sexpand (
int *prev_len, /* length used from previous call */
MemType type, /* which part of the memory to expand */
int len_to_copy, /* size of the memory to be copied to new store */
int keep_prev, /* = 1: use prev_len;
= 0: compute new_len to expand */
GlobalLU_t *Glu /* modified - global LU data structures */
)
{
float EXPAND = 1.5;
float alpha;
void *new_mem, *old_mem;
int new_len, tries, lword, extra, bytes_to_copy;
alpha = EXPAND;
if ( no_expand == 0 || keep_prev ) /* First time allocate requested */
new_len = *prev_len;
else {
new_len = alpha * *prev_len;
}
if ( type == LSUB || type == USUB ) lword = sizeof(int);
else lword = sizeof(float);
if ( Glu->MemModel == SYSTEM ) {
new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
/* new_mem = (void *) calloc(new_len, lword); */
if ( no_expand != 0 ) {
tries = 0;
if ( keep_prev ) {
if ( !new_mem ) return (NULL);
} else {
while ( !new_mem ) {
if ( ++tries > 10 ) return (NULL);
alpha = Reduce(alpha);
new_len = alpha * *prev_len;
new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
/* new_mem = (void *) calloc(new_len, lword); */
}
}
if ( type == LSUB || type == USUB ) {
copy_mem_int(len_to_copy, expanders[type].mem, new_mem);
} else {
copy_mem_float(len_to_copy, expanders[type].mem, new_mem);
}
SUPERLU_FREE (expanders[type].mem);
}
expanders[type].mem = (void *) new_mem;
} else { /* MemModel == USER */
if ( no_expand == 0 ) {
new_mem = suser_malloc(new_len * lword, HEAD);
if ( NotDoubleAlign(new_mem) &&
(type == LUSUP || type == UCOL) ) {
old_mem = new_mem;
new_mem = (void *)DoubleAlign(new_mem);
extra = (char*)new_mem - (char*)old_mem;
#ifdef DEBUG
printf("expand(): not aligned, extra %d\n", extra);
#endif
stack.top1 += extra;
stack.used += extra;
}
expanders[type].mem = (void *) new_mem;
}
else {
tries = 0;
extra = (new_len - *prev_len) * lword;
if ( keep_prev ) {
if ( StackFull(extra) ) return (NULL);
} else {
while ( StackFull(extra) ) {
if ( ++tries > 10 ) return (NULL);
alpha = Reduce(alpha);
new_len = alpha * *prev_len;
extra = (new_len - *prev_len) * lword;
}
}
if ( type != USUB ) {
new_mem = (void*)((char*)expanders[type + 1].mem + extra);
bytes_to_copy = (char*)stack.array + stack.top1
- (char*)expanders[type + 1].mem;
user_bcopy(expanders[type+1].mem, new_mem, bytes_to_copy);
if ( type < USUB ) {
Glu->usub = expanders[USUB].mem =
(void*)((char*)expanders[USUB].mem + extra);
}
if ( type < LSUB ) {
Glu->lsub = expanders[LSUB].mem =
(void*)((char*)expanders[LSUB].mem + extra);
}
if ( type < UCOL ) {
Glu->ucol = expanders[UCOL].mem =
(void*)((char*)expanders[UCOL].mem + extra);
}
stack.top1 += extra;
stack.used += extra;
if ( type == UCOL ) {
stack.top1 += extra; /* Add same amount for USUB */
stack.used += extra;
}
} /* if ... */
} /* else ... */
}
expanders[type].size = new_len;
*prev_len = new_len;
if ( no_expand ) ++no_expand;
return (void *) expanders[type].mem;
} /* sexpand */
/*
* Compress the work[] array to remove fragmentation.
*/
void
sStackCompress(GlobalLU_t *Glu)
{
register int iword, dword, ndim;
char *last, *fragment;
int *ifrom, *ito;
float *dfrom, *dto;
int *xlsub, *lsub, *xusub, *usub, *xlusup;
float *ucol, *lusup;
iword = sizeof(int);
dword = sizeof(float);
ndim = Glu->n;
xlsub = Glu->xlsub;
lsub = Glu->lsub;
xusub = Glu->xusub;
usub = Glu->usub;
xlusup = Glu->xlusup;
ucol = Glu->ucol;
lusup = Glu->lusup;
dfrom = ucol;
dto = (float *)((char*)lusup + xlusup[ndim] * dword);
copy_mem_float(xusub[ndim], dfrom, dto);
ucol = dto;
ifrom = lsub;
ito = (int *) ((char*)ucol + xusub[ndim] * iword);
copy_mem_int(xlsub[ndim], ifrom, ito);
lsub = ito;
ifrom = usub;
ito = (int *) ((char*)lsub + xlsub[ndim] * iword);
copy_mem_int(xusub[ndim], ifrom, ito);
usub = ito;
last = (char*)usub + xusub[ndim] * iword;
fragment = (char*) (((char*)stack.array + stack.top1) - last);
stack.used -= (long int) fragment;
stack.top1 -= (long int) fragment;
Glu->ucol = ucol;
Glu->lsub = lsub;
Glu->usub = usub;
#ifdef DEBUG
printf("sStackCompress: fragment %d\n", fragment);
/* for (last = 0; last < ndim; ++last)
print_lu_col("After compress:", last, 0);*/
#endif
}
/*
* Allocate storage for original matrix A
*/
void
sallocateA(int n, int nnz, float **a, int **asub, int **xa)
{
*a = (float *) floatMalloc(nnz);
*asub = (int *) intMalloc(nnz);
*xa = (int *) intMalloc(n+1);
}
float *floatMalloc(int n)
{
float *buf;
buf = (float *) SUPERLU_MALLOC(n * sizeof(float));
if ( !buf ) {
ABORT("SUPERLU_MALLOC failed for buf in floatMalloc()\n");
}
return (buf);
}
float *floatCalloc(int n)
{
float *buf;
register int i;
float zero = 0.0;
buf = (float *) SUPERLU_MALLOC(n * sizeof(float));
if ( !buf ) {
ABORT("SUPERLU_MALLOC failed for buf in floatCalloc()\n");
}
for (i = 0; i < n; ++i) buf[i] = zero;
return (buf);
}
int smemory_usage(const int nzlmax, const int nzumax,
const int nzlumax, const int n)
{
register int iword, dword;
iword = sizeof(int);
dword = sizeof(float);
return (10 * n * iword +
nzlmax * iword + nzumax * (iword + dword) + nzlumax * dword);
}

@ -0,0 +1,225 @@
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* November 15, 1997
*
*/
/*
* File name: smyblas2.c
* Purpose:
* Level 2 BLAS operations: solves and matvec, written in C.
* Note:
* This is only used when the system lacks an efficient BLAS library.
*/
/*
* Solves a dense UNIT lower triangular system. The unit lower
* triangular matrix is stored in a 2D array M(1:nrow,1:ncol).
* The solution will be returned in the rhs vector.
*/
void slsolve ( int ldm, int ncol, float *M, float *rhs )
{
int k;
float x0, x1, x2, x3, x4, x5, x6, x7;
float *M0;
register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
register int firstcol = 0;
M0 = &M[0];
while ( firstcol < ncol - 7 ) { /* Do 8 columns */
Mki0 = M0 + 1;
Mki1 = Mki0 + ldm + 1;
Mki2 = Mki1 + ldm + 1;
Mki3 = Mki2 + ldm + 1;
Mki4 = Mki3 + ldm + 1;
Mki5 = Mki4 + ldm + 1;
Mki6 = Mki5 + ldm + 1;
Mki7 = Mki6 + ldm + 1;
x0 = rhs[firstcol];
x1 = rhs[firstcol+1] - x0 * *Mki0++;
x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
x4 = rhs[firstcol+4] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
- x3 * *Mki3++;
x5 = rhs[firstcol+5] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
- x3 * *Mki3++ - x4 * *Mki4++;
x6 = rhs[firstcol+6] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
- x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++;
x7 = rhs[firstcol+7] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
- x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++
- x6 * *Mki6++;
rhs[++firstcol] = x1;
rhs[++firstcol] = x2;
rhs[++firstcol] = x3;
rhs[++firstcol] = x4;
rhs[++firstcol] = x5;
rhs[++firstcol] = x6;
rhs[++firstcol] = x7;
++firstcol;
for (k = firstcol; k < ncol; k++)
rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
- x2 * *Mki2++ - x3 * *Mki3++
- x4 * *Mki4++ - x5 * *Mki5++
- x6 * *Mki6++ - x7 * *Mki7++;
M0 += 8 * ldm + 8;
}
while ( firstcol < ncol - 3 ) { /* Do 4 columns */
Mki0 = M0 + 1;
Mki1 = Mki0 + ldm + 1;
Mki2 = Mki1 + ldm + 1;
Mki3 = Mki2 + ldm + 1;
x0 = rhs[firstcol];
x1 = rhs[firstcol+1] - x0 * *Mki0++;
x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
rhs[++firstcol] = x1;
rhs[++firstcol] = x2;
rhs[++firstcol] = x3;
++firstcol;
for (k = firstcol; k < ncol; k++)
rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
- x2 * *Mki2++ - x3 * *Mki3++;
M0 += 4 * ldm + 4;
}
if ( firstcol < ncol - 1 ) { /* Do 2 columns */
Mki0 = M0 + 1;
Mki1 = Mki0 + ldm + 1;
x0 = rhs[firstcol];
x1 = rhs[firstcol+1] - x0 * *Mki0++;
rhs[++firstcol] = x1;
++firstcol;
for (k = firstcol; k < ncol; k++)
rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++;
}
}
/*
* Solves a dense upper triangular system. The upper triangular matrix is
* stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
* in the rhs vector.
*/
void
susolve ( ldm, ncol, M, rhs )
int ldm; /* in */
int ncol; /* in */
float *M; /* in */
float *rhs; /* modified */
{
float xj;
int jcol, j, irow;
jcol = ncol - 1;
for (j = 0; j < ncol; j++) {
xj = rhs[jcol] / M[jcol + jcol*ldm]; /* M(jcol, jcol) */
rhs[jcol] = xj;
for (irow = 0; irow < jcol; irow++)
rhs[irow] -= xj * M[irow + jcol*ldm]; /* M(irow, jcol) */
jcol--;
}
}
/*
* Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
* The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
*/
void smatvec ( ldm, nrow, ncol, M, vec, Mxvec )
int ldm; /* in -- leading dimension of M */
int nrow; /* in */
int ncol; /* in */
float *M; /* in */
float *vec; /* in */
float *Mxvec; /* in/out */
{
float vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7;
float *M0;
register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
register int firstcol = 0;
int k;
M0 = &M[0];
while ( firstcol < ncol - 7 ) { /* Do 8 columns */
Mki0 = M0;
Mki1 = Mki0 + ldm;
Mki2 = Mki1 + ldm;
Mki3 = Mki2 + ldm;
Mki4 = Mki3 + ldm;
Mki5 = Mki4 + ldm;
Mki6 = Mki5 + ldm;
Mki7 = Mki6 + ldm;
vi0 = vec[firstcol++];
vi1 = vec[firstcol++];
vi2 = vec[firstcol++];
vi3 = vec[firstcol++];
vi4 = vec[firstcol++];
vi5 = vec[firstcol++];
vi6 = vec[firstcol++];
vi7 = vec[firstcol++];
for (k = 0; k < nrow; k++)
Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
+ vi2 * *Mki2++ + vi3 * *Mki3++
+ vi4 * *Mki4++ + vi5 * *Mki5++
+ vi6 * *Mki6++ + vi7 * *Mki7++;
M0 += 8 * ldm;
}
while ( firstcol < ncol - 3 ) { /* Do 4 columns */
Mki0 = M0;
Mki1 = Mki0 + ldm;
Mki2 = Mki1 + ldm;
Mki3 = Mki2 + ldm;
vi0 = vec[firstcol++];
vi1 = vec[firstcol++];
vi2 = vec[firstcol++];
vi3 = vec[firstcol++];
for (k = 0; k < nrow; k++)
Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
+ vi2 * *Mki2++ + vi3 * *Mki3++ ;
M0 += 4 * ldm;
}
while ( firstcol < ncol ) { /* Do 1 column */
Mki0 = M0;
vi0 = vec[firstcol++];
for (k = 0; k < nrow; k++)
Mxvec[k] += vi0 * *Mki0++;
M0 += ldm;
}
}

@ -0,0 +1,332 @@
/* Elimination tree computation and layout routines */
#include <stdio.h>
#include <stdlib.h>
#include "ssp_defs.h"
/*
* Implementation of disjoint set union routines.
* Elements are integers in 0..n-1, and the
* names of the sets themselves are of type int.
*
* Calls are:
* initialize_disjoint_sets (n) initial call.
* s = make_set (i) returns a set containing only i.
* s = link (t, u) returns s = t union u, destroying t and u.
* s = find (i) return name of set containing i.
* finalize_disjoint_sets final call.
*
* This implementation uses path compression but not weighted union.
* See Tarjan's book for details.
* John Gilbert, CMI, 1987.
*
* Implemented path-halving by XSL 07/05/95.
*/
static int *pp; /* parent array for sets */
static
int *mxCallocInt(int n)
{
register int i;
int *buf;
buf = (int *) SUPERLU_MALLOC( n * sizeof(int) );
if ( !buf ) {
ABORT("SUPERLU_MALLOC fails for buf in mxCallocInt()");
}
for (i = 0; i < n; i++) buf[i] = 0;
return (buf);
}
static
void initialize_disjoint_sets (
int n
)
{
pp = mxCallocInt(n);
}
static
int make_set (
int i
)
{
pp[i] = i;
return i;
}
static
int link (
int s,
int t
)
{
pp[s] = t;
return t;
}
/* PATH HALVING */
static
int find (int i)
{
register int p, gp;
p = pp[i];
gp = pp[p];
while (gp != p) {
pp[i] = gp;
i = gp;
p = pp[i];
gp = pp[p];
}
return (p);
}
#if 0
/* PATH COMPRESSION */
static
int find (
int i
)
{
if (pp[i] != i)
pp[i] = find (pp[i]);
return pp[i];
}
#endif
static
void finalize_disjoint_sets (
void
)
{
SUPERLU_FREE(pp);
}
/*
* Find the elimination tree for A'*A.
* This uses something similar to Liu's algorithm.
* It runs in time O(nz(A)*log n) and does not form A'*A.
*
* Input:
* Sparse matrix A. Numeric values are ignored, so any
* explicit zeros are treated as nonzero.
* Output:
* Integer array of parents representing the elimination
* tree of the symbolic product A'*A. Each vertex is a
* column of A, and nc means a root of the elimination forest.
*
* John R. Gilbert, Xerox, 10 Dec 1990
* Based on code by JRG dated 1987, 1988, and 1990.
*/
/*
* Nonsymmetric elimination tree
*/
int
sp_coletree(
int *acolst, int *acolend, /* column start and end past 1 */
int *arow, /* row indices of A */
int nr, int nc, /* dimension of A */
int *parent /* parent in elim tree */
)
{
int *root; /* root of subtee of etree */
int *firstcol; /* first nonzero col in each row*/
int rset, cset;
int row, col;
int rroot;
int p;
root = mxCallocInt (nc);
initialize_disjoint_sets (nc);
/* Compute firstcol[row] = first nonzero column in row */
firstcol = mxCallocInt (nr);
for (row = 0; row < nr; firstcol[row++] = nc);
for (col = 0; col < nc; col++)
for (p = acolst[col]; p < acolend[col]; p++) {
row = arow[p];
firstcol[row] = SUPERLU_MIN(firstcol[row], col);
}
/* Compute etree by Liu's algorithm for symmetric matrices,
except use (firstcol[r],c) in place of an edge (r,c) of A.
Thus each row clique in A'*A is replaced by a star
centered at its first vertex, which has the same fill. */
for (col = 0; col < nc; col++) {
cset = make_set (col);
root[cset] = col;
parent[col] = nc; /* Matlab */
for (p = acolst[col]; p < acolend[col]; p++) {
row = firstcol[arow[p]];
if (row >= col) continue;
rset = find (row);
rroot = root[rset];
if (rroot != col) {
parent[rroot] = col;
cset = link (cset, rset);
root[cset] = col;
}
}
}
SUPERLU_FREE (root);
SUPERLU_FREE (firstcol);
finalize_disjoint_sets ();
return 0;
}
/*
* q = TreePostorder (n, p);
*
* Postorder a tree.
* Input:
* p is a vector of parent pointers for a forest whose
* vertices are the integers 0 to n-1; p[root]==n.
* Output:
* q is a vector indexed by 0..n-1 such that q[i] is the
* i-th vertex in a postorder numbering of the tree.
*
* ( 2/7/95 modified by X.Li:
* q is a vector indexed by 0:n-1 such that vertex i is the
* q[i]-th vertex in a postorder numbering of the tree.
* That is, this is the inverse of the previous q. )
*
* In the child structure, lower-numbered children are represented
* first, so that a tree which is already numbered in postorder
* will not have its order changed.
*
* Written by John Gilbert, Xerox, 10 Dec 1990.
* Based on code written by John Gilbert at CMI in 1987.
*/
static int *first_kid, *next_kid; /* Linked list of children. */
static int *post, postnum;
static
/*
* Depth-first search from vertex v.
*/
void etdfs (
int v
)
{
int w;
for (w = first_kid[v]; w != -1; w = next_kid[w]) {
etdfs (w);
}
/* post[postnum++] = v; in Matlab */
post[v] = postnum++; /* Modified by X.Li on 2/14/95 */
}
/*
* Post order a tree
*/
int *TreePostorder(
int n,
int *parent
)
{
int v, dad;
/* Allocate storage for working arrays and results */
first_kid = mxCallocInt (n+1);
next_kid = mxCallocInt (n+1);
post = mxCallocInt (n+1);
/* Set up structure describing children */
for (v = 0; v <= n; first_kid[v++] = -1);
for (v = n-1; v >= 0; v--) {
dad = parent[v];
next_kid[v] = first_kid[dad];
first_kid[dad] = v;
}
/* Depth-first search from dummy root vertex #n */
postnum = 0;
etdfs (n);
SUPERLU_FREE (first_kid);
SUPERLU_FREE (next_kid);
return post;
}
/*
* p = spsymetree (A);
*
* Find the elimination tree for symmetric matrix A.
* This uses Liu's algorithm, and runs in time O(nz*log n).
*
* Input:
* Square sparse matrix A. No check is made for symmetry;
* elements below and on the diagonal are ignored.
* Numeric values are ignored, so any explicit zeros are
* treated as nonzero.
* Output:
* Integer array of parents representing the etree, with n
* meaning a root of the elimination forest.
* Note:
* This routine uses only the upper triangle, while sparse
* Cholesky (as in spchol.c) uses only the lower. Matlab's
* dense Cholesky uses only the upper. This routine could
* be modified to use the lower triangle either by transposing
* the matrix or by traversing it by rows with auxiliary
* pointer and link arrays.
*
* John R. Gilbert, Xerox, 10 Dec 1990
* Based on code by JRG dated 1987, 1988, and 1990.
* Modified by X.S. Li, November 1999.
*/
/*
* Symmetric elimination tree
*/
int
sp_symetree(
int *acolst, int *acolend, /* column starts and ends past 1 */
int *arow, /* row indices of A */
int n, /* dimension of A */
int *parent /* parent in elim tree */
)
{
int *root; /* root of subtree of etree */
int rset, cset;
int row, col;
int rroot;
int p;
root = mxCallocInt (n);
initialize_disjoint_sets (n);
for (col = 0; col < n; col++) {
cset = make_set (col);
root[cset] = col;
parent[col] = n; /* Matlab */
for (p = acolst[col]; p < acolend[col]; p++) {
row = arow[p];
if (row >= col) continue;
rset = find (row);
rroot = root[rset];
if (rroot != col) {
parent[rroot] = col;
cset = link (cset, rset);
root[cset] = col;
}
}
}
SUPERLU_FREE (root);
finalize_disjoint_sets ();
return 0;
} /* SP_SYMETREE */

@ -0,0 +1,65 @@
/*
* File name: sp_ienv.c
* History: Modified from lapack routine ILAENV
*/
#include "ssp_defs.h"
#include "util.h"
int
sp_ienv(int ispec)
{
/*
Purpose
=======
sp_ienv() is inquired to choose machine-dependent parameters for the
local environment. See ISPEC for a description of the parameters.
This version provides a set of parameters which should give good,
but not optimal, performance on many of the currently available
computers. Users are encouraged to modify this subroutine to set
the tuning parameters for their particular machine using the option
and problem size information in the arguments.
Arguments
=========
ISPEC (input) int
Specifies the parameter to be returned as the value of SP_IENV.
= 1: the panel size w; a panel consists of w consecutive
columns of matrix A in the process of Gaussian elimination.
The best value depends on machine's cache characters.
= 2: the relaxation parameter relax; if the number of
nodes (columns) in a subtree of the elimination tree is less
than relax, this subtree is considered as one supernode,
regardless of their row structures.
= 3: the maximum size for a supernode;
= 4: the minimum row dimension for 2-D blocking to be used;
= 5: the minimum column dimension for 2-D blocking to be used;
= 6: the estimated fills factor for L and U, compared with A;
(SP_IENV) (output) int
>= 0: the value of the parameter specified by ISPEC
< 0: if SP_IENV = -k, the k-th argument had an illegal value.
=====================================================================
*/
int i;
switch (ispec) {
case 1: return (10);
case 2: return (5);
case 3: return (100);
case 4: return (200);
case 5: return (40);
case 6: return (20);
}
/* Invalid value for ISPEC */
i = 1;
xerbla_("sp_ienv", &i);
return 0;
} /* sp_ienv_ */

@ -0,0 +1,203 @@
#include "ssp_defs.h"
void
sp_preorder(superlu_options_t *options, SuperMatrix *A, int *perm_c,
int *etree, SuperMatrix *AC)
{
/*
* Purpose
* =======
*
* sp_preorder() permutes the columns of the original matrix. It performs
* the following steps:
*
* 1. Apply column permutation perm_c[] to A's column pointers to form AC;
*
* 2. If options->Fact = DOFACT, then
* (1) Compute column elimination tree etree[] of AC'AC;
* (2) Post order etree[] to get a postordered elimination tree etree[],
* and a postorder permutation post[];
* (3) Apply post[] permutation to columns of AC;
* (4) Overwrite perm_c[] with the product perm_c * post.
*
* Arguments
* =========
*
* options (input) superlu_options_t*
* Specifies whether or not the elimination tree will be re-used.
* If options->Fact == DOFACT, this means first time factor A,
* etree is computed, postered, and output.
* Otherwise, re-factor A, etree is input, unchanged on exit.
*
* A (input) SuperMatrix*
* Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
* of the linear equations is A->nrow. Currently, the type of A can be:
* Stype = NC or SLU_NCP; Mtype = SLU_GE.
* In the future, more general A may be handled.
*
* perm_c (input/output) int*
* Column permutation vector of size A->ncol, which defines the
* permutation matrix Pc; perm_c[i] = j means column i of A is
* in position j in A*Pc.
* If options->Fact == DOFACT, perm_c is both input and output.
* On output, it is changed according to a postorder of etree.
* Otherwise, perm_c is input.
*
* etree (input/output) int*
* Elimination tree of Pc'*A'*A*Pc, dimension A->ncol.
* If options->Fact == DOFACT, etree is an output argument,
* otherwise it is an input argument.
* Note: etree is a vector of parent pointers for a forest whose
* vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
*
* AC (output) SuperMatrix*
* The resulting matrix after applied the column permutation
* perm_c[] to matrix A. The type of AC can be:
* Stype = SLU_NCP; Dtype = A->Dtype; Mtype = SLU_GE.
*
*/
NCformat *Astore;
NCPformat *ACstore;
int *iwork, *post;
register int n, i;
n = A->ncol;
/* Apply column permutation perm_c to A's column pointers so to
obtain NCP format in AC = A*Pc. */
AC->Stype = SLU_NCP;
AC->Dtype = A->Dtype;
AC->Mtype = A->Mtype;
AC->nrow = A->nrow;
AC->ncol = A->ncol;
Astore = A->Store;
ACstore = AC->Store = (void *) SUPERLU_MALLOC( sizeof(NCPformat) );
if ( !ACstore ) ABORT("SUPERLU_MALLOC fails for ACstore");
ACstore->nnz = Astore->nnz;
ACstore->nzval = Astore->nzval;
ACstore->rowind = Astore->rowind;
ACstore->colbeg = (int*) SUPERLU_MALLOC(n*sizeof(int));
if ( !(ACstore->colbeg) ) ABORT("SUPERLU_MALLOC fails for ACstore->colbeg");
ACstore->colend = (int*) SUPERLU_MALLOC(n*sizeof(int));
if ( !(ACstore->colend) ) ABORT("SUPERLU_MALLOC fails for ACstore->colend");
#ifdef DEBUG
print_int_vec("pre_order:", n, perm_c);
check_perm("Initial perm_c", n, perm_c);
#endif
for (i = 0; i < n; i++) {
ACstore->colbeg[perm_c[i]] = Astore->colptr[i];
ACstore->colend[perm_c[i]] = Astore->colptr[i+1];
}
if ( options->Fact == DOFACT ) {
#undef ETREE_ATplusA
#ifdef ETREE_ATplusA
/*--------------------------------------------
COMPUTE THE ETREE OF Pc*(A'+A)*Pc'.
--------------------------------------------*/
int *b_colptr, *b_rowind, bnz, j;
int *c_colbeg, *c_colend;
/*printf("Use etree(A'+A)\n");*/
/* Form B = A + A'. */
at_plus_a(n, Astore->nnz, Astore->colptr, Astore->rowind,
&bnz, &b_colptr, &b_rowind);
/* Form C = Pc*B*Pc'. */
c_colbeg = (int*) SUPERLU_MALLOC(2*n*sizeof(int));
c_colend = c_colbeg + n;
if (!c_colbeg ) ABORT("SUPERLU_MALLOC fails for c_colbeg/c_colend");
for (i = 0; i < n; i++) {
c_colbeg[perm_c[i]] = b_colptr[i];
c_colend[perm_c[i]] = b_colptr[i+1];
}
for (j = 0; j < n; ++j) {
for (i = c_colbeg[j]; i < c_colend[j]; ++i) {
b_rowind[i] = perm_c[b_rowind[i]];
}
}
/* Compute etree of C. */
sp_symetree(c_colbeg, c_colend, b_rowind, n, etree);
SUPERLU_FREE(b_colptr);
if ( bnz ) SUPERLU_FREE(b_rowind);
SUPERLU_FREE(c_colbeg);
#else
/*--------------------------------------------
COMPUTE THE COLUMN ELIMINATION TREE.
--------------------------------------------*/
sp_coletree(ACstore->colbeg, ACstore->colend, ACstore->rowind,
A->nrow, A->ncol, etree);
#endif
#ifdef DEBUG
print_int_vec("etree:", n, etree);
#endif
/* In symmetric mode, do not do postorder here. */
if ( options->SymmetricMode == NO ) {
/* Post order etree */
post = (int *) TreePostorder(n, etree);
/* for (i = 0; i < n+1; ++i) inv_post[post[i]] = i;
iwork = post; */
#ifdef DEBUG
print_int_vec("post:", n+1, post);
check_perm("post", n, post);
#endif
iwork = (int*) SUPERLU_MALLOC((n+1)*sizeof(int));
if ( !iwork ) ABORT("SUPERLU_MALLOC fails for iwork[]");
/* Renumber etree in postorder */
for (i = 0; i < n; ++i) iwork[post[i]] = post[etree[i]];
for (i = 0; i < n; ++i) etree[i] = iwork[i];
#ifdef DEBUG
print_int_vec("postorder etree:", n, etree);
#endif
/* Postmultiply A*Pc by post[] */
for (i = 0; i < n; ++i) iwork[post[i]] = ACstore->colbeg[i];
for (i = 0; i < n; ++i) ACstore->colbeg[i] = iwork[i];
for (i = 0; i < n; ++i) iwork[post[i]] = ACstore->colend[i];
for (i = 0; i < n; ++i) ACstore->colend[i] = iwork[i];
for (i = 0; i < n; ++i)
iwork[i] = post[perm_c[i]]; /* product of perm_c and post */
for (i = 0; i < n; ++i) perm_c[i] = iwork[i];
#ifdef DEBUG
print_int_vec("Pc*post:", n, perm_c);
check_perm("final perm_c", n, perm_c);
#endif
SUPERLU_FREE (post);
SUPERLU_FREE (iwork);
} /* end postordering */
} /* if options->Fact == DOFACT ... */
}
int check_perm(char *what, int n, int *perm)
{
register int i;
int *marker;
marker = (int *) calloc(n, sizeof(int));
for (i = 0; i < n; ++i) {
if ( marker[perm[i]] == 1 || perm[i] >= n ) {
printf("%s: Not a valid PERM[%d] = %d\n", what, i, perm[i]);
ABORT("check_perm");
} else {
marker[perm[i]] = 1;
}
}
SUPERLU_FREE(marker);
return 0;
}

@ -0,0 +1,449 @@
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* October 15, 2003
*
*/
/*
Copyright (c) 1994 by Xerox Corporation. All rights reserved.
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
Permission is hereby granted to use or copy this program for any
purpose, provided the above notices are retained on all copies.
Permission to modify the code and to distribute modified code is
granted, provided the above notices are retained, and a notice that
the code was modified is included with the above copyright notice.
*/
#include <stdio.h>
#include <stdlib.h>
#include "ssp_defs.h"
/*
* Function prototypes
*/
void slsolve(int, int, float *, float *);
void smatvec(int, int, int, float *, float *, float *);
extern void scheck_tempv();
void
spanel_bmod (
const int m, /* in - number of rows in the matrix */
const int w, /* in */
const int jcol, /* in */
const int nseg, /* in */
float *dense, /* out, of size n by w */
float *tempv, /* working array */
int *segrep, /* in */
int *repfnz, /* in, of size n by w */
GlobalLU_t *Glu, /* modified */
SuperLUStat_t *stat /* output */
)
{
/*
* Purpose
* =======
*
* Performs numeric block updates (sup-panel) in topological order.
* It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
* Special processing on the supernodal portion of L\U[*,j]
*
* Before entering this routine, the original nonzeros in the panel
* were already copied into the spa[m,w].
*
* Updated/Output parameters-
* dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned
* collectively in the m-by-w vector dense[*].
*
*/
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
_fcd ftcs1 = _cptofcd("L", strlen("L")),
ftcs2 = _cptofcd("N", strlen("N")),
ftcs3 = _cptofcd("U", strlen("U"));
#endif
int incx = 1, incy = 1;
float alpha, beta;
#endif
register int k, ksub;
int fsupc, nsupc, nsupr, nrow;
int krep, krep_ind;
float ukj, ukj1, ukj2;
int luptr, luptr1, luptr2;
int segsze;
int block_nrow; /* no of rows in a block row */
register int lptr; /* Points to the row subscripts of a supernode */
int kfnz, irow, no_zeros;
register int isub, isub1, i;
register int jj; /* Index through each column in the panel */
int *xsup, *supno;
int *lsub, *xlsub;
float *lusup;
int *xlusup;
int *repfnz_col; /* repfnz[] for a column in the panel */
float *dense_col; /* dense[] for a column in the panel */
float *tempv1; /* Used in 1-D update */
float *TriTmp, *MatvecTmp; /* used in 2-D update */
float zero = 0.0;
register int ldaTmp;
register int r_ind, r_hi;
static int first = 1, maxsuper, rowblk, colblk;
flops_t *ops = stat->ops;
xsup = Glu->xsup;
supno = Glu->supno;
lsub = Glu->lsub;
xlsub = Glu->xlsub;
lusup = Glu->lusup;
xlusup = Glu->xlusup;
if ( first ) {
maxsuper = sp_ienv(3);
rowblk = sp_ienv(4);
colblk = sp_ienv(5);
first = 0;
}
ldaTmp = maxsuper + rowblk;
/*
* For each nonz supernode segment of U[*,j] in topological order
*/
k = nseg - 1;
for (ksub = 0; ksub < nseg; ksub++) { /* for each updating supernode */
/* krep = representative of current k-th supernode
* fsupc = first supernodal column
* nsupc = no of columns in a supernode
* nsupr = no of rows in a supernode
*/
krep = segrep[k--];
fsupc = xsup[supno[krep]];
nsupc = krep - fsupc + 1;
nsupr = xlsub[fsupc+1] - xlsub[fsupc];
nrow = nsupr - nsupc;
lptr = xlsub[fsupc];
krep_ind = lptr + nsupc - 1;
repfnz_col = repfnz;
dense_col = dense;
if ( nsupc >= colblk && nrow > rowblk ) { /* 2-D block update */
TriTmp = tempv;
/* Sequence through each column in panel -- triangular solves */
for (jj = jcol; jj < jcol + w; jj++,
repfnz_col += m, dense_col += m, TriTmp += ldaTmp ) {
kfnz = repfnz_col[krep];
if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
segsze = krep - kfnz + 1;
luptr = xlusup[fsupc];
ops[TRSV] += segsze * (segsze - 1);
ops[GEMV] += 2 * nrow * segsze;
/* Case 1: Update U-segment of size 1 -- col-col update */
if ( segsze == 1 ) {
ukj = dense_col[lsub[krep_ind]];
luptr += nsupr*(nsupc-1) + nsupc;
for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
irow = lsub[i];
dense_col[irow] -= ukj * lusup[luptr];
++luptr;
}
} else if ( segsze <= 3 ) {
ukj = dense_col[lsub[krep_ind]];
ukj1 = dense_col[lsub[krep_ind - 1]];
luptr += nsupr*(nsupc-1) + nsupc-1;
luptr1 = luptr - nsupr;
if ( segsze == 2 ) {
ukj -= ukj1 * lusup[luptr1];
dense_col[lsub[krep_ind]] = ukj;
for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
irow = lsub[i];
luptr++; luptr1++;
dense_col[irow] -= (ukj*lusup[luptr]
+ ukj1*lusup[luptr1]);
}
} else {
ukj2 = dense_col[lsub[krep_ind - 2]];
luptr2 = luptr1 - nsupr;
ukj1 -= ukj2 * lusup[luptr2-1];
ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
dense_col[lsub[krep_ind]] = ukj;
dense_col[lsub[krep_ind-1]] = ukj1;
for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
irow = lsub[i];
luptr++; luptr1++; luptr2++;
dense_col[irow] -= ( ukj*lusup[luptr]
+ ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
}
}
} else { /* segsze >= 4 */
/* Copy U[*,j] segment from dense[*] to TriTmp[*], which
holds the result of triangular solves. */
no_zeros = kfnz - fsupc;
isub = lptr + no_zeros;
for (i = 0; i < segsze; ++i) {
irow = lsub[isub];
TriTmp[i] = dense_col[irow]; /* Gather */
++isub;
}
/* start effective triangle */
luptr += nsupr * no_zeros + no_zeros;
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr],
&nsupr, TriTmp, &incx );
#else
strsv_( "L", "N", "U", &segsze, &lusup[luptr],
&nsupr, TriTmp, &incx );
#endif
#else
slsolve ( nsupr, segsze, &lusup[luptr], TriTmp );
#endif
} /* else ... */
} /* for jj ... end tri-solves */
/* Block row updates; push all the way into dense[*] block */
for ( r_ind = 0; r_ind < nrow; r_ind += rowblk ) {
r_hi = SUPERLU_MIN(nrow, r_ind + rowblk);
block_nrow = SUPERLU_MIN(rowblk, r_hi - r_ind);
luptr = xlusup[fsupc] + nsupc + r_ind;
isub1 = lptr + nsupc + r_ind;
repfnz_col = repfnz;
TriTmp = tempv;
dense_col = dense;
/* Sequence through each column in panel -- matrix-vector */
for (jj = jcol; jj < jcol + w; jj++,
repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
kfnz = repfnz_col[krep];
if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
segsze = krep - kfnz + 1;
if ( segsze <= 3 ) continue; /* skip unrolled cases */
/* Perform a block update, and scatter the result of
matrix-vector to dense[]. */
no_zeros = kfnz - fsupc;
luptr1 = luptr + nsupr * no_zeros;
MatvecTmp = &TriTmp[maxsuper];
#ifdef USE_VENDOR_BLAS
alpha = one;
beta = zero;
#ifdef _CRAY
SGEMV(ftcs2, &block_nrow, &segsze, &alpha, &lusup[luptr1],
&nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
#else
sgemv_("N", &block_nrow, &segsze, &alpha, &lusup[luptr1],
&nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
#endif
#else
smatvec(nsupr, block_nrow, segsze, &lusup[luptr1],
TriTmp, MatvecTmp);
#endif
/* Scatter MatvecTmp[*] into SPA dense[*] temporarily
* such that MatvecTmp[*] can be re-used for the
* the next blok row update. dense[] will be copied into
* global store after the whole panel has been finished.
*/
isub = isub1;
for (i = 0; i < block_nrow; i++) {
irow = lsub[isub];
dense_col[irow] -= MatvecTmp[i];
MatvecTmp[i] = zero;
++isub;
}
} /* for jj ... */
} /* for each block row ... */
/* Scatter the triangular solves into SPA dense[*] */
repfnz_col = repfnz;
TriTmp = tempv;
dense_col = dense;
for (jj = jcol; jj < jcol + w; jj++,
repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
kfnz = repfnz_col[krep];
if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
segsze = krep - kfnz + 1;
if ( segsze <= 3 ) continue; /* skip unrolled cases */
no_zeros = kfnz - fsupc;
isub = lptr + no_zeros;
for (i = 0; i < segsze; i++) {
irow = lsub[isub];
dense_col[irow] = TriTmp[i];
TriTmp[i] = zero;
++isub;
}
} /* for jj ... */
} else { /* 1-D block modification */
/* Sequence through each column in the panel */
for (jj = jcol; jj < jcol + w; jj++,
repfnz_col += m, dense_col += m) {
kfnz = repfnz_col[krep];
if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
segsze = krep - kfnz + 1;
luptr = xlusup[fsupc];
ops[TRSV] += segsze * (segsze - 1);
ops[GEMV] += 2 * nrow * segsze;
/* Case 1: Update U-segment of size 1 -- col-col update */
if ( segsze == 1 ) {
ukj = dense_col[lsub[krep_ind]];
luptr += nsupr*(nsupc-1) + nsupc;
for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
irow = lsub[i];
dense_col[irow] -= ukj * lusup[luptr];
++luptr;
}
} else if ( segsze <= 3 ) {
ukj = dense_col[lsub[krep_ind]];
luptr += nsupr*(nsupc-1) + nsupc-1;
ukj1 = dense_col[lsub[krep_ind - 1]];
luptr1 = luptr - nsupr;
if ( segsze == 2 ) {
ukj -= ukj1 * lusup[luptr1];
dense_col[lsub[krep_ind]] = ukj;
for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
irow = lsub[i];
++luptr; ++luptr1;
dense_col[irow] -= (ukj*lusup[luptr]
+ ukj1*lusup[luptr1]);
}
} else {
ukj2 = dense_col[lsub[krep_ind - 2]];
luptr2 = luptr1 - nsupr;
ukj1 -= ukj2 * lusup[luptr2-1];
ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
dense_col[lsub[krep_ind]] = ukj;
dense_col[lsub[krep_ind-1]] = ukj1;
for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
irow = lsub[i];
++luptr; ++luptr1; ++luptr2;
dense_col[irow] -= ( ukj*lusup[luptr]
+ ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
}
}
} else { /* segsze >= 4 */
/*
* Perform a triangular solve and block update,
* then scatter the result of sup-col update to dense[].
*/
no_zeros = kfnz - fsupc;
/* Copy U[*,j] segment from dense[*] to tempv[*]:
* The result of triangular solve is in tempv[*];
* The result of matrix vector update is in dense_col[*]
*/
isub = lptr + no_zeros;
for (i = 0; i < segsze; ++i) {
irow = lsub[isub];
tempv[i] = dense_col[irow]; /* Gather */
++isub;
}
/* start effective triangle */
luptr += nsupr * no_zeros + no_zeros;
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr],
&nsupr, tempv, &incx );
#else
strsv_( "L", "N", "U", &segsze, &lusup[luptr],
&nsupr, tempv, &incx );
#endif
luptr += segsze; /* Dense matrix-vector */
tempv1 = &tempv[segsze];
alpha = one;
beta = zero;
#ifdef _CRAY
SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr],
&nsupr, tempv, &incx, &beta, tempv1, &incy );
#else
sgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr],
&nsupr, tempv, &incx, &beta, tempv1, &incy );
#endif
#else
slsolve ( nsupr, segsze, &lusup[luptr], tempv );
luptr += segsze; /* Dense matrix-vector */
tempv1 = &tempv[segsze];
smatvec (nsupr, nrow, segsze, &lusup[luptr], tempv, tempv1);
#endif
/* Scatter tempv[*] into SPA dense[*] temporarily, such
* that tempv[*] can be used for the triangular solve of
* the next column of the panel. They will be copied into
* ucol[*] after the whole panel has been finished.
*/
isub = lptr + no_zeros;
for (i = 0; i < segsze; i++) {
irow = lsub[isub];
dense_col[irow] = tempv[i];
tempv[i] = zero;
isub++;
}
/* Scatter the update from tempv1[*] into SPA dense[*] */
/* Start dense rectangular L */
for (i = 0; i < nrow; i++) {
irow = lsub[isub];
dense_col[irow] -= tempv1[i];
tempv1[i] = zero;
++isub;
}
} /* else segsze>=4 ... */
} /* for each column in the panel... */
} /* else 1-D update ... */
} /* for each updating supernode ... */
}

@ -0,0 +1,249 @@
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* November 15, 1997
*
*/
/*
Copyright (c) 1994 by Xerox Corporation. All rights reserved.
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
Permission is hereby granted to use or copy this program for any
purpose, provided the above notices are retained on all copies.
Permission to modify the code and to distribute modified code is
granted, provided the above notices are retained, and a notice that
the code was modified is included with the above copyright notice.
*/
#include "ssp_defs.h"
#include "util.h"
void
spanel_dfs (
const int m, /* in - number of rows in the matrix */
const int w, /* in */
const int jcol, /* in */
SuperMatrix *A, /* in - original matrix */
int *perm_r, /* in */
int *nseg, /* out */
float *dense, /* out */
int *panel_lsub, /* out */
int *segrep, /* out */
int *repfnz, /* out */
int *xprune, /* out */
int *marker, /* out */
int *parent, /* working array */
int *xplore, /* working array */
GlobalLU_t *Glu /* modified */
)
{
/*
* Purpose
* =======
*
* Performs a symbolic factorization on a panel of columns [jcol, jcol+w).
*
* A supernode representative is the last column of a supernode.
* The nonzeros in U[*,j] are segments that end at supernodal
* representatives.
*
* The routine returns one list of the supernodal representatives
* in topological order of the dfs that generates them. This list is
* a superset of the topological order of each individual column within
* the panel.
* The location of the first nonzero in each supernodal segment
* (supernodal entry location) is also returned. Each column has a
* separate list for this purpose.
*
* Two marker arrays are used for dfs:
* marker[i] == jj, if i was visited during dfs of current column jj;
* marker1[i] >= jcol, if i was visited by earlier columns in this panel;
*
* marker: A-row --> A-row/col (0/1)
* repfnz: SuperA-col --> PA-row
* parent: SuperA-col --> SuperA-col
* xplore: SuperA-col --> index to L-structure
*
*/
NCPformat *Astore;
float *a;
int *asub;
int *xa_begin, *xa_end;
int krep, chperm, chmark, chrep, oldrep, kchild, myfnz;
int k, krow, kmark, kperm;
int xdfs, maxdfs, kpar;
int jj; /* index through each column in the panel */
int *marker1; /* marker1[jj] >= jcol if vertex jj was visited
by a previous column within this panel. */
int *repfnz_col; /* start of each column in the panel */
float *dense_col; /* start of each column in the panel */
int nextl_col; /* next available position in panel_lsub[*,jj] */
int *xsup, *supno;
int *lsub, *xlsub;
/* Initialize pointers */
Astore = A->Store;
a = Astore->nzval;
asub = Astore->rowind;
xa_begin = Astore->colbeg;
xa_end = Astore->colend;
marker1 = marker + m;
repfnz_col = repfnz;
dense_col = dense;
*nseg = 0;
xsup = Glu->xsup;
supno = Glu->supno;
lsub = Glu->lsub;
xlsub = Glu->xlsub;
/* For each column in the panel */
for (jj = jcol; jj < jcol + w; jj++) {
nextl_col = (jj - jcol) * m;
#ifdef CHK_DFS
printf("\npanel col %d: ", jj);
#endif
/* For each nonz in A[*,jj] do dfs */
for (k = xa_begin[jj]; k < xa_end[jj]; k++) {
krow = asub[k];
dense_col[krow] = a[k];
kmark = marker[krow];
if ( kmark == jj )
continue; /* krow visited before, go to the next nonzero */
/* For each unmarked nbr krow of jj
* krow is in L: place it in structure of L[*,jj]
*/
marker[krow] = jj;
kperm = perm_r[krow];
if ( kperm == EMPTY ) {
panel_lsub[nextl_col++] = krow; /* krow is indexed into A */
}
/*
* krow is in U: if its supernode-rep krep
* has been explored, update repfnz[*]
*/
else {
krep = xsup[supno[kperm]+1] - 1;
myfnz = repfnz_col[krep];
#ifdef CHK_DFS
printf("krep %d, myfnz %d, perm_r[%d] %d\n", krep, myfnz, krow, kperm);
#endif
if ( myfnz != EMPTY ) { /* Representative visited before */
if ( myfnz > kperm ) repfnz_col[krep] = kperm;
/* continue; */
}
else {
/* Otherwise, perform dfs starting at krep */
oldrep = EMPTY;
parent[krep] = oldrep;
repfnz_col[krep] = kperm;
xdfs = xlsub[krep];
maxdfs = xprune[krep];
#ifdef CHK_DFS
printf(" xdfs %d, maxdfs %d: ", xdfs, maxdfs);
for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
printf("\n");
#endif
do {
/*
* For each unmarked kchild of krep
*/
while ( xdfs < maxdfs ) {
kchild = lsub[xdfs];
xdfs++;
chmark = marker[kchild];
if ( chmark != jj ) { /* Not reached yet */
marker[kchild] = jj;
chperm = perm_r[kchild];
/* Case kchild is in L: place it in L[*,j] */
if ( chperm == EMPTY ) {
panel_lsub[nextl_col++] = kchild;
}
/* Case kchild is in U:
* chrep = its supernode-rep. If its rep has
* been explored, update its repfnz[*]
*/
else {
chrep = xsup[supno[chperm]+1] - 1;
myfnz = repfnz_col[chrep];
#ifdef CHK_DFS
printf("chrep %d,myfnz %d,perm_r[%d] %d\n",chrep,myfnz,kchild,chperm);
#endif
if ( myfnz != EMPTY ) { /* Visited before */
if ( myfnz > chperm )
repfnz_col[chrep] = chperm;
}
else {
/* Cont. dfs at snode-rep of kchild */
xplore[krep] = xdfs;
oldrep = krep;
krep = chrep; /* Go deeper down G(L) */
parent[krep] = oldrep;
repfnz_col[krep] = chperm;
xdfs = xlsub[krep];
maxdfs = xprune[krep];
#ifdef CHK_DFS
printf(" xdfs %d, maxdfs %d: ", xdfs, maxdfs);
for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
printf("\n");
#endif
} /* else */
} /* else */
} /* if... */
} /* while xdfs < maxdfs */
/* krow has no more unexplored nbrs:
* Place snode-rep krep in postorder DFS, if this
* segment is seen for the first time. (Note that
* "repfnz[krep]" may change later.)
* Backtrack dfs to its parent.
*/
if ( marker1[krep] < jcol ) {
segrep[*nseg] = krep;
++(*nseg);
marker1[krep] = jj;
}
kpar = parent[krep]; /* Pop stack, mimic recursion */
if ( kpar == EMPTY ) break; /* dfs done */
krep = kpar;
xdfs = xplore[krep];
maxdfs = xprune[krep];
#ifdef CHK_DFS
printf(" pop stack: krep %d,xdfs %d,maxdfs %d: ", krep,xdfs,maxdfs);
for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
printf("\n");
#endif
} while ( kpar != EMPTY ); /* do-while - until empty stack */
} /* else */
} /* else */
} /* for each nonz in A[*,jj] */
repfnz_col += m; /* Move to next column */
dense_col += m;
} /* for jj ... */
}

@ -0,0 +1,173 @@
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* October 15, 2003
*
*/
/*
Copyright (c) 1994 by Xerox Corporation. All rights reserved.
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
Permission is hereby granted to use or copy this program for any
purpose, provided the above notices are retained on all copies.
Permission to modify the code and to distribute modified code is
granted, provided the above notices are retained, and a notice that
the code was modified is included with the above copyright notice.
*/
#include <math.h>
#include <stdlib.h>
#include "ssp_defs.h"
#undef DEBUG
int
spivotL(
const int jcol, /* in */
const float u, /* in - diagonal pivoting threshold */
int *usepr, /* re-use the pivot sequence given by perm_r/iperm_r */
int *perm_r, /* may be modified */
int *iperm_r, /* in - inverse of perm_r */
int *iperm_c, /* in - used to find diagonal of Pc*A*Pc' */
int *pivrow, /* out */
GlobalLU_t *Glu, /* modified - global LU data structures */
SuperLUStat_t *stat /* output */
)
{
/*
* Purpose
* =======
* Performs the numerical pivoting on the current column of L,
* and the CDIV operation.
*
* Pivot policy:
* (1) Compute thresh = u * max_(i>=j) abs(A_ij);
* (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN
* pivot row = k;
* ELSE IF abs(A_jj) >= thresh THEN
* pivot row = j;
* ELSE
* pivot row = m;
*
* Note: If you absolutely want to use a given pivot order, then set u=0.0.
*
* Return value: 0 success;
* i > 0 U(i,i) is exactly zero.
*
*/
int fsupc; /* first column in the supernode */
int nsupc; /* no of columns in the supernode */
int nsupr; /* no of rows in the supernode */
int lptr; /* points to the starting subscript of the supernode */
int pivptr, old_pivptr, diag, diagind;
float pivmax, rtemp, thresh;
float temp;
float *lu_sup_ptr;
float *lu_col_ptr;
int *lsub_ptr;
int isub, icol, k, itemp;
int *lsub, *xlsub;
float *lusup;
int *xlusup;
flops_t *ops = stat->ops;
/* Initialize pointers */
lsub = Glu->lsub;
xlsub = Glu->xlsub;
lusup = Glu->lusup;
xlusup = Glu->xlusup;
fsupc = (Glu->xsup)[(Glu->supno)[jcol]];
nsupc = jcol - fsupc; /* excluding jcol; nsupc >= 0 */
lptr = xlsub[fsupc];
nsupr = xlsub[fsupc+1] - lptr;
lu_sup_ptr = &lusup[xlusup[fsupc]]; /* start of the current supernode */
lu_col_ptr = &lusup[xlusup[jcol]]; /* start of jcol in the supernode */
lsub_ptr = &lsub[lptr]; /* start of row indices of the supernode */
#ifdef DEBUG
if ( jcol == MIN_COL ) {
printf("Before cdiv: col %d\n", jcol);
for (k = nsupc; k < nsupr; k++)
printf(" lu[%d] %f\n", lsub_ptr[k], lu_col_ptr[k]);
}
#endif
/* Determine the largest abs numerical value for partial pivoting;
Also search for user-specified pivot, and diagonal element. */
if ( *usepr ) *pivrow = iperm_r[jcol];
diagind = iperm_c[jcol];
pivmax = 0.0;
pivptr = nsupc;
diag = EMPTY;
old_pivptr = nsupc;
for (isub = nsupc; isub < nsupr; ++isub) {
rtemp = fabs (lu_col_ptr[isub]);
if ( rtemp > pivmax ) {
pivmax = rtemp;
pivptr = isub;
}
if ( *usepr && lsub_ptr[isub] == *pivrow ) old_pivptr = isub;
if ( lsub_ptr[isub] == diagind ) diag = isub;
}
/* Test for singularity */
if ( pivmax == 0.0 ) {
*pivrow = lsub_ptr[pivptr];
perm_r[*pivrow] = jcol;
*usepr = 0;
return (jcol+1);
}
thresh = u * pivmax;
/* Choose appropriate pivotal element by our policy. */
if ( *usepr ) {
rtemp = fabs (lu_col_ptr[old_pivptr]);
if ( rtemp != 0.0 && rtemp >= thresh )
pivptr = old_pivptr;
else
*usepr = 0;
}
if ( *usepr == 0 ) {
/* Use diagonal pivot? */
if ( diag >= 0 ) { /* diagonal exists */
rtemp = fabs (lu_col_ptr[diag]);
if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag;
}
*pivrow = lsub_ptr[pivptr];
}
/* Record pivot row */
perm_r[*pivrow] = jcol;
/* Interchange row subscripts */
if ( pivptr != nsupc ) {
itemp = lsub_ptr[pivptr];
lsub_ptr[pivptr] = lsub_ptr[nsupc];
lsub_ptr[nsupc] = itemp;
/* Interchange numerical values as well, for the whole snode, such
* that L is indexed the same way as A.
*/
for (icol = 0; icol <= nsupc; icol++) {
itemp = pivptr + icol * nsupr;
temp = lu_sup_ptr[itemp];
lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr];
lu_sup_ptr[nsupc + icol*nsupr] = temp;
}
} /* if */
/* cdiv operation */
ops[FACT] += nsupr - nsupc;
temp = 1.0 / lu_col_ptr[nsupc];
for (k = nsupc+1; k < nsupr; k++)
lu_col_ptr[k] *= temp;
return 0;
}

@ -0,0 +1,149 @@
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* November 15, 1997
*
*/
/*
Copyright (c) 1994 by Xerox Corporation. All rights reserved.
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
Permission is hereby granted to use or copy this program for any
purpose, provided the above notices are retained on all copies.
Permission to modify the code and to distribute modified code is
granted, provided the above notices are retained, and a notice that
the code was modified is included with the above copyright notice.
*/
#include "ssp_defs.h"
#include "util.h"
void
spruneL(
const int jcol, /* in */
const int *perm_r, /* in */
const int pivrow, /* in */
const int nseg, /* in */
const int *segrep, /* in */
const int *repfnz, /* in */
int *xprune, /* out */
GlobalLU_t *Glu /* modified - global LU data structures */
)
{
/*
* Purpose
* =======
* Prunes the L-structure of supernodes whose L-structure
* contains the current pivot row "pivrow"
*
*/
float utemp;
int jsupno, irep, irep1, kmin, kmax, krow, movnum;
int i, ktemp, minloc, maxloc;
int do_prune; /* logical variable */
int *xsup, *supno;
int *lsub, *xlsub;
float *lusup;
int *xlusup;
xsup = Glu->xsup;
supno = Glu->supno;
lsub = Glu->lsub;
xlsub = Glu->xlsub;
lusup = Glu->lusup;
xlusup = Glu->xlusup;
/*
* For each supernode-rep irep in U[*,j]
*/
jsupno = supno[jcol];
for (i = 0; i < nseg; i++) {
irep = segrep[i];
irep1 = irep + 1;
do_prune = FALSE;
/* Don't prune with a zero U-segment */
if ( repfnz[irep] == EMPTY )
continue;
/* If a snode overlaps with the next panel, then the U-segment
* is fragmented into two parts -- irep and irep1. We should let
* pruning occur at the rep-column in irep1's snode.
*/
if ( supno[irep] == supno[irep1] ) /* Don't prune */
continue;
/*
* If it has not been pruned & it has a nonz in row L[pivrow,i]
*/
if ( supno[irep] != jsupno ) {
if ( xprune[irep] >= xlsub[irep1] ) {
kmin = xlsub[irep];
kmax = xlsub[irep1] - 1;
for (krow = kmin; krow <= kmax; krow++)
if ( lsub[krow] == pivrow ) {
do_prune = TRUE;
break;
}
}
if ( do_prune ) {
/* Do a quicksort-type partition
* movnum=TRUE means that the num values have to be exchanged.
*/
movnum = FALSE;
if ( irep == xsup[supno[irep]] ) /* Snode of size 1 */
movnum = TRUE;
while ( kmin <= kmax ) {
if ( perm_r[lsub[kmax]] == EMPTY )
kmax--;
else if ( perm_r[lsub[kmin]] != EMPTY )
kmin++;
else { /* kmin below pivrow, and kmax above pivrow:
* interchange the two subscripts
*/
ktemp = lsub[kmin];
lsub[kmin] = lsub[kmax];
lsub[kmax] = ktemp;
/* If the supernode has only one column, then we
* only keep one set of subscripts. For any subscript
* interchange performed, similar interchange must be
* done on the numerical values.
*/
if ( movnum ) {
minloc = xlusup[irep] + (kmin - xlsub[irep]);
maxloc = xlusup[irep] + (kmax - xlsub[irep]);
utemp = lusup[minloc];
lusup[minloc] = lusup[maxloc];
lusup[maxloc] = utemp;
}
kmin++;
kmax--;
}
} /* while */
xprune[irep] = kmin; /* Pruning */
#ifdef CHK_PRUNE
printf(" After spruneL(),using col %d: xprune[%d] = %d\n",
jcol, irep, kmin);
#endif
} /* if do_prune */
} /* if */
} /* for each U-segment... */
}

@ -0,0 +1,117 @@
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* October 15, 2003
*
*/
/*
Copyright (c) 1994 by Xerox Corporation. All rights reserved.
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
Permission is hereby granted to use or copy this program for any
purpose, provided the above notices are retained on all copies.
Permission to modify the code and to distribute modified code is
granted, provided the above notices are retained, and a notice that
the code was modified is included with the above copyright notice.
*/
#include "ssp_defs.h"
void slsolve(int, int, float*, float*);
void smatvec(int, int, int, float*, float*, float*);
/*
* Performs numeric block updates within the relaxed snode.
*/
int
ssnode_bmod (
const int jcol, /* in */
const int fsupc, /* in */
float *dense, /* in */
float *tempv, /* working array */
GlobalLU_t *Glu, /* modified */
SuperLUStat_t *stat /* output */
)
{
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
_fcd ftcs1 = _cptofcd("L", strlen("L")),
ftcs2 = _cptofcd("N", strlen("N")),
ftcs3 = _cptofcd("U", strlen("U"));
#endif
int incx = 1, incy = 1;
float alpha = -1.0, beta = 1.0;
#endif
int luptr, nsupc, nsupr, nrow;
int isub, irow, i, iptr;
register int ufirst, nextlu;
int *lsub, *xlsub;
float *lusup;
int *xlusup;
flops_t *ops = stat->ops;
lsub = Glu->lsub;
xlsub = Glu->xlsub;
lusup = Glu->lusup;
xlusup = Glu->xlusup;
nextlu = xlusup[jcol];
/*
* Process the supernodal portion of L\U[*,j]
*/
for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
irow = lsub[isub];
lusup[nextlu] = dense[irow];
dense[irow] = 0;
++nextlu;
}
xlusup[jcol + 1] = nextlu; /* Initialize xlusup for next column */
if ( fsupc < jcol ) {
luptr = xlusup[fsupc];
nsupr = xlsub[fsupc+1] - xlsub[fsupc];
nsupc = jcol - fsupc; /* Excluding jcol */
ufirst = xlusup[jcol]; /* Points to the beginning of column
jcol in supernode L\U(jsupno). */
nrow = nsupr - nsupc;
ops[TRSV] += nsupc * (nsupc - 1);
ops[GEMV] += 2 * nrow * nsupc;
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr,
&lusup[ufirst], &incx );
SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
&lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#else
strsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr,
&lusup[ufirst], &incx );
sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
&lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#endif
#else
slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
&lusup[ufirst], &tempv[0] );
/* Scatter tempv[*] into lusup[*] */
iptr = ufirst + nsupc;
for (i = 0; i < nrow; i++) {
lusup[iptr++] -= tempv[i];
tempv[i] = 0.0;
}
#endif
}
return 0;
}

@ -0,0 +1,106 @@
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* November 15, 1997
*
*/
/*
Copyright (c) 1994 by Xerox Corporation. All rights reserved.
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
Permission is hereby granted to use or copy this program for any
purpose, provided the above notices are retained on all copies.
Permission to modify the code and to distribute modified code is
granted, provided the above notices are retained, and a notice that
the code was modified is included with the above copyright notice.
*/
#include "ssp_defs.h"
#include "util.h"
int
ssnode_dfs (
const int jcol, /* in - start of the supernode */
const int kcol, /* in - end of the supernode */
const int *asub, /* in */
const int *xa_begin, /* in */
const int *xa_end, /* in */
int *xprune, /* out */
int *marker, /* modified */
GlobalLU_t *Glu /* modified */
)
{
/* Purpose
* =======
* ssnode_dfs() - Determine the union of the row structures of those
* columns within the relaxed snode.
* Note: The relaxed snodes are leaves of the supernodal etree, therefore,
* the portion outside the rectangular supernode must be zero.
*
* Return value
* ============
* 0 success;
* >0 number of bytes allocated when run out of memory.
*
*/
register int i, k, ifrom, ito, nextl, new_next;
int nsuper, krow, kmark, mem_error;
int *xsup, *supno;
int *lsub, *xlsub;
int nzlmax;
xsup = Glu->xsup;
supno = Glu->supno;
lsub = Glu->lsub;
xlsub = Glu->xlsub;
nzlmax = Glu->nzlmax;
nsuper = ++supno[jcol]; /* Next available supernode number */
nextl = xlsub[jcol];
for (i = jcol; i <= kcol; i++) {
/* For each nonzero in A[*,i] */
for (k = xa_begin[i]; k < xa_end[i]; k++) {
krow = asub[k];
kmark = marker[krow];
if ( kmark != kcol ) { /* First time visit krow */
marker[krow] = kcol;
lsub[nextl++] = krow;
if ( nextl >= nzlmax ) {
if ( (mem_error = sLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu)) )
return (mem_error);
lsub = Glu->lsub;
}
}
}
supno[i] = nsuper;
}
/* Supernode > 1, then make a copy of the subscripts for pruning */
if ( jcol < kcol ) {
new_next = nextl + (nextl - xlsub[jcol]);
while ( new_next > nzlmax ) {
if ( (mem_error = sLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu)) )
return (mem_error);
lsub = Glu->lsub;
}
ito = nextl;
for (ifrom = xlsub[jcol]; ifrom < nextl; )
lsub[ito++] = lsub[ifrom++];
for (i = jcol+1; i <= kcol; i++) xlsub[i] = nextl;
nextl = ito;
}
xsup[nsuper+1] = kcol + 1;
supno[kcol+1] = nsuper;
xprune[kcol] = nextl;
xlsub[kcol+1] = nextl;
return 0;
}

@ -0,0 +1,469 @@
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* October 15, 2003
*
*/
/*
* File name: ssp_blas2.c
* Purpose: Sparse BLAS 2, using some dense BLAS 2 operations.
*/
#include "ssp_defs.h"
/*
* Function prototypes
*/
void susolve(int, int, float*, float*);
void slsolve(int, int, float*, float*);
void smatvec(int, int, int, float*, float*, float*);
int strsv_(char*, char*, char*, int*, float*, int*, float*, int*);
int
sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
SuperMatrix *U, float *x, SuperLUStat_t *stat, int *info)
{
/*
* Purpose
* =======
*
* sp_strsv() solves one of the systems of equations
* A*x = b, or A'*x = b,
* where b and x are n element vectors and A is a sparse unit , or
* non-unit, upper or lower triangular matrix.
* No test for singularity or near-singularity is included in this
* routine. Such tests must be performed before calling this routine.
*
* Parameters
* ==========
*
* uplo - (input) char*
* On entry, uplo specifies whether the matrix is an upper or
* lower triangular matrix as follows:
* uplo = 'U' or 'u' A is an upper triangular matrix.
* uplo = 'L' or 'l' A is a lower triangular matrix.
*
* trans - (input) char*
* On entry, trans specifies the equations to be solved as
* follows:
* trans = 'N' or 'n' A*x = b.
* trans = 'T' or 't' A'*x = b.
* trans = 'C' or 'c' A'*x = b.
*
* diag - (input) char*
* On entry, diag specifies whether or not A is unit
* triangular as follows:
* diag = 'U' or 'u' A is assumed to be unit triangular.
* diag = 'N' or 'n' A is not assumed to be unit
* triangular.
*
* L - (input) SuperMatrix*
* The factor L from the factorization Pr*A*Pc=L*U. Use
* compressed row subscripts storage for supernodes,
* i.e., L has types: Stype = SC, Dtype = SLU_S, Mtype = TRLU.
*
* U - (input) SuperMatrix*
* The factor U from the factorization Pr*A*Pc=L*U.
* U has types: Stype = NC, Dtype = SLU_S, Mtype = TRU.
*
* x - (input/output) float*
* Before entry, the incremented array X must contain the n
* element right-hand side vector b. On exit, X is overwritten
* with the solution vector x.
*
* info - (output) int*
* If *info = -i, the i-th argument had an illegal value.
*
*/
#ifdef _CRAY
_fcd ftcs1 = _cptofcd("L", strlen("L")),
ftcs2 = _cptofcd("N", strlen("N")),
ftcs3 = _cptofcd("U", strlen("U"));
#endif
SCformat *Lstore;
NCformat *Ustore;
float *Lval, *Uval;
int incx = 1;
int nrow;
int fsupc, nsupr, nsupc, luptr, istart, irow;
int i, k, iptr, jcol;
float *work;
flops_t solve_ops;
/* Test the input parameters */
*info = 0;
if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
else if ( !lsame_(trans, "N") && !lsame_(trans, "T") &&
!lsame_(trans, "C")) *info = -2;
else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
if ( *info ) {
i = -(*info);
xerbla_("sp_strsv", &i);
return 0;
}
Lstore = L->Store;
Lval = Lstore->nzval;
Ustore = U->Store;
Uval = Ustore->nzval;
solve_ops = 0;
if ( !(work = floatCalloc(L->nrow)) )
ABORT("Malloc fails for work in sp_strsv().");
if ( lsame_(trans, "N") ) { /* Form x := inv(A)*x. */
if ( lsame_(uplo, "L") ) {
/* Form x := inv(L)*x */
if ( L->nrow == 0 ) return 0; /* Quick return */
for (k = 0; k <= Lstore->nsuper; k++) {
fsupc = L_FST_SUPC(k);
istart = L_SUB_START(fsupc);
nsupr = L_SUB_START(fsupc+1) - istart;
nsupc = L_FST_SUPC(k+1) - fsupc;
luptr = L_NZ_START(fsupc);
nrow = nsupr - nsupc;
solve_ops += nsupc * (nsupc - 1);
solve_ops += 2 * nrow * nsupc;
if ( nsupc == 1 ) {
for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
irow = L_SUB(iptr);
++luptr;
x[irow] -= x[fsupc] * Lval[luptr];
}
} else {
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
&x[fsupc], &incx);
SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
#else
strsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
&x[fsupc], &incx);
sgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
#endif
#else
slsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
smatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
&x[fsupc], &work[0] );
#endif
iptr = istart + nsupc;
for (i = 0; i < nrow; ++i, ++iptr) {
irow = L_SUB(iptr);
x[irow] -= work[i]; /* Scatter */
work[i] = 0.0;
}
}
} /* for k ... */
} else {
/* Form x := inv(U)*x */
if ( U->nrow == 0 ) return 0; /* Quick return */
for (k = Lstore->nsuper; k >= 0; k--) {
fsupc = L_FST_SUPC(k);
nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
nsupc = L_FST_SUPC(k+1) - fsupc;
luptr = L_NZ_START(fsupc);
solve_ops += nsupc * (nsupc + 1);
if ( nsupc == 1 ) {
x[fsupc] /= Lval[luptr];
for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
irow = U_SUB(i);
x[irow] -= x[fsupc] * Uval[i];
}
} else {
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
STRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
&x[fsupc], &incx);
#else
strsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
&x[fsupc], &incx);
#endif
#else
susolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
#endif
for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1);
i++) {
irow = U_SUB(i);
x[irow] -= x[jcol] * Uval[i];
}
}
}
} /* for k ... */
}
} else { /* Form x := inv(A')*x */
if ( lsame_(uplo, "L") ) {
/* Form x := inv(L')*x */
if ( L->nrow == 0 ) return 0; /* Quick return */
for (k = Lstore->nsuper; k >= 0; --k) {
fsupc = L_FST_SUPC(k);
istart = L_SUB_START(fsupc);
nsupr = L_SUB_START(fsupc+1) - istart;
nsupc = L_FST_SUPC(k+1) - fsupc;
luptr = L_NZ_START(fsupc);
solve_ops += 2 * (nsupr - nsupc) * nsupc;
for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
iptr = istart + nsupc;
for (i = L_NZ_START(jcol) + nsupc;
i < L_NZ_START(jcol+1); i++) {
irow = L_SUB(iptr);
x[jcol] -= x[irow] * Lval[i];
iptr++;
}
}
if ( nsupc > 1 ) {
solve_ops += nsupc * (nsupc - 1);
#ifdef _CRAY
ftcs1 = _cptofcd("L", strlen("L"));
ftcs2 = _cptofcd("T", strlen("T"));
ftcs3 = _cptofcd("U", strlen("U"));
STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
&x[fsupc], &incx);
#else
strsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
&x[fsupc], &incx);
#endif
}
}
} else {
/* Form x := inv(U')*x */
if ( U->nrow == 0 ) return 0; /* Quick return */
for (k = 0; k <= Lstore->nsuper; k++) {
fsupc = L_FST_SUPC(k);
nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
nsupc = L_FST_SUPC(k+1) - fsupc;
luptr = L_NZ_START(fsupc);
for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
irow = U_SUB(i);
x[jcol] -= x[irow] * Uval[i];
}
}
solve_ops += nsupc * (nsupc + 1);
if ( nsupc == 1 ) {
x[fsupc] /= Lval[luptr];
} else {
#ifdef _CRAY
ftcs1 = _cptofcd("U", strlen("U"));
ftcs2 = _cptofcd("T", strlen("T"));
ftcs3 = _cptofcd("N", strlen("N"));
STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
&x[fsupc], &incx);
#else
strsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
&x[fsupc], &incx);
#endif
}
} /* for k ... */
}
}
stat->ops[SOLVE] += solve_ops;
SUPERLU_FREE(work);
return 0;
}
int
sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x,
int incx, float beta, float *y, int incy)
{
/* Purpose
=======
sp_sgemv() performs one of the matrix-vector operations
y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y,
where alpha and beta are scalars, x and y are vectors and A is a
sparse A->nrow by A->ncol matrix.
Parameters
==========
TRANS - (input) char*
On entry, TRANS specifies the operation to be performed as
follows:
TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
TRANS = 'T' or 't' y := alpha*A'*x + beta*y.
TRANS = 'C' or 'c' y := alpha*A'*x + beta*y.
ALPHA - (input) float
On entry, ALPHA specifies the scalar alpha.
A - (input) SuperMatrix*
Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
Currently, the type of A can be:
Stype = NC or NCP; Dtype = SLU_S; Mtype = GE.
In the future, more general A can be handled.
X - (input) float*, array of DIMENSION at least
( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
and at least
( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
Before entry, the incremented array X must contain the
vector x.
INCX - (input) int
On entry, INCX specifies the increment for the elements of
X. INCX must not be zero.
BETA - (input) float
On entry, BETA specifies the scalar beta. When BETA is
supplied as zero then Y need not be set on input.
Y - (output) float*, array of DIMENSION at least
( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
and at least
( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
Before entry with BETA non-zero, the incremented array Y
must contain the vector y. On exit, Y is overwritten by the
updated vector y.
INCY - (input) int
On entry, INCY specifies the increment for the elements of
Y. INCY must not be zero.
==== Sparse Level 2 Blas routine.
*/
/* Local variables */
NCformat *Astore;
float *Aval;
int info;
float temp;
int lenx, leny, i, j, irow;
int iy, jx, jy, kx, ky;
int notran;
notran = lsame_(trans, "N");
Astore = A->Store;
Aval = Astore->nzval;
/* Test the input parameters */
info = 0;
if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
else if (incx == 0) info = 5;
else if (incy == 0) info = 8;
if (info != 0) {
xerbla_("sp_sgemv ", &info);
return 0;
}
/* Quick return if possible. */
if (A->nrow == 0 || A->ncol == 0 || (alpha == 0. && beta == 1.))
return 0;
/* Set LENX and LENY, the lengths of the vectors x and y, and set
up the start points in X and Y. */
if (lsame_(trans, "N")) {
lenx = A->ncol;
leny = A->nrow;
} else {
lenx = A->nrow;
leny = A->ncol;
}
if (incx > 0) kx = 0;
else kx = - (lenx - 1) * incx;
if (incy > 0) ky = 0;
else ky = - (leny - 1) * incy;
/* Start the operations. In this version the elements of A are
accessed sequentially with one pass through A. */
/* First form y := beta*y. */
if (beta != 1.) {
if (incy == 1) {
if (beta == 0.)
for (i = 0; i < leny; ++i) y[i] = 0.;
else
for (i = 0; i < leny; ++i) y[i] = beta * y[i];
} else {
iy = ky;
if (beta == 0.)
for (i = 0; i < leny; ++i) {
y[iy] = 0.;
iy += incy;
}
else
for (i = 0; i < leny; ++i) {
y[iy] = beta * y[iy];
iy += incy;
}
}
}
if (alpha == 0.) return 0;
if ( notran ) {
/* Form y := alpha*A*x + y. */
jx = kx;
if (incy == 1) {
for (j = 0; j < A->ncol; ++j) {
if (x[jx] != 0.) {
temp = alpha * x[jx];
for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
irow = Astore->rowind[i];
y[irow] += temp * Aval[i];
}
}
jx += incx;
}
} else {
ABORT("Not implemented.");
}
} else {
/* Form y := alpha*A'*x + y. */
jy = ky;
if (incx == 1) {
for (j = 0; j < A->ncol; ++j) {
temp = 0.;
for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
irow = Astore->rowind[i];
temp += Aval[i] * x[irow];
}
y[jy] += alpha * temp;
jy += incy;
}
} else {
ABORT("Not implemented.");
}
}
return 0;
} /* sp_sgemv */

@ -0,0 +1,121 @@
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* November 15, 1997
*
*/
/*
* File name: sp_blas3.c
* Purpose: Sparse BLAS3, using some dense BLAS3 operations.
*/
#include "ssp_defs.h"
#include "util.h"
int
sp_sgemm(char *transa, int n,
float alpha, SuperMatrix *A, float *b, int ldb,
float beta, float *c, int ldc)
{
/* Purpose
=======
sp_s performs one of the matrix-matrix operations
C := alpha*op( A )*op( B ) + beta*C,
where op( X ) is one of
op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ),
alpha and beta are scalars, and A, B and C are matrices, with op( A )
an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
Parameters
==========
TRANSA - (input) char*
On entry, TRANSA specifies the form of op( A ) to be used in
the matrix multiplication as follows:
TRANSA = 'N' or 'n', op( A ) = A.
TRANSA = 'T' or 't', op( A ) = A'.
TRANSA = 'C' or 'c', op( A ) = conjg( A' ).
Unchanged on exit.
TRANSB - (input) char*
On entry, TRANSB specifies the form of op( B ) to be used in
the matrix multiplication as follows:
TRANSB = 'N' or 'n', op( B ) = B.
TRANSB = 'T' or 't', op( B ) = B'.
TRANSB = 'C' or 'c', op( B ) = conjg( B' ).
Unchanged on exit.
M - (input) int
On entry, M specifies the number of rows of the matrix
op( A ) and of the matrix C. M must be at least zero.
Unchanged on exit.
N - (input) int
On entry, N specifies the number of columns of the matrix
op( B ) and the number of columns of the matrix C. N must be
at least zero.
Unchanged on exit.
K - (input) int
On entry, K specifies the number of columns of the matrix
op( A ) and the number of rows of the matrix op( B ). K must
be at least zero.
Unchanged on exit.
ALPHA - (input) float
On entry, ALPHA specifies the scalar alpha.
A - (input) SuperMatrix*
Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
Currently, the type of A can be:
Stype = NC or NCP; Dtype = SLU_S; Mtype = GE.
In the future, more general A can be handled.
B - FLOAT PRECISION array of DIMENSION ( LDB, kb ), where kb is
n when TRANSB = 'N' or 'n', and is k otherwise.
Before entry with TRANSB = 'N' or 'n', the leading k by n
part of the array B must contain the matrix B, otherwise
the leading n by k part of the array B must contain the
matrix B.
Unchanged on exit.
LDB - (input) int
On entry, LDB specifies the first dimension of B as declared
in the calling (sub) program. LDB must be at least max( 1, n ).
Unchanged on exit.
BETA - (input) float
On entry, BETA specifies the scalar beta. When BETA is
supplied as zero then C need not be set on input.
C - FLOAT PRECISION array of DIMENSION ( LDC, n ).
Before entry, the leading m by n part of the array C must
contain the matrix C, except when beta is zero, in which
case C need not be set on entry.
On exit, the array C is overwritten by the m by n matrix
( alpha*op( A )*B + beta*C ).
LDC - (input) int
On entry, LDC specifies the first dimension of C as declared
in the calling (sub)program. LDC must be at least max(1,m).
Unchanged on exit.
==== Sparse Level 3 Blas routine.
*/
int incx = 1, incy = 1;
int j;
for (j = 0; j < n; ++j) {
sp_sgemv(transa, alpha, A, &b[ldb*j], incx, beta, &c[ldc*j], incy);
}
return 0;
}

@ -0,0 +1,234 @@
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* October 15, 2003
*
*/
#ifndef __SUPERLU_sSP_DEFS /* allow multiple inclusions */
#define __SUPERLU_sSP_DEFS
/*
* File name: ssp_defs.h
* Purpose: Sparse matrix types and function prototypes
* History:
*/
#ifdef _CRAY
#include <fortran.h>
#include <string.h>
#endif
/* Define my integer type int_t */
typedef int int_t; /* default */
#include "Cnames.h"
#include "supermatrix.h"
#include "util.h"
/*
* Global data structures used in LU factorization -
*
* nsuper: #supernodes = nsuper + 1, numbered [0, nsuper].
* (xsup,supno): supno[i] is the supernode no to which i belongs;
* xsup(s) points to the beginning of the s-th supernode.
* e.g. supno 0 1 2 2 3 3 3 4 4 4 4 4 (n=12)
* xsup 0 1 2 4 7 12
* Note: dfs will be performed on supernode rep. relative to the new
* row pivoting ordering
*
* (xlsub,lsub): lsub[*] contains the compressed subscript of
* rectangular supernodes; xlsub[j] points to the starting
* location of the j-th column in lsub[*]. Note that xlsub
* is indexed by column.
* Storage: original row subscripts
*
* During the course of sparse LU factorization, we also use
* (xlsub,lsub) for the purpose of symmetric pruning. For each
* supernode {s,s+1,...,t=s+r} with first column s and last
* column t, the subscript set
* lsub[j], j=xlsub[s], .., xlsub[s+1]-1
* is the structure of column s (i.e. structure of this supernode).
* It is used for the storage of numerical values.
* Furthermore,
* lsub[j], j=xlsub[t], .., xlsub[t+1]-1
* is the structure of the last column t of this supernode.
* It is for the purpose of symmetric pruning. Therefore, the
* structural subscripts can be rearranged without making physical
* interchanges among the numerical values.
*
* However, if the supernode has only one column, then we
* only keep one set of subscripts. For any subscript interchange
* performed, similar interchange must be done on the numerical
* values.
*
* The last column structures (for pruning) will be removed
* after the numercial LU factorization phase.
*
* (xlusup,lusup): lusup[*] contains the numerical values of the
* rectangular supernodes; xlusup[j] points to the starting
* location of the j-th column in storage vector lusup[*]
* Note: xlusup is indexed by column.
* Each rectangular supernode is stored by column-major
* scheme, consistent with Fortran 2-dim array storage.
*
* (xusub,ucol,usub): ucol[*] stores the numerical values of
* U-columns outside the rectangular supernodes. The row
* subscript of nonzero ucol[k] is stored in usub[k].
* xusub[i] points to the starting location of column i in ucol.
* Storage: new row subscripts; that is subscripts of PA.
*/
typedef struct {
int *xsup; /* supernode and column mapping */
int *supno;
int *lsub; /* compressed L subscripts */
int *xlsub;
float *lusup; /* L supernodes */
int *xlusup;
float *ucol; /* U columns */
int *usub;
int *xusub;
int nzlmax; /* current max size of lsub */
int nzumax; /* " " " ucol */
int nzlumax; /* " " " lusup */
int n; /* number of columns in the matrix */
LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */
} GlobalLU_t;
typedef struct {
float for_lu;
float total_needed;
int expansions;
} mem_usage_t;
#ifdef __cplusplus
extern "C" {
#endif
/* Driver routines */
extern void
sgssv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *,
SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *);
extern void
sgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *,
char *, float *, float *, SuperMatrix *, SuperMatrix *,
void *, int, SuperMatrix *, SuperMatrix *,
float *, float *, float *, float *,
mem_usage_t *, SuperLUStat_t *, int *);
/* Supernodal LU factor related */
extern void
sCreate_CompCol_Matrix(SuperMatrix *, int, int, int, float *,
int *, int *, Stype_t, Dtype_t, Mtype_t);
extern void
sCreate_CompRow_Matrix(SuperMatrix *, int, int, int, float *,
int *, int *, Stype_t, Dtype_t, Mtype_t);
extern void
sCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *);
extern void
sCreate_Dense_Matrix(SuperMatrix *, int, int, float *, int,
Stype_t, Dtype_t, Mtype_t);
extern void
sCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, float *,
int *, int *, int *, int *, int *,
Stype_t, Dtype_t, Mtype_t);
extern void
sCopy_Dense_Matrix(int, int, float *, int, float *, int);
extern void countnz (const int, int *, int *, int *, GlobalLU_t *);
extern void fixupL (const int, const int *, GlobalLU_t *);
extern void sallocateA (int, int, float **, int **, int **);
extern void sgstrf (superlu_options_t*, SuperMatrix*,
int, int, int*, void *, int, int *, int *,
SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *);
extern int ssnode_dfs (const int, const int, const int *, const int *,
const int *, int *, int *, GlobalLU_t *);
extern int ssnode_bmod (const int, const int, float *,
float *, GlobalLU_t *, SuperLUStat_t*);
extern void spanel_dfs (const int, const int, const int, SuperMatrix *,
int *, int *, float *, int *, int *, int *,
int *, int *, int *, int *, GlobalLU_t *);
extern void spanel_bmod (const int, const int, const int, const int,
float *, float *, int *, int *,
GlobalLU_t *, SuperLUStat_t*);
extern int scolumn_dfs (const int, const int, int *, int *, int *, int *,
int *, int *, int *, int *, int *, GlobalLU_t *);
extern int scolumn_bmod (const int, const int, float *,
float *, int *, int *, int,
GlobalLU_t *, SuperLUStat_t*);
extern int scopy_to_ucol (int, int, int *, int *, int *,
float *, GlobalLU_t *);
extern int spivotL (const int, const float, int *, int *,
int *, int *, int *, GlobalLU_t *, SuperLUStat_t*);
extern void spruneL (const int, const int *, const int, const int,
const int *, const int *, int *, GlobalLU_t *);
extern void sreadmt (int *, int *, int *, float **, int **, int **);
extern void sGenXtrue (int, int, float *, int);
extern void sFillRHS (trans_t, int, float *, int, SuperMatrix *,
SuperMatrix *);
extern void sgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *,
SuperMatrix *, SuperLUStat_t*, int *);
/* Driver related */
extern void sgsequ (SuperMatrix *, float *, float *, float *,
float *, float *, int *);
extern void slaqgs (SuperMatrix *, float *, float *, float,
float, float, char *);
extern void sgscon (char *, SuperMatrix *, SuperMatrix *,
float, float *, SuperLUStat_t*, int *);
extern float sPivotGrowth(int, SuperMatrix *, int *,
SuperMatrix *, SuperMatrix *);
extern void sgsrfs (trans_t, SuperMatrix *, SuperMatrix *,
SuperMatrix *, int *, int *, char *, float *,
float *, SuperMatrix *, SuperMatrix *,
float *, float *, SuperLUStat_t*, int *);
extern int sp_strsv (char *, char *, char *, SuperMatrix *,
SuperMatrix *, float *, SuperLUStat_t*, int *);
extern int sp_sgemv (char *, float, SuperMatrix *, float *,
int, float, float *, int);
extern int sp_sgemm (char *, int, float,
SuperMatrix *, float *, int, float,
float *, int);
/* Memory-related */
extern int sLUMemInit (fact_t, void *, int, int, int, int, int,
SuperMatrix *, SuperMatrix *,
GlobalLU_t *, int **, float **);
extern void sSetRWork (int, int, float *, float **, float **);
extern void sLUWorkFree (int *, float *, GlobalLU_t *);
extern int sLUMemXpand (int, int, MemType, int *, GlobalLU_t *);
extern float *floatMalloc(int);
extern float *floatCalloc(int);
extern int smemory_usage(const int, const int, const int, const int);
extern int sQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *);
/* Auxiliary routines */
extern void sreadhb(int *, int *, int *, float **, int **, int **);
extern void sCompRow_to_CompCol(int, int, int, float*, int*, int*,
float **, int **, int **);
extern void sfill (float *, int, float);
extern void sinf_norm_error (int, SuperMatrix *, float *);
extern void PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *,
float, float, float *, float *, char *);
/* Routines for debugging */
extern void sPrint_CompCol_Matrix(char *, SuperMatrix *);
extern void sPrint_SuperNode_Matrix(char *, SuperMatrix *);
extern void sPrint_Dense_Matrix(char *, SuperMatrix *);
extern void print_lu_col(char *, int, int, int *, GlobalLU_t *);
extern void check_tempv(int, float *);
#ifdef __cplusplus
}
#endif
#endif /* __SUPERLU_sSP_DEFS */

@ -0,0 +1,331 @@
/* Subroutine */ int strsv_(char *uplo, char *trans, char *diag, int *n,
float *a, int *lda, float *x, int *incx)
{
/* System generated locals */
int i__1, i__2;
/* Local variables */
static int info;
static float temp;
static int i, j;
extern int lsame_(char *, char *);
static int ix, jx, kx;
extern /* Subroutine */ int xerbla_(char *, int *);
static int nounit;
/* Purpose
=======
STRSV solves one of the systems of equations
A*x = b, or A'*x = b,
where b and x are n element vectors and A is an n by n unit, or
non-unit, upper or lower triangular matrix.
No test for singularity or near-singularity is included in this
routine. Such tests must be performed before calling this routine.
Parameters
==========
UPLO - CHARACTER*1.
On entry, UPLO specifies whether the matrix is an upper or
lower triangular matrix as follows:
UPLO = 'U' or 'u' A is an upper triangular matrix.
UPLO = 'L' or 'l' A is a lower triangular matrix.
Unchanged on exit.
TRANS - CHARACTER*1.
On entry, TRANS specifies the equations to be solved as
follows:
TRANS = 'N' or 'n' A*x = b.
TRANS = 'T' or 't' A'*x = b.
TRANS = 'C' or 'c' A'*x = b.
Unchanged on exit.
DIAG - CHARACTER*1.
On entry, DIAG specifies whether or not A is unit
triangular as follows:
DIAG = 'U' or 'u' A is assumed to be unit triangular.
DIAG = 'N' or 'n' A is not assumed to be unit
triangular.
Unchanged on exit.
N - INTEGER.
On entry, N specifies the order of the matrix A.
N must be at least zero.
Unchanged on exit.
A - REAL array of DIMENSION ( LDA, n ).
Before entry with UPLO = 'U' or 'u', the leading n by n
upper triangular part of the array A must contain the upper
triangular matrix and the strictly lower triangular part of
A is not referenced.
Before entry with UPLO = 'L' or 'l', the leading n by n
lower triangular part of the array A must contain the lower
triangular matrix and the strictly upper triangular part of
A is not referenced.
Note that when DIAG = 'U' or 'u', the diagonal elements of
A are not referenced either, but are assumed to be unity.
Unchanged on exit.
LDA - INTEGER.
On entry, LDA specifies the first dimension of A as declared
in the calling (sub) program. LDA must be at least
max( 1, n ).
Unchanged on exit.
X - REAL array of dimension at least
( 1 + ( n - 1 )*abs( INCX ) ).
Before entry, the incremented array X must contain the n
element right-hand side vector b. On exit, X is overwritten
with the solution vector x.
INCX - INTEGER.
On entry, INCX specifies the increment for the elements of
X. INCX must not be zero.
Unchanged on exit.
Level 2 Blas routine.
-- Written on 22-October-1986.
Jack Dongarra, Argonne National Lab.
Jeremy Du Croz, Nag Central Office.
Sven Hammarling, Nag Central Office.
Richard Hanson, Sandia National Labs.
Test the input parameters.
Parameter adjustments
Function Body */
#define X(I) x[(I)-1]
#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
info = 0;
if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
info = 1;
} else if (! lsame_(trans, "N") && ! lsame_(trans, "T") &&
! lsame_(trans, "C")) {
info = 2;
} else if (! lsame_(diag, "U") && ! lsame_(diag, "N")) {
info = 3;
} else if (*n < 0) {
info = 4;
} else if (*lda < ((1 > *n)? 1: *n)) {
info = 6;
} else if (*incx == 0) {
info = 8;
}
if (info != 0) {
xerbla_("STRSV ", &info);
return 0;
}
/* Quick return if possible. */
if (*n == 0) {
return 0;
}
nounit = lsame_(diag, "N");
/* Set up the start point in X if the increment is not unity. This
will be ( N - 1 )*INCX too small for descending loops. */
if (*incx <= 0) {
kx = 1 - (*n - 1) * *incx;
} else if (*incx != 1) {
kx = 1;
}
/* Start the operations. In this version the elements of A are
accessed sequentially with one pass through A. */
if (lsame_(trans, "N")) {
/* Form x := inv( A )*x. */
if (lsame_(uplo, "U")) {
if (*incx == 1) {
for (j = *n; j >= 1; --j) {
if (X(j) != 0.f) {
if (nounit) {
X(j) /= A(j,j);
}
temp = X(j);
for (i = j - 1; i >= 1; --i) {
X(i) -= temp * A(i,j);
/* L10: */
}
}
/* L20: */
}
} else {
jx = kx + (*n - 1) * *incx;
for (j = *n; j >= 1; --j) {
if (X(jx) != 0.f) {
if (nounit) {
X(jx) /= A(j,j);
}
temp = X(jx);
ix = jx;
for (i = j - 1; i >= 1; --i) {
ix -= *incx;
X(ix) -= temp * A(i,j);
/* L30: */
}
}
jx -= *incx;
/* L40: */
}
}
} else {
if (*incx == 1) {
i__1 = *n;
for (j = 1; j <= *n; ++j) {
if (X(j) != 0.f) {
if (nounit) {
X(j) /= A(j,j);
}
temp = X(j);
i__2 = *n;
for (i = j + 1; i <= *n; ++i) {
X(i) -= temp * A(i,j);
/* L50: */
}
}
/* L60: */
}
} else {
jx = kx;
i__1 = *n;
for (j = 1; j <= *n; ++j) {
if (X(jx) != 0.f) {
if (nounit) {
X(jx) /= A(j,j);
}
temp = X(jx);
ix = jx;
i__2 = *n;
for (i = j + 1; i <= *n; ++i) {
ix += *incx;
X(ix) -= temp * A(i,j);
/* L70: */
}
}
jx += *incx;
/* L80: */
}
}
}
} else {
/* Form x := inv( A' )*x. */
if (lsame_(uplo, "U")) {
if (*incx == 1) {
i__1 = *n;
for (j = 1; j <= *n; ++j) {
temp = X(j);
i__2 = j - 1;
for (i = 1; i <= j-1; ++i) {
temp -= A(i,j) * X(i);
/* L90: */
}
if (nounit) {
temp /= A(j,j);
}
X(j) = temp;
/* L100: */
}
} else {
jx = kx;
i__1 = *n;
for (j = 1; j <= *n; ++j) {
temp = X(jx);
ix = kx;
i__2 = j - 1;
for (i = 1; i <= j-1; ++i) {
temp -= A(i,j) * X(ix);
ix += *incx;
/* L110: */
}
if (nounit) {
temp /= A(j,j);
}
X(jx) = temp;
jx += *incx;
/* L120: */
}
}
} else {
if (*incx == 1) {
for (j = *n; j >= 1; --j) {
temp = X(j);
i__1 = j + 1;
for (i = *n; i >= j+1; --i) {
temp -= A(i,j) * X(i);
/* L130: */
}
if (nounit) {
temp /= A(j,j);
}
X(j) = temp;
/* L140: */
}
} else {
kx += (*n - 1) * *incx;
jx = kx;
for (j = *n; j >= 1; --j) {
temp = X(jx);
ix = kx;
i__1 = j + 1;
for (i = *n; i >= j+1; --i) {
temp -= A(i,j) * X(ix);
ix -= *incx;
/* L150: */
}
if (nounit) {
temp /= A(j,j);
}
X(jx) = temp;
jx -= *incx;
/* L160: */
}
}
}
}
return 0;
/* End of STRSV . */
} /* strsv_ */

@ -0,0 +1,55 @@
/*
* Purpose
* =======
* Returns the time in seconds used by the process.
*
* Note: the timer function call is machine dependent. Use conditional
* compilation to choose the appropriate function.
*
*/
/* We want this flag, safer than putting in build system */
#define NO_TIMER
#ifdef SUN
/*
* It uses the system call gethrtime(3C), which is accurate to
* nanoseconds.
*/
#include <sys/time.h>
double SuperLU_timer_() {
return ( (double)gethrtime() / 1e9 );
}
#else
#ifndef NO_TIMER
#include <sys/types.h>
#include <sys/times.h>
#include <time.h>
#include <sys/time.h>
#endif
#ifndef CLK_TCK
#define CLK_TCK 60
#endif
double SuperLU_timer_()
{
#ifdef NO_TIMER
/* no sys/times.h on WIN32 */
double tmp;
tmp = 0.0;
#else
struct tms use;
double tmp;
times(&use);
tmp = use.tms_utime;
tmp += use.tms_stime;
#endif
return (double)(tmp) / CLK_TCK;
}
#endif

@ -0,0 +1,140 @@
#ifndef __SUPERLU_SUPERMATRIX /* allow multiple inclusions */
#define __SUPERLU_SUPERMATRIX
/********************************************
* The matrix types are defined as follows. *
********************************************/
typedef enum {
SLU_NC, /* column-wise, no supernode */
SLU_NR, /* row-wize, no supernode */
SLU_SC, /* column-wise, supernode */
SLU_SR, /* row-wise, supernode */
SLU_NCP, /* column-wise, column-permuted, no supernode
(The consecutive columns of nonzeros, after permutation,
may not be stored contiguously.) */
SLU_DN /* Fortran style column-wise storage for dense matrix */
} Stype_t;
typedef enum {
SLU_S, /* single */
SLU_D, /* double */
SLU_C, /* single complex */
SLU_Z /* double complex */
} Dtype_t;
typedef enum {
SLU_GE, /* general */
SLU_TRLU, /* lower triangular, unit diagonal */
SLU_TRUU, /* upper triangular, unit diagonal */
SLU_TRL, /* lower triangular */
SLU_TRU, /* upper triangular */
SLU_SYL, /* symmetric, store lower half */
SLU_SYU, /* symmetric, store upper half */
SLU_HEL, /* Hermitian, store lower half */
SLU_HEU /* Hermitian, store upper half */
} Mtype_t;
typedef struct {
Stype_t Stype; /* Storage type: interprets the storage structure
pointed to by *Store. */
Dtype_t Dtype; /* Data type. */
Mtype_t Mtype; /* Matrix type: describes the mathematical property of
the matrix. */
int_t nrow; /* number of rows */
int_t ncol; /* number of columns */
void *Store; /* pointer to the actual storage of the matrix */
} SuperMatrix;
/***********************************************
* The storage schemes are defined as follows. *
***********************************************/
/* Stype == NC (Also known as Harwell-Boeing sparse matrix format) */
typedef struct {
int_t nnz; /* number of nonzeros in the matrix */
void *nzval; /* pointer to array of nonzero values, packed by column */
int_t *rowind; /* pointer to array of row indices of the nonzeros */
int_t *colptr; /* pointer to array of beginning of columns in nzval[]
and rowind[] */
/* Note:
Zero-based indexing is used;
colptr[] has ncol+1 entries, the last one pointing
beyond the last column, so that colptr[ncol] = nnz. */
} NCformat;
/* Stype == NR (Also known as row compressed storage (RCS). */
typedef struct {
int_t nnz; /* number of nonzeros in the matrix */
void *nzval; /* pointer to array of nonzero values, packed by row */
int_t *colind; /* pointer to array of column indices of the nonzeros */
int_t *rowptr; /* pointer to array of beginning of rows in nzval[]
and colind[] */
/* Note:
Zero-based indexing is used;
nzval[] and colind[] are of the same length, nnz;
rowptr[] has nrow+1 entries, the last one pointing
beyond the last column, so that rowptr[nrow] = nnz. */
} NRformat;
/* Stype == SC */
typedef struct {
int_t nnz; /* number of nonzeros in the matrix */
int_t nsuper; /* number of supernodes, minus 1 */
void *nzval; /* pointer to array of nonzero values, packed by column */
int_t *nzval_colptr;/* pointer to array of beginning of columns in nzval[] */
int_t *rowind; /* pointer to array of compressed row indices of
rectangular supernodes */
int_t *rowind_colptr;/* pointer to array of beginning of columns in rowind[] */
int_t *col_to_sup; /* col_to_sup[j] is the supernode number to which column
j belongs; mapping from column to supernode number. */
int_t *sup_to_col; /* sup_to_col[s] points to the start of the s-th
supernode; mapping from supernode number to column.
e.g.: col_to_sup: 0 1 2 2 3 3 3 4 4 4 4 4 4 (ncol=12)
sup_to_col: 0 1 2 4 7 12 (nsuper=4) */
/* Note:
Zero-based indexing is used;
nzval_colptr[], rowind_colptr[], col_to_sup and
sup_to_col[] have ncol+1 entries, the last one
pointing beyond the last column.
For col_to_sup[], only the first ncol entries are
defined. For sup_to_col[], only the first nsuper+2
entries are defined. */
} SCformat;
/* Stype == NCP */
typedef struct {
int_t nnz; /* number of nonzeros in the matrix */
void *nzval; /* pointer to array of nonzero values, packed by column */
int_t *rowind;/* pointer to array of row indices of the nonzeros */
/* Note: nzval[]/rowind[] always have the same length */
int_t *colbeg;/* colbeg[j] points to the beginning of column j in nzval[]
and rowind[] */
int_t *colend;/* colend[j] points to one past the last element of column
j in nzval[] and rowind[] */
/* Note:
Zero-based indexing is used;
The consecutive columns of the nonzeros may not be
contiguous in storage, because the matrix has been
postmultiplied by a column permutation matrix. */
} NCPformat;
/* Stype == DN */
typedef struct {
int_t lda; /* leading dimension */
void *nzval; /* array of size lda*ncol to represent a dense matrix */
} DNformat;
/*********************************************************
* Macros used for easy access of sparse matrix entries. *
*********************************************************/
#define L_SUB_START(col) ( Lstore->rowind_colptr[col] )
#define L_SUB(ptr) ( Lstore->rowind[ptr] )
#define L_NZ_START(col) ( Lstore->nzval_colptr[col] )
#define L_FST_SUPC(superno) ( Lstore->sup_to_col[superno] )
#define U_NZ_START(col) ( Ustore->colptr[col] )
#define U_SUB(ptr) ( Ustore->rowind[ptr] )
#endif /* __SUPERLU_SUPERMATRIX */

@ -0,0 +1,478 @@
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* October 15, 2003
*
*/
/*
Copyright (c) 1994 by Xerox Corporation. All rights reserved.
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
Permission is hereby granted to use or copy this program for any
purpose, provided the above notices are retained on all copies.
Permission to modify the code and to distribute modified code is
granted, provided the above notices are retained, and a notice that
the code was modified is included with the above copyright notice.
*/
#include <math.h>
#include "ssp_defs.h"
void
sCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
float *nzval, int *rowind, int *colptr,
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
{
NCformat *Astore;
A->Stype = stype;
A->Dtype = dtype;
A->Mtype = mtype;
A->nrow = m;
A->ncol = n;
A->Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) );
if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
Astore = A->Store;
Astore->nnz = nnz;
Astore->nzval = nzval;
Astore->rowind = rowind;
Astore->colptr = colptr;
}
void
sCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz,
float *nzval, int *colind, int *rowptr,
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
{
NRformat *Astore;
A->Stype = stype;
A->Dtype = dtype;
A->Mtype = mtype;
A->nrow = m;
A->ncol = n;
A->Store = (void *) SUPERLU_MALLOC( sizeof(NRformat) );
if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
Astore = A->Store;
Astore->nnz = nnz;
Astore->nzval = nzval;
Astore->colind = colind;
Astore->rowptr = rowptr;
}
/* Copy matrix A into matrix B. */
void
sCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B)
{
NCformat *Astore, *Bstore;
int ncol, nnz, i;
B->Stype = A->Stype;
B->Dtype = A->Dtype;
B->Mtype = A->Mtype;
B->nrow = A->nrow;;
B->ncol = ncol = A->ncol;
Astore = (NCformat *) A->Store;
Bstore = (NCformat *) B->Store;
Bstore->nnz = nnz = Astore->nnz;
for (i = 0; i < nnz; ++i)
((float *)Bstore->nzval)[i] = ((float *)Astore->nzval)[i];
for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i];
for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i];
}
void
sCreate_Dense_Matrix(SuperMatrix *X, int m, int n, float *x, int ldx,
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
{
DNformat *Xstore;
X->Stype = stype;
X->Dtype = dtype;
X->Mtype = mtype;
X->nrow = m;
X->ncol = n;
X->Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store");
Xstore = (DNformat *) X->Store;
Xstore->lda = ldx;
Xstore->nzval = (float *) x;
}
void
sCopy_Dense_Matrix(int M, int N, float *X, int ldx,
float *Y, int ldy)
{
/*
*
* Purpose
* =======
*
* Copies a two-dimensional matrix X to another matrix Y.
*/
int i, j;
for (j = 0; j < N; ++j)
for (i = 0; i < M; ++i)
Y[i + j*ldy] = X[i + j*ldx];
}
void
sCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz,
float *nzval, int *nzval_colptr, int *rowind,
int *rowind_colptr, int *col_to_sup, int *sup_to_col,
Stype_t stype, Dtype_t dtype, Mtype_t mtype)
{
SCformat *Lstore;
L->Stype = stype;
L->Dtype = dtype;
L->Mtype = mtype;
L->nrow = m;
L->ncol = n;
L->Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) );
if ( !(L->Store) ) ABORT("SUPERLU_MALLOC fails for L->Store");
Lstore = L->Store;
Lstore->nnz = nnz;
Lstore->nsuper = col_to_sup[n];
Lstore->nzval = nzval;
Lstore->nzval_colptr = nzval_colptr;
Lstore->rowind = rowind;
Lstore->rowind_colptr = rowind_colptr;
Lstore->col_to_sup = col_to_sup;
Lstore->sup_to_col = sup_to_col;
}
/*
* Convert a row compressed storage into a column compressed storage.
*/
void
sCompRow_to_CompCol(int m, int n, int nnz,
float *a, int *colind, int *rowptr,
float **at, int **rowind, int **colptr)
{
register int i, j, col, relpos;
int *marker;
/* Allocate storage for another copy of the matrix. */
*at = (float *) floatMalloc(nnz);
*rowind = (int *) intMalloc(nnz);
*colptr = (int *) intMalloc(n+1);
marker = (int *) intCalloc(n);
/* Get counts of each column of A, and set up column pointers */
for (i = 0; i < m; ++i)
for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
(*colptr)[0] = 0;
for (j = 0; j < n; ++j) {
(*colptr)[j+1] = (*colptr)[j] + marker[j];
marker[j] = (*colptr)[j];
}
/* Transfer the matrix into the compressed column storage. */
for (i = 0; i < m; ++i) {
for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
col = colind[j];
relpos = marker[col];
(*rowind)[relpos] = i;
(*at)[relpos] = a[j];
++marker[col];
}
}
SUPERLU_FREE(marker);
}
void
sPrint_CompCol_Matrix(char *what, SuperMatrix *A)
{
NCformat *Astore;
register int i,n;
float *dp;
printf("\nCompCol matrix %s:\n", what);
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
n = A->ncol;
Astore = (NCformat *) A->Store;
dp = (float *) Astore->nzval;
printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz);
printf("nzval: ");
for (i = 0; i < Astore->colptr[n]; ++i) printf("%f ", dp[i]);
printf("\nrowind: ");
for (i = 0; i < Astore->colptr[n]; ++i) printf("%d ", Astore->rowind[i]);
printf("\ncolptr: ");
for (i = 0; i <= n; ++i) printf("%d ", Astore->colptr[i]);
printf("\n");
fflush(stdout);
}
void
sPrint_SuperNode_Matrix(char *what, SuperMatrix *A)
{
SCformat *Astore;
register int i, j, k, c, d, n, nsup;
float *dp;
int *col_to_sup, *sup_to_col, *rowind, *rowind_colptr;
printf("\nSuperNode matrix %s:\n", what);
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
n = A->ncol;
Astore = (SCformat *) A->Store;
dp = (float *) Astore->nzval;
col_to_sup = Astore->col_to_sup;
sup_to_col = Astore->sup_to_col;
rowind_colptr = Astore->rowind_colptr;
rowind = Astore->rowind;
printf("nrow %d, ncol %d, nnz %d, nsuper %d\n",
A->nrow,A->ncol,Astore->nnz,Astore->nsuper);
printf("nzval:\n");
for (k = 0; k <= Astore->nsuper; ++k) {
c = sup_to_col[k];
nsup = sup_to_col[k+1] - c;
for (j = c; j < c + nsup; ++j) {
d = Astore->nzval_colptr[j];
for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) {
printf("%d\t%d\t%e\n", rowind[i], j, dp[d++]);
}
}
}
#if 0
for (i = 0; i < Astore->nzval_colptr[n]; ++i) printf("%f ", dp[i]);
#endif
printf("\nnzval_colptr: ");
for (i = 0; i <= n; ++i) printf("%d ", Astore->nzval_colptr[i]);
printf("\nrowind: ");
for (i = 0; i < Astore->rowind_colptr[n]; ++i)
printf("%d ", Astore->rowind[i]);
printf("\nrowind_colptr: ");
for (i = 0; i <= n; ++i) printf("%d ", Astore->rowind_colptr[i]);
printf("\ncol_to_sup: ");
for (i = 0; i < n; ++i) printf("%d ", col_to_sup[i]);
printf("\nsup_to_col: ");
for (i = 0; i <= Astore->nsuper+1; ++i)
printf("%d ", sup_to_col[i]);
printf("\n");
fflush(stdout);
}
void
sPrint_Dense_Matrix(char *what, SuperMatrix *A)
{
DNformat *Astore;
register int i;
float *dp;
printf("\nDense matrix %s:\n", what);
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
Astore = (DNformat *) A->Store;
dp = (float *) Astore->nzval;
printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda);
printf("\nnzval: ");
for (i = 0; i < A->nrow; ++i) printf("%f ", dp[i]);
printf("\n");
fflush(stdout);
}
/*
* Diagnostic print of column "jcol" in the U/L factor.
*/
void
sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu)
{
int i, k, fsupc;
int *xsup, *supno;
int *xlsub, *lsub;
float *lusup;
int *xlusup;
float *ucol;
int *usub, *xusub;
xsup = Glu->xsup;
supno = Glu->supno;
lsub = Glu->lsub;
xlsub = Glu->xlsub;
lusup = Glu->lusup;
xlusup = Glu->xlusup;
ucol = Glu->ucol;
usub = Glu->usub;
xusub = Glu->xusub;
printf("%s", msg);
printf("col %d: pivrow %d, supno %d, xprune %d\n",
jcol, pivrow, supno[jcol], xprune[jcol]);
printf("\tU-col:\n");
for (i = xusub[jcol]; i < xusub[jcol+1]; i++)
printf("\t%d%10.4f\n", usub[i], ucol[i]);
printf("\tL-col in rectangular snode:\n");
fsupc = xsup[supno[jcol]]; /* first col of the snode */
i = xlsub[fsupc];
k = xlusup[jcol];
while ( i < xlsub[fsupc+1] && k < xlusup[jcol+1] ) {
printf("\t%d\t%10.4f\n", lsub[i], lusup[k]);
i++; k++;
}
fflush(stdout);
}
/*
* Check whether tempv[] == 0. This should be true before and after
* calling any numeric routines, i.e., "panel_bmod" and "column_bmod".
*/
void scheck_tempv(int n, float *tempv)
{
int i;
for (i = 0; i < n; i++) {
if (tempv[i] != 0.0)
{
fprintf(stderr,"tempv[%d] = %f\n", i,tempv[i]);
ABORT("scheck_tempv");
}
}
}
void
sGenXtrue(int n, int nrhs, float *x, int ldx)
{
int i, j;
for (j = 0; j < nrhs; ++j)
for (i = 0; i < n; ++i) {
x[i + j*ldx] = 1.0;/* + (float)(i+1.)/n;*/
}
}
/*
* Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's
*/
void
sFillRHS(trans_t trans, int nrhs, float *x, int ldx,
SuperMatrix *A, SuperMatrix *B)
{
NCformat *Astore;
float *Aval;
DNformat *Bstore;
float *rhs;
float one = 1.0;
float zero = 0.0;
int ldc;
char transc[1];
Astore = A->Store;
Aval = (float *) Astore->nzval;
Bstore = B->Store;
rhs = Bstore->nzval;
ldc = Bstore->lda;
if ( trans == NOTRANS ) *(unsigned char *)transc = 'N';
else *(unsigned char *)transc = 'T';
sp_sgemm(transc, nrhs, one, A,
x, ldx, zero, rhs, ldc);
}
/*
* Fills a float precision array with a given value.
*/
void
sfill(float *a, int alen, float dval)
{
register int i;
for (i = 0; i < alen; i++) a[i] = dval;
}
/*
* Check the inf-norm of the error vector
*/
void sinf_norm_error(int nrhs, SuperMatrix *X, float *xtrue)
{
DNformat *Xstore;
float err, xnorm;
float *Xmat, *soln_work;
int i, j;
Xstore = X->Store;
Xmat = Xstore->nzval;
for (j = 0; j < nrhs; j++) {
soln_work = &Xmat[j*Xstore->lda];
err = xnorm = 0.0;
for (i = 0; i < X->nrow; i++) {
err = SUPERLU_MAX(err, fabs(soln_work[i] - xtrue[i]));
xnorm = SUPERLU_MAX(xnorm, fabs(soln_work[i]));
}
err = err / xnorm;
printf("||X - Xtrue||/||X|| = %e\n", err);
}
}
/* Print performance of the code. */
void
sPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage,
float rpg, float rcond, float *ferr,
float *berr, char *equed, SuperLUStat_t *stat)
{
SCformat *Lstore;
NCformat *Ustore;
double *utime;
flops_t *ops;
utime = stat->utime;
ops = stat->ops;
if ( utime[FACT] != 0. )
printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT],
ops[FACT]*1e-6/utime[FACT]);
printf("Identify relaxed snodes = %8.2f\n", utime[RELAX]);
if ( utime[SOLVE] != 0. )
printf("Solve flops = %.0f, Mflops = %8.2f\n", ops[SOLVE],
ops[SOLVE]*1e-6/utime[SOLVE]);
Lstore = (SCformat *) L->Store;
Ustore = (NCformat *) U->Store;
printf("\tNo of nonzeros in factor L = %d\n", Lstore->nnz);
printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz);
printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
mem_usage->for_lu/1e6, mem_usage->total_needed/1e6,
mem_usage->expansions);
printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n");
printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n",
utime[FACT], ops[FACT]*1e-6/utime[FACT],
utime[SOLVE], ops[SOLVE]*1e-6/utime[SOLVE],
utime[ETREE], utime[EQUIL], utime[RCOND], utime[REFINE]);
printf("\tRpg\t\tRcond\t\tFerr\t\tBerr\t\tEquil?\n");
printf("NUM:\t%e\t%e\t%e\t%e\t%s\n",
rpg, rcond, ferr[0], berr[0], equed);
}
int print_float_vec(char *what, int n, float *vec)
{
int i;
printf("%s: n %d\n", what, n);
for (i = 0; i < n; ++i) printf("%d\t%f\n", i, vec[i]);
return 0;
}

@ -0,0 +1,391 @@
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* October 15, 2003
*
*/
/*
Copyright (c) 1994 by Xerox Corporation. All rights reserved.
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
Permission is hereby granted to use or copy this program for any
purpose, provided the above notices are retained on all copies.
Permission to modify the code and to distribute modified code is
granted, provided the above notices are retained, and a notice that
the code was modified is included with the above copyright notice.
*/
#include <math.h>
#include "ssp_defs.h"
#include "util.h"
/*
* Global statistics variale
*/
void superlu_abort_and_exit(char* msg)
{
fprintf(stderr, msg);
exit (-1);
}
/*
* Set the default values for the options argument.
*/
void set_default_options(superlu_options_t *options)
{
options->Fact = DOFACT;
options->Equil = YES;
options->ColPerm = COLAMD;
options->DiagPivotThresh = 1.0;
options->Trans = NOTRANS;
options->IterRefine = NOREFINE;
options->SymmetricMode = NO;
options->PivotGrowth = NO;
options->ConditionNumber = NO;
options->PrintStat = YES;
}
/* Deallocate the structure pointing to the actual storage of the matrix. */
void
Destroy_SuperMatrix_Store(SuperMatrix *A)
{
SUPERLU_FREE ( A->Store );
}
void
Destroy_CompCol_Matrix(SuperMatrix *A)
{
SUPERLU_FREE( ((NCformat *)A->Store)->rowind );
SUPERLU_FREE( ((NCformat *)A->Store)->colptr );
SUPERLU_FREE( ((NCformat *)A->Store)->nzval );
SUPERLU_FREE( A->Store );
}
void
Destroy_CompRow_Matrix(SuperMatrix *A)
{
SUPERLU_FREE( ((NRformat *)A->Store)->colind );
SUPERLU_FREE( ((NRformat *)A->Store)->rowptr );
SUPERLU_FREE( ((NRformat *)A->Store)->nzval );
SUPERLU_FREE( A->Store );
}
void
Destroy_SuperNode_Matrix(SuperMatrix *A)
{
SUPERLU_FREE ( ((SCformat *)A->Store)->rowind );
SUPERLU_FREE ( ((SCformat *)A->Store)->rowind_colptr );
SUPERLU_FREE ( ((SCformat *)A->Store)->nzval );
SUPERLU_FREE ( ((SCformat *)A->Store)->nzval_colptr );
SUPERLU_FREE ( ((SCformat *)A->Store)->col_to_sup );
SUPERLU_FREE ( ((SCformat *)A->Store)->sup_to_col );
SUPERLU_FREE ( A->Store );
}
/* A is of type Stype==NCP */
void
Destroy_CompCol_Permuted(SuperMatrix *A)
{
SUPERLU_FREE ( ((NCPformat *)A->Store)->colbeg );
SUPERLU_FREE ( ((NCPformat *)A->Store)->colend );
SUPERLU_FREE ( A->Store );
}
/* A is of type Stype==DN */
void
Destroy_Dense_Matrix(SuperMatrix *A)
{
DNformat* Astore = A->Store;
SUPERLU_FREE (Astore->nzval);
SUPERLU_FREE ( A->Store );
}
/*
* Reset repfnz[] for the current column
*/
void
resetrep_col (const int nseg, const int *segrep, int *repfnz)
{
int i, irep;
for (i = 0; i < nseg; i++) {
irep = segrep[i];
repfnz[irep] = EMPTY;
}
}
/*
* Count the total number of nonzeros in factors L and U, and in the
* symmetrically reduced L.
*/
void
countnz(const int n, int *xprune, int *nnzL, int *nnzU, GlobalLU_t *Glu)
{
int nsuper, fsupc, i, j;
int nnzL0, jlen, irep;
int *xsup, *xlsub;
xsup = Glu->xsup;
xlsub = Glu->xlsub;
*nnzL = 0;
*nnzU = (Glu->xusub)[n];
nnzL0 = 0;
nsuper = (Glu->supno)[n];
if ( n <= 0 ) return;
/*
* For each supernode
*/
for (i = 0; i <= nsuper; i++) {
fsupc = xsup[i];
jlen = xlsub[fsupc+1] - xlsub[fsupc];
for (j = fsupc; j < xsup[i+1]; j++) {
*nnzL += jlen;
*nnzU += j - fsupc + 1;
jlen--;
}
irep = xsup[i+1] - 1;
nnzL0 += xprune[irep] - xlsub[irep];
}
/* printf("\tNo of nonzeros in symm-reduced L = %d\n", nnzL0);*/
}
/*
* Fix up the data storage lsub for L-subscripts. It removes the subscript
* sets for structural pruning, and applies permuation to the remaining
* subscripts.
*/
void
fixupL(const int n, const int *perm_r, GlobalLU_t *Glu)
{
register int nsuper, fsupc, nextl, i, j, k, jstrt;
int *xsup, *lsub, *xlsub;
if ( n <= 1 ) return;
xsup = Glu->xsup;
lsub = Glu->lsub;
xlsub = Glu->xlsub;
nextl = 0;
nsuper = (Glu->supno)[n];
/*
* For each supernode ...
*/
for (i = 0; i <= nsuper; i++) {
fsupc = xsup[i];
jstrt = xlsub[fsupc];
xlsub[fsupc] = nextl;
for (j = jstrt; j < xlsub[fsupc+1]; j++) {
lsub[nextl] = perm_r[lsub[j]]; /* Now indexed into P*A */
nextl++;
}
for (k = fsupc+1; k < xsup[i+1]; k++)
xlsub[k] = nextl; /* Other columns in supernode i */
}
xlsub[n] = nextl;
}
/*
* Diagnostic print of segment info after panel_dfs().
*/
void print_panel_seg(int n, int w, int jcol, int nseg,
int *segrep, int *repfnz)
{
int j, k;
for (j = jcol; j < jcol+w; j++) {
printf("\tcol %d:\n", j);
for (k = 0; k < nseg; k++)
printf("\t\tseg %d, segrep %d, repfnz %d\n", k,
segrep[k], repfnz[(j-jcol)*n + segrep[k]]);
}
}
void
StatInit(SuperLUStat_t *stat)
{
register int i, w, panel_size, relax;
panel_size = sp_ienv(1);
relax = sp_ienv(2);
w = SUPERLU_MAX(panel_size, relax);
stat->panel_histo = intCalloc(w+1);
stat->utime = (double *) SUPERLU_MALLOC(NPHASES * sizeof(double));
if (!stat->utime) ABORT("SUPERLU_MALLOC fails for stat->utime");
stat->ops = (flops_t *) SUPERLU_MALLOC(NPHASES * sizeof(flops_t));
if (!stat->ops) ABORT("SUPERLU_MALLOC fails for stat->ops");
for (i = 0; i < NPHASES; ++i) {
stat->utime[i] = 0.;
stat->ops[i] = 0.;
}
}
void
StatPrint(SuperLUStat_t *stat)
{
double *utime;
flops_t *ops;
utime = stat->utime;
ops = stat->ops;
printf("Factor time = %8.2f\n", utime[FACT]);
if ( utime[FACT] != 0.0 )
printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT],
ops[FACT]*1e-6/utime[FACT]);
printf("Solve time = %8.2f\n", utime[SOLVE]);
if ( utime[SOLVE] != 0.0 )
printf("Solve flops = %e\tMflops = %8.2f\n", ops[SOLVE],
ops[SOLVE]*1e-6/utime[SOLVE]);
}
void
StatFree(SuperLUStat_t *stat)
{
SUPERLU_FREE(stat->panel_histo);
SUPERLU_FREE(stat->utime);
SUPERLU_FREE(stat->ops);
}
flops_t
LUFactFlops(SuperLUStat_t *stat)
{
return (stat->ops[FACT]);
}
flops_t
LUSolveFlops(SuperLUStat_t *stat)
{
return (stat->ops[SOLVE]);
}
/*
* Fills an integer array with a given value.
*/
void ifill(int *a, int alen, int ival)
{
register int i;
for (i = 0; i < alen; i++) a[i] = ival;
}
/*
* Get the statistics of the supernodes
*/
#define NBUCKS 10
static int max_sup_size;
void super_stats(int nsuper, int *xsup)
{
register int nsup1 = 0;
int i, isize, whichb, bl, bh;
int bucket[NBUCKS];
max_sup_size = 0;
for (i = 0; i <= nsuper; i++) {
isize = xsup[i+1] - xsup[i];
if ( isize == 1 ) nsup1++;
if ( max_sup_size < isize ) max_sup_size = isize;
}
printf(" Supernode statistics:\n\tno of super = %d\n", nsuper+1);
printf("\tmax supernode size = %d\n", max_sup_size);
printf("\tno of size 1 supernodes = %d\n", nsup1);
/* Histogram of the supernode sizes */
ifill (bucket, NBUCKS, 0);
for (i = 0; i <= nsuper; i++) {
isize = xsup[i+1] - xsup[i];
whichb = (float) isize / max_sup_size * NBUCKS;
if (whichb >= NBUCKS) whichb = NBUCKS - 1;
bucket[whichb]++;
}
printf("\tHistogram of supernode sizes:\n");
for (i = 0; i < NBUCKS; i++) {
bl = (float) i * max_sup_size / NBUCKS;
bh = (float) (i+1) * max_sup_size / NBUCKS;
printf("\tsnode: %d-%d\t\t%d\n", bl+1, bh, bucket[i]);
}
}
float SpaSize(int n, int np, float sum_npw)
{
return (sum_npw*8 + np*8 + n*4)/1024.;
}
float DenseSize(int n, float sum_nw)
{
return (sum_nw*8 + n*8)/1024.;;
}
/*
* Check whether repfnz[] == EMPTY after reset.
*/
void check_repfnz(int n, int w, int jcol, int *repfnz)
{
int jj, k;
for (jj = jcol; jj < jcol+w; jj++)
for (k = 0; k < n; k++)
if ( repfnz[(jj-jcol)*n + k] != EMPTY ) {
fprintf(stderr, "col %d, repfnz_col[%d] = %d\n", jj,
k, repfnz[(jj-jcol)*n + k]);
ABORT("check_repfnz");
}
}
/* Print a summary of the testing results. */
void
PrintSumm(char *type, int nfail, int nrun, int nerrs)
{
if ( nfail > 0 )
printf("%3s driver: %d out of %d tests failed to pass the threshold\n",
type, nfail, nrun);
else
printf("All tests for %3s driver passed the threshold (%6d tests run)\n", type, nrun);
if ( nerrs > 0 )
printf("%6d error messages recorded\n", nerrs);
}
int print_int_vec(char *what, int n, int *vec)
{
int i;
printf("%s\n", what);
for (i = 0; i < n; ++i) printf("%d\t%d\n", i, vec[i]);
return 0;
}

@ -0,0 +1,267 @@
#ifndef __SUPERLU_UTIL /* allow multiple inclusions */
#define __SUPERLU_UTIL
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
#ifndef __STDC__
#include <malloc.h>
#endif
*/
#include <assert.h>
/***********************************************************************
* Macros
***********************************************************************/
#define FIRSTCOL_OF_SNODE(i) (xsup[i])
/* No of marker arrays used in the symbolic factorization,
each of size n */
#define NO_MARKER 3
#define NUM_TEMPV(m,w,t,b) ( SUPERLU_MAX(m, (t + b)*w) )
#ifndef USER_ABORT
#define USER_ABORT(msg) superlu_abort_and_exit(msg)
#endif
#define ABORT(err_msg) \
{ char msg[256];\
sprintf(msg,"%s at line %d in file %s\n",err_msg,__LINE__, __FILE__);\
USER_ABORT(msg); }
#ifndef USER_MALLOC
#if 1
#define USER_MALLOC(size) superlu_malloc(size)
#else
/* The following may check out some uninitialized data */
#define USER_MALLOC(size) memset (superlu_malloc(size), '\x0F', size)
#endif
#endif
#define SUPERLU_MALLOC(size) USER_MALLOC(size)
#ifndef USER_FREE
#define USER_FREE(addr) superlu_free(addr)
#endif
#define SUPERLU_FREE(addr) USER_FREE(addr)
#define CHECK_MALLOC(where) { \
extern int superlu_malloc_total; \
printf("%s: malloc_total %d Bytes\n", \
where, superlu_malloc_total); \
}
#define SUPERLU_MAX(x, y) ( (x) > (y) ? (x) : (y) )
#define SUPERLU_MIN(x, y) ( (x) < (y) ? (x) : (y) )
/***********************************************************************
* Constants
***********************************************************************/
#define EMPTY (-1)
/*#define NO (-1)*/
#define FALSE 0
#define TRUE 1
/***********************************************************************
* Enumerate types
***********************************************************************/
typedef enum {NO, YES} yes_no_t;
typedef enum {DOFACT, SamePattern, SamePattern_SameRowPerm, FACTORED} fact_t;
typedef enum {NOROWPERM, LargeDiag, MY_PERMR} rowperm_t;
typedef enum {NATURAL, MMD_ATA, MMD_AT_PLUS_A, COLAMD, MY_PERMC}colperm_t;
typedef enum {NOTRANS, TRANS, CONJ} trans_t;
typedef enum {NOEQUIL, ROW, COL, BOTH} DiagScale_t;
typedef enum {NOREFINE, SINGLE=1, SLU_DOUBLE, EXTRA} IterRefine_t;
typedef enum {LUSUP, UCOL, LSUB, USUB} MemType;
typedef enum {HEAD, TAIL} stack_end_t;
typedef enum {SYSTEM, USER} LU_space_t;
/*
* The following enumerate type is used by the statistics variable
* to keep track of flop count and time spent at various stages.
*
* Note that not all of the fields are disjoint.
*/
typedef enum {
COLPERM, /* find a column ordering that minimizes fills */
RELAX, /* find artificial supernodes */
ETREE, /* compute column etree */
EQUIL, /* equilibrate the original matrix */
FACT, /* perform LU factorization */
RCOND, /* estimate reciprocal condition number */
SOLVE, /* forward and back solves */
REFINE, /* perform iterative refinement */
SLU_FLOAT, /* time spent in floating-point operations */
TRSV, /* fraction of FACT spent in xTRSV */
GEMV, /* fraction of FACT spent in xGEMV */
FERR, /* estimate error bounds after iterative refinement */
NPHASES /* total number of phases */
} PhaseType;
/***********************************************************************
* Type definitions
***********************************************************************/
typedef float flops_t;
typedef unsigned char Logical;
/*
*-- This contains the options used to control the solve process.
*
* Fact (fact_t)
* Specifies whether or not the factored form of the matrix
* A is supplied on entry, and if not, how the matrix A should
* be factorizaed.
* = DOFACT: The matrix A will be factorized from scratch, and the
* factors will be stored in L and U.
* = SamePattern: The matrix A will be factorized assuming
* that a factorization of a matrix with the same sparsity
* pattern was performed prior to this one. Therefore, this
* factorization will reuse column permutation vector
* ScalePermstruct->perm_c and the column elimination tree
* LUstruct->etree.
* = SamePattern_SameRowPerm: The matrix A will be factorized
* assuming that a factorization of a matrix with the same
* sparsity pattern and similar numerical values was performed
* prior to this one. Therefore, this factorization will reuse
* both row and column scaling factors R and C, and the
* both row and column permutation vectors perm_r and perm_c,
* distributed data structure set up from the previous symbolic
* factorization.
* = FACTORED: On entry, L, U, perm_r and perm_c contain the
* factored form of A. If DiagScale is not NOEQUIL, the matrix
* A has been equilibrated with scaling factors R and C.
*
* Equil (yes_no_t)
* Specifies whether to equilibrate the system (scale A's row and
* columns to have unit norm).
*
* ColPerm (colperm_t)
* Specifies what type of column permutation to use to reduce fill.
* = NATURAL: use the natural ordering
* = MMD_ATA: use minimum degree ordering on structure of A'*A
* = MMD_AT_PLUS_A: use minimum degree ordering on structure of A'+A
* = COLAMD: use approximate minimum degree column ordering
* = MY_PERMC: use the ordering specified in ScalePermstruct->perm_c[]
*
* Trans (trans_t)
* Specifies the form of the system of equations:
* = NOTRANS: A * X = B (No transpose)
* = TRANS: A**T * X = B (Transpose)
* = CONJ: A**H * X = B (Transpose)
*
* IterRefine (IterRefine_t)
* Specifies whether to perform iterative refinement.
* = NO: no iterative refinement
* = WorkingPrec: perform iterative refinement in working precision
* = ExtraPrec: perform iterative refinement in extra precision
*
* PrintStat (yes_no_t)
* Specifies whether to print the solver's statistics.
*
* DiagPivotThresh (double, in [0.0, 1.0]) (only for sequential SuperLU)
* Specifies the threshold used for a diagonal entry to be an
* acceptable pivot.
*
* PivotGrowth (yes_no_t)
* Specifies whether to compute the reciprocal pivot growth.
*
* ConditionNumber (ues_no_t)
* Specifies whether to compute the reciprocal condition number.
*
* RowPerm (rowperm_t) (only for SuperLU_DIST)
* Specifies whether to permute rows of the original matrix.
* = NO: not to permute the rows
* = LargeDiag: make the diagonal large relative to the off-diagonal
* = MY_PERMR: use the permutation given in ScalePermstruct->perm_r[]
*
* ReplaceTinyPivot (yes_no_t) (only for SuperLU_DIST)
* Specifies whether to replace the tiny diagonals by
* sqrt(epsilon)*||A|| during LU factorization.
*
* SolveInitialized (yes_no_t) (only for SuperLU_DIST)
* Specifies whether the initialization has been performed to the
* triangular solve.
*
* RefineInitialized (yes_no_t) (only for SuperLU_DIST)
* Specifies whether the initialization has been performed to the
* sparse matrix-vector multiplication routine needed in iterative
* refinement.
*/
typedef struct {
fact_t Fact;
yes_no_t Equil;
colperm_t ColPerm;
trans_t Trans;
IterRefine_t IterRefine;
yes_no_t PrintStat;
yes_no_t SymmetricMode;
double DiagPivotThresh;
yes_no_t PivotGrowth;
yes_no_t ConditionNumber;
rowperm_t RowPerm;
yes_no_t ReplaceTinyPivot;
yes_no_t SolveInitialized;
yes_no_t RefineInitialized;
} superlu_options_t;
typedef struct {
int *panel_histo; /* histogram of panel size distribution */
double *utime; /* running time at various phases */
flops_t *ops; /* operation count at various phases */
int TinyPivots; /* number of tiny pivots */
int RefineSteps; /* number of iterative refinement steps */
} SuperLUStat_t;
/***********************************************************************
* Prototypes
***********************************************************************/
#ifdef __cplusplus
extern "C" {
#endif
extern void Destroy_SuperMatrix_Store(SuperMatrix *);
extern void Destroy_CompCol_Matrix(SuperMatrix *);
extern void Destroy_CompRow_Matrix(SuperMatrix *);
extern void Destroy_SuperNode_Matrix(SuperMatrix *);
extern void Destroy_CompCol_Permuted(SuperMatrix *);
extern void Destroy_Dense_Matrix(SuperMatrix *);
extern void get_perm_c(int, SuperMatrix *, int *);
extern void set_default_options(superlu_options_t *options);
extern void sp_preorder (superlu_options_t *, SuperMatrix*, int*, int*,
SuperMatrix*);
extern void superlu_abort_and_exit(char*);
extern void *superlu_malloc (size_t);
extern int *intMalloc (int);
extern int *intCalloc (int);
extern void superlu_free (void*);
extern void SetIWork (int, int, int, int *, int **, int **, int **,
int **, int **, int **, int **);
extern int sp_coletree (int *, int *, int *, int, int, int *);
extern void relax_snode (const int, int *, const int, int *, int *);
extern void heap_relax_snode (const int, int *, const int, int *, int *);
extern void resetrep_col (const int, const int *, int *);
extern int spcoletree (int *, int *, int *, int, int, int *);
extern int *TreePostorder (int, int *);
extern double SuperLU_timer_ ();
extern int sp_ienv (int);
extern int lsame_ (char *, char *);
extern int xerbla_ (char *, int *);
extern void ifill (int *, int, int);
extern void snode_profile (int, int *);
extern void super_stats (int, int *);
extern void PrintSumm (char *, int, int, int);
extern void StatInit(SuperLUStat_t *);
extern void StatPrint (SuperLUStat_t *);
extern void StatFree(SuperLUStat_t *);
extern void print_panel_seg(int, int, int, int, int *, int *);
extern void check_repfnz(int, int, int, int *);
#ifdef __cplusplus
}
#endif
#endif /* __SUPERLU_UTIL */

@ -0,0 +1,43 @@
#include <stdio.h>
/* Subroutine */ int xerbla_(char *srname, int *info)
{
/* -- LAPACK auxiliary routine (version 2.0) --
Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
Courant Institute, Argonne National Lab, and Rice University
September 30, 1994
Purpose
=======
XERBLA is an error handler for the LAPACK routines.
It is called by an LAPACK routine if an input parameter has an
invalid value. A message is printed and execution stops.
Installers may consider modifying the STOP statement in order to
call system-specific exception-handling facilities.
Arguments
=========
SRNAME (input) CHARACTER*6
The name of the routine which called XERBLA.
INFO (input) INT
The position of the invalid parameter in the parameter list
of the calling routine.
=====================================================================
*/
printf("** On entry to %6s, parameter number %2d had an illegal value\n",
srname, *info);
/* End of XERBLA */
return 0;
} /* xerbla_ */

@ -87,6 +87,8 @@ PYPLAYERLIB ?= $(PYLIB)
GRPLIB += $(OCGDIR)/blender/renderconverter/$(DEBUG_DIR)librenderconverter.a
GRPLIB += $(OCGDIR)/blender/render/$(DEBUG_DIR)librender.a
GRPLIB += $(OCGDIR)/blender/radiosity/$(DEBUG_DIR)libradiosity.a
GRPLIB += $(NAN_OPENNL)/lib/$(DEBUG_DIR)libopennl.a
GRPLIB += $(NAN_SUPERLU)/lib/$(DEBUG_DIR)libsuperlu.a
GRPLIB += $(OCGDIR)/blender/python/$(DEBUG_DIR)libpython.a

@ -81,6 +81,8 @@ endif
export NAN_GHOST ?= $(LCGDIR)/ghost
export NAN_TEST_VERBOSITY ?= 1
export NAN_BMFONT ?= $(LCGDIR)/bmfont
export NAN_OPENNL ?= $(LCGDIR)/opennl
export NAN_SUPERLU ?= $(LCGDIR)/superlu
ifeq ($(FREE_WINDOWS), true)
export NAN_FTGL ?= $(LCGDIR)/gcc/ftgl
else