Added SuperLU 3.0:

http://crd.lbl.gov/~xiaoye/SuperLU/ This is a library to solve sparse matrix systems (type A*x=B). It is able to solve large systems very FAST. Only the necessary parts of the library are included to limit file size and compilation time. This means the example files, fortran interface, test files, matlab interface, cblas library, complex number part and build system have been left out. All (gcc) warnings have been fixed too. This library will be used for LSCM UV unwrapping. With this library, LSCM unwrapping can be calculated in a split second, making the unwrapping proces much more interactive. Added OpenNL (Open Numerical Libary): http://www.loria.fr/~levy/OpenNL/ OpenNL is a library to easily construct and solve sparse linear systems. We use a stripped down version, as an interface to SuperLU. This library was kindly given to use by Bruno Levy.
2004-07-13 11:42:13 +00:00 · 2004-07-13 11:42:13 +00:00 · 4f1c674ee0
commit 4f1c674ee0
parent 4a13a5720d
51 changed files with 13799 additions and 3 deletions
--- a/3
+++ b/3
@ -962,7 +962,8 @@ def blender_libs(env):
 		'blender_LOD',
 		'blender_BSP',
 		'blender_blenkernel',
-		'blender_IK'])
+		'blender_IK',
+		'blender_ONL'])

 def ketsji_libs(env):
 	"""
--- a/intern/Makefile
+++ b/intern/Makefile
@ -35,7 +35,7 @@ SOURCEDIR = intern
 # include nan_subdirs.mk

 ALLDIRS = string ghost guardedalloc bmfont moto container memutil
-ALLDIRS += decimation iksolver bsp SoundSystem
+ALLDIRS += decimation iksolver bsp SoundSystem opennl

 all::
 	@for i in $(ALLDIRS); do \
--- a/intern/SConscript
+++ b/intern/SConscript
@ -8,7 +8,8 @@ SConscript(['SoundSystem/SConscript',
            'container/SConscript',
            'memutil/SConscript/',
            'decimation/SConscript',
-            'iksolver/SConscript'])
+            'iksolver/SConscript',
+            'opennl/SConscript'])

 NEW_CSG='false'

--- a/intern/opennl/Makefile
+++ b/intern/opennl/Makefile
@ -0,0 +1,67 @@
+#
+# $Id$
+#
+# ***** BEGIN GPL/BL DUAL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version. The Blender
+# Foundation also sells licenses for use in proprietary software under
+# the Blender License.  See http://www.blender.org/BL/ for information
+# about this.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+#
+# The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+# All rights reserved.
+#
+# The Original Code is: all of this file.
+#
+# Contributor(s): Hans Lambermont
+#
+# ***** END GPL/BL DUAL LICENSE BLOCK *****
+# opennl main makefile.
+#
+
+include nan_definitions.mk
+
+LIBNAME = opennl
+LIBNAME_SLU = superlu
+SOURCEDIR = intern/$(LIBNAME)
+SOURCEDIR_SLU = intern/$(LIBNAME_SLU)
+DIR = $(OCGDIR)/$(SOURCEDIR)
+DIR_SLU = $(OCGDIR)/$(SOURCEDIR_SLU)
+DIRS = intern superlu
+
+include nan_subdirs.mk
+
+install: all debug
+	@[ -d $(NAN_OPENNL) ] || mkdir $(NAN_OPENNL)
+	@[ -d $(NAN_OPENNL)/include ] || mkdir $(NAN_OPENNL)/include
+	@[ -d $(NAN_OPENNL)/lib ] || mkdir $(NAN_OPENNL)/lib
+	@[ -d $(NAN_OPENNL)/lib/debug ] || mkdir $(NAN_OPENNL)/lib/debug
+	@../tools/cpifdiff.sh $(DIR)/libopennl.a $(NAN_OPENNL)/lib/
+	@../tools/cpifdiff.sh $(DIR)/debug/libopennl.a $(NAN_OPENNL)/lib/debug/
+ifeq ($(OS),darwin)
+	ranlib $(NAN_OPENNL)/lib/libopennl.a
+	ranlib $(NAN_OPENNL)/lib/debug/libopennl.a
+endif
+	@../tools/cpifdiff.sh extern/*.h $(NAN_OPENNL)/include/
+	@[ -d $(NAN_SUPERLU) ] || mkdir $(NAN_SUPERLU)
+	@[ -d $(NAN_SUPERLU)/lib ] || mkdir $(NAN_SUPERLU)/lib
+	@[ -d $(NAN_SUPERLU)/lib/debug ] || mkdir $(NAN_SUPERLU)/lib/debug
+	@../tools/cpifdiff.sh $(DIR_SLU)/libsuperlu.a $(NAN_SUPERLU)/lib/
+	@../tools/cpifdiff.sh $(DIR_SLU)/debug/libsuperlu.a $(NAN_SUPERLU)/lib/debug/
+ifeq ($(OS),darwin)
+	ranlib $(NAN_SUPERLU)/lib/libsuperlu.a
+	ranlib $(NAN_SUPERLU)/lib/debug/libsuperlu.a
+endif
+
--- a/intern/opennl/SConscript
+++ b/intern/opennl/SConscript
@ -0,0 +1,43 @@
+Import ('user_options_dict')
+Import ('library_env')
+
+opennl_env = library_env.Copy ()
+
+source_files = ['intern/opennl.c',
+                'superlu/colamd.c',
+                'superlu/get_perm_c.c',
+                'superlu/heap_relax_snode.c',
+                'superlu/lsame.c',
+                'superlu/memory.c',
+                'superlu/mmd.c',
+                'superlu/relax_snode.c',
+                'superlu/scolumn_bmod.c',
+                'superlu/scolumn_dfs.c',
+                'superlu/scopy_to_ucol.c',
+                'superlu/sgssv.c',
+                'superlu/sgstrf.c',
+                'superlu/sgstrs.c',
+                'superlu/smemory.c',
+                'superlu/smyblas2.c',
+                'superlu/sp_coletree.c',
+                'superlu/sp_ienv.c',
+                'superlu/sp_preorder.c',
+                'superlu/spanel_bmod.c',
+                'superlu/spanel_dfs.c',
+                'superlu/spivotL.c',
+                'superlu/spruneL.c',
+                'superlu/ssnode_bmod.c',
+                'superlu/ssnode_dfs.c',
+                'superlu/ssp_blas2.c',
+                'superlu/ssp_blas3.c',
+                'superlu/strsv.c',
+                'superlu/superlu_timer.c',
+                'superlu/sutil.c',
+                'superlu/util.c',
+                'superlu/xerbla.c']
+
+opennl_env.Append (CPPPATH = ['extern',
+                              'superlu'])
+
+opennl_env.Library (target='#'+user_options_dict['BUILD_DIR']+'/lib/blender_ONL', source=source_files)
+
--- a/intern/opennl/doc/OpenNL_License.txt
+++ b/intern/opennl/doc/OpenNL_License.txt
@ -0,0 +1,341 @@
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+                          675 Mass Ave, Cambridge, MA 02139, USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+
+		     END OF TERMS AND CONDITIONS
+
+	Appendix: How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) 19yy  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) 19yy name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
+
--- a/intern/opennl/doc/OpenNL_Readme.txt
+++ b/intern/opennl/doc/OpenNL_Readme.txt
@ -0,0 +1,13 @@
+
+This is OpenNL, a library to easily construct and solve sparse linear systems.
+* OpenNL is supplied with a set of iterative solvers (Conjugate gradient,
+  BICGSTAB, GMRes) and preconditioners (Jacobi, SSOR). 
+* OpenNL can also use other solvers (SuperLU 3.0 supported as an OpenNL
+  extension)
+
+Note that to be compatible with OpenNL, SuperLU 3.0 needs to be compiled with
+the following flag (see make.inc in SuperLU3.0):
+CDEFS = -DAdd_ (the default is -DAdd__, just remove the second underscore)
+
+OpenNL was modified for Blender to be used only as a wrapper for SuperLU.
+
--- a/intern/opennl/doc/SuperLU_License.txt
+++ b/intern/opennl/doc/SuperLU_License.txt
@ -0,0 +1,31 @@
+Copyright (c) 2003, The Regents of the University of California, through
+Lawrence Berkeley National Laboratory (subject to receipt of any required 
+approvals from U.S. Dept. of Energy) 
+
+All rights reserved. 
+
+Redistribution and use in source and binary forms, with or without
+modification,
+are permitted provided that the following conditions are met: 
+
+(1) Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer. 
+(2) Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution. 
+(3) Neither the name of Lawrence Berkeley National Laboratory, U.S. Dept. of
+Energy nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ 
--- a/intern/opennl/doc/SuperLU_Readme.txt
+++ b/intern/opennl/doc/SuperLU_Readme.txt
@ -0,0 +1,52 @@
+		SuperLU (Version 3.0)
+		=====================
+
+Copyright (c) 2003, The Regents of the University of California, through
+Lawrence Berkeley National Laboratory (subject to receipt of any required 
+approvals from U.S. Dept. of Energy) 
+
+All rights reserved. 
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met: 
+
+(1) Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer. 
+(2) Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution. 
+(3) Neither the name of Lawrence Berkeley National Laboratory, U.S. Dept. of
+Energy nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+  
+
+SuperLU contains a set of subroutines to solve a sparse linear system 
+A*X=B. It uses Gaussian elimination with partial pivoting (GEPP). 
+The columns of A may be preordered before factorization; the 
+preordering for sparsity is completely separate from the factorization.
+
+SuperLU is implemented in ANSI C, and must be compiled with standard 
+ANSI C compilers. It provides functionality for both real and complex
+matrices, in both single and double precision. The file names for the 
+single-precision real version start with letter "s" (such as sgstrf.c);
+the file names for the double-precision real version start with letter "d"
+(such as dgstrf.c); the file names for the single-precision complex
+version start with letter "c" (such as cgstrf.c); the file names
+for the double-precision complex version start with letter "z" 
+(such as zgstrf.c).
+
+SuperLU was modified for Blender to only include single-precision
+functionality.
+
--- a/intern/opennl/extern/ONL_opennl.h
+++ b/intern/opennl/extern/ONL_opennl.h
@ -0,0 +1,163 @@
+/*
+ *  $Id$
+ *
+ *  OpenNL: Numerical Library
+ *  Copyright (C) 2004 Bruno Levy
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *  If you modify this software, you should include a notice giving the
+ *  name of the person performing the modification, the date of modification,
+ *  and the reason for such modification.
+ *
+ *  Contact: Bruno Levy
+ *
+ *     levy@loria.fr
+ *
+ *     ISA Project
+ *     LORIA, INRIA Lorraine, 
+ *     Campus Scientifique, BP 239
+ *     54506 VANDOEUVRE LES NANCY CEDEX 
+ *     FRANCE
+ *
+ *  Note that the GNU General Public License does not permit incorporating
+ *  the Software into proprietary programs. 
+ */
+
+/*
+#define NL_DEBUG
+#define NL_PARANOID
+*/
+
+#define NL_USE_SUPERLU
+
+#ifndef nlOPENNL_H
+#define nlOPENNL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define NL_VERSION_0_0 1
+
+/*
+ *
+ * Datatypes
+ *
+ */
+
+typedef unsigned int	NLenum;
+typedef unsigned char	NLboolean;
+typedef unsigned int	NLbitfield;
+typedef void		NLvoid;
+typedef signed char	NLbyte;		/* 1-byte signed */
+typedef short		NLshort;	/* 2-byte signed */
+typedef int		NLint;		/* 4-byte signed */
+typedef unsigned char	NLubyte;	/* 1-byte unsigned */
+typedef unsigned short	NLushort;	/* 2-byte unsigned */
+typedef unsigned int	NLuint;		/* 4-byte unsigned */
+typedef int		NLsizei;	/* 4-byte signed */
+typedef float		NLfloat;	/* single precision float */
+typedef double		NLdouble;	/* double precision float */
+
+typedef void* NLContext ;
+
+/*
+ *
+ * Constants
+ *
+ */
+
+#define NL_FALSE   0x0
+#define NL_TRUE    0x1
+
+/* Primitives */
+
+#define NL_SYSTEM  0x0
+#define NL_MATRIX  0x1
+#define NL_ROW     0x2
+
+/* Solver Parameters */
+
+#define NL_SOLVER           0x100
+#define NL_NB_VARIABLES     0x101
+#define NL_LEAST_SQUARES    0x102
+#define NL_SYMMETRIC        0x106
+#define NL_ERROR            0x108
+
+/* Enable / Disable */
+
+#define NL_NORMALIZE_ROWS  0x400
+
+/* Row parameters */
+
+#define NL_RIGHT_HAND_SIDE 0x500
+#define NL_ROW_SCALING     0x501
+
+/*
+ * Contexts
+ */
+    NLContext nlNewContext() ;
+    void nlDeleteContext(NLContext context) ;
+    void nlMakeCurrent(NLContext context) ;
+    NLContext nlGetCurrent() ;
+
+/*
+ * State set/get
+ */
+
+    void nlSolverParameterf(NLenum pname, NLfloat param) ;
+    void nlSolverParameteri(NLenum pname, NLint param) ;
+
+    void nlRowParameterf(NLenum pname, NLfloat param) ;
+    void nlRowParameteri(NLenum pname, NLint param) ;
+
+    void nlGetBooleanv(NLenum pname, NLboolean* params) ;
+    void nlGetFloatv(NLenum pname, NLfloat* params) ;
+    void nlGetIntergerv(NLenum pname, NLint* params) ;
+
+    void nlEnable(NLenum pname) ;
+    void nlDisable(NLenum pname) ;
+    NLboolean nlIsEnabled(NLenum pname) ;
+
+/*
+ * Variables
+ */
+    void nlSetVariable(NLuint index, NLfloat value) ;
+    NLfloat nlGetVariable(NLuint index) ;
+    void nlLockVariable(NLuint index) ;
+    void nlUnlockVariable(NLuint index) ;
+    NLboolean nlVariableIsLocked(NLuint index) ;
+
+/*
+ * Begin/End
+ */
+
+    void nlBegin(NLenum primitive) ;
+    void nlEnd(NLenum primitive) ;
+    void nlCoefficient(NLuint index, NLfloat value) ;
+
+/*
+ * Solve
+ */
+
+    NLboolean nlSolve() ;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
--- a/intern/opennl/intern/Makefile
+++ b/intern/opennl/intern/Makefile
@ -0,0 +1,43 @@
+#
+# $Id$
+#
+# ***** BEGIN GPL/BL DUAL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version. The Blender
+# Foundation also sells licenses for use in proprietary software under
+# the Blender License.  See http://www.blender.org/BL/ for information
+# about this.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+#
+# The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+# All rights reserved.
+#
+# The Original Code is: all of this file.
+#
+# Contributor(s): none yet.
+#
+# ***** END GPL/BL DUAL LICENSE BLOCK *****
+# opennl intern Makefile
+#
+
+LIBNAME = opennl
+DIR = $(OCGDIR)/intern/$(LIBNAME)
+
+include nan_compile.mk
+
+CCFLAGS += $(NAN_LEVEL_2_CPP_WARNINGS)
+
+CPPFLAGS += -I../superlu -I../extern
+
+
--- a/intern/opennl/intern/opennl.c
+++ b/intern/opennl/intern/opennl.c
--- a/intern/opennl/superlu/Cnames.h
+++ b/intern/opennl/superlu/Cnames.h
@ -0,0 +1,281 @@
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 1, 1997
+ *
+ */
+#ifndef __SUPERLU_CNAMES /* allow multiple inclusions */
+#define __SUPERLU_CNAMES
+
+/* We want this flag, safer than putting in build system */
+#define Add_
+
+/*
+ * These macros define how C routines will be called.  ADD_ assumes that
+ * they will be called by fortran, which expects C routines to have an
+ * underscore postfixed to the name (Suns, and the Intel expect this).
+ * NOCHANGE indicates that fortran will be calling, and that it expects
+ * the name called by fortran to be identical to that compiled by the C
+ * (RS6K's do this).  UPCASE says it expects C routines called by fortran
+ * to be in all upcase (CRAY wants this). 
+ */
+
+#define ADD_       0
+#define ADD__      1
+#define NOCHANGE   2
+#define UPCASE     3
+#define C_CALL     4
+
+#ifdef UpCase
+#define F77_CALL_C UPCASE
+#endif
+
+#ifdef NoChange
+#define F77_CALL_C NOCHANGE
+#endif
+
+#ifdef Add_
+#define F77_CALL_C ADD_
+#endif
+
+#ifdef Add__
+#define F77_CALL_C ADD__
+#endif
+
+/* Default */
+#ifndef F77_CALL_C
+#define F77_CALL_C ADD_
+#endif
+
+
+#if (F77_CALL_C == ADD_)
+/*
+ * These defines set up the naming scheme required to have a fortran 77
+ * routine call a C routine
+ * No redefinition necessary to have following Fortran to C interface:
+ *           FORTRAN CALL               C DECLARATION
+ *           call dgemm(...)           void dgemm_(...)
+ *
+ * This is the default.
+ */
+
+#endif
+
+#if (F77_CALL_C == ADD__)
+/*
+ * These defines set up the naming scheme required to have a fortran 77
+ * routine call a C routine 
+ * for following Fortran to C interface:
+ *           FORTRAN CALL               C DECLARATION
+ *           call dgemm(...)           void dgemm__(...)
+ */
+#define sasum_    sasum__
+#define isamax_   isamax__
+#define scopy_    scopy__
+#define sscal_    sscal__
+#define sger_     sger__
+#define snrm2_    snrm2__
+#define ssymv_    ssymv__
+#define sdot_     sdot__
+#define saxpy_    saxpy__
+#define ssyr2_    ssyr2__
+#define srot_     srot__
+#define sgemv_    sgemv__
+#define strsv_    strsv__
+#define sgemm_    sgemm__
+#define strsm_    strsm__
+
+#define dasum_    dasum__
+#define idamax_   idamax__
+#define dcopy_    dcopy__
+#define dscal_    dscal__
+#define dger_     dger__
+#define dnrm2_    dnrm2__
+#define dsymv_    dsymv__
+#define ddot_     ddot__
+#define daxpy_    daxpy__
+#define dsyr2_    dsyr2__
+#define drot_     drot__
+#define dgemv_    dgemv__
+#define dtrsv_    dtrsv__
+#define dgemm_    dgemm__
+#define dtrsm_    dtrsm__
+
+#define scasum_   scasum__
+#define icamax_   icamax__
+#define ccopy_    ccopy__
+#define cscal_    cscal__
+#define scnrm2_   scnrm2__
+#define caxpy_    caxpy__
+#define cgemv_    cgemv__
+#define ctrsv_    ctrsv__
+#define cgemm_    cgemm__
+#define ctrsm_    ctrsm__
+#define cgerc_    cgerc__
+#define chemv_    chemv__
+#define cher2_    cher2__
+
+#define dzasum_   dzasum__
+#define izamax_   izamax__
+#define zcopy_    zcopy__
+#define zscal_    zscal__
+#define dznrm2_   dznrm2__
+#define zaxpy_    zaxpy__
+#define zgemv_    zgemv__
+#define ztrsv_    ztrsv__
+#define zgemm_    zgemm__
+#define ztrsm_    ztrsm__
+#define zgerc_    zgerc__
+#define zhemv_    zhemv__
+#define zher2_    zher2__
+
+#define c_bridge_dgssv_ c_bridge_dgssv__
+#define c_fortran_dgssv_ c_fortran_dgssv__
+#endif
+
+#if (F77_CALL_C == UPCASE)
+/*
+ * These defines set up the naming scheme required to have a fortran 77
+ * routine call a C routine 
+ * following Fortran to C interface:
+ *           FORTRAN CALL               C DECLARATION
+ *           call dgemm(...)           void DGEMM(...)
+ */
+#define sasum_    SASUM
+#define isamax_   ISAMAX
+#define scopy_    SCOPY
+#define sscal_    SSCAL
+#define sger_     SGER
+#define snrm2_    SNRM2
+#define ssymv_    SSYMV
+#define sdot_     SDOT
+#define saxpy_    SAXPY
+#define ssyr2_    SSYR2
+#define srot_     SROT
+#define sgemv_    SGEMV
+#define strsv_    STRSV
+#define sgemm_    SGEMM
+#define strsm_    STRSM
+
+#define dasum_    SASUM
+#define idamax_   ISAMAX
+#define dcopy_    SCOPY
+#define dscal_    SSCAL
+#define dger_     SGER
+#define dnrm2_    SNRM2
+#define dsymv_    SSYMV
+#define ddot_     SDOT
+#define daxpy_    SAXPY
+#define dsyr2_    SSYR2
+#define drot_     SROT
+#define dgemv_    SGEMV
+#define dtrsv_    STRSV
+#define dgemm_    SGEMM
+#define dtrsm_    STRSM
+
+#define scasum_   SCASUM
+#define icamax_   ICAMAX
+#define ccopy_    CCOPY
+#define cscal_    CSCAL
+#define scnrm2_   SCNRM2
+#define caxpy_    CAXPY
+#define cgemv_    CGEMV
+#define ctrsv_    CTRSV
+#define cgemm_    CGEMM
+#define ctrsm_    CTRSM
+#define cgerc_    CGERC
+#define chemv_    CHEMV
+#define cher2_    CHER2
+
+#define dzasum_   SCASUM
+#define izamax_   ICAMAX
+#define zcopy_    CCOPY
+#define zscal_    CSCAL
+#define dznrm2_   SCNRM2
+#define zaxpy_    CAXPY
+#define zgemv_    CGEMV
+#define ztrsv_    CTRSV
+#define zgemm_    CGEMM
+#define ztrsm_    CTRSM
+#define zgerc_    CGERC
+#define zhemv_    CHEMV
+#define zher2_    CHER2
+
+#define c_bridge_dgssv_ C_BRIDGE_DGSSV
+#define c_fortran_dgssv_ C_FORTRAN_DGSSV
+#endif
+
+#if (F77_CALL_C == NOCHANGE)
+/*
+ * These defines set up the naming scheme required to have a fortran 77
+ * routine call a C routine 
+ * for following Fortran to C interface:
+ *           FORTRAN CALL               C DECLARATION
+ *           call dgemm(...)           void dgemm(...)
+ */
+#define sasum_    sasum
+#define isamax_   isamax
+#define scopy_    scopy
+#define sscal_    sscal
+#define sger_     sger
+#define snrm2_    snrm2
+#define ssymv_    ssymv
+#define sdot_     sdot
+#define saxpy_    saxpy
+#define ssyr2_    ssyr2
+#define srot_     srot
+#define sgemv_    sgemv
+#define strsv_    strsv
+#define sgemm_    sgemm
+#define strsm_    strsm
+
+#define dasum_    dasum
+#define idamax_   idamax
+#define dcopy_    dcopy
+#define dscal_    dscal
+#define dger_     dger
+#define dnrm2_    dnrm2
+#define dsymv_    dsymv
+#define ddot_     ddot
+#define daxpy_    daxpy
+#define dsyr2_    dsyr2
+#define drot_     drot
+#define dgemv_    dgemv
+#define dtrsv_    dtrsv
+#define dgemm_    dgemm
+#define dtrsm_    dtrsm
+
+#define scasum_   scasum
+#define icamax_   icamax
+#define ccopy_    ccopy
+#define cscal_    cscal
+#define scnrm2_   scnrm2
+#define caxpy_    caxpy
+#define cgemv_    cgemv
+#define ctrsv_    ctrsv
+#define cgemm_    cgemm
+#define ctrsm_    ctrsm
+#define cgerc_    cgerc
+#define chemv_    chemv
+#define cher2_    cher2
+
+#define dzasum_   dzasum
+#define izamax_   izamax
+#define zcopy_    zcopy
+#define zscal_    zscal
+#define dznrm2_   dznrm2
+#define zaxpy_    zaxpy
+#define zgemv_    zgemv
+#define ztrsv_    ztrsv
+#define zgemm_    zgemm
+#define ztrsm_    ztrsm
+#define zgerc_    zgerc
+#define zhemv_    zhemv
+#define zher2_    zher2
+
+#define c_bridge_dgssv_ c_bridge_dgssv
+#define c_fortran_dgssv_ c_fortran_dgssv
+#endif
+
+#endif /* __SUPERLU_CNAMES */
--- a/intern/opennl/superlu/Makefile
+++ b/intern/opennl/superlu/Makefile
@ -0,0 +1,40 @@
+#
+# $Id$
+#
+# ***** BEGIN GPL/BL DUAL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version. The Blender
+# Foundation also sells licenses for use in proprietary software under
+# the Blender License.  See http://www.blender.org/BL/ for information
+# about this.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+#
+# The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+# All rights reserved.
+#
+# The Original Code is: all of this file.
+#
+# Contributor(s): none yet.
+#
+# ***** END GPL/BL DUAL LICENSE BLOCK *****
+# opennl intern Makefile
+#
+
+LIBNAME = superlu
+DIR = $(OCGDIR)/intern/$(LIBNAME)
+
+include nan_compile.mk
+
+CCFLAGS += $(NAN_LEVEL_2_CPP_WARNINGS)
+
--- a/intern/opennl/superlu/colamd.c
+++ b/intern/opennl/superlu/colamd.c
--- a/intern/opennl/superlu/colamd.h
+++ b/intern/opennl/superlu/colamd.h
@ -0,0 +1,67 @@
+/* ========================================================================== */
+/* === colamd prototypes and definitions ==================================== */
+/* ========================================================================== */
+
+/*
+    This is the colamd include file,
+
+	http://www.cise.ufl.edu/~davis/colamd/colamd.h
+
+    for use in the colamd.c, colamdmex.c, and symamdmex.c files located at
+
+	http://www.cise.ufl.edu/~davis/colamd/
+
+    See those files for a description of colamd and symamd, and for the
+    copyright notice, which also applies to this file.
+
+    August 3, 1998.  Version 1.0.
+*/
+
+/* ========================================================================== */
+/* === Definitions ========================================================== */
+/* ========================================================================== */
+
+/* size of the knobs [ ] array.  Only knobs [0..1] are currently used. */
+#define COLAMD_KNOBS 20
+
+/* number of output statistics.  Only A [0..2] are currently used. */
+#define COLAMD_STATS 20
+
+/* knobs [0] and A [0]: dense row knob and output statistic. */
+#define COLAMD_DENSE_ROW 0
+
+/* knobs [1] and A [1]: dense column knob and output statistic. */
+#define COLAMD_DENSE_COL 1
+
+/* A [2]: memory defragmentation count output statistic */
+#define COLAMD_DEFRAG_COUNT 2
+
+/* A [3]: whether or not the input columns were jumbled or had duplicates */
+#define COLAMD_JUMBLED_COLS 3
+
+/* ========================================================================== */
+/* === Prototypes of user-callable routines ================================= */
+/* ========================================================================== */
+
+int colamd_recommended		/* returns recommended value of Alen */
+(
+    int nnz,			/* nonzeros in A */
+    int n_row,			/* number of rows in A */
+    int n_col			/* number of columns in A */
+) ;
+
+void colamd_set_defaults	/* sets default parameters */
+(				/* knobs argument is modified on output */
+    double knobs [COLAMD_KNOBS]	/* parameter settings for colamd */
+) ;
+
+int colamd			/* returns TRUE if successful, FALSE otherwise*/
+(				/* A and p arguments are modified on output */
+    int n_row,			/* number of rows in A */
+    int n_col,			/* number of columns in A */
+    int Alen,			/* size of the array A */
+    int A [],			/* row indices of A, of size Alen */
+    int p [],			/* column pointers of A, of size n_col+1 */
+    double knobs [COLAMD_KNOBS]	/* parameter settings for colamd */
+) ;
+
--- a/intern/opennl/superlu/get_perm_c.c
+++ b/intern/opennl/superlu/get_perm_c.c
@ -0,0 +1,453 @@
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+
+#include "ssp_defs.h"
+#include "colamd.h"
+
+extern int  genmmd_(int *, int *, int *, int *, int *, int *, int *, 
+		    int *, int *, int *, int *, int *);
+
+void
+get_colamd(
+	   const int m,  /* number of rows in matrix A. */
+	   const int n,  /* number of columns in matrix A. */
+	   const int nnz,/* number of nonzeros in matrix A. */
+	   int *colptr,  /* column pointer of size n+1 for matrix A. */
+	   int *rowind,  /* row indices of size nz for matrix A. */
+	   int *perm_c   /* out - the column permutation vector. */
+	   )
+{
+    int Alen, *A, i, info, *p;
+    double *knobs;
+
+    Alen = colamd_recommended(nnz, m, n);
+
+    if ( !(knobs = (double *) SUPERLU_MALLOC(COLAMD_KNOBS * sizeof(double))) )
+        ABORT("Malloc fails for knobs");
+    colamd_set_defaults(knobs);
+
+    if (!(A = (int *) SUPERLU_MALLOC(Alen * sizeof(int))) )
+        ABORT("Malloc fails for A[]");
+    if (!(p = (int *) SUPERLU_MALLOC((n+1) * sizeof(int))) )
+        ABORT("Malloc fails for p[]");
+    for (i = 0; i <= n; ++i) p[i] = colptr[i];
+    for (i = 0; i < nnz; ++i) A[i] = rowind[i];
+    info = colamd(m, n, Alen, A, p, knobs);
+    if ( info == FALSE ) ABORT("COLAMD failed");
+
+    for (i = 0; i < n; ++i) perm_c[p[i]] = i;
+
+    SUPERLU_FREE(knobs);
+    SUPERLU_FREE(A);
+    SUPERLU_FREE(p);
+}
+
+void
+getata(
+       const int m,      /* number of rows in matrix A. */
+       const int n,      /* number of columns in matrix A. */
+       const int nz,     /* number of nonzeros in matrix A */
+       int *colptr,      /* column pointer of size n+1 for matrix A. */
+       int *rowind,      /* row indices of size nz for matrix A. */
+       int *atanz,       /* out - on exit, returns the actual number of
+                            nonzeros in matrix A'*A. */
+       int **ata_colptr, /* out - size n+1 */
+       int **ata_rowind  /* out - size *atanz */
+       )
+/*
+ * Purpose
+ * =======
+ *
+ * Form the structure of A'*A. A is an m-by-n matrix in column oriented
+ * format represented by (colptr, rowind). The output A'*A is in column
+ * oriented format (symmetrically, also row oriented), represented by
+ * (ata_colptr, ata_rowind).
+ *
+ * This routine is modified from GETATA routine by Tim Davis.
+ * The complexity of this algorithm is: SUM_{i=1,m} r(i)^2,
+ * i.e., the sum of the square of the row counts.
+ *
+ * Questions
+ * =========
+ *     o  Do I need to withhold the *dense* rows?
+ *     o  How do I know the number of nonzeros in A'*A?
+ * 
+ */
+{
+    register int i, j, k, col, num_nz, ti, trow;
+    int *marker, *b_colptr, *b_rowind;
+    int *t_colptr, *t_rowind; /* a column oriented form of T = A' */
+
+    if ( !(marker = (int*) SUPERLU_MALLOC((SUPERLU_MAX(m,n)+1)*sizeof(int))) )
+	ABORT("SUPERLU_MALLOC fails for marker[]");
+    if ( !(t_colptr = (int*) SUPERLU_MALLOC((m+1) * sizeof(int))) )
+	ABORT("SUPERLU_MALLOC t_colptr[]");
+    if ( !(t_rowind = (int*) SUPERLU_MALLOC(nz * sizeof(int))) )
+	ABORT("SUPERLU_MALLOC fails for t_rowind[]");
+
+    
+    /* Get counts of each column of T, and set up column pointers */
+    for (i = 0; i < m; ++i) marker[i] = 0;
+    for (j = 0; j < n; ++j) {
+	for (i = colptr[j]; i < colptr[j+1]; ++i)
+	    ++marker[rowind[i]];
+    }
+    t_colptr[0] = 0;
+    for (i = 0; i < m; ++i) {
+	t_colptr[i+1] = t_colptr[i] + marker[i];
+	marker[i] = t_colptr[i];
+    }
+
+    /* Transpose the matrix from A to T */
+    for (j = 0; j < n; ++j)
+	for (i = colptr[j]; i < colptr[j+1]; ++i) {
+	    col = rowind[i];
+	    t_rowind[marker[col]] = j;
+	    ++marker[col];
+	}
+
+    
+    /* ----------------------------------------------------------------
+       compute B = T * A, where column j of B is:
+
+       Struct (B_*j) =    UNION   ( Struct (T_*k) )
+                        A_kj != 0
+
+       do not include the diagonal entry
+   
+       ( Partition A as: A = (A_*1, ..., A_*n)
+         Then B = T * A = (T * A_*1, ..., T * A_*n), where
+         T * A_*j = (T_*1, ..., T_*m) * A_*j.  )
+       ---------------------------------------------------------------- */
+
+    /* Zero the diagonal flag */
+    for (i = 0; i < n; ++i) marker[i] = -1;
+
+    /* First pass determines number of nonzeros in B */
+    num_nz = 0;
+    for (j = 0; j < n; ++j) {
+	/* Flag the diagonal so it's not included in the B matrix */
+	marker[j] = j;
+
+	for (i = colptr[j]; i < colptr[j+1]; ++i) {
+	    /* A_kj is nonzero, add pattern of column T_*k to B_*j */
+	    k = rowind[i];
+	    for (ti = t_colptr[k]; ti < t_colptr[k+1]; ++ti) {
+		trow = t_rowind[ti];
+		if ( marker[trow] != j ) {
+		    marker[trow] = j;
+		    num_nz++;
+		}
+	    }
+	}
+    }
+    *atanz = num_nz;
+    
+    /* Allocate storage for A'*A */
+    if ( !(*ata_colptr = (int*) SUPERLU_MALLOC( (n+1) * sizeof(int)) ) )
+	ABORT("SUPERLU_MALLOC fails for ata_colptr[]");
+    if ( *atanz ) {
+	if ( !(*ata_rowind = (int*) SUPERLU_MALLOC( *atanz * sizeof(int)) ) )
+	    ABORT("SUPERLU_MALLOC fails for ata_rowind[]");
+    }
+    b_colptr = *ata_colptr; /* aliasing */
+    b_rowind = *ata_rowind;
+    
+    /* Zero the diagonal flag */
+    for (i = 0; i < n; ++i) marker[i] = -1;
+    
+    /* Compute each column of B, one at a time */
+    num_nz = 0;
+    for (j = 0; j < n; ++j) {
+	b_colptr[j] = num_nz;
+	
+	/* Flag the diagonal so it's not included in the B matrix */
+	marker[j] = j;
+
+	for (i = colptr[j]; i < colptr[j+1]; ++i) {
+	    /* A_kj is nonzero, add pattern of column T_*k to B_*j */
+	    k = rowind[i];
+	    for (ti = t_colptr[k]; ti < t_colptr[k+1]; ++ti) {
+		trow = t_rowind[ti];
+		if ( marker[trow] != j ) {
+		    marker[trow] = j;
+		    b_rowind[num_nz++] = trow;
+		}
+	    }
+	}
+    }
+    b_colptr[n] = num_nz;
+       
+    SUPERLU_FREE(marker);
+    SUPERLU_FREE(t_colptr);
+    SUPERLU_FREE(t_rowind);
+}
+
+
+void
+at_plus_a(
+	  const int n,      /* number of columns in matrix A. */
+	  const int nz,     /* number of nonzeros in matrix A */
+	  int *colptr,      /* column pointer of size n+1 for matrix A. */
+	  int *rowind,      /* row indices of size nz for matrix A. */
+	  int *bnz,         /* out - on exit, returns the actual number of
+                               nonzeros in matrix A'*A. */
+	  int **b_colptr,   /* out - size n+1 */
+	  int **b_rowind    /* out - size *bnz */
+	  )
+{
+/*
+ * Purpose
+ * =======
+ *
+ * Form the structure of A'+A. A is an n-by-n matrix in column oriented
+ * format represented by (colptr, rowind). The output A'+A is in column
+ * oriented format (symmetrically, also row oriented), represented by
+ * (b_colptr, b_rowind).
+ *
+ */
+    register int i, j, k, col, num_nz;
+    int *t_colptr, *t_rowind; /* a column oriented form of T = A' */
+    int *marker;
+
+    if ( !(marker = (int*) SUPERLU_MALLOC( n * sizeof(int)) ) )
+	ABORT("SUPERLU_MALLOC fails for marker[]");
+    if ( !(t_colptr = (int*) SUPERLU_MALLOC( (n+1) * sizeof(int)) ) )
+	ABORT("SUPERLU_MALLOC fails for t_colptr[]");
+    if ( !(t_rowind = (int*) SUPERLU_MALLOC( nz * sizeof(int)) ) )
+	ABORT("SUPERLU_MALLOC fails t_rowind[]");
+
+    
+    /* Get counts of each column of T, and set up column pointers */
+    for (i = 0; i < n; ++i) marker[i] = 0;
+    for (j = 0; j < n; ++j) {
+	for (i = colptr[j]; i < colptr[j+1]; ++i)
+	    ++marker[rowind[i]];
+    }
+    t_colptr[0] = 0;
+    for (i = 0; i < n; ++i) {
+	t_colptr[i+1] = t_colptr[i] + marker[i];
+	marker[i] = t_colptr[i];
+    }
+
+    /* Transpose the matrix from A to T */
+    for (j = 0; j < n; ++j)
+	for (i = colptr[j]; i < colptr[j+1]; ++i) {
+	    col = rowind[i];
+	    t_rowind[marker[col]] = j;
+	    ++marker[col];
+	}
+
+
+    /* ----------------------------------------------------------------
+       compute B = A + T, where column j of B is:
+
+       Struct (B_*j) = Struct (A_*k) UNION Struct (T_*k)
+
+       do not include the diagonal entry
+       ---------------------------------------------------------------- */
+
+    /* Zero the diagonal flag */
+    for (i = 0; i < n; ++i) marker[i] = -1;
+
+    /* First pass determines number of nonzeros in B */
+    num_nz = 0;
+    for (j = 0; j < n; ++j) {
+	/* Flag the diagonal so it's not included in the B matrix */
+	marker[j] = j;
+
+	/* Add pattern of column A_*k to B_*j */
+	for (i = colptr[j]; i < colptr[j+1]; ++i) {
+	    k = rowind[i];
+	    if ( marker[k] != j ) {
+		marker[k] = j;
+		++num_nz;
+	    }
+	}
+
+	/* Add pattern of column T_*k to B_*j */
+	for (i = t_colptr[j]; i < t_colptr[j+1]; ++i) {
+	    k = t_rowind[i];
+	    if ( marker[k] != j ) {
+		marker[k] = j;
+		++num_nz;
+	    }
+	}
+    }
+    *bnz = num_nz;
+    
+    /* Allocate storage for A+A' */
+    if ( !(*b_colptr = (int*) SUPERLU_MALLOC( (n+1) * sizeof(int)) ) )
+	ABORT("SUPERLU_MALLOC fails for b_colptr[]");
+    if ( *bnz) {
+      if ( !(*b_rowind = (int*) SUPERLU_MALLOC( *bnz * sizeof(int)) ) )
+	ABORT("SUPERLU_MALLOC fails for b_rowind[]");
+    }
+    
+    /* Zero the diagonal flag */
+    for (i = 0; i < n; ++i) marker[i] = -1;
+    
+    /* Compute each column of B, one at a time */
+    num_nz = 0;
+    for (j = 0; j < n; ++j) {
+	(*b_colptr)[j] = num_nz;
+	
+	/* Flag the diagonal so it's not included in the B matrix */
+	marker[j] = j;
+
+	/* Add pattern of column A_*k to B_*j */
+	for (i = colptr[j]; i < colptr[j+1]; ++i) {
+	    k = rowind[i];
+	    if ( marker[k] != j ) {
+		marker[k] = j;
+		(*b_rowind)[num_nz++] = k;
+	    }
+	}
+
+	/* Add pattern of column T_*k to B_*j */
+	for (i = t_colptr[j]; i < t_colptr[j+1]; ++i) {
+	    k = t_rowind[i];
+	    if ( marker[k] != j ) {
+		marker[k] = j;
+		(*b_rowind)[num_nz++] = k;
+	    }
+	}
+    }
+    (*b_colptr)[n] = num_nz;
+       
+    SUPERLU_FREE(marker);
+    SUPERLU_FREE(t_colptr);
+    SUPERLU_FREE(t_rowind);
+}
+
+void
+get_perm_c(int ispec, SuperMatrix *A, int *perm_c)
+/*
+ * Purpose
+ * =======
+ *
+ * GET_PERM_C obtains a permutation matrix Pc, by applying the multiple
+ * minimum degree ordering code by Joseph Liu to matrix A'*A or A+A'.
+ * or using approximate minimum degree column ordering by Davis et. al.
+ * The LU factorization of A*Pc tends to have less fill than the LU 
+ * factorization of A.
+ *
+ * Arguments
+ * =========
+ *
+ * ispec   (input) int
+ *         Specifies the type of column ordering to reduce fill:
+ *         = 1: minimum degree on the structure of A^T * A
+ *         = 2: minimum degree on the structure of A^T + A
+ *         = 3: approximate minimum degree for unsymmetric matrices
+ *         If ispec == 0, the natural ordering (i.e., Pc = I) is returned.
+ * 
+ * A       (input) SuperMatrix*
+ *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
+ *         of the linear equations is A->nrow. Currently, the type of A 
+ *         can be: Stype = NC; Dtype = _D; Mtype = GE. In the future,
+ *         more general A can be handled.
+ *
+ * perm_c  (output) int*
+ *	   Column permutation vector of size A->ncol, which defines the 
+ *         permutation matrix Pc; perm_c[i] = j means column i of A is 
+ *         in position j in A*Pc.
+ *
+ */
+{
+    NCformat *Astore = A->Store;
+    int m, n, bnz, *b_colptr, i;
+    int delta, maxint, nofsub, *invp;
+    int *b_rowind, *dhead, *qsize, *llist, *marker;
+    double t, SuperLU_timer_();
+    
+    m = A->nrow;
+    n = A->ncol;
+
+    t = SuperLU_timer_();
+    switch ( ispec ) {
+        case 0: /* Natural ordering */
+	      for (i = 0; i < n; ++i) perm_c[i] = i;
+#if ( PRNTlevel>=1 )
+	      printf("Use natural column ordering.\n");
+#endif
+	      return;
+        case 1: /* Minimum degree ordering on A'*A */
+	      getata(m, n, Astore->nnz, Astore->colptr, Astore->rowind,
+		     &bnz, &b_colptr, &b_rowind);
+#if ( PRNTlevel>=1 )
+	      printf("Use minimum degree ordering on A'*A.\n");
+#endif
+	      t = SuperLU_timer_() - t;
+	      /*printf("Form A'*A time = %8.3f\n", t);*/
+	      break;
+        case 2: /* Minimum degree ordering on A'+A */
+	      if ( m != n ) ABORT("Matrix is not square");
+	      at_plus_a(n, Astore->nnz, Astore->colptr, Astore->rowind,
+			&bnz, &b_colptr, &b_rowind);
+#if ( PRNTlevel>=1 )
+	      printf("Use minimum degree ordering on A'+A.\n");
+#endif
+	      t = SuperLU_timer_() - t;
+	      /*printf("Form A'+A time = %8.3f\n", t);*/
+	      break;
+        case 3: /* Approximate minimum degree column ordering. */
+	      get_colamd(m, n, Astore->nnz, Astore->colptr, Astore->rowind,
+			 perm_c);
+#if ( PRNTlevel>=1 )
+	      printf(".. Use approximate minimum degree column ordering.\n");
+#endif
+	      return; 
+        default:
+	      ABORT("Invalid ISPEC");
+    }
+
+    if ( bnz != 0 ) {
+	t = SuperLU_timer_();
+
+	/* Initialize and allocate storage for GENMMD. */
+	delta = 1; /* DELTA is a parameter to allow the choice of nodes
+		      whose degree <= min-degree + DELTA. */
+	maxint = 2147483647; /* 2**31 - 1 */
+	invp = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int));
+	if ( !invp ) ABORT("SUPERLU_MALLOC fails for invp.");
+	dhead = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int));
+	if ( !dhead ) ABORT("SUPERLU_MALLOC fails for dhead.");
+	qsize = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int));
+	if ( !qsize ) ABORT("SUPERLU_MALLOC fails for qsize.");
+	llist = (int *) SUPERLU_MALLOC(n*sizeof(int));
+	if ( !llist ) ABORT("SUPERLU_MALLOC fails for llist.");
+	marker = (int *) SUPERLU_MALLOC(n*sizeof(int));
+	if ( !marker ) ABORT("SUPERLU_MALLOC fails for marker.");
+
+	/* Transform adjacency list into 1-based indexing required by GENMMD.*/
+	for (i = 0; i <= n; ++i) ++b_colptr[i];
+	for (i = 0; i < bnz; ++i) ++b_rowind[i];
+	
+	genmmd_(&n, b_colptr, b_rowind, perm_c, invp, &delta, dhead, 
+		qsize, llist, marker, &maxint, &nofsub);
+
+	/* Transform perm_c into 0-based indexing. */
+	for (i = 0; i < n; ++i) --perm_c[i];
+
+	SUPERLU_FREE(b_colptr);
+	SUPERLU_FREE(b_rowind);
+	SUPERLU_FREE(invp);
+	SUPERLU_FREE(dhead);
+	SUPERLU_FREE(qsize);
+	SUPERLU_FREE(llist);
+	SUPERLU_FREE(marker);
+
+	t = SuperLU_timer_() - t;
+	/*  printf("call GENMMD time = %8.3f\n", t);*/
+
+    } else { /* Empty adjacency structure */
+	for (i = 0; i < n; ++i) perm_c[i] = i;
+    }
+
+}
--- a/intern/opennl/superlu/heap_relax_snode.c
+++ b/intern/opennl/superlu/heap_relax_snode.c
@ -0,0 +1,116 @@
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ 
+  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ 
+  Permission is hereby granted to use or copy this program for any
+  purpose, provided the above notices are retained on all copies.
+  Permission to modify the code and to distribute modified code is
+  granted, provided the above notices are retained, and a notice that
+  the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+
+void
+heap_relax_snode (
+	     const     int n,
+	     int       *et,           /* column elimination tree */
+	     const int relax_columns, /* max no of columns allowed in a
+					 relaxed snode */
+	     int       *descendants,  /* no of descendants of each node
+					 in the etree */
+	     int       *relax_end     /* last column in a supernode */
+	     )
+{
+/*
+ * Purpose
+ * =======
+ *    relax_snode() - Identify the initial relaxed supernodes, assuming that 
+ *    the matrix has been reordered according to the postorder of the etree.
+ *
+ */ 
+    register int i, j, k, l, parent;
+    register int snode_start;	/* beginning of a snode */
+    int *et_save, *post, *inv_post, *iwork;
+    int nsuper_et = 0, nsuper_et_post = 0;
+
+    /* The etree may not be postordered, but is heap ordered. */
+
+    iwork = (int*) intMalloc(3*n+2); 
+    if ( !iwork ) ABORT("SUPERLU_MALLOC fails for iwork[]");
+    inv_post = iwork + n+1;
+    et_save = inv_post + n+1;
+
+    /* Post order etree */
+    post = (int *) TreePostorder(n, et);
+    for (i = 0; i < n+1; ++i) inv_post[post[i]] = i;
+
+    /* Renumber etree in postorder */
+    for (i = 0; i < n; ++i) {
+        iwork[post[i]] = post[et[i]];
+	et_save[i] = et[i]; /* Save the original etree */
+    }
+    for (i = 0; i < n; ++i) et[i] = iwork[i];
+
+    /* Compute the number of descendants of each node in the etree */
+    ifill (relax_end, n, EMPTY);
+    for (j = 0; j < n; j++) descendants[j] = 0;
+    for (j = 0; j < n; j++) {
+	parent = et[j];
+	if ( parent != n )  /* not the dummy root */
+	    descendants[parent] += descendants[j] + 1;
+    }
+
+    /* Identify the relaxed supernodes by postorder traversal of the etree. */
+    for (j = 0; j < n; ) { 
+     	parent = et[j];
+        snode_start = j;
+ 	while ( parent != n && descendants[parent] < relax_columns ) {
+	    j = parent;
+	    parent = et[j];
+	}
+	/* Found a supernode in postordered etree; j is the last column. */
+	++nsuper_et_post;
+	k = n;
+	for (i = snode_start; i <= j; ++i)
+	    k = SUPERLU_MIN(k, inv_post[i]);
+	l = inv_post[j];
+	if ( (l - k) == (j - snode_start) ) {
+	    /* It's also a supernode in the original etree */
+	    relax_end[k] = l;		/* Last column is recorded */
+	    ++nsuper_et;
+	} else {
+	    for (i = snode_start; i <= j; ++i) {
+	        l = inv_post[i];
+	        if ( descendants[i] == 0 ) relax_end[l] = l;
+	    }
+	}
+	j++;
+	/* Search for a new leaf */
+	while ( descendants[j] != 0 && j < n ) j++;
+    }
+
+#if ( PRNTlevel>=1 )
+    printf(".. heap_snode_relax:\n"
+	   "\tNo of relaxed snodes in postordered etree:\t%d\n"
+	   "\tNo of relaxed snodes in original etree:\t%d\n",
+	   nsuper_et_post, nsuper_et);
+#endif
+
+    /* Recover the original etree */
+    for (i = 0; i < n; ++i) et[i] = et_save[i];
+
+    SUPERLU_FREE(post);
+    SUPERLU_FREE(iwork);
+}
+
+
--- a/intern/opennl/superlu/lsame.c
+++ b/intern/opennl/superlu/lsame.c
@ -0,0 +1,70 @@
+int lsame_(char *ca, char *cb)
+{
+/*  -- LAPACK auxiliary routine (version 2.0) --   
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
+       Courant Institute, Argonne National Lab, and Rice University   
+       September 30, 1994   
+
+    Purpose   
+    =======   
+
+    LSAME returns .TRUE. if CA is the same letter as CB regardless of case.   
+
+    Arguments   
+    =========   
+
+    CA      (input) CHARACTER*1   
+    CB      (input) CHARACTER*1   
+            CA and CB specify the single characters to be compared.   
+
+   ===================================================================== 
+*/  
+
+    /* System generated locals */
+    int ret_val;
+    
+    /* Local variables */
+    int inta, intb, zcode;
+
+    ret_val = *(unsigned char *)ca == *(unsigned char *)cb;
+    if (ret_val) {
+	return ret_val;
+    }
+
+    /* Now test for equivalence if both characters are alphabetic. */
+
+    zcode = 'Z';
+
+    /* Use 'Z' rather than 'A' so that ASCII can be detected on Prime   
+       machines, on which ICHAR returns a value with bit 8 set.   
+       ICHAR('A') on Prime machines returns 193 which is the same as   
+       ICHAR('A') on an EBCDIC machine. */
+
+    inta = *(unsigned char *)ca;
+    intb = *(unsigned char *)cb;
+
+    if (zcode == 90 || zcode == 122) {
+	/* ASCII is assumed - ZCODE is the ASCII code of either lower or   
+          upper case 'Z'. */
+	if (inta >= 97 && inta <= 122) inta += -32;
+	if (intb >= 97 && intb <= 122) intb += -32;
+
+    } else if (zcode == 233 || zcode == 169) {
+	/* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or   
+          upper case 'Z'. */
+	if ((inta >= 129 && inta <= 137) || (inta >= 145 && inta <= 153) || (inta 
+		>= 162 && inta <= 169))
+	    inta += 64;
+	if ((intb >= 129 && intb <= 137) || (intb >= 145 && intb <= 153) || (intb 
+		>= 162 && intb <= 169))
+	    intb += 64;
+    } else if (zcode == 218 || zcode == 250) {
+	/* ASCII is assumed, on Prime machines - ZCODE is the ASCII code   
+          plus 128 of either lower or upper case 'Z'. */
+	if (inta >= 225 && inta <= 250) inta += -32;
+	if (intb >= 225 && intb <= 250) intb += -32;
+    }
+    ret_val = inta == intb;
+    return ret_val;
+    
+} /* lsame_ */
--- a/intern/opennl/superlu/memory.c
+++ b/intern/opennl/superlu/memory.c
@ -0,0 +1,207 @@
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/** Precision-independent memory-related routines.
+    (Shared by [sdcz]memory.c) **/
+
+#include "ssp_defs.h"
+
+
+#if ( DEBUGlevel>=1 )           /* Debug malloc/free. */
+int superlu_malloc_total = 0;
+
+#define PAD_FACTOR  2
+#define DWORD  (sizeof(double)) /* Be sure it's no smaller than double. */
+
+void *superlu_malloc(size_t size)
+{
+    char *buf;
+
+    buf = (char *) malloc(size + DWORD);
+    if ( !buf ) {
+	printf("superlu_malloc fails: malloc_total %.0f MB, size %d\n",
+	       superlu_malloc_total*1e-6, size);
+	ABORT("superlu_malloc: out of memory");
+    }
+
+    ((int_t *) buf)[0] = size;
+#if 0
+    superlu_malloc_total += size + DWORD;
+#else
+    superlu_malloc_total += size;
+#endif
+    return (void *) (buf + DWORD);
+}
+
+void superlu_free(void *addr)
+{
+    char *p = ((char *) addr) - DWORD;
+
+    if ( !addr )
+	ABORT("superlu_free: tried to free NULL pointer");
+
+    if ( !p )
+	ABORT("superlu_free: tried to free NULL+DWORD pointer");
+
+    { 
+	int_t n = ((int_t *) p)[0];
+	
+	if ( !n )
+	    ABORT("superlu_free: tried to free a freed pointer");
+	*((int_t *) p) = 0; /* Set to zero to detect duplicate free's. */
+#if 0	
+	superlu_malloc_total -= (n + DWORD);
+#else
+	superlu_malloc_total -= n;
+#endif
+
+	if ( superlu_malloc_total < 0 )
+	    ABORT("superlu_malloc_total went negative!");
+	
+	/*free (addr);*/
+	free (p);
+    }
+
+}
+
+#else   /* production mode */
+
+void *superlu_malloc(size_t size)
+{
+    void *buf;
+    buf = (void *) malloc(size);
+    return (buf);
+}
+
+void superlu_free(void *addr)
+{
+    free (addr);
+}
+
+#endif
+
+
+/*
+ * Set up pointers for integer working arrays.
+ */
+void
+SetIWork(int m, int n, int panel_size, int *iworkptr, int **segrep,
+	 int **parent, int **xplore, int **repfnz, int **panel_lsub,
+	 int **xprune, int **marker)
+{
+    *segrep = iworkptr;
+    *parent = iworkptr + m;
+    *xplore = *parent + m;
+    *repfnz = *xplore + m;
+    *panel_lsub = *repfnz + panel_size * m;
+    *xprune = *panel_lsub + panel_size * m;
+    *marker = *xprune + n;
+    ifill (*repfnz, m * panel_size, EMPTY);
+    ifill (*panel_lsub, m * panel_size, EMPTY);
+}
+
+
+void
+copy_mem_int(int howmany, void *old, void *new)
+{
+    register int i;
+    int *iold = old;
+    int *inew = new;
+    for (i = 0; i < howmany; i++) inew[i] = iold[i];
+}
+
+
+void
+user_bcopy(char *src, char *dest, int bytes)
+{
+    char *s_ptr, *d_ptr;
+
+    s_ptr = src + bytes - 1;
+    d_ptr = dest + bytes - 1;
+    for (; d_ptr >= dest; --s_ptr, --d_ptr ) *d_ptr = *s_ptr;
+}
+
+
+
+int *intMalloc(int n)
+{
+    int *buf;
+    buf = (int *) SUPERLU_MALLOC(n * sizeof(int));
+    if ( !buf ) {
+	ABORT("SUPERLU_MALLOC fails for buf in intMalloc()");
+    }
+    return (buf);
+}
+
+int *intCalloc(int n)
+{
+    int *buf;
+    register int i;
+    buf = (int *) SUPERLU_MALLOC(n * sizeof(int));
+    if ( !buf ) {
+	ABORT("SUPERLU_MALLOC fails for buf in intCalloc()");
+    }
+    for (i = 0; i < n; ++i) buf[i] = 0;
+    return (buf);
+}
+
+
+
+#if 0
+check_expanders()
+{
+    int p;
+    printf("Check expanders:\n");
+    for (p = 0; p < NO_MEMTYPE; p++) {
+	printf("type %d, size %d, mem %d\n",
+	       p, expanders[p].size, (int)expanders[p].mem);
+    }
+
+    return 0;
+}
+
+
+StackInfo()
+{
+    printf("Stack: size %d, used %d, top1 %d, top2 %d\n",
+	   stack.size, stack.used, stack.top1, stack.top2);
+    return 0;
+}
+
+
+
+PrintStack(char *msg, GlobalLU_t *Glu)
+{
+    int i;
+    int *xlsub, *lsub, *xusub, *usub;
+
+    xlsub = Glu->xlsub;
+    lsub  = Glu->lsub;
+    xusub = Glu->xusub;
+    usub  = Glu->usub;
+
+    printf("%s\n", msg);
+    
+/*    printf("\nUCOL: ");
+    for (i = 0; i < xusub[ndim]; ++i)
+	printf("%f  ", ucol[i]);
+
+    printf("\nLSUB: ");
+    for (i = 0; i < xlsub[ndim]; ++i)
+	printf("%d  ", lsub[i]);
+
+    printf("\nUSUB: ");
+    for (i = 0; i < xusub[ndim]; ++i)
+	printf("%d  ", usub[i]);
+
+    printf("\n");*/
+    return 0;
+}   
+#endif
+
+
+
--- a/intern/opennl/superlu/mmd.c
+++ b/intern/opennl/superlu/mmd.c
--- a/intern/opennl/superlu/relax_snode.c
+++ b/intern/opennl/superlu/relax_snode.c
@ -0,0 +1,71 @@
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/*
+  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ 
+  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ 
+  Permission is hereby granted to use or copy this program for any
+  purpose, provided the above notices are retained on all copies.
+  Permission to modify the code and to distribute modified code is
+  granted, provided the above notices are retained, and a notice that
+  the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+
+void
+relax_snode (
+	     const     int n,
+	     int       *et,           /* column elimination tree */
+	     const int relax_columns, /* max no of columns allowed in a
+					 relaxed snode */
+	     int       *descendants,  /* no of descendants of each node
+					 in the etree */
+	     int       *relax_end     /* last column in a supernode */
+	     )
+{
+/*
+ * Purpose
+ * =======
+ *    relax_snode() - Identify the initial relaxed supernodes, assuming that 
+ *    the matrix has been reordered according to the postorder of the etree.
+ *
+ */ 
+    register int j, parent;
+    register int snode_start;	/* beginning of a snode */
+    
+    ifill (relax_end, n, EMPTY);
+    for (j = 0; j < n; j++) descendants[j] = 0;
+
+    /* Compute the number of descendants of each node in the etree */
+    for (j = 0; j < n; j++) {
+	parent = et[j];
+	if ( parent != n )  /* not the dummy root */
+	    descendants[parent] += descendants[j] + 1;
+    }
+
+    /* Identify the relaxed supernodes by postorder traversal of the etree. */
+    for (j = 0; j < n; ) { 
+     	parent = et[j];
+        snode_start = j;
+ 	while ( parent != n && descendants[parent] < relax_columns ) {
+	    j = parent;
+	    parent = et[j];
+	}
+	/* Found a supernode with j being the last column. */
+	relax_end[snode_start] = j;		/* Last column is recorded */
+	j++;
+	/* Search for a new leaf */
+	while ( descendants[j] != 0 && j < n ) j++;
+    }
+
+    /*printf("No of relaxed snodes: %d; relaxed columns: %d\n", 
+		nsuper, no_relaxed_col); */
+}
--- a/intern/opennl/superlu/scolumn_bmod.c
+++ b/intern/opennl/superlu/scolumn_bmod.c
@ -0,0 +1,353 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ 
+  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ 
+  Permission is hereby granted to use or copy this program for any
+  purpose, provided the above notices are retained on all copies.
+  Permission to modify the code and to distribute modified code is
+  granted, provided the above notices are retained, and a notice that
+  the code was modified is included with the above copyright notice.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "ssp_defs.h"
+
+/* 
+ * Function prototypes 
+ */
+void susolve(int, int, float*, float*);
+void slsolve(int, int, float*, float*);
+void smatvec(int, int, int, float*, float*, float*);
+
+
+
+/* Return value:   0 - successful return
+ *               > 0 - number of bytes allocated when run out of space
+ */
+int
+scolumn_bmod (
+	     const int  jcol,	  /* in */
+	     const int  nseg,	  /* in */
+	     float     *dense,	  /* in */
+	     float     *tempv,	  /* working array */
+	     int        *segrep,  /* in */
+	     int        *repfnz,  /* in */
+	     int        fpanelc,  /* in -- first column in the current panel */
+	     GlobalLU_t *Glu,     /* modified */
+	     SuperLUStat_t *stat  /* output */
+	     )
+{
+/*
+ * Purpose:
+ * ========
+ *    Performs numeric block updates (sup-col) in topological order.
+ *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
+ *    Special processing on the supernodal portion of L\U[*,j]
+ *
+ */
+#ifdef _CRAY
+    _fcd ftcs1 = _cptofcd("L", strlen("L")),
+         ftcs2 = _cptofcd("N", strlen("N")),
+         ftcs3 = _cptofcd("U", strlen("U"));
+#endif
+
+#ifdef USE_VENDOR_BLAS
+    int         incx = 1, incy = 1;
+    float      alpha, beta;
+#endif
+    
+    /* krep = representative of current k-th supernode
+     * fsupc = first supernodal column
+     * nsupc = no of columns in supernode
+     * nsupr = no of rows in supernode (used as leading dimension)
+     * luptr = location of supernodal LU-block in storage
+     * kfnz = first nonz in the k-th supernodal segment
+     * no_zeros = no of leading zeros in a supernodal U-segment
+     */
+    float       ukj, ukj1, ukj2;
+    int          luptr, luptr1, luptr2;
+    int          fsupc, nsupc, nsupr, segsze;
+    int          nrow;	  /* No of rows in the matrix of matrix-vector */
+    int          jcolp1, jsupno, k, ksub, krep, krep_ind, ksupno;
+    register int lptr, kfnz, isub, irow, i;
+    register int no_zeros, new_next; 
+    int          ufirst, nextlu;
+    int          fst_col; /* First column within small LU update */
+    int          d_fsupc; /* Distance between the first column of the current
+			     panel and the first column of the current snode. */
+    int          *xsup, *supno;
+    int          *lsub, *xlsub;
+    float       *lusup;
+    int          *xlusup;
+    int          nzlumax;
+    float       *tempv1;
+    float      zero = 0.0;
+#ifdef USE_VENDOR_BLAS
+    float      one = 1.0;
+    float      none = -1.0;
+#endif
+    int          mem_error;
+    flops_t      *ops = stat->ops;
+
+    xsup    = Glu->xsup;
+    supno   = Glu->supno;
+    lsub    = Glu->lsub;
+    xlsub   = Glu->xlsub;
+    lusup   = Glu->lusup;
+    xlusup  = Glu->xlusup;
+    nzlumax = Glu->nzlumax;
+    jcolp1 = jcol + 1;
+    jsupno = supno[jcol];
+    
+    /* 
+     * For each nonz supernode segment of U[*,j] in topological order 
+     */
+    k = nseg - 1;
+    for (ksub = 0; ksub < nseg; ksub++) {
+
+	krep = segrep[k];
+	k--;
+	ksupno = supno[krep];
+	if ( jsupno != ksupno ) { /* Outside the rectangular supernode */
+
+	    fsupc = xsup[ksupno];
+	    fst_col = SUPERLU_MAX ( fsupc, fpanelc );
+
+  	    /* Distance from the current supernode to the current panel; 
+	       d_fsupc=0 if fsupc > fpanelc. */
+  	    d_fsupc = fst_col - fsupc; 
+
+	    luptr = xlusup[fst_col] + d_fsupc;
+	    lptr = xlsub[fsupc] + d_fsupc;
+
+	    kfnz = repfnz[krep];
+	    kfnz = SUPERLU_MAX ( kfnz, fpanelc );
+
+	    segsze = krep - kfnz + 1;
+	    nsupc = krep - fst_col + 1;
+	    nsupr = xlsub[fsupc+1] - xlsub[fsupc];	/* Leading dimension */
+	    nrow = nsupr - d_fsupc - nsupc;
+	    krep_ind = lptr + nsupc - 1;
+
+	    ops[TRSV] += segsze * (segsze - 1);
+	    ops[GEMV] += 2 * nrow * segsze;
+
+
+	    /* 
+	     * Case 1: Update U-segment of size 1 -- col-col update 
+	     */
+	    if ( segsze == 1 ) {
+	  	ukj = dense[lsub[krep_ind]];
+		luptr += nsupr*(nsupc-1) + nsupc;
+
+		for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+		    irow = lsub[i];
+		    dense[irow] -=  ukj*lusup[luptr];
+		    luptr++;
+		}
+
+	    } else if ( segsze <= 3 ) {
+		ukj = dense[lsub[krep_ind]];
+		luptr += nsupr*(nsupc-1) + nsupc-1;
+		ukj1 = dense[lsub[krep_ind - 1]];
+		luptr1 = luptr - nsupr;
+
+		if ( segsze == 2 ) { /* Case 2: 2cols-col update */
+		    ukj -= ukj1 * lusup[luptr1];
+		    dense[lsub[krep_ind]] = ukj;
+		    for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+		    	irow = lsub[i];
+		    	luptr++;
+		    	luptr1++;
+		    	dense[irow] -= ( ukj*lusup[luptr]
+					+ ukj1*lusup[luptr1] );
+		    }
+		} else { /* Case 3: 3cols-col update */
+		    ukj2 = dense[lsub[krep_ind - 2]];
+		    luptr2 = luptr1 - nsupr;
+		    ukj1 -= ukj2 * lusup[luptr2-1];
+		    ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
+		    dense[lsub[krep_ind]] = ukj;
+		    dense[lsub[krep_ind-1]] = ukj1;
+		    for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+		    	irow = lsub[i];
+		    	luptr++;
+		    	luptr1++;
+			luptr2++;
+		    	dense[irow] -= ( ukj*lusup[luptr]
+			     + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
+		    }
+		}
+
+
+
+	    } else {
+	  	/*
+		 * Case: sup-col update
+		 * Perform a triangular solve and block update,
+		 * then scatter the result of sup-col update to dense
+		 */
+
+		no_zeros = kfnz - fst_col;
+
+	        /* Copy U[*,j] segment from dense[*] to tempv[*] */
+	        isub = lptr + no_zeros;
+	        for (i = 0; i < segsze; i++) {
+	  	    irow = lsub[isub];
+		    tempv[i] = dense[irow];
+		    ++isub; 
+	        }
+
+	        /* Dense triangular solve -- start effective triangle */
+		luptr += nsupr * no_zeros + no_zeros; 
+		
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+		STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
+		       &nsupr, tempv, &incx );
+#else		
+		strsv_( "L", "N", "U", &segsze, &lusup[luptr], 
+		       &nsupr, tempv, &incx );
+#endif		
+ 		luptr += segsze;  /* Dense matrix-vector */
+		tempv1 = &tempv[segsze];
+                alpha = one;
+                beta = zero;
+#ifdef _CRAY
+		SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
+		       &nsupr, tempv, &incx, &beta, tempv1, &incy );
+#else
+		sgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
+		       &nsupr, tempv, &incx, &beta, tempv1, &incy );
+#endif
+#else
+		slsolve ( nsupr, segsze, &lusup[luptr], tempv );
+
+ 		luptr += segsze;  /* Dense matrix-vector */
+		tempv1 = &tempv[segsze];
+		smatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
+#endif
+		
+		
+                /* Scatter tempv[] into SPA dense[] as a temporary storage */
+                isub = lptr + no_zeros;
+                for (i = 0; i < segsze; i++) {
+                    irow = lsub[isub];
+                    dense[irow] = tempv[i];
+                    tempv[i] = zero;
+                    ++isub;
+                }
+
+		/* Scatter tempv1[] into SPA dense[] */
+		for (i = 0; i < nrow; i++) {
+		    irow = lsub[isub];
+		    dense[irow] -= tempv1[i];
+		    tempv1[i] = zero;
+		    ++isub;
+		}
+	    }
+	    
+	} /* if jsupno ... */
+
+    } /* for each segment... */
+
+    /*
+     *	Process the supernodal portion of L\U[*,j]
+     */
+    nextlu = xlusup[jcol];
+    fsupc = xsup[jsupno];
+
+    /* Copy the SPA dense into L\U[*,j] */
+    new_next = nextlu + xlsub[fsupc+1] - xlsub[fsupc];
+    while ( new_next > nzlumax ) {
+	if ((mem_error = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu)))
+	    return (mem_error);
+	lusup = Glu->lusup;
+	lsub = Glu->lsub;
+    }
+
+    for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
+  	irow = lsub[isub];
+	lusup[nextlu] = dense[irow];
+        dense[irow] = zero;
+	++nextlu;
+    }
+
+    xlusup[jcolp1] = nextlu;	/* Close L\U[*,jcol] */
+
+    /* For more updates within the panel (also within the current supernode), 
+     * should start from the first column of the panel, or the first column 
+     * of the supernode, whichever is bigger. There are 2 cases:
+     *    1) fsupc < fpanelc, then fst_col := fpanelc
+     *    2) fsupc >= fpanelc, then fst_col := fsupc
+     */
+    fst_col = SUPERLU_MAX ( fsupc, fpanelc );
+
+    if ( fst_col < jcol ) {
+
+  	/* Distance between the current supernode and the current panel.
+	   d_fsupc=0 if fsupc >= fpanelc. */
+  	d_fsupc = fst_col - fsupc;
+
+	lptr = xlsub[fsupc] + d_fsupc;
+	luptr = xlusup[fst_col] + d_fsupc;
+	nsupr = xlsub[fsupc+1] - xlsub[fsupc];	/* Leading dimension */
+	nsupc = jcol - fst_col;	/* Excluding jcol */
+	nrow = nsupr - d_fsupc - nsupc;
+
+	/* Points to the beginning of jcol in snode L\U(jsupno) */
+	ufirst = xlusup[jcol] + d_fsupc;	
+
+	ops[TRSV] += nsupc * (nsupc - 1);
+	ops[GEMV] += 2 * nrow * nsupc;
+	
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+	STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], 
+	       &nsupr, &lusup[ufirst], &incx );
+#else
+	strsv_( "L", "N", "U", &nsupc, &lusup[luptr], 
+	       &nsupr, &lusup[ufirst], &incx );
+#endif
+	
+	alpha = none; beta = one; /* y := beta*y + alpha*A*x */
+
+#ifdef _CRAY
+	SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
+	       &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
+#else
+	sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
+	       &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
+#endif
+#else
+	slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
+
+	smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
+		&lusup[ufirst], tempv );
+	
+        /* Copy updates from tempv[*] into lusup[*] */
+	isub = ufirst + nsupc;
+	for (i = 0; i < nrow; i++) {
+	    lusup[isub] -= tempv[i];
+	    tempv[i] = 0.0;
+	    ++isub;
+	}
+
+#endif
+	
+	
+    } /* if fst_col < jcol ... */ 
+
+    return 0;
+}
--- a/intern/opennl/superlu/scolumn_dfs.c
+++ b/intern/opennl/superlu/scolumn_dfs.c
@ -0,0 +1,270 @@
+
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ 
+  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ 
+  Permission is hereby granted to use or copy this program for any
+  purpose, provided the above notices are retained on all copies.
+  Permission to modify the code and to distribute modified code is
+  granted, provided the above notices are retained, and a notice that
+  the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+
+/* What type of supernodes we want */
+#define T2_SUPER
+
+int
+scolumn_dfs(
+	   const int  m,         /* in - number of rows in the matrix */
+	   const int  jcol,      /* in */
+	   int        *perm_r,   /* in */
+	   int        *nseg,     /* modified - with new segments appended */
+	   int        *lsub_col, /* in - defines the RHS vector to start the dfs */
+	   int        *segrep,   /* modified - with new segments appended */
+	   int        *repfnz,   /* modified */
+	   int        *xprune,   /* modified */
+	   int        *marker,   /* modified */
+	   int        *parent,	 /* working array */
+	   int        *xplore,   /* working array */
+	   GlobalLU_t *Glu       /* modified */
+	   )
+{
+/* 
+ * Purpose
+ * =======
+ *   "column_dfs" performs a symbolic factorization on column jcol, and
+ *   decide the supernode boundary.
+ *
+ *   This routine does not use numeric values, but only use the RHS 
+ *   row indices to start the dfs.
+ *
+ *   A supernode representative is the last column of a supernode.
+ *   The nonzeros in U[*,j] are segments that end at supernodal
+ *   representatives. The routine returns a list of such supernodal 
+ *   representatives in topological order of the dfs that generates them.
+ *   The location of the first nonzero in each such supernodal segment
+ *   (supernodal entry location) is also returned.
+ *
+ * Local parameters
+ * ================
+ *   nseg: no of segments in current U[*,j]
+ *   jsuper: jsuper=EMPTY if column j does not belong to the same
+ *	supernode as j-1. Otherwise, jsuper=nsuper.
+ *
+ *   marker2: A-row --> A-row/col (0/1)
+ *   repfnz: SuperA-col --> PA-row
+ *   parent: SuperA-col --> SuperA-col
+ *   xplore: SuperA-col --> index to L-structure
+ *
+ * Return value
+ * ============
+ *     0  success;
+ *   > 0  number of bytes allocated when run out of space.
+ *
+ */
+    int     jcolp1, jcolm1, jsuper, nsuper, nextl;
+    int     k, krep, krow, kmark, kperm;
+    int     *marker2;           /* Used for small panel LU */
+    int	    fsupc;		/* First column of a snode */
+    int     myfnz;		/* First nonz column of a U-segment */
+    int	    chperm, chmark, chrep, kchild;
+    int     xdfs, maxdfs, kpar, oldrep;
+    int     jptr, jm1ptr;
+    int     ito, ifrom, istop;	/* Used to compress row subscripts */
+    int     mem_error;
+    int     *xsup, *supno, *lsub, *xlsub;
+    int     nzlmax;
+    static  int  first = 1, maxsuper;
+    
+    xsup    = Glu->xsup;
+    supno   = Glu->supno;
+    lsub    = Glu->lsub;
+    xlsub   = Glu->xlsub;
+    nzlmax  = Glu->nzlmax;
+
+    if ( first ) {
+	maxsuper = sp_ienv(3);
+	first = 0;
+    }
+    jcolp1  = jcol + 1;
+    jcolm1  = jcol - 1;
+    nsuper  = supno[jcol];
+    jsuper  = nsuper;
+    nextl   = xlsub[jcol];
+    marker2 = &marker[2*m];
+
+
+    /* For each nonzero in A[*,jcol] do dfs */
+    for (k = 0; lsub_col[k] != EMPTY; k++) {
+
+	krow = lsub_col[k];
+    	lsub_col[k] = EMPTY;
+	kmark = marker2[krow];    	
+
+	/* krow was visited before, go to the next nonz */
+        if ( kmark == jcol ) continue; 
+
+	/* For each unmarked nbr krow of jcol
+	 *	krow is in L: place it in structure of L[*,jcol]
+	 */
+	marker2[krow] = jcol;
+	kperm = perm_r[krow];
+
+   	if ( kperm == EMPTY ) {
+	    lsub[nextl++] = krow; 	/* krow is indexed into A */
+	    if ( nextl >= nzlmax ) {
+		if ((mem_error = sLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu)))
+		    return (mem_error);
+		lsub = Glu->lsub;
+	    }
+            if ( kmark != jcolm1 ) jsuper = EMPTY;/* Row index subset testing */
+  	} else {
+	    /*	krow is in U: if its supernode-rep krep
+	     *	has been explored, update repfnz[*]
+	     */
+	    krep = xsup[supno[kperm]+1] - 1;
+	    myfnz = repfnz[krep];
+
+	    if ( myfnz != EMPTY ) {	/* Visited before */
+	    	if ( myfnz > kperm ) repfnz[krep] = kperm;
+		/* continue; */
+	    }
+	    else {
+		/* Otherwise, perform dfs starting at krep */
+		oldrep = EMPTY;
+	 	parent[krep] = oldrep;
+	  	repfnz[krep] = kperm;
+		xdfs = xlsub[krep];
+	  	maxdfs = xprune[krep];
+
+		do {
+		    /* 
+		     * For each unmarked kchild of krep 
+		     */
+		    while ( xdfs < maxdfs ) {
+
+		   	kchild = lsub[xdfs];
+			xdfs++;
+		  	chmark = marker2[kchild];
+
+		   	if ( chmark != jcol ) { /* Not reached yet */
+		   	    marker2[kchild] = jcol;
+		   	    chperm = perm_r[kchild];
+
+		   	    /* Case kchild is in L: place it in L[*,k] */
+		   	    if ( chperm == EMPTY ) {
+			    	lsub[nextl++] = kchild;
+				if ( nextl >= nzlmax ) {
+				    if ((mem_error =
+					 sLUMemXpand(jcol,nextl,LSUB,&nzlmax,Glu)))
+					return (mem_error);
+				    lsub = Glu->lsub;
+				}
+				if ( chmark != jcolm1 ) jsuper = EMPTY;
+			    } else {
+		    	    	/* Case kchild is in U: 
+				 *   chrep = its supernode-rep. If its rep has 
+			         *   been explored, update its repfnz[*]
+			         */
+		   	    	chrep = xsup[supno[chperm]+1] - 1;
+		   		myfnz = repfnz[chrep];
+		   		if ( myfnz != EMPTY ) { /* Visited before */
+				    if ( myfnz > chperm )
+     				  	repfnz[chrep] = chperm;
+				} else {
+		        	    /* Continue dfs at super-rep of kchild */
+		   		    xplore[krep] = xdfs;	
+		   		    oldrep = krep;
+		   		    krep = chrep; /* Go deeper down G(L^t) */
+				    parent[krep] = oldrep;
+		    		    repfnz[krep] = chperm;
+		   		    xdfs = xlsub[krep];     
+				    maxdfs = xprune[krep];
+				} /* else */
+
+			   } /* else */
+
+			} /* if */
+
+		    } /* while */
+
+		    /* krow has no more unexplored nbrs;
+	   	     *    place supernode-rep krep in postorder DFS.
+	   	     *    backtrack dfs to its parent
+		     */
+		    segrep[*nseg] = krep;
+		    ++(*nseg);
+		    kpar = parent[krep]; /* Pop from stack, mimic recursion */
+		    if ( kpar == EMPTY ) break; /* dfs done */
+		    krep = kpar;
+		    xdfs = xplore[krep];
+		    maxdfs = xprune[krep];
+
+		} while ( kpar != EMPTY ); 	/* Until empty stack */
+
+	    } /* else */
+
+	} /* else */
+
+    } /* for each nonzero ... */
+
+    /* Check to see if j belongs in the same supernode as j-1 */
+    if ( jcol == 0 ) { /* Do nothing for column 0 */
+	nsuper = supno[0] = 0;
+    } else {
+   	fsupc = xsup[nsuper];
+	jptr = xlsub[jcol];	/* Not compressed yet */
+	jm1ptr = xlsub[jcolm1];
+
+#ifdef T2_SUPER
+	if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = EMPTY;
+#endif
+	/* Make sure the number of columns in a supernode doesn't
+	   exceed threshold. */
+	if ( jcol - fsupc >= maxsuper ) jsuper = EMPTY;
+
+	/* If jcol starts a new supernode, reclaim storage space in
+	 * lsub from the previous supernode. Note we only store
+	 * the subscript set of the first and last columns of
+   	 * a supernode. (first for num values, last for pruning)
+	 */
+	if ( jsuper == EMPTY ) {	/* starts a new supernode */
+	    if ( (fsupc < jcolm1-1) ) {	/* >= 3 columns in nsuper */
+#ifdef CHK_COMPRESS
+		printf("  Compress lsub[] at super %d-%d\n", fsupc, jcolm1);
+#endif
+	        ito = xlsub[fsupc+1];
+		xlsub[jcolm1] = ito;
+		istop = ito + jptr - jm1ptr;
+		xprune[jcolm1] = istop; /* Initialize xprune[jcol-1] */
+		xlsub[jcol] = istop;
+		for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito)
+		    lsub[ito] = lsub[ifrom];
+		nextl = ito;            /* = istop + length(jcol) */
+	    }
+	    nsuper++;
+	    supno[jcol] = nsuper;
+	} /* if a new supernode */
+
+    }	/* else: jcol > 0 */ 
+    
+    /* Tidy up the pointers before exit */
+    xsup[nsuper+1] = jcolp1;
+    supno[jcolp1]  = nsuper;
+    xprune[jcol]   = nextl;	/* Initialize upper bound for pruning */
+    xlsub[jcolp1]  = nextl;
+
+    return 0;
+}
--- a/intern/opennl/superlu/scopy_to_ucol.c
+++ b/intern/opennl/superlu/scopy_to_ucol.c
@ -0,0 +1,105 @@
+
+
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/*
+  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ 
+  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ 
+  Permission is hereby granted to use or copy this program for any
+  purpose, provided the above notices are retained on all copies.
+  Permission to modify the code and to distribute modified code is
+  granted, provided the above notices are retained, and a notice that
+  the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+#include "util.h"
+
+int
+scopy_to_ucol(
+	      int        jcol,	  /* in */
+	      int        nseg,	  /* in */
+	      int        *segrep,  /* in */
+	      int        *repfnz,  /* in */
+	      int        *perm_r,  /* in */
+	      float     *dense,   /* modified - reset to zero on return */
+	      GlobalLU_t *Glu      /* modified */
+	      )
+{
+/* 
+ * Gather from SPA dense[*] to global ucol[*].
+ */
+    int ksub, krep, ksupno;
+    int i, k, kfnz, segsze;
+    int fsupc, isub, irow;
+    int jsupno, nextu;
+    int new_next, mem_error;
+    int       *xsup, *supno;
+    int       *lsub, *xlsub;
+    float    *ucol;
+    int       *usub, *xusub;
+    int       nzumax;
+
+    float zero = 0.0;
+
+    xsup    = Glu->xsup;
+    supno   = Glu->supno;
+    lsub    = Glu->lsub;
+    xlsub   = Glu->xlsub;
+    ucol    = Glu->ucol;
+    usub    = Glu->usub;
+    xusub   = Glu->xusub;
+    nzumax  = Glu->nzumax;
+    
+    jsupno = supno[jcol];
+    nextu  = xusub[jcol];
+    k = nseg - 1;
+    for (ksub = 0; ksub < nseg; ksub++) {
+	krep = segrep[k--];
+	ksupno = supno[krep];
+
+	if ( ksupno != jsupno ) { /* Should go into ucol[] */
+	    kfnz = repfnz[krep];
+	    if ( kfnz != EMPTY ) {	/* Nonzero U-segment */
+
+	    	fsupc = xsup[ksupno];
+	        isub = xlsub[fsupc] + kfnz - fsupc;
+	        segsze = krep - kfnz + 1;
+
+		new_next = nextu + segsze;
+		while ( new_next > nzumax ) {
+		    if ((mem_error = sLUMemXpand(jcol, nextu, UCOL, &nzumax, Glu)))
+			return (mem_error);
+		    ucol = Glu->ucol;
+		    if ((mem_error = sLUMemXpand(jcol, nextu, USUB, &nzumax, Glu)))
+			return (mem_error);
+		    usub = Glu->usub;
+		    lsub = Glu->lsub;
+		}
+		
+		for (i = 0; i < segsze; i++) {
+		    irow = lsub[isub];
+		    usub[nextu] = perm_r[irow];
+		    ucol[nextu] = dense[irow];
+		    dense[irow] = zero;
+		    nextu++;
+		    isub++;
+		} 
+
+	    }
+
+	}
+
+    } /* for each segment... */
+
+    xusub[jcol + 1] = nextu;      /* Close U[*,jcol] */
+    return 0;
+}
--- a/intern/opennl/superlu/sgssv.c
+++ b/intern/opennl/superlu/sgssv.c
@ -0,0 +1,221 @@
+
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+#include "ssp_defs.h"
+
+void
+sgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
+      SuperMatrix *L, SuperMatrix *U, SuperMatrix *B,
+      SuperLUStat_t *stat, int *info )
+{
+/*
+ * Purpose
+ * =======
+ *
+ * SGSSV solves the system of linear equations A*X=B, using the
+ * LU factorization from SGSTRF. It performs the following steps:
+ *
+ *   1. If A is stored column-wise (A->Stype = SLU_NC):
+ *
+ *      1.1. Permute the columns of A, forming A*Pc, where Pc
+ *           is a permutation matrix. For more details of this step, 
+ *           see sp_preorder.c.
+ *
+ *      1.2. Factor A as Pr*A*Pc=L*U with the permutation Pr determined
+ *           by Gaussian elimination with partial pivoting.
+ *           L is unit lower triangular with offdiagonal entries
+ *           bounded by 1 in magnitude, and U is upper triangular.
+ *
+ *      1.3. Solve the system of equations A*X=B using the factored
+ *           form of A.
+ *
+ *   2. If A is stored row-wise (A->Stype = SLU_NR), apply the
+ *      above algorithm to the transpose of A:
+ *
+ *      2.1. Permute columns of transpose(A) (rows of A),
+ *           forming transpose(A)*Pc, where Pc is a permutation matrix. 
+ *           For more details of this step, see sp_preorder.c.
+ *
+ *      2.2. Factor A as Pr*transpose(A)*Pc=L*U with the permutation Pr
+ *           determined by Gaussian elimination with partial pivoting.
+ *           L is unit lower triangular with offdiagonal entries
+ *           bounded by 1 in magnitude, and U is upper triangular.
+ *
+ *      2.3. Solve the system of equations A*X=B using the factored
+ *           form of A.
+ *
+ *   See supermatrix.h for the definition of 'SuperMatrix' structure.
+ * 
+ * Arguments
+ * =========
+ *
+ * options (input) superlu_options_t*
+ *         The structure defines the input parameters to control
+ *         how the LU decomposition will be performed and how the
+ *         system will be solved.
+ *
+ * A       (input) SuperMatrix*
+ *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
+ *         of linear equations is A->nrow. Currently, the type of A can be:
+ *         Stype = SLU_NC or SLU_NR; Dtype = SLU_S; Mtype = SLU_GE.
+ *         In the future, more general A may be handled.
+ *
+ * perm_c  (input/output) int*
+ *         If A->Stype = SLU_NC, column permutation vector of size A->ncol
+ *         which defines the permutation matrix Pc; perm_c[i] = j means 
+ *         column i of A is in position j in A*Pc.
+ *         If A->Stype = SLU_NR, column permutation vector of size A->nrow
+ *         which describes permutation of columns of transpose(A) 
+ *         (rows of A) as described above.
+ * 
+ *         If options->ColPerm = MY_PERMC or options->Fact = SamePattern or
+ *            options->Fact = SamePattern_SameRowPerm, it is an input argument.
+ *            On exit, perm_c may be overwritten by the product of the input
+ *            perm_c and a permutation that postorders the elimination tree
+ *            of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
+ *            is already in postorder.
+ *         Otherwise, it is an output argument.
+ * 
+ * perm_r  (input/output) int*
+ *         If A->Stype = SLU_NC, row permutation vector of size A->nrow, 
+ *         which defines the permutation matrix Pr, and is determined 
+ *         by partial pivoting.  perm_r[i] = j means row i of A is in 
+ *         position j in Pr*A.
+ *         If A->Stype = SLU_NR, permutation vector of size A->ncol, which
+ *         determines permutation of rows of transpose(A)
+ *         (columns of A) as described above.
+ *
+ *         If options->RowPerm = MY_PERMR or
+ *            options->Fact = SamePattern_SameRowPerm, perm_r is an
+ *            input argument.
+ *         otherwise it is an output argument.
+ *
+ * L       (output) SuperMatrix*
+ *         The factor L from the factorization 
+ *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
+ *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
+ *         Uses compressed row subscripts storage for supernodes, i.e.,
+ *         L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
+ *         
+ * U       (output) SuperMatrix*
+ *	   The factor U from the factorization 
+ *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
+ *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
+ *         Uses column-wise storage scheme, i.e., U has types:
+ *         Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU.
+ *
+ * B       (input/output) SuperMatrix*
+ *         B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
+ *         On entry, the right hand side matrix.
+ *         On exit, the solution matrix if info = 0;
+ *
+ * stat   (output) SuperLUStat_t*
+ *        Record the statistics on runtime and floating-point operation count.
+ *        See util.h for the definition of 'SuperLUStat_t'.
+ *
+ * info    (output) int*
+ *	   = 0: successful exit
+ *         > 0: if info = i, and i is
+ *             <= A->ncol: U(i,i) is exactly zero. The factorization has
+ *                been completed, but the factor U is exactly singular,
+ *                so the solution could not be computed.
+ *             > A->ncol: number of bytes allocated when memory allocation
+ *                failure occurred, plus A->ncol.
+ *   
+ */
+    DNformat *Bstore;
+    SuperMatrix *AA = NULL;/* A in SLU_NC format used by the factorization routine.*/
+    SuperMatrix AC; /* Matrix postmultiplied by Pc */
+    int      lwork = 0, *etree, i;
+    
+    /* Set default values for some parameters */
+    int      panel_size;     /* panel size */
+    int      relax;          /* no of columns in a relaxed snodes */
+    int      permc_spec;
+    trans_t  trans = NOTRANS;
+    double   *utime;
+    double   t;	/* Temporary time */
+
+    /* Test the input parameters ... */
+    *info = 0;
+    Bstore = B->Store;
+    if ( options->Fact != DOFACT ) *info = -1;
+    else if ( A->nrow != A->ncol || A->nrow < 0 ||
+	 (A->Stype != SLU_NC && A->Stype != SLU_NR) ||
+	 A->Dtype != SLU_S || A->Mtype != SLU_GE )
+	*info = -2;
+    else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
+	B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE )
+	*info = -7;
+    if ( *info != 0 ) {
+	i = -(*info);
+	xerbla_("sgssv", &i);
+	return;
+    }
+
+    utime = stat->utime;
+
+    /* Convert A to SLU_NC format when necessary. */
+    if ( A->Stype == SLU_NR ) {
+	NRformat *Astore = A->Store;
+	AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
+	sCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, 
+			       Astore->nzval, Astore->colind, Astore->rowptr,
+			       SLU_NC, A->Dtype, A->Mtype);
+	trans = TRANS;
+    } else {
+        if ( A->Stype == SLU_NC ) AA = A;
+    }
+
+    t = SuperLU_timer_();
+    /*
+     * Get column permutation vector perm_c[], according to permc_spec:
+     *   permc_spec = NATURAL:  natural ordering 
+     *   permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
+     *   permc_spec = MMD_ATA:  minimum degree on structure of A'*A
+     *   permc_spec = COLAMD:   approximate minimum degree column ordering
+     *   permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
+     */
+    permc_spec = options->ColPerm;
+    if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
+      get_perm_c(permc_spec, AA, perm_c);
+    utime[COLPERM] = SuperLU_timer_() - t;
+
+    etree = intMalloc(A->ncol);
+
+    t = SuperLU_timer_();
+    sp_preorder(options, AA, perm_c, etree, &AC);
+    utime[ETREE] = SuperLU_timer_() - t;
+
+    panel_size = sp_ienv(1);
+    relax = sp_ienv(2);
+
+    /*printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", 
+	  relax, panel_size, sp_ienv(3), sp_ienv(4));*/
+    t = SuperLU_timer_(); 
+    /* Compute the LU factorization of A. */
+    sgstrf(options, &AC, relax, panel_size,
+	   etree, NULL, lwork, perm_c, perm_r, L, U, stat, info);
+    utime[FACT] = SuperLU_timer_() - t;
+
+    t = SuperLU_timer_();
+    if ( *info == 0 ) {
+        /* Solve the system A*X=B, overwriting B with X. */
+        sgstrs (trans, L, U, perm_c, perm_r, B, stat, info);
+    }
+    utime[SOLVE] = SuperLU_timer_() - t;
+
+    SUPERLU_FREE (etree);
+    Destroy_CompCol_Permuted(&AC);
+    if ( A->Stype == SLU_NR ) {
+	Destroy_SuperMatrix_Store(AA);
+	SUPERLU_FREE(AA);
+    }
+
+}
--- a/intern/opennl/superlu/sgstrf.c
+++ b/intern/opennl/superlu/sgstrf.c
@ -0,0 +1,433 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ 
+  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ 
+  Permission is hereby granted to use or copy this program for any
+  purpose, provided the above notices are retained on all copies.
+  Permission to modify the code and to distribute modified code is
+  granted, provided the above notices are retained, and a notice that
+  the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+
+void
+sgstrf (superlu_options_t *options, SuperMatrix *A,
+        int relax, int panel_size, int *etree, void *work, int lwork,
+        int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U,
+        SuperLUStat_t *stat, int *info)
+{
+/*
+ * Purpose
+ * =======
+ *
+ * SGSTRF computes an LU factorization of a general sparse m-by-n
+ * matrix A using partial pivoting with row interchanges.
+ * The factorization has the form
+ *     Pr * A = L * U
+ * where Pr is a row permutation matrix, L is lower triangular with unit
+ * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper 
+ * triangular (upper trapezoidal if A->nrow < A->ncol).
+ *
+ * See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ * Arguments
+ * =========
+ *
+ * options (input) superlu_options_t*
+ *         The structure defines the input parameters to control
+ *         how the LU decomposition will be performed.
+ *
+ * A        (input) SuperMatrix*
+ *	    Original matrix A, permuted by columns, of dimension
+ *          (A->nrow, A->ncol). The type of A can be:
+ *          Stype = SLU_NCP; Dtype = SLU_S; Mtype = SLU_GE.
+ *
+ * drop_tol (input) float (NOT IMPLEMENTED)
+ *	    Drop tolerance parameter. At step j of the Gaussian elimination,
+ *          if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij.
+ *          0 <= drop_tol <= 1. The default value of drop_tol is 0.
+ *
+ * relax    (input) int
+ *          To control degree of relaxing supernodes. If the number
+ *          of nodes (columns) in a subtree of the elimination tree is less
+ *          than relax, this subtree is considered as one supernode,
+ *          regardless of the row structures of those columns.
+ *
+ * panel_size (input) int
+ *          A panel consists of at most panel_size consecutive columns.
+ *
+ * etree    (input) int*, dimension (A->ncol)
+ *          Elimination tree of A'*A.
+ *          Note: etree is a vector of parent pointers for a forest whose
+ *          vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
+ *          On input, the columns of A should be permuted so that the
+ *          etree is in a certain postorder.
+ *
+ * work     (input/output) void*, size (lwork) (in bytes)
+ *          User-supplied work space and space for the output data structures.
+ *          Not referenced if lwork = 0;
+ *
+ * lwork   (input) int
+ *         Specifies the size of work array in bytes.
+ *         = 0:  allocate space internally by system malloc;
+ *         > 0:  use user-supplied work array of length lwork in bytes,
+ *               returns error if space runs out.
+ *         = -1: the routine guesses the amount of space needed without
+ *               performing the factorization, and returns it in
+ *               *info; no other side effects.
+ *
+ * perm_c   (input) int*, dimension (A->ncol)
+ *	    Column permutation vector, which defines the 
+ *          permutation matrix Pc; perm_c[i] = j means column i of A is 
+ *          in position j in A*Pc.
+ *          When searching for diagonal, perm_c[*] is applied to the
+ *          row subscripts of A, so that diagonal threshold pivoting
+ *          can find the diagonal of A, rather than that of A*Pc.
+ *
+ * perm_r   (input/output) int*, dimension (A->nrow)
+ *          Row permutation vector which defines the permutation matrix Pr,
+ *          perm_r[i] = j means row i of A is in position j in Pr*A.
+ *          If options->Fact = SamePattern_SameRowPerm, the pivoting routine
+ *             will try to use the input perm_r, unless a certain threshold
+ *             criterion is violated. In that case, perm_r is overwritten by
+ *             a new permutation determined by partial pivoting or diagonal
+ *             threshold pivoting.
+ *          Otherwise, perm_r is output argument;
+ *
+ * L        (output) SuperMatrix*
+ *          The factor L from the factorization Pr*A=L*U; use compressed row 
+ *          subscripts storage for supernodes, i.e., L has type: 
+ *          Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
+ *
+ * U        (output) SuperMatrix*
+ *	    The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
+ *          storage scheme, i.e., U has types: Stype = SLU_NC, 
+ *          Dtype = SLU_S, Mtype = SLU_TRU.
+ *
+ * stat     (output) SuperLUStat_t*
+ *          Record the statistics on runtime and floating-point operation count.
+ *          See util.h for the definition of 'SuperLUStat_t'.
+ *
+ * info     (output) int*
+ *          = 0: successful exit
+ *          < 0: if info = -i, the i-th argument had an illegal value
+ *          > 0: if info = i, and i is
+ *             <= A->ncol: U(i,i) is exactly zero. The factorization has
+ *                been completed, but the factor U is exactly singular,
+ *                and division by zero will occur if it is used to solve a
+ *                system of equations.
+ *             > A->ncol: number of bytes allocated when memory allocation
+ *                failure occurred, plus A->ncol. If lwork = -1, it is
+ *                the estimated amount of space needed, plus A->ncol.
+ *
+ * ======================================================================
+ *
+ * Local Working Arrays: 
+ * ======================
+ *   m = number of rows in the matrix
+ *   n = number of columns in the matrix
+ *
+ *   xprune[0:n-1]: xprune[*] points to locations in subscript 
+ *	vector lsub[*]. For column i, xprune[i] denotes the point where 
+ *	structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need 
+ *	to be traversed for symbolic factorization.
+ *
+ *   marker[0:3*m-1]: marker[i] = j means that node i has been 
+ *	reached when working on column j.
+ *	Storage: relative to original row subscripts
+ *	NOTE: There are 3 of them: marker/marker1 are used for panel dfs, 
+ *	      see spanel_dfs.c; marker2 is used for inner-factorization,
+ *            see scolumn_dfs.c.
+ *
+ *   parent[0:m-1]: parent vector used during dfs
+ *      Storage: relative to new row subscripts
+ *
+ *   xplore[0:m-1]: xplore[i] gives the location of the next (dfs) 
+ *	unexplored neighbor of i in lsub[*]
+ *
+ *   segrep[0:nseg-1]: contains the list of supernodal representatives
+ *	in topological order of the dfs. A supernode representative is the 
+ *	last column of a supernode.
+ *      The maximum size of segrep[] is n.
+ *
+ *   repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a 
+ *	supernodal representative r, repfnz[r] is the location of the first 
+ *	nonzero in this segment.  It is also used during the dfs: repfnz[r]>0
+ *	indicates the supernode r has been explored.
+ *	NOTE: There are W of them, each used for one column of a panel. 
+ *
+ *   panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below 
+ *      the panel diagonal. These are filled in during spanel_dfs(), and are
+ *      used later in the inner LU factorization within the panel.
+ *	panel_lsub[]/dense[] pair forms the SPA data structure.
+ *	NOTE: There are W of them.
+ *
+ *   dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
+ *	    	   NOTE: there are W of them.
+ *
+ *   tempv[0:*]: real temporary used for dense numeric kernels;
+ *	The size of this array is defined by NUM_TEMPV() in ssp_defs.h.
+ *
+ */
+    /* Local working arrays */
+    NCPformat *Astore;
+    int       *iperm_r = NULL; /* inverse of perm_r;
+			   used when options->Fact == SamePattern_SameRowPerm */
+    int       *iperm_c; /* inverse of perm_c */
+    int       *iwork;
+    float    *swork;
+    int	      *segrep, *repfnz, *parent, *xplore;
+    int	      *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
+    int	      *xprune;
+    int	      *marker;
+    float    *dense, *tempv;
+    int       *relax_end;
+    float    *a;
+    int       *asub;
+    int       *xa_begin, *xa_end;
+    int       *xsup, *supno;
+    int       *xlsub, *xlusup, *xusub;
+    int       nzlumax;
+    static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
+
+    /* Local scalars */
+    fact_t    fact = options->Fact;
+    double    diag_pivot_thresh = options->DiagPivotThresh;
+    int       pivrow;   /* pivotal row number in the original matrix A */
+    int       nseg1;	/* no of segments in U-column above panel row jcol */
+    int       nseg;	/* no of segments in each U-column */
+    register int jcol;	
+    register int kcol;	/* end column of a relaxed snode */
+    register int icol;
+    register int i, k, jj, new_next, iinfo;
+    int       m, n, min_mn, jsupno, fsupc, nextlu, nextu;
+    int       w_def;	/* upper bound on panel width */
+    int       usepr, iperm_r_allocated = 0;
+    int       nnzL, nnzU;
+    int       *panel_histo = stat->panel_histo;
+    flops_t   *ops = stat->ops;
+
+    iinfo    = 0;
+    m        = A->nrow;
+    n        = A->ncol;
+    min_mn   = SUPERLU_MIN(m, n);
+    Astore   = A->Store;
+    a        = Astore->nzval;
+    asub     = Astore->rowind;
+    xa_begin = Astore->colbeg;
+    xa_end   = Astore->colend;
+
+    /* Allocate storage common to the factor routines */
+    *info = sLUMemInit(fact, work, lwork, m, n, Astore->nnz,
+                       panel_size, L, U, &Glu, &iwork, &swork);
+    if ( *info ) return;
+    
+    xsup    = Glu.xsup;
+    supno   = Glu.supno;
+    xlsub   = Glu.xlsub;
+    xlusup  = Glu.xlusup;
+    xusub   = Glu.xusub;
+    
+    SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
+	     &repfnz, &panel_lsub, &xprune, &marker);
+    sSetRWork(m, panel_size, swork, &dense, &tempv);
+    
+    usepr = (fact == SamePattern_SameRowPerm);
+    if ( usepr ) {
+	/* Compute the inverse of perm_r */
+	iperm_r = (int *) intMalloc(m);
+	for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
+	iperm_r_allocated = 1;
+    }
+    iperm_c = (int *) intMalloc(n);
+    for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
+
+    /* Identify relaxed snodes */
+    relax_end = (int *) intMalloc(n);
+    if ( options->SymmetricMode == YES ) {
+        heap_relax_snode(n, etree, relax, marker, relax_end); 
+    } else {
+        relax_snode(n, etree, relax, marker, relax_end); 
+    }
+    
+    ifill (perm_r, m, EMPTY);
+    ifill (marker, m * NO_MARKER, EMPTY);
+    supno[0] = -1;
+    xsup[0]  = xlsub[0] = xusub[0] = xlusup[0] = 0;
+    w_def    = panel_size;
+
+    /* 
+     * Work on one "panel" at a time. A panel is one of the following: 
+     *	   (a) a relaxed supernode at the bottom of the etree, or
+     *	   (b) panel_size contiguous columns, defined by the user
+     */
+    for (jcol = 0; jcol < min_mn; ) {
+
+	if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
+   	    kcol = relax_end[jcol];	  /* end of the relaxed snode */
+	    panel_histo[kcol-jcol+1]++;
+
+	    /* --------------------------------------
+	     * Factorize the relaxed supernode(jcol:kcol) 
+	     * -------------------------------------- */
+	    /* Determine the union of the row structure of the snode */
+	    if ( (*info = ssnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
+				    xprune, marker, &Glu)) != 0 )
+		return;
+
+            nextu    = xusub[jcol];
+	    nextlu   = xlusup[jcol];
+	    jsupno   = supno[jcol];
+	    fsupc    = xsup[jsupno];
+	    new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
+	    nzlumax = Glu.nzlumax;
+	    while ( new_next > nzlumax ) {
+		if ( (*info = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)) )
+		    return;
+	    }
+    
+	    for (icol = jcol; icol<= kcol; icol++) {
+		xusub[icol+1] = nextu;
+		
+    		/* Scatter into SPA dense[*] */
+    		for (k = xa_begin[icol]; k < xa_end[icol]; k++)
+        	    dense[asub[k]] = a[k];
+
+	       	/* Numeric update within the snode */
+	        ssnode_bmod(icol, fsupc, dense, tempv, &Glu, stat);
+
+		if ( (*info = spivotL(icol, diag_pivot_thresh, &usepr, perm_r,
+				      iperm_r, iperm_c, &pivrow, &Glu, stat)) )
+		    if ( iinfo == 0 ) iinfo = *info;
+		
+#ifdef DEBUG
+		sprint_lu_col("[1]: ", icol, pivrow, xprune, &Glu);
+#endif
+
+	    }
+
+	    jcol = icol;
+
+	} else { /* Work on one panel of panel_size columns */
+	    
+	    /* Adjust panel_size so that a panel won't overlap with the next 
+	     * relaxed snode.
+	     */
+	    panel_size = w_def;
+	    for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++) 
+		if ( relax_end[k] != EMPTY ) {
+		    panel_size = k - jcol;
+		    break;
+		}
+	    if ( k == min_mn ) panel_size = min_mn - jcol;
+	    panel_histo[panel_size]++;
+
+	    /* symbolic factor on a panel of columns */
+	    spanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
+		      dense, panel_lsub, segrep, repfnz, xprune,
+		      marker, parent, xplore, &Glu);
+	    
+	    /* numeric sup-panel updates in topological order */
+	    spanel_bmod(m, panel_size, jcol, nseg1, dense,
+		        tempv, segrep, repfnz, &Glu, stat);
+	    
+	    /* Sparse LU within the panel, and below panel diagonal */
+    	    for ( jj = jcol; jj < jcol + panel_size; jj++) {
+ 		k = (jj - jcol) * m; /* column index for w-wide arrays */
+
+		nseg = nseg1;	/* Begin after all the panel segments */
+
+	    	if ((*info = scolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k],
+					segrep, &repfnz[k], xprune, marker,
+					parent, xplore, &Glu)) != 0) return;
+
+	      	/* Numeric updates */
+	    	if ((*info = scolumn_bmod(jj, (nseg - nseg1), &dense[k],
+					 tempv, &segrep[nseg1], &repfnz[k],
+					 jcol, &Glu, stat)) != 0) return;
+		
+	        /* Copy the U-segments to ucol[*] */
+		if ((*info = scopy_to_ucol(jj, nseg, segrep, &repfnz[k],
+					  perm_r, &dense[k], &Glu)) != 0)
+		    return;
+
+	    	if ( (*info = spivotL(jj, diag_pivot_thresh, &usepr, perm_r,
+				      iperm_r, iperm_c, &pivrow, &Glu, stat)) )
+		    if ( iinfo == 0 ) iinfo = *info;
+
+		/* Prune columns (0:jj-1) using column jj */
+	    	spruneL(jj, perm_r, pivrow, nseg, segrep,
+                        &repfnz[k], xprune, &Glu);
+
+		/* Reset repfnz[] for this column */
+	    	resetrep_col (nseg, segrep, &repfnz[k]);
+		
+#ifdef DEBUG
+		sprint_lu_col("[2]: ", jj, pivrow, xprune, &Glu);
+#endif
+
+	    }
+
+   	    jcol += panel_size;	/* Move to the next panel */
+
+	} /* else */
+
+    } /* for */
+
+    *info = iinfo;
+    
+    if ( m > n ) {
+	k = 0;
+        for (i = 0; i < m; ++i) 
+            if ( perm_r[i] == EMPTY ) {
+    		perm_r[i] = n + k;
+		++k;
+	    }
+    }
+
+    countnz(min_mn, xprune, &nnzL, &nnzU, &Glu);
+    fixupL(min_mn, perm_r, &Glu);
+
+    sLUWorkFree(iwork, swork, &Glu); /* Free work space and compress storage */
+
+    if ( fact == SamePattern_SameRowPerm ) {
+        /* L and U structures may have changed due to possibly different
+	   pivoting, even though the storage is available.
+	   There could also be memory expansions, so the array locations
+           may have changed, */
+        ((SCformat *)L->Store)->nnz = nnzL;
+	((SCformat *)L->Store)->nsuper = Glu.supno[n];
+	((SCformat *)L->Store)->nzval = Glu.lusup;
+	((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
+	((SCformat *)L->Store)->rowind = Glu.lsub;
+	((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
+	((NCformat *)U->Store)->nnz = nnzU;
+	((NCformat *)U->Store)->nzval = Glu.ucol;
+	((NCformat *)U->Store)->rowind = Glu.usub;
+	((NCformat *)U->Store)->colptr = Glu.xusub;
+    } else {
+        sCreate_SuperNode_Matrix(L, A->nrow, A->ncol, nnzL, Glu.lusup, 
+	                         Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
+			         Glu.xsup, SLU_SC, SLU_S, SLU_TRLU);
+    	sCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, 
+			       Glu.usub, Glu.xusub, SLU_NC, SLU_S, SLU_TRU);
+    }
+    
+    ops[FACT] += ops[TRSV] + ops[GEMV];	
+    
+    if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
+    SUPERLU_FREE (iperm_c);
+    SUPERLU_FREE (relax_end);
+
+}
--- a/intern/opennl/superlu/sgstrs.c
+++ b/intern/opennl/superlu/sgstrs.c
@ -0,0 +1,331 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ 
+  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ 
+  Permission is hereby granted to use or copy this program for any
+  purpose, provided the above notices are retained on all copies.
+  Permission to modify the code and to distribute modified code is
+  granted, provided the above notices are retained, and a notice that
+  the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+
+
+/* 
+ * Function prototypes 
+ */
+void susolve(int, int, float*, float*);
+void slsolve(int, int, float*, float*);
+void smatvec(int, int, int, float*, float*, float*);
+
+
+void
+sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
+        int *perm_c, int *perm_r, SuperMatrix *B,
+        SuperLUStat_t *stat, int *info)
+{
+/*
+ * Purpose
+ * =======
+ *
+ * SGSTRS solves a system of linear equations A*X=B or A'*X=B
+ * with A sparse and B dense, using the LU factorization computed by
+ * SGSTRF.
+ *
+ * See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ * Arguments
+ * =========
+ *
+ * trans   (input) trans_t
+ *          Specifies the form of the system of equations:
+ *          = NOTRANS: A * X = B  (No transpose)
+ *          = TRANS:   A'* X = B  (Transpose)
+ *          = CONJ:    A**H * X = B  (Conjugate transpose)
+ *
+ * L       (input) SuperMatrix*
+ *         The factor L from the factorization Pr*A*Pc=L*U as computed by
+ *         sgstrf(). Use compressed row subscripts storage for supernodes,
+ *         i.e., L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
+ *
+ * U       (input) SuperMatrix*
+ *         The factor U from the factorization Pr*A*Pc=L*U as computed by
+ *         sgstrf(). Use column-wise storage scheme, i.e., U has types:
+ *         Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU.
+ *
+ * perm_c  (input) int*, dimension (L->ncol)
+ *	   Column permutation vector, which defines the 
+ *         permutation matrix Pc; perm_c[i] = j means column i of A is 
+ *         in position j in A*Pc.
+ *
+ * perm_r  (input) int*, dimension (L->nrow)
+ *         Row permutation vector, which defines the permutation matrix Pr; 
+ *         perm_r[i] = j means row i of A is in position j in Pr*A.
+ *
+ * B       (input/output) SuperMatrix*
+ *         B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
+ *         On entry, the right hand side matrix.
+ *         On exit, the solution matrix if info = 0;
+ *
+ * stat     (output) SuperLUStat_t*
+ *          Record the statistics on runtime and floating-point operation count.
+ *          See util.h for the definition of 'SuperLUStat_t'.
+ *
+ * info    (output) int*
+ * 	   = 0: successful exit
+ *	   < 0: if info = -i, the i-th argument had an illegal value
+ *
+ */
+#ifdef _CRAY
+    _fcd ftcs1, ftcs2, ftcs3, ftcs4;
+#endif
+#ifdef USE_VENDOR_BLAS
+    float   alpha = 1.0, beta = 1.0;
+    float   *work_col;
+#endif
+    DNformat *Bstore;
+    float   *Bmat;
+    SCformat *Lstore;
+    NCformat *Ustore;
+    float   *Lval, *Uval;
+    int      fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
+    int      i, j, k, iptr, jcol, n, ldb, nrhs;
+    float   *work, *rhs_work, *soln;
+    flops_t  solve_ops;
+    void sprint_soln();
+
+    /* Test input parameters ... */
+    *info = 0;
+    Bstore = B->Store;
+    ldb = Bstore->lda;
+    nrhs = B->ncol;
+    if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
+    else if ( L->nrow != L->ncol || L->nrow < 0 ||
+	      L->Stype != SLU_SC || L->Dtype != SLU_S || L->Mtype != SLU_TRLU )
+	*info = -2;
+    else if ( U->nrow != U->ncol || U->nrow < 0 ||
+	      U->Stype != SLU_NC || U->Dtype != SLU_S || U->Mtype != SLU_TRU )
+	*info = -3;
+    else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
+	      B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE )
+	*info = -6;
+    if ( *info ) {
+	i = -(*info);
+	xerbla_("sgstrs", &i);
+	return;
+    }
+
+    n = L->nrow;
+    work = floatCalloc(n * nrhs);
+    if ( !work ) ABORT("Malloc fails for local work[].");
+    soln = floatMalloc(n);
+    if ( !soln ) ABORT("Malloc fails for local soln[].");
+
+    Bmat = Bstore->nzval;
+    Lstore = L->Store;
+    Lval = Lstore->nzval;
+    Ustore = U->Store;
+    Uval = Ustore->nzval;
+    solve_ops = 0;
+    
+    if ( trans == NOTRANS ) {
+	/* Permute right hand sides to form Pr*B */
+	for (i = 0; i < nrhs; i++) {
+	    rhs_work = &Bmat[i*ldb];
+	    for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
+	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
+	}
+	
+	/* Forward solve PLy=Pb. */
+	for (k = 0; k <= Lstore->nsuper; k++) {
+	    fsupc = L_FST_SUPC(k);
+	    istart = L_SUB_START(fsupc);
+	    nsupr = L_SUB_START(fsupc+1) - istart;
+	    nsupc = L_FST_SUPC(k+1) - fsupc;
+	    nrow = nsupr - nsupc;
+
+	    solve_ops += nsupc * (nsupc - 1) * nrhs;
+	    solve_ops += 2 * nrow * nsupc * nrhs;
+	    
+	    if ( nsupc == 1 ) {
+		for (j = 0; j < nrhs; j++) {
+		    rhs_work = &Bmat[j*ldb];
+	    	    luptr = L_NZ_START(fsupc);
+		    for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
+			irow = L_SUB(iptr);
+			++luptr;
+			rhs_work[irow] -= rhs_work[fsupc] * Lval[luptr];
+		    }
+		}
+	    } else {
+	    	luptr = L_NZ_START(fsupc);
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+		ftcs1 = _cptofcd("L", strlen("L"));
+		ftcs2 = _cptofcd("N", strlen("N"));
+		ftcs3 = _cptofcd("U", strlen("U"));
+		STRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
+		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
+		
+		SGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha, 
+			&Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
+			&beta, &work[0], &n );
+#else
+		strsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
+		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
+		
+		sgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha, 
+			&Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
+			&beta, &work[0], &n );
+#endif
+		for (j = 0; j < nrhs; j++) {
+		    rhs_work = &Bmat[j*ldb];
+		    work_col = &work[j*n];
+		    iptr = istart + nsupc;
+		    for (i = 0; i < nrow; i++) {
+			irow = L_SUB(iptr);
+			rhs_work[irow] -= work_col[i]; /* Scatter */
+			work_col[i] = 0.0;
+			iptr++;
+		    }
+		}
+#else		
+		for (j = 0; j < nrhs; j++) {
+		    rhs_work = &Bmat[j*ldb];
+		    slsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
+		    smatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
+			    &rhs_work[fsupc], &work[0] );
+
+		    iptr = istart + nsupc;
+		    for (i = 0; i < nrow; i++) {
+			irow = L_SUB(iptr);
+			rhs_work[irow] -= work[i];
+			work[i] = 0.0;
+			iptr++;
+		    }
+		}
+#endif		    
+	    } /* else ... */
+	} /* for L-solve */
+
+#ifdef DEBUG
+  	printf("After L-solve: y=\n");
+	sprint_soln(n, Bmat);
+#endif
+
+	/*
+	 * Back solve Ux=y.
+	 */
+	for (k = Lstore->nsuper; k >= 0; k--) {
+	    fsupc = L_FST_SUPC(k);
+	    istart = L_SUB_START(fsupc);
+	    nsupr = L_SUB_START(fsupc+1) - istart;
+	    nsupc = L_FST_SUPC(k+1) - fsupc;
+	    luptr = L_NZ_START(fsupc);
+
+	    solve_ops += nsupc * (nsupc + 1) * nrhs;
+
+	    if ( nsupc == 1 ) {
+		rhs_work = &Bmat[0];
+		for (j = 0; j < nrhs; j++) {
+		    rhs_work[fsupc] /= Lval[luptr];
+		    rhs_work += ldb;
+		}
+	    } else {
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+		ftcs1 = _cptofcd("L", strlen("L"));
+		ftcs2 = _cptofcd("U", strlen("U"));
+		ftcs3 = _cptofcd("N", strlen("N"));
+		STRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
+		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
+#else
+		strsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
+		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
+#endif
+#else		
+		for (j = 0; j < nrhs; j++)
+		    susolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
+#endif		
+	    }
+
+	    for (j = 0; j < nrhs; ++j) {
+		rhs_work = &Bmat[j*ldb];
+		for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
+		    solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
+		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
+			irow = U_SUB(i);
+			rhs_work[irow] -= rhs_work[jcol] * Uval[i];
+		    }
+		}
+	    }
+	    
+	} /* for U-solve */
+
+#ifdef DEBUG
+  	printf("After U-solve: x=\n");
+	sprint_soln(n, Bmat);
+#endif
+
+	/* Compute the final solution X := Pc*X. */
+	for (i = 0; i < nrhs; i++) {
+	    rhs_work = &Bmat[i*ldb];
+	    for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
+	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
+	}
+	
+        stat->ops[SOLVE] = solve_ops;
+
+    } else { /* Solve A'*X=B or CONJ(A)*X=B */
+	/* Permute right hand sides to form Pc'*B. */
+	for (i = 0; i < nrhs; i++) {
+	    rhs_work = &Bmat[i*ldb];
+	    for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
+	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
+	}
+
+	stat->ops[SOLVE] = 0;
+	for (k = 0; k < nrhs; ++k) {
+	    
+	    /* Multiply by inv(U'). */
+	    sp_strsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);
+	    
+	    /* Multiply by inv(L'). */
+	    sp_strsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
+	    
+	}
+	/* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
+	for (i = 0; i < nrhs; i++) {
+	    rhs_work = &Bmat[i*ldb];
+	    for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
+	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
+	}
+
+    }
+
+    SUPERLU_FREE(work);
+    SUPERLU_FREE(soln);
+}
+
+/*
+ * Diagnostic print of the solution vector 
+ */
+void
+sprint_soln(int n, float *soln)
+{
+    int i;
+
+    for (i = 0; i < n; i++) 
+  	printf("\t%d: %.4f\n", i, soln[i]);
+}
--- a/intern/opennl/superlu/smemory.c
+++ b/intern/opennl/superlu/smemory.c
@ -0,0 +1,676 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+#include "ssp_defs.h"
+
+/* Constants */
+#define NO_MEMTYPE  4      /* 0: lusup;
+			      1: ucol;
+			      2: lsub;
+			      3: usub */
+#define GluIntArray(n)   (5 * (n) + 5)
+
+/* Internal prototypes */
+void  *sexpand (int *, MemType,int, int, GlobalLU_t *);
+int   sLUWorkInit (int, int, int, int **, float **, LU_space_t);
+void  copy_mem_float (int, void *, void *);
+void  sStackCompress (GlobalLU_t *);
+void  sSetupSpace (void *, int, LU_space_t *);
+void  *suser_malloc (int, int);
+void  suser_free (int, int);
+
+/* External prototypes (in memory.c - prec-indep) */
+extern void    copy_mem_int    (int, void *, void *);
+extern void    user_bcopy      (char *, char *, int);
+
+/* Headers for 4 types of dynamatically managed memory */
+typedef struct e_node {
+    int size;      /* length of the memory that has been used */
+    void *mem;     /* pointer to the new malloc'd store */
+} ExpHeader;
+
+typedef struct {
+    int  size;
+    int  used;
+    int  top1;  /* grow upward, relative to &array[0] */
+    int  top2;  /* grow downward */
+    void *array;
+} LU_stack_t;
+
+/* Variables local to this file */
+static ExpHeader *expanders = 0; /* Array of pointers to 4 types of memory */
+static LU_stack_t stack;
+static int no_expand;
+
+/* Macros to manipulate stack */
+#define StackFull(x)         ( x + stack.used >= stack.size )
+#define NotDoubleAlign(addr) ( (long int)addr & 7 )
+#define DoubleAlign(addr)    ( ((long int)addr + 7) & ~7L )
+#define TempSpace(m, w)      ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \
+			      (w + 1) * m * sizeof(float) )
+#define Reduce(alpha)        ((alpha + 1) / 2)  /* i.e. (alpha-1)/2 + 1 */
+
+
+
+
+/*
+ * Setup the memory model to be used for factorization.
+ *    lwork = 0: use system malloc;
+ *    lwork > 0: use user-supplied work[] space.
+ */
+void sSetupSpace(void *work, int lwork, LU_space_t *MemModel)
+{
+    if ( lwork == 0 ) {
+	*MemModel = SYSTEM; /* malloc/free */
+    } else if ( lwork > 0 ) {
+	*MemModel = USER;   /* user provided space */
+	stack.used = 0;
+	stack.top1 = 0;
+	stack.top2 = (lwork/4)*4; /* must be word addressable */
+	stack.size = stack.top2;
+	stack.array = (void *) work;
+    }
+}
+
+
+
+void *suser_malloc(int bytes, int which_end)
+{
+    void *buf;
+    
+    if ( StackFull(bytes) ) return (NULL);
+
+    if ( which_end == HEAD ) {
+	buf = (char*) stack.array + stack.top1;
+	stack.top1 += bytes;
+    } else {
+	stack.top2 -= bytes;
+	buf = (char*) stack.array + stack.top2;
+    }
+    
+    stack.used += bytes;
+    return buf;
+}
+
+
+void suser_free(int bytes, int which_end)
+{
+    if ( which_end == HEAD ) {
+	stack.top1 -= bytes;
+    } else {
+	stack.top2 += bytes;
+    }
+    stack.used -= bytes;
+}
+
+
+
+/*
+ * mem_usage consists of the following fields:
+ *    - for_lu (float)
+ *      The amount of space used in bytes for the L\U data structures.
+ *    - total_needed (float)
+ *      The amount of space needed in bytes to perform factorization.
+ *    - expansions (int)
+ *      Number of memory expansions during the LU factorization.
+ */
+int sQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
+{
+    SCformat *Lstore;
+    NCformat *Ustore;
+    register int n, iword, dword, panel_size = sp_ienv(1);
+
+    Lstore = L->Store;
+    Ustore = U->Store;
+    n = L->ncol;
+    iword = sizeof(int);
+    dword = sizeof(float);
+
+    /* For LU factors */
+    mem_usage->for_lu = (float)( (4*n + 3) * iword + Lstore->nzval_colptr[n] *
+				 dword + Lstore->rowind_colptr[n] * iword );
+    mem_usage->for_lu += (float)( (n + 1) * iword +
+				 Ustore->colptr[n] * (dword + iword) );
+
+    /* Working storage to support factorization */
+    mem_usage->total_needed = mem_usage->for_lu +
+	(float)( (2 * panel_size + 4 + NO_MARKER) * n * iword +
+		(panel_size + 1) * n * dword );
+
+    mem_usage->expansions = --no_expand;
+
+    return 0;
+} /* sQuerySpace */
+
+/*
+ * Allocate storage for the data structures common to all factor routines.
+ * For those unpredictable size, make a guess as FILL * nnz(A).
+ * Return value:
+ *     If lwork = -1, return the estimated amount of space required, plus n;
+ *     otherwise, return the amount of space actually allocated when
+ *     memory allocation failure occurred.
+ */
+int
+sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
+	  int panel_size, SuperMatrix *L, SuperMatrix *U, GlobalLU_t *Glu,
+	  int **iwork, float **dwork)
+{
+    int      info, iword, dword;
+    SCformat *Lstore;
+    NCformat *Ustore;
+    int      *xsup, *supno;
+    int      *lsub, *xlsub;
+    float   *lusup;
+    int      *xlusup;
+    float   *ucol;
+    int      *usub, *xusub;
+    int      nzlmax, nzumax, nzlumax;
+    int      FILL = sp_ienv(6);
+    
+    Glu->n    = n;
+    no_expand = 0;
+    iword     = sizeof(int);
+    dword     = sizeof(float);
+
+    if ( !expanders )	
+        expanders = (ExpHeader*)SUPERLU_MALLOC(NO_MEMTYPE * sizeof(ExpHeader));
+    if ( !expanders ) ABORT("SUPERLU_MALLOC fails for expanders");
+    
+    if ( fact != SamePattern_SameRowPerm ) {
+	/* Guess for L\U factors */
+	nzumax = nzlumax = FILL * annz;
+	nzlmax = SUPERLU_MAX(1, FILL/4.) * annz;
+
+	if ( lwork == -1 ) {
+	    return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
+		    + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
+        } else {
+	    sSetupSpace(work, lwork, &Glu->MemModel);
+	}
+	
+#ifdef DEBUG		   
+	printf("sLUMemInit() called: annz %d, MemModel %d\n", 
+		annz, Glu->MemModel);
+#endif	
+	
+	/* Integer pointers for L\U factors */
+	if ( Glu->MemModel == SYSTEM ) {
+	    xsup   = intMalloc(n+1);
+	    supno  = intMalloc(n+1);
+	    xlsub  = intMalloc(n+1);
+	    xlusup = intMalloc(n+1);
+	    xusub  = intMalloc(n+1);
+	} else {
+	    xsup   = (int *)suser_malloc((n+1) * iword, HEAD);
+	    supno  = (int *)suser_malloc((n+1) * iword, HEAD);
+	    xlsub  = (int *)suser_malloc((n+1) * iword, HEAD);
+	    xlusup = (int *)suser_malloc((n+1) * iword, HEAD);
+	    xusub  = (int *)suser_malloc((n+1) * iword, HEAD);
+	}
+
+	lusup = (float *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
+	ucol  = (float *) sexpand( &nzumax, UCOL, 0, 0, Glu );
+	lsub  = (int *)    sexpand( &nzlmax, LSUB, 0, 0, Glu );
+	usub  = (int *)    sexpand( &nzumax, USUB, 0, 1, Glu );
+
+	while ( !lusup || !ucol || !lsub || !usub ) {
+	    if ( Glu->MemModel == SYSTEM ) {
+		SUPERLU_FREE(lusup); 
+		SUPERLU_FREE(ucol); 
+		SUPERLU_FREE(lsub); 
+		SUPERLU_FREE(usub);
+	    } else {
+		suser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword, HEAD);
+	    }
+	    nzlumax /= 2;
+	    nzumax /= 2;
+	    nzlmax /= 2;
+	    if ( nzlumax < annz ) {
+		printf("Not enough memory to perform factorization.\n");
+		return (smemory_usage(nzlmax, nzumax, nzlumax, n) + n);
+	    }
+	    lusup = (float *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
+	    ucol  = (float *) sexpand( &nzumax, UCOL, 0, 0, Glu );
+	    lsub  = (int *)    sexpand( &nzlmax, LSUB, 0, 0, Glu );
+	    usub  = (int *)    sexpand( &nzumax, USUB, 0, 1, Glu );
+	}
+	
+    } else {
+	/* fact == SamePattern_SameRowPerm */
+	Lstore   = L->Store;
+	Ustore   = U->Store;
+	xsup     = Lstore->sup_to_col;
+	supno    = Lstore->col_to_sup;
+	xlsub    = Lstore->rowind_colptr;
+	xlusup   = Lstore->nzval_colptr;
+	xusub    = Ustore->colptr;
+	nzlmax   = Glu->nzlmax;    /* max from previous factorization */
+	nzumax   = Glu->nzumax;
+	nzlumax  = Glu->nzlumax;
+	
+	if ( lwork == -1 ) {
+	    return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
+		    + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
+        } else if ( lwork == 0 ) {
+	    Glu->MemModel = SYSTEM;
+	} else {
+	    Glu->MemModel = USER;
+	    stack.top2 = (lwork/4)*4; /* must be word-addressable */
+	    stack.size = stack.top2;
+	}
+	
+	lsub  = expanders[LSUB].mem  = Lstore->rowind;
+	lusup = expanders[LUSUP].mem = Lstore->nzval;
+	usub  = expanders[USUB].mem  = Ustore->rowind;
+	ucol  = expanders[UCOL].mem  = Ustore->nzval;;
+	expanders[LSUB].size         = nzlmax;
+	expanders[LUSUP].size        = nzlumax;
+	expanders[USUB].size         = nzumax;
+	expanders[UCOL].size         = nzumax;	
+    }
+
+    Glu->xsup    = xsup;
+    Glu->supno   = supno;
+    Glu->lsub    = lsub;
+    Glu->xlsub   = xlsub;
+    Glu->lusup   = lusup;
+    Glu->xlusup  = xlusup;
+    Glu->ucol    = ucol;
+    Glu->usub    = usub;
+    Glu->xusub   = xusub;
+    Glu->nzlmax  = nzlmax;
+    Glu->nzumax  = nzumax;
+    Glu->nzlumax = nzlumax;
+    
+    info = sLUWorkInit(m, n, panel_size, iwork, dwork, Glu->MemModel);
+    if ( info )
+	return ( info + smemory_usage(nzlmax, nzumax, nzlumax, n) + n);
+    
+    ++no_expand;
+    return 0;
+    
+} /* sLUMemInit */
+
+/* Allocate known working storage. Returns 0 if success, otherwise
+   returns the number of bytes allocated so far when failure occurred. */
+int
+sLUWorkInit(int m, int n, int panel_size, int **iworkptr, 
+            float **dworkptr, LU_space_t MemModel)
+{
+    int    isize, dsize, extra;
+    float *old_ptr;
+    int    maxsuper = sp_ienv(3),
+           rowblk   = sp_ienv(4);
+
+    isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int);
+    dsize = (m * panel_size +
+	     NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(float);
+    
+    if ( MemModel == SYSTEM ) 
+	*iworkptr = (int *) intCalloc(isize/sizeof(int));
+    else
+	*iworkptr = (int *) suser_malloc(isize, TAIL);
+    if ( ! *iworkptr ) {
+	fprintf(stderr, "sLUWorkInit: malloc fails for local iworkptr[]\n");
+	return (isize + n);
+    }
+
+    if ( MemModel == SYSTEM )
+	*dworkptr = (float *) SUPERLU_MALLOC(dsize);
+    else {
+	*dworkptr = (float *) suser_malloc(dsize, TAIL);
+	if ( NotDoubleAlign(*dworkptr) ) {
+	    old_ptr = *dworkptr;
+	    *dworkptr = (float*) DoubleAlign(*dworkptr);
+	    *dworkptr = (float*) ((double*)*dworkptr - 1);
+	    extra = (char*)old_ptr - (char*)*dworkptr;
+#ifdef DEBUG	    
+	    printf("sLUWorkInit: not aligned, extra %d\n", extra);
+#endif	    
+	    stack.top2 -= extra;
+	    stack.used += extra;
+	}
+    }
+    if ( ! *dworkptr ) {
+	fprintf(stderr, "malloc fails for local dworkptr[].");
+	return (isize + dsize + n);
+    }
+	
+    return 0;
+}
+
+
+/*
+ * Set up pointers for real working arrays.
+ */
+void
+sSetRWork(int m, int panel_size, float *dworkptr,
+	 float **dense, float **tempv)
+{
+    float zero = 0.0;
+
+    int maxsuper = sp_ienv(3),
+        rowblk   = sp_ienv(4);
+    *dense = dworkptr;
+    *tempv = *dense + panel_size*m;
+    sfill (*dense, m * panel_size, zero);
+    sfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero);     
+}
+	
+/*
+ * Free the working storage used by factor routines.
+ */
+void sLUWorkFree(int *iwork, float *dwork, GlobalLU_t *Glu)
+{
+    if ( Glu->MemModel == SYSTEM ) {
+	SUPERLU_FREE (iwork);
+	SUPERLU_FREE (dwork);
+    } else {
+	stack.used -= (stack.size - stack.top2);
+	stack.top2 = stack.size;
+/*	sStackCompress(Glu);  */
+    }
+    
+    SUPERLU_FREE (expanders);	
+    expanders = 0;
+}
+
+/* Expand the data structures for L and U during the factorization.
+ * Return value:   0 - successful return
+ *               > 0 - number of bytes allocated when run out of space
+ */
+int
+sLUMemXpand(int jcol,
+	   int next,          /* number of elements currently in the factors */
+	   MemType mem_type,  /* which type of memory to expand  */
+	   int *maxlen,       /* modified - maximum length of a data structure */
+	   GlobalLU_t *Glu    /* modified - global LU data structures */
+	   )
+{
+    void   *new_mem;
+    
+#ifdef DEBUG    
+    printf("sLUMemXpand(): jcol %d, next %d, maxlen %d, MemType %d\n",
+	   jcol, next, *maxlen, mem_type);
+#endif    
+
+    if (mem_type == USUB) 
+    	new_mem = sexpand(maxlen, mem_type, next, 1, Glu);
+    else
+	new_mem = sexpand(maxlen, mem_type, next, 0, Glu);
+    
+    if ( !new_mem ) {
+	int    nzlmax  = Glu->nzlmax;
+	int    nzumax  = Glu->nzumax;
+	int    nzlumax = Glu->nzlumax;
+    	fprintf(stderr, "Can't expand MemType %d: jcol %d\n", mem_type, jcol);
+    	return (smemory_usage(nzlmax, nzumax, nzlumax, Glu->n) + Glu->n);
+    }
+
+    switch ( mem_type ) {
+      case LUSUP:
+	Glu->lusup   = (float *) new_mem;
+	Glu->nzlumax = *maxlen;
+	break;
+      case UCOL:
+	Glu->ucol   = (float *) new_mem;
+	Glu->nzumax = *maxlen;
+	break;
+      case LSUB:
+	Glu->lsub   = (int *) new_mem;
+	Glu->nzlmax = *maxlen;
+	break;
+      case USUB:
+	Glu->usub   = (int *) new_mem;
+	Glu->nzumax = *maxlen;
+	break;
+    }
+    
+    return 0;
+    
+}
+
+
+
+void
+copy_mem_float(int howmany, void *old, void *new)
+{
+    register int i;
+    float *dold = old;
+    float *dnew = new;
+    for (i = 0; i < howmany; i++) dnew[i] = dold[i];
+}
+
+/*
+ * Expand the existing storage to accommodate more fill-ins.
+ */
+void
+*sexpand (
+	 int *prev_len,   /* length used from previous call */
+	 MemType type,    /* which part of the memory to expand */
+	 int len_to_copy, /* size of the memory to be copied to new store */
+	 int keep_prev,   /* = 1: use prev_len;
+			     = 0: compute new_len to expand */
+	 GlobalLU_t *Glu  /* modified - global LU data structures */
+	)
+{
+    float    EXPAND = 1.5;
+    float    alpha;
+    void     *new_mem, *old_mem;
+    int      new_len, tries, lword, extra, bytes_to_copy;
+
+    alpha = EXPAND;
+
+    if ( no_expand == 0 || keep_prev ) /* First time allocate requested */
+        new_len = *prev_len;
+    else {
+	new_len = alpha * *prev_len;
+    }
+    
+    if ( type == LSUB || type == USUB ) lword = sizeof(int);
+    else lword = sizeof(float);
+
+    if ( Glu->MemModel == SYSTEM ) {
+	new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
+/*	new_mem = (void *) calloc(new_len, lword); */
+	if ( no_expand != 0 ) {
+	    tries = 0;
+	    if ( keep_prev ) {
+		if ( !new_mem ) return (NULL);
+	    } else {
+		while ( !new_mem ) {
+		    if ( ++tries > 10 ) return (NULL);
+		    alpha = Reduce(alpha);
+		    new_len = alpha * *prev_len;
+		    new_mem = (void *) SUPERLU_MALLOC(new_len * lword); 
+/*		    new_mem = (void *) calloc(new_len, lword); */
+		}
+	    }
+	    if ( type == LSUB || type == USUB ) {
+		copy_mem_int(len_to_copy, expanders[type].mem, new_mem);
+	    } else {
+		copy_mem_float(len_to_copy, expanders[type].mem, new_mem);
+	    }
+	    SUPERLU_FREE (expanders[type].mem);
+	}
+	expanders[type].mem = (void *) new_mem;
+	
+    } else { /* MemModel == USER */
+	if ( no_expand == 0 ) {
+	    new_mem = suser_malloc(new_len * lword, HEAD);
+	    if ( NotDoubleAlign(new_mem) &&
+		(type == LUSUP || type == UCOL) ) {
+		old_mem = new_mem;
+		new_mem = (void *)DoubleAlign(new_mem);
+		extra = (char*)new_mem - (char*)old_mem;
+#ifdef DEBUG		
+		printf("expand(): not aligned, extra %d\n", extra);
+#endif		
+		stack.top1 += extra;
+		stack.used += extra;
+	    }
+	    expanders[type].mem = (void *) new_mem;
+	}
+	else {
+	    tries = 0;
+	    extra = (new_len - *prev_len) * lword;
+	    if ( keep_prev ) {
+		if ( StackFull(extra) ) return (NULL);
+	    } else {
+		while ( StackFull(extra) ) {
+		    if ( ++tries > 10 ) return (NULL);
+		    alpha = Reduce(alpha);
+		    new_len = alpha * *prev_len;
+		    extra = (new_len - *prev_len) * lword;	    
+		}
+	    }
+
+	    if ( type != USUB ) {
+		new_mem = (void*)((char*)expanders[type + 1].mem + extra);
+		bytes_to_copy = (char*)stack.array + stack.top1
+		    - (char*)expanders[type + 1].mem;
+		user_bcopy(expanders[type+1].mem, new_mem, bytes_to_copy);
+
+		if ( type < USUB ) {
+		    Glu->usub = expanders[USUB].mem =
+			(void*)((char*)expanders[USUB].mem + extra);
+		}
+		if ( type < LSUB ) {
+		    Glu->lsub = expanders[LSUB].mem =
+			(void*)((char*)expanders[LSUB].mem + extra);
+		}
+		if ( type < UCOL ) {
+		    Glu->ucol = expanders[UCOL].mem =
+			(void*)((char*)expanders[UCOL].mem + extra);
+		}
+		stack.top1 += extra;
+		stack.used += extra;
+		if ( type == UCOL ) {
+		    stack.top1 += extra;   /* Add same amount for USUB */
+		    stack.used += extra;
+		}
+		
+	    } /* if ... */
+
+	} /* else ... */
+    }
+
+    expanders[type].size = new_len;
+    *prev_len = new_len;
+    if ( no_expand ) ++no_expand;
+    
+    return (void *) expanders[type].mem;
+    
+} /* sexpand */
+
+
+/*
+ * Compress the work[] array to remove fragmentation.
+ */
+void
+sStackCompress(GlobalLU_t *Glu)
+{
+    register int iword, dword, ndim;
+    char    *last, *fragment;
+    int      *ifrom, *ito;
+    float   *dfrom, *dto;
+    int      *xlsub, *lsub, *xusub, *usub, *xlusup;
+    float   *ucol, *lusup;
+    
+    iword = sizeof(int);
+    dword = sizeof(float);
+    ndim = Glu->n;
+
+    xlsub  = Glu->xlsub;
+    lsub   = Glu->lsub;
+    xusub  = Glu->xusub;
+    usub   = Glu->usub;
+    xlusup = Glu->xlusup;
+    ucol   = Glu->ucol;
+    lusup  = Glu->lusup;
+    
+    dfrom = ucol;
+    dto = (float *)((char*)lusup + xlusup[ndim] * dword);
+    copy_mem_float(xusub[ndim], dfrom, dto);
+    ucol = dto;
+
+    ifrom = lsub;
+    ito = (int *) ((char*)ucol + xusub[ndim] * iword);
+    copy_mem_int(xlsub[ndim], ifrom, ito);
+    lsub = ito;
+    
+    ifrom = usub;
+    ito = (int *) ((char*)lsub + xlsub[ndim] * iword);
+    copy_mem_int(xusub[ndim], ifrom, ito);
+    usub = ito;
+    
+    last = (char*)usub + xusub[ndim] * iword;
+    fragment = (char*) (((char*)stack.array + stack.top1) - last);
+    stack.used -= (long int) fragment;
+    stack.top1 -= (long int) fragment;
+
+    Glu->ucol = ucol;
+    Glu->lsub = lsub;
+    Glu->usub = usub;
+    
+#ifdef DEBUG
+    printf("sStackCompress: fragment %d\n", fragment);
+    /* for (last = 0; last < ndim; ++last)
+	print_lu_col("After compress:", last, 0);*/
+#endif    
+    
+}
+
+/*
+ * Allocate storage for original matrix A
+ */
+void
+sallocateA(int n, int nnz, float **a, int **asub, int **xa)
+{
+    *a    = (float *) floatMalloc(nnz);
+    *asub = (int *) intMalloc(nnz);
+    *xa   = (int *) intMalloc(n+1);
+}
+
+
+float *floatMalloc(int n)
+{
+    float *buf;
+    buf = (float *) SUPERLU_MALLOC(n * sizeof(float)); 
+    if ( !buf ) {
+	ABORT("SUPERLU_MALLOC failed for buf in floatMalloc()\n");
+    }
+    return (buf);
+}
+
+float *floatCalloc(int n)
+{
+    float *buf;
+    register int i;
+    float zero = 0.0;
+    buf = (float *) SUPERLU_MALLOC(n * sizeof(float));
+    if ( !buf ) {
+	ABORT("SUPERLU_MALLOC failed for buf in floatCalloc()\n");
+    }
+    for (i = 0; i < n; ++i) buf[i] = zero;
+    return (buf);
+}
+
+
+int smemory_usage(const int nzlmax, const int nzumax, 
+		  const int nzlumax, const int n)
+{
+    register int iword, dword;
+
+    iword   = sizeof(int);
+    dword   = sizeof(float);
+    
+    return (10 * n * iword +
+	    nzlmax * iword + nzumax * (iword + dword) + nzlumax * dword);
+
+}
--- a/intern/opennl/superlu/smyblas2.c
+++ b/intern/opennl/superlu/smyblas2.c
@ -0,0 +1,225 @@
+
+
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/*
+ * File name:		smyblas2.c
+ * Purpose:
+ *     Level 2 BLAS operations: solves and matvec, written in C.
+ * Note:
+ *     This is only used when the system lacks an efficient BLAS library.
+ */
+
+/*
+ * Solves a dense UNIT lower triangular system. The unit lower 
+ * triangular matrix is stored in a 2D array M(1:nrow,1:ncol). 
+ * The solution will be returned in the rhs vector.
+ */
+void slsolve ( int ldm, int ncol, float *M, float *rhs )
+{
+    int k;
+    float x0, x1, x2, x3, x4, x5, x6, x7;
+    float *M0;
+    register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
+    register int firstcol = 0;
+
+    M0 = &M[0];
+
+    while ( firstcol < ncol - 7 ) { /* Do 8 columns */
+      Mki0 = M0 + 1;
+      Mki1 = Mki0 + ldm + 1;
+      Mki2 = Mki1 + ldm + 1;
+      Mki3 = Mki2 + ldm + 1;
+      Mki4 = Mki3 + ldm + 1;
+      Mki5 = Mki4 + ldm + 1;
+      Mki6 = Mki5 + ldm + 1;
+      Mki7 = Mki6 + ldm + 1;
+
+      x0 = rhs[firstcol];
+      x1 = rhs[firstcol+1] - x0 * *Mki0++;
+      x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
+      x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
+      x4 = rhs[firstcol+4] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
+	                   - x3 * *Mki3++;
+      x5 = rhs[firstcol+5] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
+	                   - x3 * *Mki3++ - x4 * *Mki4++;
+      x6 = rhs[firstcol+6] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
+	                   - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++;
+      x7 = rhs[firstcol+7] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
+	                   - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++
+			   - x6 * *Mki6++;
+
+      rhs[++firstcol] = x1;
+      rhs[++firstcol] = x2;
+      rhs[++firstcol] = x3;
+      rhs[++firstcol] = x4;
+      rhs[++firstcol] = x5;
+      rhs[++firstcol] = x6;
+      rhs[++firstcol] = x7;
+      ++firstcol;
+    
+      for (k = firstcol; k < ncol; k++)
+	rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
+	                - x2 * *Mki2++ - x3 * *Mki3++
+                        - x4 * *Mki4++ - x5 * *Mki5++
+			- x6 * *Mki6++ - x7 * *Mki7++;
+ 
+      M0 += 8 * ldm + 8;
+    }
+
+    while ( firstcol < ncol - 3 ) { /* Do 4 columns */
+      Mki0 = M0 + 1;
+      Mki1 = Mki0 + ldm + 1;
+      Mki2 = Mki1 + ldm + 1;
+      Mki3 = Mki2 + ldm + 1;
+
+      x0 = rhs[firstcol];
+      x1 = rhs[firstcol+1] - x0 * *Mki0++;
+      x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
+      x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
+
+      rhs[++firstcol] = x1;
+      rhs[++firstcol] = x2;
+      rhs[++firstcol] = x3;
+      ++firstcol;
+    
+      for (k = firstcol; k < ncol; k++)
+	rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
+	                - x2 * *Mki2++ - x3 * *Mki3++;
+ 
+      M0 += 4 * ldm + 4;
+    }
+
+    if ( firstcol < ncol - 1 ) { /* Do 2 columns */
+      Mki0 = M0 + 1;
+      Mki1 = Mki0 + ldm + 1;
+
+      x0 = rhs[firstcol];
+      x1 = rhs[firstcol+1] - x0 * *Mki0++;
+
+      rhs[++firstcol] = x1;
+      ++firstcol;
+    
+      for (k = firstcol; k < ncol; k++)
+	rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++;
+ 
+    }
+    
+}
+
+/*
+ * Solves a dense upper triangular system. The upper triangular matrix is
+ * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
+ * in the rhs vector.
+ */
+void
+susolve ( ldm, ncol, M, rhs )
+int ldm;	/* in */
+int ncol;	/* in */
+float *M;	/* in */
+float *rhs;	/* modified */
+{
+    float xj;
+    int jcol, j, irow;
+
+    jcol = ncol - 1;
+
+    for (j = 0; j < ncol; j++) {
+
+	xj = rhs[jcol] / M[jcol + jcol*ldm]; 		/* M(jcol, jcol) */
+	rhs[jcol] = xj;
+	
+	for (irow = 0; irow < jcol; irow++)
+	    rhs[irow] -= xj * M[irow + jcol*ldm];	/* M(irow, jcol) */
+
+	jcol--;
+
+    }
+}
+
+
+/*
+ * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
+ * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
+ */
+void smatvec ( ldm, nrow, ncol, M, vec, Mxvec )
+
+int ldm;	/* in -- leading dimension of M */
+int nrow;	/* in */ 
+int ncol;	/* in */
+float *M;	/* in */
+float *vec;	/* in */
+float *Mxvec;	/* in/out */
+
+{
+    float vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7;
+    float *M0;
+    register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
+    register int firstcol = 0;
+    int k;
+
+    M0 = &M[0];
+    while ( firstcol < ncol - 7 ) {	/* Do 8 columns */
+
+	Mki0 = M0;
+	Mki1 = Mki0 + ldm;
+        Mki2 = Mki1 + ldm;
+        Mki3 = Mki2 + ldm;
+	Mki4 = Mki3 + ldm;
+	Mki5 = Mki4 + ldm;
+	Mki6 = Mki5 + ldm;
+	Mki7 = Mki6 + ldm;
+
+	vi0 = vec[firstcol++];
+	vi1 = vec[firstcol++];
+	vi2 = vec[firstcol++];
+	vi3 = vec[firstcol++];	
+	vi4 = vec[firstcol++];
+	vi5 = vec[firstcol++];
+	vi6 = vec[firstcol++];
+	vi7 = vec[firstcol++];	
+
+	for (k = 0; k < nrow; k++) 
+	    Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
+		      + vi2 * *Mki2++ + vi3 * *Mki3++ 
+		      + vi4 * *Mki4++ + vi5 * *Mki5++
+		      + vi6 * *Mki6++ + vi7 * *Mki7++;
+
+	M0 += 8 * ldm;
+    }
+
+    while ( firstcol < ncol - 3 ) {	/* Do 4 columns */
+
+	Mki0 = M0;
+	Mki1 = Mki0 + ldm;
+	Mki2 = Mki1 + ldm;
+	Mki3 = Mki2 + ldm;
+
+	vi0 = vec[firstcol++];
+	vi1 = vec[firstcol++];
+	vi2 = vec[firstcol++];
+	vi3 = vec[firstcol++];	
+	for (k = 0; k < nrow; k++) 
+	    Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
+		      + vi2 * *Mki2++ + vi3 * *Mki3++ ;
+
+	M0 += 4 * ldm;
+    }
+
+    while ( firstcol < ncol ) {		/* Do 1 column */
+
+ 	Mki0 = M0;
+	vi0 = vec[firstcol++];
+	for (k = 0; k < nrow; k++)
+	    Mxvec[k] += vi0 * *Mki0++;
+
+	M0 += ldm;
+    }
+	
+}
+
--- a/intern/opennl/superlu/sp_coletree.c
+++ b/intern/opennl/superlu/sp_coletree.c
@ -0,0 +1,332 @@
+
+/*  Elimination tree computation and layout routines */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "ssp_defs.h"
+
+/* 
+ *  Implementation of disjoint set union routines.
+ *  Elements are integers in 0..n-1, and the 
+ *  names of the sets themselves are of type int.
+ *  
+ *  Calls are:
+ *  initialize_disjoint_sets (n) initial call.
+ *  s = make_set (i)             returns a set containing only i.
+ *  s = link (t, u)		 returns s = t union u, destroying t and u.
+ *  s = find (i)		 return name of set containing i.
+ *  finalize_disjoint_sets 	 final call.
+ *
+ *  This implementation uses path compression but not weighted union.
+ *  See Tarjan's book for details.
+ *  John Gilbert, CMI, 1987.
+ *
+ *  Implemented path-halving by XSL 07/05/95.
+ */
+
+static int	*pp;		/* parent array for sets */
+
+static 
+int *mxCallocInt(int n)
+{
+    register int i;
+    int *buf;
+
+    buf = (int *) SUPERLU_MALLOC( n * sizeof(int) );
+    if ( !buf ) {
+         ABORT("SUPERLU_MALLOC fails for buf in mxCallocInt()");
+       }
+    for (i = 0; i < n; i++) buf[i] = 0;
+    return (buf);
+}
+      
+static
+void initialize_disjoint_sets (
+	int n
+	)
+{
+	pp = mxCallocInt(n);
+}
+
+
+static
+int make_set (
+	int i
+	)
+{
+	pp[i] = i;
+	return i;
+}
+
+
+static
+int link (
+	int s,
+	int t
+	)
+{
+	pp[s] = t;
+	return t;
+}
+
+
+/* PATH HALVING */
+static
+int find (int i)
+{
+    register int p, gp;
+    
+    p = pp[i];
+    gp = pp[p];
+    while (gp != p) {
+	pp[i] = gp;
+	i = gp;
+	p = pp[i];
+	gp = pp[p];
+    }
+    return (p);
+}
+
+#if 0
+/* PATH COMPRESSION */
+static
+int find (
+	int i
+	)
+{
+	if (pp[i] != i) 
+		pp[i] = find (pp[i]);
+	return pp[i];
+}
+#endif
+
+static
+void finalize_disjoint_sets (
+	void
+	)
+{
+	SUPERLU_FREE(pp);
+}
+
+
+/*
+ *      Find the elimination tree for A'*A.
+ *      This uses something similar to Liu's algorithm. 
+ *      It runs in time O(nz(A)*log n) and does not form A'*A.
+ *
+ *      Input:
+ *        Sparse matrix A.  Numeric values are ignored, so any
+ *        explicit zeros are treated as nonzero.
+ *      Output:
+ *        Integer array of parents representing the elimination
+ *        tree of the symbolic product A'*A.  Each vertex is a
+ *        column of A, and nc means a root of the elimination forest.
+ *
+ *      John R. Gilbert, Xerox, 10 Dec 1990
+ *      Based on code by JRG dated 1987, 1988, and 1990.
+ */
+
+/*
+ * Nonsymmetric elimination tree
+ */
+int
+sp_coletree(
+	    int *acolst, int *acolend, /* column start and end past 1 */
+	    int *arow,                 /* row indices of A */
+	    int nr, int nc,            /* dimension of A */
+	    int *parent	               /* parent in elim tree */
+	    )
+{
+	int	*root;			/* root of subtee of etree 	*/
+	int     *firstcol;		/* first nonzero col in each row*/
+	int	rset, cset;             
+	int	row, col;
+	int	rroot;
+	int	p;
+
+	root = mxCallocInt (nc);
+	initialize_disjoint_sets (nc);
+
+	/* Compute firstcol[row] = first nonzero column in row */
+
+	firstcol = mxCallocInt (nr);
+	for (row = 0; row < nr; firstcol[row++] = nc);
+	for (col = 0; col < nc; col++) 
+		for (p = acolst[col]; p < acolend[col]; p++) {
+			row = arow[p];
+			firstcol[row] = SUPERLU_MIN(firstcol[row], col);
+		}
+
+	/* Compute etree by Liu's algorithm for symmetric matrices,
+           except use (firstcol[r],c) in place of an edge (r,c) of A.
+	   Thus each row clique in A'*A is replaced by a star
+	   centered at its first vertex, which has the same fill. */
+
+	for (col = 0; col < nc; col++) {
+		cset = make_set (col);
+		root[cset] = col;
+		parent[col] = nc; /* Matlab */
+		for (p = acolst[col]; p < acolend[col]; p++) {
+			row = firstcol[arow[p]];
+			if (row >= col) continue;
+			rset = find (row);
+			rroot = root[rset];
+			if (rroot != col) {
+				parent[rroot] = col;
+				cset = link (cset, rset);
+				root[cset] = col;
+			}
+		}
+	}
+
+	SUPERLU_FREE (root);
+	SUPERLU_FREE (firstcol);
+	finalize_disjoint_sets ();
+	return 0;
+}
+
+/*
+ *  q = TreePostorder (n, p);
+ *
+ *	Postorder a tree.
+ *	Input:
+ *	  p is a vector of parent pointers for a forest whose
+ *        vertices are the integers 0 to n-1; p[root]==n.
+ *	Output:
+ *	  q is a vector indexed by 0..n-1 such that q[i] is the
+ *	  i-th vertex in a postorder numbering of the tree.
+ *
+ *        ( 2/7/95 modified by X.Li:
+ *          q is a vector indexed by 0:n-1 such that vertex i is the
+ *          q[i]-th vertex in a postorder numbering of the tree.
+ *          That is, this is the inverse of the previous q. )
+ *
+ *	In the child structure, lower-numbered children are represented
+ *	first, so that a tree which is already numbered in postorder
+ *	will not have its order changed.
+ *    
+ *  Written by John Gilbert, Xerox, 10 Dec 1990.
+ *  Based on code written by John Gilbert at CMI in 1987.
+ */
+
+static int	*first_kid, *next_kid;	/* Linked list of children.	*/
+static int	*post, postnum;
+
+static
+/*
+ * Depth-first search from vertex v.
+ */
+void etdfs (
+	int	v
+	)
+{
+	int	w;
+
+	for (w = first_kid[v]; w != -1; w = next_kid[w]) {
+		etdfs (w);
+	}
+	/* post[postnum++] = v; in Matlab */
+	post[v] = postnum++;    /* Modified by X.Li on 2/14/95 */
+}
+
+
+/*
+ * Post order a tree
+ */
+int *TreePostorder(
+	int n,
+	int *parent
+)
+{
+	int	v, dad;
+
+	/* Allocate storage for working arrays and results	*/
+	first_kid = 	mxCallocInt (n+1);
+	next_kid  = 	mxCallocInt (n+1);
+	post	  = 	mxCallocInt (n+1);
+
+	/* Set up structure describing children */
+	for (v = 0; v <= n; first_kid[v++] = -1);
+	for (v = n-1; v >= 0; v--) {
+		dad = parent[v];
+		next_kid[v] = first_kid[dad];
+		first_kid[dad] = v;
+	}
+
+	/* Depth-first search from dummy root vertex #n */
+	postnum = 0;
+	etdfs (n);
+
+	SUPERLU_FREE (first_kid);
+	SUPERLU_FREE (next_kid);
+	return post;
+}
+
+
+/*
+ *      p = spsymetree (A);
+ *
+ *      Find the elimination tree for symmetric matrix A.
+ *      This uses Liu's algorithm, and runs in time O(nz*log n).
+ *
+ *      Input:
+ *        Square sparse matrix A.  No check is made for symmetry;
+ *        elements below and on the diagonal are ignored.
+ *        Numeric values are ignored, so any explicit zeros are 
+ *        treated as nonzero.
+ *      Output:
+ *        Integer array of parents representing the etree, with n
+ *        meaning a root of the elimination forest.
+ *      Note:  
+ *        This routine uses only the upper triangle, while sparse
+ *        Cholesky (as in spchol.c) uses only the lower.  Matlab's
+ *        dense Cholesky uses only the upper.  This routine could
+ *        be modified to use the lower triangle either by transposing
+ *        the matrix or by traversing it by rows with auxiliary
+ *        pointer and link arrays.
+ *
+ *      John R. Gilbert, Xerox, 10 Dec 1990
+ *      Based on code by JRG dated 1987, 1988, and 1990.
+ *      Modified by X.S. Li, November 1999.
+ */
+
+/*
+ * Symmetric elimination tree
+ */
+int
+sp_symetree(
+	    int *acolst, int *acolend, /* column starts and ends past 1 */
+	    int *arow,            /* row indices of A */
+	    int n,                /* dimension of A */
+	    int *parent	    /* parent in elim tree */
+	    )
+{
+	int	*root;		    /* root of subtree of etree 	*/
+	int	rset, cset;             
+	int	row, col;
+	int	rroot;
+	int	p;
+
+	root = mxCallocInt (n);
+	initialize_disjoint_sets (n);
+
+	for (col = 0; col < n; col++) {
+		cset = make_set (col);
+		root[cset] = col;
+		parent[col] = n; /* Matlab */
+		for (p = acolst[col]; p < acolend[col]; p++) {
+			row = arow[p];
+			if (row >= col) continue;
+			rset = find (row);
+			rroot = root[rset];
+			if (rroot != col) {
+				parent[rroot] = col;
+				cset = link (cset, rset);
+				root[cset] = col;
+			}
+		}
+	}
+	SUPERLU_FREE (root);
+	finalize_disjoint_sets ();
+	return 0;
+} /* SP_SYMETREE */
--- a/intern/opennl/superlu/sp_ienv.c
+++ b/intern/opennl/superlu/sp_ienv.c
@ -0,0 +1,65 @@
+/*
+ * File name:		sp_ienv.c
+ * History:             Modified from lapack routine ILAENV
+ */
+
+#include "ssp_defs.h"
+#include "util.h"
+
+int
+sp_ienv(int ispec)
+{
+/*
+    Purpose   
+    =======   
+
+    sp_ienv() is inquired to choose machine-dependent parameters for the
+    local environment. See ISPEC for a description of the parameters.   
+
+    This version provides a set of parameters which should give good,   
+    but not optimal, performance on many of the currently available   
+    computers.  Users are encouraged to modify this subroutine to set   
+    the tuning parameters for their particular machine using the option   
+    and problem size information in the arguments.   
+
+    Arguments   
+    =========   
+
+    ISPEC   (input) int
+            Specifies the parameter to be returned as the value of SP_IENV.   
+            = 1: the panel size w; a panel consists of w consecutive
+	         columns of matrix A in the process of Gaussian elimination.
+		 The best value depends on machine's cache characters.
+            = 2: the relaxation parameter relax; if the number of
+	         nodes (columns) in a subtree of the elimination tree is less
+		 than relax, this subtree is considered as one supernode,
+		 regardless of their row structures.
+            = 3: the maximum size for a supernode;
+	    = 4: the minimum row dimension for 2-D blocking to be used;
+	    = 5: the minimum column dimension for 2-D blocking to be used;
+	    = 6: the estimated fills factor for L and U, compared with A;
+	    
+   (SP_IENV) (output) int
+            >= 0: the value of the parameter specified by ISPEC   
+            < 0:  if SP_IENV = -k, the k-th argument had an illegal value. 
+  
+    ===================================================================== 
+*/
+    int i;
+
+    switch (ispec) {
+	case 1: return (10);
+	case 2: return (5);
+	case 3: return (100);
+	case 4: return (200);
+	case 5: return (40);
+        case 6: return (20);
+    }
+
+    /* Invalid value for ISPEC */
+    i = 1;
+    xerbla_("sp_ienv", &i);
+    return 0;
+
+} /* sp_ienv_ */
+
--- a/intern/opennl/superlu/sp_preorder.c
+++ b/intern/opennl/superlu/sp_preorder.c
@ -0,0 +1,203 @@
+#include "ssp_defs.h"
+
+void
+sp_preorder(superlu_options_t *options,  SuperMatrix *A, int *perm_c, 
+	    int *etree, SuperMatrix *AC)
+{
+/*
+ * Purpose
+ * =======
+ *
+ * sp_preorder() permutes the columns of the original matrix. It performs
+ * the following steps:
+ *
+ *    1. Apply column permutation perm_c[] to A's column pointers to form AC;
+ *
+ *    2. If options->Fact = DOFACT, then
+ *       (1) Compute column elimination tree etree[] of AC'AC;
+ *       (2) Post order etree[] to get a postordered elimination tree etree[],
+ *           and a postorder permutation post[];
+ *       (3) Apply post[] permutation to columns of AC;
+ *       (4) Overwrite perm_c[] with the product perm_c * post.
+ *
+ * Arguments
+ * =========
+ *
+ * options (input) superlu_options_t*
+ *         Specifies whether or not the elimination tree will be re-used.
+ *         If options->Fact == DOFACT, this means first time factor A, 
+ *         etree is computed, postered, and output.
+ *         Otherwise, re-factor A, etree is input, unchanged on exit.
+ *
+ * A       (input) SuperMatrix*
+ *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
+ *         of the linear equations is A->nrow. Currently, the type of A can be:
+ *         Stype = NC or SLU_NCP; Mtype = SLU_GE.
+ *         In the future, more general A may be handled.
+ *
+ * perm_c  (input/output) int*
+ *	   Column permutation vector of size A->ncol, which defines the 
+ *         permutation matrix Pc; perm_c[i] = j means column i of A is 
+ *         in position j in A*Pc.
+ *         If options->Fact == DOFACT, perm_c is both input and output.
+ *         On output, it is changed according to a postorder of etree.
+ *         Otherwise, perm_c is input.
+ *
+ * etree   (input/output) int*
+ *         Elimination tree of Pc'*A'*A*Pc, dimension A->ncol.
+ *         If options->Fact == DOFACT, etree is an output argument,
+ *         otherwise it is an input argument.
+ *         Note: etree is a vector of parent pointers for a forest whose
+ *         vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
+ *
+ * AC      (output) SuperMatrix*
+ *         The resulting matrix after applied the column permutation
+ *         perm_c[] to matrix A. The type of AC can be:
+ *         Stype = SLU_NCP; Dtype = A->Dtype; Mtype = SLU_GE.
+ *
+ */
+
+    NCformat  *Astore;
+    NCPformat *ACstore;
+    int       *iwork, *post;
+    register  int n, i;
+
+    n = A->ncol;
+    
+    /* Apply column permutation perm_c to A's column pointers so to
+       obtain NCP format in AC = A*Pc.  */
+    AC->Stype       = SLU_NCP;
+    AC->Dtype       = A->Dtype;
+    AC->Mtype       = A->Mtype;
+    AC->nrow        = A->nrow;
+    AC->ncol        = A->ncol;
+    Astore          = A->Store;
+    ACstore = AC->Store = (void *) SUPERLU_MALLOC( sizeof(NCPformat) );
+    if ( !ACstore ) ABORT("SUPERLU_MALLOC fails for ACstore");
+    ACstore->nnz    = Astore->nnz;
+    ACstore->nzval  = Astore->nzval;
+    ACstore->rowind = Astore->rowind;
+    ACstore->colbeg = (int*) SUPERLU_MALLOC(n*sizeof(int));
+    if ( !(ACstore->colbeg) ) ABORT("SUPERLU_MALLOC fails for ACstore->colbeg");
+    ACstore->colend = (int*) SUPERLU_MALLOC(n*sizeof(int));
+    if ( !(ACstore->colend) ) ABORT("SUPERLU_MALLOC fails for ACstore->colend");
+
+#ifdef DEBUG
+    print_int_vec("pre_order:", n, perm_c);
+    check_perm("Initial perm_c", n, perm_c);
+#endif      
+
+    for (i = 0; i < n; i++) {
+	ACstore->colbeg[perm_c[i]] = Astore->colptr[i]; 
+	ACstore->colend[perm_c[i]] = Astore->colptr[i+1];
+    }
+	
+    if ( options->Fact == DOFACT ) {
+#undef ETREE_ATplusA
+#ifdef ETREE_ATplusA
+        /*--------------------------------------------
+	  COMPUTE THE ETREE OF Pc*(A'+A)*Pc'.
+	  --------------------------------------------*/
+        int *b_colptr, *b_rowind, bnz, j;
+	int *c_colbeg, *c_colend;
+
+        /*printf("Use etree(A'+A)\n");*/
+
+	/* Form B = A + A'. */
+	at_plus_a(n, Astore->nnz, Astore->colptr, Astore->rowind,
+		  &bnz, &b_colptr, &b_rowind);
+
+	/* Form C = Pc*B*Pc'. */
+	c_colbeg = (int*) SUPERLU_MALLOC(2*n*sizeof(int));
+	c_colend = c_colbeg + n;
+	if (!c_colbeg ) ABORT("SUPERLU_MALLOC fails for c_colbeg/c_colend");
+	for (i = 0; i < n; i++) {
+	    c_colbeg[perm_c[i]] = b_colptr[i]; 
+  	    c_colend[perm_c[i]] = b_colptr[i+1];
+	}
+	for (j = 0; j < n; ++j) {
+	    for (i = c_colbeg[j]; i < c_colend[j]; ++i) {
+	        b_rowind[i] = perm_c[b_rowind[i]];
+	    }
+	}
+
+	/* Compute etree of C. */
+	sp_symetree(c_colbeg, c_colend, b_rowind, n, etree);
+
+	SUPERLU_FREE(b_colptr);
+	if ( bnz ) SUPERLU_FREE(b_rowind);
+	SUPERLU_FREE(c_colbeg);
+	
+#else
+        /*--------------------------------------------
+	  COMPUTE THE COLUMN ELIMINATION TREE.
+	  --------------------------------------------*/
+	sp_coletree(ACstore->colbeg, ACstore->colend, ACstore->rowind,
+		    A->nrow, A->ncol, etree);
+#endif
+#ifdef DEBUG	
+	print_int_vec("etree:", n, etree);
+#endif	
+	
+	/* In symmetric mode, do not do postorder here. */
+	if ( options->SymmetricMode == NO ) {
+	    /* Post order etree */
+	    post = (int *) TreePostorder(n, etree);
+	    /* for (i = 0; i < n+1; ++i) inv_post[post[i]] = i;
+	       iwork = post; */
+
+#ifdef DEBUG
+	    print_int_vec("post:", n+1, post);
+	    check_perm("post", n, post);	
+#endif	
+	    iwork = (int*) SUPERLU_MALLOC((n+1)*sizeof(int)); 
+	    if ( !iwork ) ABORT("SUPERLU_MALLOC fails for iwork[]");
+
+	    /* Renumber etree in postorder */
+	    for (i = 0; i < n; ++i) iwork[post[i]] = post[etree[i]];
+	    for (i = 0; i < n; ++i) etree[i] = iwork[i];
+
+#ifdef DEBUG	
+	    print_int_vec("postorder etree:", n, etree);
+#endif
+	
+	    /* Postmultiply A*Pc by post[] */
+	    for (i = 0; i < n; ++i) iwork[post[i]] = ACstore->colbeg[i];
+	    for (i = 0; i < n; ++i) ACstore->colbeg[i] = iwork[i];
+	    for (i = 0; i < n; ++i) iwork[post[i]] = ACstore->colend[i];
+	    for (i = 0; i < n; ++i) ACstore->colend[i] = iwork[i];
+
+	    for (i = 0; i < n; ++i)
+	        iwork[i] = post[perm_c[i]];  /* product of perm_c and post */
+	    for (i = 0; i < n; ++i) perm_c[i] = iwork[i];
+
+#ifdef DEBUG
+	    print_int_vec("Pc*post:", n, perm_c);
+	    check_perm("final perm_c", n, perm_c);	
+#endif
+	    SUPERLU_FREE (post);
+	    SUPERLU_FREE (iwork);
+	} /* end postordering */
+
+    } /* if options->Fact == DOFACT ... */
+
+}
+
+int check_perm(char *what, int n, int *perm)
+{
+    register int i;
+    int          *marker;
+    marker = (int *) calloc(n, sizeof(int));
+
+    for (i = 0; i < n; ++i) {
+	if ( marker[perm[i]] == 1 || perm[i] >= n ) {
+	    printf("%s: Not a valid PERM[%d] = %d\n", what, i, perm[i]);
+	    ABORT("check_perm");
+	} else {
+	    marker[perm[i]] = 1;
+	}
+    }
+
+    SUPERLU_FREE(marker);
+    return 0;
+}
--- a/intern/opennl/superlu/spanel_bmod.c
+++ b/intern/opennl/superlu/spanel_bmod.c
@ -0,0 +1,449 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ 
+  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ 
+  Permission is hereby granted to use or copy this program for any
+  purpose, provided the above notices are retained on all copies.
+  Permission to modify the code and to distribute modified code is
+  granted, provided the above notices are retained, and a notice that
+  the code was modified is included with the above copyright notice.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "ssp_defs.h"
+
+/* 
+ * Function prototypes 
+ */
+void slsolve(int, int, float *, float *);
+void smatvec(int, int, int, float *, float *, float *);
+extern void scheck_tempv();
+
+void
+spanel_bmod (
+	    const int  m,          /* in - number of rows in the matrix */
+	    const int  w,          /* in */
+	    const int  jcol,       /* in */
+	    const int  nseg,       /* in */
+	    float     *dense,     /* out, of size n by w */
+	    float     *tempv,     /* working array */
+	    int        *segrep,    /* in */
+	    int        *repfnz,    /* in, of size n by w */
+	    GlobalLU_t *Glu,       /* modified */
+	    SuperLUStat_t *stat    /* output */
+	    )
+{
+/* 
+ * Purpose
+ * =======
+ *
+ *    Performs numeric block updates (sup-panel) in topological order.
+ *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
+ *    Special processing on the supernodal portion of L\U[*,j]
+ *
+ *    Before entering this routine, the original nonzeros in the panel 
+ *    were already copied into the spa[m,w].
+ *
+ *    Updated/Output parameters-
+ *	dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned 
+ *      collectively in the m-by-w vector dense[*]. 
+ *
+ */
+
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+    _fcd ftcs1 = _cptofcd("L", strlen("L")),
+         ftcs2 = _cptofcd("N", strlen("N")),
+         ftcs3 = _cptofcd("U", strlen("U"));
+#endif
+    int          incx = 1, incy = 1;
+    float       alpha, beta;
+#endif
+
+    register int k, ksub;
+    int          fsupc, nsupc, nsupr, nrow;
+    int          krep, krep_ind;
+    float       ukj, ukj1, ukj2;
+    int          luptr, luptr1, luptr2;
+    int          segsze;
+    int          block_nrow;  /* no of rows in a block row */
+    register int lptr;	      /* Points to the row subscripts of a supernode */
+    int          kfnz, irow, no_zeros; 
+    register int isub, isub1, i;
+    register int jj;	      /* Index through each column in the panel */
+    int          *xsup, *supno;
+    int          *lsub, *xlsub;
+    float       *lusup;
+    int          *xlusup;
+    int          *repfnz_col; /* repfnz[] for a column in the panel */
+    float       *dense_col;  /* dense[] for a column in the panel */
+    float       *tempv1;             /* Used in 1-D update */
+    float       *TriTmp, *MatvecTmp; /* used in 2-D update */
+    float      zero = 0.0;
+    register int ldaTmp;
+    register int r_ind, r_hi;
+    static   int first = 1, maxsuper, rowblk, colblk;
+    flops_t  *ops = stat->ops;
+    
+    xsup    = Glu->xsup;
+    supno   = Glu->supno;
+    lsub    = Glu->lsub;
+    xlsub   = Glu->xlsub;
+    lusup   = Glu->lusup;
+    xlusup  = Glu->xlusup;
+    
+    if ( first ) {
+	maxsuper = sp_ienv(3);
+	rowblk   = sp_ienv(4);
+	colblk   = sp_ienv(5);
+	first = 0;
+    }
+    ldaTmp = maxsuper + rowblk;
+
+    /* 
+     * For each nonz supernode segment of U[*,j] in topological order 
+     */
+    k = nseg - 1;
+    for (ksub = 0; ksub < nseg; ksub++) { /* for each updating supernode */
+
+	/* krep = representative of current k-th supernode
+	 * fsupc = first supernodal column
+	 * nsupc = no of columns in a supernode
+	 * nsupr = no of rows in a supernode
+	 */
+        krep = segrep[k--];
+	fsupc = xsup[supno[krep]];
+	nsupc = krep - fsupc + 1;
+	nsupr = xlsub[fsupc+1] - xlsub[fsupc];
+	nrow = nsupr - nsupc;
+	lptr = xlsub[fsupc];
+	krep_ind = lptr + nsupc - 1;
+
+	repfnz_col = repfnz;
+	dense_col = dense;
+	
+	if ( nsupc >= colblk && nrow > rowblk ) { /* 2-D block update */
+
+	    TriTmp = tempv;
+	
+	    /* Sequence through each column in panel -- triangular solves */
+	    for (jj = jcol; jj < jcol + w; jj++,
+		 repfnz_col += m, dense_col += m, TriTmp += ldaTmp ) {
+
+		kfnz = repfnz_col[krep];
+		if ( kfnz == EMPTY ) continue;	/* Skip any zero segment */
+	    
+		segsze = krep - kfnz + 1;
+		luptr = xlusup[fsupc];
+
+		ops[TRSV] += segsze * (segsze - 1);
+		ops[GEMV] += 2 * nrow * segsze;
+	
+		/* Case 1: Update U-segment of size 1 -- col-col update */
+		if ( segsze == 1 ) {
+		    ukj = dense_col[lsub[krep_ind]];
+		    luptr += nsupr*(nsupc-1) + nsupc;
+
+		    for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
+			irow = lsub[i];
+			dense_col[irow] -= ukj * lusup[luptr];
+			++luptr;
+		    }
+
+		} else if ( segsze <= 3 ) {
+		    ukj = dense_col[lsub[krep_ind]];
+		    ukj1 = dense_col[lsub[krep_ind - 1]];
+		    luptr += nsupr*(nsupc-1) + nsupc-1;
+		    luptr1 = luptr - nsupr;
+
+		    if ( segsze == 2 ) {
+			ukj -= ukj1 * lusup[luptr1];
+			dense_col[lsub[krep_ind]] = ukj;
+			for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+			    irow = lsub[i];
+			    luptr++; luptr1++;
+			    dense_col[irow] -= (ukj*lusup[luptr]
+						+ ukj1*lusup[luptr1]);
+			}
+		    } else {
+			ukj2 = dense_col[lsub[krep_ind - 2]];
+			luptr2 = luptr1 - nsupr;
+			ukj1 -= ukj2 * lusup[luptr2-1];
+			ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
+			dense_col[lsub[krep_ind]] = ukj;
+			dense_col[lsub[krep_ind-1]] = ukj1;
+			for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+			    irow = lsub[i];
+			    luptr++; luptr1++; luptr2++;
+			    dense_col[irow] -= ( ukj*lusup[luptr]
+                             + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
+			}
+		    }
+
+		} else  {	/* segsze >= 4 */
+		    
+		    /* Copy U[*,j] segment from dense[*] to TriTmp[*], which
+		       holds the result of triangular solves.    */
+		    no_zeros = kfnz - fsupc;
+		    isub = lptr + no_zeros;
+		    for (i = 0; i < segsze; ++i) {
+			irow = lsub[isub];
+			TriTmp[i] = dense_col[irow]; /* Gather */
+			++isub;
+		    }
+		    
+		    /* start effective triangle */
+		    luptr += nsupr * no_zeros + no_zeros;
+
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+		    STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
+			   &nsupr, TriTmp, &incx );
+#else
+		    strsv_( "L", "N", "U", &segsze, &lusup[luptr], 
+			   &nsupr, TriTmp, &incx );
+#endif
+#else		
+		    slsolve ( nsupr, segsze, &lusup[luptr], TriTmp );
+#endif
+		    
+
+		} /* else ... */
+	    
+	    }  /* for jj ... end tri-solves */
+
+	    /* Block row updates; push all the way into dense[*] block */
+	    for ( r_ind = 0; r_ind < nrow; r_ind += rowblk ) {
+		
+		r_hi = SUPERLU_MIN(nrow, r_ind + rowblk);
+		block_nrow = SUPERLU_MIN(rowblk, r_hi - r_ind);
+		luptr = xlusup[fsupc] + nsupc + r_ind;
+		isub1 = lptr + nsupc + r_ind;
+		
+		repfnz_col = repfnz;
+		TriTmp = tempv;
+		dense_col = dense;
+		
+		/* Sequence through each column in panel -- matrix-vector */
+		for (jj = jcol; jj < jcol + w; jj++,
+		     repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
+		    
+		    kfnz = repfnz_col[krep];
+		    if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
+		    
+		    segsze = krep - kfnz + 1;
+		    if ( segsze <= 3 ) continue;   /* skip unrolled cases */
+		    
+		    /* Perform a block update, and scatter the result of
+		       matrix-vector to dense[].		 */
+		    no_zeros = kfnz - fsupc;
+		    luptr1 = luptr + nsupr * no_zeros;
+		    MatvecTmp = &TriTmp[maxsuper];
+		    
+#ifdef USE_VENDOR_BLAS
+		    alpha = one; 
+                    beta = zero;
+#ifdef _CRAY
+		    SGEMV(ftcs2, &block_nrow, &segsze, &alpha, &lusup[luptr1], 
+			   &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
+#else
+		    sgemv_("N", &block_nrow, &segsze, &alpha, &lusup[luptr1], 
+			   &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
+#endif
+#else
+		    smatvec(nsupr, block_nrow, segsze, &lusup[luptr1],
+			   TriTmp, MatvecTmp);
+#endif
+		    
+		    /* Scatter MatvecTmp[*] into SPA dense[*] temporarily
+		     * such that MatvecTmp[*] can be re-used for the
+		     * the next blok row update. dense[] will be copied into 
+		     * global store after the whole panel has been finished.
+		     */
+		    isub = isub1;
+		    for (i = 0; i < block_nrow; i++) {
+			irow = lsub[isub];
+			dense_col[irow] -= MatvecTmp[i];
+			MatvecTmp[i] = zero;
+			++isub;
+		    }
+		    
+		} /* for jj ... */
+		
+	    } /* for each block row ... */
+	    
+	    /* Scatter the triangular solves into SPA dense[*] */
+	    repfnz_col = repfnz;
+	    TriTmp = tempv;
+	    dense_col = dense;
+	    
+	    for (jj = jcol; jj < jcol + w; jj++,
+		 repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
+		kfnz = repfnz_col[krep];
+		if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
+		
+		segsze = krep - kfnz + 1;
+		if ( segsze <= 3 ) continue; /* skip unrolled cases */
+		
+		no_zeros = kfnz - fsupc;		
+		isub = lptr + no_zeros;
+		for (i = 0; i < segsze; i++) {
+		    irow = lsub[isub];
+		    dense_col[irow] = TriTmp[i];
+		    TriTmp[i] = zero;
+		    ++isub;
+		}
+		
+	    } /* for jj ... */
+	    
+	} else { /* 1-D block modification */
+	    
+	    
+	    /* Sequence through each column in the panel */
+	    for (jj = jcol; jj < jcol + w; jj++,
+		 repfnz_col += m, dense_col += m) {
+		
+		kfnz = repfnz_col[krep];
+		if ( kfnz == EMPTY ) continue;	/* Skip any zero segment */
+		
+		segsze = krep - kfnz + 1;
+		luptr = xlusup[fsupc];
+
+		ops[TRSV] += segsze * (segsze - 1);
+		ops[GEMV] += 2 * nrow * segsze;
+		
+		/* Case 1: Update U-segment of size 1 -- col-col update */
+		if ( segsze == 1 ) {
+		    ukj = dense_col[lsub[krep_ind]];
+		    luptr += nsupr*(nsupc-1) + nsupc;
+
+		    for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
+			irow = lsub[i];
+			dense_col[irow] -= ukj * lusup[luptr];
+			++luptr;
+		    }
+
+		} else if ( segsze <= 3 ) {
+		    ukj = dense_col[lsub[krep_ind]];
+		    luptr += nsupr*(nsupc-1) + nsupc-1;
+		    ukj1 = dense_col[lsub[krep_ind - 1]];
+		    luptr1 = luptr - nsupr;
+
+		    if ( segsze == 2 ) {
+			ukj -= ukj1 * lusup[luptr1];
+			dense_col[lsub[krep_ind]] = ukj;
+			for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+			    irow = lsub[i];
+			    ++luptr;  ++luptr1;
+			    dense_col[irow] -= (ukj*lusup[luptr]
+						+ ukj1*lusup[luptr1]);
+			}
+		    } else {
+			ukj2 = dense_col[lsub[krep_ind - 2]];
+			luptr2 = luptr1 - nsupr;
+			ukj1 -= ukj2 * lusup[luptr2-1];
+			ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
+			dense_col[lsub[krep_ind]] = ukj;
+			dense_col[lsub[krep_ind-1]] = ukj1;
+			for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+			    irow = lsub[i];
+			    ++luptr; ++luptr1; ++luptr2;
+			    dense_col[irow] -= ( ukj*lusup[luptr]
+                             + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
+			}
+		    }
+
+		} else  { /* segsze >= 4 */
+		    /* 
+		     * Perform a triangular solve and block update,
+		     * then scatter the result of sup-col update to dense[].
+		     */
+		    no_zeros = kfnz - fsupc;
+		    
+		    /* Copy U[*,j] segment from dense[*] to tempv[*]: 
+		     *    The result of triangular solve is in tempv[*];
+		     *    The result of matrix vector update is in dense_col[*]
+		     */
+		    isub = lptr + no_zeros;
+		    for (i = 0; i < segsze; ++i) {
+			irow = lsub[isub];
+			tempv[i] = dense_col[irow]; /* Gather */
+			++isub;
+		    }
+		    
+		    /* start effective triangle */
+		    luptr += nsupr * no_zeros + no_zeros;
+		    
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+		    STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
+			   &nsupr, tempv, &incx );
+#else
+		    strsv_( "L", "N", "U", &segsze, &lusup[luptr], 
+			   &nsupr, tempv, &incx );
+#endif
+		    
+		    luptr += segsze;	/* Dense matrix-vector */
+		    tempv1 = &tempv[segsze];
+                    alpha = one;
+                    beta = zero;
+#ifdef _CRAY
+		    SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
+			   &nsupr, tempv, &incx, &beta, tempv1, &incy );
+#else
+		    sgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
+			   &nsupr, tempv, &incx, &beta, tempv1, &incy );
+#endif
+#else
+		    slsolve ( nsupr, segsze, &lusup[luptr], tempv );
+		    
+		    luptr += segsze;        /* Dense matrix-vector */
+		    tempv1 = &tempv[segsze];
+		    smatvec (nsupr, nrow, segsze, &lusup[luptr], tempv, tempv1);
+#endif
+		    
+		    /* Scatter tempv[*] into SPA dense[*] temporarily, such
+		     * that tempv[*] can be used for the triangular solve of
+		     * the next column of the panel. They will be copied into 
+		     * ucol[*] after the whole panel has been finished.
+		     */
+		    isub = lptr + no_zeros;
+		    for (i = 0; i < segsze; i++) {
+			irow = lsub[isub];
+			dense_col[irow] = tempv[i];
+			tempv[i] = zero;
+			isub++;
+		    }
+		    
+		    /* Scatter the update from tempv1[*] into SPA dense[*] */
+		    /* Start dense rectangular L */
+		    for (i = 0; i < nrow; i++) {
+			irow = lsub[isub];
+			dense_col[irow] -= tempv1[i];
+			tempv1[i] = zero;
+			++isub;	
+		    }
+		    
+		} /* else segsze>=4 ... */
+		
+	    } /* for each column in the panel... */
+	    
+	} /* else 1-D update ... */
+
+    } /* for each updating supernode ... */
+
+}
+
+
+
--- a/intern/opennl/superlu/spanel_dfs.c
+++ b/intern/opennl/superlu/spanel_dfs.c
@ -0,0 +1,249 @@
+
+
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/*
+  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ 
+  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ 
+  Permission is hereby granted to use or copy this program for any
+  purpose, provided the above notices are retained on all copies.
+  Permission to modify the code and to distribute modified code is
+  granted, provided the above notices are retained, and a notice that
+  the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+#include "util.h"
+
+void
+spanel_dfs (
+	   const int  m,           /* in - number of rows in the matrix */
+	   const int  w,           /* in */
+	   const int  jcol,        /* in */
+	   SuperMatrix *A,       /* in - original matrix */
+	   int        *perm_r,     /* in */
+	   int        *nseg,	   /* out */
+	   float     *dense,      /* out */
+	   int        *panel_lsub, /* out */
+	   int        *segrep,     /* out */
+	   int        *repfnz,     /* out */
+	   int        *xprune,     /* out */
+	   int        *marker,     /* out */     
+	   int        *parent,     /* working array */
+	   int        *xplore,     /* working array */
+	   GlobalLU_t *Glu         /* modified */
+	   )
+{
+/*
+ * Purpose
+ * =======
+ *
+ *   Performs a symbolic factorization on a panel of columns [jcol, jcol+w).
+ *
+ *   A supernode representative is the last column of a supernode.
+ *   The nonzeros in U[*,j] are segments that end at supernodal
+ *   representatives.
+ *
+ *   The routine returns one list of the supernodal representatives
+ *   in topological order of the dfs that generates them. This list is
+ *   a superset of the topological order of each individual column within
+ *   the panel. 
+ *   The location of the first nonzero in each supernodal segment
+ *   (supernodal entry location) is also returned. Each column has a 
+ *   separate list for this purpose.
+ *
+ *   Two marker arrays are used for dfs:
+ *     marker[i] == jj, if i was visited during dfs of current column jj;
+ *     marker1[i] >= jcol, if i was visited by earlier columns in this panel;
+ *
+ *   marker: A-row --> A-row/col (0/1)
+ *   repfnz: SuperA-col --> PA-row
+ *   parent: SuperA-col --> SuperA-col
+ *   xplore: SuperA-col --> index to L-structure
+ *
+ */
+    NCPformat *Astore;
+    float    *a;
+    int       *asub;
+    int       *xa_begin, *xa_end;
+    int	      krep, chperm, chmark, chrep, oldrep, kchild, myfnz;
+    int       k, krow, kmark, kperm;
+    int       xdfs, maxdfs, kpar;
+    int       jj;	   /* index through each column in the panel */
+    int       *marker1;	   /* marker1[jj] >= jcol if vertex jj was visited 
+			      by a previous column within this panel.   */
+    int       *repfnz_col; /* start of each column in the panel */
+    float    *dense_col;  /* start of each column in the panel */
+    int       nextl_col;   /* next available position in panel_lsub[*,jj] */
+    int       *xsup, *supno;
+    int       *lsub, *xlsub;
+
+    /* Initialize pointers */
+    Astore     = A->Store;
+    a          = Astore->nzval;
+    asub       = Astore->rowind;
+    xa_begin   = Astore->colbeg;
+    xa_end     = Astore->colend;
+    marker1    = marker + m;
+    repfnz_col = repfnz;
+    dense_col  = dense;
+    *nseg      = 0;
+    xsup       = Glu->xsup;
+    supno      = Glu->supno;
+    lsub       = Glu->lsub;
+    xlsub      = Glu->xlsub;
+
+    /* For each column in the panel */
+    for (jj = jcol; jj < jcol + w; jj++) {
+	nextl_col = (jj - jcol) * m;
+
+#ifdef CHK_DFS
+	printf("\npanel col %d: ", jj);
+#endif
+
+	/* For each nonz in A[*,jj] do dfs */
+	for (k = xa_begin[jj]; k < xa_end[jj]; k++) {
+	    krow = asub[k];
+            dense_col[krow] = a[k];
+	    kmark = marker[krow];    	
+	    if ( kmark == jj ) 
+		continue;     /* krow visited before, go to the next nonzero */
+
+	    /* For each unmarked nbr krow of jj
+	     * krow is in L: place it in structure of L[*,jj]
+	     */
+	    marker[krow] = jj;
+	    kperm = perm_r[krow];
+	    
+	    if ( kperm == EMPTY ) {
+		panel_lsub[nextl_col++] = krow; /* krow is indexed into A */
+	    }
+	    /* 
+	     * krow is in U: if its supernode-rep krep
+	     * has been explored, update repfnz[*]
+	     */
+	    else {
+		
+		krep = xsup[supno[kperm]+1] - 1;
+		myfnz = repfnz_col[krep];
+		
+#ifdef CHK_DFS
+		printf("krep %d, myfnz %d, perm_r[%d] %d\n", krep, myfnz, krow, kperm);
+#endif
+		if ( myfnz != EMPTY ) {	/* Representative visited before */
+		    if ( myfnz > kperm ) repfnz_col[krep] = kperm;
+		    /* continue; */
+		}
+		else {
+		    /* Otherwise, perform dfs starting at krep */
+		    oldrep = EMPTY;
+		    parent[krep] = oldrep;
+		    repfnz_col[krep] = kperm;
+		    xdfs = xlsub[krep];
+		    maxdfs = xprune[krep];
+		    
+#ifdef CHK_DFS 
+		    printf("  xdfs %d, maxdfs %d: ", xdfs, maxdfs);
+		    for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
+		    printf("\n");
+#endif
+		    do {
+			/* 
+			 * For each unmarked kchild of krep 
+			 */
+			while ( xdfs < maxdfs ) {
+			    
+			    kchild = lsub[xdfs];
+			    xdfs++;
+			    chmark = marker[kchild];
+			    
+			    if ( chmark != jj ) { /* Not reached yet */
+				marker[kchild] = jj;
+				chperm = perm_r[kchild];
+			      
+				/* Case kchild is in L: place it in L[*,j] */
+				if ( chperm == EMPTY ) {
+				    panel_lsub[nextl_col++] = kchild;
+				} 
+				/* Case kchild is in U: 
+				 *   chrep = its supernode-rep. If its rep has 
+				 *   been explored, update its repfnz[*]
+				 */
+				else {
+				    
+				    chrep = xsup[supno[chperm]+1] - 1;
+				    myfnz = repfnz_col[chrep];
+#ifdef CHK_DFS
+				    printf("chrep %d,myfnz %d,perm_r[%d] %d\n",chrep,myfnz,kchild,chperm);
+#endif
+				    if ( myfnz != EMPTY ) { /* Visited before */
+					if ( myfnz > chperm )
+					    repfnz_col[chrep] = chperm;
+				    }
+				    else {
+					/* Cont. dfs at snode-rep of kchild */
+					xplore[krep] = xdfs;	
+					oldrep = krep;
+					krep = chrep; /* Go deeper down G(L) */
+					parent[krep] = oldrep;
+					repfnz_col[krep] = chperm;
+					xdfs = xlsub[krep];     
+					maxdfs = xprune[krep];
+#ifdef CHK_DFS 
+					printf("  xdfs %d, maxdfs %d: ", xdfs, maxdfs);
+					for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);	
+					printf("\n");
+#endif
+				    } /* else */
+				  
+				} /* else */
+			      
+			    } /* if... */
+			    
+			} /* while xdfs < maxdfs */
+			
+			/* krow has no more unexplored nbrs:
+			 *    Place snode-rep krep in postorder DFS, if this 
+			 *    segment is seen for the first time. (Note that
+			 *    "repfnz[krep]" may change later.)
+			 *    Backtrack dfs to its parent.
+			 */
+			if ( marker1[krep] < jcol ) {
+			    segrep[*nseg] = krep;
+			    ++(*nseg);
+			    marker1[krep] = jj;
+			}
+			
+			kpar = parent[krep]; /* Pop stack, mimic recursion */
+			if ( kpar == EMPTY ) break; /* dfs done */
+			krep = kpar;
+			xdfs = xplore[krep];
+			maxdfs = xprune[krep];
+			
+#ifdef CHK_DFS 
+			printf("  pop stack: krep %d,xdfs %d,maxdfs %d: ", krep,xdfs,maxdfs);
+			for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
+			printf("\n");
+#endif
+		    } while ( kpar != EMPTY ); /* do-while - until empty stack */
+		    
+		} /* else */
+		
+	    } /* else */
+	    
+	} /* for each nonz in A[*,jj] */
+	
+	repfnz_col += m;    /* Move to next column */
+        dense_col += m;
+	
+    } /* for jj ... */
+    
+}
--- a/intern/opennl/superlu/spivotL.c
+++ b/intern/opennl/superlu/spivotL.c
@ -0,0 +1,173 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ 
+  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ 
+  Permission is hereby granted to use or copy this program for any
+  purpose, provided the above notices are retained on all copies.
+  Permission to modify the code and to distribute modified code is
+  granted, provided the above notices are retained, and a notice that
+  the code was modified is included with the above copyright notice.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include "ssp_defs.h"
+
+#undef DEBUG
+
+int
+spivotL(
+        const int  jcol,     /* in */
+        const float u,      /* in - diagonal pivoting threshold */
+        int        *usepr,   /* re-use the pivot sequence given by perm_r/iperm_r */
+        int        *perm_r,  /* may be modified */
+        int        *iperm_r, /* in - inverse of perm_r */
+        int        *iperm_c, /* in - used to find diagonal of Pc*A*Pc' */
+        int        *pivrow,  /* out */
+        GlobalLU_t *Glu,     /* modified - global LU data structures */
+	SuperLUStat_t *stat  /* output */
+       )
+{
+/*
+ * Purpose
+ * =======
+ *   Performs the numerical pivoting on the current column of L,
+ *   and the CDIV operation.
+ *
+ *   Pivot policy:
+ *   (1) Compute thresh = u * max_(i>=j) abs(A_ij);
+ *   (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN
+ *           pivot row = k;
+ *       ELSE IF abs(A_jj) >= thresh THEN
+ *           pivot row = j;
+ *       ELSE
+ *           pivot row = m;
+ * 
+ *   Note: If you absolutely want to use a given pivot order, then set u=0.0.
+ *
+ *   Return value: 0      success;
+ *                 i > 0  U(i,i) is exactly zero.
+ *
+ */
+    int          fsupc;	    /* first column in the supernode */
+    int          nsupc;	    /* no of columns in the supernode */
+    int          nsupr;     /* no of rows in the supernode */
+    int          lptr;	    /* points to the starting subscript of the supernode */
+    int          pivptr, old_pivptr, diag, diagind;
+    float       pivmax, rtemp, thresh;
+    float       temp;
+    float       *lu_sup_ptr; 
+    float       *lu_col_ptr;
+    int          *lsub_ptr;
+    int          isub, icol, k, itemp;
+    int          *lsub, *xlsub;
+    float       *lusup;
+    int          *xlusup;
+    flops_t      *ops = stat->ops;
+
+    /* Initialize pointers */
+    lsub       = Glu->lsub;
+    xlsub      = Glu->xlsub;
+    lusup      = Glu->lusup;
+    xlusup     = Glu->xlusup;
+    fsupc      = (Glu->xsup)[(Glu->supno)[jcol]];
+    nsupc      = jcol - fsupc;	        /* excluding jcol; nsupc >= 0 */
+    lptr       = xlsub[fsupc];
+    nsupr      = xlsub[fsupc+1] - lptr;
+    lu_sup_ptr = &lusup[xlusup[fsupc]];	/* start of the current supernode */
+    lu_col_ptr = &lusup[xlusup[jcol]];	/* start of jcol in the supernode */
+    lsub_ptr   = &lsub[lptr];	/* start of row indices of the supernode */
+
+#ifdef DEBUG
+if ( jcol == MIN_COL ) {
+    printf("Before cdiv: col %d\n", jcol);
+    for (k = nsupc; k < nsupr; k++) 
+	printf("  lu[%d] %f\n", lsub_ptr[k], lu_col_ptr[k]);
+}
+#endif
+    
+    /* Determine the largest abs numerical value for partial pivoting;
+       Also search for user-specified pivot, and diagonal element. */
+    if ( *usepr ) *pivrow = iperm_r[jcol];
+    diagind = iperm_c[jcol];
+    pivmax = 0.0;
+    pivptr = nsupc;
+    diag = EMPTY;
+    old_pivptr = nsupc;
+    for (isub = nsupc; isub < nsupr; ++isub) {
+	rtemp = fabs (lu_col_ptr[isub]);
+	if ( rtemp > pivmax ) {
+	    pivmax = rtemp;
+	    pivptr = isub;
+	}
+	if ( *usepr && lsub_ptr[isub] == *pivrow ) old_pivptr = isub;
+	if ( lsub_ptr[isub] == diagind ) diag = isub;
+    }
+
+    /* Test for singularity */
+    if ( pivmax == 0.0 ) {
+	*pivrow = lsub_ptr[pivptr];
+	perm_r[*pivrow] = jcol;
+	*usepr = 0;
+	return (jcol+1);
+    }
+
+    thresh = u * pivmax;
+    
+    /* Choose appropriate pivotal element by our policy. */
+    if ( *usepr ) {
+        rtemp = fabs (lu_col_ptr[old_pivptr]);
+	if ( rtemp != 0.0 && rtemp >= thresh )
+	    pivptr = old_pivptr;
+	else
+	    *usepr = 0;
+    }
+    if ( *usepr == 0 ) {
+	/* Use diagonal pivot? */
+	if ( diag >= 0 ) { /* diagonal exists */
+	    rtemp = fabs (lu_col_ptr[diag]);
+	    if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag;
+        }
+	*pivrow = lsub_ptr[pivptr];
+    }
+    
+    /* Record pivot row */
+    perm_r[*pivrow] = jcol;
+    
+    /* Interchange row subscripts */
+    if ( pivptr != nsupc ) {
+	itemp = lsub_ptr[pivptr];
+	lsub_ptr[pivptr] = lsub_ptr[nsupc];
+	lsub_ptr[nsupc] = itemp;
+
+	/* Interchange numerical values as well, for the whole snode, such 
+	 * that L is indexed the same way as A.
+ 	 */
+	for (icol = 0; icol <= nsupc; icol++) {
+	    itemp = pivptr + icol * nsupr;
+	    temp = lu_sup_ptr[itemp];
+	    lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr];
+	    lu_sup_ptr[nsupc + icol*nsupr] = temp;
+	}
+    } /* if */
+
+    /* cdiv operation */
+    ops[FACT] += nsupr - nsupc;
+
+    temp = 1.0 / lu_col_ptr[nsupc];
+    for (k = nsupc+1; k < nsupr; k++) 
+	lu_col_ptr[k] *= temp;
+
+    return 0;
+}
+
--- a/intern/opennl/superlu/spruneL.c
+++ b/intern/opennl/superlu/spruneL.c
@ -0,0 +1,149 @@
+
+
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/*
+  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ 
+  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ 
+  Permission is hereby granted to use or copy this program for any
+  purpose, provided the above notices are retained on all copies.
+  Permission to modify the code and to distribute modified code is
+  granted, provided the above notices are retained, and a notice that
+  the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+#include "util.h"
+
+void
+spruneL(
+       const int  jcol,	     /* in */
+       const int  *perm_r,   /* in */
+       const int  pivrow,    /* in */
+       const int  nseg,	     /* in */
+       const int  *segrep,   /* in */
+       const int  *repfnz,   /* in */
+       int        *xprune,   /* out */
+       GlobalLU_t *Glu       /* modified - global LU data structures */
+       )
+{
+/*
+ * Purpose
+ * =======
+ *   Prunes the L-structure of supernodes whose L-structure
+ *   contains the current pivot row "pivrow"
+ *
+ */
+    float     utemp;
+    int        jsupno, irep, irep1, kmin, kmax, krow, movnum;
+    int        i, ktemp, minloc, maxloc;
+    int        do_prune; /* logical variable */
+    int        *xsup, *supno;
+    int        *lsub, *xlsub;
+    float     *lusup;
+    int        *xlusup;
+
+    xsup       = Glu->xsup;
+    supno      = Glu->supno;
+    lsub       = Glu->lsub;
+    xlsub      = Glu->xlsub;
+    lusup      = Glu->lusup;
+    xlusup     = Glu->xlusup;
+    
+    /*
+     * For each supernode-rep irep in U[*,j]
+     */
+    jsupno = supno[jcol];
+    for (i = 0; i < nseg; i++) {
+
+	irep = segrep[i];
+	irep1 = irep + 1;
+	do_prune = FALSE;
+
+	/* Don't prune with a zero U-segment */
+ 	if ( repfnz[irep] == EMPTY )
+		continue;
+
+     	/* If a snode overlaps with the next panel, then the U-segment 
+   	 * is fragmented into two parts -- irep and irep1. We should let
+	 * pruning occur at the rep-column in irep1's snode. 
+	 */
+	if ( supno[irep] == supno[irep1] ) 	/* Don't prune */
+		continue;
+
+	/*
+	 * If it has not been pruned & it has a nonz in row L[pivrow,i]
+	 */
+	if ( supno[irep] != jsupno ) {
+	    if ( xprune[irep] >= xlsub[irep1] ) {
+		kmin = xlsub[irep];
+		kmax = xlsub[irep1] - 1;
+		for (krow = kmin; krow <= kmax; krow++) 
+		    if ( lsub[krow] == pivrow ) {
+			do_prune = TRUE;
+			break;
+		    }
+	    }
+	    
+    	    if ( do_prune ) {
+
+	     	/* Do a quicksort-type partition
+	     	 * movnum=TRUE means that the num values have to be exchanged.
+	     	 */
+	        movnum = FALSE;
+	        if ( irep == xsup[supno[irep]] ) /* Snode of size 1 */
+			movnum = TRUE;
+
+	        while ( kmin <= kmax ) {
+
+	    	    if ( perm_r[lsub[kmax]] == EMPTY ) 
+			kmax--;
+		    else if ( perm_r[lsub[kmin]] != EMPTY )
+			kmin++;
+		    else { /* kmin below pivrow, and kmax above pivrow: 
+		            * 	interchange the two subscripts
+			    */
+		        ktemp = lsub[kmin];
+		        lsub[kmin] = lsub[kmax];
+		        lsub[kmax] = ktemp;
+
+			/* If the supernode has only one column, then we
+ 			 * only keep one set of subscripts. For any subscript 
+			 * interchange performed, similar interchange must be 
+			 * done on the numerical values.
+ 			 */
+		        if ( movnum ) {
+		    	    minloc = xlusup[irep] + (kmin - xlsub[irep]);
+		    	    maxloc = xlusup[irep] + (kmax - xlsub[irep]);
+			    utemp = lusup[minloc];
+		  	    lusup[minloc] = lusup[maxloc];
+			    lusup[maxloc] = utemp;
+		        }
+
+		        kmin++;
+		        kmax--;
+
+		    }
+
+	        } /* while */
+
+	        xprune[irep] = kmin;	/* Pruning */
+
+#ifdef CHK_PRUNE
+	printf("    After spruneL(),using col %d:  xprune[%d] = %d\n", 
+			jcol, irep, kmin);
+#endif
+	    } /* if do_prune */
+
+	} /* if */
+
+    } /* for each U-segment... */
+}
--- a/intern/opennl/superlu/ssnode_bmod.c
+++ b/intern/opennl/superlu/ssnode_bmod.c
@ -0,0 +1,117 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ 
+  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ 
+  Permission is hereby granted to use or copy this program for any
+  purpose, provided the above notices are retained on all copies.
+  Permission to modify the code and to distribute modified code is
+  granted, provided the above notices are retained, and a notice that
+  the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+
+void slsolve(int, int, float*, float*);
+void smatvec(int, int, int, float*, float*, float*);
+
+/*
+ * Performs numeric block updates within the relaxed snode. 
+ */
+int
+ssnode_bmod (
+	    const int  jcol,	  /* in */
+	    const int  fsupc,     /* in */
+	    float     *dense,    /* in */
+	    float     *tempv,    /* working array */
+	    GlobalLU_t *Glu,      /* modified */
+	    SuperLUStat_t *stat   /* output */
+	    )
+{
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+    _fcd ftcs1 = _cptofcd("L", strlen("L")),
+	 ftcs2 = _cptofcd("N", strlen("N")),
+	 ftcs3 = _cptofcd("U", strlen("U"));
+#endif
+    int            incx = 1, incy = 1;
+    float         alpha = -1.0, beta = 1.0;
+#endif
+
+    int            luptr, nsupc, nsupr, nrow;
+    int            isub, irow, i, iptr; 
+    register int   ufirst, nextlu;
+    int            *lsub, *xlsub;
+    float         *lusup;
+    int            *xlusup;
+    flops_t *ops = stat->ops;
+
+    lsub    = Glu->lsub;
+    xlsub   = Glu->xlsub;
+    lusup   = Glu->lusup;
+    xlusup  = Glu->xlusup;
+
+    nextlu = xlusup[jcol];
+    
+    /*
+     *	Process the supernodal portion of L\U[*,j]
+     */
+    for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
+  	irow = lsub[isub];
+	lusup[nextlu] = dense[irow];
+	dense[irow] = 0;
+	++nextlu;
+    }
+
+    xlusup[jcol + 1] = nextlu;	/* Initialize xlusup for next column */
+    
+    if ( fsupc < jcol ) {
+
+	luptr = xlusup[fsupc];
+	nsupr = xlsub[fsupc+1] - xlsub[fsupc];
+	nsupc = jcol - fsupc;	/* Excluding jcol */
+	ufirst = xlusup[jcol];	/* Points to the beginning of column
+				   jcol in supernode L\U(jsupno). */
+	nrow = nsupr - nsupc;
+
+	ops[TRSV] += nsupc * (nsupc - 1);
+	ops[GEMV] += 2 * nrow * nsupc;
+
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+	STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr, 
+	      &lusup[ufirst], &incx );
+	SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
+		&lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
+#else
+	strsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr, 
+	      &lusup[ufirst], &incx );
+	sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
+		&lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
+#endif
+#else
+	slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
+	smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc], 
+			&lusup[ufirst], &tempv[0] );
+
+        /* Scatter tempv[*] into lusup[*] */
+	iptr = ufirst + nsupc;
+	for (i = 0; i < nrow; i++) {
+	    lusup[iptr++] -= tempv[i];
+	    tempv[i] = 0.0;
+	}
+#endif
+
+    }
+
+    return 0;
+}
--- a/intern/opennl/superlu/ssnode_dfs.c
+++ b/intern/opennl/superlu/ssnode_dfs.c
@ -0,0 +1,106 @@
+
+
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/*
+  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ 
+  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ 
+  Permission is hereby granted to use or copy this program for any
+  purpose, provided the above notices are retained on all copies.
+  Permission to modify the code and to distribute modified code is
+  granted, provided the above notices are retained, and a notice that
+  the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+#include "util.h"
+
+int
+ssnode_dfs (
+	   const int  jcol,	    /* in - start of the supernode */
+	   const int  kcol, 	    /* in - end of the supernode */
+	   const int  *asub,        /* in */
+	   const int  *xa_begin,    /* in */
+	   const int  *xa_end,      /* in */
+	   int        *xprune,      /* out */
+	   int        *marker,      /* modified */
+	   GlobalLU_t *Glu          /* modified */
+	   )
+{
+/* Purpose
+ * =======
+ *    ssnode_dfs() - Determine the union of the row structures of those 
+ *    columns within the relaxed snode.
+ *    Note: The relaxed snodes are leaves of the supernodal etree, therefore, 
+ *    the portion outside the rectangular supernode must be zero.
+ *
+ * Return value
+ * ============
+ *     0   success;
+ *    >0   number of bytes allocated when run out of memory.
+ *
+ */
+    register int i, k, ifrom, ito, nextl, new_next;
+    int          nsuper, krow, kmark, mem_error;
+    int          *xsup, *supno;
+    int          *lsub, *xlsub;
+    int          nzlmax;
+    
+    xsup    = Glu->xsup;
+    supno   = Glu->supno;
+    lsub    = Glu->lsub;
+    xlsub   = Glu->xlsub;
+    nzlmax  = Glu->nzlmax;
+
+    nsuper = ++supno[jcol];	/* Next available supernode number */
+    nextl = xlsub[jcol];
+
+    for (i = jcol; i <= kcol; i++) {
+	/* For each nonzero in A[*,i] */
+	for (k = xa_begin[i]; k < xa_end[i]; k++) {	
+	    krow = asub[k];
+	    kmark = marker[krow];
+	    if ( kmark != kcol ) { /* First time visit krow */
+		marker[krow] = kcol;
+		lsub[nextl++] = krow;
+		if ( nextl >= nzlmax ) {
+		    if ( (mem_error = sLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu)) )
+			return (mem_error);
+		    lsub = Glu->lsub;
+		}
+	    }
+    	}
+	supno[i] = nsuper;
+    }
+
+    /* Supernode > 1, then make a copy of the subscripts for pruning */
+    if ( jcol < kcol ) {
+	new_next = nextl + (nextl - xlsub[jcol]);
+	while ( new_next > nzlmax ) {
+	    if ( (mem_error = sLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu)) )
+		return (mem_error);
+	    lsub = Glu->lsub;
+	}
+	ito = nextl;
+	for (ifrom = xlsub[jcol]; ifrom < nextl; )
+	    lsub[ito++] = lsub[ifrom++];	
+        for (i = jcol+1; i <= kcol; i++) xlsub[i] = nextl;
+	nextl = ito;
+    }
+
+    xsup[nsuper+1] = kcol + 1;
+    supno[kcol+1]  = nsuper;
+    xprune[kcol]   = nextl;
+    xlsub[kcol+1]  = nextl;
+
+    return 0;
+}
+
--- a/intern/opennl/superlu/ssp_blas2.c
+++ b/intern/opennl/superlu/ssp_blas2.c
@ -0,0 +1,469 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ * File name:		ssp_blas2.c
+ * Purpose:		Sparse BLAS 2, using some dense BLAS 2 operations.
+ */
+
+#include "ssp_defs.h"
+
+/* 
+ * Function prototypes 
+ */
+void susolve(int, int, float*, float*);
+void slsolve(int, int, float*, float*);
+void smatvec(int, int, int, float*, float*, float*);
+int strsv_(char*, char*, char*, int*, float*, int*, float*, int*);
+
+int
+sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L, 
+         SuperMatrix *U, float *x, SuperLUStat_t *stat, int *info)
+{
+/*
+ *   Purpose
+ *   =======
+ *
+ *   sp_strsv() solves one of the systems of equations   
+ *       A*x = b,   or   A'*x = b,
+ *   where b and x are n element vectors and A is a sparse unit , or   
+ *   non-unit, upper or lower triangular matrix.   
+ *   No test for singularity or near-singularity is included in this   
+ *   routine. Such tests must be performed before calling this routine.   
+ *
+ *   Parameters   
+ *   ==========   
+ *
+ *   uplo   - (input) char*
+ *            On entry, uplo specifies whether the matrix is an upper or   
+ *             lower triangular matrix as follows:   
+ *                uplo = 'U' or 'u'   A is an upper triangular matrix.   
+ *                uplo = 'L' or 'l'   A is a lower triangular matrix.   
+ *
+ *   trans  - (input) char*
+ *             On entry, trans specifies the equations to be solved as   
+ *             follows:   
+ *                trans = 'N' or 'n'   A*x = b.   
+ *                trans = 'T' or 't'   A'*x = b.
+ *                trans = 'C' or 'c'   A'*x = b.   
+ *
+ *   diag   - (input) char*
+ *             On entry, diag specifies whether or not A is unit   
+ *             triangular as follows:   
+ *                diag = 'U' or 'u'   A is assumed to be unit triangular.   
+ *                diag = 'N' or 'n'   A is not assumed to be unit   
+ *                                    triangular.   
+ *	     
+ *   L       - (input) SuperMatrix*
+ *	       The factor L from the factorization Pr*A*Pc=L*U. Use
+ *             compressed row subscripts storage for supernodes,
+ *             i.e., L has types: Stype = SC, Dtype = SLU_S, Mtype = TRLU.
+ *
+ *   U       - (input) SuperMatrix*
+ *	        The factor U from the factorization Pr*A*Pc=L*U.
+ *	        U has types: Stype = NC, Dtype = SLU_S, Mtype = TRU.
+ *    
+ *   x       - (input/output) float*
+ *             Before entry, the incremented array X must contain the n   
+ *             element right-hand side vector b. On exit, X is overwritten 
+ *             with the solution vector x.
+ *
+ *   info    - (output) int*
+ *             If *info = -i, the i-th argument had an illegal value.
+ *
+ */
+#ifdef _CRAY
+    _fcd ftcs1 = _cptofcd("L", strlen("L")),
+	 ftcs2 = _cptofcd("N", strlen("N")),
+	 ftcs3 = _cptofcd("U", strlen("U"));
+#endif
+    SCformat *Lstore;
+    NCformat *Ustore;
+    float   *Lval, *Uval;
+    int incx = 1;
+    int nrow;
+    int fsupc, nsupr, nsupc, luptr, istart, irow;
+    int i, k, iptr, jcol;
+    float *work;
+    flops_t solve_ops;
+
+    /* Test the input parameters */
+    *info = 0;
+    if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
+    else if ( !lsame_(trans, "N") && !lsame_(trans, "T") && 
+              !lsame_(trans, "C")) *info = -2;
+    else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
+    else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
+    else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
+    if ( *info ) {
+	i = -(*info);
+	xerbla_("sp_strsv", &i);
+	return 0;
+    }
+
+    Lstore = L->Store;
+    Lval = Lstore->nzval;
+    Ustore = U->Store;
+    Uval = Ustore->nzval;
+    solve_ops = 0;
+
+    if ( !(work = floatCalloc(L->nrow)) )
+	ABORT("Malloc fails for work in sp_strsv().");
+    
+    if ( lsame_(trans, "N") ) {	/* Form x := inv(A)*x. */
+	
+	if ( lsame_(uplo, "L") ) {
+	    /* Form x := inv(L)*x */
+    	    if ( L->nrow == 0 ) return 0; /* Quick return */
+	    
+	    for (k = 0; k <= Lstore->nsuper; k++) {
+		fsupc = L_FST_SUPC(k);
+		istart = L_SUB_START(fsupc);
+		nsupr = L_SUB_START(fsupc+1) - istart;
+		nsupc = L_FST_SUPC(k+1) - fsupc;
+		luptr = L_NZ_START(fsupc);
+		nrow = nsupr - nsupc;
+
+	        solve_ops += nsupc * (nsupc - 1);
+	        solve_ops += 2 * nrow * nsupc;
+
+		if ( nsupc == 1 ) {
+		    for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
+			irow = L_SUB(iptr);
+			++luptr;
+			x[irow] -= x[fsupc] * Lval[luptr];
+		    }
+		} else {
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+		    STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
+		       	&x[fsupc], &incx);
+		
+		    SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
+		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
+#else
+		    strsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
+		       	&x[fsupc], &incx);
+		
+		    sgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
+		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
+#endif
+#else
+		    slsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
+		
+		    smatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
+                             &x[fsupc], &work[0] );
+#endif		
+		
+		    iptr = istart + nsupc;
+		    for (i = 0; i < nrow; ++i, ++iptr) {
+			irow = L_SUB(iptr);
+			x[irow] -= work[i];	/* Scatter */
+			work[i] = 0.0;
+
+		    }
+	 	}
+	    } /* for k ... */
+	    
+	} else {
+	    /* Form x := inv(U)*x */
+	    
+	    if ( U->nrow == 0 ) return 0; /* Quick return */
+	    
+	    for (k = Lstore->nsuper; k >= 0; k--) {
+	    	fsupc = L_FST_SUPC(k);
+	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
+	    	nsupc = L_FST_SUPC(k+1) - fsupc;
+	    	luptr = L_NZ_START(fsupc);
+		
+    	        solve_ops += nsupc * (nsupc + 1);
+
+		if ( nsupc == 1 ) {
+		    x[fsupc] /= Lval[luptr];
+		    for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
+			irow = U_SUB(i);
+			x[irow] -= x[fsupc] * Uval[i];
+		    }
+		} else {
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+		    STRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
+		       &x[fsupc], &incx);
+#else
+		    strsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
+                           &x[fsupc], &incx);
+#endif
+#else		
+		    susolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
+#endif		
+
+		    for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
+		        solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
+		    	for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); 
+				i++) {
+			    irow = U_SUB(i);
+			    x[irow] -= x[jcol] * Uval[i];
+		    	}
+                    }
+		}
+	    } /* for k ... */
+	    
+	}
+    } else { /* Form x := inv(A')*x */
+	
+	if ( lsame_(uplo, "L") ) {
+	    /* Form x := inv(L')*x */
+    	    if ( L->nrow == 0 ) return 0; /* Quick return */
+	    
+	    for (k = Lstore->nsuper; k >= 0; --k) {
+	    	fsupc = L_FST_SUPC(k);
+	    	istart = L_SUB_START(fsupc);
+	    	nsupr = L_SUB_START(fsupc+1) - istart;
+	    	nsupc = L_FST_SUPC(k+1) - fsupc;
+	    	luptr = L_NZ_START(fsupc);
+
+		solve_ops += 2 * (nsupr - nsupc) * nsupc;
+
+		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
+		    iptr = istart + nsupc;
+		    for (i = L_NZ_START(jcol) + nsupc; 
+				i < L_NZ_START(jcol+1); i++) {
+			irow = L_SUB(iptr);
+			x[jcol] -= x[irow] * Lval[i];
+			iptr++;
+		    }
+		}
+		
+		if ( nsupc > 1 ) {
+		    solve_ops += nsupc * (nsupc - 1);
+#ifdef _CRAY
+                    ftcs1 = _cptofcd("L", strlen("L"));
+                    ftcs2 = _cptofcd("T", strlen("T"));
+                    ftcs3 = _cptofcd("U", strlen("U"));
+		    STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
+			&x[fsupc], &incx);
+#else
+		    strsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
+			&x[fsupc], &incx);
+#endif
+		}
+	    }
+	} else {
+	    /* Form x := inv(U')*x */
+	    if ( U->nrow == 0 ) return 0; /* Quick return */
+	    
+	    for (k = 0; k <= Lstore->nsuper; k++) {
+	    	fsupc = L_FST_SUPC(k);
+	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
+	    	nsupc = L_FST_SUPC(k+1) - fsupc;
+	    	luptr = L_NZ_START(fsupc);
+
+		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
+		    solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
+		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
+			irow = U_SUB(i);
+			x[jcol] -= x[irow] * Uval[i];
+		    }
+		}
+
+		solve_ops += nsupc * (nsupc + 1);
+
+		if ( nsupc == 1 ) {
+		    x[fsupc] /= Lval[luptr];
+		} else {
+#ifdef _CRAY
+                    ftcs1 = _cptofcd("U", strlen("U"));
+                    ftcs2 = _cptofcd("T", strlen("T"));
+                    ftcs3 = _cptofcd("N", strlen("N"));
+		    STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
+			    &x[fsupc], &incx);
+#else
+		    strsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
+			    &x[fsupc], &incx);
+#endif
+		}
+	    } /* for k ... */
+	}
+    }
+
+    stat->ops[SOLVE] += solve_ops;
+    SUPERLU_FREE(work);
+    return 0;
+}
+
+
+
+
+int
+sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x, 
+	 int incx, float beta, float *y, int incy)
+{
+/*  Purpose   
+    =======   
+
+    sp_sgemv()  performs one of the matrix-vector operations   
+       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
+    where alpha and beta are scalars, x and y are vectors and A is a
+    sparse A->nrow by A->ncol matrix.   
+
+    Parameters   
+    ==========   
+
+    TRANS  - (input) char*
+             On entry, TRANS specifies the operation to be performed as   
+             follows:   
+                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
+                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
+                TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   
+
+    ALPHA  - (input) float
+             On entry, ALPHA specifies the scalar alpha.   
+
+    A      - (input) SuperMatrix*
+             Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
+             Currently, the type of A can be:
+                 Stype = NC or NCP; Dtype = SLU_S; Mtype = GE. 
+             In the future, more general A can be handled.
+
+    X      - (input) float*, array of DIMENSION at least   
+             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
+             and at least   
+             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
+             Before entry, the incremented array X must contain the   
+             vector x.   
+
+    INCX   - (input) int
+             On entry, INCX specifies the increment for the elements of   
+             X. INCX must not be zero.   
+
+    BETA   - (input) float
+             On entry, BETA specifies the scalar beta. When BETA is   
+             supplied as zero then Y need not be set on input.   
+
+    Y      - (output) float*,  array of DIMENSION at least   
+             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
+             and at least   
+             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
+             Before entry with BETA non-zero, the incremented array Y   
+             must contain the vector y. On exit, Y is overwritten by the 
+             updated vector y.
+	     
+    INCY   - (input) int
+             On entry, INCY specifies the increment for the elements of   
+             Y. INCY must not be zero.   
+
+    ==== Sparse Level 2 Blas routine.   
+*/
+
+    /* Local variables */
+    NCformat *Astore;
+    float   *Aval;
+    int info;
+    float temp;
+    int lenx, leny, i, j, irow;
+    int iy, jx, jy, kx, ky;
+    int notran;
+
+    notran = lsame_(trans, "N");
+    Astore = A->Store;
+    Aval = Astore->nzval;
+    
+    /* Test the input parameters */
+    info = 0;
+    if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
+    else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
+    else if (incx == 0) info = 5;
+    else if (incy == 0)	info = 8;
+    if (info != 0) {
+	xerbla_("sp_sgemv ", &info);
+	return 0;
+    }
+
+    /* Quick return if possible. */
+    if (A->nrow == 0 || A->ncol == 0 || (alpha == 0. && beta == 1.))
+	return 0;
+
+    /* Set  LENX  and  LENY, the lengths of the vectors x and y, and set 
+       up the start points in  X  and  Y. */
+    if (lsame_(trans, "N")) {
+	lenx = A->ncol;
+	leny = A->nrow;
+    } else {
+	lenx = A->nrow;
+	leny = A->ncol;
+    }
+    if (incx > 0) kx = 0;
+    else kx =  - (lenx - 1) * incx;
+    if (incy > 0) ky = 0;
+    else ky =  - (leny - 1) * incy;
+
+    /* Start the operations. In this version the elements of A are   
+       accessed sequentially with one pass through A. */
+    /* First form  y := beta*y. */
+    if (beta != 1.) {
+	if (incy == 1) {
+	    if (beta == 0.)
+		for (i = 0; i < leny; ++i) y[i] = 0.;
+	    else
+		for (i = 0; i < leny; ++i) y[i] = beta * y[i];
+	} else {
+	    iy = ky;
+	    if (beta == 0.)
+		for (i = 0; i < leny; ++i) {
+		    y[iy] = 0.;
+		    iy += incy;
+		}
+	    else
+		for (i = 0; i < leny; ++i) {
+		    y[iy] = beta * y[iy];
+		    iy += incy;
+		}
+	}
+    }
+    
+    if (alpha == 0.) return 0;
+
+    if ( notran ) {
+	/* Form  y := alpha*A*x + y. */
+	jx = kx;
+	if (incy == 1) {
+	    for (j = 0; j < A->ncol; ++j) {
+		if (x[jx] != 0.) {
+		    temp = alpha * x[jx];
+		    for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
+			irow = Astore->rowind[i];
+			y[irow] += temp * Aval[i];
+		    }
+		}
+		jx += incx;
+	    }
+	} else {
+	    ABORT("Not implemented.");
+	}
+    } else {
+	/* Form  y := alpha*A'*x + y. */
+	jy = ky;
+	if (incx == 1) {
+	    for (j = 0; j < A->ncol; ++j) {
+		temp = 0.;
+		for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
+		    irow = Astore->rowind[i];
+		    temp += Aval[i] * x[irow];
+		}
+		y[jy] += alpha * temp;
+		jy += incy;
+	    }
+	} else {
+	    ABORT("Not implemented.");
+	}
+    }
+    return 0;
+} /* sp_sgemv */
+
+
+
--- a/intern/opennl/superlu/ssp_blas3.c
+++ b/intern/opennl/superlu/ssp_blas3.c
@ -0,0 +1,121 @@
+
+
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/*
+ * File name:		sp_blas3.c
+ * Purpose:		Sparse BLAS3, using some dense BLAS3 operations.
+ */
+
+#include "ssp_defs.h"
+#include "util.h"
+
+int
+sp_sgemm(char *transa, int n, 
+         float alpha, SuperMatrix *A, float *b, int ldb, 
+         float beta, float *c, int ldc)
+{
+/*  Purpose   
+    =======   
+
+    sp_s performs one of the matrix-matrix operations   
+
+       C := alpha*op( A )*op( B ) + beta*C,   
+
+    where  op( X ) is one of 
+
+       op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ),
+
+    alpha and beta are scalars, and A, B and C are matrices, with op( A ) 
+    an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. 
+  
+
+    Parameters   
+    ==========   
+
+    TRANSA - (input) char*
+             On entry, TRANSA specifies the form of op( A ) to be used in 
+             the matrix multiplication as follows:   
+                TRANSA = 'N' or 'n',  op( A ) = A.   
+                TRANSA = 'T' or 't',  op( A ) = A'.   
+                TRANSA = 'C' or 'c',  op( A ) = conjg( A' ).   
+             Unchanged on exit.   
+
+    TRANSB - (input) char*
+             On entry, TRANSB specifies the form of op( B ) to be used in 
+             the matrix multiplication as follows:   
+                TRANSB = 'N' or 'n',  op( B ) = B.   
+                TRANSB = 'T' or 't',  op( B ) = B'.   
+                TRANSB = 'C' or 'c',  op( B ) = conjg( B' ).   
+             Unchanged on exit.   
+
+    M      - (input) int   
+             On entry,  M  specifies  the number of rows of the matrix 
+	     op( A ) and of the matrix C.  M must be at least zero. 
+	     Unchanged on exit.   
+
+    N      - (input) int
+             On entry,  N specifies the number of columns of the matrix 
+	     op( B ) and the number of columns of the matrix C. N must be 
+	     at least zero.
+	     Unchanged on exit.   
+
+    K      - (input) int
+             On entry, K specifies the number of columns of the matrix 
+	     op( A ) and the number of rows of the matrix op( B ). K must 
+	     be at least  zero.   
+             Unchanged on exit.
+	     
+    ALPHA  - (input) float
+             On entry, ALPHA specifies the scalar alpha.   
+
+    A      - (input) SuperMatrix*
+             Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
+             Currently, the type of A can be:
+                 Stype = NC or NCP; Dtype = SLU_S; Mtype = GE. 
+             In the future, more general A can be handled.
+
+    B      - FLOAT PRECISION array of DIMENSION ( LDB, kb ), where kb is 
+             n when TRANSB = 'N' or 'n',  and is  k otherwise.   
+             Before entry with  TRANSB = 'N' or 'n',  the leading k by n 
+             part of the array B must contain the matrix B, otherwise 
+             the leading n by k part of the array B must contain the 
+             matrix B.   
+             Unchanged on exit.   
+
+    LDB    - (input) int
+             On entry, LDB specifies the first dimension of B as declared 
+             in the calling (sub) program. LDB must be at least max( 1, n ).  
+             Unchanged on exit.   
+
+    BETA   - (input) float
+             On entry, BETA specifies the scalar beta. When BETA is   
+             supplied as zero then C need not be set on input.   
+
+    C      - FLOAT PRECISION array of DIMENSION ( LDC, n ).   
+             Before entry, the leading m by n part of the array C must 
+             contain the matrix C,  except when beta is zero, in which 
+             case C need not be set on entry.   
+             On exit, the array C is overwritten by the m by n matrix 
+	     ( alpha*op( A )*B + beta*C ).   
+
+    LDC    - (input) int
+             On entry, LDC specifies the first dimension of C as declared 
+             in the calling (sub)program. LDC must be at least max(1,m).   
+             Unchanged on exit.   
+
+    ==== Sparse Level 3 Blas routine.   
+*/
+    int    incx = 1, incy = 1;
+    int    j;
+
+    for (j = 0; j < n; ++j) {
+	sp_sgemv(transa, alpha, A, &b[ldb*j], incx, beta, &c[ldc*j], incy);
+    }
+    return 0;    
+}
--- a/intern/opennl/superlu/ssp_defs.h
+++ b/intern/opennl/superlu/ssp_defs.h
@ -0,0 +1,234 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+#ifndef __SUPERLU_sSP_DEFS /* allow multiple inclusions */
+#define __SUPERLU_sSP_DEFS
+
+/*
+ * File name:		ssp_defs.h
+ * Purpose:             Sparse matrix types and function prototypes
+ * History:
+ */
+
+#ifdef _CRAY
+#include <fortran.h>
+#include <string.h>
+#endif
+
+/* Define my integer type int_t */
+typedef int int_t; /* default */
+
+#include "Cnames.h"
+#include "supermatrix.h"
+#include "util.h"
+
+
+/*
+ * Global data structures used in LU factorization -
+ * 
+ *   nsuper: #supernodes = nsuper + 1, numbered [0, nsuper].
+ *   (xsup,supno): supno[i] is the supernode no to which i belongs;
+ *	xsup(s) points to the beginning of the s-th supernode.
+ *	e.g.   supno 0 1 2 2 3 3 3 4 4 4 4 4   (n=12)
+ *	        xsup 0 1 2 4 7 12
+ *	Note: dfs will be performed on supernode rep. relative to the new 
+ *	      row pivoting ordering
+ *
+ *   (xlsub,lsub): lsub[*] contains the compressed subscript of
+ *	rectangular supernodes; xlsub[j] points to the starting
+ *	location of the j-th column in lsub[*]. Note that xlsub 
+ *	is indexed by column.
+ *	Storage: original row subscripts
+ *
+ *      During the course of sparse LU factorization, we also use
+ *	(xlsub,lsub) for the purpose of symmetric pruning. For each
+ *	supernode {s,s+1,...,t=s+r} with first column s and last
+ *	column t, the subscript set
+ *		lsub[j], j=xlsub[s], .., xlsub[s+1]-1
+ *	is the structure of column s (i.e. structure of this supernode).
+ *	It is used for the storage of numerical values.
+ *	Furthermore,
+ *		lsub[j], j=xlsub[t], .., xlsub[t+1]-1
+ *	is the structure of the last column t of this supernode.
+ *	It is for the purpose of symmetric pruning. Therefore, the
+ *	structural subscripts can be rearranged without making physical
+ *	interchanges among the numerical values.
+ *
+ *	However, if the supernode has only one column, then we
+ *	only keep one set of subscripts. For any subscript interchange
+ *	performed, similar interchange must be done on the numerical
+ *	values.
+ *
+ *	The last column structures (for pruning) will be removed
+ *	after the numercial LU factorization phase.
+ *
+ *   (xlusup,lusup): lusup[*] contains the numerical values of the
+ *	rectangular supernodes; xlusup[j] points to the starting
+ *	location of the j-th column in storage vector lusup[*]
+ *	Note: xlusup is indexed by column.
+ *	Each rectangular supernode is stored by column-major
+ *	scheme, consistent with Fortran 2-dim array storage.
+ *
+ *   (xusub,ucol,usub): ucol[*] stores the numerical values of
+ *	U-columns outside the rectangular supernodes. The row
+ *	subscript of nonzero ucol[k] is stored in usub[k].
+ *	xusub[i] points to the starting location of column i in ucol.
+ *	Storage: new row subscripts; that is subscripts of PA.
+ */
+typedef struct {
+    int     *xsup;    /* supernode and column mapping */
+    int     *supno;   
+    int     *lsub;    /* compressed L subscripts */
+    int	    *xlsub;
+    float  *lusup;   /* L supernodes */
+    int     *xlusup;
+    float  *ucol;    /* U columns */
+    int     *usub;
+    int	    *xusub;
+    int     nzlmax;   /* current max size of lsub */
+    int     nzumax;   /*    "    "    "      ucol */
+    int     nzlumax;  /*    "    "    "     lusup */
+    int     n;        /* number of columns in the matrix */
+    LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */
+} GlobalLU_t;
+
+typedef struct {
+    float for_lu;
+    float total_needed;
+    int   expansions;
+} mem_usage_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Driver routines */
+extern void
+sgssv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *,
+      SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *);
+extern void
+sgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *,
+       char *, float *, float *, SuperMatrix *, SuperMatrix *,
+       void *, int, SuperMatrix *, SuperMatrix *,
+       float *, float *, float *, float *,
+       mem_usage_t *, SuperLUStat_t *, int *);
+
+/* Supernodal LU factor related */
+extern void
+sCreate_CompCol_Matrix(SuperMatrix *, int, int, int, float *,
+		       int *, int *, Stype_t, Dtype_t, Mtype_t);
+extern void
+sCreate_CompRow_Matrix(SuperMatrix *, int, int, int, float *,
+		       int *, int *, Stype_t, Dtype_t, Mtype_t);
+extern void
+sCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *);
+extern void
+sCreate_Dense_Matrix(SuperMatrix *, int, int, float *, int,
+		     Stype_t, Dtype_t, Mtype_t);
+extern void
+sCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, float *, 
+		         int *, int *, int *, int *, int *,
+			 Stype_t, Dtype_t, Mtype_t);
+extern void
+sCopy_Dense_Matrix(int, int, float *, int, float *, int);
+
+extern void    countnz (const int, int *, int *, int *, GlobalLU_t *);
+extern void    fixupL (const int, const int *, GlobalLU_t *);
+
+extern void    sallocateA (int, int, float **, int **, int **);
+extern void    sgstrf (superlu_options_t*, SuperMatrix*, 
+                       int, int, int*, void *, int, int *, int *, 
+                       SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *);
+extern int     ssnode_dfs (const int, const int, const int *, const int *,
+			     const int *, int *, int *, GlobalLU_t *);
+extern int     ssnode_bmod (const int, const int, float *,
+                              float *, GlobalLU_t *, SuperLUStat_t*);
+extern void    spanel_dfs (const int, const int, const int, SuperMatrix *,
+			   int *, int *, float *, int *, int *, int *,
+			   int *, int *, int *, int *, GlobalLU_t *);
+extern void    spanel_bmod (const int, const int, const int, const int,
+                           float *, float *, int *, int *,
+			   GlobalLU_t *, SuperLUStat_t*);
+extern int     scolumn_dfs (const int, const int, int *, int *, int *, int *,
+			   int *, int *, int *, int *, int *, GlobalLU_t *);
+extern int     scolumn_bmod (const int, const int, float *,
+			   float *, int *, int *, int,
+                           GlobalLU_t *, SuperLUStat_t*);
+extern int     scopy_to_ucol (int, int, int *, int *, int *,
+                              float *, GlobalLU_t *);         
+extern int     spivotL (const int, const float, int *, int *, 
+                         int *, int *, int *, GlobalLU_t *, SuperLUStat_t*);
+extern void    spruneL (const int, const int *, const int, const int,
+			  const int *, const int *, int *, GlobalLU_t *);
+extern void    sreadmt (int *, int *, int *, float **, int **, int **);
+extern void    sGenXtrue (int, int, float *, int);
+extern void    sFillRHS (trans_t, int, float *, int, SuperMatrix *,
+			  SuperMatrix *);
+extern void    sgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *,
+                        SuperMatrix *, SuperLUStat_t*, int *);
+
+
+/* Driver related */
+
+extern void    sgsequ (SuperMatrix *, float *, float *, float *,
+			float *, float *, int *);
+extern void    slaqgs (SuperMatrix *, float *, float *, float,
+                        float, float, char *);
+extern void    sgscon (char *, SuperMatrix *, SuperMatrix *, 
+		         float, float *, SuperLUStat_t*, int *);
+extern float   sPivotGrowth(int, SuperMatrix *, int *, 
+                            SuperMatrix *, SuperMatrix *);
+extern void    sgsrfs (trans_t, SuperMatrix *, SuperMatrix *,
+                       SuperMatrix *, int *, int *, char *, float *, 
+                       float *, SuperMatrix *, SuperMatrix *,
+                       float *, float *, SuperLUStat_t*, int *);
+
+extern int     sp_strsv (char *, char *, char *, SuperMatrix *,
+			SuperMatrix *, float *, SuperLUStat_t*, int *);
+extern int     sp_sgemv (char *, float, SuperMatrix *, float *,
+			int, float, float *, int);
+
+extern int     sp_sgemm (char *, int, float,
+			SuperMatrix *, float *, int, float, 
+			float *, int);
+
+/* Memory-related */
+extern int     sLUMemInit (fact_t, void *, int, int, int, int, int,
+			     SuperMatrix *, SuperMatrix *,
+			     GlobalLU_t *, int **, float **);
+extern void    sSetRWork (int, int, float *, float **, float **);
+extern void    sLUWorkFree (int *, float *, GlobalLU_t *);
+extern int     sLUMemXpand (int, int, MemType, int *, GlobalLU_t *);
+
+extern float  *floatMalloc(int);
+extern float  *floatCalloc(int);
+extern int     smemory_usage(const int, const int, const int, const int);
+extern int     sQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *);
+
+/* Auxiliary routines */
+extern void    sreadhb(int *, int *, int *, float **, int **, int **);
+extern void    sCompRow_to_CompCol(int, int, int, float*, int*, int*,
+		                   float **, int **, int **);
+extern void    sfill (float *, int, float);
+extern void    sinf_norm_error (int, SuperMatrix *, float *);
+extern void    PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *,
+			 float, float, float *, float *, char *);
+
+/* Routines for debugging */
+extern void    sPrint_CompCol_Matrix(char *, SuperMatrix *);
+extern void    sPrint_SuperNode_Matrix(char *, SuperMatrix *);
+extern void    sPrint_Dense_Matrix(char *, SuperMatrix *);
+extern void    print_lu_col(char *, int, int, int *, GlobalLU_t *);
+extern void    check_tempv(int, float *);
+
+#ifdef __cplusplus
+  }
+#endif
+
+#endif /* __SUPERLU_sSP_DEFS */
+
--- a/intern/opennl/superlu/strsv.c
+++ b/intern/opennl/superlu/strsv.c
@ -0,0 +1,331 @@
+
+/* Subroutine */ int strsv_(char *uplo, char *trans, char *diag, int *n, 
+	float *a, int *lda, float *x, int *incx)
+{
+
+
+    /* System generated locals */
+    int i__1, i__2;
+
+    /* Local variables */
+    static int info;
+    static float temp;
+    static int i, j;
+    extern int lsame_(char *, char *);
+    static int ix, jx, kx;
+    extern /* Subroutine */ int xerbla_(char *, int *);
+    static int nounit;
+
+
+/*  Purpose   
+    =======   
+
+    STRSV  solves one of the systems of equations   
+
+       A*x = b,   or   A'*x = b,   
+
+    where b and x are n element vectors and A is an n by n unit, or   
+    non-unit, upper or lower triangular matrix.   
+
+    No test for singularity or near-singularity is included in this   
+    routine. Such tests must be performed before calling this routine.   
+
+    Parameters   
+    ==========   
+
+    UPLO   - CHARACTER*1.   
+             On entry, UPLO specifies whether the matrix is an upper or   
+             lower triangular matrix as follows:   
+
+                UPLO = 'U' or 'u'   A is an upper triangular matrix.   
+
+                UPLO = 'L' or 'l'   A is a lower triangular matrix.   
+
+             Unchanged on exit.   
+
+    TRANS  - CHARACTER*1.   
+             On entry, TRANS specifies the equations to be solved as   
+             follows:   
+
+                TRANS = 'N' or 'n'   A*x = b.   
+
+                TRANS = 'T' or 't'   A'*x = b.   
+
+                TRANS = 'C' or 'c'   A'*x = b.   
+
+             Unchanged on exit.   
+
+    DIAG   - CHARACTER*1.   
+             On entry, DIAG specifies whether or not A is unit   
+             triangular as follows:   
+
+                DIAG = 'U' or 'u'   A is assumed to be unit triangular.   
+
+                DIAG = 'N' or 'n'   A is not assumed to be unit   
+                                    triangular.   
+
+             Unchanged on exit.   
+
+    N      - INTEGER.   
+             On entry, N specifies the order of the matrix A.   
+             N must be at least zero.   
+             Unchanged on exit.   
+
+    A      - REAL             array of DIMENSION ( LDA, n ).   
+             Before entry with  UPLO = 'U' or 'u', the leading n by n   
+             upper triangular part of the array A must contain the upper 
+  
+             triangular matrix and the strictly lower triangular part of 
+  
+             A is not referenced.   
+             Before entry with UPLO = 'L' or 'l', the leading n by n   
+             lower triangular part of the array A must contain the lower 
+  
+             triangular matrix and the strictly upper triangular part of 
+  
+             A is not referenced.   
+             Note that when  DIAG = 'U' or 'u', the diagonal elements of 
+  
+             A are not referenced either, but are assumed to be unity.   
+             Unchanged on exit.   
+
+    LDA    - INTEGER.   
+             On entry, LDA specifies the first dimension of A as declared 
+  
+             in the calling (sub) program. LDA must be at least   
+             max( 1, n ).   
+             Unchanged on exit.   
+
+    X      - REAL             array of dimension at least   
+             ( 1 + ( n - 1 )*abs( INCX ) ).   
+             Before entry, the incremented array X must contain the n   
+             element right-hand side vector b. On exit, X is overwritten 
+  
+             with the solution vector x.   
+
+    INCX   - INTEGER.   
+             On entry, INCX specifies the increment for the elements of   
+             X. INCX must not be zero.   
+             Unchanged on exit.   
+
+
+    Level 2 Blas routine.   
+
+    -- Written on 22-October-1986.   
+       Jack Dongarra, Argonne National Lab.   
+       Jeremy Du Croz, Nag Central Office.   
+       Sven Hammarling, Nag Central Office.   
+       Richard Hanson, Sandia National Labs.   
+
+
+
+       Test the input parameters.   
+
+    
+   Parameter adjustments   
+       Function Body */
+#define X(I) x[(I)-1]
+
+#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
+
+    info = 0;
+    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
+	info = 1;
+    } else if (! lsame_(trans, "N") && ! lsame_(trans, "T") &&
+	     ! lsame_(trans, "C")) {
+	info = 2;
+    } else if (! lsame_(diag, "U") && ! lsame_(diag, "N")) {
+	info = 3;
+    } else if (*n < 0) {
+	info = 4;
+    } else if (*lda < ((1 > *n)? 1: *n)) {
+	info = 6;
+    } else if (*incx == 0) {
+	info = 8;
+    }
+    if (info != 0) {
+	xerbla_("STRSV ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    nounit = lsame_(diag, "N");
+
+/*     Set up the start point in X if the increment is not unity. This   
+       will be  ( N - 1 )*INCX  too small for descending loops. */
+
+    if (*incx <= 0) {
+	kx = 1 - (*n - 1) * *incx;
+    } else if (*incx != 1) {
+	kx = 1;
+    }
+
+/*     Start the operations. In this version the elements of A are   
+       accessed sequentially with one pass through A. */
+
+    if (lsame_(trans, "N")) {
+
+/*        Form  x := inv( A )*x. */
+
+	if (lsame_(uplo, "U")) {
+	    if (*incx == 1) {
+		for (j = *n; j >= 1; --j) {
+		    if (X(j) != 0.f) {
+			if (nounit) {
+			    X(j) /= A(j,j);
+			}
+			temp = X(j);
+			for (i = j - 1; i >= 1; --i) {
+			    X(i) -= temp * A(i,j);
+/* L10: */
+			}
+		    }
+/* L20: */
+		}
+	    } else {
+		jx = kx + (*n - 1) * *incx;
+		for (j = *n; j >= 1; --j) {
+		    if (X(jx) != 0.f) {
+			if (nounit) {
+			    X(jx) /= A(j,j);
+			}
+			temp = X(jx);
+			ix = jx;
+			for (i = j - 1; i >= 1; --i) {
+			    ix -= *incx;
+			    X(ix) -= temp * A(i,j);
+/* L30: */
+			}
+		    }
+		    jx -= *incx;
+/* L40: */
+		}
+	    }
+	} else {
+	    if (*incx == 1) {
+		i__1 = *n;
+		for (j = 1; j <= *n; ++j) {
+		    if (X(j) != 0.f) {
+			if (nounit) {
+			    X(j) /= A(j,j);
+			}
+			temp = X(j);
+			i__2 = *n;
+			for (i = j + 1; i <= *n; ++i) {
+			    X(i) -= temp * A(i,j);
+/* L50: */
+			}
+		    }
+/* L60: */
+		}
+	    } else {
+		jx = kx;
+		i__1 = *n;
+		for (j = 1; j <= *n; ++j) {
+		    if (X(jx) != 0.f) {
+			if (nounit) {
+			    X(jx) /= A(j,j);
+			}
+			temp = X(jx);
+			ix = jx;
+			i__2 = *n;
+			for (i = j + 1; i <= *n; ++i) {
+			    ix += *incx;
+			    X(ix) -= temp * A(i,j);
+/* L70: */
+			}
+		    }
+		    jx += *incx;
+/* L80: */
+		}
+	    }
+	}
+    } else {
+
+/*        Form  x := inv( A' )*x. */
+
+	if (lsame_(uplo, "U")) {
+	    if (*incx == 1) {
+		i__1 = *n;
+		for (j = 1; j <= *n; ++j) {
+		    temp = X(j);
+		    i__2 = j - 1;
+		    for (i = 1; i <= j-1; ++i) {
+			temp -= A(i,j) * X(i);
+/* L90: */
+		    }
+		    if (nounit) {
+			temp /= A(j,j);
+		    }
+		    X(j) = temp;
+/* L100: */
+		}
+	    } else {
+		jx = kx;
+		i__1 = *n;
+		for (j = 1; j <= *n; ++j) {
+		    temp = X(jx);
+		    ix = kx;
+		    i__2 = j - 1;
+		    for (i = 1; i <= j-1; ++i) {
+			temp -= A(i,j) * X(ix);
+			ix += *incx;
+/* L110: */
+		    }
+		    if (nounit) {
+			temp /= A(j,j);
+		    }
+		    X(jx) = temp;
+		    jx += *incx;
+/* L120: */
+		}
+	    }
+	} else {
+	    if (*incx == 1) {
+		for (j = *n; j >= 1; --j) {
+		    temp = X(j);
+		    i__1 = j + 1;
+		    for (i = *n; i >= j+1; --i) {
+			temp -= A(i,j) * X(i);
+/* L130: */
+		    }
+		    if (nounit) {
+			temp /= A(j,j);
+		    }
+		    X(j) = temp;
+/* L140: */
+		}
+	    } else {
+		kx += (*n - 1) * *incx;
+		jx = kx;
+		for (j = *n; j >= 1; --j) {
+		    temp = X(jx);
+		    ix = kx;
+		    i__1 = j + 1;
+		    for (i = *n; i >= j+1; --i) {
+			temp -= A(i,j) * X(ix);
+			ix -= *incx;
+/* L150: */
+		    }
+		    if (nounit) {
+			temp /= A(j,j);
+		    }
+		    X(jx) = temp;
+		    jx -= *incx;
+/* L160: */
+		}
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of STRSV . */
+
+} /* strsv_ */
+
--- a/intern/opennl/superlu/superlu_timer.c
+++ b/intern/opennl/superlu/superlu_timer.c
@ -0,0 +1,55 @@
+/* 
+ * Purpose
+ * ======= 
+ *	Returns the time in seconds used by the process.
+ *
+ * Note: the timer function call is machine dependent. Use conditional
+ *       compilation to choose the appropriate function.
+ *
+ */
+
+/* We want this flag, safer than putting in build system */
+#define NO_TIMER
+
+#ifdef SUN 
+/*
+ * 	It uses the system call gethrtime(3C), which is accurate to 
+ *	nanoseconds. 
+*/
+#include <sys/time.h>
+ 
+double SuperLU_timer_() {
+    return ( (double)gethrtime() / 1e9 );
+}
+
+#else
+
+#ifndef NO_TIMER
+#include <sys/types.h>
+#include <sys/times.h>
+#include <time.h>
+#include <sys/time.h>
+#endif
+
+#ifndef CLK_TCK
+#define CLK_TCK 60
+#endif
+
+double SuperLU_timer_()
+{
+#ifdef NO_TIMER
+    /* no sys/times.h on WIN32 */
+    double tmp;
+    tmp = 0.0;
+#else
+    struct tms use;
+    double tmp;
+    times(&use);
+    tmp = use.tms_utime;
+    tmp += use.tms_stime;
+#endif
+    return (double)(tmp) / CLK_TCK;
+}
+
+#endif
+
--- a/intern/opennl/superlu/supermatrix.h
+++ b/intern/opennl/superlu/supermatrix.h
@ -0,0 +1,140 @@
+#ifndef __SUPERLU_SUPERMATRIX /* allow multiple inclusions */
+#define __SUPERLU_SUPERMATRIX
+
+/********************************************
+ * The matrix types are defined as follows. *
+ ********************************************/
+typedef enum {
+    SLU_NC,    /* column-wise, no supernode */
+    SLU_NR,    /* row-wize, no supernode */
+    SLU_SC,    /* column-wise, supernode */
+    SLU_SR,    /* row-wise, supernode */
+    SLU_NCP,   /* column-wise, column-permuted, no supernode 
+                  (The consecutive columns of nonzeros, after permutation,
+		   may not be stored  contiguously.) */
+    SLU_DN     /* Fortran style column-wise storage for dense matrix */
+} Stype_t;
+
+typedef enum {
+    SLU_S,     /* single */
+    SLU_D,     /* double */
+    SLU_C,     /* single complex */
+    SLU_Z      /* double complex */
+} Dtype_t;
+
+typedef enum {
+    SLU_GE,    /* general */
+    SLU_TRLU,  /* lower triangular, unit diagonal */
+    SLU_TRUU,  /* upper triangular, unit diagonal */
+    SLU_TRL,   /* lower triangular */
+    SLU_TRU,   /* upper triangular */
+    SLU_SYL,   /* symmetric, store lower half */
+    SLU_SYU,   /* symmetric, store upper half */
+    SLU_HEL,   /* Hermitian, store lower half */
+    SLU_HEU    /* Hermitian, store upper half */
+} Mtype_t;
+
+typedef struct {
+	Stype_t Stype; /* Storage type: interprets the storage structure 
+		   	  pointed to by *Store. */
+	Dtype_t Dtype; /* Data type. */
+	Mtype_t Mtype; /* Matrix type: describes the mathematical property of 
+			  the matrix. */
+	int_t  nrow;   /* number of rows */
+	int_t  ncol;   /* number of columns */
+	void *Store;   /* pointer to the actual storage of the matrix */
+} SuperMatrix;
+
+/***********************************************
+ * The storage schemes are defined as follows. *
+ ***********************************************/
+
+/* Stype == NC (Also known as Harwell-Boeing sparse matrix format) */
+typedef struct {
+    int_t  nnz;	    /* number of nonzeros in the matrix */
+    void   *nzval;  /* pointer to array of nonzero values, packed by column */
+    int_t  *rowind; /* pointer to array of row indices of the nonzeros */
+    int_t  *colptr; /* pointer to array of beginning of columns in nzval[] 
+		       and rowind[]  */
+                    /* Note:
+		       Zero-based indexing is used;
+		       colptr[] has ncol+1 entries, the last one pointing
+		       beyond the last column, so that colptr[ncol] = nnz. */
+} NCformat;
+
+/* Stype == NR (Also known as row compressed storage (RCS). */
+typedef struct {
+    int_t nnz;	   /* number of nonzeros in the matrix */
+    void  *nzval;  /* pointer to array of nonzero values, packed by row */
+    int_t *colind; /* pointer to array of column indices of the nonzeros */
+    int_t *rowptr; /* pointer to array of beginning of rows in nzval[] 
+                      and colind[]  */
+                   /* Note:
+		      Zero-based indexing is used;
+		      nzval[] and colind[] are of the same length, nnz;
+		      rowptr[] has nrow+1 entries, the last one pointing
+		      beyond the last column, so that rowptr[nrow] = nnz. */
+} NRformat;
+
+/* Stype == SC */
+typedef struct {
+  int_t  nnz;	     /* number of nonzeros in the matrix */
+  int_t  nsuper;     /* number of supernodes, minus 1 */
+  void *nzval;       /* pointer to array of nonzero values, packed by column */
+  int_t *nzval_colptr;/* pointer to array of beginning of columns in nzval[] */
+  int_t *rowind;     /* pointer to array of compressed row indices of 
+			rectangular supernodes */
+  int_t *rowind_colptr;/* pointer to array of beginning of columns in rowind[] */
+  int_t *col_to_sup; /* col_to_sup[j] is the supernode number to which column 
+			j belongs; mapping from column to supernode number. */
+  int_t *sup_to_col; /* sup_to_col[s] points to the start of the s-th 
+			supernode; mapping from supernode number to column.
+		        e.g.: col_to_sup: 0 1 2 2 3 3 3 4 4 4 4 4 4 (ncol=12)
+		              sup_to_col: 0 1 2 4 7 12           (nsuper=4) */
+                     /* Note:
+		        Zero-based indexing is used;
+		        nzval_colptr[], rowind_colptr[], col_to_sup and
+		        sup_to_col[] have ncol+1 entries, the last one
+		        pointing beyond the last column.
+		        For col_to_sup[], only the first ncol entries are
+		        defined. For sup_to_col[], only the first nsuper+2
+		        entries are defined. */
+} SCformat;
+
+/* Stype == NCP */
+typedef struct {
+    int_t nnz;	  /* number of nonzeros in the matrix */
+    void *nzval;  /* pointer to array of nonzero values, packed by column */
+    int_t *rowind;/* pointer to array of row indices of the nonzeros */
+		  /* Note: nzval[]/rowind[] always have the same length */
+    int_t *colbeg;/* colbeg[j] points to the beginning of column j in nzval[] 
+                     and rowind[]  */
+    int_t *colend;/* colend[j] points to one past the last element of column
+		     j in nzval[] and rowind[]  */
+		  /* Note:
+		     Zero-based indexing is used;
+		     The consecutive columns of the nonzeros may not be 
+		     contiguous in storage, because the matrix has been 
+		     postmultiplied by a column permutation matrix. */
+} NCPformat;
+
+/* Stype == DN */
+typedef struct {
+    int_t lda;    /* leading dimension */
+    void *nzval;  /* array of size lda*ncol to represent a dense matrix */
+} DNformat;
+
+
+
+/*********************************************************
+ * Macros used for easy access of sparse matrix entries. *
+ *********************************************************/
+#define L_SUB_START(col)     ( Lstore->rowind_colptr[col] )
+#define L_SUB(ptr)           ( Lstore->rowind[ptr] )
+#define L_NZ_START(col)      ( Lstore->nzval_colptr[col] )
+#define L_FST_SUPC(superno)  ( Lstore->sup_to_col[superno] )
+#define U_NZ_START(col)      ( Ustore->colptr[col] )
+#define U_SUB(ptr)           ( Ustore->rowind[ptr] )
+
+
+#endif  /* __SUPERLU_SUPERMATRIX */
--- a/intern/opennl/superlu/sutil.c
+++ b/intern/opennl/superlu/sutil.c
@ -0,0 +1,478 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ 
+  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ 
+  Permission is hereby granted to use or copy this program for any
+  purpose, provided the above notices are retained on all copies.
+  Permission to modify the code and to distribute modified code is
+  granted, provided the above notices are retained, and a notice that
+  the code was modified is included with the above copyright notice.
+*/
+
+#include <math.h>
+#include "ssp_defs.h"
+
+void
+sCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, 
+		       float *nzval, int *rowind, int *colptr,
+		       Stype_t stype, Dtype_t dtype, Mtype_t mtype)
+{
+    NCformat *Astore;
+
+    A->Stype = stype;
+    A->Dtype = dtype;
+    A->Mtype = mtype;
+    A->nrow = m;
+    A->ncol = n;
+    A->Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) );
+    if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
+    Astore = A->Store;
+    Astore->nnz = nnz;
+    Astore->nzval = nzval;
+    Astore->rowind = rowind;
+    Astore->colptr = colptr;
+}
+
+void
+sCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz, 
+		       float *nzval, int *colind, int *rowptr,
+		       Stype_t stype, Dtype_t dtype, Mtype_t mtype)
+{
+    NRformat *Astore;
+
+    A->Stype = stype;
+    A->Dtype = dtype;
+    A->Mtype = mtype;
+    A->nrow = m;
+    A->ncol = n;
+    A->Store = (void *) SUPERLU_MALLOC( sizeof(NRformat) );
+    if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
+    Astore = A->Store;
+    Astore->nnz = nnz;
+    Astore->nzval = nzval;
+    Astore->colind = colind;
+    Astore->rowptr = rowptr;
+}
+
+/* Copy matrix A into matrix B. */
+void
+sCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B)
+{
+    NCformat *Astore, *Bstore;
+    int      ncol, nnz, i;
+
+    B->Stype = A->Stype;
+    B->Dtype = A->Dtype;
+    B->Mtype = A->Mtype;
+    B->nrow  = A->nrow;;
+    B->ncol  = ncol = A->ncol;
+    Astore   = (NCformat *) A->Store;
+    Bstore   = (NCformat *) B->Store;
+    Bstore->nnz = nnz = Astore->nnz;
+    for (i = 0; i < nnz; ++i)
+	((float *)Bstore->nzval)[i] = ((float *)Astore->nzval)[i];
+    for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i];
+    for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i];
+}
+
+
+void
+sCreate_Dense_Matrix(SuperMatrix *X, int m, int n, float *x, int ldx,
+		    Stype_t stype, Dtype_t dtype, Mtype_t mtype)
+{
+    DNformat    *Xstore;
+    
+    X->Stype = stype;
+    X->Dtype = dtype;
+    X->Mtype = mtype;
+    X->nrow = m;
+    X->ncol = n;
+    X->Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
+    if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store");
+    Xstore = (DNformat *) X->Store;
+    Xstore->lda = ldx;
+    Xstore->nzval = (float *) x;
+}
+
+void
+sCopy_Dense_Matrix(int M, int N, float *X, int ldx,
+			float *Y, int ldy)
+{
+/*
+ *
+ *  Purpose
+ *  =======
+ *
+ *  Copies a two-dimensional matrix X to another matrix Y.
+ */
+    int    i, j;
+    
+    for (j = 0; j < N; ++j)
+        for (i = 0; i < M; ++i)
+            Y[i + j*ldy] = X[i + j*ldx];
+}
+
+void
+sCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz, 
+			float *nzval, int *nzval_colptr, int *rowind,
+			int *rowind_colptr, int *col_to_sup, int *sup_to_col,
+			Stype_t stype, Dtype_t dtype, Mtype_t mtype)
+{
+    SCformat *Lstore;
+
+    L->Stype = stype;
+    L->Dtype = dtype;
+    L->Mtype = mtype;
+    L->nrow = m;
+    L->ncol = n;
+    L->Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) );
+    if ( !(L->Store) ) ABORT("SUPERLU_MALLOC fails for L->Store");
+    Lstore = L->Store;
+    Lstore->nnz = nnz;
+    Lstore->nsuper = col_to_sup[n];
+    Lstore->nzval = nzval;
+    Lstore->nzval_colptr = nzval_colptr;
+    Lstore->rowind = rowind;
+    Lstore->rowind_colptr = rowind_colptr;
+    Lstore->col_to_sup = col_to_sup;
+    Lstore->sup_to_col = sup_to_col;
+
+}
+
+
+/*
+ * Convert a row compressed storage into a column compressed storage.
+ */
+void
+sCompRow_to_CompCol(int m, int n, int nnz, 
+		    float *a, int *colind, int *rowptr,
+		    float **at, int **rowind, int **colptr)
+{
+    register int i, j, col, relpos;
+    int *marker;
+
+    /* Allocate storage for another copy of the matrix. */
+    *at = (float *) floatMalloc(nnz);
+    *rowind = (int *) intMalloc(nnz);
+    *colptr = (int *) intMalloc(n+1);
+    marker = (int *) intCalloc(n);
+    
+    /* Get counts of each column of A, and set up column pointers */
+    for (i = 0; i < m; ++i)
+	for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
+    (*colptr)[0] = 0;
+    for (j = 0; j < n; ++j) {
+	(*colptr)[j+1] = (*colptr)[j] + marker[j];
+	marker[j] = (*colptr)[j];
+    }
+
+    /* Transfer the matrix into the compressed column storage. */
+    for (i = 0; i < m; ++i) {
+	for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
+	    col = colind[j];
+	    relpos = marker[col];
+	    (*rowind)[relpos] = i;
+	    (*at)[relpos] = a[j];
+	    ++marker[col];
+	}
+    }
+
+    SUPERLU_FREE(marker);
+}
+
+
+void
+sPrint_CompCol_Matrix(char *what, SuperMatrix *A)
+{
+    NCformat     *Astore;
+    register int i,n;
+    float       *dp;
+    
+    printf("\nCompCol matrix %s:\n", what);
+    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
+    n = A->ncol;
+    Astore = (NCformat *) A->Store;
+    dp = (float *) Astore->nzval;
+    printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz);
+    printf("nzval: ");
+    for (i = 0; i < Astore->colptr[n]; ++i) printf("%f  ", dp[i]);
+    printf("\nrowind: ");
+    for (i = 0; i < Astore->colptr[n]; ++i) printf("%d  ", Astore->rowind[i]);
+    printf("\ncolptr: ");
+    for (i = 0; i <= n; ++i) printf("%d  ", Astore->colptr[i]);
+    printf("\n");
+    fflush(stdout);
+}
+
+void
+sPrint_SuperNode_Matrix(char *what, SuperMatrix *A)
+{
+    SCformat     *Astore;
+    register int i, j, k, c, d, n, nsup;
+    float       *dp;
+    int *col_to_sup, *sup_to_col, *rowind, *rowind_colptr;
+    
+    printf("\nSuperNode matrix %s:\n", what);
+    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
+    n = A->ncol;
+    Astore = (SCformat *) A->Store;
+    dp = (float *) Astore->nzval;
+    col_to_sup = Astore->col_to_sup;
+    sup_to_col = Astore->sup_to_col;
+    rowind_colptr = Astore->rowind_colptr;
+    rowind = Astore->rowind;
+    printf("nrow %d, ncol %d, nnz %d, nsuper %d\n", 
+	   A->nrow,A->ncol,Astore->nnz,Astore->nsuper);
+    printf("nzval:\n");
+    for (k = 0; k <= Astore->nsuper; ++k) {
+      c = sup_to_col[k];
+      nsup = sup_to_col[k+1] - c;
+      for (j = c; j < c + nsup; ++j) {
+	d = Astore->nzval_colptr[j];
+	for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) {
+	  printf("%d\t%d\t%e\n", rowind[i], j, dp[d++]);
+	}
+      }
+    }
+#if 0
+    for (i = 0; i < Astore->nzval_colptr[n]; ++i) printf("%f  ", dp[i]);
+#endif
+    printf("\nnzval_colptr: ");
+    for (i = 0; i <= n; ++i) printf("%d  ", Astore->nzval_colptr[i]);
+    printf("\nrowind: ");
+    for (i = 0; i < Astore->rowind_colptr[n]; ++i) 
+        printf("%d  ", Astore->rowind[i]);
+    printf("\nrowind_colptr: ");
+    for (i = 0; i <= n; ++i) printf("%d  ", Astore->rowind_colptr[i]);
+    printf("\ncol_to_sup: ");
+    for (i = 0; i < n; ++i) printf("%d  ", col_to_sup[i]);
+    printf("\nsup_to_col: ");
+    for (i = 0; i <= Astore->nsuper+1; ++i) 
+        printf("%d  ", sup_to_col[i]);
+    printf("\n");
+    fflush(stdout);
+}
+
+void
+sPrint_Dense_Matrix(char *what, SuperMatrix *A)
+{
+    DNformat     *Astore;
+    register int i;
+    float       *dp;
+    
+    printf("\nDense matrix %s:\n", what);
+    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
+    Astore = (DNformat *) A->Store;
+    dp = (float *) Astore->nzval;
+    printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda);
+    printf("\nnzval: ");
+    for (i = 0; i < A->nrow; ++i) printf("%f  ", dp[i]);
+    printf("\n");
+    fflush(stdout);
+}
+
+/*
+ * Diagnostic print of column "jcol" in the U/L factor.
+ */
+void
+sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu)
+{
+    int     i, k, fsupc;
+    int     *xsup, *supno;
+    int     *xlsub, *lsub;
+    float  *lusup;
+    int     *xlusup;
+    float  *ucol;
+    int     *usub, *xusub;
+
+    xsup    = Glu->xsup;
+    supno   = Glu->supno;
+    lsub    = Glu->lsub;
+    xlsub   = Glu->xlsub;
+    lusup   = Glu->lusup;
+    xlusup  = Glu->xlusup;
+    ucol    = Glu->ucol;
+    usub    = Glu->usub;
+    xusub   = Glu->xusub;
+    
+    printf("%s", msg);
+    printf("col %d: pivrow %d, supno %d, xprune %d\n", 
+	   jcol, pivrow, supno[jcol], xprune[jcol]);
+    
+    printf("\tU-col:\n");
+    for (i = xusub[jcol]; i < xusub[jcol+1]; i++)
+	printf("\t%d%10.4f\n", usub[i], ucol[i]);
+    printf("\tL-col in rectangular snode:\n");
+    fsupc = xsup[supno[jcol]];	/* first col of the snode */
+    i = xlsub[fsupc];
+    k = xlusup[jcol];
+    while ( i < xlsub[fsupc+1] && k < xlusup[jcol+1] ) {
+	printf("\t%d\t%10.4f\n", lsub[i], lusup[k]);
+	i++; k++;
+    }
+    fflush(stdout);
+}
+
+
+/*
+ * Check whether tempv[] == 0. This should be true before and after 
+ * calling any numeric routines, i.e., "panel_bmod" and "column_bmod". 
+ */
+void scheck_tempv(int n, float *tempv)
+{
+    int i;
+	
+    for (i = 0; i < n; i++) {
+	if (tempv[i] != 0.0) 
+	{
+	    fprintf(stderr,"tempv[%d] = %f\n", i,tempv[i]);
+	    ABORT("scheck_tempv");
+	}
+    }
+}
+
+
+void
+sGenXtrue(int n, int nrhs, float *x, int ldx)
+{
+    int  i, j;
+    for (j = 0; j < nrhs; ++j)
+	for (i = 0; i < n; ++i) {
+	    x[i + j*ldx] = 1.0;/* + (float)(i+1.)/n;*/
+	}
+}
+
+/*
+ * Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's
+ */
+void
+sFillRHS(trans_t trans, int nrhs, float *x, int ldx,
+         SuperMatrix *A, SuperMatrix *B)
+{
+    NCformat *Astore;
+    float   *Aval;
+    DNformat *Bstore;
+    float   *rhs;
+    float one = 1.0;
+    float zero = 0.0;
+    int      ldc;
+    char transc[1];
+
+    Astore = A->Store;
+    Aval   = (float *) Astore->nzval;
+    Bstore = B->Store;
+    rhs    = Bstore->nzval;
+    ldc    = Bstore->lda;
+    
+    if ( trans == NOTRANS ) *(unsigned char *)transc = 'N';
+    else *(unsigned char *)transc = 'T';
+
+    sp_sgemm(transc, nrhs, one, A,
+	     x, ldx, zero, rhs, ldc);
+
+}
+
+/* 
+ * Fills a float precision array with a given value.
+ */
+void 
+sfill(float *a, int alen, float dval)
+{
+    register int i;
+    for (i = 0; i < alen; i++) a[i] = dval;
+}
+
+
+
+/* 
+ * Check the inf-norm of the error vector 
+ */
+void sinf_norm_error(int nrhs, SuperMatrix *X, float *xtrue)
+{
+    DNformat *Xstore;
+    float err, xnorm;
+    float *Xmat, *soln_work;
+    int i, j;
+
+    Xstore = X->Store;
+    Xmat = Xstore->nzval;
+
+    for (j = 0; j < nrhs; j++) {
+      soln_work = &Xmat[j*Xstore->lda];
+      err = xnorm = 0.0;
+      for (i = 0; i < X->nrow; i++) {
+	err = SUPERLU_MAX(err, fabs(soln_work[i] - xtrue[i]));
+	xnorm = SUPERLU_MAX(xnorm, fabs(soln_work[i]));
+      }
+      err = err / xnorm;
+      printf("||X - Xtrue||/||X|| = %e\n", err);
+    }
+}
+
+
+
+/* Print performance of the code. */
+void
+sPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage,
+           float rpg, float rcond, float *ferr,
+           float *berr, char *equed, SuperLUStat_t *stat)
+{
+    SCformat *Lstore;
+    NCformat *Ustore;
+    double   *utime;
+    flops_t  *ops;
+    
+    utime = stat->utime;
+    ops   = stat->ops;
+    
+    if ( utime[FACT] != 0. )
+	printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT],
+	       ops[FACT]*1e-6/utime[FACT]);
+    printf("Identify relaxed snodes	= %8.2f\n", utime[RELAX]);
+    if ( utime[SOLVE] != 0. )
+	printf("Solve flops = %.0f, Mflops = %8.2f\n", ops[SOLVE],
+	       ops[SOLVE]*1e-6/utime[SOLVE]);
+    
+    Lstore = (SCformat *) L->Store;
+    Ustore = (NCformat *) U->Store;
+    printf("\tNo of nonzeros in factor L = %d\n", Lstore->nnz);
+    printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz);
+    printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
+	
+    printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
+	   mem_usage->for_lu/1e6, mem_usage->total_needed/1e6,
+	   mem_usage->expansions);
+	
+    printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n");
+    printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n",
+	   utime[FACT], ops[FACT]*1e-6/utime[FACT],
+	   utime[SOLVE], ops[SOLVE]*1e-6/utime[SOLVE],
+	   utime[ETREE], utime[EQUIL], utime[RCOND], utime[REFINE]);
+    
+    printf("\tRpg\t\tRcond\t\tFerr\t\tBerr\t\tEquil?\n");
+    printf("NUM:\t%e\t%e\t%e\t%e\t%s\n",
+	   rpg, rcond, ferr[0], berr[0], equed);
+    
+}
+
+
+
+
+int print_float_vec(char *what, int n, float *vec)
+{
+    int i;
+    printf("%s: n %d\n", what, n);
+    for (i = 0; i < n; ++i) printf("%d\t%f\n", i, vec[i]);
+    return 0;
+}
+
--- a/intern/opennl/superlu/util.c
+++ b/intern/opennl/superlu/util.c
@ -0,0 +1,391 @@
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ 
+  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ 
+  Permission is hereby granted to use or copy this program for any
+  purpose, provided the above notices are retained on all copies.
+  Permission to modify the code and to distribute modified code is
+  granted, provided the above notices are retained, and a notice that
+  the code was modified is included with the above copyright notice.
+*/
+
+#include <math.h>
+#include "ssp_defs.h"
+#include "util.h"
+
+/* 
+ * Global statistics variale
+ */
+
+void superlu_abort_and_exit(char* msg)
+{
+    fprintf(stderr, msg);
+    exit (-1);
+}
+
+/*
+ * Set the default values for the options argument.
+ */
+void set_default_options(superlu_options_t *options)
+{
+    options->Fact = DOFACT;
+    options->Equil = YES;
+    options->ColPerm = COLAMD;
+    options->DiagPivotThresh = 1.0;
+    options->Trans = NOTRANS;
+    options->IterRefine = NOREFINE;
+    options->SymmetricMode = NO;
+    options->PivotGrowth = NO;
+    options->ConditionNumber = NO;
+    options->PrintStat = YES;
+}
+
+/* Deallocate the structure pointing to the actual storage of the matrix. */
+void
+Destroy_SuperMatrix_Store(SuperMatrix *A)
+{
+    SUPERLU_FREE ( A->Store );
+}
+
+void
+Destroy_CompCol_Matrix(SuperMatrix *A)
+{
+    SUPERLU_FREE( ((NCformat *)A->Store)->rowind );
+    SUPERLU_FREE( ((NCformat *)A->Store)->colptr );
+    SUPERLU_FREE( ((NCformat *)A->Store)->nzval );
+    SUPERLU_FREE( A->Store );
+}
+
+void
+Destroy_CompRow_Matrix(SuperMatrix *A)
+{
+    SUPERLU_FREE( ((NRformat *)A->Store)->colind );
+    SUPERLU_FREE( ((NRformat *)A->Store)->rowptr );
+    SUPERLU_FREE( ((NRformat *)A->Store)->nzval );
+    SUPERLU_FREE( A->Store );
+}
+
+void
+Destroy_SuperNode_Matrix(SuperMatrix *A)
+{
+    SUPERLU_FREE ( ((SCformat *)A->Store)->rowind );
+    SUPERLU_FREE ( ((SCformat *)A->Store)->rowind_colptr );
+    SUPERLU_FREE ( ((SCformat *)A->Store)->nzval );
+    SUPERLU_FREE ( ((SCformat *)A->Store)->nzval_colptr );
+    SUPERLU_FREE ( ((SCformat *)A->Store)->col_to_sup );
+    SUPERLU_FREE ( ((SCformat *)A->Store)->sup_to_col );
+    SUPERLU_FREE ( A->Store );
+}
+
+/* A is of type Stype==NCP */
+void
+Destroy_CompCol_Permuted(SuperMatrix *A)
+{
+    SUPERLU_FREE ( ((NCPformat *)A->Store)->colbeg );
+    SUPERLU_FREE ( ((NCPformat *)A->Store)->colend );
+    SUPERLU_FREE ( A->Store );
+}
+
+/* A is of type Stype==DN */
+void
+Destroy_Dense_Matrix(SuperMatrix *A)
+{
+    DNformat* Astore = A->Store;
+    SUPERLU_FREE (Astore->nzval);
+    SUPERLU_FREE ( A->Store );
+}
+
+/*
+ * Reset repfnz[] for the current column 
+ */
+void
+resetrep_col (const int nseg, const int *segrep, int *repfnz)
+{
+    int i, irep;
+    
+    for (i = 0; i < nseg; i++) {
+	irep = segrep[i];
+	repfnz[irep] = EMPTY;
+    }
+}
+
+
+/*
+ * Count the total number of nonzeros in factors L and U,  and in the 
+ * symmetrically reduced L. 
+ */
+void
+countnz(const int n, int *xprune, int *nnzL, int *nnzU, GlobalLU_t *Glu)
+{
+    int          nsuper, fsupc, i, j;
+    int          nnzL0, jlen, irep;
+    int          *xsup, *xlsub;
+
+    xsup   = Glu->xsup;
+    xlsub  = Glu->xlsub;
+    *nnzL  = 0;
+    *nnzU  = (Glu->xusub)[n];
+    nnzL0  = 0;
+    nsuper = (Glu->supno)[n];
+
+    if ( n <= 0 ) return;
+
+    /* 
+     * For each supernode
+     */
+    for (i = 0; i <= nsuper; i++) {
+	fsupc = xsup[i];
+	jlen = xlsub[fsupc+1] - xlsub[fsupc];
+
+	for (j = fsupc; j < xsup[i+1]; j++) {
+	    *nnzL += jlen;
+	    *nnzU += j - fsupc + 1;
+	    jlen--;
+	}
+	irep = xsup[i+1] - 1;
+	nnzL0 += xprune[irep] - xlsub[irep];
+    }
+    
+    /* printf("\tNo of nonzeros in symm-reduced L = %d\n", nnzL0);*/
+}
+
+
+
+/*
+ * Fix up the data storage lsub for L-subscripts. It removes the subscript
+ * sets for structural pruning,	and applies permuation to the remaining
+ * subscripts.
+ */
+void
+fixupL(const int n, const int *perm_r, GlobalLU_t *Glu)
+{
+    register int nsuper, fsupc, nextl, i, j, k, jstrt;
+    int          *xsup, *lsub, *xlsub;
+
+    if ( n <= 1 ) return;
+
+    xsup   = Glu->xsup;
+    lsub   = Glu->lsub;
+    xlsub  = Glu->xlsub;
+    nextl  = 0;
+    nsuper = (Glu->supno)[n];
+    
+    /* 
+     * For each supernode ...
+     */
+    for (i = 0; i <= nsuper; i++) {
+	fsupc = xsup[i];
+	jstrt = xlsub[fsupc];
+	xlsub[fsupc] = nextl;
+	for (j = jstrt; j < xlsub[fsupc+1]; j++) {
+	    lsub[nextl] = perm_r[lsub[j]]; /* Now indexed into P*A */
+	    nextl++;
+  	}
+	for (k = fsupc+1; k < xsup[i+1]; k++) 
+	    	xlsub[k] = nextl;	/* Other columns in supernode i */
+
+    }
+
+    xlsub[n] = nextl;
+}
+
+
+/*
+ * Diagnostic print of segment info after panel_dfs().
+ */
+void print_panel_seg(int n, int w, int jcol, int nseg, 
+		     int *segrep, int *repfnz)
+{
+    int j, k;
+    
+    for (j = jcol; j < jcol+w; j++) {
+	printf("\tcol %d:\n", j);
+	for (k = 0; k < nseg; k++)
+	    printf("\t\tseg %d, segrep %d, repfnz %d\n", k, 
+			segrep[k], repfnz[(j-jcol)*n + segrep[k]]);
+    }
+
+}
+
+
+void
+StatInit(SuperLUStat_t *stat)
+{
+    register int i, w, panel_size, relax;
+
+    panel_size = sp_ienv(1);
+    relax = sp_ienv(2);
+    w = SUPERLU_MAX(panel_size, relax);
+    stat->panel_histo = intCalloc(w+1);
+    stat->utime = (double *) SUPERLU_MALLOC(NPHASES * sizeof(double));
+    if (!stat->utime) ABORT("SUPERLU_MALLOC fails for stat->utime");
+    stat->ops = (flops_t *) SUPERLU_MALLOC(NPHASES * sizeof(flops_t));
+    if (!stat->ops) ABORT("SUPERLU_MALLOC fails for stat->ops");
+    for (i = 0; i < NPHASES; ++i) {
+        stat->utime[i] = 0.;
+        stat->ops[i] = 0.;
+    }
+}
+
+
+void
+StatPrint(SuperLUStat_t *stat)
+{
+    double         *utime;
+    flops_t        *ops;
+
+    utime = stat->utime;
+    ops   = stat->ops;
+    printf("Factor time  = %8.2f\n", utime[FACT]);
+    if ( utime[FACT] != 0.0 )
+      printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT],
+	     ops[FACT]*1e-6/utime[FACT]);
+
+    printf("Solve time   = %8.2f\n", utime[SOLVE]);
+    if ( utime[SOLVE] != 0.0 )
+      printf("Solve flops = %e\tMflops = %8.2f\n", ops[SOLVE],
+	     ops[SOLVE]*1e-6/utime[SOLVE]);
+
+}
+
+
+void
+StatFree(SuperLUStat_t *stat)
+{
+    SUPERLU_FREE(stat->panel_histo);
+    SUPERLU_FREE(stat->utime);
+    SUPERLU_FREE(stat->ops);
+}
+
+
+flops_t
+LUFactFlops(SuperLUStat_t *stat)
+{
+    return (stat->ops[FACT]);
+}
+
+flops_t
+LUSolveFlops(SuperLUStat_t *stat)
+{
+    return (stat->ops[SOLVE]);
+}
+
+
+
+
+
+/* 
+ * Fills an integer array with a given value.
+ */
+void ifill(int *a, int alen, int ival)
+{
+    register int i;
+    for (i = 0; i < alen; i++) a[i] = ival;
+}
+
+
+
+/* 
+ * Get the statistics of the supernodes 
+ */
+#define NBUCKS 10
+static 	int	max_sup_size;
+
+void super_stats(int nsuper, int *xsup)
+{
+    register int nsup1 = 0;
+    int          i, isize, whichb, bl, bh;
+    int          bucket[NBUCKS];
+
+    max_sup_size = 0;
+
+    for (i = 0; i <= nsuper; i++) {
+	isize = xsup[i+1] - xsup[i];
+	if ( isize == 1 ) nsup1++;
+	if ( max_sup_size < isize ) max_sup_size = isize;	
+    }
+
+    printf("    Supernode statistics:\n\tno of super = %d\n", nsuper+1);
+    printf("\tmax supernode size = %d\n", max_sup_size);
+    printf("\tno of size 1 supernodes = %d\n", nsup1);
+
+    /* Histogram of the supernode sizes */
+    ifill (bucket, NBUCKS, 0);
+
+    for (i = 0; i <= nsuper; i++) {
+        isize = xsup[i+1] - xsup[i];
+        whichb = (float) isize / max_sup_size * NBUCKS;
+        if (whichb >= NBUCKS) whichb = NBUCKS - 1;
+        bucket[whichb]++;
+    }
+    
+    printf("\tHistogram of supernode sizes:\n");
+    for (i = 0; i < NBUCKS; i++) {
+        bl = (float) i * max_sup_size / NBUCKS;
+        bh = (float) (i+1) * max_sup_size / NBUCKS;
+        printf("\tsnode: %d-%d\t\t%d\n", bl+1, bh, bucket[i]);
+    }
+
+}
+
+
+float SpaSize(int n, int np, float sum_npw)
+{
+    return (sum_npw*8 + np*8 + n*4)/1024.;
+}
+
+float DenseSize(int n, float sum_nw)
+{
+    return (sum_nw*8 + n*8)/1024.;;
+}
+
+
+
+/*
+ * Check whether repfnz[] == EMPTY after reset.
+ */
+void check_repfnz(int n, int w, int jcol, int *repfnz)
+{
+    int jj, k;
+
+    for (jj = jcol; jj < jcol+w; jj++) 
+	for (k = 0; k < n; k++)
+	    if ( repfnz[(jj-jcol)*n + k] != EMPTY ) {
+		fprintf(stderr, "col %d, repfnz_col[%d] = %d\n", jj,
+			k, repfnz[(jj-jcol)*n + k]);
+		ABORT("check_repfnz");
+	    }
+}
+
+
+/* Print a summary of the testing results. */
+void
+PrintSumm(char *type, int nfail, int nrun, int nerrs)
+{
+    if ( nfail > 0 )
+	printf("%3s driver: %d out of %d tests failed to pass the threshold\n",
+	       type, nfail, nrun);
+    else
+	printf("All tests for %3s driver passed the threshold (%6d tests run)\n", type, nrun);
+
+    if ( nerrs > 0 )
+	printf("%6d error messages recorded\n", nerrs);
+}
+
+
+int print_int_vec(char *what, int n, int *vec)
+{
+    int i;
+    printf("%s\n", what);
+    for (i = 0; i < n; ++i) printf("%d\t%d\n", i, vec[i]);
+    return 0;
+}
--- a/intern/opennl/superlu/util.h
+++ b/intern/opennl/superlu/util.h
@ -0,0 +1,267 @@
+#ifndef __SUPERLU_UTIL /* allow multiple inclusions */
+#define __SUPERLU_UTIL
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+/*
+#ifndef __STDC__
+#include <malloc.h>
+#endif
+*/
+#include <assert.h>
+
+/***********************************************************************
+ * Macros
+ ***********************************************************************/
+#define FIRSTCOL_OF_SNODE(i)	(xsup[i])
+/* No of marker arrays used in the symbolic factorization,
+   each of size n */
+#define NO_MARKER     3
+#define NUM_TEMPV(m,w,t,b)  ( SUPERLU_MAX(m, (t + b)*w) )
+
+#ifndef USER_ABORT
+#define USER_ABORT(msg) superlu_abort_and_exit(msg)
+#endif
+
+#define ABORT(err_msg) \
+ { char msg[256];\
+   sprintf(msg,"%s at line %d in file %s\n",err_msg,__LINE__, __FILE__);\
+   USER_ABORT(msg); }
+
+
+#ifndef USER_MALLOC
+#if 1
+#define USER_MALLOC(size) superlu_malloc(size)
+#else
+/* The following may check out some uninitialized data */
+#define USER_MALLOC(size) memset (superlu_malloc(size), '\x0F', size)
+#endif
+#endif
+
+#define SUPERLU_MALLOC(size) USER_MALLOC(size)
+
+#ifndef USER_FREE
+#define USER_FREE(addr) superlu_free(addr)
+#endif
+
+#define SUPERLU_FREE(addr) USER_FREE(addr)
+
+#define CHECK_MALLOC(where) {                 \
+    extern int superlu_malloc_total;        \
+    printf("%s: malloc_total %d Bytes\n",     \
+	   where, superlu_malloc_total); \
+}
+
+#define SUPERLU_MAX(x, y) 	( (x) > (y) ? (x) : (y) )
+#define SUPERLU_MIN(x, y) 	( (x) < (y) ? (x) : (y) )
+
+/***********************************************************************
+ * Constants 
+ ***********************************************************************/
+#define EMPTY	(-1)
+/*#define NO	(-1)*/
+#define FALSE	0
+#define TRUE	1
+
+/***********************************************************************
+ * Enumerate types
+ ***********************************************************************/
+typedef enum {NO, YES}                                          yes_no_t;
+typedef enum {DOFACT, SamePattern, SamePattern_SameRowPerm, FACTORED} fact_t;
+typedef enum {NOROWPERM, LargeDiag, MY_PERMR}                   rowperm_t;
+typedef enum {NATURAL, MMD_ATA, MMD_AT_PLUS_A, COLAMD, MY_PERMC}colperm_t;
+typedef enum {NOTRANS, TRANS, CONJ}                             trans_t;
+typedef enum {NOEQUIL, ROW, COL, BOTH}                          DiagScale_t;
+typedef enum {NOREFINE, SINGLE=1, SLU_DOUBLE, EXTRA}                IterRefine_t;
+typedef enum {LUSUP, UCOL, LSUB, USUB}                          MemType;
+typedef enum {HEAD, TAIL}                                       stack_end_t;
+typedef enum {SYSTEM, USER}                                     LU_space_t;
+
+/* 
+ * The following enumerate type is used by the statistics variable 
+ * to keep track of flop count and time spent at various stages.
+ *
+ * Note that not all of the fields are disjoint.
+ */
+typedef enum {
+    COLPERM, /* find a column ordering that minimizes fills */
+    RELAX,   /* find artificial supernodes */
+    ETREE,   /* compute column etree */
+    EQUIL,   /* equilibrate the original matrix */
+    FACT,    /* perform LU factorization */
+    RCOND,   /* estimate reciprocal condition number */
+    SOLVE,   /* forward and back solves */
+    REFINE,  /* perform iterative refinement */
+    SLU_FLOAT,   /* time spent in floating-point operations */
+    TRSV,    /* fraction of FACT spent in xTRSV */
+    GEMV,    /* fraction of FACT spent in xGEMV */
+    FERR,    /* estimate error bounds after iterative refinement */
+    NPHASES  /* total number of phases */
+} PhaseType;
+
+
+/***********************************************************************
+ * Type definitions
+ ***********************************************************************/
+typedef float    flops_t;
+typedef unsigned char Logical;
+
+/* 
+ *-- This contains the options used to control the solve process.
+ *
+ * Fact   (fact_t)
+ *        Specifies whether or not the factored form of the matrix
+ *        A is supplied on entry, and if not, how the matrix A should
+ *        be factorizaed.
+ *        = DOFACT: The matrix A will be factorized from scratch, and the
+ *             factors will be stored in L and U.
+ *        = SamePattern: The matrix A will be factorized assuming
+ *             that a factorization of a matrix with the same sparsity
+ *             pattern was performed prior to this one. Therefore, this
+ *             factorization will reuse column permutation vector 
+ *             ScalePermstruct->perm_c and the column elimination tree
+ *             LUstruct->etree.
+ *        = SamePattern_SameRowPerm: The matrix A will be factorized
+ *             assuming that a factorization of a matrix with the same
+ *             sparsity	pattern and similar numerical values was performed
+ *             prior to this one. Therefore, this factorization will reuse
+ *             both row and column scaling factors R and C, and the
+ *             both row and column permutation vectors perm_r and perm_c,
+ *             distributed data structure set up from the previous symbolic
+ *             factorization.
+ *        = FACTORED: On entry, L, U, perm_r and perm_c contain the 
+ *              factored form of A. If DiagScale is not NOEQUIL, the matrix
+ *              A has been equilibrated with scaling factors R and C.
+ *
+ * Equil  (yes_no_t)
+ *        Specifies whether to equilibrate the system (scale A's row and
+ *        columns to have unit norm).
+ *
+ * ColPerm (colperm_t)
+ *        Specifies what type of column permutation to use to reduce fill.
+ *        = NATURAL: use the natural ordering 
+ *        = MMD_ATA: use minimum degree ordering on structure of A'*A
+ *        = MMD_AT_PLUS_A: use minimum degree ordering on structure of A'+A
+ *        = COLAMD: use approximate minimum degree column ordering
+ *        = MY_PERMC: use the ordering specified in ScalePermstruct->perm_c[]
+ *         
+ * Trans  (trans_t)
+ *        Specifies the form of the system of equations:
+ *        = NOTRANS: A * X = B        (No transpose)
+ *        = TRANS:   A**T * X = B     (Transpose)
+ *        = CONJ:    A**H * X = B     (Transpose)
+ *
+ * IterRefine (IterRefine_t)
+ *        Specifies whether to perform iterative refinement.
+ *        = NO: no iterative refinement
+ *        = WorkingPrec: perform iterative refinement in working precision
+ *        = ExtraPrec: perform iterative refinement in extra precision
+ *
+ * PrintStat (yes_no_t)
+ *        Specifies whether to print the solver's statistics.
+ *
+ * DiagPivotThresh (double, in [0.0, 1.0]) (only for sequential SuperLU)
+ *        Specifies the threshold used for a diagonal entry to be an
+ *        acceptable pivot.
+ *
+ * PivotGrowth (yes_no_t)
+ *        Specifies whether to compute the reciprocal pivot growth.
+ *
+ * ConditionNumber (ues_no_t)
+ *        Specifies whether to compute the reciprocal condition number.
+ *
+ * RowPerm (rowperm_t) (only for SuperLU_DIST)
+ *        Specifies whether to permute rows of the original matrix.
+ *        = NO: not to permute the rows
+ *        = LargeDiag: make the diagonal large relative to the off-diagonal
+ *        = MY_PERMR: use the permutation given in ScalePermstruct->perm_r[]
+ *           
+ * ReplaceTinyPivot (yes_no_t) (only for SuperLU_DIST)
+ *        Specifies whether to replace the tiny diagonals by
+ *        sqrt(epsilon)*||A|| during LU factorization.
+ *
+ * SolveInitialized (yes_no_t) (only for SuperLU_DIST)
+ *        Specifies whether the initialization has been performed to the
+ *        triangular solve.
+ *
+ * RefineInitialized (yes_no_t) (only for SuperLU_DIST)
+ *        Specifies whether the initialization has been performed to the
+ *        sparse matrix-vector multiplication routine needed in iterative
+ *        refinement.
+ */
+typedef struct {
+    fact_t        Fact;
+    yes_no_t      Equil;
+    colperm_t     ColPerm;
+    trans_t       Trans;
+    IterRefine_t  IterRefine;
+    yes_no_t      PrintStat;
+    yes_no_t      SymmetricMode;
+    double        DiagPivotThresh;
+    yes_no_t      PivotGrowth;
+    yes_no_t      ConditionNumber;
+    rowperm_t     RowPerm;
+    yes_no_t      ReplaceTinyPivot;
+    yes_no_t      SolveInitialized;
+    yes_no_t      RefineInitialized;
+} superlu_options_t;
+
+typedef struct {
+    int     *panel_histo; /* histogram of panel size distribution */
+    double  *utime;       /* running time at various phases */
+    flops_t *ops;         /* operation count at various phases */
+    int     TinyPivots;   /* number of tiny pivots */
+    int     RefineSteps;  /* number of iterative refinement steps */
+} SuperLUStat_t;
+
+
+/***********************************************************************
+ * Prototypes
+ ***********************************************************************/
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern void    Destroy_SuperMatrix_Store(SuperMatrix *);
+extern void    Destroy_CompCol_Matrix(SuperMatrix *);
+extern void    Destroy_CompRow_Matrix(SuperMatrix *);
+extern void    Destroy_SuperNode_Matrix(SuperMatrix *);
+extern void    Destroy_CompCol_Permuted(SuperMatrix *);
+extern void    Destroy_Dense_Matrix(SuperMatrix *);
+extern void    get_perm_c(int, SuperMatrix *, int *);
+extern void    set_default_options(superlu_options_t *options);
+extern void    sp_preorder (superlu_options_t *, SuperMatrix*, int*, int*,
+			    SuperMatrix*);
+extern void    superlu_abort_and_exit(char*);
+extern void    *superlu_malloc (size_t);
+extern int     *intMalloc (int);
+extern int     *intCalloc (int);
+extern void    superlu_free (void*);
+extern void    SetIWork (int, int, int, int *, int **, int **, int **,
+                         int **, int **, int **, int **);
+extern int     sp_coletree (int *, int *, int *, int, int, int *);
+extern void    relax_snode (const int, int *, const int, int *, int *);
+extern void    heap_relax_snode (const int, int *, const int, int *, int *);
+extern void    resetrep_col (const int, const int *, int *);
+extern int     spcoletree (int *, int *, int *, int, int, int *);
+extern int     *TreePostorder (int, int *);
+extern double  SuperLU_timer_ ();
+extern int     sp_ienv (int);
+extern int     lsame_ (char *, char *);
+extern int     xerbla_ (char *, int *);
+extern void    ifill (int *, int, int);
+extern void    snode_profile (int, int *);
+extern void    super_stats (int, int *);
+extern void    PrintSumm (char *, int, int, int);
+extern void    StatInit(SuperLUStat_t *);
+extern void    StatPrint (SuperLUStat_t *);
+extern void    StatFree(SuperLUStat_t *);
+extern void    print_panel_seg(int, int, int, int, int *, int *);
+extern void    check_repfnz(int, int, int, int *);
+
+#ifdef __cplusplus
+  }
+#endif
+
+#endif /* __SUPERLU_UTIL */
--- a/intern/opennl/superlu/xerbla.c
+++ b/intern/opennl/superlu/xerbla.c
@ -0,0 +1,43 @@
+
+#include <stdio.h>
+
+/* Subroutine */ int xerbla_(char *srname, int *info)
+{
+/*  -- LAPACK auxiliary routine (version 2.0) --   
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
+       Courant Institute, Argonne National Lab, and Rice University   
+       September 30, 1994   
+
+
+    Purpose   
+    =======   
+
+    XERBLA  is an error handler for the LAPACK routines.   
+    It is called by an LAPACK routine if an input parameter has an   
+    invalid value.  A message is printed and execution stops.   
+
+    Installers may consider modifying the STOP statement in order to   
+    call system-specific exception-handling facilities.   
+
+    Arguments   
+    =========   
+
+    SRNAME  (input) CHARACTER*6   
+            The name of the routine which called XERBLA.   
+
+    INFO    (input) INT   
+            The position of the invalid parameter in the parameter list   
+
+            of the calling routine.   
+
+   ===================================================================== 
+*/
+
+    printf("** On entry to %6s, parameter number %2d had an illegal value\n",
+		srname, *info);
+
+/*     End of XERBLA */
+
+    return 0;
+} /* xerbla_ */
+
--- a/source/Makefile
+++ b/source/Makefile
@ -87,6 +87,8 @@ PYPLAYERLIB ?= $(PYLIB)
    GRPLIB += $(OCGDIR)/blender/renderconverter/$(DEBUG_DIR)librenderconverter.a
    GRPLIB += $(OCGDIR)/blender/render/$(DEBUG_DIR)librender.a
    GRPLIB += $(OCGDIR)/blender/radiosity/$(DEBUG_DIR)libradiosity.a
+    GRPLIB += $(NAN_OPENNL)/lib/$(DEBUG_DIR)libopennl.a
+    GRPLIB += $(NAN_SUPERLU)/lib/$(DEBUG_DIR)libsuperlu.a

    GRPLIB += $(OCGDIR)/blender/python/$(DEBUG_DIR)libpython.a

--- a/source/nan_definitions.mk
+++ b/source/nan_definitions.mk
@ -81,6 +81,8 @@ endif
    export NAN_GHOST ?= $(LCGDIR)/ghost
    export NAN_TEST_VERBOSITY ?= 1
    export NAN_BMFONT ?= $(LCGDIR)/bmfont
+    export NAN_OPENNL ?= $(LCGDIR)/opennl
+    export NAN_SUPERLU ?= $(LCGDIR)/superlu
    ifeq ($(FREE_WINDOWS), true)
      export NAN_FTGL ?= $(LCGDIR)/gcc/ftgl
    else