blender/intern/opennl/superlu/ssp_blas2.c

/** \file opennl/superlu/ssp_blas2.c
 *  \ingroup opennl
 */

/*
 * -- SuperLU routine (version 3.0) --
 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
 * and Lawrence Berkeley National Lab.
 * October 15, 2003
 *
 */
/*
 * File name:		ssp_blas2.c
 * Purpose:		Sparse BLAS 2, using some dense BLAS 2 operations.
 */

#include "ssp_defs.h"

/* 
 * Function prototypes 
 */
void susolve(int, int, float*, float*);
void slsolve(int, int, float*, float*);
void smatvec(int, int, int, float*, float*, float*);
int strsv_(char*, char*, char*, int*, float*, int*, float*, int*);

int
sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L, 
         SuperMatrix *U, float *x, SuperLUStat_t *stat, int *info)
{
/*
 *   Purpose
 *   =======
 *
 *   sp_strsv() solves one of the systems of equations   
 *       A*x = b,   or   A'*x = b,
 *   where b and x are n element vectors and A is a sparse unit , or   
 *   non-unit, upper or lower triangular matrix.   
 *   No test for singularity or near-singularity is included in this   
 *   routine. Such tests must be performed before calling this routine.   
 *
 *   Parameters   
 *   ==========   
 *
 *   uplo   - (input) char*
 *            On entry, uplo specifies whether the matrix is an upper or   
 *             lower triangular matrix as follows:   
 *                uplo = 'U' or 'u'   A is an upper triangular matrix.   
 *                uplo = 'L' or 'l'   A is a lower triangular matrix.   
 *
 *   trans  - (input) char*
 *             On entry, trans specifies the equations to be solved as   
 *             follows:   
 *                trans = 'N' or 'n'   A*x = b.   
 *                trans = 'T' or 't'   A'*x = b.
 *                trans = 'C' or 'c'   A'*x = b.   
 *
 *   diag   - (input) char*
 *             On entry, diag specifies whether or not A is unit   
 *             triangular as follows:   
 *                diag = 'U' or 'u'   A is assumed to be unit triangular.   
 *                diag = 'N' or 'n'   A is not assumed to be unit   
 *                                    triangular.   
 *	     
 *   L       - (input) SuperMatrix*
 *	       The factor L from the factorization Pr*A*Pc=L*U. Use
 *             compressed row subscripts storage for supernodes,
 *             i.e., L has types: Stype = SC, Dtype = SLU_S, Mtype = TRLU.
 *
 *   U       - (input) SuperMatrix*
 *	        The factor U from the factorization Pr*A*Pc=L*U.
 *	        U has types: Stype = NC, Dtype = SLU_S, Mtype = TRU.
 *    
 *   x       - (input/output) float*
 *             Before entry, the incremented array X must contain the n   
 *             element right-hand side vector b. On exit, X is overwritten 
 *             with the solution vector x.
 *
 *   info    - (output) int*
 *             If *info = -i, the i-th argument had an illegal value.
 *
 */
#ifdef _CRAY
    _fcd ftcs1 = _cptofcd("L", strlen("L")),
	 ftcs2 = _cptofcd("N", strlen("N")),
	 ftcs3 = _cptofcd("U", strlen("U"));
#endif
    SCformat *Lstore;
    NCformat *Ustore;
    float   *Lval, *Uval;
    int incx = 1;
    int nrow;
    int fsupc, nsupr, nsupc, luptr, istart, irow;
    int i, k, iptr, jcol;
    float *work;
    flops_t solve_ops;

    /* Test the input parameters */
    *info = 0;
    if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
    else if ( !lsame_(trans, "N") && !lsame_(trans, "T") && 
              !lsame_(trans, "C")) *info = -2;
    else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
    else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
    else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
    if ( *info ) {
	i = -(*info);
	xerbla_("sp_strsv", &i);
	return 0;
    }

    Lstore = L->Store;
    Lval = Lstore->nzval;
    Ustore = U->Store;
    Uval = Ustore->nzval;
    solve_ops = 0;

    if ( !(work = floatCalloc(L->nrow)) )
	ABORT("Malloc fails for work in sp_strsv().");
    
    if ( lsame_(trans, "N") ) {	/* Form x := inv(A)*x. */
	
	if ( lsame_(uplo, "L") ) {
	    /* Form x := inv(L)*x */
    	    if ( L->nrow == 0 ) {
			SUPERLU_FREE(work);
			return 0; /* Quick return */
		}
	    
	    for (k = 0; k <= Lstore->nsuper; k++) {
		fsupc = L_FST_SUPC(k);
		istart = L_SUB_START(fsupc);
		nsupr = L_SUB_START(fsupc+1) - istart;
		nsupc = L_FST_SUPC(k+1) - fsupc;
		luptr = L_NZ_START(fsupc);
		nrow = nsupr - nsupc;

	        solve_ops += nsupc * (nsupc - 1);
	        solve_ops += 2 * nrow * nsupc;

		if ( nsupc == 1 ) {
		    for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
			irow = L_SUB(iptr);
			++luptr;
			x[irow] -= x[fsupc] * Lval[luptr];
		    }
		} else {
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
		    STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
		       	&x[fsupc], &incx);
		
		    SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
#else
		    strsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
		       	&x[fsupc], &incx);
		
		    sgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
#endif
#else
		    slsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
		
		    smatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
                             &x[fsupc], &work[0] );
#endif		
		
		    iptr = istart + nsupc;
		    for (i = 0; i < nrow; ++i, ++iptr) {
			irow = L_SUB(iptr);
			x[irow] -= work[i];	/* Scatter */
			work[i] = 0.0;

		    }
	 	}
	    } /* for k ... */
	    
	} else {
	    /* Form x := inv(U)*x */
	    
	    if ( U->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = Lstore->nsuper; k >= 0; k--) {
	    	fsupc = L_FST_SUPC(k);
	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
	    	luptr = L_NZ_START(fsupc);
		
    	        solve_ops += nsupc * (nsupc + 1);

		if ( nsupc == 1 ) {
		    x[fsupc] /= Lval[luptr];
		    for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
			irow = U_SUB(i);
			x[irow] -= x[fsupc] * Uval[i];
		    }
		} else {
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
		    STRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
		       &x[fsupc], &incx);
#else
		    strsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
                           &x[fsupc], &incx);
#endif
#else		
		    susolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
#endif		

		    for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
		        solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
		    	for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); 
				i++) {
			    irow = U_SUB(i);
			    x[irow] -= x[jcol] * Uval[i];
		    	}
                    }
		}
	    } /* for k ... */
	    
	}
    } else { /* Form x := inv(A')*x */
	
	if ( lsame_(uplo, "L") ) {
	    /* Form x := inv(L')*x */
    	    if ( L->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = Lstore->nsuper; k >= 0; --k) {
	    	fsupc = L_FST_SUPC(k);
	    	istart = L_SUB_START(fsupc);
	    	nsupr = L_SUB_START(fsupc+1) - istart;
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
	    	luptr = L_NZ_START(fsupc);

		solve_ops += 2 * (nsupr - nsupc) * nsupc;

		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
		    iptr = istart + nsupc;
		    for (i = L_NZ_START(jcol) + nsupc; 
				i < L_NZ_START(jcol+1); i++) {
			irow = L_SUB(iptr);
			x[jcol] -= x[irow] * Lval[i];
			iptr++;
		    }
		}
		
		if ( nsupc > 1 ) {
		    solve_ops += nsupc * (nsupc - 1);
#ifdef _CRAY
                    ftcs1 = _cptofcd("L", strlen("L"));
                    ftcs2 = _cptofcd("T", strlen("T"));
                    ftcs3 = _cptofcd("U", strlen("U"));
		    STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
			&x[fsupc], &incx);
#else
		    strsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
			&x[fsupc], &incx);
#endif
		}
	    }
	} else {
	    /* Form x := inv(U')*x */
	    if ( U->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = 0; k <= Lstore->nsuper; k++) {
	    	fsupc = L_FST_SUPC(k);
	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
	    	luptr = L_NZ_START(fsupc);

		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
		    solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
			irow = U_SUB(i);
			x[jcol] -= x[irow] * Uval[i];
		    }
		}

		solve_ops += nsupc * (nsupc + 1);

		if ( nsupc == 1 ) {
		    x[fsupc] /= Lval[luptr];
		} else {
#ifdef _CRAY
                    ftcs1 = _cptofcd("U", strlen("U"));
                    ftcs2 = _cptofcd("T", strlen("T"));
                    ftcs3 = _cptofcd("N", strlen("N"));
		    STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
			    &x[fsupc], &incx);
#else
		    strsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
			    &x[fsupc], &incx);
#endif
		}
	    } /* for k ... */
	}
    }

    stat->ops[SOLVE] += solve_ops;
    SUPERLU_FREE(work);
    return 0;
}


int
sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x, 
	 int incx, float beta, float *y, int incy)
{
/*  Purpose   
    =======   

    sp_sgemv()  performs one of the matrix-vector operations   
       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
    where alpha and beta are scalars, x and y are vectors and A is a
    sparse A->nrow by A->ncol matrix.   

    Parameters   
    ==========   

    TRANS  - (input) char*
             On entry, TRANS specifies the operation to be performed as   
             follows:   
                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
                TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   

    ALPHA  - (input) float
             On entry, ALPHA specifies the scalar alpha.   

    A      - (input) SuperMatrix*
             Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
             Currently, the type of A can be:
                 Stype = NC or NCP; Dtype = SLU_S; Mtype = GE. 
             In the future, more general A can be handled.

    X      - (input) float*, array of DIMENSION at least   
             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
             and at least   
             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
             Before entry, the incremented array X must contain the   
             vector x.   

    INCX   - (input) int
             On entry, INCX specifies the increment for the elements of   
             X. INCX must not be zero.   

    BETA   - (input) float
             On entry, BETA specifies the scalar beta. When BETA is   
             supplied as zero then Y need not be set on input.   

    Y      - (output) float*,  array of DIMENSION at least   
             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
             and at least   
             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
             Before entry with BETA non-zero, the incremented array Y   
             must contain the vector y. On exit, Y is overwritten by the 
             updated vector y.
	     
    INCY   - (input) int
             On entry, INCY specifies the increment for the elements of   
             Y. INCY must not be zero.   

    ==== Sparse Level 2 Blas routine.   
*/

    /* Local variables */
    NCformat *Astore;
    float   *Aval;
    int info;
    float temp;
    int lenx, leny, i, j, irow;
    int iy, jx, jy, kx, ky;
    int notran;

    notran = lsame_(trans, "N");
    Astore = A->Store;
    Aval = Astore->nzval;
    
    /* Test the input parameters */
    info = 0;
    if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
    else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
    else if (incx == 0) info = 5;
    else if (incy == 0)	info = 8;
    if (info != 0) {
	xerbla_("sp_sgemv ", &info);
	return 0;
    }

    /* Quick return if possible. */
    if (A->nrow == 0 || A->ncol == 0 || (alpha == 0. && beta == 1.))
	return 0;

    /* Set  LENX  and  LENY, the lengths of the vectors x and y, and set 
       up the start points in  X  and  Y. */
    if (lsame_(trans, "N")) {
	lenx = A->ncol;
	leny = A->nrow;
    } else {
	lenx = A->nrow;
	leny = A->ncol;
    }
    if (incx > 0) kx = 0;
    else kx =  - (lenx - 1) * incx;
    if (incy > 0) ky = 0;
    else ky =  - (leny - 1) * incy;

    /* Start the operations. In this version the elements of A are   
       accessed sequentially with one pass through A. */
    /* First form  y := beta*y. */
    if (beta != 1.) {
	if (incy == 1) {
	    if (beta == 0.)
		for (i = 0; i < leny; ++i) y[i] = 0.;
	    else
		for (i = 0; i < leny; ++i) y[i] = beta * y[i];
	} else {
	    iy = ky;
	    if (beta == 0.)
		for (i = 0; i < leny; ++i) {
		    y[iy] = 0.;
		    iy += incy;
		}
	    else
		for (i = 0; i < leny; ++i) {
		    y[iy] = beta * y[iy];
		    iy += incy;
		}
	}
    }
    
    if (alpha == 0.) return 0;

    if ( notran ) {
	/* Form  y := alpha*A*x + y. */
	jx = kx;
	if (incy == 1) {
	    for (j = 0; j < A->ncol; ++j) {
		if (x[jx] != 0.) {
		    temp = alpha * x[jx];
		    for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
			irow = Astore->rowind[i];
			y[irow] += temp * Aval[i];
		    }
		}
		jx += incx;
	    }
	} else {
	    ABORT("Not implemented.");
	}
    } else {
	/* Form  y := alpha*A'*x + y. */
	jy = ky;
	if (incx == 1) {
	    for (j = 0; j < A->ncol; ++j) {
		temp = 0.;
		for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
		    irow = Astore->rowind[i];
		    temp += Aval[i] * x[irow];
		}
		y[jy] += alpha * temp;
		jy += incy;
	    }
	} else {
	    ABORT("Not implemented.");
	}
    }
    return 0;
} /* sp_sgemv */
doxygen: opennl tagged. renamed BLO_sys_types.h to superlu_sys_types.h 2011-02-25 10:24:29 +00:00			`/** \file opennl/superlu/ssp_blas2.c`
			`* \ingroup opennl`
			`*/`
Added SuperLU 3.0: http://crd.lbl.gov/~xiaoye/SuperLU/ This is a library to solve sparse matrix systems (type A*x=B). It is able to solve large systems very FAST. Only the necessary parts of the library are included to limit file size and compilation time. This means the example files, fortran interface, test files, matlab interface, cblas library, complex number part and build system have been left out. All (gcc) warnings have been fixed too. This library will be used for LSCM UV unwrapping. With this library, LSCM unwrapping can be calculated in a split second, making the unwrapping proces much more interactive. Added OpenNL (Open Numerical Libary): http://www.loria.fr/~levy/OpenNL/ OpenNL is a library to easily construct and solve sparse linear systems. We use a stripped down version, as an interface to SuperLU. This library was kindly given to use by Bruno Levy. 2004-07-13 11:42:13 +00:00
			`/*`
			`* -- SuperLU routine (version 3.0) --`
			`* Univ. of California Berkeley, Xerox Palo Alto Research Center,`
			`* and Lawrence Berkeley National Lab.`
			`* October 15, 2003`
			`*`
			`*/`
			`/*`
			`* File name: ssp_blas2.c`
			`* Purpose: Sparse BLAS 2, using some dense BLAS 2 operations.`
			`*/`

			`#include "ssp_defs.h"`

			`/*`
			`* Function prototypes`
			`*/`
			`void susolve(int, int, float, float);`
			`void slsolve(int, int, float, float);`
			`void smatvec(int, int, int, float, float, float*);`
			`int strsv_(char, char, char, int, float, int, float, int);`

			`int`
			`sp_strsv(char uplo, char trans, char diag, SuperMatrix L,`
			`SuperMatrix U, float x, SuperLUStat_t stat, int info)`
			`{`
			`/*`
			`* Purpose`
			`* =======`
			`*`
			`* sp_strsv() solves one of the systems of equations`
			`* Ax = b, or A'x = b,`
			`* where b and x are n element vectors and A is a sparse unit , or`
			`* non-unit, upper or lower triangular matrix.`
			`* No test for singularity or near-singularity is included in this`
			`* routine. Such tests must be performed before calling this routine.`
			`*`
			`* Parameters`
			`* ==========`
			`*`
			`* uplo - (input) char*`
			`* On entry, uplo specifies whether the matrix is an upper or`
			`* lower triangular matrix as follows:`
			`* uplo = 'U' or 'u' A is an upper triangular matrix.`
			`* uplo = 'L' or 'l' A is a lower triangular matrix.`
			`*`
			`* trans - (input) char*`
			`* On entry, trans specifies the equations to be solved as`
			`* follows:`
			`* trans = 'N' or 'n' A*x = b.`
			`* trans = 'T' or 't' A'*x = b.`
			`* trans = 'C' or 'c' A'*x = b.`
			`*`
			`* diag - (input) char*`
			`* On entry, diag specifies whether or not A is unit`
			`* triangular as follows:`
			`* diag = 'U' or 'u' A is assumed to be unit triangular.`
			`* diag = 'N' or 'n' A is not assumed to be unit`
			`* triangular.`
			`*`
			`* L - (input) SuperMatrix*`
			`* The factor L from the factorization PrAPc=L*U. Use`
			`* compressed row subscripts storage for supernodes,`
			`* i.e., L has types: Stype = SC, Dtype = SLU_S, Mtype = TRLU.`
			`*`
			`* U - (input) SuperMatrix*`
			`* The factor U from the factorization PrAPc=L*U.`
			`* U has types: Stype = NC, Dtype = SLU_S, Mtype = TRU.`
			`*`
			`* x - (input/output) float*`
			`* Before entry, the incremented array X must contain the n`
			`* element right-hand side vector b. On exit, X is overwritten`
			`* with the solution vector x.`
			`*`
			`* info - (output) int*`
			`* If *info = -i, the i-th argument had an illegal value.`
			`*`
			`*/`
			`#ifdef _CRAY`
			`_fcd ftcs1 = _cptofcd("L", strlen("L")),`
			`ftcs2 = _cptofcd("N", strlen("N")),`
			`ftcs3 = _cptofcd("U", strlen("U"));`
			`#endif`
			`SCformat *Lstore;`
			`NCformat *Ustore;`
			`float Lval, Uval;`
			`int incx = 1;`
			`int nrow;`
			`int fsupc, nsupr, nsupc, luptr, istart, irow;`
			`int i, k, iptr, jcol;`
			`float *work;`
			`flops_t solve_ops;`

			`/* Test the input parameters */`
			`*info = 0;`
			`if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;`
			`else if ( !lsame_(trans, "N") && !lsame_(trans, "T") &&`
			`!lsame_(trans, "C")) *info = -2;`
			`else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;`
			`else if ( L->nrow != L->ncol \|\| L->nrow < 0 ) *info = -4;`
			`else if ( U->nrow != U->ncol \|\| U->nrow < 0 ) *info = -5;`
			`if ( *info ) {`
			`i = -(*info);`
			`xerbla_("sp_strsv", &i);`
			`return 0;`
			`}`

			`Lstore = L->Store;`
			`Lval = Lstore->nzval;`
			`Ustore = U->Store;`
			`Uval = Ustore->nzval;`
			`solve_ops = 0;`

			`if ( !(work = floatCalloc(L->nrow)) )`
			`ABORT("Malloc fails for work in sp_strsv().");`

			`if ( lsame_(trans, "N") ) { /* Form x := inv(A)x. /`

			`if ( lsame_(uplo, "L") ) {`
			`/* Form x := inv(L)x /`
More bugs found thanks to klockwork mallocn.c check malloced memory before using it. ssp_blas2.c free memory that wasn't but should have been. Kent 2006-10-27 15:00:31 +00:00			`if ( L->nrow == 0 ) {`
			`SUPERLU_FREE(work);`
			`return 0; /* Quick return */`
			`}`
Added SuperLU 3.0: http://crd.lbl.gov/~xiaoye/SuperLU/ This is a library to solve sparse matrix systems (type A*x=B). It is able to solve large systems very FAST. Only the necessary parts of the library are included to limit file size and compilation time. This means the example files, fortran interface, test files, matlab interface, cblas library, complex number part and build system have been left out. All (gcc) warnings have been fixed too. This library will be used for LSCM UV unwrapping. With this library, LSCM unwrapping can be calculated in a split second, making the unwrapping proces much more interactive. Added OpenNL (Open Numerical Libary): http://www.loria.fr/~levy/OpenNL/ OpenNL is a library to easily construct and solve sparse linear systems. We use a stripped down version, as an interface to SuperLU. This library was kindly given to use by Bruno Levy. 2004-07-13 11:42:13 +00:00
			`for (k = 0; k <= Lstore->nsuper; k++) {`
			`fsupc = L_FST_SUPC(k);`
			`istart = L_SUB_START(fsupc);`
			`nsupr = L_SUB_START(fsupc+1) - istart;`
			`nsupc = L_FST_SUPC(k+1) - fsupc;`
			`luptr = L_NZ_START(fsupc);`
			`nrow = nsupr - nsupc;`

			`solve_ops += nsupc * (nsupc - 1);`
			`solve_ops += 2 * nrow * nsupc;`

			`if ( nsupc == 1 ) {`
			`for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {`
			`irow = L_SUB(iptr);`
			`++luptr;`
			`x[irow] -= x[fsupc] * Lval[luptr];`
			`}`
			`} else {`
			`#ifdef USE_VENDOR_BLAS`
			`#ifdef _CRAY`
			`STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,`
			`&x[fsupc], &incx);`

			`SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],`
			`&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);`
			`#else`
			`strsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,`
			`&x[fsupc], &incx);`

			`sgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],`
			`&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);`
			`#endif`
			`#else`
			`slsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);`

			`smatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],`
			`&x[fsupc], &work[0] );`
			`#endif`

			`iptr = istart + nsupc;`
			`for (i = 0; i < nrow; ++i, ++iptr) {`
			`irow = L_SUB(iptr);`
			`x[irow] -= work[i]; /* Scatter */`
			`work[i] = 0.0;`

			`}`
			`}`
			`} /* for k ... */`

			`} else {`
			`/* Form x := inv(U)x /`

			`if ( U->nrow == 0 ) return 0; /* Quick return */`

			`for (k = Lstore->nsuper; k >= 0; k--) {`
			`fsupc = L_FST_SUPC(k);`
			`nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);`
			`nsupc = L_FST_SUPC(k+1) - fsupc;`
			`luptr = L_NZ_START(fsupc);`

			`solve_ops += nsupc * (nsupc + 1);`

			`if ( nsupc == 1 ) {`
			`x[fsupc] /= Lval[luptr];`
			`for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {`
			`irow = U_SUB(i);`
			`x[irow] -= x[fsupc] * Uval[i];`
			`}`
			`} else {`
			`#ifdef USE_VENDOR_BLAS`
			`#ifdef _CRAY`
			`STRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,`
			`&x[fsupc], &incx);`
			`#else`
			`strsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,`
			`&x[fsupc], &incx);`
			`#endif`
			`#else`
			`susolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );`
			`#endif`

			`for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {`
			`solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));`
			`for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1);`
			`i++) {`
			`irow = U_SUB(i);`
			`x[irow] -= x[jcol] * Uval[i];`
			`}`
			`}`
			`}`
			`} /* for k ... */`

			`}`
			`} else { /* Form x := inv(A')x /`

			`if ( lsame_(uplo, "L") ) {`
			`/* Form x := inv(L')x /`
			`if ( L->nrow == 0 ) return 0; /* Quick return */`

			`for (k = Lstore->nsuper; k >= 0; --k) {`
			`fsupc = L_FST_SUPC(k);`
			`istart = L_SUB_START(fsupc);`
			`nsupr = L_SUB_START(fsupc+1) - istart;`
			`nsupc = L_FST_SUPC(k+1) - fsupc;`
			`luptr = L_NZ_START(fsupc);`

			`solve_ops += 2 * (nsupr - nsupc) * nsupc;`

			`for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {`
			`iptr = istart + nsupc;`
			`for (i = L_NZ_START(jcol) + nsupc;`
			`i < L_NZ_START(jcol+1); i++) {`
			`irow = L_SUB(iptr);`
			`x[jcol] -= x[irow] * Lval[i];`
			`iptr++;`
			`}`
			`}`

			`if ( nsupc > 1 ) {`
			`solve_ops += nsupc * (nsupc - 1);`
			`#ifdef _CRAY`
			`ftcs1 = _cptofcd("L", strlen("L"));`
			`ftcs2 = _cptofcd("T", strlen("T"));`
			`ftcs3 = _cptofcd("U", strlen("U"));`
			`STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,`
			`&x[fsupc], &incx);`
			`#else`
			`strsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,`
			`&x[fsupc], &incx);`
			`#endif`
			`}`
			`}`
			`} else {`
			`/* Form x := inv(U')x /`
			`if ( U->nrow == 0 ) return 0; /* Quick return */`

			`for (k = 0; k <= Lstore->nsuper; k++) {`
			`fsupc = L_FST_SUPC(k);`
			`nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);`
			`nsupc = L_FST_SUPC(k+1) - fsupc;`
			`luptr = L_NZ_START(fsupc);`

			`for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {`
			`solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));`
			`for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {`
			`irow = U_SUB(i);`
			`x[jcol] -= x[irow] * Uval[i];`
			`}`
			`}`

			`solve_ops += nsupc * (nsupc + 1);`

			`if ( nsupc == 1 ) {`
			`x[fsupc] /= Lval[luptr];`
			`} else {`
			`#ifdef _CRAY`
			`ftcs1 = _cptofcd("U", strlen("U"));`
			`ftcs2 = _cptofcd("T", strlen("T"));`
			`ftcs3 = _cptofcd("N", strlen("N"));`
			`STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,`
			`&x[fsupc], &incx);`
			`#else`
			`strsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,`
			`&x[fsupc], &incx);`
			`#endif`
			`}`
			`} /* for k ... */`
			`}`
			`}`

			`stat->ops[SOLVE] += solve_ops;`
			`SUPERLU_FREE(work);`
			`return 0;`
			`}`




			`int`
			`sp_sgemv(char trans, float alpha, SuperMatrix A, float *x,`
			`int incx, float beta, float *y, int incy)`
			`{`
			`/* Purpose`
			`=======`

			`sp_sgemv() performs one of the matrix-vector operations`
			`y := alphaAx + betay, or y := alphaA'x + betay,`
			`where alpha and beta are scalars, x and y are vectors and A is a`
			`sparse A->nrow by A->ncol matrix.`

			`Parameters`
			`==========`

			`TRANS - (input) char*`
			`On entry, TRANS specifies the operation to be performed as`
			`follows:`
			`TRANS = 'N' or 'n' y := alphaAx + beta*y.`
			`TRANS = 'T' or 't' y := alphaA'x + beta*y.`
			`TRANS = 'C' or 'c' y := alphaA'x + beta*y.`

			`ALPHA - (input) float`
			`On entry, ALPHA specifies the scalar alpha.`

			`A - (input) SuperMatrix*`
			`Matrix A with a sparse format, of dimension (A->nrow, A->ncol).`
			`Currently, the type of A can be:`
			`Stype = NC or NCP; Dtype = SLU_S; Mtype = GE.`
			`In the future, more general A can be handled.`

			`X - (input) float*, array of DIMENSION at least`
			`( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'`
			`and at least`
			`( 1 + ( m - 1 )*abs( INCX ) ) otherwise.`
			`Before entry, the incremented array X must contain the`
			`vector x.`

			`INCX - (input) int`
			`On entry, INCX specifies the increment for the elements of`
			`X. INCX must not be zero.`

			`BETA - (input) float`
			`On entry, BETA specifies the scalar beta. When BETA is`
			`supplied as zero then Y need not be set on input.`

			`Y - (output) float*, array of DIMENSION at least`
			`( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'`
			`and at least`
			`( 1 + ( n - 1 )*abs( INCY ) ) otherwise.`
			`Before entry with BETA non-zero, the incremented array Y`
			`must contain the vector y. On exit, Y is overwritten by the`
			`updated vector y.`

			`INCY - (input) int`
			`On entry, INCY specifies the increment for the elements of`
			`Y. INCY must not be zero.`

			`==== Sparse Level 2 Blas routine.`
			`*/`

			`/* Local variables */`
			`NCformat *Astore;`
			`float *Aval;`
			`int info;`
			`float temp;`
			`int lenx, leny, i, j, irow;`
			`int iy, jx, jy, kx, ky;`
			`int notran;`

			`notran = lsame_(trans, "N");`
			`Astore = A->Store;`
			`Aval = Astore->nzval;`

			`/* Test the input parameters */`
			`info = 0;`
			`if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;`
			`else if ( A->nrow < 0 \|\| A->ncol < 0 ) info = 3;`
			`else if (incx == 0) info = 5;`
			`else if (incy == 0) info = 8;`
			`if (info != 0) {`
			`xerbla_("sp_sgemv ", &info);`
			`return 0;`
			`}`

			`/* Quick return if possible. */`
			`if (A->nrow == 0 \|\| A->ncol == 0 \|\| (alpha == 0. && beta == 1.))`
			`return 0;`

			`/* Set LENX and LENY, the lengths of the vectors x and y, and set`
			`up the start points in X and Y. */`
			`if (lsame_(trans, "N")) {`
			`lenx = A->ncol;`
			`leny = A->nrow;`
			`} else {`
			`lenx = A->nrow;`
			`leny = A->ncol;`
			`}`
			`if (incx > 0) kx = 0;`
			`else kx = - (lenx - 1) * incx;`
			`if (incy > 0) ky = 0;`
			`else ky = - (leny - 1) * incy;`

			`/* Start the operations. In this version the elements of A are`
			`accessed sequentially with one pass through A. */`
			`/* First form y := betay. /`
			`if (beta != 1.) {`
			`if (incy == 1) {`
			`if (beta == 0.)`
			`for (i = 0; i < leny; ++i) y[i] = 0.;`
			`else`
			`for (i = 0; i < leny; ++i) y[i] = beta * y[i];`
			`} else {`
			`iy = ky;`
			`if (beta == 0.)`
			`for (i = 0; i < leny; ++i) {`
			`y[iy] = 0.;`
			`iy += incy;`
			`}`
			`else`
			`for (i = 0; i < leny; ++i) {`
			`y[iy] = beta * y[iy];`
			`iy += incy;`
			`}`
			`}`
			`}`

			`if (alpha == 0.) return 0;`

			`if ( notran ) {`
			`/* Form y := alphaAx + y. */`
			`jx = kx;`
			`if (incy == 1) {`
			`for (j = 0; j < A->ncol; ++j) {`
			`if (x[jx] != 0.) {`
			`temp = alpha * x[jx];`
			`for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {`
			`irow = Astore->rowind[i];`
			`y[irow] += temp * Aval[i];`
			`}`
			`}`
			`jx += incx;`
			`}`
			`} else {`
			`ABORT("Not implemented.");`
			`}`
			`} else {`
			`/* Form y := alphaA'x + y. */`
			`jy = ky;`
			`if (incx == 1) {`
			`for (j = 0; j < A->ncol; ++j) {`
			`temp = 0.;`
			`for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {`
			`irow = Astore->rowind[i];`
			`temp += Aval[i] * x[irow];`
			`}`
			`y[jy] += alpha * temp;`
			`jy += incy;`
			`}`
			`} else {`
			`ABORT("Not implemented.");`
			`}`
			`}`
			`return 0;`
			`} /* sp_sgemv */`