forked from bartvdbraak/blender
233 lines
5.6 KiB
C
233 lines
5.6 KiB
C
|
|
|
|
/*
|
|
* -- SuperLU routine (version 2.0) --
|
|
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
|
* and Lawrence Berkeley National Lab.
|
|
* November 15, 1997
|
|
*
|
|
*/
|
|
/*
|
|
* File name: smyblas2.c
|
|
* Purpose:
|
|
* Level 2 BLAS operations: solves and matvec, written in C.
|
|
* Note:
|
|
* This is only used when the system lacks an efficient BLAS library.
|
|
*/
|
|
|
|
/*
|
|
* Solves a dense UNIT lower triangular system. The unit lower
|
|
* triangular matrix is stored in a 2D array M(1:nrow,1:ncol).
|
|
* The solution will be returned in the rhs vector.
|
|
*/
|
|
|
|
/* local prototypes*/
|
|
void slsolve ( int, int, float *, float *);
|
|
void susolve ( int, int, float *, float *);
|
|
void smatvec ( int, int, int, float *, float *, float *);
|
|
|
|
|
|
void slsolve ( int ldm, int ncol, float *M, float *rhs )
|
|
{
|
|
int k;
|
|
float x0, x1, x2, x3, x4, x5, x6, x7;
|
|
float *M0;
|
|
register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
|
|
register int firstcol = 0;
|
|
|
|
M0 = &M[0];
|
|
|
|
while ( firstcol < ncol - 7 ) { /* Do 8 columns */
|
|
Mki0 = M0 + 1;
|
|
Mki1 = Mki0 + ldm + 1;
|
|
Mki2 = Mki1 + ldm + 1;
|
|
Mki3 = Mki2 + ldm + 1;
|
|
Mki4 = Mki3 + ldm + 1;
|
|
Mki5 = Mki4 + ldm + 1;
|
|
Mki6 = Mki5 + ldm + 1;
|
|
Mki7 = Mki6 + ldm + 1;
|
|
|
|
x0 = rhs[firstcol];
|
|
x1 = rhs[firstcol+1] - x0 * *Mki0++;
|
|
x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
|
|
x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
|
|
x4 = rhs[firstcol+4] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
|
|
- x3 * *Mki3++;
|
|
x5 = rhs[firstcol+5] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
|
|
- x3 * *Mki3++ - x4 * *Mki4++;
|
|
x6 = rhs[firstcol+6] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
|
|
- x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++;
|
|
x7 = rhs[firstcol+7] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
|
|
- x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++
|
|
- x6 * *Mki6++;
|
|
|
|
rhs[++firstcol] = x1;
|
|
rhs[++firstcol] = x2;
|
|
rhs[++firstcol] = x3;
|
|
rhs[++firstcol] = x4;
|
|
rhs[++firstcol] = x5;
|
|
rhs[++firstcol] = x6;
|
|
rhs[++firstcol] = x7;
|
|
++firstcol;
|
|
|
|
for (k = firstcol; k < ncol; k++)
|
|
rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
|
|
- x2 * *Mki2++ - x3 * *Mki3++
|
|
- x4 * *Mki4++ - x5 * *Mki5++
|
|
- x6 * *Mki6++ - x7 * *Mki7++;
|
|
|
|
M0 += 8 * ldm + 8;
|
|
}
|
|
|
|
while ( firstcol < ncol - 3 ) { /* Do 4 columns */
|
|
Mki0 = M0 + 1;
|
|
Mki1 = Mki0 + ldm + 1;
|
|
Mki2 = Mki1 + ldm + 1;
|
|
Mki3 = Mki2 + ldm + 1;
|
|
|
|
x0 = rhs[firstcol];
|
|
x1 = rhs[firstcol+1] - x0 * *Mki0++;
|
|
x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
|
|
x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
|
|
|
|
rhs[++firstcol] = x1;
|
|
rhs[++firstcol] = x2;
|
|
rhs[++firstcol] = x3;
|
|
++firstcol;
|
|
|
|
for (k = firstcol; k < ncol; k++)
|
|
rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
|
|
- x2 * *Mki2++ - x3 * *Mki3++;
|
|
|
|
M0 += 4 * ldm + 4;
|
|
}
|
|
|
|
if ( firstcol < ncol - 1 ) { /* Do 2 columns */
|
|
Mki0 = M0 + 1;
|
|
Mki1 = Mki0 + ldm + 1;
|
|
|
|
x0 = rhs[firstcol];
|
|
x1 = rhs[firstcol+1] - x0 * *Mki0++;
|
|
|
|
rhs[++firstcol] = x1;
|
|
++firstcol;
|
|
|
|
for (k = firstcol; k < ncol; k++)
|
|
rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/*
|
|
* Solves a dense upper triangular system. The upper triangular matrix is
|
|
* stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
|
|
* in the rhs vector.
|
|
*/
|
|
void
|
|
susolve ( ldm, ncol, M, rhs )
|
|
int ldm; /* in */
|
|
int ncol; /* in */
|
|
float *M; /* in */
|
|
float *rhs; /* modified */
|
|
{
|
|
float xj;
|
|
int jcol, j, irow;
|
|
|
|
jcol = ncol - 1;
|
|
|
|
for (j = 0; j < ncol; j++) {
|
|
|
|
xj = rhs[jcol] / M[jcol + jcol*ldm]; /* M(jcol, jcol) */
|
|
rhs[jcol] = xj;
|
|
|
|
for (irow = 0; irow < jcol; irow++)
|
|
rhs[irow] -= xj * M[irow + jcol*ldm]; /* M(irow, jcol) */
|
|
|
|
jcol--;
|
|
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
|
|
* The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
|
|
*/
|
|
void smatvec ( ldm, nrow, ncol, M, vec, Mxvec )
|
|
|
|
int ldm; /* in -- leading dimension of M */
|
|
int nrow; /* in */
|
|
int ncol; /* in */
|
|
float *M; /* in */
|
|
float *vec; /* in */
|
|
float *Mxvec; /* in/out */
|
|
|
|
{
|
|
float vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7;
|
|
float *M0;
|
|
register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
|
|
register int firstcol = 0;
|
|
int k;
|
|
|
|
M0 = &M[0];
|
|
while ( firstcol < ncol - 7 ) { /* Do 8 columns */
|
|
|
|
Mki0 = M0;
|
|
Mki1 = Mki0 + ldm;
|
|
Mki2 = Mki1 + ldm;
|
|
Mki3 = Mki2 + ldm;
|
|
Mki4 = Mki3 + ldm;
|
|
Mki5 = Mki4 + ldm;
|
|
Mki6 = Mki5 + ldm;
|
|
Mki7 = Mki6 + ldm;
|
|
|
|
vi0 = vec[firstcol++];
|
|
vi1 = vec[firstcol++];
|
|
vi2 = vec[firstcol++];
|
|
vi3 = vec[firstcol++];
|
|
vi4 = vec[firstcol++];
|
|
vi5 = vec[firstcol++];
|
|
vi6 = vec[firstcol++];
|
|
vi7 = vec[firstcol++];
|
|
|
|
for (k = 0; k < nrow; k++)
|
|
Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
|
|
+ vi2 * *Mki2++ + vi3 * *Mki3++
|
|
+ vi4 * *Mki4++ + vi5 * *Mki5++
|
|
+ vi6 * *Mki6++ + vi7 * *Mki7++;
|
|
|
|
M0 += 8 * ldm;
|
|
}
|
|
|
|
while ( firstcol < ncol - 3 ) { /* Do 4 columns */
|
|
|
|
Mki0 = M0;
|
|
Mki1 = Mki0 + ldm;
|
|
Mki2 = Mki1 + ldm;
|
|
Mki3 = Mki2 + ldm;
|
|
|
|
vi0 = vec[firstcol++];
|
|
vi1 = vec[firstcol++];
|
|
vi2 = vec[firstcol++];
|
|
vi3 = vec[firstcol++];
|
|
for (k = 0; k < nrow; k++)
|
|
Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
|
|
+ vi2 * *Mki2++ + vi3 * *Mki3++ ;
|
|
|
|
M0 += 4 * ldm;
|
|
}
|
|
|
|
while ( firstcol < ncol ) { /* Do 1 column */
|
|
|
|
Mki0 = M0;
|
|
vi0 = vec[firstcol++];
|
|
for (k = 0; k < nrow; k++)
|
|
Mxvec[k] += vi0 * *Mki0++;
|
|
|
|
M0 += ldm;
|
|
}
|
|
|
|
}
|
|
|