/************************************************************************** ** ** Copyright (C) 1993 David E. Stewart & Zbigniew Leyk, all rights reserved. ** ** Meschach Library ** ** This Meschach Library is provided "as is" without any express ** or implied warranty of any kind with respect to this software. ** In particular the authors shall not be liable for any direct, ** indirect, special, incidental or consequential damages arising ** in any way from use of the software. ** ** Everyone is granted permission to copy, modify and redistribute this ** Meschach Library, provided: ** 1. All copies contain this copyright notice. ** 2. All modified copies shall carry a notice stating who ** made the last modification and the date of such modification. ** 3. No charge is made for this software or works derived from it. ** This clause shall not be construed as constraining other software ** distributed on the same medium as this software, nor is a ** distribution fee considered a charge. ** ***************************************************************************/ /* Sparse LU factorisation See also: sparse.[ch] etc for details about sparse matrices */ #include #include #include "sparse2.h" /* Macro for speedup */ /* #define sprow_idx2(r,c,hint) \ ( ( (hint) >= 0 && (r)->elt[hint].col == (c)) ? hint : sprow_idx((r),(c)) ) */ /* spLUfactor -- sparse LU factorisation with pivoting -- uses partial pivoting and Markowitz criterion |a[p][k]| >= alpha * max_i |a[i][k]| -- creates fill-in as needed -- in situ factorisation */ SPMAT *spLUfactor(A,px,alpha) SPMAT *A; PERM *px; double alpha; { int i, best_i, k, idx, len, best_len, m, n; SPROW *r, *r_piv, tmp_row; static SPROW *merge = (SPROW *)NULL; Real max_val, tmp; static VEC *col_vals=VNULL; if ( ! A || ! px ) error(E_NULL,"spLUfctr"); if ( alpha <= 0.0 || alpha > 1.0 ) error(E_RANGE,"alpha in spLUfctr"); if ( px->size <= A->m ) px = px_resize(px,A->m); px_ident(px); col_vals = v_resize(col_vals,A->m); MEM_STAT_REG(col_vals,TYPE_VEC); m = A->m; n = A->n; if ( ! A->flag_col ) sp_col_access(A); if ( ! A->flag_diag ) sp_diag_access(A); A->flag_col = A->flag_diag = FALSE; if ( ! merge ) { merge = sprow_get(20); MEM_STAT_REG(merge,TYPE_SPROW); } for ( k = 0; k < n; k++ ) { /* find pivot row/element for partial pivoting */ /* get first row with a non-zero entry in the k-th column */ max_val = 0.0; for ( i = k; i < m; i++ ) { r = &(A->row[i]); idx = sprow_idx(r,k); if ( idx < 0 ) tmp = 0.0; else tmp = r->elt[idx].val; if ( fabs(tmp) > max_val ) max_val = fabs(tmp); col_vals->ve[i] = tmp; } if ( max_val == 0.0 ) continue; best_len = n+1; /* only if no possibilities */ best_i = -1; for ( i = k; i < m; i++ ) { tmp = fabs(col_vals->ve[i]); if ( tmp == 0.0 ) continue; if ( tmp >= alpha*max_val ) { r = &(A->row[i]); idx = sprow_idx(r,k); len = (r->len) - idx; if ( len < best_len ) { best_len = len; best_i = i; } } } /* swap row #best_i with row #k */ MEM_COPY(&(A->row[best_i]),&tmp_row,sizeof(SPROW)); MEM_COPY(&(A->row[k]),&(A->row[best_i]),sizeof(SPROW)); MEM_COPY(&tmp_row,&(A->row[k]),sizeof(SPROW)); /* swap col_vals entries */ tmp = col_vals->ve[best_i]; col_vals->ve[best_i] = col_vals->ve[k]; col_vals->ve[k] = tmp; px_transp(px,k,best_i); r_piv = &(A->row[k]); for ( i = k+1; i < n; i++ ) { /* compute and set multiplier */ tmp = col_vals->ve[i]/col_vals->ve[k]; if ( tmp != 0.0 ) sp_set_val(A,i,k,tmp); else continue; /* perform row operations */ merge->len = 0; r = &(A->row[i]); sprow_mltadd(r,r_piv,-tmp,k+1,merge,TYPE_SPROW); idx = sprow_idx(r,k+1); if ( idx < 0 ) idx = -(idx+2); /* see if r needs expanding */ if ( r->maxlen < idx + merge->len ) sprow_xpd(r,idx+merge->len,TYPE_SPMAT); r->len = idx+merge->len; MEM_COPY((char *)(merge->elt),(char *)&(r->elt[idx]), merge->len*sizeof(row_elt)); } } return A; } /* spLUsolve -- solve A.x = b using factored matrix A from spLUfactor() -- returns x -- may not be in-situ */ VEC *spLUsolve(A,pivot,b,x) SPMAT *A; PERM *pivot; VEC *b, *x; { int i, idx, len, lim; Real sum, *x_ve; SPROW *r; row_elt *elt; if ( ! A || ! b ) error(E_NULL,"spLUsolve"); if ( (pivot != PNULL && A->m != pivot->size) || A->m != b->dim ) error(E_SIZES,"spLUsolve"); if ( ! x || x->dim != A->n ) x = v_resize(x,A->n); if ( pivot != PNULL ) x = px_vec(pivot,b,x); else x = v_copy(b,x); x_ve = x->ve; lim = min(A->m,A->n); for ( i = 0; i < lim; i++ ) { sum = x_ve[i]; r = &(A->row[i]); len = r->len; elt = r->elt; for ( idx = 0; idx < len && elt->col < i; idx++, elt++ ) sum -= elt->val*x_ve[elt->col]; x_ve[i] = sum; } for ( i = lim-1; i >= 0; i-- ) { sum = x_ve[i]; r = &(A->row[i]); len = r->len; elt = &(r->elt[len-1]); for ( idx = len-1; idx >= 0 && elt->col > i; idx--, elt-- ) sum -= elt->val*x_ve[elt->col]; if ( idx < 0 || elt->col != i || elt->val == 0.0 ) error(E_SING,"spLUsolve"); x_ve[i] = sum/elt->val; } return x; } /* spLUTsolve -- solve A.x = b using factored matrix A from spLUfactor() -- returns x -- may not be in-situ */ VEC *spLUTsolve(A,pivot,b,x) SPMAT *A; PERM *pivot; VEC *b, *x; { int i, idx, lim, rownum; Real sum, *tmp_ve; /* SPROW *r; */ row_elt *elt; static VEC *tmp=VNULL; if ( ! A || ! b ) error(E_NULL,"spLUTsolve"); if ( (pivot != PNULL && A->m != pivot->size) || A->m != b->dim ) error(E_SIZES,"spLUTsolve"); tmp = v_copy(b,tmp); MEM_STAT_REG(tmp,TYPE_VEC); if ( ! A->flag_col ) sp_col_access(A); if ( ! A->flag_diag ) sp_diag_access(A); lim = min(A->m,A->n); tmp_ve = tmp->ve; /* solve U^T.tmp = b */ for ( i = 0; i < lim; i++ ) { sum = tmp_ve[i]; rownum = A->start_row[i]; idx = A->start_idx[i]; if ( rownum < 0 || idx < 0 ) error(E_SING,"spLUTsolve"); while ( rownum < i && rownum >= 0 && idx >= 0 ) { elt = &(A->row[rownum].elt[idx]); sum -= elt->val*tmp_ve[rownum]; rownum = elt->nxt_row; idx = elt->nxt_idx; } if ( rownum != i ) error(E_SING,"spLUTsolve"); elt = &(A->row[rownum].elt[idx]); if ( elt->val == 0.0 ) error(E_SING,"spLUTsolve"); tmp_ve[i] = sum/elt->val; } /* now solve L^T.tmp = (old) tmp */ for ( i = lim-1; i >= 0; i-- ) { sum = tmp_ve[i]; rownum = i; idx = A->row[rownum].diag; if ( idx < 0 ) error(E_NULL,"spLUTsolve"); elt = &(A->row[rownum].elt[idx]); rownum = elt->nxt_row; idx = elt->nxt_idx; while ( rownum < lim && rownum >= 0 && idx >= 0 ) { elt = &(A->row[rownum].elt[idx]); sum -= elt->val*tmp_ve[rownum]; rownum = elt->nxt_row; idx = elt->nxt_idx; } tmp_ve[i] = sum; } if ( pivot != PNULL ) x = pxinv_vec(pivot,tmp,x); else x = v_copy(tmp,x); return x; } /* spILUfactor -- sparse modified incomplete LU factorisation with no pivoting -- all pivot entries are ensured to be >= alpha in magnitude -- setting alpha = 0 gives incomplete LU factorisation -- no fill-in is generated -- in situ factorisation */ SPMAT *spILUfactor(A,alpha) SPMAT *A; double alpha; { int i, k, idx, idx_piv, m, n, old_idx, old_idx_piv; SPROW *r, *r_piv; Real piv_val, tmp; /* printf("spILUfactor: entered\n"); */ if ( ! A ) error(E_NULL,"spILUfactor"); if ( alpha < 0.0 ) error(E_RANGE,"[alpha] in spILUfactor"); m = A->m; n = A->n; sp_diag_access(A); sp_col_access(A); for ( k = 0; k < n; k++ ) { /* printf("spILUfactor(l.%d): checkpoint A: k = %d\n",__LINE__,k); */ /* printf("spILUfactor(l.%d): A =\n", __LINE__); */ /* sp_output(A); */ r_piv = &(A->row[k]); idx_piv = r_piv->diag; if ( idx_piv < 0 ) { sprow_set_val(r_piv,k,alpha); idx_piv = sprow_idx(r_piv,k); } /* printf("spILUfactor: checkpoint B\n"); */ if ( idx_piv < 0 ) error(E_BOUNDS,"spILUfactor"); old_idx_piv = idx_piv; piv_val = r_piv->elt[idx_piv].val; /* printf("spILUfactor: checkpoint C\n"); */ if ( fabs(piv_val) < alpha ) piv_val = ( piv_val < 0.0 ) ? -alpha : alpha; if ( piv_val == 0.0 ) /* alpha == 0.0 too! */ error(E_SING,"spILUfactor"); /* go to next row with a non-zero in this column */ i = r_piv->elt[idx_piv].nxt_row; old_idx = idx = r_piv->elt[idx_piv].nxt_idx; while ( i >= k ) { /* printf("spILUfactor: checkpoint D: i = %d\n",i); */ /* perform row operations */ r = &(A->row[i]); /* idx = sprow_idx(r,k); */ /* printf("spLUfactor(l.%d) i = %d, idx = %d\n", __LINE__, i, idx); */ if ( idx < 0 ) { idx = r->elt[old_idx].nxt_idx; i = r->elt[old_idx].nxt_row; continue; } /* printf("spILUfactor: checkpoint E\n"); */ /* compute and set multiplier */ r->elt[idx].val = tmp = r->elt[idx].val/piv_val; /* printf("spILUfactor: piv_val = %g, multiplier = %g\n", piv_val, tmp); */ /* printf("spLUfactor(l.%d) multiplier = %g\n", __LINE__, tmp); */ if ( tmp == 0.0 ) { idx = r->elt[old_idx].nxt_idx; i = r->elt[old_idx].nxt_row; continue; } /* idx = sprow_idx(r,k+1); */ /* if ( idx < 0 ) idx = -(idx+2); */ idx_piv++; idx++; /* now look beyond the multiplier entry */ /* printf("spILUfactor: checkpoint F: idx = %d, idx_piv = %d\n", idx, idx_piv); */ while ( idx_piv < r_piv->len && idx < r->len ) { /* printf("spILUfactor: checkpoint G: idx = %d, idx_piv = %d\n", idx, idx_piv); */ if ( r_piv->elt[idx_piv].col < r->elt[idx].col ) idx_piv++; else if ( r_piv->elt[idx_piv].col > r->elt[idx].col ) idx++; else /* column numbers match */ { /* printf("spILUfactor(l.%d) subtract %g times the ", __LINE__, tmp); */ /* printf("(%d,%d) entry to the (%d,%d) entry\n", k, r_piv->elt[idx_piv].col, i, r->elt[idx].col); */ r->elt[idx].val -= tmp*r_piv->elt[idx_piv].val; idx++; idx_piv++; } } /* bump to next row with a non-zero in column k */ /* printf("spILUfactor(l.%d) column = %d, row[%d] =\n", __LINE__, r->elt[old_idx].col, i); */ /* sprow_foutput(stdout,r); */ i = r->elt[old_idx].nxt_row; old_idx = idx = r->elt[old_idx].nxt_idx; /* printf("spILUfactor(l.%d) i = %d, idx = %d\n", __LINE__, i, idx); */ /* and restore idx_piv to index of pivot entry */ idx_piv = old_idx_piv; } } /* printf("spILUfactor: exiting\n"); */ return A; }