#!/bin/sh # to unpack, sh this message in an empty directory PATH=/bin:/usr/bin cat > 24048P07 <<'bigmail CUT HERE............' - tmp_e->nxt_row = i2; - tmp_e->nxt_idx = idx2; - } - } - else if ( r2->elt[idx2].col != j2 ) - error(E_INTERN,"bkp_swap_elt"); - - e1 = &(r1->elt[idx1]); e2 = &(r2->elt[idx2]); - - tmp = e1->val; - e1->val = e2->val; - e2->val = tmp; - - return A; -} - -/* bkp_bump_col -- bumps row and idx to next entry in column j */ -row_elt *bkp_bump_col(A, j, row, idx) -SPMAT *A; -int j, *row, *idx; -{ - SPROW *r; - row_elt *e; - - if ( *row < 0 ) - { - *row = A->start_row[j]; - *idx = A->start_idx[j]; - } - else - { - r = &(A->row[*row]); - e = &(r->elt[*idx]); - if ( e->col != j ) - error(E_INTERN,"bkp_bump_col"); - *row = e->nxt_row; - *idx = e->nxt_idx; - } - if ( *row < 0 ) - return (row_elt *)NULL; - else - return &(A->row[*row].elt[*idx]); -} - -/* bkp_interchange -- swap rows/cols i and j (symmetric pivot) - -- uses just the upper triangular part */ -SPMAT *bkp_interchange(A, i1, i2) -SPMAT *A; -int i1, i2; -{ - int tmp_row, tmp_idx; - int row1, row2, idx1, idx2, tmp_row1, tmp_idx1, tmp_row2, tmp_idx2; - SPROW *r1, *r2; - row_elt *e1, *e2; - IVEC *done_list = IVNULL; - - if ( ! A ) - error(E_NULL,"bkp_interchange"); - if ( i1 < 0 || i1 >= A->n || i2 < 0 || i2 >= A->n ) - error(E_BOUNDS,"bkp_interchange"); - if ( A->m != A->n ) - error(E_SQUARE,"bkp_interchange"); - - if ( i1 == i2 ) - return A; - if ( i1 > i2 ) - { tmp_idx = i1; i1 = i2; i2 = tmp_idx; } - - done_list = iv_resize(done_list,A->n); - for ( tmp_idx = 0; tmp_idx < A->n; tmp_idx++ ) - done_list->ive[tmp_idx] = FALSE; - row1 = -1; idx1 = i1; - row2 = -1; idx2 = i2; - e1 = bkp_bump_col(A,i1,&row1,&idx1); - e2 = bkp_bump_col(A,i2,&row2,&idx2); - - while ( (row1 >= 0 && row1 < i1) || (row2 >= 0 && row2 < i1) ) - /* Note: "row2 < i1" not "row2 < i2" as we must stop before the - "knee bend" */ - { - if ( row1 >= 0 && row1 < i1 && ( row1 < row2 || row2 < 0 ) ) - { - tmp_row1 = row1; tmp_idx1 = idx1; - e1 = bkp_bump_col(A,i1,&tmp_row1,&tmp_idx1); - if ( ! done_list->ive[row1] ) - { - if ( row1 == row2 ) - bkp_swap_elt(A,row1,i1,idx1,row1,i2,idx2); - else - bkp_swap_elt(A,row1,i1,idx1,row1,i2,-1); - done_list->ive[row1] = TRUE; - } - row1 = tmp_row1; idx1 = tmp_idx1; - } - else if ( row2 >= 0 && row2 < i1 && ( row2 < row1 || row1 < 0 ) ) - { - tmp_row2 = row2; tmp_idx2 = idx2; - e2 = bkp_bump_col(A,i2,&tmp_row2,&tmp_idx2); - if ( ! done_list->ive[row2] ) - { - if ( row1 == row2 ) - bkp_swap_elt(A,row2,i1,idx1,row2,i2,idx2); - else - bkp_swap_elt(A,row2,i1,-1,row2,i2,idx2); - done_list->ive[row2] = TRUE; - } - row2 = tmp_row2; idx2 = tmp_idx2; - } - else if ( row1 == row2 ) - { - tmp_row1 = row1; tmp_idx1 = idx1; - e1 = bkp_bump_col(A,i1,&tmp_row1,&tmp_idx1); - tmp_row2 = row2; tmp_idx2 = idx2; - e2 = bkp_bump_col(A,i2,&tmp_row2,&tmp_idx2); - if ( ! done_list->ive[row1] ) - { - bkp_swap_elt(A,row1,i1,idx1,row2,i2,idx2); - done_list->ive[row1] = TRUE; - } - row1 = tmp_row1; idx1 = tmp_idx1; - row2 = tmp_row2; idx2 = tmp_idx2; - } - } - - /* ensure we are **past** the first knee */ - while ( row2 >= 0 && row2 <= i1 ) - e2 = bkp_bump_col(A,i2,&row2,&idx2); - - /* at/after 1st "knee bend" */ - r1 = &(A->row[i1]); - idx1 = 0; - e1 = &(r1->elt[idx1]); - while ( row2 >= 0 && row2 < i2 ) - { - /* used for update of e2 at end of loop */ - tmp_row = row2; tmp_idx = idx2; - if ( ! done_list->ive[row2] ) - { - r2 = &(A->row[row2]); - bkp_bump_col(A,i2,&tmp_row,&tmp_idx); - done_list->ive[row2] = TRUE; - tmp_idx1 = unord_get_idx(r1,row2); - tracecatch(bkp_swap_elt(A,row2,i2,idx2,i1,row2,tmp_idx1), - "bkp_interchange"); - } - - /* update e1 and e2 */ - row2 = tmp_row; idx2 = tmp_idx; - e2 = ( row2 >= 0 ) ? &(A->row[row2].elt[idx2]) : (row_elt *)NULL; - } - - idx1 = 0; - e1 = r1->elt; - while ( idx1 < r1->len ) - { - if ( e1->col >= i2 || e1->col <= i1 ) - { - idx1++; - e1++; - continue; - } - if ( ! done_list->ive[e1->col] ) - { - tmp_idx2 = unord_get_idx(&(A->row[e1->col]),i2); - tracecatch(bkp_swap_elt(A,i1,e1->col,idx1,e1->col,i2,tmp_idx2), - "bkp_interchange"); - done_list->ive[e1->col] = TRUE; - } - idx1++; - e1++; - } - - /* at/after 2nd "knee bend" */ - idx1 = 0; - e1 = &(r1->elt[idx1]); - r2 = &(A->row[i2]); - idx2 = 0; - e2 = &(r2->elt[idx2]); - while ( idx1 < r1->len ) - { - if ( e1->col <= i2 ) - { - idx1++; e1++; - continue; - } - if ( ! done_list->ive[e1->col] ) - { - tmp_idx2 = unord_get_idx(r2,e1->col); - tracecatch(bkp_swap_elt(A,i1,e1->col,idx1,i2,e1->col,tmp_idx2), - "bkp_interchange"); - done_list->ive[e1->col] = TRUE; - } - idx1++; e1++; - } - - idx2 = 0; e2 = r2->elt; - while ( idx2 < r2->len ) - { - if ( e2->col <= i2 ) - { - idx2++; e2++; - continue; - } - if ( ! done_list->ive[e2->col] ) - { - tmp_idx1 = unord_get_idx(r1,e2->col); - tracecatch(bkp_swap_elt(A,i2,e2->col,idx2,i1,e2->col,tmp_idx1), - "bkp_interchange"); - done_list->ive[e2->col] = TRUE; - } - idx2++; e2++; - } - - /* now interchange the digonal entries! */ - idx1 = unord_get_idx(&(A->row[i1]),i1); - idx2 = unord_get_idx(&(A->row[i2]),i2); - if ( idx1 >= 0 || idx2 >= 0 ) - { - tracecatch(bkp_swap_elt(A,i1,i1,idx1,i2,i2,idx2), - "bkp_interchange"); - } - - return A; -} - - -/* iv_min -- returns minimum of an integer vector - -- sets index to the position in iv if index != NULL */ -int iv_min(iv,index) -IVEC *iv; -int *index; -{ - int i, i_min, min_val, tmp; - - if ( ! iv ) - error(E_NULL,"iv_min"); - if ( iv->dim <= 0 ) - error(E_SIZES,"iv_min"); - i_min = 0; - min_val = iv->ive[0]; - for ( i = 1; i < iv->dim; i++ ) - { - tmp = iv->ive[i]; - if ( tmp < min_val ) - { - min_val = tmp; - i_min = i; - } - } - - if ( index != (int *)NULL ) - *index = i_min; - - return min_val; -} - -/* max_row_col -- returns max { |A[j][k]| : k >= i, k != j, k != l } given j - using symmetry and only the upper triangular part of A */ -static double max_row_col(A,i,j,l) -SPMAT *A; -int i, j, l; -{ - int row_num, idx; - SPROW *r; - row_elt *e; - Real max_val, tmp; - - if ( ! A ) - error(E_NULL,"max_row_col"); - if ( i < 0 || i > A->n || j < 0 || j >= A->n ) - error(E_BOUNDS,"max_row_col"); - - max_val = 0.0; - - idx = unord_get_idx(&(A->row[i]),j); - if ( idx < 0 ) - { - row_num = -1; idx = j; - e = chase_past(A,j,&row_num,&idx,i); - } - else - { - row_num = i; - e = &(A->row[i].elt[idx]); - } - while ( row_num >= 0 && row_num < j ) - { - if ( row_num != l ) - { - tmp = fabs(e->val); - if ( tmp > max_val ) - max_val = tmp; - } - e = bump_col(A,j,&row_num,&idx); - } - r = &(A->row[j]); - for ( idx = 0, e = r->elt; idx < r->len; idx++, e++ ) - { - if ( e->col > j && e->col != l ) - { - tmp = fabs(e->val); - if ( tmp > max_val ) - max_val = tmp; - } - } - - return max_val; -} - -/* nonzeros -- counts non-zeros in A */ -static int nonzeros(A) -SPMAT *A; -{ - int cnt, i; - - if ( ! A ) - return 0; - cnt = 0; - for ( i = 0; i < A->m; i++ ) - cnt += A->row[i].len; - - return cnt; -} - -/* chk_col_access -- for spBKPfactor() - -- checks that column access path is OK */ -int chk_col_access(A) -SPMAT *A; -{ - int cnt_nz, j, row, idx; - SPROW *r; - row_elt *e; - - if ( ! A ) - error(E_NULL,"chk_col_access"); - - /* count nonzeros as we go down columns */ - cnt_nz = 0; - for ( j = 0; j < A->n; j++ ) - { - row = A->start_row[j]; - idx = A->start_idx[j]; - while ( row >= 0 ) - { - if ( row >= A->m || idx < 0 ) - return FALSE; - r = &(A->row[row]); - if ( idx >= r->len ) - return FALSE; - e = &(r->elt[idx]); - if ( e->nxt_row >= 0 && e->nxt_row <= row ) - return FALSE; - row = e->nxt_row; - idx = e->nxt_idx; - cnt_nz++; - } - } - - if ( cnt_nz != nonzeros(A) ) - return FALSE; - else - return TRUE; -} - -/* col_cmp -- compare two columns -- for sorting rows using qsort() */ -static int col_cmp(e1,e2) -row_elt *e1, *e2; -{ - return e1->col - e2->col; -} - -/* spBKPfactor -- sparse Bunch-Kaufman-Parlett factorisation of A in-situ - -- A is factored into the form P'AP = MDM' where - P is a permutation matrix, M lower triangular and D is block - diagonal with blocks of size 1 or 2 - -- P is stored in pivot; blocks[i]==i iff D[i][i] is a block */ -SPMAT *spBKPfactor(A,pivot,blocks,tol) -SPMAT *A; -PERM *pivot, *blocks; -double tol; -{ - int i, j, k, l, n, onebyone, r; - int idx, idx1, idx_piv; - int row_num; - int best_deg, best_j, best_l, best_cost, mark_cost, deg, deg_j, - deg_l, ignore_deg; - int list_idx, list_idx2, old_list_idx; - SPROW *row, *r_piv, *r1_piv; - row_elt *e, *e1; - Real aii, aip1, aip1i; - Real det, max_j, max_l, s, t; - static IVEC *scan_row = IVNULL, *scan_idx = IVNULL, *col_list = IVNULL, - *tmp_iv = IVNULL; - static IVEC *deg_list = IVNULL; - static IVEC *orig_idx = IVNULL, *orig1_idx = IVNULL; - static PERM *order = PNULL; - - if ( ! A || ! pivot || ! blocks ) - error(E_NULL,"spBKPfactor"); - if ( A->m != A->n ) - error(E_SQUARE,"spBKPfactor"); - if ( A->m != pivot->size || pivot->size != blocks->size ) - error(E_SIZES,"spBKPfactor"); - if ( tol <= 0.0 || tol > 1.0 ) - error(E_RANGE,"spBKPfactor"); - - n = A->n; - - px_ident(pivot); px_ident(blocks); - sp_col_access(A); sp_diag_access(A); - ignore_deg = FALSE; - - deg_list = iv_resize(deg_list,n); - order = px_resize(order,n); - MEM_STAT_REG(deg_list,TYPE_IVEC); - MEM_STAT_REG(order,TYPE_PERM); - - scan_row = iv_resize(scan_row,5); - scan_idx = iv_resize(scan_idx,5); - col_list = iv_resize(col_list,5); - orig_idx = iv_resize(orig_idx,5); - orig_idx = iv_resize(orig1_idx,5); - orig_idx = iv_resize(tmp_iv,5); - MEM_STAT_REG(scan_row,TYPE_IVEC); - MEM_STAT_REG(scan_idx,TYPE_IVEC); - MEM_STAT_REG(col_list,TYPE_IVEC); - MEM_STAT_REG(orig_idx,TYPE_IVEC); - MEM_STAT_REG(orig1_idx,TYPE_IVEC); - MEM_STAT_REG(tmp_iv,TYPE_IVEC); - - for ( i = 0; i < n-1; i = onebyone ? i+1 : i+2 ) - { - /* now we want to use a Markowitz-style selection rule for - determining which rows to swap and whether to use - 1x1 or 2x2 pivoting */ - - /* get list of degrees of nodes */ - deg_list = iv_resize(deg_list,n-i); - if ( ! ignore_deg ) - for ( j = i; j < n; j++ ) - deg_list->ive[j-i] = 0; - else - { - for ( j = i; j < n; j++ ) - deg_list->ive[j-i] = 1; - if ( i < n ) - deg_list->ive[0] = 0; - } - order = px_resize(order,n-i); - px_ident(order); - - if ( ! ignore_deg ) - { - for ( j = i; j < n; j++ ) - { - /* idx = sprow_idx(&(A->row[j]),j+1); */ - /* idx = fixindex(idx); */ - idx = 0; - row = &(A->row[j]); - e = &(row->elt[idx]); - /* deg_list->ive[j-i] += row->len - idx; */ - for ( ; idx < row->len; idx++, e++ ) - if ( e->col >= i ) - deg_list->ive[e->col - i]++; - } - /* now deg_list[k] == degree of node k+i */ - - /* now sort them into increasing order */ - iv_sort(deg_list,order); - /* now deg_list[idx] == degree of node i+order[idx] */ - } - - /* now we can chase through the nodes in order of increasing - degree, picking out the ones that satisfy our stability - criterion */ - list_idx = 0; r = -1; - best_j = best_l = -1; - for ( deg = 0; deg <= n; deg++ ) - { - Real ajj, all, ajl; - - if ( list_idx >= deg_list->dim ) - break; /* That's all folks! */ - old_list_idx = list_idx; - while ( list_idx < deg_list->dim && - deg_list->ive[list_idx] <= deg ) - { - j = i+order->pe[list_idx]; - if ( j < i ) - continue; - /* can we use row/col j for a 1 x 1 pivot? */ - /* find max_j = max_{k>=i} {|A[k][j]|,|A[j][k]|} */ - ajj = fabs(unord_get_val(A,j,j)); - if ( ajj == 0.0 ) - { - list_idx++; - continue; /* can't use this for 1 x 1 pivot */ - } - - max_j = max_row_col(A,i,j,-1); - if ( ajj >= tol/* *alpha */ *max_j ) - { - onebyone = TRUE; - best_j = j; - best_deg = deg_list->ive[list_idx]; - break; - } - list_idx++; - } - if ( best_j >= 0 ) - break; - best_cost = 2*n; /* > any possible Markowitz cost (bound) */ - best_j = best_l = -1; - list_idx = old_list_idx; - while ( list_idx < deg_list->dim && - deg_list->ive[list_idx] <= deg ) - { - j = i+order->pe[list_idx]; - ajj = fabs(unord_get_val(A,j,j)); - for ( list_idx2 = 0; list_idx2 < list_idx; list_idx2++ ) - { - deg_j = deg; - deg_l = deg_list->ive[list_idx2]; - l = i+order->pe[list_idx2]; - if ( l < i ) - continue; - /* try using rows/cols (j,l) for a 2 x 2 pivot block */ - all = fabs(unord_get_val(A,l,l)); - ajl = ( j > l ) ? fabs(unord_get_val(A,l,j)) : - fabs(unord_get_val(A,j,l)); - det = fabs(ajj*all - ajl*ajl); - if ( det == 0.0 ) - continue; - max_j = max_row_col(A,i,j,l); - max_l = max_row_col(A,i,l,j); - if ( tol*(all*max_j+ajl*max_l) < det && - tol*(ajl*max_j+ajj*max_l) < det ) - { - /* acceptably stable 2 x 2 pivot */ - /* this is actually an overestimate of the - Markowitz cost for choosing (j,l) */ - mark_cost = (ajj == 0.0) ? - ((all == 0.0) ? deg_j+deg_l : deg_j+2*deg_l) : - ((all == 0.0) ? 2*deg_j+deg_l : - 2*(deg_j+deg_l)); - if ( mark_cost < best_cost ) - { - onebyone = FALSE; - best_cost = mark_cost; - best_j = j; - best_l = l; - best_deg = deg_j; - } - } - } - list_idx++; - } - if ( best_j >= 0 ) - break; - } - - if ( best_deg > (int)floor(0.8*(n-i)) ) - ignore_deg = TRUE; - - /* now do actual interchanges */ - if ( best_j >= 0 && onebyone ) - { - bkp_interchange(A,i,best_j); - px_transp(pivot,i,best_j); - } - else if ( best_j >= 0 && best_l >= 0 && ! onebyone ) - { - if ( best_j == i || best_j == i+1 ) - { - if ( best_l == i || best_l == i+1 ) - { - /* no pivoting, but must update blocks permutation */ - px_transp(blocks,i,i+1); - goto dopivot; - } - bkp_interchange(A,(best_j == i) ? i+1 : i,best_l); - px_transp(pivot,(best_j == i) ? i+1 : i,best_l); - } - else if ( best_l == i || best_l == i+1 ) - { - bkp_interchange(A,(best_l == i) ? i+1 : i,best_j); - px_transp(pivot,(best_l == i) ? i+1 : i,best_j); - } - else /* best_j & best_l outside i, i+1 */ - { - if ( i != best_j ) - { - bkp_interchange(A,i,best_j); - px_transp(pivot,i,best_j); - } - if ( i+1 != best_l ) - { - bkp_interchange(A,i+1,best_l); - px_transp(pivot,i+1,best_l); - } - } - } - else /* can't pivot &/or nothing to pivot */ - continue; - - /* update blocks permutation */ - if ( ! onebyone ) - px_transp(blocks,i,i+1); - - dopivot: - if ( onebyone ) - { - int idx_j, idx_k, s_idx, s_idx2; - row_elt *e_ij, *e_ik; - - r_piv = &(A->row[i]); - idx_piv = unord_get_idx(r_piv,i); - /* if idx_piv < 0 then aii == 0 and no pivoting can be done; - -- this means that we should continue to the next iteration */ - if ( idx_piv < 0 ) - continue; - aii = r_piv->elt[idx_piv].val; - if ( aii == 0.0 ) - continue; - - /* for ( j = i+1; j < n; j++ ) { ... pivot step ... } */ - /* initialise scan_... etc for the 1 x 1 pivot */ - scan_row = iv_resize(scan_row,r_piv->len); - scan_idx = iv_resize(scan_idx,r_piv->len); - col_list = iv_resize(col_list,r_piv->len); - orig_idx = iv_resize(orig_idx,r_piv->len); - row_num = i; s_idx = idx = 0; - e = &(r_piv->elt[idx]); - for ( idx = 0; idx < r_piv->len; idx++, e++ ) - { - if ( e->col < i ) - continue; - scan_row->ive[s_idx] = i; - scan_idx->ive[s_idx] = idx; - orig_idx->ive[s_idx] = idx; - col_list->ive[s_idx] = e->col; - s_idx++; - } - scan_row = iv_resize(scan_row,s_idx); - scan_idx = iv_resize(scan_idx,s_idx); - col_list = iv_resize(col_list,s_idx); - orig_idx = iv_resize(orig_idx,s_idx); - - order = px_resize(order,scan_row->dim); - px_ident(order); - iv_sort(col_list,order); - - tmp_iv = iv_resize(tmp_iv,scan_row->dim); - for ( idx = 0; idx < order->size; idx++ ) - tmp_iv->ive[idx] = scan_idx->ive[order->pe[idx]]; - iv_copy(tmp_iv,scan_idx); - for ( idx = 0; idx < order->size; idx++ ) - tmp_iv->ive[idx] = scan_row->ive[order->pe[idx]]; - iv_copy(tmp_iv,scan_row); - for ( idx = 0; idx < scan_row->dim; idx++ ) - tmp_iv->ive[idx] = orig_idx->ive[order->pe[idx]]; - iv_copy(tmp_iv,orig_idx); - - /* now do actual pivot */ - /* for ( j = i+1; j < n-1; j++ ) .... */ - - for ( s_idx = 0; s_idx < scan_row->dim; s_idx++ ) - { - idx_j = orig_idx->ive[s_idx]; - if ( idx_j < 0 ) - error(E_INTERN,"spBKPfactor"); - e_ij = &(r_piv->elt[idx_j]); - j = e_ij->col; - if ( j < i+1 ) - continue; - scan_to(A,scan_row,scan_idx,col_list,j); - - /* compute multiplier */ - t = e_ij->val / aii; - - /* for ( k = j; k < n; k++ ) { .... update A[j][k] .... } */ - /* this is the row in which pivoting is done */ - row = &(A->row[j]); - for ( s_idx2 = s_idx; s_idx2 < scan_row->dim; s_idx2++ ) - { - idx_k = orig_idx->ive[s_idx2]; - e_ik = &(r_piv->elt[idx_k]); - k = e_ik->col; - /* k >= j since col_list has been sorted */ - - if ( scan_row->ive[s_idx2] == j ) - { /* no fill-in -- can be done directly */ - idx = scan_idx->ive[s_idx2]; - /* idx = sprow_idx2(row,k,idx); */ - row->elt[idx].val -= t*e_ik->val; - } - else - { /* fill-in -- insert entry & patch column */ - int old_row, old_idx; - row_elt *old_e, *new_e; - - old_row = scan_row->ive[s_idx2]; - old_idx = scan_idx->ive[s_idx2]; - /* old_idx = sprow_idx2(&(A->row[old_row]),k,old_idx); */ - - if ( old_idx < 0 ) - error(E_INTERN,"spBKPfactor"); - /* idx = sprow_idx(row,k); */ - /* idx = fixindex(idx); */ - idx = row->len; - - /* sprow_set_val(row,k,-t*e_ik->val); */ - if ( row->len >= row->maxlen ) - { tracecatch(sprow_xpd(row,2*row->maxlen+1,TYPE_SPMAT), - "spBKPfactor"); } - - row->len = idx+1; - - new_e = &(row->elt[idx]); - new_e->val = -t*e_ik->val; - new_e->col = k; - - old_e = &(A->row[old_row].elt[old_idx]); - new_e->nxt_row = old_e->nxt_row; - new_e->nxt_idx = old_e->nxt_idx; - old_e->nxt_row = j; - old_e->nxt_idx = idx; - } - } - e_ij->val = t; - } - } - else /* onebyone == FALSE */ - { /* do 2 x 2 pivot */ - int idx_k, idx1_k, s_idx, s_idx2; - int old_col; - row_elt *e_tmp; - - r_piv = &(A->row[i]); - idx_piv = unord_get_idx(r_piv,i); - aii = aip1i = 0.0; - e_tmp = r_piv->elt; - for ( idx_piv = 0; idx_piv < r_piv->len; idx_piv++, e_tmp++ ) - if ( e_tmp->col == i ) - aii = e_tmp->val; - else if ( e_tmp->col == i+1 ) - aip1i = e_tmp->val; - - r1_piv = &(A->row[i+1]); - e_tmp = r1_piv->elt; - aip1 = unord_get_val(A,i+1,i+1); - det = aii*aip1 - aip1i*aip1i; /* Must have det < 0 */ - if ( aii == 0.0 && aip1i == 0.0 ) - { - /* error(E_RANGE,"spBKPfactor"); */ - onebyone = TRUE; - continue; /* cannot pivot */ - } - - if ( det == 0.0 ) - { - if ( aii != 0.0 ) - error(E_RANGE,"spBKPfactor"); - onebyone = TRUE; - continue; /* cannot pivot */ - } - aip1i = aip1i/det; - aii = aii/det; - aip1 = aip1/det; - - /* initialise scan_... etc for the 2 x 2 pivot */ - s_idx = r_piv->len + r1_piv->len; - scan_row = iv_resize(scan_row,s_idx); - scan_idx = iv_resize(scan_idx,s_idx); - col_list = iv_resize(col_list,s_idx); - orig_idx = iv_resize(orig_idx,s_idx); - orig1_idx = iv_resize(orig1_idx,s_idx); - - e = r_piv->elt; - for ( idx = 0; idx < r_piv->len; idx++, e++ ) - { - scan_row->ive[idx] = i; - scan_idx->ive[idx] = idx; - col_list->ive[idx] = e->col; - orig_idx->ive[idx] = idx; - orig1_idx->ive[idx] = -1; - } - e = r_piv->elt; - e1 = r1_piv->elt; - for ( idx = 0; idx < r1_piv->len; idx++, e1++ ) - { - scan_row->ive[idx+r_piv->len] = i+1; - scan_idx->ive[idx+r_piv->len] = idx; - col_list->ive[idx+r_piv->len] = e1->col; - orig_idx->ive[idx+r_piv->len] = -1; - orig1_idx->ive[idx+r_piv->len] = idx; - } - - e1 = r1_piv->elt; - order = px_resize(order,scan_row->dim); - px_ident(order); - iv_sort(col_list,order); - tmp_iv = iv_resize(tmp_iv,scan_row->dim); - for ( idx = 0; idx < order->size; idx++ ) - tmp_iv->ive[idx] = scan_idx->ive[order->pe[idx]]; - iv_copy(tmp_iv,scan_idx); - for ( idx = 0; idx < order->size; idx++ ) - tmp_iv->ive[idx] = scan_row->ive[order->pe[idx]]; - iv_copy(tmp_iv,scan_row); - for ( idx = 0; idx < scan_row->dim; idx++ ) - tmp_iv->ive[idx] = orig_idx->ive[order->pe[idx]]; - iv_copy(tmp_iv,orig_idx); - for ( idx = 0; idx < scan_row->dim; idx++ ) - tmp_iv->ive[idx] = orig1_idx->ive[order->pe[idx]]; - iv_copy(tmp_iv,orig1_idx); - - s_idx = 0; - old_col = -1; - for ( idx = 0; idx < scan_row->dim; idx++ ) - { - if ( col_list->ive[idx] == old_col ) - { - if ( scan_row->ive[idx] == i ) - { - scan_row->ive[s_idx-1] = scan_row->ive[idx]; - scan_idx->ive[s_idx-1] = scan_idx->ive[idx]; - col_list->ive[s_idx-1] = col_list->ive[idx]; - orig_idx->ive[s_idx-1] = orig_idx->ive[idx]; - orig1_idx->ive[s_idx-1] = orig1_idx->ive[idx-1]; - } - else if ( idx > 0 ) - { - scan_row->ive[s_idx-1] = scan_row->ive[idx-1]; - scan_idx->ive[s_idx-1] = scan_idx->ive[idx-1]; - col_list->ive[s_idx-1] = col_list->ive[idx-1]; - orig_idx->ive[s_idx-1] = orig_idx->ive[idx-1]; - orig1_idx->ive[s_idx-1] = orig1_idx->ive[idx]; - } - } - else - { - scan_row->ive[s_idx] = scan_row->ive[idx]; - scan_idx->ive[s_idx] = scan_idx->ive[idx]; - col_list->ive[s_idx] = col_list->ive[idx]; - orig_idx->ive[s_idx] = orig_idx->ive[idx]; - orig1_idx->ive[s_idx] = orig1_idx->ive[idx]; - s_idx++; - } - old_col = col_list->ive[idx]; - } - scan_row = iv_resize(scan_row,s_idx); - scan_idx = iv_resize(scan_idx,s_idx); - col_list = iv_resize(col_list,s_idx); - orig_idx = iv_resize(orig_idx,s_idx); - orig1_idx = iv_resize(orig1_idx,s_idx); - - /* for ( j = i+2; j < n; j++ ) { .... row operation .... } */ - for ( s_idx = 0; s_idx < scan_row->dim; s_idx++ ) - { - int idx_piv, idx1_piv; - Real aip1j, aij, aik, aip1k; - row_elt *e_ik, *e_ip1k; - - j = col_list->ive[s_idx]; - if ( j < i+2 ) - continue; - tracecatch(scan_to(A,scan_row,scan_idx,col_list,j), - "spBKPfactor"); - - idx_piv = orig_idx->ive[s_idx]; - aij = ( idx_piv < 0 ) ? 0.0 : r_piv->elt[idx_piv].val; - /* aij = ( s_idx < r_piv->len ) ? r_piv->elt[s_idx].val : - 0.0; */ - /* aij = sp_get_val(A,i,j); */ - idx1_piv = orig1_idx->ive[s_idx]; - aip1j = ( idx1_piv < 0 ) ? 0.0 : r1_piv->elt[idx1_piv].val; - /* aip1j = ( s_idx < r_piv->len ) ? 0.0 : - r1_piv->elt[s_idx-r_piv->len].val; */ - /* aip1j = sp_get_val(A,i+1,j); */ - s = - aip1i*aip1j + aip1*aij; - t = - aip1i*aij + aii*aip1j; - - /* for ( k = j; k < n; k++ ) { .... update entry .... } */ - row = &(A->row[j]); - /* set idx_k and idx1_k indices */ - s_idx2 = s_idx; - k = col_list->ive[s_idx2]; - idx_k = orig_idx->ive[s_idx2]; - idx1_k = orig1_idx->ive[s_idx2]; - - while ( s_idx2 < scan_row->dim ) - { - k = col_list->ive[s_idx2]; - idx_k = orig_idx->ive[s_idx2]; - idx1_k = orig1_idx->ive[s_idx2]; - e_ik = ( idx_k < 0 ) ? (row_elt *)NULL : - &(r_piv->elt[idx_k]); - e_ip1k = ( idx1_k < 0 ) ? (row_elt *)NULL : - &(r1_piv->elt[idx1_k]); - aik = ( idx_k >= 0 ) ? e_ik->val : 0.0; - aip1k = ( idx1_k >= 0 ) ? e_ip1k->val : 0.0; - if ( scan_row->ive[s_idx2] == j ) - { /* no fill-in */ - row = &(A->row[j]); - /* idx = sprow_idx(row,k); */ - idx = scan_idx->ive[s_idx2]; - if ( idx < 0 ) - error(E_INTERN,"spBKPfactor"); - row->elt[idx].val -= s*aik + t*aip1k; - } - else - { /* fill-in -- insert entry & patch column */ - Real tmp; - int old_row, old_idx; - row_elt *old_e, *new_e; - - tmp = - s*aik - t*aip1k; - if ( tmp != 0.0 ) - { - row = &(A->row[j]); - old_row = scan_row->ive[s_idx2]; - old_idx = scan_idx->ive[s_idx2]; - - idx = row->len; - if ( row->len >= row->maxlen ) - { tracecatch(sprow_xpd(row,2*row->maxlen+1, - TYPE_SPMAT), - "spBKPfactor"); } - - row->len = idx + 1; - /* idx = sprow_idx(row,k); */ - new_e = &(row->elt[idx]); - new_e->val = tmp; - new_e->col = k; - - if ( old_row < 0 ) - error(E_INTERN,"spBKPfactor"); - /* old_idx = sprow_idx2(&(A->row[old_row]), - k,old_idx); */ - old_e = &(A->row[old_row].elt[old_idx]); - new_e->nxt_row = old_e->nxt_row; - new_e->nxt_idx = old_e->nxt_idx; - old_e->nxt_row = j; - old_e->nxt_idx = idx; - } - } - - /* update idx_k, idx1_k, s_idx2 etc */ - s_idx2++; - } - - /* store multipliers -- may involve fill-in (!) */ - /* idx = sprow_idx(r_piv,j); */ - idx = orig_idx->ive[s_idx]; - if ( idx >= 0 ) - { - r_piv->elt[idx].val = s; - } - else if ( s != 0.0 ) - { - int old_row, old_idx; - row_elt *new_e, *old_e; - - old_row = -1; old_idx = j; - - if ( i > 0 ) - { - tracecatch(chase_col(A,j,&old_row,&old_idx,i-1), - "spBKPfactor"); - } - /* sprow_set_val(r_piv,j,s); */ - idx = r_piv->len; - if ( r_piv->len >= r_piv->maxlen ) - { tracecatch(sprow_xpd(r_piv,2*r_piv->maxlen+1, - TYPE_SPMAT), - "spBKPfactor"); } - - r_piv->len = idx + 1; - /* idx = sprow_idx(r_piv,j); */ - /* if ( idx < 0 ) - error(E_INTERN,"spBKPfactor"); */ - new_e = &(r_piv->elt[idx]); - new_e->val = s; - new_e->col = j; - if ( old_row < 0 ) - { - new_e->nxt_row = A->start_row[j]; - new_e->nxt_idx = A->start_idx[j]; - A->start_row[j] = i; - A->start_idx[j] = idx; - } - else - { - /* old_idx = sprow_idx2(&(A->row[old_row]),j,old_idx);*/ - if ( old_idx < 0 ) - error(E_INTERN,"spBKPfactor"); - old_e = &(A->row[old_row].elt[old_idx]); - new_e->nxt_row = old_e->nxt_row; - new_e->nxt_idx = old_e->nxt_idx; - old_e->nxt_row = i; - old_e->nxt_idx = idx; - } - } - /* idx1 = sprow_idx(r1_piv,j); */ - idx1 = orig1_idx->ive[s_idx]; - if ( idx1 >= 0 ) - { - r1_piv->elt[idx1].val = t; - } - else if ( t != 0.0 ) - { - int old_row, old_idx; - row_elt *new_e, *old_e; - - old_row = -1; old_idx = j; - tracecatch(chase_col(A,j,&old_row,&old_idx,i), - "spBKPfactor"); - /* sprow_set_val(r1_piv,j,t); */ - idx1 = r1_piv->len; - if ( r1_piv->len >= r1_piv->maxlen ) - { tracecatch(sprow_xpd(r1_piv,2*r1_piv->maxlen+1, - TYPE_SPMAT), - "spBKPfactor"); } - - r1_piv->len = idx1 + 1; - /* idx1 = sprow_idx(r1_piv,j); */ - /* if ( idx < 0 ) - error(E_INTERN,"spBKPfactor"); */ - new_e = &(r1_piv->elt[idx1]); - new_e->val = t; - new_e->col = j; - if ( idx1 < 0 ) - error(E_INTERN,"spBKPfactor"); - new_e = &(r1_piv->elt[idx1]); - if ( old_row < 0 ) - { - new_e->nxt_row = A->start_row[j]; - new_e->nxt_idx = A->start_idx[j]; - A->start_row[j] = i+1; - A->start_idx[j] = idx1; - } - else - { - old_idx = sprow_idx2(&(A->row[old_row]),j,old_idx); - if ( old_idx < 0 ) - error(E_INTERN,"spBKPfactor"); - old_e = &(A->row[old_row].elt[old_idx]); - new_e->nxt_row = old_e->nxt_row; - new_e->nxt_idx = old_e->nxt_idx; - old_e->nxt_row = i+1; - old_e->nxt_idx = idx1; - } - } - } - } - } - - /* now sort the rows arrays */ - for ( i = 0; i < A->m; i++ ) - qsort(A->row[i].elt,A->row[i].len,sizeof(row_elt),(int(*)())col_cmp); - A->flag_col = A->flag_diag = FALSE; - - return A; -} - -/* spBKPsolve -- solves A.x = b where A has been factored a la BKPfactor() - -- returns x, which is created if NULL */ -VEC *spBKPsolve(A,pivot,block,b,x) -SPMAT *A; -PERM *pivot, *block; -VEC *b, *x; -{ - static VEC *tmp=VNULL; /* dummy storage needed */ - int i /* , j */, n, onebyone; - int row_num, idx; - Real a11, a12, a22, b1, b2, det, sum, *tmp_ve, tmp_diag; - SPROW *r; - row_elt *e; - - if ( ! A || ! pivot || ! block || ! b ) - error(E_NULL,"spBKPsolve"); - if ( A->m != A->n ) - error(E_SQUARE,"spBKPsolve"); - n = A->n; - if ( b->dim != n || pivot->size != n || block->size != n ) - error(E_SIZES,"spBKPsolve"); - x = v_resize(x,n); - tmp = v_resize(tmp,n); - MEM_STAT_REG(tmp,TYPE_VEC); - - tmp_ve = tmp->ve; - - if ( ! A->flag_col ) - sp_col_access(A); - - px_vec(pivot,b,tmp); - /* printf("# BKPsolve: effect of pivot: tmp =\n"); v_output(tmp); */ - - /* solve for lower triangular part */ - for ( i = 0; i < n; i++ ) - { - sum = tmp_ve[i]; - if ( block->pe[i] < i ) - { - /* for ( j = 0; j < i-1; j++ ) - sum -= A_me[j][i]*tmp_ve[j]; */ - row_num = -1; idx = i; - e = bump_col(A,i,&row_num,&idx); - while ( row_num >= 0 && row_num < i-1 ) - { - sum -= e->val*tmp_ve[row_num]; - e = bump_col(A,i,&row_num,&idx); - } - } - else - { - /* for ( j = 0; j < i; j++ ) - sum -= A_me[j][i]*tmp_ve[j]; */ - row_num = -1; idx = i; - e = bump_col(A,i,&row_num,&idx); - while ( row_num >= 0 && row_num < i ) - { - sum -= e->val*tmp_ve[row_num]; - e = bump_col(A,i,&row_num,&idx); - } - } - tmp_ve[i] = sum; - } - - /* printf("# BKPsolve: solving L part: tmp =\n"); v_output(tmp); */ - /* solve for diagonal part */ - for ( i = 0; i < n; i = onebyone ? i+1 : i+2 ) - { - onebyone = ( block->pe[i] == i ); - if ( onebyone ) - { - /* tmp_ve[i] /= A_me[i][i]; */ - tmp_diag = sp_get_val(A,i,i); - if ( tmp_diag == 0.0 ) - error(E_SING,"spBKPsolve"); - tmp_ve[i] /= tmp_diag; - } - else - { - a11 = sp_get_val(A,i,i); - a22 = sp_get_val(A,i+1,i+1); - a12 = sp_get_val(A,i,i+1); - b1 = tmp_ve[i]; - b2 = tmp_ve[i+1]; - det = a11*a22-a12*a12; /* < 0 : see BKPfactor() */ - if ( det == 0.0 ) - error(E_SING,"BKPsolve"); - det = 1/det; - tmp_ve[i] = det*(a22*b1-a12*b2); - tmp_ve[i+1] = det*(a11*b2-a12*b1); - } - } - - /* printf("# BKPsolve: solving D part: tmp =\n"); v_output(tmp); */ - /* solve for transpose of lower triangular part */ - for ( i = n-2; i >= 0; i-- ) - { - sum = tmp_ve[i]; - if ( block->pe[i] > i ) - { - /* onebyone is false */ - /* for ( j = i+2; j < n; j++ ) - sum -= A_me[i][j]*tmp_ve[j]; */ - if ( i+2 >= n ) - continue; - r = &(A->row[i]); - idx = sprow_idx(r,i+2); - idx = fixindex(idx); - e = &(r->elt[idx]); - for ( ; idx < r->len; idx++, e++ ) - sum -= e->val*tmp_ve[e->col]; - } - else /* onebyone */ - { - /* for ( j = i+1; j < n; j++ ) - sum -= A_me[i][j]*tmp_ve[j]; */ - r = &(A->row[i]); - idx = sprow_idx(r,i+1); - idx = fixindex(idx); - e = &(r->elt[idx]); - for ( ; idx < r->len; idx++, e++ ) - sum -= e->val*tmp_ve[e->col]; - } - tmp_ve[i] = sum; - } - - /* printf("# BKPsolve: solving L^T part: tmp =\n");v_output(tmp); */ - /* and do final permutation */ - x = pxinv_vec(pivot,tmp,x); - - return x; -} - - - //GO.SYSIN DD spbkp.c echo spswap.c 1>&2 sed >spswap.c <<'//GO.SYSIN DD spswap.c' 's/^-//' - -/************************************************************************** -** -** Copyright (C) 1993 David E. Steward & Zbigniew Leyk, all rights reserved. -** -** Meschach Library -** -** This Meschach Library is provided "as is" without any express -** or implied warranty of any kind with respect to this software. -** In particular the authors shall not be liable for any direct, -** indirect, special, incidental or consequential damages arising -** in any way from use of the software. -** -** Everyone is granted permission to copy, modify and redistribute this -** Meschach Library, provided: -** 1. All copies contain this copyright notice. -** 2. All modified copies shall carry a notice stating who -** made the last modification and the date of such modification. -** 3. No charge is made for this software or works derived from it. -** This clause shall not be construed as constraining other software -** distributed on the same medium as this software, nor is a -** distribution fee considered a charge. -** -***************************************************************************/ - - -/* - Sparse matrix swap and permutation routines - Modified Mon 09th Nov 1992, 08:50:54 PM - to use Karen George's suggestion to use unordered rows -*/ - -static char rcsid[] = "$Id: m9,v 1.1.1.1 1999/04/14 14:16:23 borland Exp $"; - -#include -#include "sparse2.h" -#include - - -#define btos(x) ((x) ? "TRUE" : "FALSE") - -/* scan_to -- updates scan (int) vectors to point to the last row in each - column with row # <= max_row, if any */ -void scan_to(A, scan_row, scan_idx, col_list, max_row) -SPMAT *A; -IVEC *scan_row, *scan_idx, *col_list; -int max_row; -{ - int col, idx, j_idx, row_num; - SPROW *r; - row_elt *e; - - if ( ! A || ! scan_row || ! scan_idx || ! col_list ) - error(E_NULL,"scan_to"); - if ( scan_row->dim != scan_idx->dim || scan_idx->dim != col_list->dim ) - error(E_SIZES,"scan_to"); - - if ( max_row < 0 ) - return; - - if ( ! A->flag_col ) - sp_col_access(A); - - for ( j_idx = 0; j_idx < scan_row->dim; j_idx++ ) - { - row_num = scan_row->ive[j_idx]; - idx = scan_idx->ive[j_idx]; - col = col_list->ive[j_idx]; - - if ( col < 0 || col >= A->n ) - error(E_BOUNDS,"scan_to"); - if ( row_num < 0 ) - { - idx = col; - continue; - } - r = &(A->row[row_num]); - if ( idx < 0 ) - error(E_INTERN,"scan_to"); - e = &(r->elt[idx]); - if ( e->col != col ) - error(E_INTERN,"scan_to"); - if ( idx < 0 ) - { - printf("scan_to: row_num = %d, idx = %d, col = %d\n", - row_num, idx, col); - error(E_INTERN,"scan_to"); - } - /* if ( e->nxt_row <= max_row ) - chase_col(A, col, &row_num, &idx, max_row); */ - while ( e->nxt_row >= 0 && e->nxt_row <= max_row ) - { - row_num = e->nxt_row; - idx = e->nxt_idx; - e = &(A->row[row_num].elt[idx]); - } - - /* printf("scan_to: computed j_idx = %d, row_num = %d, idx = %d\n", - j_idx, row_num, idx); */ - scan_row->ive[j_idx] = row_num; - scan_idx->ive[j_idx] = idx; - } -} - -/* patch_col -- patches column access paths for fill-in */ -void patch_col(A, col, old_row, old_idx, row_num, idx) -SPMAT *A; -int col, old_row, old_idx, row_num, idx; -{ - SPROW *r; - row_elt *e; - - if ( old_row >= 0 ) - { - r = &(A->row[old_row]); - old_idx = sprow_idx2(r,col,old_idx); - e = &(r->elt[old_idx]); - e->nxt_row = row_num; - e->nxt_idx = idx; - } - else - { - A->start_row[col] = row_num; - A->start_idx[col] = idx; - } -} - -/* chase_col -- chases column access path in column col, starting with - row_num and idx, to find last row # in this column <= max_row - -- row_num is returned; idx is also set by this routine - -- assumes that the column access paths (possibly without the - nxt_idx fields) are set up */ -row_elt *chase_col(A, col, row_num, idx, max_row) -SPMAT *A; -int col, *row_num, *idx, max_row; -{ - int old_idx, old_row, tmp_idx, tmp_row; - SPROW *r; - row_elt *e; - - if ( col < 0 || col >= A->n ) - error(E_BOUNDS,"chase_col"); - tmp_row = *row_num; - if ( tmp_row < 0 ) - { - if ( A->start_row[col] > max_row ) - { - tmp_row = -1; - tmp_idx = col; - return (row_elt *)NULL; - } - else - { - tmp_row = A->start_row[col]; - tmp_idx = A->start_idx[col]; - } - } - else - tmp_idx = *idx; - - old_row = tmp_row; - old_idx = tmp_idx; - while ( tmp_row >= 0 && tmp_row < max_row ) - { - r = &(A->row[tmp_row]); - /* tmp_idx = sprow_idx2(r,col,tmp_idx); */ - if ( tmp_idx < 0 || tmp_idx >= r->len || - r->elt[tmp_idx].col != col ) - { -#ifdef DEBUG - printf("chase_col:error: col = %d, row # = %d, idx = %d\n", - col, tmp_row, tmp_idx); - printf("chase_col:error: old_row = %d, old_idx = %d\n", - old_row, old_idx); - printf("chase_col:error: A =\n"); - sp_dump(stdout,A); -#endif - error(E_INTERN,"chase_col"); - } - e = &(r->elt[tmp_idx]); - old_row = tmp_row; - old_idx = tmp_idx; - tmp_row = e->nxt_row; - tmp_idx = e->nxt_idx; - } - if ( old_row > max_row ) - { - old_row = -1; - old_idx = col; - e = (row_elt *)NULL; - } - else if ( tmp_row <= max_row && tmp_row >= 0 ) - { - old_row = tmp_row; - old_idx = tmp_idx; - } - - *row_num = old_row; - if ( old_row >= 0 ) - *idx = old_idx; - else - *idx = col; - - return e; -} - -/* chase_past -- as for chase_col except that we want the first - row whose row # >= min_row; -1 indicates no such row */ -row_elt *chase_past(A, col, row_num, idx, min_row) -SPMAT *A; -int col, *row_num, *idx, min_row; -{ - SPROW *r; - row_elt *e; - int tmp_idx, tmp_row; - - tmp_row = *row_num; - tmp_idx = *idx; - chase_col(A,col,&tmp_row,&tmp_idx,min_row); - if ( tmp_row < 0 ) /* use A->start_row[..] etc. */ - { - if ( A->start_row[col] < 0 ) - tmp_row = -1; - else - { - tmp_row = A->start_row[col]; - tmp_idx = A->start_idx[col]; - } - } - else if ( tmp_row < min_row ) - { - r = &(A->row[tmp_row]); - if ( tmp_idx < 0 || tmp_idx >= r->len || - r->elt[tmp_idx].col != col ) - error(E_INTERN,"chase_past"); - tmp_row = r->elt[tmp_idx].nxt_row; - tmp_idx = r->elt[tmp_idx].nxt_idx; - } - - *row_num = tmp_row; - *idx = tmp_idx; - if ( tmp_row < 0 ) - e = (row_elt *)NULL; - else - { - if ( tmp_idx < 0 || tmp_idx >= A->row[tmp_row].len || - A->row[tmp_row].elt[tmp_idx].col != col ) - error(E_INTERN,"bump_col"); - e = &(A->row[tmp_row].elt[tmp_idx]); - } - - return e; -} - -/* bump_col -- move along to next nonzero entry in column col after row_num - -- update row_num and idx */ -row_elt *bump_col(A, col, row_num, idx) -SPMAT *A; -int col, *row_num, *idx; -{ - SPROW *r; - row_elt *e; - int tmp_row, tmp_idx; - - tmp_row = *row_num; - tmp_idx = *idx; - /* printf("bump_col: col = %d, row# = %d, idx = %d\n", - col, *row_num, *idx); */ - if ( tmp_row < 0 ) - { - tmp_row = A->start_row[col]; - tmp_idx = A->start_idx[col]; - } - else - { - r = &(A->row[tmp_row]); - if ( tmp_idx < 0 || tmp_idx >= r->len || - r->elt[tmp_idx].col != col ) - error(E_INTERN,"bump_col"); - e = &(r->elt[tmp_idx]); - tmp_row = e->nxt_row; - tmp_idx = e->nxt_idx; - } - if ( tmp_row < 0 ) - { - e = (row_elt *)NULL; - tmp_idx = col; - } - else - { - if ( tmp_idx < 0 || tmp_idx >= A->row[tmp_row].len || - A->row[tmp_row].elt[tmp_idx].col != col ) - error(E_INTERN,"bump_col"); - e = &(A->row[tmp_row].elt[tmp_idx]); - } - *row_num = tmp_row; - *idx = tmp_idx; - - return e; -} - - //GO.SYSIN DD spswap.c echo iter0.c 1>&2 sed >iter0.c <<'//GO.SYSIN DD iter0.c' 's/^-//' - -/************************************************************************** -** -** Copyright (C) 1993 David E. Stewart & Zbigniew Leyk, all rights reserved. -** -** Meschach Library -** -** This Meschach Library is provided "as is" without any express -** or implied warranty of any kind with respect to this software. -** In particular the authors shall not be liable for any direct, -** indirect, special, incidental or consequential damages arising -** in any way from use of the software. -** -** Everyone is granted permission to copy, modify and redistribute this -** Meschach Library, provided: -** 1. All copies contain this copyright notice. -** 2. All modified copies shall carry a notice stating who -** made the last modification and the date of such modification. -** 3. No charge is made for this software or works derived from it. -** This clause shall not be construed as constraining other software -** distributed on the same medium as this software, nor is a -** distribution fee considered a charge. -** -***************************************************************************/ - - -/* iter0.c 14/09/93 */ - -/* ITERATIVE METHODS - service functions */ - -/* functions for creating and releasing ITER structures; - for memory information; - for getting some values from an ITER variable; - for changing values in an ITER variable; - see also iter.c -*/ - -#include -#include -#include "iter.h" - - -static char rcsid[] = "$Id: m9,v 1.1.1.1 1999/04/14 14:16:23 borland Exp $"; - - -/* standard functions */ - -/* standard information */ -void iter_std_info(ip,nres,res,Bres) -ITER *ip; -double nres; -VEC *res, *Bres; -{ - if (nres >= 0.0) - printf(" %d. residual = %g\n",ip->steps,nres); - else - printf(" %d. residual = %g (WARNING !!! should be >= 0) \n", - ip->steps,nres); -} - -/* standard stopping criterion */ -int iter_std_stop_crit(ip, nres, res, Bres) -ITER *ip; -double nres; -VEC *res, *Bres; -{ - /* standard stopping criterium */ - if (nres <= ip->init_res*ip->eps) return TRUE; - return FALSE; -} - - -/* iter_get - create a new structure pointing to ITER */ - -ITER *iter_get(lenb, lenx) -int lenb, lenx; -{ - ITER *ip; - - if ((ip = NEW(ITER)) == (ITER *) NULL) - error(E_MEM,"iter_get"); - else if (mem_info_is_on()) { - mem_bytes(TYPE_ITER,0,sizeof(ITER)); - mem_numvar(TYPE_ITER,1); - } - - /* default values */ - - ip->shared_x = FALSE; - ip->shared_b = FALSE; - ip->k = 0; - ip->limit = ITER_LIMIT_DEF; - ip->eps = ITER_EPS_DEF; - ip->steps = 0; - - if (lenb > 0) ip->b = v_get(lenb); - else ip->b = (VEC *)NULL; - - if (lenx > 0) ip->x = v_get(lenx); - else ip->x = (VEC *)NULL; - - ip->Ax = (Fun_Ax) NULL; - ip->A_par = NULL; - ip->ATx = (Fun_Ax) NULL; - ip->AT_par = NULL; - ip->Bx = (Fun_Ax) NULL; - ip->B_par = NULL; - ip->info = iter_std_info; - ip->stop_crit = iter_std_stop_crit; - ip->init_res = 0.0; - - return ip; -} - - -/* iter_free - release memory */ -int iter_free(ip) -ITER *ip; -{ - if (ip == (ITER *)NULL) return -1; - - if (mem_info_is_on()) { - mem_bytes(TYPE_ITER,sizeof(ITER),0); - mem_numvar(TYPE_ITER,-1); - } - - if ( !ip->shared_x && ip->x != NULL ) v_free(ip->x); - if ( !ip->shared_b && ip->b != NULL ) v_free(ip->b); - - free((char *)ip); - - return 0; -} - -ITER *iter_resize(ip,new_lenb,new_lenx) -ITER *ip; -int new_lenb, new_lenx; -{ - VEC *old; - - if ( ip == (ITER *) NULL) - error(E_NULL,"iter_resize"); - - old = ip->x; - ip->x = v_resize(ip->x,new_lenx); - if ( ip->shared_x && old != ip->x ) - warning(WARN_SHARED_VEC,"iter_resize"); - old = ip->b; - ip->b = v_resize(ip->b,new_lenb); - if ( ip->shared_b && old != ip->b ) - warning(WARN_SHARED_VEC,"iter_resize"); - - return ip; -} - - -/* print out ip structure - for diagnostic purposes mainly */ -void iter_dump(fp,ip) -ITER *ip; -FILE *fp; -{ - if (ip == NULL) { - fprintf(fp," ITER structure: NULL\n"); - return; - } - - fprintf(fp,"\n ITER structure:\n"); - fprintf(fp," ip->shared_x = %s, ip->shared_b = %s\n", - (ip->shared_x ? "TRUE" : "FALSE"), - (ip->shared_b ? "TRUE" : "FALSE") ); - fprintf(fp," ip->k = %d, ip->limit = %d, ip->steps = %d, ip->eps = %g\n", - ip->k,ip->limit,ip->steps,ip->eps); - fprintf(fp," ip->x = 0x%p, ip->b = 0x%p\n",ip->x,ip->b); - fprintf(fp," ip->Ax = 0x%p, ip->A_par = 0x%p\n",ip->Ax,ip->A_par); - fprintf(fp," ip->ATx = 0x%p, ip->AT_par = 0x%p\n",ip->ATx,ip->AT_par); - fprintf(fp," ip->Bx = 0x%p, ip->B_par = 0x%p\n",ip->Bx,ip->B_par); - fprintf(fp," ip->info = 0x%p, ip->stop_crit = 0x%p, ip->init_res = %g\n", - ip->info,ip->stop_crit,ip->init_res); - fprintf(fp,"\n"); - -} - - -/* copy the structure ip1 to ip2 preserving vectors x and b of ip2 - (vectors x and b in ip2 are the same before and after iter_copy2) - if ip2 == NULL then a new structure is created with x and b being NULL - and other members are taken from ip1 -*/ -ITER *iter_copy2(ip1,ip2) -ITER *ip1, *ip2; -{ - VEC *x, *b; - int shx, shb; - - if (ip1 == (ITER *)NULL) - error(E_NULL,"iter_copy2"); - - if (ip2 == (ITER *)NULL) { - if ((ip2 = NEW(ITER)) == (ITER *) NULL) - error(E_MEM,"iter_copy2"); - else if (mem_info_is_on()) { - mem_bytes(TYPE_ITER,0,sizeof(ITER)); - mem_numvar(TYPE_ITER,1); - } - ip2->x = ip2->b = NULL; - ip2->shared_x = ip2->shared_x = FALSE; - } - - x = ip2->x; - b = ip2->b; - shb = ip2->shared_b; - shx = ip2->shared_x; - MEM_COPY(ip1,ip2,sizeof(ITER)); - ip2->x = x; - ip2->b = b; - ip2->shared_x = shx; - ip2->shared_b = shb; - - return ip2; -} - - -/* copy the structure ip1 to ip2 copying also the vectors x and b */ -ITER *iter_copy(ip1,ip2) -ITER *ip1, *ip2; -{ - VEC *x, *b; - - if (ip1 == (ITER *)NULL) - error(E_NULL,"iter_copy"); - - if (ip2 == (ITER *)NULL) { - if ((ip2 = NEW(ITER)) == (ITER *) NULL) - error(E_MEM,"iter_copy2"); - else if (mem_info_is_on()) { - mem_bytes(TYPE_ITER,0,sizeof(ITER)); - mem_numvar(TYPE_ITER,1); - } - } - - x = ip2->x; - b = ip2->b; - - MEM_COPY(ip1,ip2,sizeof(ITER)); - if (ip1->x) - ip2->x = v_copy(ip1->x,x); - if (ip1->b) - ip2->b = v_copy(ip1->b,b); - - ip2->shared_x = ip2->shared_b = FALSE; - - return ip2; -} - - -/*** functions to generate sparse matrices with random entries ***/ - - -/* iter_gen_sym -- generate symmetric positive definite - n x n matrix, - nrow - number of nonzero entries in a row - */ -SPMAT *iter_gen_sym(n,nrow) -int n, nrow; -{ - SPMAT *A; - VEC *u; - Real s1; - int i, j, k, k_max; - - if (nrow <= 1) nrow = 2; - /* nrow should be even */ - if ((nrow & 1)) nrow -= 1; - A = sp_get(n,n,nrow); - u = v_get(A->m); - v_zero(u); - for ( i = 0; i < A->m; i++ ) - { - k_max = ((rand() >> 8) % (nrow/2)); - for ( k = 0; k <= k_max; k++ ) - { - j = (rand() >> 8) % A->n; - s1 = mrand(); - sp_set_val(A,i,j,s1); - sp_set_val(A,j,i,s1); - u->ve[i] += fabs(s1); - u->ve[j] += fabs(s1); - } - } - /* ensure that A is positive definite */ - for ( i = 0; i < A->m; i++ ) - sp_set_val(A,i,i,u->ve[i] + 1.0); - - V_FREE(u); - return A; -} - - -/* iter_gen_nonsym -- generate non-symmetric m x n sparse matrix, m >= n - nrow - number of entries in a row; - diag - number which is put in diagonal entries and then permuted - (if diag is zero then 1.0 is there) -*/ -SPMAT *iter_gen_nonsym(m,n,nrow,diag) -int m, n, nrow; -double diag; -{ - SPMAT *A; - PERM *px; - int i, j, k, k_max; - Real s1; - - if (nrow <= 1) nrow = 2; - if (diag == 0.0) diag = 1.0; - A = sp_get(m,n,nrow); - px = px_get(n); - for ( i = 0; i < A->m; i++ ) - { - k_max = (rand() >> 8) % (nrow-1); - for ( k = 0; k <= k_max; k++ ) - { - j = (rand() >> 8) % A->n; - s1 = mrand(); - sp_set_val(A,i,j,-s1); - } - } - /* to make it likely that A is nonsingular, use pivot... */ - for ( i = 0; i < 2*A->n; i++ ) - { - j = (rand() >> 8) % A->n; - k = (rand() >> 8) % A->n; - px_transp(px,j,k); - } - for ( i = 0; i < A->n; i++ ) - sp_set_val(A,i,px->pe[i],diag); - - PX_FREE(px); - return A; -} - - -/* iter_gen_nonsym -- generate non-symmetric positive definite - n x n sparse matrix; - nrow - number of entries in a row -*/ -SPMAT *iter_gen_nonsym_posdef(n,nrow) -int n, nrow; -{ - SPMAT *A; - PERM *px; - VEC *u; - int i, j, k, k_max; - Real s1; - - if (nrow <= 1) nrow = 2; - A = sp_get(n,n,nrow); - px = px_get(n); - u = v_get(A->m); - v_zero(u); - for ( i = 0; i < A->m; i++ ) - { - k_max = (rand() >> 8) % (nrow-1); - for ( k = 0; k <= k_max; k++ ) - { - j = (rand() >> 8) % A->n; - s1 = mrand(); - sp_set_val(A,i,j,-s1); - u->ve[i] += fabs(s1); - } - } - /* ensure that A is positive definite */ - for ( i = 0; i < A->m; i++ ) - sp_set_val(A,i,i,u->ve[i] + 1.0); - - PX_FREE(px); - V_FREE(u); - return A; -} - - - //GO.SYSIN DD iter0.c echo itersym.c 1>&2 sed >itersym.c <<'//GO.SYSIN DD itersym.c' 's/^-//' - -/************************************************************************** -** -** Copyright (C) 1993 David E. Stewart & Zbigniew Leyk, all rights reserved. -** -** Meschach Library -** -** This Meschach Library is provided "as is" without any express -** or implied warranty of any kind with respect to this software. -** In particular the authors shall not be liable for any direct, -** indirect, special, incidental or consequential damages arising -** in any way from use of the software. -** -** Everyone is granted permission to copy, modify and redistribute this -** Meschach Library, provided: -** 1. All copies contain this copyright notice. -** 2. All modified copies shall carry a notice stating who -** made the last modification and the date of such modification. -** 3. No charge is made for this software or works derived from it. -** This clause shall not be construed as constraining other software -** distributed on the same medium as this software, nor is a -** distribution fee considered a charge. -** -***************************************************************************/ - - -/* itersym.c 17/09/93 */ - - -/* - ITERATIVE METHODS - implementation of several iterative methods; - see also iter0.c - */ - -#include -#include -#include "matrix.h" -#include "matrix2.h" -#include "sparse.h" -#include "iter.h" - -static char rcsid[] = "$Id: m9,v 1.1.1.1 1999/04/14 14:16:23 borland Exp $"; - - -#ifdef ANSI_C -VEC *spCHsolve(SPMAT *,VEC *,VEC *); -VEC *trieig(VEC *,VEC *,MAT *); -#else -VEC *spCHsolve(); -VEC *trieig(); -#endif - - - -/* iter_spcg -- a simple interface to iter_cg() which uses sparse matrix - data structures - -- assumes that LLT contains the Cholesky factorisation of the - actual preconditioner; - use always as follows: - x = iter_spcg(A,LLT,b,eps,x,limit,steps); - or - x = iter_spcg(A,LLT,b,eps,VNULL,limit,steps); - In the second case the solution vector is created. - */ -VEC *iter_spcg(A,LLT,b,eps,x,limit,steps) -SPMAT *A, *LLT; -VEC *b, *x; -double eps; -int *steps, limit; -{ - ITER *ip; - - ip = iter_get(0,0); - ip->Ax = (Fun_Ax) sp_mv_mlt; - ip->A_par = (void *)A; - ip->Bx = (Fun_Ax) spCHsolve; - ip->B_par = (void *)LLT; - ip->info = (Fun_info) NULL; - ip->b = b; - ip->eps = eps; - ip->limit = limit; - ip->x = x; - iter_cg(ip); - x = ip->x; - if (steps) *steps = ip->steps; - ip->shared_x = ip->shared_b = TRUE; - iter_free(ip); /* release only ITER structure */ - return x; -} - -/* - Conjugate gradients method; - */ -VEC *iter_cg(ip) -ITER *ip; -{ - static VEC *r = VNULL, *p = VNULL, *q = VNULL, *z = VNULL; - Real alpha, beta, inner, old_inner, nres; - VEC *rr; /* rr == r or rr == z */ - - if (ip == INULL) - error(E_NULL,"iter_cg"); - if (!ip->Ax || !ip->b) - error(E_NULL,"iter_cg"); - if ( ip->x == ip->b ) - error(E_INSITU,"iter_cg"); - if (!ip->stop_crit) - error(E_NULL,"iter_cg"); - - if ( ip->eps <= 0.0 ) - ip->eps = MACHEPS; - - r = v_resize(r,ip->b->dim); - p = v_resize(p,ip->b->dim); - q = v_resize(q,ip->b->dim); - - MEM_STAT_REG(r,TYPE_VEC); - MEM_STAT_REG(p,TYPE_VEC); - MEM_STAT_REG(q,TYPE_VEC); - - if (ip->Bx != (Fun_Ax)NULL) { - z = v_resize(z,ip->b->dim); - MEM_STAT_REG(z,TYPE_VEC); - rr = z; - } - else rr = r; - - if (ip->x != VNULL) { - if (ip->x->dim != ip->b->dim) - error(E_SIZES,"iter_cg"); - ip->Ax(ip->A_par,ip->x,p); /* p = A*x */ - v_sub(ip->b,p,r); /* r = b - A*x */ - } - else { /* ip->x == 0 */ - ip->x = v_get(ip->b->dim); - ip->shared_x = FALSE; - v_copy(ip->b,r); - } - - old_inner = 0.0; - for ( ip->steps = 0; ip->steps <= ip->limit; ip->steps++ ) - { - if ( ip->Bx ) - (ip->Bx)(ip->B_par,r,rr); /* rr = B*r */ - - inner = in_prod(rr,r); - nres = sqrt(fabs(inner)); - if (ip->info) ip->info(ip,nres,r,rr); - if (ip->steps == 0) ip->init_res = nres; - if ( ip->stop_crit(ip,nres,r,rr) ) break; - - if ( ip->steps ) /* if ( ip->steps > 0 ) ... */ - { - beta = inner/old_inner; - p = v_mltadd(rr,p,beta,p); - } - else /* if ( ip->steps == 0 ) ... */ - { - beta = 0.0; - p = v_copy(rr,p); - old_inner = 0.0; - } - (ip->Ax)(ip->A_par,p,q); /* q = A*p */ - alpha = in_prod(p,q); - if (sqrt(fabs(alpha)) <= MACHEPS*ip->init_res) - error(E_BREAKDOWN,"iter_cg"); - alpha = inner/alpha; - v_mltadd(ip->x,p,alpha,ip->x); - v_mltadd(r,q,-alpha,r); - old_inner = inner; - } - - return ip->x; -} - - - -/* iter_lanczos -- raw lanczos algorithm -- no re-orthogonalisation - -- creates T matrix of size == m, - but no larger than before beta_k == 0 - -- uses passed routine to do matrix-vector multiplies */ -void iter_lanczos(ip,a,b,beta2,Q) -ITER *ip; -VEC *a, *b; -Real *beta2; -MAT *Q; -{ - int j; - static VEC *v = VNULL, *w = VNULL, *tmp = VNULL; - Real alpha, beta, c; - - if ( ! ip ) - error(E_NULL,"iter_lanczos"); - if ( ! ip->Ax || ! ip->x || ! a || ! b ) - error(E_NULL,"iter_lanczos"); - if ( ip->k <= 0 ) - error(E_BOUNDS,"iter_lanczos"); - if ( Q && ( Q->n < ip->x->dim || Q->m < ip->k ) ) - error(E_SIZES,"iter_lanczos"); - - a = v_resize(a,(u_int)ip->k); - b = v_resize(b,(u_int)(ip->k-1)); - v = v_resize(v,ip->x->dim); - w = v_resize(w,ip->x->dim); - tmp = v_resize(tmp,ip->x->dim); - MEM_STAT_REG(v,TYPE_VEC); - MEM_STAT_REG(w,TYPE_VEC); - MEM_STAT_REG(tmp,TYPE_VEC); - - beta = 1.0; - v_zero(a); - v_zero(b); - if (Q) m_zero(Q); - - /* normalise x as w */ - c = v_norm2(ip->x); - if (c <= MACHEPS) { /* ip->x == 0 */ - *beta2 = 0.0; - return; - } - else - sv_mlt(1.0/c,ip->x,w); - - (ip->Ax)(ip->A_par,w,v); - - for ( j = 0; j < ip->k; j++ ) - { - /* store w in Q if Q not NULL */ - if ( Q ) set_row(Q,j,w); - - alpha = in_prod(w,v); - a->ve[j] = alpha; - v_mltadd(v,w,-alpha,v); - beta = v_norm2(v); - if ( beta == 0.0 ) - { - *beta2 = 0.0; - return; - } - - if ( j < ip->k-1 ) - b->ve[j] = beta; - v_copy(w,tmp); - sv_mlt(1/beta,v,w); - sv_mlt(-beta,tmp,v); - (ip->Ax)(ip->A_par,w,tmp); - v_add(v,tmp,v); - } - *beta2 = beta; - -} - -/* iter_splanczos -- version that uses sparse matrix data structure */ -void iter_splanczos(A,m,x0,a,b,beta2,Q) -SPMAT *A; -int m; -VEC *x0, *a, *b; -Real *beta2; -MAT *Q; -{ - ITER *ip; - - ip = iter_get(0,0); - ip->shared_x = ip->shared_b = TRUE; - ip->Ax = (Fun_Ax) sp_mv_mlt; - ip->A_par = (void *) A; - ip->x = x0; - ip->k = m; - iter_lanczos(ip,a,b,beta2,Q); - iter_free(ip); /* release only ITER structure */ -} - - - -extern double frexp(), ldexp(); - -/* product -- returns the product of a long list of numbers - -- answer stored in mant (mantissa) and expt (exponent) */ -static double product(a,offset,expt) -VEC *a; -double offset; -int *expt; -{ - Real mant, tmp_fctr; - int i, tmp_expt; - - if ( ! a ) - error(E_NULL,"product"); - - mant = 1.0; - *expt = 0; - if ( offset == 0.0 ) - for ( i = 0; i < a->dim; i++ ) - { - mant *= frexp(a->ve[i],&tmp_expt); - *expt += tmp_expt; - if ( ! (i % 10) ) - { - mant = frexp(mant,&tmp_expt); - *expt += tmp_expt; - } - } - else - for ( i = 0; i < a->dim; i++ ) - { - tmp_fctr = a->ve[i] - offset; - tmp_fctr += (tmp_fctr > 0.0 ) ? -MACHEPS*offset : - MACHEPS*offset; - mant *= frexp(tmp_fctr,&tmp_expt); - *expt += tmp_expt; - if ( ! (i % 10) ) - { - mant = frexp(mant,&tmp_expt); - *expt += tmp_expt; - } - } - - mant = frexp(mant,&tmp_expt); - *expt += tmp_expt; - - return mant; -} - -/* product2 -- returns the product of a long list of numbers - -- answer stored in mant (mantissa) and expt (exponent) */ -static double product2(a,k,expt) -VEC *a; -int k; /* entry of a to leave out */ -int *expt; -{ - Real mant, mu, tmp_fctr; - int i, tmp_expt; - - if ( ! a ) - error(E_NULL,"product2"); - if ( k < 0 || k >= a->dim ) - error(E_BOUNDS,"product2"); - - mant = 1.0; - *expt = 0; - mu = a->ve[k]; - for ( i = 0; i < a->dim; i++ ) - { - if ( i == k ) - continue; - tmp_fctr = a->ve[i] - mu; - tmp_fctr += ( tmp_fctr > 0.0 ) ? -MACHEPS*mu : MACHEPS*mu; - mant *= frexp(tmp_fctr,&tmp_expt); - *expt += tmp_expt; - if ( ! (i % 10) ) - { - mant = frexp(mant,&tmp_expt); - *expt += tmp_expt; - } - } - mant = frexp(mant,&tmp_expt); - *expt += tmp_expt; - - return mant; -} - -/* dbl_cmp -- comparison function to pass to qsort() */ -static int dbl_cmp(x,y) -Real *x, *y; -{ - Real tmp; - - tmp = *x - *y; - return (tmp > 0 ? 1 : tmp < 0 ? -1: 0); -} - -/* iter_lanczos2 -- lanczos + error estimate for every e-val - -- uses Cullum & Willoughby approach, Sparse Matrix Proc. 1978 - -- returns multiple e-vals where multiple e-vals may not exist - -- returns evals vector */ -VEC *iter_lanczos2(ip,evals,err_est) -ITER *ip; /* ITER structure */ -VEC *evals; /* eigenvalue vector */ -VEC *err_est; /* error estimates of eigenvalues */ -{ - VEC *a; - static VEC *b=VNULL, *a2=VNULL, *b2=VNULL; - Real beta, pb_mant, det_mant, det_mant1, det_mant2; - int i, pb_expt, det_expt, det_expt1, det_expt2; - - if ( ! ip ) - error(E_NULL,"iter_lanczos2"); - if ( ! ip->Ax || ! ip->x ) - error(E_NULL,"iter_lanczos2"); - if ( ip->k <= 0 ) - error(E_RANGE,"iter_lanczos2"); - - a = evals; - a = v_resize(a,(u_int)ip->k); - b = v_resize(b,(u_int)(ip->k-1)); - MEM_STAT_REG(b,TYPE_VEC); - - iter_lanczos(ip,a,b,&beta,MNULL); - - /* printf("# beta =%g\n",beta); */ - pb_mant = 0.0; - if ( err_est ) - { - pb_mant = product(b,(double)0.0,&pb_expt); - /* printf("# pb_mant = %g, pb_expt = %d\n",pb_mant, pb_expt); */ - } - - /* printf("# diags =\n"); v_output(a); */ - /* printf("# off diags =\n"); v_output(b); */ - a2 = v_resize(a2,a->dim - 1); - b2 = v_resize(b2,b->dim - 1); - MEM_STAT_REG(a2,TYPE_VEC); - MEM_STAT_REG(b2,TYPE_VEC); - for ( i = 0; i < a2->dim - 1; i++ ) - { - a2->ve[i] = a->ve[i+1]; - b2->ve[i] = b->ve[i+1]; - } - a2->ve[a2->dim-1] = a->ve[a2->dim]; - - trieig(a,b,MNULL); - - /* sort evals as a courtesy */ - qsort((void *)(a->ve),(int)(a->dim),sizeof(Real),(int (*)())dbl_cmp); - - /* error estimates */ - if ( err_est ) - { - err_est = v_resize(err_est,(u_int)ip->k); - - trieig(a2,b2,MNULL); - /* printf("# a =\n"); v_output(a); */ - /* printf("# a2 =\n"); v_output(a2); */ - - for ( i = 0; i < a->dim; i++ ) - { - det_mant1 = product2(a,i,&det_expt1); - det_mant2 = product(a2,(double)a->ve[i],&det_expt2); - /* printf("# det_mant1=%g, det_expt1=%d\n", - det_mant1,det_expt1); */ - /* printf("# det_mant2=%g, det_expt2=%d\n", - det_mant2,det_expt2); */ - if ( det_mant1 == 0.0 ) - { /* multiple e-val of T */ - err_est->ve[i] = 0.0; - continue; - } - else if ( det_mant2 == 0.0 ) - { - err_est->ve[i] = HUGE; - continue; - } - if ( (det_expt1 + det_expt2) % 2 ) - /* if odd... */ - det_mant = sqrt(2.0*fabs(det_mant1*det_mant2)); - else /* if even... */ - det_mant = sqrt(fabs(det_mant1*det_mant2)); - det_expt = (det_expt1+det_expt2)/2; - err_est->ve[i] = fabs(beta* - ldexp(pb_mant/det_mant,pb_expt-det_expt)); - } - } - - return a; -} - -/* iter_splanczos2 -- version of iter_lanczos2() that uses sparse matrix data - structure */ - -VEC *iter_splanczos2(A,m,x0,evals,err_est) -SPMAT *A; -int m; -VEC *x0; /* initial vector */ -VEC *evals; /* eigenvalue vector */ -VEC *err_est; /* error estimates of eigenvalues */ -{ - ITER *ip; - VEC *a; - - ip = iter_get(0,0); - ip->Ax = (Fun_Ax) sp_mv_mlt; - ip->A_par = (void *) A; - ip->x = x0; - ip->k = m; - a = iter_lanczos2(ip,evals,err_est); - ip->shared_x = ip->shared_b = TRUE; - iter_free(ip); /* release only ITER structure */ - return a; -} - - - - -/* - Conjugate gradient method - Another variant - mainly for testing - */ - -VEC *iter_cg1(ip) -ITER *ip; -{ - static VEC *r = VNULL, *p = VNULL, *q = VNULL, *z = VNULL; - Real alpha; - double inner,nres; - VEC *rr; /* rr == r or rr == z */ - - if (ip == INULL) - error(E_NULL,"iter_cg"); - if (!ip->Ax || !ip->b) - error(E_NULL,"iter_cg"); - if ( ip->x == ip->b ) - error(E_INSITU,"iter_cg"); - if (!ip->stop_crit) - error(E_NULL,"iter_cg"); - - if ( ip->eps <= 0.0 ) - ip->eps = MACHEPS; - - r = v_resize(r,ip->b->dim); - p = v_resize(p,ip->b->dim); - q = v_resize(q,ip->b->dim); - - MEM_STAT_REG(r,TYPE_VEC); - MEM_STAT_REG(p,TYPE_VEC); - MEM_STAT_REG(q,TYPE_VEC); - - if (ip->Bx != (Fun_Ax)NULL) { - z = v_resize(z,ip->b->dim); - MEM_STAT_REG(z,TYPE_VEC); - rr = z; - } - else rr = r; - - if (ip->x != VNULL) { - if (ip->x->dim != ip->b->dim) - error(E_SIZES,"iter_cg"); - ip->Ax(ip->A_par,ip->x,p); /* p = A*x */ - v_sub(ip->b,p,r); /* r = b - A*x */ - } - else { /* ip->x == 0 */ - ip->x = v_get(ip->b->dim); - ip->shared_x = FALSE; - v_copy(ip->b,r); - } - - if (ip->Bx) (ip->Bx)(ip->B_par,r,p); - else v_copy(r,p); - - inner = in_prod(p,r); - nres = sqrt(fabs(inner)); - if (ip->info) ip->info(ip,nres,r,p); - if ( nres == 0.0) return ip->x; - - for ( ip->steps = 0; ip->steps <= ip->limit; ip->steps++ ) - { - ip->Ax(ip->A_par,p,q); - inner = in_prod(q,p); - if (sqrt(fabs(inner)) <= MACHEPS*ip->init_res) - error(E_BREAKDOWN,"iter_cg1"); - - alpha = in_prod(p,r)/inner; - v_mltadd(ip->x,p,alpha,ip->x); - v_mltadd(r,q,-alpha,r); - - rr = r; - if (ip->Bx) { - ip->Bx(ip->B_par,r,z); - rr = z; - } - - nres = in_prod(r,rr); - if (nres < 0.0) { - warning(WARN_RES_LESS_0,"iter_cg"); - break; - } - nres = sqrt(fabs(nres)); - if (ip->info) ip->info(ip,nres,r,z); - if (ip->steps == 0) ip->init_res = nres; - if ( ip->stop_crit(ip,nres,r,z) ) break; - - alpha = -in_prod(rr,q)/inner; - v_mltadd(rr,p,alpha,p); - - } - - return ip->x; -} - - //GO.SYSIN DD itersym.c echo iternsym.c 1>&2 sed >iternsym.c <<'//GO.SYSIN DD iternsym.c' 's/^-//' - -/************************************************************************** -** -** Copyright (C) 1993 David E. Stewart & Zbigniew Leyk, all rights reserved. -** -** Meschach Library -** -** This Meschach Library is provided "as is" without any express -** or implied warranty of any kind with respect to this software. -** In particular the authors shall not be liable for any direct, -** indirect, special, incidental or consequential damages arising -** in any way from use of the software. -** -** Everyone is granted permission to copy, modify and redistribute this -** Meschach Library, provided: -** 1. All copies contain this copyright notice. -** 2. All modified copies shall carry a notice stating who -** made the last modification and the date of such modification. -** 3. No charge is made for this software or works derived from it. -** This clause shall not be construed as constraining other software -** distributed on the same medium as this software, nor is a -** distribution fee considered a charge. -** -***************************************************************************/ - - -/* iter.c 17/09/93 */ - -/* - ITERATIVE METHODS - implementation of several iterative methods; - see also iter0.c -*/ - -#include -#include -#include "matrix.h" -#include "matrix2.h" -#include "sparse.h" -#include "iter.h" - -static char rcsid[] = "$Id: m9,v 1.1.1.1 1999/04/14 14:16:23 borland Exp $"; - - -#ifdef ANSI_C -VEC *spCHsolve(SPMAT *,VEC *,VEC *); -#else -VEC *spCHsolve(); -#endif - - -/* - iter_cgs -- uses CGS to compute a solution x to A.x=b -*/ - -VEC *iter_cgs(ip,r0) -ITER *ip; -VEC *r0; -{ - static VEC *p = VNULL, *q = VNULL, *r = VNULL, *u = VNULL; - static VEC *v = VNULL, *z = VNULL; - VEC *tmp; - Real alpha, beta, nres, rho, old_rho, sigma, inner; - - if (ip == INULL) - error(E_NULL,"iter_cgs"); - if (!ip->Ax || !ip->b || !r0) - error(E_NULL,"iter_cgs"); - if ( ip->x == ip->b ) - error(E_INSITU,"iter_cgs"); - if (!ip->stop_crit) - error(E_NULL,"iter_cgs"); - if ( r0->dim != ip->b->dim ) - error(E_SIZES,"iter_cgs"); - - if ( ip->eps <= 0.0 ) ip->eps = MACHEPS; - - p = v_resize(p,ip->b->dim); - q = v_resize(q,ip->b->dim); - r = v_resize(r,ip->b->dim); - u = v_resize(u,ip->b->dim); - v = v_resize(v,ip->b->dim); - - MEM_STAT_REG(p,TYPE_VEC); - MEM_STAT_REG(q,TYPE_VEC); - MEM_STAT_REG(r,TYPE_VEC); - MEM_STAT_REG(u,TYPE_VEC); - MEM_STAT_REG(v,TYPE_VEC); - - if (ip->Bx) { - z = v_resize(z,ip->b->dim); - MEM_STAT_REG(z,TYPE_VEC); - } - - if (ip->x != VNULL) { - if (ip->x->dim != ip->b->dim) - error(E_SIZES,"iter_cgs"); - ip->Ax(ip->A_par,ip->x,v); /* v = A*x */ - if (ip->Bx) { - v_sub(ip->b,v,v); /* v = b - A*x */ - (ip->Bx)(ip->B_par,v,r); /* r = B*(b-A*x) */ - } - else v_sub(ip->b,v,r); /* r = b-A*x */ - } - else { /* ip->x == 0 */ - ip->x = v_get(ip->b->dim); /* x == 0 */ - ip->shared_x = FALSE; - if (ip->Bx) (ip->Bx)(ip->B_par,ip->b,r); /* r = B*b */ - else v_copy(ip->b,r); /* r = b */ - } - - v_zero(p); - v_zero(q); - old_rho = 1.0; - - for (ip->steps = 0; ip->steps <= ip->limit; ip->steps++) { - - inner = in_prod(r,r); - nres = sqrt(fabs(inner)); - if (ip->steps == 0) ip->init_res = nres; - - if (ip->info) ip->info(ip,nres,r,VNULL); - if ( ip->stop_crit(ip,nres,r,VNULL) ) break; - - rho = in_prod(r0,r); - if ( old_rho == 0.0 ) - error(E_BREAKDOWN,"iter_cgs"); - beta = rho/old_rho; - v_mltadd(r,q,beta,u); - v_mltadd(q,p,beta,v); - v_mltadd(u,v,beta,p); - - (ip->Ax)(ip->A_par,p,q); - if (ip->Bx) { - (ip->Bx)(ip->B_par,q,z); - tmp = z; - } - else tmp = q; - - sigma = in_prod(r0,tmp); - if ( sigma == 0.0 ) - error(E_BREAKDOWN,"iter_cgs"); - alpha = rho/sigma; - v_mltadd(u,tmp,-alpha,q); - v_add(u,q,v); - - (ip->Ax)(ip->A_par,v,u); - if (ip->Bx) { - (ip->Bx)(ip->B_par,u,z); - tmp = z; - } - else tmp = u; - - v_mltadd(r,tmp,-alpha,r); - v_mltadd(ip->x,v,alpha,ip->x); - - old_rho = rho; - } - - return ip->x; -} - - - -/* iter_spcgs -- simple interface for SPMAT data structures - use always as follows: - x = iter_spcgs(A,B,b,r0,tol,x,limit,steps); - or - x = iter_spcgs(A,B,b,r0,tol,VNULL,limit,steps); - In the second case the solution vector is created. - If B is not NULL then it is a preconditioner. -*/ -VEC *iter_spcgs(A,B,b,r0,tol,x,limit,steps) -SPMAT *A, *B; -VEC *b, *r0, *x; -double tol; -int *steps,limit; -{ - ITER *ip; - - ip = iter_get(0,0); - ip->Ax = (Fun_Ax) sp_mv_mlt; - ip->A_par = (void *) A; - if (B) { - ip->Bx = (Fun_Ax) sp_mv_mlt; - ip->B_par = (void *) B; - } - else { - ip->Bx = (Fun_Ax) NULL; - ip->B_par = NULL; - } - ip->info = (Fun_info) NULL; - ip->limit = limit; - ip->b = b; - ip->eps = tol; - ip->x = x; - iter_cgs(ip,r0); - x = ip->x; - if (steps) *steps = ip->steps; - ip->shared_x = ip->shared_b = TRUE; - iter_free(ip); /* release only ITER structure */ - return x; - -} - -/* - Routine for performing LSQR -- the least squares QR algorithm - of Paige and Saunders: - "LSQR: an algorithm for sparse linear equations and - sparse least squares", ACM Trans. Math. Soft., v. 8 - pp. 43--71 (1982) - */ -/* lsqr -- sparse CG-like least squares routine: - -- finds min_x ||A.x-b||_2 using A defined through A & AT - -- returns x (if x != NULL) */ -VEC *iter_lsqr(ip) -ITER *ip; -{ - static VEC *u = VNULL, *v = VNULL, *w = VNULL, *tmp = VNULL; - Real alpha, beta, phi, phi_bar; - Real rho, rho_bar, rho_max, theta, nres; - Real s, c; /* for Givens' rotations */ - int m, n; - - if ( ! ip || ! ip->b || !ip->Ax || !ip->ATx ) - error(E_NULL,"iter_lsqr"); - if ( ip->x == ip->b ) - error(E_INSITU,"iter_lsqr"); - if (!ip->stop_crit || !ip->x) - error(E_NULL,"iter_lsqr"); - - if ( ip->eps <= 0.0 ) ip->eps = MACHEPS; - - m = ip->b->dim; - n = ip->x->dim; - - u = v_resize(u,(u_int)m); - v = v_resize(v,(u_int)n); - w = v_resize(w,(u_int)n); - tmp = v_resize(tmp,(u_int)n); - - MEM_STAT_REG(u,TYPE_VEC); - MEM_STAT_REG(v,TYPE_VEC); - MEM_STAT_REG(w,TYPE_VEC); - MEM_STAT_REG(tmp,TYPE_VEC); - - if (ip->x != VNULL) { - ip->Ax(ip->A_par,ip->x,u); /* u = A*x */ - v_sub(ip->b,u,u); /* u = b-A*x */ - } - else { /* ip->x == 0 */ - ip->x = v_get(ip->b->dim); - ip->shared_x = FALSE; - v_copy(ip->b,u); /* u = b */ - } - - beta = v_norm2(u); - if ( beta == 0.0 ) return ip->x; - - sv_mlt(1.0/beta,u,u); - (ip->ATx)(ip->AT_par,u,v); - alpha = v_norm2(v); - if ( alpha == 0.0 ) return ip->x; - - sv_mlt(1.0/alpha,v,v); - v_copy(v,w); - phi_bar = beta; - rho_bar = alpha; - - rho_max = 1.0; - for (ip->steps = 0; ip->steps <= ip->limit; ip->steps++) { - - tmp = v_resize(tmp,m); - (ip->Ax)(ip->A_par,v,tmp); - - v_mltadd(tmp,u,-alpha,u); - beta = v_norm2(u); - sv_mlt(1.0/beta,u,u); - - tmp = v_resize(tmp,n); - (ip->ATx)(ip->AT_par,u,tmp); - v_mltadd(tmp,v,-beta,v); - alpha = v_norm2(v); - sv_mlt(1.0/alpha,v,v); - - rho = sqrt(rho_bar*rho_bar+beta*beta); - if ( rho > rho_max ) - rho_max = rho; - c = rho_bar/rho; - s = beta/rho; - theta = s*alpha; - rho_bar = -c*alpha; - phi = c*phi_bar; - phi_bar = s*phi_bar; - - /* update ip->x & w */ - if ( rho == 0.0 ) - error(E_BREAKDOWN,"iter_lsqr"); - v_mltadd(ip->x,w,phi/rho,ip->x); - v_mltadd(v,w,-theta/rho,w); - - nres = fabs(phi_bar*alpha*c)*rho_max; - - if (ip->info) ip->info(ip,nres,w,VNULL); - if (ip->steps == 0) ip->init_res = nres; - if ( ip->stop_crit(ip,nres,w,VNULL) ) break; - } - - return ip->x; -} - -/* iter_splsqr -- simple interface for SPMAT data structures */ -VEC *iter_splsqr(A,b,tol,x,limit,steps) -SPMAT *A; -VEC *b, *x; -double tol; -int *steps,limit; -{ - ITER *ip; - - ip = iter_get(0,0); - ip->Ax = (Fun_Ax) sp_mv_mlt; - ip->A_par = (void *) A; - ip->ATx = (Fun_Ax) sp_vm_mlt; - ip->AT_par = (void *) A; - ip->Bx = (Fun_Ax) NULL; - ip->B_par = NULL; - - ip->info = (Fun_info) NULL; - ip->limit = limit; - ip->b = b; - ip->eps = tol; - ip->x = x; - iter_lsqr(ip); - x = ip->x; - if (steps) *steps = ip->steps; - ip->shared_x = ip->shared_b = TRUE; - iter_free(ip); /* release only ITER structure */ - return x; -} - - - -/* iter_arnoldi -- an implementation of the Arnoldi method; - iterative refinement is applied. -*/ -MAT *iter_arnoldi_iref(ip,h_rem,Q,H) -ITER *ip; -Real *h_rem; -MAT *Q, *H; -{ - static VEC *u=VNULL, *r=VNULL, *s=VNULL, *tmp=VNULL; - VEC v; /* auxiliary vector */ - int i,j; - Real h_val, c; - - if (ip == INULL) - error(E_NULL,"iter_arnoldi_iref"); - if ( ! ip->Ax || ! Q || ! ip->x ) - error(E_NULL,"iter_arnoldi_iref"); - if ( ip->k <= 0 ) - error(E_BOUNDS,"iter_arnoldi_iref"); - if ( Q->n != ip->x->dim || Q->m != ip->k ) - error(E_SIZES,"iter_arnoldi_iref"); - - m_zero(Q); - H = m_resize(H,ip->k,ip->k); - m_zero(H); - - u = v_resize(u,ip->x->dim); - r = v_resize(r,ip->k); - s = v_resize(s,ip->k); - tmp = v_resize(tmp,ip->x->dim); - MEM_STAT_REG(u,TYPE_VEC); - MEM_STAT_REG(r,TYPE_VEC); - MEM_STAT_REG(s,TYPE_VEC); - MEM_STAT_REG(tmp,TYPE_VEC); - - v.dim = v.max_dim = ip->x->dim; - - c = v_norm2(ip->x); - if ( c <= 0.0) - return H; - else { - v.ve = Q->me[0]; - sv_mlt(1.0/c,ip->x,&v); - } - - v_zero(r); - v_zero(s); - for ( i = 0; i < ip->k; i++ ) - { - v.ve = Q->me[i]; - u = (ip->Ax)(ip->A_par,&v,u); - for (j = 0; j <= i; j++) { - v.ve = Q->me[j]; - /* modified Gram-Schmidt */ - r->ve[j] = in_prod(&v,u); - v_mltadd(u,&v,-r->ve[j],u); - } - h_val = v_norm2(u); - /* if u == 0 then we have an exact subspace */ - if ( h_val <= 0.0 ) - { - *h_rem = h_val; - return H; - } - /* iterative refinement -- ensures near orthogonality */ - do { - v_zero(tmp); - for (j = 0; j <= i; j++) { - v.ve = Q->me[j]; - s->ve[j] = in_prod(&v,u); - v_mltadd(tmp,&v,s->ve[j],tmp); - } - v_sub(u,tmp,u); - v_add(r,s,r); - } while ( v_norm2(s) > 0.1*(h_val = v_norm2(u)) ); - /* now that u is nearly orthogonal to Q, update H */ - set_col(H,i,r); - /* check once again if h_val is zero */ - if ( h_val <= 0.0 ) - { - *h_rem = h_val; - return H; - } - if ( i == ip->k-1 ) - { - *h_rem = h_val; - continue; - } - /* H->me[i+1][i] = h_val; */ - m_set_val(H,i+1,i,h_val); - v.ve = Q->me[i+1]; - sv_mlt(1.0/h_val,u,&v); - } - - return H; -} - -/* iter_arnoldi -- an implementation of the Arnoldi method; - modified Gram-Schmidt algorithm -*/ -MAT *iter_arnoldi(ip,h_rem,Q,H) -ITER *ip; -Real *h_rem; -MAT *Q, *H; -{ - static VEC *u=VNULL, *r=VNULL; - VEC v; /* auxiliary vector */ - int i,j; - Real h_val, c; - - if (ip == INULL) - error(E_NULL,"iter_arnoldi"); - if ( ! ip->Ax || ! Q || ! ip->x ) - error(E_NULL,"iter_arnoldi"); - if ( ip->k <= 0 ) - error(E_BOUNDS,"iter_arnoldi"); - if ( Q->n != ip->x->dim || Q->m != ip->k ) - error(E_SIZES,"iter_arnoldi"); - - m_zero(Q); - H = m_resize(H,ip->k,ip->k); - m_zero(H); - - u = v_resize(u,ip->x->dim); - r = v_resize(r,ip->k); - MEM_STAT_REG(u,TYPE_VEC); - MEM_STAT_REG(r,TYPE_VEC); - - v.dim = v.max_dim = ip->x->dim; - - c = v_norm2(ip->x); - if ( c <= 0.0) - return H; - else { - v.ve = Q->me[0]; - sv_mlt(1.0/c,ip->x,&v); - } - - v_zero(r); - for ( i = 0; i < ip->k; i++ ) - { - v.ve = Q->me[i]; - u = (ip->Ax)(ip->A_par,&v,u); - for (j = 0; j <= i; j++) { - v.ve = Q->me[j]; - /* modified Gram-Schmidt */ - r->ve[j] = in_prod(&v,u); - v_mltadd(u,&v,-r->ve[j],u); - } - h_val = v_norm2(u); - /* if u == 0 then we have an exact subspace */ - if ( h_val <= 0.0 ) - { - *h_rem = h_val; - return H; - } - set_col(H,i,r); - if ( i == ip->k-1 ) - { - *h_rem = h_val; - continue; - } - /* H->me[i+1][i] = h_val; */ - m_set_val(H,i+1,i,h_val); - v.ve = Q->me[i+1]; - sv_mlt(1.0/h_val,u,&v); - } - - return H; -} - - - -/* iter_sparnoldi -- uses arnoldi() with an explicit representation of A */ -MAT *iter_sparnoldi(A,x0,m,h_rem,Q,H) -SPMAT *A; -VEC *x0; -int m; -Real *h_rem; -MAT *Q, *H; -{ - ITER *ip; - - ip = iter_get(0,0); - ip->Ax = (Fun_Ax) sp_mv_mlt; - ip->A_par = (void *) A; - ip->x = x0; - ip->k = m; - iter_arnoldi_iref(ip,h_rem,Q,H); - ip->shared_x = ip->shared_b = TRUE; - iter_free(ip); /* release only ITER structure */ - return H; -} - - -/* for testing gmres */ -static void test_gmres(ip,i,Q,R,givc,givs,h_val) -ITER *ip; -int i; -MAT *Q, *R; -VEC *givc, *givs; -double h_val; -{ - VEC vt, vt1; - static MAT *Q1, *R1; - int j; - - /* test Q*A*Q^T = R */ - - Q = m_resize(Q,i+1,ip->b->dim); - Q1 = m_resize(Q1,i+1,ip->b->dim); - R1 = m_resize(R1,i+1,i+1); - MEM_STAT_REG(Q1,TYPE_MAT); - MEM_STAT_REG(R1,TYPE_MAT); - - vt.dim = vt.max_dim = ip->b->dim; - vt1.dim = vt1.max_dim = ip->b->dim; - for (j=0; j <= i; j++) { - vt.ve = Q->me[j]; - vt1.ve = Q1->me[j]; - ip->Ax(ip->A_par,&vt,&vt1); - } - - mmtr_mlt(Q,Q1,R1); - R1 = m_resize(R1,i+2,i+1); - for (j=0; j < i; j++) - R1->me[i+1][j] = 0.0; - R1->me[i+1][i] = h_val; - - for (j = 0; j <= i; j++) { - rot_rows(R1,j,j+1,givc->ve[j],givs->ve[j],R1); - } - - R1 = m_resize(R1,i+1,i+1); - m_sub(R,R1,R1); - /* if (m_norm_inf(R1) > MACHEPS*ip->b->dim) */ - printf(" %d. ||Q*A*Q^T - H|| = %g [cf. MACHEPS = %g]\n", - ip->steps,m_norm_inf(R1),MACHEPS); - - /* check Q*Q^T = I */ - - Q = m_resize(Q,i+1,ip->b->dim); - mmtr_mlt(Q,Q,R1); - for (j=0; j <= i; j++) - R1->me[j][j] -= 1.0; - if (m_norm_inf(R1) > MACHEPS*ip->b->dim) - printf(" ! m_norm_inf(Q*Q^T) = %g\n",m_norm_inf(R1)); - -} - - -/* gmres -- generalised minimum residual algorithm of Saad & Schultz - SIAM J. Sci. Stat. Comp. v.7, pp.856--869 (1986) -*/ -VEC *iter_gmres(ip) -ITER *ip; -{ - static VEC *u=VNULL, *r=VNULL, *rhs = VNULL; - static VEC *givs=VNULL, *givc=VNULL, *z = VNULL; - static MAT *Q = MNULL, *R = MNULL; - VEC *rr, v, v1; /* additional pointers (not real vectors) */ - int i,j, done; - Real nres; -/* Real last_h; */ - - if (ip == INULL) - error(E_NULL,"iter_gmres"); - if ( ! ip->Ax || ! ip->b ) - error(E_NULL,"iter_gmres"); - if ( ! ip->stop_crit ) - error(E_NULL,"iter_gmres"); - if ( ip->k <= 0 ) - error(E_BOUNDS,"iter_gmres"); - if (ip->x != VNULL && ip->x->dim != ip->b->dim) - error(E_SIZES,"iter_gmres"); - if (ip->eps <= 0.0) ip->eps = MACHEPS; - - r = v_resize(r,ip->k+1); - u = v_resize(u,ip->b->dim); - rhs = v_resize(rhs,ip->k+1); - givs = v_resize(givs,ip->k); /* Givens rotations */ - givc = v_resize(givc,ip->k); - - MEM_STAT_REG(r,TYPE_VEC); - MEM_STAT_REG(u,TYPE_VEC); - MEM_STAT_REG(rhs,TYPE_VEC); - MEM_STAT_REG(givs,TYPE_VEC); - MEM_STAT_REG(givc,TYPE_VEC); - - R = m_resize(R,ip->k+1,ip->k); - Q = m_resize(Q,ip->k,ip->b->dim); - MEM_STAT_REG(R,TYPE_MAT); - MEM_STAT_REG(Q,TYPE_MAT); - - if (ip->x == VNULL) { /* ip->x == 0 */ - ip->x = v_get(ip->b->dim); - ip->shared_x = FALSE; - } - - v.dim = v.max_dim = ip->b->dim; /* v and v1 are pointers to rows */ - v1.dim = v1.max_dim = ip->b->dim; /* of matrix Q */ - - if (ip->Bx != (Fun_Ax)NULL) { /* if precondition is defined */ - z = v_resize(z,ip->b->dim); - MEM_STAT_REG(z,TYPE_VEC); - } - - done = FALSE; - for (ip->steps = 0; ip->steps < ip->limit; ) { - - /* restart */ - - ip->Ax(ip->A_par,ip->x,u); /* u = A*x */ - v_sub(ip->b,u,u); /* u = b - A*x */ - rr = u; /* rr is a pointer only */ - - if (ip->Bx) { - (ip->Bx)(ip->B_par,u,z); /* tmp = B*(b-A*x) */ - rr = z; - } - - nres = v_norm2(rr); - if (ip->steps == 0) { - if (ip->info) ip->info(ip,nres,VNULL,VNULL); - ip->init_res = nres; - } - - if ( nres == 0.0 ) { - done = TRUE; - break; - } - - v.ve = Q->me[0]; - sv_mlt(1.0/nres,rr,&v); - - v_zero(r); - v_zero(rhs); - rhs->ve[0] = nres; - - for ( i = 0; i < ip->k && ip->steps < ip->limit; i++ ) { - ip->steps++; - v.ve = Q->me[i]; - (ip->Ax)(ip->A_par,&v,u); - rr = u; - if (ip->Bx) { - (ip->Bx)(ip->B_par,u,z); - rr = z; - } - - if (i < ip->k - 1) { - v1.ve = Q->me[i+1]; - v_copy(rr,&v1); - for (j = 0; j <= i; j++) { - v.ve = Q->me[j]; - /* r->ve[j] = in_prod(&v,rr); */ - /* modified Gram-Schmidt algorithm */ - r->ve[j] = in_prod(&v,&v1); - v_mltadd(&v1,&v,-r->ve[j],&v1); - } - - r->ve[i+1] = nres = v_norm2(&v1); - if (nres <= MACHEPS*ip->init_res) { - for (j = 0; j < i; j++) - rot_vec(r,j,j+1,givc->ve[j],givs->ve[j],r); - set_col(R,i,r); - done = TRUE; - break; - } - sv_mlt(1.0/nres,&v1,&v1); - } - else { /* i == ip->k - 1 */ - /* Q->me[ip->k] need not be computed */ - - for (j = 0; j <= i; j++) { - v.ve = Q->me[j]; - r->ve[j] = in_prod(&v,rr); - } - - nres = in_prod(rr,rr) - in_prod(r,r); - if (sqrt(fabs(nres)) <= MACHEPS*ip->init_res) { - for (j = 0; j < i; j++) - rot_vec(r,j,j+1,givc->ve[j],givs->ve[j],r); - set_col(R,i,r); - done = TRUE; - break; - } - if (nres < 0.0) { /* do restart */ - i--; - ip->steps--; - break; - } - r->ve[i+1] = sqrt(nres); - } - - /* QR update */ - - /* last_h = r->ve[i+1]; */ /* for test only */ - for (j = 0; j < i; j++) - rot_vec(r,j,j+1,givc->ve[j],givs->ve[j],r); - givens(r->ve[i],r->ve[i+1],&givc->ve[i],&givs->ve[i]); - rot_vec(r,i,i+1,givc->ve[i],givs->ve[i],r); - rot_vec(rhs,i,i+1,givc->ve[i],givs->ve[i],rhs); - - set_col(R,i,r); - - nres = fabs((double) rhs->ve[i+1]); - if (ip->info) ip->info(ip,nres,VNULL,VNULL); - if ( ip->stop_crit(ip,nres,VNULL,VNULL) ) { - done = TRUE; - break; - } - } - - /* use ixi submatrix of R */ - - if (i >= ip->k) i = ip->k - 1; - - R = m_resize(R,i+1,i+1); - rhs = v_resize(rhs,i+1); - - /* test only */ - /* test_gmres(ip,i,Q,R,givc,givs,last_h); */ - - Usolve(R,rhs,rhs,0.0); /* solve a system: R*x = rhs */ - - /* new approximation */ - - for (j = 0; j <= i; j++) { - v.ve = Q->me[j]; - v_mltadd(ip->x,&v,rhs->ve[j],ip->x); - } - - if (done) break; - - /* back to old dimensions */ - - rhs = v_resize(rhs,ip->k+1); - R = m_resize(R,ip->k+1,ip->k); - - } - - return ip->x; -} - -/* iter_spgmres - a simple interface to iter_gmres */ - -VEC *iter_spgmres(A,B,b,tol,x,k,limit,steps) -SPMAT *A, *B; -VEC *b, *x; -double tol; -int *steps,k,limit; -{ - ITER *ip; - - ip = iter_get(0,0); - ip->Ax = (Fun_Ax) sp_mv_mlt; - ip->A_par = (void *) A; - if (B) { - ip->Bx = (Fun_Ax) sp_mv_mlt; - ip->B_par = (void *) B; - } - else { - ip->Bx = (Fun_Ax) NULL; - ip->B_par = NULL; - } - ip->k = k; - ip->limit = limit; - ip->info = (Fun_info) NULL; - ip->b = b; - ip->eps = tol; - ip->x = x; - iter_gmres(ip); - x = ip->x; - if (steps) *steps = ip->steps; - ip->shared_x = ip->shared_b = TRUE; - iter_free(ip); /* release only ITER structure */ - return x; -} - - -/* for testing mgcr */ -static void test_mgcr(ip,i,Q,R) -ITER *ip; -int i; -MAT *Q, *R; -{ - VEC vt, vt1; - static MAT *R1; - static VEC *r, *r1; - VEC *rr; - int k,j; - Real sm; - - - /* check Q*Q^T = I */ - vt.dim = vt.max_dim = ip->b->dim; - vt1.dim = vt1.max_dim = ip->b->dim; - - Q = m_resize(Q,i+1,ip->b->dim); - R1 = m_resize(R1,i+1,i+1); - r = v_resize(r,ip->b->dim); - r1 = v_resize(r1,ip->b->dim); - MEM_STAT_REG(R1,TYPE_MAT); - MEM_STAT_REG(r,TYPE_VEC); - MEM_STAT_REG(r1,TYPE_VEC); - - m_zero(R1); - for (k=1; k <= i; k++) - for (j=1; j <= i; j++) { - vt.ve = Q->me[k]; - vt1.ve = Q->me[j]; - R1->me[k][j] = in_prod(&vt,&vt1); - } - for (j=1; j <= i; j++) - R1->me[j][j] -= 1.0; - if (m_norm_inf(R1) > MACHEPS*ip->b->dim) - printf(" ! (mgcr:) m_norm_inf(Q*Q^T) = %g\n",m_norm_inf(R1)); - - /* check (r_i,Ap_j) = 0 for j <= i */ - - ip->Ax(ip->A_par,ip->x,r); - v_sub(ip->b,r,r); - rr = r; - if (ip->Bx) { - ip->Bx(ip->B_par,r,r1); - rr = r1; - } - - printf(" ||r|| = %g\n",v_norm2(rr)); - sm = 0.0; - for (j = 1; j <= i; j++) { - vt.ve = Q->me[j]; - sm = max(sm,in_prod(&vt,rr)); - } - if (sm >= MACHEPS*ip->b->dim) - printf(" ! (mgcr:) max_j (r,Ap_j) = %g\n",sm); - -} - - - - -/* - iter_mgcr -- modified generalized conjugate residual algorithm; - fast version of GCR; -*/ -VEC *iter_mgcr(ip) -ITER *ip; -{ - static VEC *As, *beta, *alpha, *z; - static MAT *N, *H; - - VEC *rr, v, s; /* additional pointer and structures */ - Real nres; /* norm of a residual */ - Real dd; /* coefficient d_i */ - int i,j; - int done; /* if TRUE then stop the iterative process */ - int dim; /* dimension of the problem */ - - /* ip cannot be NULL */ - if (ip == INULL) error(E_NULL,"mgcr"); - /* Ax, b and stopping criterion must be given */ - if (! ip->Ax || ! ip->b || ! ip->stop_crit) - error(E_NULL,"mgcr"); - /* at least one direction vector must exist */ - if ( ip->k <= 0) error(E_BOUNDS,"mgcr"); - /* if the vector x is given then b and x must have the same dimension */ - if ( ip->x && ip->x->dim != ip->b->dim) - error(E_SIZES,"mgcr"); - if (ip->eps <= 0.0) ip->eps = MACHEPS; - - dim = ip->b->dim; - As = v_resize(As,dim); - alpha = v_resize(alpha,ip->k); - beta = v_resize(beta,ip->k); - - MEM_STAT_REG(As,TYPE_VEC); - MEM_STAT_REG(alpha,TYPE_VEC); - MEM_STAT_REG(beta,TYPE_VEC); - - H = m_resize(H,ip->k,ip->k); - N = m_resize(N,ip->k,dim); - - MEM_STAT_REG(H,TYPE_MAT); - MEM_STAT_REG(N,TYPE_MAT); - - /* if a preconditioner is defined */ - if (ip->Bx) { - z = v_resize(z,dim); - MEM_STAT_REG(z,TYPE_VEC); - } - - /* if x is NULL then it is assumed that x has - entries with value zero */ - if ( ! ip->x ) { - ip->x = v_get(ip->b->dim); - ip->shared_x = FALSE; - } - - /* v and s are additional pointers to rows of N */ - /* they must have the same dimension as rows of N */ - v.dim = v.max_dim = s.dim = s.max_dim = dim; - - - done = FALSE; - for (ip->steps = 0; ip->steps < ip->limit; ) { - (*ip->Ax)(ip->A_par,ip->x,As); /* As = A*x */ - v_sub(ip->b,As,As); /* As = b - A*x */ - rr = As; /* rr is an additional pointer */ - - /* if a preconditioner is defined */ - if (ip->Bx) { - (*ip->Bx)(ip->B_par,As,z); /* z = B*(b-A*x) */ - rr = z; - } - - /* norm of the residual */ - nres = v_norm2(rr); - dd = nres; /* dd = ||r_i|| */ - - /* check if the norm of the residual is zero */ - if (ip->steps == 0) { - /* information for a user */ - if (ip->info) (*ip->info)(ip,nres,As,rr); - ip->init_res = fabs(nres); - } - - if (nres == 0.0) { - /* iterative process is finished */ - done = TRUE; - break; - } - - /* save this residual in the first row of N */ - v.ve = N->me[0]; - v_copy(rr,&v); - - for (i = 0; i < ip->k && ip->steps < ip->limit; i++) { - ip->steps++; - v.ve = N->me[i]; /* pointer to a row of N (=s_i) */ - /* note that we must use here &v, not v */ - (*ip->Ax)(ip->A_par,&v,As); - rr = As; /* As = A*s_i */ - if (ip->Bx) { - (*ip->Bx)(ip->B_par,As,z); /* z = B*A*s_i */ - rr = z; - } - - if (i < ip->k - 1) { - s.ve = N->me[i+1]; /* pointer to a row of N (=s_{i+1}) */ - v_copy(rr,&s); /* s_{i+1} = B*A*s_i */ - for (j = 0; j <= i-1; j++) { - v.ve = N->me[j+1]; /* pointer to a row of N (=s_{j+1}) */ - /* beta->ve[j] = in_prod(&v,rr); */ /* beta_{j,i} */ - /* modified Gram-Schmidt algorithm */ - beta->ve[j] = in_prod(&v,&s); /* beta_{j,i} */ - /* s_{i+1} -= beta_{j,i}*s_{j+1} */ - v_mltadd(&s,&v,- beta->ve[j],&s); - } - - /* beta_{i,i} = ||s_{i+1}||_2 */ - beta->ve[i] = nres = v_norm2(&s); - if ( nres <= MACHEPS*ip->init_res) { - /* s_{i+1} == 0 */ - i--; - done = TRUE; - break; - } - sv_mlt(1.0/nres,&s,&s); /* normalize s_{i+1} */ - - v.ve = N->me[0]; - alpha->ve[i] = in_prod(&v,&s); /* alpha_i = (s_0 , s_{i+1}) */ - - } - else { - for (j = 0; j <= i-1; j++) { - v.ve = N->me[j+1]; /* pointer to a row of N (=s_{j+1}) */ - beta->ve[j] = in_prod(&v,rr); /* beta_{j,i} */ - } - - nres = in_prod(rr,rr); /* rr = B*A*s_{k-1} */ - for (j = 0; j <= i-1; j++) - nres -= beta->ve[j]*beta->ve[j]; - - if (sqrt(fabs(nres)) <= MACHEPS*ip->init_res) { - /* s_k is zero */ - i--; - done = TRUE; - break; - } - if (nres < 0.0) { /* do restart */ - i--; - ip->steps--; - break; - } - beta->ve[i] = sqrt(nres); /* beta_{k-1,k-1} */ - - v.ve = N->me[0]; - alpha->ve[i] = in_prod(&v,rr); - for (j = 0; j <= i-1; j++) - alpha->ve[i] -= beta->ve[j]*alpha->ve[j]; - alpha->ve[i] /= beta->ve[i]; /* alpha_{k-1} */ - - } - - set_col(H,i,beta); - - /* other method of computing dd */ - /* if (fabs((double)alpha->ve[i]) > dd) { - nres = - dd*dd + alpha->ve[i]*alpha->ve[i]; - nres = sqrt((double) nres); - if (ip->info) (*ip->info)(ip,-nres,VNULL,VNULL); - break; - } */ - /* to avoid overflow/underflow in computing dd */ - /* dd *= cos(asin((double)(alpha->ve[i]/dd))); */ - - nres = alpha->ve[i]/dd; - if (fabs(nres-1.0) <= MACHEPS*ip->init_res) - dd = 0.0; - else { - nres = 1.0 - nres*nres; - if (nres < 0.0) { - nres = sqrt((double) -nres); - if (ip->info) (*ip->info)(ip,-dd*nres,VNULL,VNULL); - break; - } - dd *= sqrt((double) nres); - } - - if (ip->info) (*ip->info)(ip,dd,VNULL,VNULL); - if ( ip->stop_crit(ip,dd,VNULL,VNULL) ) { - /* stopping criterion is satisfied */ - done = TRUE; - break; - } - - } /* end of for */ - - if (i >= ip->k) i = ip->k - 1; - - /* use (i+1) by (i+1) submatrix of H */ - H = m_resize(H,i+1,i+1); - alpha = v_resize(alpha,i+1); - Usolve(H,alpha,alpha,0.0); /* c_i is saved in alpha */ - - for (j = 0; j <= i; j++) { - v.ve = N->me[j]; - v_mltadd(ip->x,&v,alpha->ve[j],ip->x); - } - - - if (done) break; /* stop the iterative process */ - alpha = v_resize(alpha,ip->k); - H = m_resize(H,ip->k,ip->k); - - } /* end of while */ - - return ip->x; /* return the solution */ -} - - - -/* iter_spmgcr - a simple interface to iter_mgcr */ -/* no preconditioner */ -VEC *iter_spmgcr(A,B,b,tol,x,k,limit,steps) -SPMAT *A, *B; -VEC *b, *x; -double tol; -int *steps,k,limit; -{ - ITER *ip; - - ip = iter_get(0,0); - ip->Ax = (Fun_Ax) sp_mv_mlt; - ip->A_par = (void *) A; - if (B) { - ip->Bx = (Fun_Ax) sp_mv_mlt; - ip->B_par = (void *) B; - } - else { - ip->Bx = (Fun_Ax) NULL; - ip->B_par = NULL; - } - - ip->k = k; - ip->limit = limit; - ip->info = (Fun_info) NULL; - ip->b = b; - ip->eps = tol; - ip->x = x; - iter_mgcr(ip); - x = ip->x; - if (steps) *steps = ip->steps; - ip->shared_x = ip->shared_b = TRUE; - iter_free(ip); /* release only ITER structure */ - return x; -} - - - -/* - Conjugate gradients method for a normal equation - a preconditioner B must be symmetric !! -*/ -VEC *iter_cgne(ip) -ITER *ip; -{ - static VEC *r = VNULL, *p = VNULL, *q = VNULL, *z = VNULL; - Real alpha, beta, inner, old_inner, nres; - VEC *rr1; /* pointer only */ - - if (ip == INULL) - error(E_NULL,"iter_cgne"); - if (!ip->Ax || ! ip->ATx || !ip->b) - error(E_NULL,"iter_cgne"); - if ( ip->x == ip->b ) - error(E_INSITU,"iter_cgne"); - if (!ip->stop_crit) - error(E_NULL,"iter_cgne"); - - if ( ip->eps <= 0.0 ) ip->eps = MACHEPS; - - r = v_resize(r,ip->b->dim); - p = v_resize(p,ip->b->dim); - q = v_resize(q,ip->b->dim); - - MEM_STAT_REG(r,TYPE_VEC); - MEM_STAT_REG(p,TYPE_VEC); - MEM_STAT_REG(q,TYPE_VEC); - - z = v_resize(z,ip->b->dim); - MEM_STAT_REG(z,TYPE_VEC); - - if (ip->x) { - if (ip->x->dim != ip->b->dim) - error(E_SIZES,"iter_cgne"); - ip->Ax(ip->A_par,ip->x,p); /* p = A*x */ - v_sub(ip->b,p,z); /* z = b - A*x */ - } - else { /* ip->x == 0 */ - ip->x = v_get(ip->b->dim); - ip->shared_x = FALSE; - v_copy(ip->b,z); - } - rr1 = z; - if (ip->Bx) { - (ip->Bx)(ip->B_par,rr1,p); - rr1 = p; - } - (ip->ATx)(ip->AT_par,rr1,r); /* r = A^T*B*(b-A*x) */ - - - old_inner = 0.0; - for ( ip->steps = 0; ip->steps <= ip->limit; ip->steps++ ) - { - rr1 = r; - if ( ip->Bx ) { - (ip->Bx)(ip->B_par,r,z); /* rr = B*r */ - rr1 = z; - } - - inner = in_prod(r,rr1); - nres = sqrt(fabs(inner)); - if (ip->info) ip->info(ip,nres,r,rr1); - if (ip->steps == 0) ip->init_res = nres; - if ( ip->stop_crit(ip,nres,r,rr1) ) break; - - if ( ip->steps ) /* if ( ip->steps > 0 ) ... */ - { - beta = inner/old_inner; - p = v_mltadd(rr1,p,beta,p); - } - else /* if ( ip->steps == 0 ) ... */ - { - beta = 0.0; - p = v_copy(rr1,p); - old_inner = 0.0; - } - (ip->Ax)(ip->A_par,p,q); /* q = A*p */ - if (ip->Bx) { - (ip->Bx)(ip->B_par,q,z); - (ip->ATx)(ip->AT_par,z,q); - rr1 = q; /* q = A^T*B*A*p */ - } - else { - (ip->ATx)(ip->AT_par,q,z); /* z = A^T*A*p */ - rr1 = z; - } - - alpha = inner/in_prod(rr1,p); - v_mltadd(ip->x,p,alpha,ip->x); - v_mltadd(r,rr1,-alpha,r); - old_inner = inner; - } - - return ip->x; -} - -/* iter_spcgne -- a simple interface to iter_cgne() which - uses sparse matrix data structures - -- assumes that B contains an actual preconditioner (or NULL) - use always as follows: - x = iter_spcgne(A,B,b,eps,x,limit,steps); - or - x = iter_spcgne(A,B,b,eps,VNULL,limit,steps); - In the second case the solution vector is created. -*/ -VEC *iter_spcgne(A,B,b,eps,x,limit,steps) -SPMAT *A, *B; -VEC *b, *x; -double eps; -int *steps, limit; -{ - ITER *ip; - - ip = iter_get(0,0); - ip->Ax = (Fun_Ax) sp_mv_mlt; - ip->A_par = (void *)A; - ip->ATx = (Fun_Ax) sp_vm_mlt; - ip->AT_par = (void *)A; - if (B) { - ip->Bx = (Fun_Ax) sp_mv_mlt; - ip->B_par = (void *)B; - } - else { - ip->Bx = (Fun_Ax) NULL; - ip->B_par = NULL; - } - ip->info = (Fun_info) NULL; - ip->b = b; - ip->eps = eps; - ip->limit = limit; - ip->x = x; - iter_cgne(ip); - x = ip->x; - if (steps) *steps = ip->steps; - ip->shared_x = ip->shared_b = TRUE; - iter_free(ip); /* release only ITER structure */ - return x; -} - - - //GO.SYSIN DD iternsym.c bigmail CUT HERE............ test -w meschach3.shar && test -r 24048P07 && ( cat 24048P07 >> meschach3.shar; rm 24048P07 ) cat > meschach4.shar <<'bigmail CUT HERE............' # to unbundle, sh this file (in an empty directory) echo zmachine.c 1>&2 sed >zmachine.c <<'//GO.SYSIN DD zmachine.c' 's/^-//' - -/************************************************************************** -** -** Copyright (C) 1993 David E. Steward & Zbigniew Leyk, all rights reserved. -** -** Meschach Library -** -** This Meschach Library is provided "as is" without any express -** or implied warranty of any kind with respect to this software. -** In particular the authors shall not be liable for any direct, -** indirect, special, incidental or consequential damages arising -** in any way from use of the software. -** -** Everyone is granted permission to copy, modify and redistribute this -** Meschach Library, provided: -** 1. All copies contain this copyright notice. -** 2. All modified copies shall carry a notice stating who -** made the last modification and the date of such modification. -** 3. No charge is made for this software or works derived from it. -** This clause shall not be construed as constraining other software -** distributed on the same medium as this software, nor is a -** distribution fee considered a charge. -** -***************************************************************************/ - - -/* - This file contains basic routines which are used by the functions - involving complex vectors. - These are the routines that should be modified in order to take - full advantage of specialised architectures (pipelining, vector - processors etc). - */ -static char *rcsid = "$Id: m9,v 1.1.1.1 1999/04/14 14:16:23 borland Exp $"; - -#include "machine.h" -#include "zmatrix.h" -#include - - -/* __zconj__ -- complex conjugate */ -void __zconj__(zp,len) -complex *zp; -int len; -{ - int i; - - for ( i = 0; i < len; i++ ) - zp[i].im = - zp[i].im; -} - -/* __zip__ -- inner product - -- computes sum_i zp1[i].zp2[i] if flag == 0 - sum_i zp1[i]*.zp2[i] if flag != 0 */ bigmail CUT HERE............ test -w meschach4.shar && test -r 24048P08 && test -r 24048P09 && ( cat 24048P08 >> meschach4.shar; rm 24048P08 cat 24048P09 >> meschach4.shar; rm 24048P09 )