#ifndef MAD_TPSA_COMPOSE_SER_TC #define MAD_TPSA_COMPOSE_SER_TC /* o----------------------------------------------------------------------------o | | TPSA serial map composition module implementation | | Methodical Accelerator Design - Copyright (c) 2016+ | Support: http://cern.ch/mad - mad at cern.ch | Authors: L. Deniau, laurent.deniau at cern.ch | C. Tomoiaga | Contrib: - | o----------------------------------------------------------------------------o | You can redistribute this file and/or modify it under the terms of the GNU | General Public License GPLv3 (or later), as published by the Free Software | Foundation. This file is distributed in the hope that it will be useful, but | WITHOUT ANY WARRANTY OF ANY KIND. See http://gnu.org/licenses for details. o----------------------------------------------------------------------------o */ struct compose_ctx_ser { ssz_t sa, sb; ord_t hi_ord; log_t *required; const T **ma, **mb; T **mc, **ords; const D *d; }; #define CTX struct compose_ctx_ser static inline void compose_ord1(ssz_t sa, const T *ma[sa], ssz_t sb, const T *mb[sb], T *mc[sa]) { FOR(ia,sa) { FUN(setvar)(mc[ia], ma[ia]->coef[0], 0,0); FOR(ib,sb) { NUM coef = FUN(geti)(ma[ia],ib+1); if (coef) FUN(acc)(mb[ib], coef, mc[ia]); } } } static inline void compose(int ib, idx_t idx, ord_t o, ord_t mono[], CTX *ctx) { // ib : current variable index (in mb) // idx : current monomial index // o : current monomial order // mono: current monomial const D *d = ctx->d; if (idx < 0 || !ctx->required[idx]) return; #if DEBUG_COMPOSE printf("compose: ib=%d, o=%d, req[% 3d]->", ib, o, idx); mad_mono_print(d->nn, mono, 0); printf("\n"); #endif if (o > 0) FUN(mul)(ctx->ords[o-1], ctx->mb[ib], ctx->ords[o]); FOR(ia,ctx->sa) { NUM coef = FUN(geti)(ctx->ma[ia],idx); if (coef) FUN(acc)(ctx->ords[o], coef, ctx->mc[ia]); } if (o < ctx->hi_ord) for(; ib < ctx->sb; ++ib) { mono[ib]++; idx = mad_desc_idxm(d, d->nn, mono); compose(ib, idx, o+1, mono, ctx); // recursive call mono[ib]--; } } static inline ord_t init_required(ssz_t sa, const T *ma[sa], log_t required[], ord_t hi_ord) { assert(ma && required); const D *d = ma[0]->d; const idx_t *o2i = d->ord2idx; // root is always required required[0] = 1; // primary nodes (non-zero coefs) FOR(ia,sa) FOR(o,ma[ia]->lo,ma[ia]->hi+1) if (mad_bit_tst(ma[ia]->nz, o)) FOR(c,o2i[o],o2i[o+1]) if (ma[ia]->coef[c]) required[c] = 1; // fathers of primary nodes ord_t mono[d->nn]; idx_t j, father; for (ord_t o=hi_ord; o > 1; --o) { FOR(c,o2i[o],o2i[o+1]) if (required[c]) { mad_mono_copy(d->nn, d->To[c], mono); for (j = d->nn-1; j >= 0 && !mono[j]; --j) ; mono[j]--; father = mad_desc_idxm(d, d->nn, mono); #if DEBUG_COMPOSE printf("compini: "); mad_mono_print(d->nn, d->To[c], 0); printf("->"); mad_mono_print(d->nn, mono, 0); printf(" req[% 3d]%c\n", father, required[father] ? '*' : ' '); #endif required[father] = 1; } } return hi_ord; } static inline void compose_serial(ssz_t sa, const T *ma[sa], ssz_t sb, const T *mb[sb], T *mc[sa], ord_t hi_ord) { const D *d = ma[0]->d; #if DEBUG_COMPOSE printf("hi: %d\n", hi_ord); printf("ma:\n"); print_damap(sa, ma, 0); // LD printf("mb:\n"); print_damap(sb, mb, 0); // LD #endif if (hi_ord == 1) { compose_ord1(sa,ma,sb,mb,mc); return; } mad_alloc_tmp(log_t, required, d->nc); init_required(sa, ma, memset(required, 0, d->nc*sizeof(log_t)), hi_ord); // initialization T *ords[hi_ord+1]; // one for each order [0,hi_ord] FOR(o,hi_ord+1) ords[o] = FUN(newd)(d,d->to); FUN(setvar)(ords[0],1,0,0); FOR(ic,sa) FUN(clear)(mc[ic]); CTX ctx = { .d=d, .sa=sa, .sb=sb, .ma=ma, .mb=mb, .mc=mc, .ords=ords, .required=required, .hi_ord=hi_ord }; // compose starting at root of tree: ib 0, idx 0, ord 0, mono 0 ord_t mono[d->nn]; compose(0, 0, 0, memset(mono, 0, sizeof mono), &ctx); // cleanup FOR(o,hi_ord+1) FUN(del)(ords[o]); mad_free_tmp(required); } #undef CTX #endif // MAD_TPSA_COMPOSE_SER_TC