/* -*- Mode: C; c-basic-offset:4 ; -*- */ /* * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #ifdef HAVE_ALLOCA_H #include #endif #if !defined(MEM_OP_NAME) #error #endif /* !defined((MEM_OP_NAME) */ #if !defined(MEM_OP) #error #endif /* !defined(MEM_OP) */ #ifndef STRINGIFY # define STRINGIFY_(arg) #arg # define STRINGIFY(arg) STRINGIFY_(arg) #endif #ifndef DT_CONCAT # define DT__CONCAT(a, b) a##b # define DT_CONCAT(a, b) DT__CONCAT(a, b) #endif #define _predefined_data DT_CONCAT(MEM_OP_NAME,_predefined_data) #define _contiguous_loop DT_CONCAT(MEM_OP_NAME,_contiguous_loop) #define _copy_content_same_ddt DT_CONCAT(MEM_OP_NAME,_copy_content_same_ddt) static inline void _predefined_data( const dt_elem_desc_t* ELEM, const opal_datatype_t* DATATYPE, unsigned char* SOURCE_BASE, size_t TOTAL_COUNT, size_t COUNT, unsigned char* SOURCE, unsigned char* DESTINATION, size_t* SPACE ) { const ddt_elem_desc_t* _elem = &((ELEM)->elem); unsigned char* _source = (SOURCE) + _elem->disp; unsigned char* _destination = (DESTINATION) + _elem->disp; size_t do_now = _elem->count, do_now_bytes; assert( (COUNT) == (do_now * _elem->blocklen)); /* We don't a prologue and epilogue here as we are __always__ working * with full copies of the data description. */ /** * Compute how many full blocklen we need to do and do them. */ do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size; assert( (do_now * do_now_bytes) <= (*SPACE) ); for(size_t _i = 0; _i < do_now; _i++ ) { OPAL_DATATYPE_SAFEGUARD_POINTER( _source, do_now_bytes, (SOURCE_BASE), (DATATYPE), (TOTAL_COUNT) ); DO_DEBUG( opal_output( 0, "copy %s( %p, %p, %" PRIsize_t " ) => space %" PRIsize_t "\n", STRINGIFY(MEM_OP_NAME), (void*)_destination, (void*)_source, do_now_bytes, *(SPACE) - _i * do_now_bytes ); ); MEM_OP( _destination, _source, do_now_bytes ); _destination += _elem->extent; _source += _elem->extent; } *(SPACE) -= (do_now_bytes * do_now); } static inline void _contiguous_loop( const dt_elem_desc_t* ELEM, const opal_datatype_t* DATATYPE, unsigned char* SOURCE_BASE, size_t TOTAL_COUNT, size_t COUNT, unsigned char* SOURCE, unsigned char* DESTINATION, size_t* SPACE ) { ddt_loop_desc_t *_loop = (ddt_loop_desc_t*)(ELEM); ddt_endloop_desc_t* _end_loop = (ddt_endloop_desc_t*)((ELEM) + _loop->items); unsigned char* _source = (SOURCE) + _end_loop->first_elem_disp; unsigned char* _destination = (DESTINATION) + _end_loop->first_elem_disp; size_t _copy_loops = (COUNT); if( _loop->extent == (ptrdiff_t)_end_loop->size ) { /* the loop is contiguous */ _copy_loops *= _end_loop->size; OPAL_DATATYPE_SAFEGUARD_POINTER( _source, _copy_loops, (SOURCE_BASE), (DATATYPE), (TOTAL_COUNT) ); MEM_OP( _destination, _source, _copy_loops ); } else { for(size_t _i = 0; _i < _copy_loops; _i++ ) { OPAL_DATATYPE_SAFEGUARD_POINTER( _source, _end_loop->size, (SOURCE_BASE), (DATATYPE), (TOTAL_COUNT) ); DO_DEBUG( opal_output( 0, "copy 3. %s( %p, %p, %" PRIsize_t " ) => space %" PRIsize_t "\n", STRINGIFY(MEM_OP_NAME), (void*)_destination, (void*)_source, _end_loop->size, *(SPACE) - _i * _end_loop->size ); ); MEM_OP( _destination, _source, _end_loop->size ); _source += _loop->extent; _destination += _loop->extent; } _copy_loops *= _end_loop->size; } *(SPACE) -= _copy_loops; } static inline int32_t _copy_content_same_ddt( const opal_datatype_t* datatype, int32_t count, char* destination_base, char* source_base ) { dt_stack_t* pStack; /* pointer to the position on the stack */ int32_t stack_pos; /* index of the stack level */ uint32_t pos_desc; /* actual position in the description of the derived datatype */ uint32_t count_desc; /* the number of items already done in the actual pos_desc */ dt_elem_desc_t* description; dt_elem_desc_t* pElem; size_t iov_len_local; unsigned char *source = (unsigned char*)source_base, *destination = (unsigned char*)destination_base; DO_DEBUG( opal_output( 0, "_copy_content_same_ddt( %p, %d, dst %p, src %p )\n", (void*)datatype, count, (void*)destination_base, (void*)source_base ); ); iov_len_local = (size_t)count * datatype->size; /* If we have to copy a contiguous datatype then simply * do a MEM_OP. */ if( datatype->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { ptrdiff_t extent = (datatype->ub - datatype->lb); /* Now that we know the datatype is contiguous, we should move the 2 pointers * source and destination to the correct displacement. */ destination += datatype->true_lb; source += datatype->true_lb; if( (ptrdiff_t)datatype->size == extent ) { /* all contiguous == no gaps around */ size_t total_length = iov_len_local; size_t memop_chunk = opal_datatype_memop_block_size; OPAL_DATATYPE_SAFEGUARD_POINTER( source, iov_len_local, (unsigned char*)source_base, datatype, count ); while( total_length > 0 ) { if( memop_chunk > total_length ) memop_chunk = total_length; DO_DEBUG( opal_output( 0, "copy c1. %s( %p, %p, %lu ) => space %lu\n", STRINGIFY(MEM_OP_NAME), (void*)destination, (void*)source, (unsigned long)memop_chunk, (unsigned long)total_length ); ); MEM_OP( destination, source, memop_chunk ); destination += memop_chunk; source += memop_chunk; total_length -= memop_chunk; } return 0; /* completed */ } for( pos_desc = 0; (int32_t)pos_desc < count; pos_desc++ ) { OPAL_DATATYPE_SAFEGUARD_POINTER( destination, datatype->size, (unsigned char*)destination_base, datatype, count ); OPAL_DATATYPE_SAFEGUARD_POINTER( source, datatype->size, (unsigned char*)source_base, datatype, count ); DO_DEBUG( opal_output( 0, "copy c2. %s( %p, %p, %lu ) => space %lu\n", STRINGIFY(MEM_OP_NAME), (void*)destination, (void*)source, (unsigned long)datatype->size, (unsigned long)(iov_len_local - (pos_desc * datatype->size)) ); ); MEM_OP( destination, source, datatype->size ); destination += extent; source += extent; } return 0; /* completed */ } pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->loops + 1) ); pStack->count = count; pStack->index = -1; pStack->disp = 0; pos_desc = 0; stack_pos = 0; description = datatype->opt_desc.desc; if( NULL == description ) { description = datatype->desc.desc; } UPDATE_INTERNAL_COUNTERS( description, 0, pElem, count_desc ); while( 1 ) { while( OPAL_LIKELY(pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA) ) { /* now here we have a basic datatype */ _predefined_data( pElem, datatype, (unsigned char*)source_base, count, count_desc, source, destination, &iov_len_local ); pos_desc++; /* advance to the next data */ UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); } if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */ DO_DEBUG( opal_output( 0, "copy end_loop count %" PRIsize_t " stack_pos %d pos_desc %d disp %ld space %lu\n", pStack->count, stack_pos, pos_desc, pStack->disp, (unsigned long)iov_len_local ); ); if( --(pStack->count) == 0 ) { /* end of loop */ if( stack_pos == 0 ) { assert( iov_len_local == 0 ); return 0; /* completed */ } stack_pos--; pStack--; pos_desc++; } else { pos_desc = pStack->index + 1; if( pStack->index == -1 ) { pStack->disp += (datatype->ub - datatype->lb); } else { assert( OPAL_DATATYPE_LOOP == description[pStack->index].loop.common.type ); pStack->disp += description[pStack->index].loop.extent; } } source = (unsigned char*)source_base + pStack->disp; destination = (unsigned char*)destination_base + pStack->disp; UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); DO_DEBUG( opal_output( 0, "copy new_loop count %" PRIsize_t " stack_pos %d pos_desc %d disp %ld space %lu\n", pStack->count, stack_pos, pos_desc, pStack->disp, (unsigned long)iov_len_local ); ); } if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) { ptrdiff_t local_disp = (ptrdiff_t)source; if( pElem->loop.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { _contiguous_loop( pElem, datatype, (unsigned char*)source_base, count, count_desc, source, destination, &iov_len_local ); pos_desc += pElem->loop.items + 1; goto update_loop_description; } local_disp = (ptrdiff_t)source - local_disp; PUSH_STACK( pStack, stack_pos, pos_desc, OPAL_DATATYPE_LOOP, count_desc, pStack->disp + local_disp); pos_desc++; update_loop_description: /* update the current state */ source = (unsigned char*)source_base + pStack->disp; destination = (unsigned char*)destination_base + pStack->disp; UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); DDT_DUMP_STACK( pStack, stack_pos, pElem, "advance loop" ); continue; } } }