/* -*- Mode: C; c-basic-offset:4 ; -*- */ /* * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "opal_config.h" #include #include #include "opal/datatype/opal_datatype.h" #include "opal/datatype/opal_convertor.h" #include "opal/datatype/opal_datatype_internal.h" #if OPAL_ENABLE_DEBUG #include "opal/util/output.h" #define DO_DEBUG(INST) if( opal_ddt_position_debug ) { INST } #else #define DO_DEBUG(INST) #endif /* OPAL_ENABLE_DEBUG */ /* The pack/unpack functions need a cleanup. I have to create a proper interface to access * all basic functionalities, hence using them as basic blocks for all conversion functions. * * But first let's make some global assumptions: * - a datatype (with the flag DT_DATA set) will have the contiguous flags set if and only if * the data is really contiguous (extent equal with size) * - for the OPAL_DATATYPE_LOOP type the DT_CONTIGUOUS flag set means that the content of the loop is * contiguous but with a gap in the begining or at the end. * - the DT_CONTIGUOUS flag for the type OPAL_DATATYPE_END_LOOP is meaningless. */ static inline void position_single_block(opal_convertor_t* CONVERTOR, unsigned char** mem, ptrdiff_t mem_update, size_t* space, size_t space_update, size_t* cnt, size_t cnt_update) { OPAL_DATATYPE_SAFEGUARD_POINTER( *mem, mem_update, (CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count ); DO_DEBUG( opal_output( 0, "position( %p, %lu ) => space %lu [prolog]\n", (void*)*mem, (unsigned long)space_update, (unsigned long)(*space) ); ); *mem += mem_update; *space -= space_update; *cnt -= cnt_update; } /** * Advance the convertors' position according. Update the pointer and the remaining space * accordingly. */ static inline void position_predefined_data( opal_convertor_t* CONVERTOR, dt_elem_desc_t* ELEM, size_t* COUNT, unsigned char** POINTER, size_t* SPACE ) { const ddt_elem_desc_t* _elem = &((ELEM)->elem); size_t total_count = (size_t)_elem->count * _elem->blocklen; size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size; size_t do_now, do_now_bytes = opal_datatype_basicDatatypes[_elem->common.type]->size; unsigned char* _memory = (*POINTER) + _elem->disp; assert( *(COUNT) <= ((size_t)_elem->count * _elem->blocklen) ); if( cando_count > *(COUNT) ) cando_count = *(COUNT); if( 1 == _elem->blocklen ) { DO_DEBUG( opal_output( 0, "position( %p, %" PRIsize_t " ) x (count %" PRIsize_t ", extent %ld) => space %lu [prolog]\n", (void*)_memory, (unsigned long)do_now_bytes, cando_count, _elem->extent, (unsigned long)(*SPACE) ); ); _memory += cando_count * _elem->extent; *SPACE -= cando_count * do_now_bytes; *COUNT -= cando_count; goto update_and_return; } /** * First check if we already did something on this element ? */ do_now = (total_count - *(COUNT)); /* done elements */ if( 0 != do_now ) { do_now = do_now % _elem->blocklen; /* partial blocklen? */ if( 0 != do_now ) { size_t left_in_block = _elem->blocklen - do_now; /* left in the current blocklen */ do_now = (left_in_block > cando_count ) ? cando_count : left_in_block; do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size; position_single_block( CONVERTOR, &_memory, do_now_bytes, SPACE, do_now_bytes, COUNT, do_now ); /* compensate if we just completed a blocklen */ if( do_now == left_in_block ) _memory += _elem->extent - (_elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size); cando_count -= do_now; } } /** * Compute how many full blocklen we need to do and do them. */ do_now = cando_count / _elem->blocklen; if( 0 != do_now ) { do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size; #if OPAL_ENABLE_DEBUG for(size_t _i = 0; _i < do_now; _i++ ) { position_single_block( CONVERTOR, &_memory, _elem->extent, SPACE, do_now_bytes, COUNT, _elem->blocklen ); cando_count -= _elem->blocklen; } #else _memory += do_now * _elem->extent; *SPACE -= do_now * do_now_bytes; *COUNT -= do_now * _elem->blocklen; cando_count -= do_now * _elem->blocklen; #endif /* OPAL_ENABLE_DEBUG */ } /** * As an epilog do anything left from the last blocklen. */ do_now = cando_count; if( 0 != do_now ) { do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size; position_single_block( CONVERTOR, &_memory, do_now_bytes, SPACE, do_now_bytes, COUNT, do_now ); } update_and_return: *(POINTER) = _memory - _elem->disp; } int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor, size_t* position ) { dt_stack_t* pStack; /* pointer to the position on the stack */ uint32_t pos_desc; /* actual position in the description of the derived datatype */ size_t count_desc; /* the number of items already done in the actual pos_desc */ size_t iov_len_local; dt_elem_desc_t* description = pConvertor->use_desc->desc; dt_elem_desc_t* pElem; /* current position */ unsigned char *base_pointer = pConvertor->pBaseBuf; ptrdiff_t extent = pConvertor->pDesc->ub - pConvertor->pDesc->lb; DUMP( "opal_convertor_generic_simple_position( %p, &%ld )\n", (void*)pConvertor, (long)*position ); assert(*position > pConvertor->bConverted); /* We dont want to have to parse the datatype multiple times. What we are interested in * here is to compute the number of completed datatypes that we can move forward, update * the counters and compute the position taking in account only the remaining elements. * The only problem is that we have to modify all the elements on the stack. */ iov_len_local = *position - pConvertor->bConverted; if( iov_len_local > pConvertor->pDesc->size ) { pStack = pConvertor->pStack; /* we're working with the full stack */ count_desc = iov_len_local / pConvertor->pDesc->size; DO_DEBUG( opal_output( 0, "position before %lu asked %lu data size %lu" " iov_len_local %lu count_desc %" PRIsize_t "\n", (unsigned long)pConvertor->bConverted, (unsigned long)*position, (unsigned long)pConvertor->pDesc->size, (unsigned long)iov_len_local, count_desc ); ); /* Update all the stack including the last one */ for( pos_desc = 0; pos_desc <= pConvertor->stack_pos; pos_desc++ ) pStack[pos_desc].disp += count_desc * extent; pConvertor->bConverted += count_desc * pConvertor->pDesc->size; iov_len_local = *position - pConvertor->bConverted; pStack[0].count -= count_desc; DO_DEBUG( opal_output( 0, "after bConverted %lu remaining count %lu iov_len_local %lu\n", (unsigned long)pConvertor->bConverted, (unsigned long)pStack[0].count, (unsigned long)iov_len_local ); ); } pStack = pConvertor->pStack + pConvertor->stack_pos; pos_desc = pStack->index; base_pointer += pStack->disp; count_desc = pStack->count; pStack--; pConvertor->stack_pos--; pElem = &(description[pos_desc]); DO_DEBUG( opal_output( 0, "position start pos_desc %d count_desc %" PRIsize_t " disp %llx\n" "stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %llx\n", pos_desc, count_desc, (unsigned long long)(base_pointer - pConvertor->pBaseBuf), pConvertor->stack_pos, pStack->index, pStack->count, (unsigned long long)pStack->disp ); ); /* Last data has been only partially converted. Compute the relative position */ if( 0 != pConvertor->partial_length ) { size_t element_length = opal_datatype_basicDatatypes[pElem->elem.common.type]->size; size_t missing_length = element_length - pConvertor->partial_length; if( missing_length >= iov_len_local ) { pConvertor->partial_length = (pConvertor->partial_length + iov_len_local) % element_length; pConvertor->bConverted += iov_len_local; assert(pConvertor->partial_length < element_length); return 0; } pConvertor->partial_length = 0; pConvertor->bConverted += missing_length; iov_len_local -= missing_length; count_desc--; } while( 1 ) { if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the the entire datatype */ DO_DEBUG( opal_output( 0, "position end_loop count %" PRIsize_t " stack_pos %d pos_desc %d disp %lx space %lu\n", pStack->count, pConvertor->stack_pos, pos_desc, pStack->disp, (unsigned long)iov_len_local ); ); if( --(pStack->count) == 0 ) { /* end of loop */ if( pConvertor->stack_pos == 0 ) { pConvertor->flags |= CONVERTOR_COMPLETED; goto complete_loop; /* completed */ } pConvertor->stack_pos--; pStack--; pos_desc++; } else { if( pStack->index == -1 ) { pStack->disp += extent; pos_desc = 0; /* back to the first element */ } else { assert( OPAL_DATATYPE_LOOP == description[pStack->index].loop.common.type ); pStack->disp += description[pStack->index].loop.extent; pos_desc = pStack->index; /* go back to the loop start itself to give a chance * to move forward by entire loops */ } } base_pointer = pConvertor->pBaseBuf + pStack->disp; UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); DO_DEBUG( opal_output( 0, "position new_loop count %" PRIsize_t " stack_pos %d pos_desc %d disp %lx space %lu\n", pStack->count, pConvertor->stack_pos, pos_desc, pStack->disp, (unsigned long)iov_len_local ); ); } if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) { ptrdiff_t local_disp = (ptrdiff_t)base_pointer; ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)(pElem + pElem->loop.items); size_t full_loops = iov_len_local / end_loop->size; full_loops = count_desc <= full_loops ? count_desc : full_loops; if( full_loops ) { base_pointer += full_loops * pElem->loop.extent; iov_len_local -= full_loops * end_loop->size; count_desc -= full_loops; if( 0 == count_desc ) { /* completed */ pos_desc += pElem->loop.items + 1; goto update_loop_description; } /* Save the stack with the correct last_count value. */ } local_disp = (ptrdiff_t)base_pointer - local_disp; PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_LOOP, count_desc, pStack->disp + local_disp ); pos_desc++; update_loop_description: /* update the current state */ base_pointer = pConvertor->pBaseBuf + pStack->disp; UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loop" ); DO_DEBUG( opal_output( 0, "position set loop count %" PRIsize_t " stack_pos %d pos_desc %d disp %lx space %lu\n", pStack->count, pConvertor->stack_pos, pos_desc, pStack->disp, (unsigned long)iov_len_local ); ); continue; } while( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { /* now here we have a basic datatype */ position_predefined_data( pConvertor, pElem, &count_desc, &base_pointer, &iov_len_local ); if( 0 != count_desc ) { /* completed */ pConvertor->partial_length = iov_len_local; goto complete_loop; } base_pointer = pConvertor->pBaseBuf + pStack->disp; pos_desc++; /* advance to the next data */ UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); DO_DEBUG( opal_output( 0, "position set loop count %" PRIsize_t " stack_pos %d pos_desc %d disp %lx space %lu\n", pStack->count, pConvertor->stack_pos, pos_desc, pStack->disp, (unsigned long)iov_len_local ); ); } } complete_loop: pConvertor->bConverted = *position; /* update the already converted bytes */ if( !(pConvertor->flags & CONVERTOR_COMPLETED) ) { /* I complete an element, next step I should go to the next one */ PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, pElem->elem.common.type, count_desc, base_pointer - pConvertor->pBaseBuf ); DO_DEBUG( opal_output( 0, "position save stack stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %llx\n", pConvertor->stack_pos, pStack->index, pStack->count, (unsigned long long)pStack->disp ); ); return 0; } return 1; }