/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
 * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2019 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2006 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2009      Oak Ridge National Labs.  All rights reserved.
 * Copyright (c) 2013      Cisco Systems, Inc.  All rights reserved.
 * Copyright (c) 2017-2018 Research Organization for Information Science
 *                         and Technology (RIST).  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "opal_config.h"

#include <stddef.h>

#include "opal/datatype/opal_convertor_internal.h"
#include "opal/datatype/opal_datatype_internal.h"

#if OPAL_ENABLE_DEBUG
#include "opal/util/output.h"

#define DO_DEBUG(INST)  if( opal_ddt_pack_debug ) { INST }
#else
#define DO_DEBUG(INST)
#endif  /* OPAL_ENABLE_DEBUG */

#include "opal/datatype/opal_datatype_checksum.h"
#include "opal/datatype/opal_datatype_pack.h"
#include "opal/datatype/opal_datatype_prototypes.h"

/*
 * This file is compiled with and without CHECKSUM defined; the macros below
 * select the exported name of each pack function for the current variant.
 */
#if defined(CHECKSUM)
#define opal_pack_homogeneous_contig_function           opal_pack_homogeneous_contig_checksum
#define opal_pack_homogeneous_contig_with_gaps_function opal_pack_homogeneous_contig_with_gaps_checksum
#define opal_generic_simple_pack_function               opal_generic_simple_pack_checksum
#define opal_pack_general_function                      opal_pack_general_checksum
#else
#define opal_pack_homogeneous_contig_function           opal_pack_homogeneous_contig
#define opal_pack_homogeneous_contig_with_gaps_function opal_pack_homogeneous_contig_with_gaps
#define opal_generic_simple_pack_function               opal_generic_simple_pack
#define opal_pack_general_function                      opal_pack_general
#endif  /* defined(CHECKSUM) */


/**
 * Pack a contiguous datatype into the provided iovec array.
 *
 * The contig versions do not use the stack to walk a description. They can
 * retrieve their status with just the information in pConv->bConverted
 * (pStack[0].disp / pStack[1].disp only carry the current source offset).
 *
 * For each iovec entry the length is first clamped to the number of bytes
 * still to be packed. If the entry has no buffer (iov_base == NULL) it is
 * pointed directly at the user memory; otherwise the data is copied into it.
 *
 * @param pConv     convertor holding the source buffer and the progress state
 * @param iov       array of iovec to fill; iov_len is updated to the amount used
 * @param out_size  IN: number of entries in iov, OUT: number of entries used
 * @param max_data  OUT: number of bytes packed by this call
 *
 * @return 1 if the convertor reached local_size (CONVERTOR_COMPLETED is set),
 *         0 otherwise.
 */
int32_t
opal_pack_homogeneous_contig_function( opal_convertor_t* pConv,
                                       struct iovec* iov,
                                       uint32_t* out_size,
                                       size_t* max_data )
{
    dt_stack_t* pStack = pConv->pStack;
    unsigned char *source_base = NULL;
    uint32_t iov_count;
    size_t length = pConv->local_size - pConv->bConverted, initial_amount = pConv->bConverted;

    source_base = (pConv->pBaseBuf + pConv->pDesc->true_lb + pStack[0].disp + pStack[1].disp);

    /* There are some optimizations that can be done if the upper level
     * does not provide a buffer.
     */
    for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
        if( 0 == length ) {
            break;
        }
        /* never hand out more than what is left to pack */
        if( (size_t)iov[iov_count].iov_len > length ) {
            iov[iov_count].iov_len = length;
        }
        if( iov[iov_count].iov_base == NULL ) {
            /* no buffer supplied: expose the user memory directly */
            iov[iov_count].iov_base = (IOVBASE_TYPE *) source_base;
            COMPUTE_CSUM( iov[iov_count].iov_base, iov[iov_count].iov_len, pConv );
        } else {
            /* contiguous data just memcpy the smallest data in the user buffer */
            OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, iov[iov_count].iov_len,
                                             pConv->pBaseBuf, pConv->pDesc, pConv->count );
            MEMCPY_CSUM( iov[iov_count].iov_base, source_base, iov[iov_count].iov_len, pConv );
        }
        length            -= iov[iov_count].iov_len;
        pConv->bConverted += iov[iov_count].iov_len;
        pStack[0].disp    += iov[iov_count].iov_len;
        source_base       += iov[iov_count].iov_len;
    }

    /* update the return value */
    *max_data = pConv->bConverted - initial_amount;
    *out_size = iov_count;
    if( pConv->bConverted == pConv->local_size ) {
        pConv->flags |= CONVERTOR_COMPLETED;
        return 1;
    }
    return 0;
}

/**
 * Pack a datatype whose elements are contiguous blocks of pData->size bytes
 * separated by gaps (element stride is the datatype extent, ub - lb).
 *
 * Progress is kept in the first two stack entries: stack[0] holds the number
 * of elements left and the displacement of the current element, stack[1]
 * holds the bytes left in the current element and the offset inside it.
 *
 * If iov[0].iov_base is NULL every iovec entry is pointed directly at one
 * (possibly partial) element in user memory. Otherwise the data is copied:
 * first the left-over of a partially packed element, then as many complete
 * elements as fit, then a trailing partial element.
 *
 * @param pConv     convertor holding the source buffer and the progress state
 * @param iov       array of iovec to fill
 * @param out_size  IN: number of entries in iov, OUT: number of entries used
 * @param max_data  OUT: number of bytes packed by this call
 *
 * @return 1 if CONVERTOR_COMPLETED is set on the convertor, 0 otherwise.
 */
int32_t
opal_pack_homogeneous_contig_with_gaps_function( opal_convertor_t* pConv,
                                                 struct iovec* iov,
                                                 uint32_t* out_size,
                                                 size_t* max_data )
{
    size_t remaining, length, initial_bytes_converted = pConv->bConverted;
    const opal_datatype_t* pData = pConv->pDesc;
    dt_stack_t* stack = pConv->pStack;
    ptrdiff_t extent = pData->ub - pData->lb;
    unsigned char *user_memory, *packed_buffer;
    uint32_t idx;
    size_t i;

    /* The memory layout is contiguous with gaps in the beginning and at the end. The datatype true_lb
     * is the initial displacement, the size the length of the contiguous area and the extent represent
     * how much we should jump between elements.
     */
    assert( (pData->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && ((ptrdiff_t)pData->size != extent) );
    assert( pData->opt_desc.used <= 1 );
    DO_DEBUG( opal_output( 0, "pack_homogeneous_contig( pBaseBuf %p, iov_count %d )\n",
                           (void*)pConv->pBaseBuf, *out_size ); );

    /* Express the inner stack entry in bytes so all the accounting below is byte based */
    if( stack[1].type != opal_datatype_uint1.id ) {
        stack[1].count *= opal_datatype_basicDatatypes[stack[1].type]->size;
        stack[1].type = opal_datatype_uint1.id;
    }

    /* We can provide directly the pointers in the user buffers (like the convertor_raw) */
    if( NULL == iov[0].iov_base ) {
        user_memory = pConv->pBaseBuf + pData->true_lb;

        for( idx = 0; (idx < (*out_size)) && stack[0].count; idx++ ) {
            iov[idx].iov_base = user_memory + stack[0].disp + stack[1].disp;
            iov[idx].iov_len  = stack[1].count;
            COMPUTE_CSUM( iov[idx].iov_base, iov[idx].iov_len, pConv );

            pConv->bConverted += stack[1].count;

            stack[0].disp += extent;
            stack[0].count--;
            stack[1].disp  = 0;
            stack[1].count = pData->size;  /* we might need this to update the partial
                                            * length for the first iteration */
        }
        goto update_status_and_return;
    }

    for( idx = 0; idx < (*out_size); idx++ ) {
        /* Limit the amount of packed data to the data left over on this convertor */
        remaining = pConv->local_size - pConv->bConverted;
        if( 0 == remaining ) {
            break;  /* we're done this time */
        }
        if( remaining > iov[idx].iov_len ) {
            remaining = iov[idx].iov_len;
        }
        packed_buffer = (unsigned char *)iov[idx].iov_base;
        pConv->bConverted += remaining;
        user_memory = pConv->pBaseBuf + pData->true_lb + stack[0].disp + stack[1].disp;

        DO_DEBUG( opal_output( 0, "pack_homogeneous_contig( user_memory %p, packed_buffer %p length %" PRIsize_t "\n",
                               (void*)user_memory, (void*)packed_buffer, remaining ); );

        length = (0 == pConv->stack_pos ? 0 : stack[1].count);  /* left over from the last pack */
        /* data left from last round and enough space in the buffer */
        if( (pData->size != length) && (length <= remaining)) {
            /* copy the partial left-over from the previous round */
            OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, length, pConv->pBaseBuf,
                                             pData, pConv->count );
            /* the packed buffer is the destination, the user memory the source */
            DO_DEBUG( opal_output( 0, "pack dest %p src %p length %" PRIsize_t " [prologue]\n",
                                   (void*)packed_buffer, (void*)user_memory, length ); );
            MEMCPY_CSUM( packed_buffer, user_memory, length, pConv );
            packed_buffer  += length;
            remaining      -= length;
            stack[1].count -= length;
            stack[1].disp  += length;  /* just in case, we overwrite this below */
            if( 0 == stack[1].count) {  /* one completed element */
                stack[0].count--;
                stack[0].disp += extent;
                if( 0 == stack[0].count ) {
                    /* That was the last element. Leaving the loop with a bare break
                     * skips the loop increment, so count the iovec we just wrote
                     * into before reporting *out_size. */
                    idx++;
                    break;
                }
                stack[1].count = pData->size;
                stack[1].disp = 0;
            }
            user_memory = pConv->pBaseBuf + pData->true_lb + stack[0].disp + stack[1].disp;
        }

        /* copy as many complete elements as fit in what is left of this iovec */
        for( i = 0; pData->size <= remaining; i++ ) {
            OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, pData->size, pConv->pBaseBuf,
                                             pData, pConv->count );
            DO_DEBUG( opal_output( 0, "pack dest %p src %p length %" PRIsize_t " [%" PRIsize_t "/%" PRIsize_t "\n",
                                   (void*)packed_buffer, (void*)user_memory, pData->size, remaining, iov[idx].iov_len ); );
            MEMCPY_CSUM( packed_buffer, user_memory, pData->size, pConv );
            packed_buffer += pData->size;
            user_memory   += extent;
            remaining     -= pData->size;
        }
        stack[0].count -= i;  /* the entire datatype copied above */
        stack[0].disp  += (i * extent);

        /* Copy the last bits */
        if( 0 != remaining ) {
            OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, remaining, pConv->pBaseBuf,
                                             pData, pConv->count );
            DO_DEBUG( opal_output( 0, "4. pack dest %p src %p length %" PRIsize_t "\n",
                                   (void*)packed_buffer, (void*)user_memory, remaining ); );
            MEMCPY_CSUM( packed_buffer, user_memory, remaining, pConv );
            stack[1].count -= remaining;
            stack[1].disp  += remaining;  /* keep the += in case we are copying less than the datatype size */
            if( 0 == stack[1].count ) {  /* prepare for the next element */
                stack[1].count = pData->size;
                stack[1].disp  = 0;
            }
        }
    }

 update_status_and_return:
    *out_size = idx;
    *max_data = pConv->bConverted - initial_bytes_converted;
    if( pConv->bConverted == pConv->local_size ) {
        pConv->flags |= CONVERTOR_COMPLETED;
    }
    return !!(pConv->flags & CONVERTOR_COMPLETED);  /* done or not */
}

/* The pack/unpack functions need a cleanup. I have to create a proper interface to access
 * all basic functionalities, hence using them as basic blocks for all conversion functions.
 *
 * But first let's make some global assumptions:
 * - a datatype (with the flag DT_DATA set) will have the contiguous flags set if and only if
 *   the data is really contiguous (extent equal with size)
 * - for the OPAL_DATATYPE_LOOP type the DT_CONTIGUOUS flag set means that the content of the loop is
 *   contiguous but with a gap in the beginning or at the end.
 * - the DT_CONTIGUOUS flag for the type OPAL_DATATYPE_END_LOOP is meaningless.
 */

/**
 * Pack an arbitrary datatype by walking its description (pConvertor->use_desc)
 * with the convertor stack, without any data conversion.
 *
 * The position saved on top of the stack by the previous call is popped on
 * entry, and the new position is pushed back before returning if the
 * conversion is not complete.
 *
 * @param pConvertor convertor holding the source buffer, description and stack
 * @param iov        array of iovec providing the destination buffers; iov_len
 *                   is updated to the number of bytes written in each entry
 * @param out_size   IN: number of entries in iov, OUT: number of entries used
 * @param max_data   OUT: number of bytes packed by this call
 *
 * @return 1 if the convertor reached local_size (CONVERTOR_COMPLETED is set),
 *         0 otherwise.
 */
int32_t
opal_generic_simple_pack_function( opal_convertor_t* pConvertor,
                                   struct iovec* iov, uint32_t* out_size,
                                   size_t* max_data )
{
    dt_stack_t* pStack;       /* pointer to the position on the stack */
    uint32_t pos_desc;        /* actual position in the description of the derived datatype */
    size_t count_desc;        /* the number of items already done in the actual pos_desc */
    size_t total_packed = 0;  /* total amount packed this time */
    dt_elem_desc_t* description;
    dt_elem_desc_t* pElem;
    const opal_datatype_t *pData = pConvertor->pDesc;
    unsigned char *conv_ptr, *iov_ptr;
    size_t iov_len_local;
    uint32_t iov_count;

    DO_DEBUG( opal_output( 0, "opal_convertor_generic_simple_pack( %p:%p, {%p, %lu}, %d )\n",
                           (void*)pConvertor, (void*)pConvertor->pBaseBuf,
                           (void*)iov[0].iov_base, (unsigned long)iov[0].iov_len, *out_size ); );

    description = pConvertor->use_desc->desc;

    /* For the first step we have to add both displacement to the source. After in the
     * main while loop we will set back the conv_ptr to the correct value. This is
     * due to the fact that the convertor can stop in the middle of a data with a count
     */
    pStack = pConvertor->pStack + pConvertor->stack_pos;
    pos_desc   = pStack->index;
    conv_ptr   = pConvertor->pBaseBuf + pStack->disp;
    count_desc = pStack->count;
    pStack--;
    pConvertor->stack_pos--;
    pElem = &(description[pos_desc]);

    DO_DEBUG( opal_output( 0, "pack start pos_desc %d count_desc %" PRIsize_t " disp %ld\n"
                           "stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %ld\n",
                           pos_desc, count_desc, (long)(conv_ptr - pConvertor->pBaseBuf),
                           pConvertor->stack_pos, pStack->index, pStack->count, pStack->disp ); );

    for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
        iov_ptr = (unsigned char *) iov[iov_count].iov_base;
        iov_len_local = iov[iov_count].iov_len;

        if( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) {
            if( ((size_t)pElem->elem.count * pElem->elem.blocklen) != count_desc ) {
                /* we have a partial (less than blocklen) basic datatype */
                int rc = PACK_PARTIAL_BLOCKLEN( pConvertor, pElem, count_desc,
                                                conv_ptr, iov_ptr, iov_len_local );
                if( 0 == rc ) {  /* not done */
                    goto complete_loop;
                }
                if( 0 == count_desc ) {
                    conv_ptr = pConvertor->pBaseBuf + pStack->disp;
                    pos_desc++;  /* advance to the next data */
                    UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
                }
            }
        }

        while( 1 ) {
            while( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) {
                /* we have a basic datatype (working on full blocks) */
                PACK_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc,
                                          conv_ptr, iov_ptr, iov_len_local );
                if( 0 != count_desc ) {  /* completed? */
                    goto complete_loop;
                }
                conv_ptr = pConvertor->pBaseBuf + pStack->disp;
                pos_desc++;  /* advance to the next data */
                UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
            }
            if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) {  /* end of the current loop */
                DO_DEBUG( opal_output( 0, "pack end_loop count %" PRIsize_t " stack_pos %d"
                                       " pos_desc %d disp %ld space %lu\n",
                                       pStack->count, pConvertor->stack_pos,
                                       pos_desc, pStack->disp, (unsigned long)iov_len_local ); );
                if( --(pStack->count) == 0 ) {  /* end of loop */
                    if( 0 == pConvertor->stack_pos ) {
                        /* we're done. Force the exit of the main for loop (around iovec) */
                        *out_size = iov_count;
                        goto complete_loop;
                    }
                    pConvertor->stack_pos--;  /* go one position up on the stack */
                    pStack--;
                    pos_desc++;  /* and move to the next element */
                } else {
                    pos_desc = pStack->index + 1;  /* jump back to the beginning of the loop */
                    if( pStack->index == -1 ) {  /* If it's the datatype count loop */
                        pStack->disp += (pData->ub - pData->lb);  /* jump by the datatype extent */
                    } else {
                        assert( OPAL_DATATYPE_LOOP == description[pStack->index].loop.common.type );
                        pStack->disp += description[pStack->index].loop.extent;  /* jump by the loop extent */
                    }
                }
                conv_ptr = pConvertor->pBaseBuf + pStack->disp;
                UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
                DO_DEBUG( opal_output( 0, "pack new_loop count %" PRIsize_t " stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %ld space %lu\n",
                                       pStack->count, pConvertor->stack_pos, pos_desc,
                                       count_desc, pStack->disp, (unsigned long)iov_len_local ); );
            }
            if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) {
                ptrdiff_t local_disp = (ptrdiff_t)conv_ptr;
                if( pElem->loop.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
                    PACK_CONTIGUOUS_LOOP( pConvertor, pElem, count_desc,
                                          conv_ptr, iov_ptr, iov_len_local );
                    if( 0 == count_desc ) {  /* completed */
                        pos_desc += pElem->loop.items + 1;
                        goto update_loop_description;
                    }
                    /* Save the stack with the correct last_count value. */
                }
                /* how far conv_ptr moved while packing part of the contiguous loop */
                local_disp = (ptrdiff_t)conv_ptr - local_disp;
                PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_LOOP, count_desc,
                            pStack->disp + local_disp);
                pos_desc++;
            update_loop_description:  /* update the current state */
                conv_ptr = pConvertor->pBaseBuf + pStack->disp;
                UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
                DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loop" );
            }
        }
    complete_loop:
        iov[iov_count].iov_len -= iov_len_local;  /* update the amount of valid data */
        total_packed += iov[iov_count].iov_len;
    }
    *max_data = total_packed;
    pConvertor->bConverted += total_packed;  /* update the already converted bytes */
    *out_size = iov_count;
    if( pConvertor->bConverted == pConvertor->local_size ) {
        pConvertor->flags |= CONVERTOR_COMPLETED;
        return 1;
    }
    /* Save the global position for the next round */
    PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, pElem->elem.common.type, count_desc,
                conv_ptr - pConvertor->pBaseBuf );
    DO_DEBUG( opal_output( 0, "pack save stack stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %ld\n",
                           pConvertor->stack_pos, pStack->index, pStack->count, pStack->disp ); );
    return 0;
}

/*
 * Remember that the first item in the stack (ie. position 0) is the number
 * of times the datatype is involved in the operation (ie. the count argument
 * in the MPI_ call).
 */

/**
 * Pack (and convert) the elements described by one predefined-type entry of
 * the datatype description, using the per-type conversion functions of the
 * convertor master. Element sizes may differ between the local representation
 * (opal_datatype_basicDatatypes[type]->size) and the packed representation
 * (master->remote_sizes[type]).
 *
 * @param CONVERTOR  the convertor
 * @param ELEM       description entry to pack
 * @param COUNT      IN: elements left in this entry, OUT: elements still left
 * @param memory     IN/OUT: current position in the user memory (without the
 *                   entry displacement)
 * @param packed     IN/OUT: current position in the packed buffer
 * @param SPACE      IN/OUT: bytes available in the packed buffer
 */
static inline void
pack_predefined_heterogeneous(opal_convertor_t *CONVERTOR,
                              const dt_elem_desc_t *ELEM, size_t *COUNT,
                              unsigned char **memory,
                              unsigned char **packed, size_t *SPACE)
{
    const opal_convertor_master_t *master = (CONVERTOR)->master;
    const ddt_elem_desc_t *_elem = &((ELEM)->elem);
    size_t cando_count = *(COUNT), do_now_bytes;
    size_t local_elem_size = opal_datatype_basicDatatypes[_elem->common.type]->size;
    size_t remote_elem_size = master->remote_sizes[_elem->common.type];
    size_t blocklen_bytes = remote_elem_size;
    unsigned char *_memory = (*memory) + _elem->disp;
    unsigned char *_packed = *packed;
    ptrdiff_t advance = 0;

    assert(0 == (cando_count % _elem->blocklen)); /* no partials here */
    assert(*(COUNT) <= ((size_t) _elem->count * _elem->blocklen));

    /* limit the number of elements to what fits in the packed buffer */
    if ((remote_elem_size * cando_count) > *(SPACE)) {
        cando_count = (*SPACE) / blocklen_bytes;
    }

    /* preemptively update the number of COUNT we will return. */
    *(COUNT) -= cando_count;

    if (_elem->blocklen == 1) {
        /* single-element blocks: one strided conversion handles everything */
        master->pFunctions[_elem->common.type](CONVERTOR, cando_count,
                                               _memory, *SPACE, _elem->extent,
                                               _packed, *SPACE, remote_elem_size,
                                               &advance);
        _memory += cando_count * _elem->extent;
        _packed += cando_count * remote_elem_size;
        goto update_and_return;
    }

    if ((1 < _elem->count) && (_elem->blocklen <= cando_count)) {
        blocklen_bytes = remote_elem_size * _elem->blocklen;

        do { /* Do as many full blocklen as possible */
            /* the bytes read from the user memory are sized with the local element size */
            OPAL_DATATYPE_SAFEGUARD_POINTER(_memory, local_elem_size * _elem->blocklen,
                                            (CONVERTOR)->pBaseBuf,
                                            (CONVERTOR)->pDesc, (CONVERTOR)->count);
            DO_DEBUG(opal_output(0, "pack 2. memcpy( %p, %p, %lu ) => space %lu\n",
                                 (void *) _packed, (void *) _memory, (unsigned long) blocklen_bytes,
                                 (unsigned long) (*(SPACE) - (_packed - *(packed)))););
            master->pFunctions[_elem->common.type](CONVERTOR, _elem->blocklen,
                                                   _memory, *SPACE, local_elem_size,
                                                   _packed, *SPACE, remote_elem_size,
                                                   &advance);
            _packed += blocklen_bytes;
            _memory += _elem->extent;
            cando_count -= _elem->blocklen;
        } while (_elem->blocklen <= cando_count);
    }

    /**
     * As an epilog do anything left from the last blocklen.
     */
    if (0 != cando_count) {
        assert((cando_count < _elem->blocklen)
               || ((1 == _elem->count) && (cando_count <= _elem->blocklen)));
        do_now_bytes = cando_count * remote_elem_size;
        OPAL_DATATYPE_SAFEGUARD_POINTER(_memory, cando_count * local_elem_size,
                                        (CONVERTOR)->pBaseBuf,
                                        (CONVERTOR)->pDesc, (CONVERTOR)->count);
        DO_DEBUG(opal_output(0, "pack 3. memcpy( %p, %p, %lu ) => space %lu [epilog]\n",
                             (void *) _packed, (void *) _memory, (unsigned long) do_now_bytes,
                             (unsigned long) (*(SPACE) - (_packed - *(packed)))););
        master->pFunctions[_elem->common.type](CONVERTOR, cando_count,
                                               _memory, *SPACE, local_elem_size,
                                               _packed, *SPACE, remote_elem_size,
                                               &advance);
        /* The conversion above reads cando_count elements with a stride of
         * local_elem_size, so the user memory advances by the local size while
         * the packed buffer advances by the remote size. */
        _memory += cando_count * local_elem_size;
        _packed += do_now_bytes;
    }

update_and_return:
    *(memory) = _memory - _elem->disp;
    *(SPACE) -= (_packed - *packed);
    *(packed) = _packed;
}

/**
 * Pack an arbitrary datatype by walking its description, converting every
 * predefined element through pack_predefined_heterogeneous(). The structure
 * mirrors the generic simple pack above, except that contiguous loops are
 * not short-circuited (the corresponding code is disabled).
 *
 * @param pConvertor convertor holding the source buffer, description and stack
 * @param iov        array of iovec providing the destination buffers; iov_len
 *                   is updated to the number of bytes written in each entry
 * @param out_size   IN: number of entries in iov, OUT: number of entries used
 * @param max_data   OUT: number of bytes packed by this call
 *
 * @return 1 if the convertor reached local_size (CONVERTOR_COMPLETED is set),
 *         0 if additional calls are needed to complete the pack.
 */
int32_t
opal_pack_general_function( opal_convertor_t* pConvertor,
                            struct iovec* iov, uint32_t* out_size,
                            size_t* max_data )
{
    dt_stack_t* pStack;       /* pointer to the position on the stack */
    uint32_t pos_desc;        /* actual position in the description of the derived datatype */
    size_t count_desc;        /* the number of items already done in the actual pos_desc */
    size_t total_packed = 0;  /* total amount packed this time */
    dt_elem_desc_t* description;
    dt_elem_desc_t* pElem;
    const opal_datatype_t *pData = pConvertor->pDesc;
    unsigned char *conv_ptr, *iov_ptr;
    size_t iov_len_local;
    uint32_t iov_count;

    DO_DEBUG( opal_output( 0, "opal_convertor_general_pack( %p:%p, {%p, %lu}, %d )\n",
                           (void*)pConvertor, (void*)pConvertor->pBaseBuf,
                           (void*)iov[0].iov_base, (unsigned long)iov[0].iov_len, *out_size ); );

    description = pConvertor->use_desc->desc;

    /* For the first step we have to add both displacement to the source. After in the
     * main while loop we will set back the conv_ptr to the correct value. This is
     * due to the fact that the convertor can stop in the middle of a data with a count
     */
    pStack = pConvertor->pStack + pConvertor->stack_pos;
    pos_desc   = pStack->index;
    conv_ptr   = pConvertor->pBaseBuf + pStack->disp;
    count_desc = pStack->count;
    pStack--;
    pConvertor->stack_pos--;
    pElem = &(description[pos_desc]);

    DO_DEBUG( opal_output( 0, "pack start pos_desc %d count_desc %" PRIsize_t " disp %ld\n"
                           "stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %ld\n",
                           pos_desc, count_desc, (long)(conv_ptr - pConvertor->pBaseBuf),
                           pConvertor->stack_pos, pStack->index, pStack->count, pStack->disp ); );

    for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
        iov_ptr = (unsigned char *) iov[iov_count].iov_base;
        iov_len_local = iov[iov_count].iov_len;
        while( 1 ) {
            while( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) {
                /* now here we have a basic datatype */
                DO_DEBUG( opal_output( 0, "pack (%p:%ld, %" PRIsize_t ", %ld) -> (%p, %ld) type %s\n",
                                       (void*)pConvertor->pBaseBuf, conv_ptr + pElem->elem.disp - pConvertor->pBaseBuf,
                                       count_desc, description[pos_desc].elem.extent,
                                       (void*)iov_ptr, iov_len_local,
                                       opal_datatype_basicDatatypes[pElem->elem.common.type]->name ); );

                pack_predefined_heterogeneous( pConvertor, pElem, &count_desc,
                                               &conv_ptr, &iov_ptr, &iov_len_local);
#if 0
                PACK_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc,
                                          conv_ptr, iov_ptr, iov_len_local );
#endif
                if( 0 == count_desc ) {  /* completed */
                    conv_ptr = pConvertor->pBaseBuf + pStack->disp;
                    pos_desc++;  /* advance to the next data */
                    UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
                    continue;
                }
                goto complete_loop;
            }
            if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) {  /* end of the current loop */
                DO_DEBUG( opal_output( 0, "pack end_loop count %" PRIsize_t " stack_pos %d"
                                       " pos_desc %d disp %ld space %lu\n",
                                       pStack->count, pConvertor->stack_pos,
                                       pos_desc, pStack->disp, (unsigned long)iov_len_local ); );
                if( --(pStack->count) == 0 ) {  /* end of loop */
                    if( 0 == pConvertor->stack_pos ) {
                        /* we lie about the size of the next element in order to
                         * make sure we exit the main loop.
                         */
                        *out_size = iov_count;
                        goto complete_loop;  /* completed */
                    }
                    pConvertor->stack_pos--;
                    pStack--;
                    pos_desc++;
                } else {
                    pos_desc = pStack->index + 1;
                    if( pStack->index == -1 ) {
                        pStack->disp += (pData->ub - pData->lb);
                    } else {
                        assert( OPAL_DATATYPE_LOOP == description[pStack->index].loop.common.type );
                        pStack->disp += description[pStack->index].loop.extent;
                    }
                }
                conv_ptr = pConvertor->pBaseBuf + pStack->disp;
                UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
                DO_DEBUG( opal_output( 0, "pack new_loop count %" PRIsize_t " stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %ld space %lu\n",
                                       pStack->count, pConvertor->stack_pos, pos_desc,
                                       count_desc, pStack->disp, (unsigned long)iov_len_local ); );
            }
            if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) {
                ptrdiff_t local_disp = (ptrdiff_t)conv_ptr;
#if 0
                if( pElem->loop.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
                    PACK_CONTIGUOUS_LOOP( pConvertor, pElem, count_desc,
                                          conv_ptr, iov_ptr, iov_len_local );
                    if( 0 == count_desc ) {  /* completed */
                        pos_desc += pElem->loop.items + 1;
                        goto update_loop_description;
                    }
                    /* Save the stack with the correct last_count value. */
                }
#endif  /* in a heterogeneous environment we can't handle the contiguous loops */
                local_disp = (ptrdiff_t)conv_ptr - local_disp;
                PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_LOOP, count_desc,
                            pStack->disp + local_disp);
                pos_desc++;
#if 0
            update_loop_description:  /* update the current state */
#endif  /* in a heterogeneous environment we can't handle the contiguous loops */
                conv_ptr = pConvertor->pBaseBuf + pStack->disp;
                UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
                DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loop" );
                continue;
            }
        }
    complete_loop:
        iov[iov_count].iov_len -= iov_len_local;  /* update the amount of valid data */
        total_packed += iov[iov_count].iov_len;
    }
    *max_data = total_packed;
    pConvertor->bConverted += total_packed;  /* update the already converted bytes */
    *out_size = iov_count;
    if( pConvertor->bConverted == pConvertor->local_size ) {
        pConvertor->flags |= CONVERTOR_COMPLETED;
        return 1;
    }
    /* Save the global position for the next round */
    PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, pElem->elem.common.type, count_desc,
                conv_ptr - pConvertor->pBaseBuf );
    DO_DEBUG( opal_output( 0, "pack save stack stack_pos %d pos_desc %d count_desc %" PRIsize_t" disp %ld\n",
                           pConvertor->stack_pos, pStack->index, pStack->count, pStack->disp ); );
    return 0;
}