/* This file is part of the Vc library. Copyright (C) 2009-2011 Matthias Kretz Vc is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Vc is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with Vc. If not, see . */ #ifndef SSE_CASTS_H #define SSE_CASTS_H #include "intrinsics.h" #include "types.h" namespace ROOT { namespace Vc { namespace SSE { template static Vc_ALWAYS_INLINE To Vc_CONST mm128_reinterpret_cast(VC_ALIGNED_PARAMETER(From) v) { return v; } template<> Vc_ALWAYS_INLINE _M128I Vc_CONST mm128_reinterpret_cast<_M128I, _M128 >(VC_ALIGNED_PARAMETER(_M128 ) v) { return _mm_castps_si128(v); } template<> Vc_ALWAYS_INLINE _M128I Vc_CONST mm128_reinterpret_cast<_M128I, _M128D>(VC_ALIGNED_PARAMETER(_M128D) v) { return _mm_castpd_si128(v); } template<> Vc_ALWAYS_INLINE _M128 Vc_CONST mm128_reinterpret_cast<_M128 , _M128D>(VC_ALIGNED_PARAMETER(_M128D) v) { return _mm_castpd_ps(v); } template<> Vc_ALWAYS_INLINE _M128 Vc_CONST mm128_reinterpret_cast<_M128 , _M128I>(VC_ALIGNED_PARAMETER(_M128I) v) { return _mm_castsi128_ps(v); } template<> Vc_ALWAYS_INLINE _M128D Vc_CONST mm128_reinterpret_cast<_M128D, _M128I>(VC_ALIGNED_PARAMETER(_M128I) v) { return _mm_castsi128_pd(v); } template<> Vc_ALWAYS_INLINE _M128D Vc_CONST mm128_reinterpret_cast<_M128D, _M128 >(VC_ALIGNED_PARAMETER(_M128 ) v) { return _mm_castps_pd(v); } template static Vc_ALWAYS_INLINE To Vc_CONST sse_cast(VC_ALIGNED_PARAMETER(From) v) { return mm128_reinterpret_cast(v); } template struct StaticCastHelper {}; template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128I cast(const _M128 &v) { return _mm_cvttps_epi32(v); } }; template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128I cast(const _M128D &v) { return _mm_cvttpd_epi32(v); } }; template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128I cast(const _M128I &v) { return v; } }; template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128I cast(const _M128I &v) { return v; } }; template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128I cast(const _M128 &v) { return _mm_castps_si128(mm_blendv_ps( _mm_castsi128_ps(_mm_cvttps_epi32(v)), _mm_castsi128_ps(_mm_add_epi32(_mm_cvttps_epi32(_mm_sub_ps(v, _mm_set1_ps(1u << 31))), _mm_set1_epi32(1 << 31))), _mm_cmpge_ps(v, _mm_set1_ps(1u << 31)) )); } }; template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128I cast(const _M128D &v) { return _mm_cvttpd_epi32(v); } }; template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128I cast(const _M128I &v) { return v; } }; template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128I cast(const _M128I &v) { return v; } }; template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128 cast(const _M128 &v) { return v; } }; template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128 cast(const _M128D &v) { return _mm_cvtpd_ps(v); } }; template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128 cast(const _M128I &v) { return _mm_cvtepi32_ps(v); } }; template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128 cast(const _M128I &v) { return mm_blendv_ps( _mm_cvtepi32_ps(v), _mm_add_ps(_mm_cvtepi32_ps(_mm_sub_epi32(v, _mm_set1_epi32(1 << 31))), _mm_set1_ps(1u << 31)), _mm_castsi128_ps(_mm_cmplt_epi32(v, _mm_setzero_si128())) ); } }; template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128D cast(const _M128 &v) { return _mm_cvtps_pd(v); } }; template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128D cast(const _M128D &v) { return v; } }; template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128D cast(const _M128I &v) { return _mm_cvtepi32_pd(v); } }; template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128D cast(const _M128I &v) { return _mm_cvtepi32_pd(v); } }; template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE M256 cast(const _M128I &v) { return M256::create(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v, _mm_setzero_si128())), _mm_cvtepi32_ps(_mm_unpackhi_epi16(v, _mm_setzero_si128()))); } }; template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE M256 cast(const _M128I &v) { const _M128I neg = _mm_cmplt_epi16(v, _mm_setzero_si128()); return M256::create(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v, neg)), _mm_cvtepi32_ps(_mm_unpackhi_epi16(v, neg))); } }; template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128I cast(const M256 &v) { return _mm_packs_epi32(_mm_cvttps_epi32(v[0]), _mm_cvttps_epi32(v[1])); } }; #ifdef VC_IMPL_SSE4_1 template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128I cast(const M256 &v) { return _mm_packus_epi32(_mm_cvttps_epi32(v[0]), _mm_cvttps_epi32(v[1])); } }; #else template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128I cast(const M256 &v) { return _mm_add_epi16(_mm_set1_epi16(-32768), _mm_packs_epi32( _mm_add_epi32(_mm_set1_epi32(-32768), _mm_cvttps_epi32(v[0])), _mm_add_epi32(_mm_set1_epi32(-32768), _mm_cvttps_epi32(v[1])) ) ); } }; #endif template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128I cast(const _M128 &v) { return _mm_packs_epi32(_mm_cvttps_epi32(v), _mm_setzero_si128()); } }; template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128I cast(const _M128I &v) { return v; } }; template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128I cast(const _M128I &v) { return v; } }; template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128I cast(const _M128 &v) { return _mm_packs_epi32(_mm_cvttps_epi32(v), _mm_setzero_si128()); } }; template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128I cast(const _M128I &v) { return v; } }; template<> struct StaticCastHelper { static Vc_ALWAYS_INLINE _M128I cast(const _M128I &v) { return v; } }; } // namespace SSE } // namespace Vc } // namespace ROOT #endif // SSE_CASTS_H