Use ARM NEON inline assembly, and enable it only on 32-bit ARM (#1201)

Use ARM NEON inline assembly.
This commit is contained in:
Alden Torres 2016-10-11 22:02:02 -06:00 committed by Arvid Norberg
parent 7e0a2dbcbb
commit e0d7729f87
2 changed files with 13 additions and 13 deletions

View File

@ -37,10 +37,6 @@ POSSIBILITY OF SUCH DAMAGE.
#include <intrin.h>
#endif
#if TORRENT_HAS_ARM_NEON
#include <arm_neon.h>
#endif
namespace libtorrent
{
bool bitfield::all_set() const
@ -85,18 +81,20 @@ namespace libtorrent
}
#endif // TORRENT_HAS_SSE
#if TORRENT_HAS_ARM_NEON
#if TORRENT_HAS_ARM_NEON && defined __arm__
if (aux::arm_neon_support)
{
for (int i = 1; i < words + 1; ++i)
{
uint8x8_t const in_val = vld1_u8(reinterpret_cast<unsigned char*>(&m_buf[i]));
uint8x8_t const cnt8x8_val = vcnt_u8(in_val);
uint16x4_t const cnt16x4_val = vpaddl_u8(cnt8x8_val);
uint32x2_t const cnt32x2_val = vpaddl_u16(cnt16x4_val);
uint32_t cnt;
vst1_u32(&cnt, cnt32x2_val);
std::uint32_t cnt;
__asm__(
"vld1.u32 d0[0], [%1] \n"
"vcnt.u8 d0, d0 \n"
"vpaddl.u8 d0, d0 \n"
"vpaddl.u16 d0, d0 \n"
"vst1.u32 d0[0], [%0]"
:: "r"(&cnt), "r"(&m_buf[i])
: "d0", "memory");
ret += cnt;
}

View File

@ -101,7 +101,9 @@ namespace libtorrent { namespace aux
return (getauxval(16) & (1 << 12));
#elif defined __aarch64__
//return (getauxval(AT_HWCAP) & HWCAP_ASIMD);
return (getauxval(16) & (1 << 1));
//return (getauxval(16) & (1 << 1));
// TODO: enable when aarch64 is really tested
return false;
#endif
#else
return false;