added neon support for popcount (#861)

added neon support for popcount
This commit is contained in:
Alden Torres 2016-06-27 18:51:43 -04:00 committed by Arvid Norberg
parent 9233edae4e
commit f7515edc8b
6 changed files with 59 additions and 2 deletions

View File

@ -40,6 +40,7 @@ namespace libtorrent { namespace aux
// initialized by static initializers (in cpuid.cpp) // initialized by static initializers (in cpuid.cpp)
TORRENT_EXTRA_EXPORT extern bool sse42_support; TORRENT_EXTRA_EXPORT extern bool sse42_support;
TORRENT_EXTRA_EXPORT extern bool mmx_support; TORRENT_EXTRA_EXPORT extern bool mmx_support;
// true when the runtime probe in cpuid.cpp reported ARM NEON support
TORRENT_EXTRA_EXPORT extern bool arm_neon_support;
} } } }
#endif // TORRENT_CPUID_HPP_INCLUDED #endif // TORRENT_CPUID_HPP_INCLUDED

View File

@ -538,5 +538,10 @@ POSSIBILITY OF SUCH DAMAGE.
# define TORRENT_HAS_BUILTIN_CLZ 0 # define TORRENT_HAS_BUILTIN_CLZ 0
#endif // TORRENT_HAS_BUILTIN_CLZ #endif // TORRENT_HAS_BUILTIN_CLZ
#endif // TORRENT_CONFIG_HPP_INCLUDED #if TORRENT_HAS_ARM && defined __ARM_NEON
# define TORRENT_HAS_ARM_NEON 1
#else
# define TORRENT_HAS_ARM_NEON 0
#endif // TORRENT_HAS_ARM_NEON
#endif // TORRENT_CONFIG_HPP_INCLUDED

View File

@ -37,6 +37,10 @@ POSSIBILITY OF SUCH DAMAGE.
#include <intrin.h> #include <intrin.h>
#endif #endif
#if TORRENT_HAS_ARM_NEON
#include <arm_neon.h>
#endif
namespace libtorrent namespace libtorrent
{ {
bool bitfield::all_set() const bool bitfield::all_set() const
@ -79,6 +83,25 @@ namespace libtorrent
} }
#endif // TORRENT_HAS_SSE #endif // TORRENT_HAS_SSE
#if TORRENT_HAS_ARM_NEON
// NEON popcount path, taken only when the runtime probe reported NEON.
if (aux::arm_neon_support)
{
	for (int i = 0; i < words; ++i)
	{
		// Broadcast exactly one 32-bit word into both lanes of a 64-bit
		// vector. Loading with vld1_u8() instead would fetch 8 bytes per
		// 4-byte word and read past the end of m_buf on the last word.
		uint32x2_t const word = vdup_n_u32(m_buf[i]);
		// per-byte bit counts, then pairwise widening adds: 8 -> 16 -> 32 bit
		uint8x8_t const cnt8x8_val = vcnt_u8(vreinterpret_u8_u32(word));
		uint16x4_t const cnt16x4_val = vpaddl_u8(cnt8x8_val);
		uint32x2_t const cnt32x2_val = vpaddl_u16(cnt16x4_val);
		// Both lanes hold the same total, so extract lane 0 directly.
		// vst1_u32() would store both lanes (8 bytes) and overflow a
		// single 4-byte destination.
		ret += vget_lane_u32(cnt32x2_val, 0);
	}
	return ret;
}
#endif // TORRENT_HAS_ARM_NEON
for (int i = 0; i < words; ++i) for (int i = 0; i < words; ++i)
{ {
std::uint32_t v = m_buf[i]; std::uint32_t v = m_buf[i];

View File

@ -44,6 +44,10 @@ POSSIBILITY OF SUCH DAMAGE.
#include <cstring> // for std::memset #include <cstring> // for std::memset
#endif #endif
#if TORRENT_HAS_ARM
#include <sys/auxv.h>
#endif
namespace libtorrent { namespace aux namespace libtorrent { namespace aux
{ {
namespace { namespace {
@ -87,8 +91,24 @@ namespace libtorrent { namespace aux
#endif #endif
} }
// Runtime probe for ARM NEON (Advanced SIMD) support.
// Returns false when the build has TORRENT_HAS_ARM_NEON disabled or targets
// an architecture this probe has no case for; otherwise returns whether the
// relevant bit is set in the value reported by getauxval().
bool supports_arm_neon()
{
#if TORRENT_HAS_ARM_NEON
#if defined __arm__
	// same as (getauxval(AT_HWCAP) & HWCAP_NEON), spelled with literal values
	return (getauxval(16) & (1UL << 12)) != 0;
#elif defined __aarch64__
	// same as (getauxval(AT_HWCAP) & HWCAP_ASIMD), spelled with literal values
	return (getauxval(16) & (1UL << 1)) != 0;
#else
	// neither 32-bit nor 64-bit ARM: nothing to probe, and a non-void
	// function must not fall off its end
	return false;
#endif // __arm__ / __aarch64__
#else
	return false;
#endif // TORRENT_HAS_ARM_NEON
}
} // anonymous namespace } // anonymous namespace
bool sse42_support = supports_sse42(); bool sse42_support = supports_sse42();
bool mmx_support = supports_mmx(); bool mmx_support = supports_mmx();
// result of the ARM NEON probe, evaluated once by this static initializer
bool arm_neon_support = supports_arm_neon();
} } } }

View File

@ -125,7 +125,6 @@ test-suite libtorrent :
test_socket_io.cpp test_socket_io.cpp
# test_random.cpp # test_random.cpp
test_gzip.cpp test_gzip.cpp
test_bitfield.cpp
test_part_file.cpp test_part_file.cpp
test_peer_list.cpp test_peer_list.cpp
test_torrent_info.cpp test_torrent_info.cpp
@ -160,6 +159,7 @@ test-suite libtorrent :
] ]
[ run test_sha1_hash.cpp ] [ run test_sha1_hash.cpp ]
[ run test_bitfield.cpp ]
[ run test_receive_buffer.cpp ] [ run test_receive_buffer.cpp ]
[ run test_alert_manager.cpp ] [ run test_alert_manager.cpp ]
[ run test_direct_dht.cpp ] [ run test_direct_dht.cpp ]
@ -271,6 +271,7 @@ explicit win-tests ;
alias arm-tests : alias arm-tests :
test_sha1_hash test_sha1_hash
test_bitfield
; ;
explicit arm-tests ; explicit arm-tests ;

View File

@ -32,6 +32,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "test.hpp" #include "test.hpp"
#include "libtorrent/bitfield.hpp" #include "libtorrent/bitfield.hpp"
#include "libtorrent/aux_/cpuid.hpp"
#include <stdlib.h> #include <stdlib.h>
using namespace libtorrent; using namespace libtorrent;
@ -187,4 +188,10 @@ TORRENT_TEST(bitfield)
TEST_EQUAL(test1.count(), 32 + 8 + 2); TEST_EQUAL(test1.count(), 32 + 8 + 2);
TEST_EQUAL(test1.all_set(), true); TEST_EQUAL(test1.all_set(), true);
} }
// arm_neon_support can only be true in builds where NEON is enabled
// (TORRENT_HAS_ARM_NEON); an ARM build without NEON always reports false,
// so the expectation is keyed on the NEON macro rather than on ARM alone.
#if TORRENT_HAS_ARM_NEON
TEST_EQUAL(aux::arm_neon_support, true);
#else
TEST_EQUAL(aux::arm_neon_support, false);
#endif
} }