about summary refs log tree commit diff
path: root/files/source/compare_win.cc
diff options
context:
space:
mode:
Diffstat (limited to 'files/source/compare_win.cc')
-rw-r--r-- files/source/compare_win.cc 37
1 files changed, 28 insertions, 9 deletions
diff --git a/files/source/compare_win.cc b/files/source/compare_win.cc
index b17fc8e1..d57d3d9d 100644
--- a/files/source/compare_win.cc
+++ b/files/source/compare_win.cc
@@ -13,16 +13,35 @@
#include "libyuv/compare_row.h"
#include "libyuv/row.h"
+#if defined(_MSC_VER)
+#include <intrin.h> // For __popcnt
+#endif
+
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// This module is for 32 bit Visual C x86 and clangcl
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
+
+uint32_t HammingDistance_SSE42(const uint8_t* src_a,  // first byte buffer
+ const uint8_t* src_b,  // second byte buffer
+ int count) {  // byte count; consumed 4 bytes at a time
+ uint32_t diff = 0u;  // running total of differing bits, returned to the caller
+ // Counts the bits that differ between src_a and src_b, one 32-bit word per step.
+ int i;
+ for (i = 0; i < count - 3; i += 4) {  // a trailing 1-3 bytes (count % 4) are never examined
+ uint32_t x = *((uint32_t*)src_a) ^ *((uint32_t*)src_b); // NOLINT
+ src_a += 4;  // advance both inputs past the word just compared
+ src_b += 4;
+ diff += __popcnt(x);  // x has a 1 bit wherever the inputs differ; add the count of those bits
+ }
+ return diff;
+}
-__declspec(naked) uint32
- SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
+__declspec(naked) uint32_t
+ SumSquareError_SSE2(const uint8_t* src_a, const uint8_t* src_b, int count) {
__asm {
mov eax, [esp + 4] // src_a
mov edx, [esp + 8] // src_b
@@ -62,8 +81,8 @@ __declspec(naked) uint32
#if _MSC_VER >= 1700
// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
#pragma warning(disable : 4752)
-__declspec(naked) uint32
- SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
+__declspec(naked) uint32_t
+ SumSquareError_AVX2(const uint8_t* src_a, const uint8_t* src_b, int count) {
__asm {
mov eax, [esp + 4] // src_a
mov edx, [esp + 8] // src_b
@@ -127,8 +146,8 @@ uvec32 kHashMul3 = {
0x00000001, // 33 ^ 0
};
-__declspec(naked) uint32
- HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
+__declspec(naked) uint32_t
+ HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed) {
__asm {
mov eax, [esp + 4] // src
mov ecx, [esp + 8] // count
@@ -178,8 +197,8 @@ __declspec(naked) uint32
// Visual C 2012 required for AVX2.
#if _MSC_VER >= 1700
-__declspec(naked) uint32
- HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
+__declspec(naked) uint32_t
+ HashDjb2_AVX2(const uint8_t* src, int count, uint32_t seed) {
__asm {
mov eax, [esp + 4] // src
mov ecx, [esp + 8] // count