aboutsummaryrefslogtreecommitdiff
path: root/source/row_common.cc
diff options
context:
space:
mode:
authorFrank Barchard <fbarchard@google.com>2021-10-15 12:12:02 -0700
committerlibyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com>2021-10-15 19:46:02 +0000
commit55b97cb48f027d2af417ce1f895cefad2ed1ce23 (patch)
treeb1091fee47160f8027abc07ceb0ef5701eb5af86 /source/row_common.cc
parent11cbf8f976a41ccb279dc67489832ea9f12d56d7 (diff)
downloadlibyuv-55b97cb48f027d2af417ce1f895cefad2ed1ce23.tar.gz
BIT_EXACT for unattenuate and attenuate.
- reenable Intel SIMD unaffected by BIT_EXACT - add bit exact version of ARGBAttenuate, which uses ARM version of formula. - add bit exact version of ARGBUnatenuate, which mimics the AVX code. Apply clang format to cleanup code. Bug: libyuv:908, b/202888439 Change-Id: Ie842b1b3956b48f4190858e61c02998caedc2897 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3224702 Commit-Queue: Frank Barchard <fbarchard@chromium.org> Reviewed-by: richard winterton <rrwinterton@gmail.com>
Diffstat (limited to 'source/row_common.cc')
-rw-r--r--source/row_common.cc36
1 files changed, 24 insertions, 12 deletions
diff --git a/source/row_common.cc b/source/row_common.cc
index a5ab81f2..092e538e 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -28,14 +28,20 @@ extern "C" {
// The following macro from row_win makes the C code match the row_win code,
// which is 7 bit fixed point for ARGBToI420:
-#if !defined(LIBYUV_BIT_EXACT) && !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
- !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64))
+#if !defined(LIBYUV_BIT_EXACT) && !defined(LIBYUV_DISABLE_X86) && \
+ defined(_MSC_VER) && !defined(__clang__) && \
+ (defined(_M_IX86) || defined(_M_X64))
#define LIBYUV_RGB7 1
#endif
-#if !defined(LIBYUV_BIT_EXACT) && (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86))
+#if !defined(LIBYUV_BIT_EXACT) && (defined(__x86_64__) || defined(_M_X64) || \
+ defined(__i386__) || defined(_M_IX86))
#define LIBYUV_ARGBTOUV_PAVGB 1
#define LIBYUV_RGBTOU_TRUNCATE 1
+#define LIBYUV_ATTENUATE_DUP 1
+#endif
+#if defined(LIBYUV_BIT_EXACT)
+#define LIBYUV_UNATTENUATE_DUP 1
#endif
// llvm x86 is poor at ternary operator, so use branchless min/max.
@@ -3151,11 +3157,11 @@ void BlendPlaneRow_C(const uint8_t* src0,
}
#undef UBLEND
-#if defined(__aarch64__) || defined(__arm__)
-#define ATTENUATE(f, a) (f * a + 128) >> 8
-#else
+#if LIBYUV_ATTENUATE_DUP
// This code mimics the SSSE3 version for better testability.
#define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24
+#else
+#define ATTENUATE(f, a) (f * a + 128) >> 8
#endif
// Multiply source RGB by alpha and store to destination.
@@ -3242,6 +3248,14 @@ const uint32_t fixed_invtbl8[256] = {
T(0xfc), T(0xfd), T(0xfe), 0x01000100};
#undef T
+#if LIBYUV_UNATTENUATE_DUP
+// This code mimics the Intel SIMD version for better testability.
+#define UNATTENUATE(f, ia) clamp255(((f | (f << 8)) * ia) >> 16)
+#else
+#define UNATTENUATE(f, ia) clamp255((f * ia) >> 8)
+#endif
+
+// mimics the Intel SIMD code for exactness.
void ARGBUnattenuateRow_C(const uint8_t* src_argb,
uint8_t* dst_argb,
int width) {
@@ -3252,13 +3266,11 @@ void ARGBUnattenuateRow_C(const uint8_t* src_argb,
uint32_t r = src_argb[2];
const uint32_t a = src_argb[3];
const uint32_t ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point
- b = (b * ia) >> 8;
- g = (g * ia) >> 8;
- r = (r * ia) >> 8;
+
// Clamping should not be necessary but is free in assembly.
- dst_argb[0] = clamp255(b);
- dst_argb[1] = clamp255(g);
- dst_argb[2] = clamp255(r);
+ dst_argb[0] = UNATTENUATE(b, ia);
+ dst_argb[1] = UNATTENUATE(g, ia);
+ dst_argb[2] = UNATTENUATE(r, ia);
dst_argb[3] = a;
src_argb += 4;
dst_argb += 4;