aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author android-build-team Robot <android-build-team-robot@google.com> 2019-07-03 03:01:19 +0000
committer android-build-team Robot <android-build-team-robot@google.com> 2019-07-03 03:01:19 +0000
commit eed3f6460d0b4ee34e76dc729bef4238b40cc9b9 (patch)
tree 9e8f433f6be822b0955c52d702888844d3caf8c8
parent 96399584ccc0b735f43543f5599d246558c871cf (diff)
parent 211e09de645270d087fec038afaadb8343e45f03 (diff)
download libyuv-eed3f6460d0b4ee34e76dc729bef4238b40cc9b9.tar.gz
Snap for 5704056 from 211e09de645270d087fec038afaadb8343e45f03 to rvc-release
Change-Id: I8af18a718796eb077246a450f2e36aa06ed9f325
-rw-r--r-- README.version 2
-rw-r--r-- files/DEPS 148
-rw-r--r-- files/README.chromium 2
-rw-r--r-- files/include/libyuv/convert.h 44
-rw-r--r-- files/include/libyuv/convert_argb.h 12
-rw-r--r-- files/include/libyuv/planar_functions.h 13
-rw-r--r-- files/include/libyuv/rotate.h 18
-rw-r--r-- files/include/libyuv/row.h 77
-rw-r--r-- files/include/libyuv/scale.h 48
-rw-r--r-- files/include/libyuv/version.h 2
-rw-r--r-- files/source/convert.cc 405
-rw-r--r-- files/source/convert_argb.cc 51
-rw-r--r-- files/source/convert_from.cc 4
-rw-r--r-- files/source/convert_jpeg.cc 42
-rw-r--r-- files/source/mjpeg_decoder.cc 13
-rw-r--r-- files/source/mjpeg_validate.cc 27
-rw-r--r-- files/source/planar_functions.cc 58
-rw-r--r-- files/source/rotate.cc 60
-rw-r--r-- files/source/row_any.cc 44
-rw-r--r-- files/source/row_common.cc 113
-rw-r--r-- files/source/row_gcc.cc 123
-rw-r--r-- files/source/row_neon.cc 245
-rw-r--r-- files/source/row_neon64.cc 200
-rw-r--r-- files/source/row_win.cc 8
-rw-r--r-- files/source/scale.cc 69
-rw-r--r-- files/source/scale_gcc.cc 16
-rw-r--r-- files/source/scale_mmi.cc 4
-rw-r--r-- files/source/scale_neon.cc 12
-rw-r--r-- files/source/scale_neon64.cc 16
-rwxr-xr-x files/tools_libyuv/autoroller/roll_deps.py 9
-rw-r--r-- files/unit_test/convert_test.cc 258
-rw-r--r-- files/unit_test/planar_test.cc 106
-rw-r--r-- files/unit_test/rotate_test.cc 117
-rw-r--r-- files/unit_test/scale_test.cc 355
-rw-r--r-- files/util/psnr.cc 2
35 files changed, 2457 insertions, 266 deletions
diff --git a/README.version b/README.version
index 889f604c..0e74ad15 100644
--- a/README.version
+++ b/README.version
@@ -1,3 +1,3 @@
-Version: r1722
+Version: r1732
BugComponent: 42195
Owner: lajos
diff --git a/files/DEPS b/files/DEPS
index ac20e062..c5f81b86 100644
--- a/files/DEPS
+++ b/files/DEPS
@@ -1,7 +1,7 @@
vars = {
'chromium_git': 'https://chromium.googlesource.com',
- 'chromium_revision': '35b72bf255d6519506b7e732f9c74205d2ab452d',
- 'swarming_revision': '486c9b53c4d54dd4b95bb6ce0e31160e600dfc11',
+ 'chromium_revision': '4476bd69d1c8e4e1cde8633d3b33c992f7d3a6d0',
+ 'swarming_revision': '0e3e1c4dc4e79f25a5b58fcbc135dc93183c0c54',
# Three lines of non-changing comments so that
# the commit queue can handle CLs rolling lss
# and whatever else without interference from each other.
@@ -9,34 +9,69 @@ vars = {
# Three lines of non-changing comments so that
# the commit queue can handle CLs rolling catapult
# and whatever else without interference from each other.
- 'catapult_revision': '0d25dda9b148bcd2dad9e1080b1dc57eaf9d2c2a',
+ 'catapult_revision': 'a24a725f7834c16b3628bfb63f349b3480bf9592',
+ # the commit queue can handle CLs rolling android_sdk_build-tools_version
+ # and whatever else without interference from each other.
+ 'android_sdk_build-tools_version': 'DLK621q5_Bga5EsOr7cp6bHWWxFKx6UHLu_Ix_m3AckC',
+ # Three lines of non-changing comments so that
+ # the commit queue can handle CLs rolling android_sdk_emulator_version
+ # and whatever else without interference from each other.
+ 'android_sdk_emulator_version': 'ki7EDQRAiZAUYlnTWR1XmI6cJTk65fJ-DNZUU1zrtS8C',
+ # Three lines of non-changing comments so that
+ # the commit queue can handle CLs rolling android_sdk_extras_version
+ # and whatever else without interference from each other.
+ 'android_sdk_extras_version': 'iIwhhDox5E-mHgwUhCz8JACWQCpUjdqt5KTY9VLugKQC',
+ # Three lines of non-changing comments so that
+ # the commit queue can handle CLs rolling android_sdk_patcher_version
+ # and whatever else without interference from each other.
+ 'android_sdk_patcher_version': 'I6FNMhrXlpB-E1lOhMlvld7xt9lBVNOO83KIluXDyA0C',
+ # Three lines of non-changing comments so that
+ # the commit queue can handle CLs rolling android_sdk_platform-tools_version
+ # and whatever else without interference from each other.
+ 'android_sdk_platform-tools_version': '4Y2Cb2LGzoc-qt-oIUIlhySotJaKeE3ELFedSVe6Uk8C',
+ # Three lines of non-changing comments so that
+ # the commit queue can handle CLs rolling android_sdk_platforms_version
+ # and whatever else without interference from each other.
+ 'android_sdk_platforms_version': 'Kg2t9p0YnQk8bldUv4VA3o156uPXLUfIFAmVZ-Gm5ewC',
+ # Three lines of non-changing comments so that
+ # the commit queue can handle CLs rolling android_sdk_sources_version
+ # and whatever else without interference from each other.
+ 'android_sdk_sources_version': 'K9uEn3JvNELEVjjVK_GQD3ZQD3rqAnJSxCWxjmUmRkgC',
+ # Three lines of non-changing comments so that
+ # the commit queue can handle CLs rolling android_sdk_tools_version
+ # and whatever else without interference from each other.
+ 'android_sdk_tools_version': 'wYcRQC2WHsw2dKWs4EA7fw9Qsyzu1ds1_fRjKmGxe5QC',
+ # Three lines of non-changing comments so that
+ # the commit queue can handle CLs rolling android_sdk_tools-lint_version
+ # and whatever else without interference from each other.
+ 'android_sdk_tools-lint_version': '89hXqZYzCum3delB5RV7J_QyWkaRodqdtQS0s3LMh3wC',
}
deps = {
'src/build':
- Var('chromium_git') + '/chromium/src/build' + '@' + 'f79db013c75bff172913707cd762eba847838fea',
+ Var('chromium_git') + '/chromium/src/build' + '@' + '669e41d6f18842ed5740449662a71b715dc607c6',
'src/buildtools':
- Var('chromium_git') + '/chromium/buildtools.git' + '@' + '9a90d9aaadeb5e04327ed05775f45132e4b3523f',
+ Var('chromium_git') + '/chromium/buildtools.git' + '@' + '0e1cbc4eab6861b0c84bf2ed9a3c4b7aa2063819',
'src/testing':
- Var('chromium_git') + '/chromium/src/testing' + '@' + 'd2fde4ae5b8d0a5021e6f79d2f4a62e83ba348bc',
+ Var('chromium_git') + '/chromium/src/testing' + '@' + 'b1c6aeebeabcc177a83ff0a33dc6c3ab03d4aa94',
'src/third_party':
- Var('chromium_git') + '/chromium/src/third_party' + '@' + 'f931bb4f2bdcb327d066052df1914cab4bd68c50',
+ Var('chromium_git') + '/chromium/src/third_party' + '@' + 'be3e0fc18f2e9ea14d0e9369e539eae5986335fd',
'src/third_party/catapult':
Var('chromium_git') + '/catapult.git' + '@' + Var('catapult_revision'),
'src/third_party/colorama/src':
Var('chromium_git') + '/external/colorama.git' + '@' + '799604a1041e9b3bc5d2789ecbd7e8db2e18e6b8',
'src/third_party/freetype/src':
- Var('chromium_git') + '/chromium/src/third_party/freetype2.git' + '@' + '578bcf103a12fb742cdb314565819011d1ac12a7',
+ Var('chromium_git') + '/chromium/src/third_party/freetype2.git' + '@' + 'd01e28f41f8810c8ea422b854f8722659589fa99',
'src/third_party/googletest/src':
- Var('chromium_git') + '/external/github.com/google/googletest.git' + '@' + 'd5266326752f0a1dadbd310932d8f4fd8c3c5e7d',
+ Var('chromium_git') + '/external/github.com/google/googletest.git' + '@' + '879ac092fde0a19e1b3a61b2546b2a422b1528bc',
'src/third_party/harfbuzz-ng/src':
- Var('chromium_git') + '/external/github.com/harfbuzz/harfbuzz.git' + '@' + '2b76767bf572364d3d647cdd139f2044a7ad06b2',
+ Var('chromium_git') + '/external/github.com/harfbuzz/harfbuzz.git' + '@' + '26c5b54fb09fb45e02c9c4618bcea4958c698953',
'src/third_party/libjpeg_turbo':
- Var('chromium_git') + '/chromium/deps/libjpeg_turbo.git' + '@' + 'a1750dbc79a8792dde3d3f7d7d8ac28ba01ac9dd',
+ Var('chromium_git') + '/chromium/deps/libjpeg_turbo.git' + '@' + '61a2bbaa9aec89cb2c882d87ace6aba9aee49bb9',
'src/third_party/yasm/source/patched-yasm':
Var('chromium_git') + '/chromium/deps/yasm/patched-yasm.git' + '@' + '720b70524a4424b15fc57e82263568c8ba0496ad',
'src/tools':
- Var('chromium_git') + '/chromium/src/tools' + '@' + 'f2c6ed916b94176158763400de308c2afd56b259',
+ Var('chromium_git') + '/chromium/src/tools' + '@' + '419541c8352b3b75a99c9a5a7c0d1e7b92f3fcf7',
'src/tools/swarming_client':
Var('chromium_git') + '/infra/luci/client-py.git' + '@' + Var('swarming_revision'),
@@ -69,7 +104,7 @@ deps = {
'condition': 'checkout_android',
},
'src/base': {
- 'url': Var('chromium_git') + '/chromium/src/base' + '@' + '6c0497f398c5f6e6af0c66fbf4d77e875eb3f2b1',
+ 'url': Var('chromium_git') + '/chromium/src/base' + '@' + '162a5d66ad148f26bbbe6b6ecaf5c1bafa2173e6',
'condition': 'checkout_android',
},
'src/third_party/bazel': {
@@ -93,7 +128,7 @@ deps = {
'dep_type': 'cipd',
},
'src/third_party/android_ndk': {
- 'url': Var('chromium_git') + '/android_ndk.git' + '@' + '5cd86312e794bdf542a3685c6f10cbb96072990b',
+ 'url': Var('chromium_git') + '/android_ndk.git' + '@' + '4e2cea441bfd43f0863d14f57b1e1844260b9884',
'condition': 'checkout_android',
},
'src/third_party/android_support_test_runner': {
@@ -107,9 +142,61 @@ deps = {
'dep_type': 'cipd',
},
'src/third_party/android_tools': {
- 'url': Var('chromium_git') + '/android_tools.git' + '@' + '130499e25286f4d56acafa252fee09f3cc595c49',
+ 'url': Var('chromium_git') + '/android_tools.git' + '@' + 'e958d6ea74442d4e0849bb8a018d215a0e78981d',
'condition': 'checkout_android',
},
+ 'src/third_party/android_sdk/public': {
+ 'packages': [
+ {
+ 'package': 'chromium/third_party/android_sdk/public/build-tools',
+ 'version': Var('android_sdk_build-tools_version'),
+ },
+ {
+ 'package': 'chromium/third_party/android_sdk/public/emulator',
+ 'version': Var('android_sdk_emulator_version'),
+ },
+ {
+ 'package': 'chromium/third_party/android_sdk/public/extras',
+ 'version': Var('android_sdk_extras_version'),
+ },
+ {
+ 'package': 'chromium/third_party/android_sdk/public/patcher',
+ 'version': Var('android_sdk_patcher_version'),
+ },
+ {
+ 'package': 'chromium/third_party/android_sdk/public/platform-tools',
+ 'version': Var('android_sdk_platform-tools_version'),
+ },
+ {
+ 'package': 'chromium/third_party/android_sdk/public/platforms',
+ 'version': Var('android_sdk_platforms_version'),
+ },
+ {
+ 'package': 'chromium/third_party/android_sdk/public/sources',
+ 'version': Var('android_sdk_sources_version'),
+ },
+ {
+ 'package': 'chromium/third_party/android_sdk/public/tools',
+ 'version': Var('android_sdk_tools_version'),
+ },
+ {
+ 'package': 'chromium/third_party/android_sdk/public/tools-lint',
+ 'version': Var('android_sdk_tools-lint_version'),
+ },
+ ],
+ 'condition': 'checkout_android_native_support',
+ 'dep_type': 'cipd',
+ },
+ 'src/third_party/android_build_tools/aapt2': {
+ 'packages': [
+ {
+ 'package': 'chromium/third_party/android_tools_aapt2',
+ 'version': 'version:3.2.0-alpha18-4804415-cr0',
+ },
+ ],
+ 'condition': 'checkout_android',
+ 'dep_type': 'cipd',
+ },
'src/third_party/byte_buddy': {
'packages': [
{
@@ -163,7 +250,7 @@ deps = {
'dep_type': 'cipd',
},
'src/third_party/icu': {
- 'url': Var('chromium_git') + '/chromium/deps/icu.git' + '@' + '297a4dd02b9d36c92ab9b4f121e433c9c3bc14f8',
+ 'url': Var('chromium_git') + '/chromium/deps/icu.git' + '@' + 'd65301491c513d49163ad29c853eb85c02c8d5b4',
},
'src/third_party/icu4j': {
'packages': [
@@ -227,6 +314,16 @@ deps = {
'condition': 'checkout_android',
'dep_type': 'cipd',
},
+ 'src/third_party/proguard': {
+ 'packages': [
+ {
+ 'package': 'chromium/third_party/proguard',
+ 'version': '3bd778c422ea5496de2ef25c007a517dbb5ce5ca',
+ },
+ ],
+ 'condition': 'checkout_android',
+ 'dep_type': 'cipd',
+ },
'src/third_party/requests/src': {
'url': Var('chromium_git') + '/external/github.com/kennethreitz/requests.git' + '@' + 'f172b30356d821d180fa4ecfa3e71c7274a32de4',
'condition': 'checkout_android',
@@ -272,7 +369,7 @@ deps = {
# iOS deps:
'src/ios': {
- 'url': Var('chromium_git') + '/chromium/src/ios' + '@' + '8e45eb00dffadde7e0669a881991e237b0b7a8eb',
+ 'url': Var('chromium_git') + '/chromium/src/ios' + '@' + '44be3c093cf2db7ab4cf1997d6a1a07722f1f391',
'condition': 'checkout_ios'
},
@@ -747,6 +844,17 @@ deps = {
'dep_type': 'cipd',
},
+ 'src/third_party/android_deps/libs/com_squareup_javapoet': {
+ 'packages': [
+ {
+ 'package': 'chromium/third_party/android_deps/libs/com_squareup_javapoet',
+ 'version': 'version:1.11.0-cr0',
+ },
+ ],
+ 'condition': 'checkout_android',
+ 'dep_type': 'cipd',
+ },
+
# === ANDROID_DEPS Generated Code End ===
}
@@ -971,12 +1079,10 @@ hooks = [
# such dependencies we share with Chromium.
{
# This downloads SDK extras and puts them in the
- # third_party/android_tools/sdk/extras directory.
+ # third_party/android_sdk/public/extras directory.
'name': 'sdkextras',
'condition': 'checkout_android',
'pattern': '.',
- # When adding a new sdk extras package to download, add the package
- # directory and zip file to .gitignore in third_party/android_tools.
'action': ['vpython',
'src/build/android/play_services/update.py',
'download'
@@ -987,6 +1093,4 @@ hooks = [
recursedeps = [
# buildtools provides clang_format, libc++, and libc++abi.
'src/buildtools',
- # android_tools manages the NDK.
- 'src/third_party/android_tools',
]
diff --git a/files/README.chromium b/files/README.chromium
index 4a239b3c..bddc2023 100644
--- a/files/README.chromium
+++ b/files/README.chromium
@@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
-Version: 1722
+Version: 1732
License: BSD
License File: LICENSE
diff --git a/files/include/libyuv/convert.h b/files/include/libyuv/convert.h
index d8b47a83..f571142f 100644
--- a/files/include/libyuv/convert.h
+++ b/files/include/libyuv/convert.h
@@ -226,6 +226,28 @@ int UYVYToI420(const uint8_t* src_uyvy,
int width,
int height);
+// Convert AYUV to NV12.
+LIBYUV_API
+int AYUVToNV12(const uint8_t* src_ayuv,
+ int src_stride_ayuv,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height);
+
+// Convert AYUV to NV21.
+LIBYUV_API
+int AYUVToNV21(const uint8_t* src_ayuv,
+ int src_stride_ayuv,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_vu,
+ int dst_stride_vu,
+ int width,
+ int height);
+
// Convert M420 to I420.
LIBYUV_API
int M420ToI420(const uint8_t* src_m420,
@@ -322,6 +344,19 @@ int RGB24ToI420(const uint8_t* src_rgb24,
int width,
int height);
+// RGB little endian (bgr in memory) to J420.
+LIBYUV_API
+int RGB24ToJ420(const uint8_t* src_rgb24,
+ int src_stride_rgb24,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height);
+
// RGB big endian (rgb in memory) to I420.
LIBYUV_API
int RAWToI420(const uint8_t* src_raw,
@@ -374,6 +409,15 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
int width,
int height);
+// RGB little endian (bgr in memory) to J400.
+LIBYUV_API
+int RGB24ToJ400(const uint8_t* src_rgb24,
+ int src_stride_rgb24,
+ uint8_t* dst_yj,
+ int dst_stride_yj,
+ int width,
+ int height);
+
#ifdef HAVE_JPEG
// src_width/height provided by capture.
// dst_width/height for clipping determine final size.
diff --git a/files/include/libyuv/convert_argb.h b/files/include/libyuv/convert_argb.h
index 891c57de..e8ed1f59 100644
--- a/files/include/libyuv/convert_argb.h
+++ b/files/include/libyuv/convert_argb.h
@@ -256,6 +256,7 @@ int NV21ToARGB(const uint8_t* src_y,
int height);
// Convert NV12 to ABGR.
+LIBYUV_API
int NV12ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
@@ -298,6 +299,17 @@ int NV21ToRGB24(const uint8_t* src_y,
int width,
int height);
+// Convert NV21 to YUV24.
+LIBYUV_API
+int NV21ToYUV24(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_vu,
+ int src_stride_vu,
+ uint8_t* dst_yuv24,
+ int dst_stride_yuv24,
+ int width,
+ int height);
+
// Convert NV12 to RAW.
LIBYUV_API
int NV12ToRAW(const uint8_t* src_y,
diff --git a/files/include/libyuv/planar_functions.h b/files/include/libyuv/planar_functions.h
index 91137bab..f6f5b3ed 100644
--- a/files/include/libyuv/planar_functions.h
+++ b/files/include/libyuv/planar_functions.h
@@ -224,6 +224,19 @@ int UYVYToNV12(const uint8_t* src_uyvy,
int width,
int height);
+// Convert NV21 to NV12.
+LIBYUV_API
+int NV21ToNV12(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_vu,
+ int src_stride_vu,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height);
+
LIBYUV_API
int YUY2ToY(const uint8_t* src_yuy2,
int src_stride_yuy2,
diff --git a/files/include/libyuv/rotate.h b/files/include/libyuv/rotate.h
index 76b692be..c64e0216 100644
--- a/files/include/libyuv/rotate.h
+++ b/files/include/libyuv/rotate.h
@@ -49,6 +49,24 @@ int I420Rotate(const uint8_t* src_y,
int height,
enum RotationMode mode);
+// Rotate I444 frame.
+LIBYUV_API
+int I444Rotate(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height,
+ enum RotationMode mode);
+
// Rotate NV12 input and store in I420.
LIBYUV_API
int NV12ToI420Rotate(const uint8_t* src_y,
diff --git a/files/include/libyuv/row.h b/files/include/libyuv/row.h
index cc948878..9bb48850 100644
--- a/files/include/libyuv/row.h
+++ b/files/include/libyuv/row.h
@@ -284,9 +284,8 @@ extern "C" {
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
#define HAS_ABGRTOAR30ROW_AVX2
#define HAS_ARGBTOAR30ROW_AVX2
-// Fix AVX2 b:118386049 segfault
-//#define HAS_ARGBTORAWROW_AVX2
-//#define HAS_ARGBTORGB24ROW_AVX2
+#define HAS_ARGBTORAWROW_AVX2
+#define HAS_ARGBTORGB24ROW_AVX2
#define HAS_CONVERT16TO8ROW_AVX2
#define HAS_CONVERT8TO16ROW_AVX2
#define HAS_I210TOAR30ROW_AVX2
@@ -296,6 +295,8 @@ extern "C" {
#define HAS_I422TOYUY2ROW_AVX2
#define HAS_MERGEUVROW_16_AVX2
#define HAS_MULTIPLYROW_16_AVX2
+// TODO(fbarchard): Fix AVX2 version of YUV24
+// #define HAS_NV21TOYUV24ROW_AVX2
#endif
// The following are available for AVX512 clang x86 platforms:
@@ -304,8 +305,7 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \
(defined(CLANG_HAS_AVX512))
-// Fix AVX2 b:118386049 segfault
-//#define HAS_ARGBTORGB24ROW_AVX512VBMI
+#define HAS_ARGBTORGB24ROW_AVX512VBMI
#endif
// The following are available on Neon platforms:
@@ -332,6 +332,9 @@ extern "C" {
#define HAS_ARGBTOUVROW_NEON
#define HAS_ARGBTOYJROW_NEON
#define HAS_ARGBTOYROW_NEON
+#define HAS_AYUVTOUVROW_NEON
+#define HAS_AYUVTOVUROW_NEON
+#define HAS_AYUVTOYROW_NEON
#define HAS_BGRATOUVROW_NEON
#define HAS_BGRATOYROW_NEON
#define HAS_BYTETOFLOATROW_NEON
@@ -357,6 +360,7 @@ extern "C" {
#define HAS_NV12TORGB565ROW_NEON
#define HAS_NV21TOARGBROW_NEON
#define HAS_NV21TORGB24ROW_NEON
+#define HAS_NV21TOYUV24ROW_NEON
#define HAS_RAWTOARGBROW_NEON
#define HAS_RAWTORGB24ROW_NEON
#define HAS_RAWTOUVROW_NEON
@@ -372,6 +376,7 @@ extern "C" {
#define HAS_SETROW_NEON
#define HAS_SPLITRGBROW_NEON
#define HAS_SPLITUVROW_NEON
+#define HAS_UVToVUROW_NEON
#define HAS_UYVYTOARGBROW_NEON
#define HAS_UYVYTOUV422ROW_NEON
#define HAS_UYVYTOUVROW_NEON
@@ -404,6 +409,7 @@ extern "C" {
// The following are available on AArch64 platforms:
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+#define HAS_FLOATDIVTOBYTEROW_NEON
#define HAS_SCALESUMSAMPLES_NEON
#endif
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
@@ -817,6 +823,10 @@ void NV21ToRGB24Row_NEON(const uint8_t* src_y,
uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width);
+void NV21ToYUV24Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_yuv24,
+ int width);
void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
@@ -2185,6 +2195,10 @@ void NV21ToRGB24Row_C(const uint8_t* src_y,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
+void NV21ToYUV24Row_C(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_yuv24,
+ int width);
void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
@@ -2351,6 +2365,10 @@ void NV21ToRGB24Row_AVX2(const uint8_t* src_y,
uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width);
+void NV21ToYUV24Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_yuv24,
+ int width);
void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
const uint8_t* src_uv,
uint8_t* dst_rgb565,
@@ -2556,6 +2574,10 @@ void NV21ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void NV21ToYUV24Row_Any_AVX2(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_yuv24,
+ int width);
void NV12ToRGB565Row_Any_SSSE3(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
@@ -3029,6 +3051,10 @@ void NV21ToRGB24Row_Any_NEON(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void NV21ToYUV24Row_Any_NEON(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_yuv24,
+ int width);
void NV12ToRGB565Row_Any_NEON(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
@@ -3346,6 +3372,36 @@ void UYVYToUV422Row_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void UVToVURow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width);
+void UVToVURow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width);
+void UVToVURow_Any_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width);
+void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
+void AYUVToUVRow_C(const uint8_t* src_ayuv,
+ int stride_ayuv,
+ uint8_t* dst_uv,
+ int width);
+void AYUVToVURow_C(const uint8_t* src_ayuv,
+ int stride_ayuv,
+ uint8_t* dst_vu,
+ int width);
+void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
+void AYUVToUVRow_NEON(const uint8_t* src_ayuv,
+ int stride_ayuv,
+ uint8_t* dst_uv,
+ int width);
+void AYUVToVURow_NEON(const uint8_t* src_ayuv,
+ int stride_ayuv,
+ uint8_t* dst_vu,
+ int width);
+void AYUVToYRow_Any_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width);
+void AYUVToUVRow_Any_NEON(const uint8_t* src_ayuv,
+ int stride_ayuv,
+ uint8_t* dst_uv,
+ int width);
+void AYUVToVURow_Any_NEON(const uint8_t* src_ayuv,
+ int stride_ayuv,
+ uint8_t* dst_vu,
+ int width);
void I422ToYUY2Row_C(const uint8_t* src_y,
const uint8_t* src_u,
@@ -3962,6 +4018,17 @@ float ScaleSumSamples_NEON(const float* src,
void ScaleSamples_C(const float* src, float* dst, float scale, int width);
void ScaleSamples_NEON(const float* src, float* dst, float scale, int width);
+void FloatDivToByteRow_C(const float* src_weights,
+ const float* src_values,
+ uint8_t* dst_out,
+ uint8_t* dst_mask,
+ int width);
+void FloatDivToByteRow_NEON(const float* src_weights,
+ const float* src_values,
+ uint8_t* dst_out,
+ uint8_t* dst_mask,
+ int width);
+
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
diff --git a/files/include/libyuv/scale.h b/files/include/libyuv/scale.h
index b937d348..23ba1634 100644
--- a/files/include/libyuv/scale.h
+++ b/files/include/libyuv/scale.h
@@ -97,6 +97,54 @@ int I420Scale_16(const uint16_t* src_y,
int dst_height,
enum FilterMode filtering);
+// Scales a YUV 4:4:4 image from the src width and height to the
+// dst width and height.
+// If filtering is kFilterNone, a simple nearest-neighbor algorithm is
+// used. This produces basic (blocky) quality at the fastest speed.
+// If filtering is kFilterBilinear, interpolation is used to produce a better
+// quality image, at the expense of speed.
+// If filtering is kFilterBox, averaging is used to produce an even better
+// quality image, at further expense of speed.
+// Returns 0 if successful.
+
+LIBYUV_API
+int I444Scale(const uint8_t* src_y,
+ int src_stride_y,
+ const uint8_t* src_u,
+ int src_stride_u,
+ const uint8_t* src_v,
+ int src_stride_v,
+ int src_width,
+ int src_height,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering);
+
+LIBYUV_API
+int I444Scale_16(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ int src_width,
+ int src_height,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int dst_width,
+ int dst_height,
+ enum FilterMode filtering);
+
#ifdef __cplusplus
// Legacy API. Deprecated.
LIBYUV_API
diff --git a/files/include/libyuv/version.h b/files/include/libyuv/version.h
index 1a38ba7d..741ef34d 100644
--- a/files/include/libyuv/version.h
+++ b/files/include/libyuv/version.h
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1722
+#define LIBYUV_VERSION 1732
#endif // INCLUDE_LIBYUV_VERSION_H_
diff --git a/files/source/convert.cc b/files/source/convert.cc
index 4b8d0dc5..614fa482 100644
--- a/files/source/convert.cc
+++ b/files/source/convert.cc
@@ -880,6 +880,144 @@ int UYVYToI420(const uint8_t* src_uyvy,
return 0;
}
+// Convert AYUV to NV12.
+LIBYUV_API
+int AYUVToNV12(const uint8_t* src_ayuv,
+ int src_stride_ayuv,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_uv,
+ int dst_stride_uv,
+ int width,
+ int height) {
+ int y;
+ void (*AYUVToUVRow)(const uint8_t* src_ayuv, int src_stride_ayuv,
+ uint8_t* dst_uv, int width) = AYUVToUVRow_C;
+ void (*AYUVToYRow)(const uint8_t* src_ayuv, uint8_t* dst_y, int width) =
+ AYUVToYRow_C;
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_ayuv = src_ayuv + (height - 1) * src_stride_ayuv;
+ src_stride_ayuv = -src_stride_ayuv;
+ }
+// Placeholders for future Intel code.
+#if defined(HAS_AYUVTOYROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ AYUVToUVRow = AYUVToUVRow_Any_SSE2;
+ AYUVToYRow = AYUVToYRow_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ AYUVToUVRow = AYUVToUVRow_SSE2;
+ AYUVToYRow = AYUVToYRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_AYUVTOYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ AYUVToUVRow = AYUVToUVRow_Any_AVX2;
+ AYUVToYRow = AYUVToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ AYUVToUVRow = AYUVToUVRow_AVX2;
+ AYUVToYRow = AYUVToYRow_AVX2;
+ }
+ }
+#endif
+
+#if defined(HAS_AYUVTOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ AYUVToYRow = AYUVToYRow_Any_NEON;
+ AYUVToUVRow = AYUVToUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ AYUVToYRow = AYUVToYRow_NEON;
+ AYUVToUVRow = AYUVToUVRow_NEON;
+ }
+ }
+#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+ AYUVToUVRow(src_ayuv, src_stride_ayuv, dst_uv, width);
+ AYUVToYRow(src_ayuv, dst_y, width);
+ AYUVToYRow(src_ayuv + src_stride_ayuv, dst_y + dst_stride_y, width);
+ src_ayuv += src_stride_ayuv * 2;
+ dst_y += dst_stride_y * 2;
+ dst_uv += dst_stride_uv;
+ }
+ if (height & 1) {
+ AYUVToUVRow(src_ayuv, 0, dst_uv, width);
+ AYUVToYRow(src_ayuv, dst_y, width);
+ }
+ return 0;
+}
+
+// Convert AYUV to NV21.
+LIBYUV_API
+int AYUVToNV21(const uint8_t* src_ayuv,
+ int src_stride_ayuv,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_vu,
+ int dst_stride_vu,
+ int width,
+ int height) {
+ int y;
+ void (*AYUVToVURow)(const uint8_t* src_ayuv, int src_stride_ayuv,
+ uint8_t* dst_vu, int width) = AYUVToVURow_C;
+ void (*AYUVToYRow)(const uint8_t* src_ayuv, uint8_t* dst_y, int width) =
+ AYUVToYRow_C;
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_ayuv = src_ayuv + (height - 1) * src_stride_ayuv;
+ src_stride_ayuv = -src_stride_ayuv;
+ }
+// Placeholders for future Intel code.
+#if defined(HAS_AYUVTOYROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ AYUVToVURow = AYUVToVURow_Any_SSE2;
+ AYUVToYRow = AYUVToYRow_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ AYUVToVURow = AYUVToVURow_SSE2;
+ AYUVToYRow = AYUVToYRow_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_AYUVTOYROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ AYUVToVURow = AYUVToVURow_Any_AVX2;
+ AYUVToYRow = AYUVToYRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ AYUVToVURow = AYUVToVURow_AVX2;
+ AYUVToYRow = AYUVToYRow_AVX2;
+ }
+ }
+#endif
+
+#if defined(HAS_AYUVTOYROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ AYUVToYRow = AYUVToYRow_Any_NEON;
+ AYUVToVURow = AYUVToVURow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ AYUVToYRow = AYUVToYRow_NEON;
+ AYUVToVURow = AYUVToVURow_NEON;
+ }
+ }
+#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+ AYUVToVURow(src_ayuv, src_stride_ayuv, dst_vu, width);
+ AYUVToYRow(src_ayuv, dst_y, width);
+ AYUVToYRow(src_ayuv + src_stride_ayuv, dst_y + dst_stride_y, width);
+ src_ayuv += src_stride_ayuv * 2;
+ dst_y += dst_stride_y * 2;
+ dst_vu += dst_stride_vu;
+ }
+ if (height & 1) {
+ AYUVToVURow(src_ayuv, 0, dst_vu, width);
+ AYUVToYRow(src_ayuv, dst_y, width);
+ }
+ return 0;
+}
+
// Convert ARGB to I420.
LIBYUV_API
int ARGBToI420(const uint8_t* src_argb,
@@ -1446,6 +1584,155 @@ int RGB24ToI420(const uint8_t* src_rgb24,
return 0;
}
+// TODO(fbarchard): Use Matrix version to implement I420 and J420.
+// Convert RGB24 to J420.
+LIBYUV_API
+int RGB24ToJ420(const uint8_t* src_rgb24,
+ int src_stride_rgb24,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ uint8_t* dst_u,
+ int dst_stride_u,
+ uint8_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ int y;
+#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
+ defined(HAS_RGB24TOYJROW_MMI))
+ void (*RGB24ToUVJRow)(const uint8_t* src_rgb24, int src_stride_rgb24,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ RGB24ToUVJRow_C;
+ void (*RGB24ToYJRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width) =
+ RGB24ToYJRow_C;
+#else
+ void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
+ RGB24ToARGBRow_C;
+ void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
+ uint8_t* dst_u, uint8_t* dst_v, int width) =
+ ARGBToUVJRow_C;
+ void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
+ ARGBToYJRow_C;
+#endif
+ if (!src_rgb24 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
+ src_stride_rgb24 = -src_stride_rgb24;
+ }
+
+// Neon version does direct RGB24 to YUV.
+#if defined(HAS_RGB24TOYJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ RGB24ToUVJRow = RGB24ToUVJRow_Any_NEON;
+ RGB24ToYJRow = RGB24ToYJRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ RGB24ToYJRow = RGB24ToYJRow_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ RGB24ToUVJRow = RGB24ToUVJRow_NEON;
+ }
+ }
+ }
+#elif defined(HAS_RGB24TOYJROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ RGB24ToUVJRow = RGB24ToUVJRow_Any_MSA;
+ RGB24ToYJRow = RGB24ToYJRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ RGB24ToYJRow = RGB24ToYJRow_MSA;
+ RGB24ToUVJRow = RGB24ToUVJRow_MSA;
+ }
+ }
+#elif defined(HAS_RGB24TOYJROW_MMI)
+ if (TestCpuFlag(kCpuHasMMI)) {
+ RGB24ToUVJRow = RGB24ToUVJRow_Any_MMI;
+ RGB24ToYJRow = RGB24ToYJRow_Any_MMI;
+ if (IS_ALIGNED(width, 8)) {
+ RGB24ToYJRow = RGB24ToYJRow_MMI;
+ if (IS_ALIGNED(width, 16)) {
+ RGB24ToUVJRow = RGB24ToUVJRow_MMI;
+ }
+ }
+ }
+// Other platforms do intermediate conversion from RGB24 to ARGB.
+#else
+#if defined(HAS_RGB24TOARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
+ ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToUVJRow = ARGBToUVJRow_SSSE3;
+ ARGBToYJRow = ARGBToYJRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToUVJRow = ARGBToUVJRow_Any_AVX2;
+ ARGBToYJRow = ARGBToYJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToUVJRow = ARGBToUVJRow_AVX2;
+ ARGBToYJRow = ARGBToYJRow_AVX2;
+ }
+ }
+#endif
+#endif
+
+ {
+#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
+ defined(HAS_RGB24TOYJROW_MMI))
+ // Allocate 2 rows of ARGB.
+ const int kRowSize = (width * 4 + 31) & ~31;
+ align_buffer_64(row, kRowSize * 2);
+#endif
+
+ for (y = 0; y < height - 1; y += 2) {
+#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
+ defined(HAS_RGB24TOYJROW_MMI))
+ RGB24ToUVJRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width);
+ RGB24ToYJRow(src_rgb24, dst_y, width);
+ RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
+#else
+ RGB24ToARGBRow(src_rgb24, row, width);
+ RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
+ ARGBToUVJRow(row, kRowSize, dst_u, dst_v, width);
+ ARGBToYJRow(row, dst_y, width);
+ ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width);
+#endif
+ src_rgb24 += src_stride_rgb24 * 2;
+ dst_y += dst_stride_y * 2;
+ dst_u += dst_stride_u;
+ dst_v += dst_stride_v;
+ }
+ if (height & 1) {
+#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
+ defined(HAS_RGB24TOYJROW_MMI))
+ RGB24ToUVJRow(src_rgb24, 0, dst_u, dst_v, width);
+ RGB24ToYJRow(src_rgb24, dst_y, width);
+#else
+ RGB24ToARGBRow(src_rgb24, row, width);
+ ARGBToUVJRow(row, 0, dst_u, dst_v, width);
+ ARGBToYJRow(row, dst_y, width);
+#endif
+ }
+#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
+ defined(HAS_RGB24TOYJROW_MMI))
+ free_aligned_buffer_64(row);
+#endif
+ }
+ return 0;
+}
+
// Convert RAW to I420.
LIBYUV_API
int RAWToI420(const uint8_t* src_raw,
@@ -2082,6 +2369,124 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
return 0;
}
+// Convert RGB24 to J400.
+// Converts `height` rows of `width` RGB24 pixels from src_rgb24 into the
+// single plane dst_yj, producing one output row per input row.
+// Returns 0 on success, or -1 when a pointer is NULL, width <= 0 or
+// height == 0. A negative height makes the source be read bottom-up, so the
+// output is vertically flipped.
+LIBYUV_API
+int RGB24ToJ400(const uint8_t* src_rgb24,
+ int src_stride_rgb24,
+ uint8_t* dst_yj,
+ int dst_stride_yj,
+ int width,
+ int height) {
+ int y;
+ // Row functions start out as the C versions; the CPU feature checks below
+ // may replace them with SIMD versions.
+#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
+ defined(HAS_RGB24TOYJROW_MMI))
+ void (*RGB24ToYJRow)(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) =
+ RGB24ToYJRow_C;
+#else
+ void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
+ RGB24ToARGBRow_C;
+ void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
+ ARGBToYJRow_C;
+#endif
+ if (!src_rgb24 || !dst_yj || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
+ src_stride_rgb24 = -src_stride_rgb24;
+ }
+
+// Neon, MSA and MMI versions convert RGB24 directly to YJ.
+// The full-width SIMD row function is only selected when width is a multiple
+// of its block size; otherwise the _Any_ variant is used.
+#if defined(HAS_RGB24TOYJROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ RGB24ToYJRow = RGB24ToYJRow_Any_NEON;
+ if (IS_ALIGNED(width, 8)) {
+ RGB24ToYJRow = RGB24ToYJRow_NEON;
+ }
+ }
+#elif defined(HAS_RGB24TOYJROW_MSA)
+ if (TestCpuFlag(kCpuHasMSA)) {
+ RGB24ToYJRow = RGB24ToYJRow_Any_MSA;
+ if (IS_ALIGNED(width, 16)) {
+ RGB24ToYJRow = RGB24ToYJRow_MSA;
+ }
+ }
+#elif defined(HAS_RGB24TOYJROW_MMI)
+ if (TestCpuFlag(kCpuHasMMI)) {
+ RGB24ToYJRow = RGB24ToYJRow_Any_MMI;
+ if (IS_ALIGNED(width, 8)) {
+ RGB24ToYJRow = RGB24ToYJRow_MMI;
+ }
+ }
+// Other platforms do intermediate conversion from RGB24 to ARGB.
+#else
+#if defined(HAS_RGB24TOARGBROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYJROW_SSSE3)
+ if (TestCpuFlag(kCpuHasSSSE3)) {
+ ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
+ if (IS_ALIGNED(width, 16)) {
+ ARGBToYJRow = ARGBToYJRow_SSSE3;
+ }
+ }
+#endif
+#if defined(HAS_ARGBTOYJROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ARGBToYJRow = ARGBToYJRow_Any_AVX2;
+ if (IS_ALIGNED(width, 32)) {
+ ARGBToYJRow = ARGBToYJRow_AVX2;
+ }
+ }
+#endif
+#endif
+
+ {
+#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
+ defined(HAS_RGB24TOYJROW_MMI))
+ // Allocate 2 rows of ARGB.
+ // kRowSize is the ARGB row size in bytes (width * 4) rounded up to a
+ // multiple of 32.
+ // NOTE(review): align_buffer_64 is defined elsewhere; no allocation-failure
+ // check is visible in this function - confirm.
+ const int kRowSize = (width * 4 + 31) & ~31;
+ align_buffer_64(row, kRowSize * 2);
+#endif
+
+ // Rows are processed two per iteration; an odd final row is handled after
+ // the loop.
+ for (y = 0; y < height - 1; y += 2) {
+#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
+ defined(HAS_RGB24TOYJROW_MMI))
+ RGB24ToYJRow(src_rgb24, dst_yj, width);
+ RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_yj + dst_stride_yj, width);
+#else
+ RGB24ToARGBRow(src_rgb24, row, width);
+ RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width);
+ ARGBToYJRow(row, dst_yj, width);
+ ARGBToYJRow(row + kRowSize, dst_yj + dst_stride_yj, width);
+#endif
+ src_rgb24 += src_stride_rgb24 * 2;
+ dst_yj += dst_stride_yj * 2;
+ }
+ // Odd height: convert the one remaining row.
+ if (height & 1) {
+#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
+ defined(HAS_RGB24TOYJROW_MMI))
+ RGB24ToYJRow(src_rgb24, dst_yj, width);
+#else
+ RGB24ToARGBRow(src_rgb24, row, width);
+ ARGBToYJRow(row, dst_yj, width);
+#endif
+ }
+#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
+ defined(HAS_RGB24TOYJROW_MMI))
+ free_aligned_buffer_64(row);
+#endif
+ }
+ return 0;
+}
+
static void SplitPixels(const uint8_t* src_u,
int src_pixel_stride_uv,
uint8_t* dst_u,
diff --git a/files/source/convert_argb.cc b/files/source/convert_argb.cc
index b376a0f3..54050333 100644
--- a/files/source/convert_argb.cc
+++ b/files/source/convert_argb.cc
@@ -1793,8 +1793,9 @@ int NV21ToARGB(const uint8_t* src_y,
}
// Convert NV12 to ABGR.
-// To output ABGR instead of ARGB swap the UV and use a mirrrored yuc matrix.
+// To output ABGR instead of ARGB swap the UV and use a mirrored yuv matrix.
// To swap the UV use NV12 instead of NV21.
+LIBYUV_API
int NV12ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
@@ -1998,6 +1999,54 @@ int NV21ToRAW(const uint8_t* src_y,
dst_stride_raw, &kYvuI601Constants, width, height);
}
+// Convert NV21 to YUV24.
+// Packs the Y plane and the interleaved VU plane into dst_yuv24, 3 bytes per
+// pixel. Each VU row is used for two consecutive Y rows.
+// Returns 0 on success, or -1 when a pointer is NULL, width <= 0 or
+// height == 0. A negative height writes the destination bottom-up, so the
+// output is vertically flipped.
+LIBYUV_API
+int NV21ToYUV24(const uint8_t* src_y,
+                int src_stride_y,
+                const uint8_t* src_vu,
+                int src_stride_vu,
+                uint8_t* dst_yuv24,
+                int dst_stride_yuv24,
+                int width,
+                int height) {
+  int y;
+  // Start with the C row function; CPU feature checks below may replace it.
+  void (*NV21ToYUV24Row)(const uint8_t* src_y, const uint8_t* src_vu,
+                         uint8_t* dst_yuv24, int width) = NV21ToYUV24Row_C;
+  if (!src_y || !src_vu || !dst_yuv24 || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_yuv24 = dst_yuv24 + (height - 1) * dst_stride_yuv24;
+    dst_stride_yuv24 = -dst_stride_yuv24;
+  }
+#if defined(HAS_NV21TOYUV24ROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    NV21ToYUV24Row = NV21ToYUV24Row_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      NV21ToYUV24Row = NV21ToYUV24Row_NEON;
+    }
+  }
+#endif
+#if defined(HAS_NV21TOYUV24ROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    NV21ToYUV24Row = NV21ToYUV24Row_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      NV21ToYUV24Row = NV21ToYUV24Row_AVX2;
+    }
+  }
+#endif
+  for (y = 0; y < height; ++y) {
+    NV21ToYUV24Row(src_y, src_vu, dst_yuv24, width);
+    dst_yuv24 += dst_stride_yuv24;
+    src_y += src_stride_y;
+    // Advance the chroma row only after every second luma row.
+    if (y & 1) {
+      src_vu += src_stride_vu;
+    }
+  }
+  return 0;
+}
+
// Convert M420 to ARGB.
LIBYUV_API
int M420ToARGB(const uint8_t* src_m420,
diff --git a/files/source/convert_from.cc b/files/source/convert_from.cc
index 706067bb..60140cb4 100644
--- a/files/source/convert_from.cc
+++ b/files/source/convert_from.cc
@@ -670,7 +670,7 @@ static int I420ToRGB24Matrix(const uint8_t* src_y,
#if defined(HAS_I422TORGB24ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToRGB24Row = I422ToRGB24Row_Any_SSSE3;
- if (IS_ALIGNED(width, 8)) {
+ if (IS_ALIGNED(width, 16)) {
I422ToRGB24Row = I422ToRGB24Row_SSSE3;
}
}
@@ -678,7 +678,7 @@ static int I420ToRGB24Matrix(const uint8_t* src_y,
#if defined(HAS_I422TORGB24ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I422ToRGB24Row = I422ToRGB24Row_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
+ if (IS_ALIGNED(width, 32)) {
I422ToRGB24Row = I422ToRGB24Row_AVX2;
}
}
diff --git a/files/source/convert_jpeg.cc b/files/source/convert_jpeg.cc
index 56a95c57..f440c7c2 100644
--- a/files/source/convert_jpeg.cc
+++ b/files/source/convert_jpeg.cc
@@ -89,12 +89,12 @@ static void JpegI400ToI420(void* opaque,
// Query size of MJPG in pixels.
LIBYUV_API
-int MJPGSize(const uint8_t* sample,
- size_t sample_size,
+int MJPGSize(const uint8_t* src_mjpg,
+ size_t src_size_mjpg,
int* width,
int* height) {
MJpegDecoder mjpeg_decoder;
- LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
+ LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(src_mjpg, src_size_mjpg);
if (ret) {
*width = mjpeg_decoder.GetWidth();
*height = mjpeg_decoder.GetHeight();
@@ -107,8 +107,8 @@ int MJPGSize(const uint8_t* sample,
// TODO(fbarchard): review src_width and src_height requirement. dst_width and
// dst_height may be enough.
LIBYUV_API
-int MJPGToI420(const uint8_t* sample,
- size_t sample_size,
+int MJPGToI420(const uint8_t* src_mjpg,
+ size_t src_size_mjpg,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
@@ -119,14 +119,14 @@ int MJPGToI420(const uint8_t* sample,
int src_height,
int dst_width,
int dst_height) {
- if (sample_size == kUnknownDataSize) {
+ if (src_size_mjpg == kUnknownDataSize) {
// ERROR: MJPEG frame size unknown
return -1;
}
// TODO(fbarchard): Port MJpeg to C.
MJpegDecoder mjpeg_decoder;
- LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
+ LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(src_mjpg, src_size_mjpg);
if (ret && (mjpeg_decoder.GetWidth() != src_width ||
mjpeg_decoder.GetHeight() != src_height)) {
// ERROR: MJPEG frame has unexpected dimensions
@@ -180,9 +180,9 @@ int MJPGToI420(const uint8_t* sample,
ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToI420, &bufs, dst_width,
dst_height);
} else {
- // TODO(fbarchard): Implement conversion for any other colorspace/sample
- // factors that occur in practice.
- // ERROR: Unable to convert MJPEG frame because format is not supported
+ // TODO(fbarchard): Implement conversion for any other
+ // colorspace/subsample factors that occur in practice.
+ // ERROR: Unable to convert MJPEG frame because format is not supported
mjpeg_decoder.UnloadFrame();
return 1;
}
@@ -249,8 +249,8 @@ static void JpegI400ToNV21(void* opaque,
// MJPG (Motion JPeg) to NV21
LIBYUV_API
-int MJPGToNV21(const uint8_t* sample,
- size_t sample_size,
+int MJPGToNV21(const uint8_t* src_mjpg,
+ size_t src_size_mjpg,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_vu,
@@ -259,14 +259,14 @@ int MJPGToNV21(const uint8_t* sample,
int src_height,
int dst_width,
int dst_height) {
- if (sample_size == kUnknownDataSize) {
+ if (src_size_mjpg == kUnknownDataSize) {
// ERROR: MJPEG frame size unknown
return -1;
}
// TODO(fbarchard): Port MJpeg to C.
MJpegDecoder mjpeg_decoder;
- LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
+ LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(src_mjpg, src_size_mjpg);
if (ret && (mjpeg_decoder.GetWidth() != src_width ||
mjpeg_decoder.GetHeight() != src_height)) {
// ERROR: MJPEG frame has unexpected dimensions
@@ -382,22 +382,22 @@ static void JpegI400ToARGB(void* opaque,
// TODO(fbarchard): review src_width and src_height requirement. dst_width and
// dst_height may be enough.
LIBYUV_API
-int MJPGToARGB(const uint8_t* sample,
- size_t sample_size,
+int MJPGToARGB(const uint8_t* src_mjpg,
+ size_t src_size_mjpg,
uint8_t* dst_argb,
int dst_stride_argb,
int src_width,
int src_height,
int dst_width,
int dst_height) {
- if (sample_size == kUnknownDataSize) {
+ if (src_size_mjpg == kUnknownDataSize) {
// ERROR: MJPEG frame size unknown
return -1;
}
// TODO(fbarchard): Port MJpeg to C.
MJpegDecoder mjpeg_decoder;
- LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
+ LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(src_mjpg, src_size_mjpg);
if (ret && (mjpeg_decoder.GetWidth() != src_width ||
mjpeg_decoder.GetHeight() != src_height)) {
// ERROR: MJPEG frame has unexpected dimensions
@@ -450,9 +450,9 @@ int MJPGToARGB(const uint8_t* sample,
ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToARGB, &bufs, dst_width,
dst_height);
} else {
- // TODO(fbarchard): Implement conversion for any other colorspace/sample
- // factors that occur in practice.
- // ERROR: Unable to convert MJPEG frame because format is not supported
+ // TODO(fbarchard): Implement conversion for any other
+ // colorspace/subsample factors that occur in practice.
+ // ERROR: Unable to convert MJPEG frame because format is not supported
mjpeg_decoder.UnloadFrame();
return 1;
}
diff --git a/files/source/mjpeg_decoder.cc b/files/source/mjpeg_decoder.cc
index eaf25301..5c5e5ead 100644
--- a/files/source/mjpeg_decoder.cc
+++ b/files/source/mjpeg_decoder.cc
@@ -25,7 +25,8 @@
#endif
#endif
-struct FILE; // For jpeglib.h.
+
+#include <stdio.h> // For jpeglib.h.
// C++ build requires extern C for jpeg internals.
#ifdef __cplusplus
@@ -427,7 +428,15 @@ boolean fill_input_buffer(j_decompress_ptr cinfo) {
}
void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { // NOLINT
- cinfo->src->next_input_byte += num_bytes;
+ jpeg_source_mgr* src = cinfo->src;
+ size_t bytes = static_cast<size_t>(num_bytes);
+ if(bytes > src->bytes_in_buffer) {
+ src->next_input_byte = nullptr;
+ src->bytes_in_buffer = 0;
+ } else {
+ src->next_input_byte += bytes;
+ src->bytes_in_buffer -= bytes;
+ }
}
void term_source(j_decompress_ptr cinfo) {
diff --git a/files/source/mjpeg_validate.cc b/files/source/mjpeg_validate.cc
index 80c2cc0c..ba0a03ab 100644
--- a/files/source/mjpeg_validate.cc
+++ b/files/source/mjpeg_validate.cc
@@ -18,10 +18,10 @@ extern "C" {
#endif
// Helper function to scan for EOI marker (0xff 0xd9).
-static LIBYUV_BOOL ScanEOI(const uint8_t* sample, size_t sample_size) {
- if (sample_size >= 2) {
- const uint8_t* end = sample + sample_size - 1;
- const uint8_t* it = sample;
+static LIBYUV_BOOL ScanEOI(const uint8_t* src_mjpg, size_t src_size_mjpg) {
+ if (src_size_mjpg >= 2) {
+ const uint8_t* end = src_mjpg + src_size_mjpg - 1;
+ const uint8_t* it = src_mjpg;
while (it < end) {
// TODO(fbarchard): scan for 0xd9 instead.
it = (const uint8_t*)(memchr(it, 0xff, end - it));
@@ -34,34 +34,35 @@ static LIBYUV_BOOL ScanEOI(const uint8_t* sample, size_t sample_size) {
++it; // Skip over current 0xff.
}
}
- // ERROR: Invalid jpeg end code not found. Size sample_size
+ // ERROR: Invalid jpeg end code not found. Size src_size_mjpg
return LIBYUV_FALSE;
}
// Helper function to validate the jpeg appears intact.
-LIBYUV_BOOL ValidateJpeg(const uint8_t* sample, size_t sample_size) {
+LIBYUV_BOOL ValidateJpeg(const uint8_t* src_mjpg, size_t src_size_mjpg) {
// Maximum size that ValidateJpeg will consider valid.
const size_t kMaxJpegSize = 0x7fffffffull;
const size_t kBackSearchSize = 1024;
- if (sample_size < 64 || sample_size > kMaxJpegSize || !sample) {
- // ERROR: Invalid jpeg size: sample_size
+ if (src_size_mjpg < 64 || src_size_mjpg > kMaxJpegSize || !src_mjpg) {
+ // ERROR: Invalid jpeg size: src_size_mjpg
return LIBYUV_FALSE;
}
- if (sample[0] != 0xff || sample[1] != 0xd8) { // SOI marker
+ // SOI marker
+ if (src_mjpg[0] != 0xff || src_mjpg[1] != 0xd8 || src_mjpg[2] != 0xff) {
// ERROR: Invalid jpeg initial start code
return LIBYUV_FALSE;
}
// Look for the End Of Image (EOI) marker near the end of the buffer.
- if (sample_size > kBackSearchSize) {
- if (ScanEOI(sample + sample_size - kBackSearchSize, kBackSearchSize)) {
+ if (src_size_mjpg > kBackSearchSize) {
+ if (ScanEOI(src_mjpg + src_size_mjpg - kBackSearchSize, kBackSearchSize)) {
return LIBYUV_TRUE; // Success: Valid jpeg.
}
// Reduce search size for forward search.
- sample_size = sample_size - kBackSearchSize + 1;
+ src_size_mjpg = src_size_mjpg - kBackSearchSize + 1;
}
// Step over SOI marker and scan for EOI.
- return ScanEOI(sample + 2, sample_size - 2);
+ return ScanEOI(src_mjpg + 2, src_size_mjpg - 2);
}
#ifdef __cplusplus
diff --git a/files/source/planar_functions.cc b/files/source/planar_functions.cc
index b49bf0a0..9cab230f 100644
--- a/files/source/planar_functions.cc
+++ b/files/source/planar_functions.cc
@@ -440,7 +440,6 @@ void MergeUVPlane(const uint8_t* src_u,
int y;
void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
uint8_t* dst_uv, int width) = MergeUVRow_C;
- // Coalesce rows.
// Negative height means invert the image.
if (height < 0) {
height = -height;
@@ -504,6 +503,63 @@ void MergeUVPlane(const uint8_t* src_u,
}
}
+// Convert NV21 to NV12.
+// Copies the Y plane (only when dst_y is non-NULL) and swaps the two bytes of
+// every interleaved chroma pair from src_vu into dst_uv. The chroma plane is
+// (width + 1) / 2 pairs wide and (height + 1) / 2 rows high.
+// Returns 0 on success, or -1 on invalid arguments. A negative height makes
+// the source planes be read bottom-up, so the output is vertically flipped.
+LIBYUV_API
+int NV21ToNV12(const uint8_t* src_y,
+               int src_stride_y,
+               const uint8_t* src_vu,
+               int src_stride_vu,
+               uint8_t* dst_y,
+               int dst_stride_y,
+               uint8_t* dst_uv,
+               int dst_stride_uv,
+               int width,
+               int height) {
+  int y;
+  void (*UVToVURow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
+      UVToVURow_C;
+
+  int halfwidth = (width + 1) >> 1;
+  int halfheight = (height + 1) >> 1;
+  // src_y is only read when a Y destination is supplied, so it is required
+  // only in that case.
+  if (!src_vu || !dst_uv || (dst_y && !src_y) || width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    halfheight = (height + 1) >> 1;
+    if (src_y) {  // Do not offset a NULL luma pointer.
+      src_y = src_y + (height - 1) * src_stride_y;
+      src_stride_y = -src_stride_y;
+    }
+    src_vu = src_vu + (halfheight - 1) * src_stride_vu;
+    src_stride_vu = -src_stride_vu;
+  }
+  // Coalesce rows: when both chroma planes are contiguous, process them as a
+  // single long row.
+  if (src_stride_vu == halfwidth * 2 && dst_stride_uv == halfwidth * 2) {
+    halfwidth *= halfheight;
+    halfheight = 1;
+    src_stride_vu = dst_stride_uv = 0;
+  }
+
+  // NOTE(review): this guard is spelled with mixed case, unlike the other
+  // HAS_* macros in this file - confirm it matches the definition in row.h.
+#if defined(HAS_UVToVUROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    UVToVURow = UVToVURow_Any_NEON;
+    if (IS_ALIGNED(halfwidth, 16)) {
+      UVToVURow = UVToVURow_NEON;
+    }
+  }
+#endif
+  if (dst_y) {
+    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+  }
+
+  for (y = 0; y < halfheight; ++y) {
+    UVToVURow(src_vu, dst_uv, halfwidth);
+    src_vu += src_stride_vu;
+    dst_uv += dst_stride_uv;
+  }
+  return 0;
+}
+
// Support function for NV12 etc RGB channels.
// Width and height are plane sizes (typically half pixel width).
LIBYUV_API
diff --git a/files/source/rotate.cc b/files/source/rotate.cc
index f28a06d3..d414186a 100644
--- a/files/source/rotate.cc
+++ b/files/source/rotate.cc
@@ -482,6 +482,66 @@ int I420Rotate(const uint8_t* src_y,
}
LIBYUV_API
+int I444Rotate(const uint8_t* src_y,
+               int src_stride_y,
+               const uint8_t* src_u,
+               int src_stride_u,
+               const uint8_t* src_v,
+               int src_stride_v,
+               uint8_t* dst_y,
+               int dst_stride_y,
+               uint8_t* dst_u,
+               int dst_stride_u,
+               uint8_t* dst_v,
+               int dst_stride_v,
+               int width,
+               int height,
+               enum libyuv::RotationMode mode) {
+  // Rotates the Y, U and V planes by `mode`; all three planes are processed
+  // with the same width and height. Returns 0 on success, or -1 for invalid
+  // arguments or an unrecognised mode.
+  if (!(src_y && src_u && src_v && dst_y && dst_u && dst_v && width > 0 &&
+        height != 0)) {
+    return -1;
+  }
+
+  // A negative height flips the image: start at the last source row of each
+  // plane and walk upwards.
+  if (height < 0) {
+    height = -height;
+    const int last_row = height - 1;
+    src_y += last_row * src_stride_y;
+    src_stride_y = -src_stride_y;
+    src_u += last_row * src_stride_u;
+    src_stride_u = -src_stride_u;
+    src_v += last_row * src_stride_v;
+    src_stride_v = -src_stride_v;
+  }
+
+  switch (mode) {
+    case libyuv::kRotate0:
+      // No rotation: plain plane copies.
+      CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+      CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
+      CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
+      break;
+    case libyuv::kRotate90:
+      RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+      RotatePlane90(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
+      RotatePlane90(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
+      break;
+    case libyuv::kRotate270:
+      RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+      RotatePlane270(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
+      RotatePlane270(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
+      break;
+    case libyuv::kRotate180:
+      RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+      RotatePlane180(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
+      RotatePlane180(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
+      break;
+    default:
+      // Unknown rotation mode.
+      return -1;
+  }
+  return 0;
+}
+
+LIBYUV_API
int NV12ToI420Rotate(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
diff --git a/files/source/row_any.cc b/files/source/row_any.cc
index 031a8f64..06ca723a 100644
--- a/files/source/row_any.cc
+++ b/files/source/row_any.cc
@@ -286,7 +286,12 @@ ANY21(MergeUVRow_Any_MSA, MergeUVRow_MSA, 0, 1, 1, 2, 15)
#ifdef HAS_MERGEUVROW_MMI
ANY21(MergeUVRow_Any_MMI, MergeUVRow_MMI, 0, 1, 1, 2, 7)
#endif
-
+#ifdef HAS_NV21TOYUV24ROW_NEON
+ANY21(NV21ToYUV24Row_Any_NEON, NV21ToYUV24Row_NEON, 1, 1, 2, 3, 15)
+#endif
+#ifdef HAS_NV21TOYUV24ROW_AVX2
+ANY21(NV21ToYUV24Row_Any_AVX2, NV21ToYUV24Row_AVX2, 1, 1, 2, 3, 31)
+#endif
// Math functions.
#ifdef HAS_ARGBMULTIPLYROW_SSE2
ANY21(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, 0, 4, 4, 4, 3)
@@ -702,6 +707,12 @@ ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31)
#ifdef HAS_UYVYTOYROW_MMI
ANY11(UYVYToYRow_Any_MMI, UYVYToYRow_MMI, 1, 4, 1, 15)
#endif
+#ifdef HAS_AYUVTOYROW_NEON
+ANY11(AYUVToYRow_Any_NEON, AYUVToYRow_NEON, 0, 4, 1, 15)
+#endif
+// Guarded by the same macro that NV21ToNV12 tests before selecting
+// UVToVURow_Any_NEON, so the wrapper exists whenever it is referenced.
+#ifdef HAS_UVToVUROW_NEON
+ANY11(UVToVURow_Any_NEON, UVToVURow_NEON, 0, 2, 2, 15)
+#endif
#ifdef HAS_RGB24TOARGBROW_NEON
ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7)
#endif
@@ -1381,6 +1392,37 @@ ANY12S(UYVYToUVRow_Any_MMI, UYVYToUVRow_MMI, 1, 4, 15)
#endif
#undef ANY12S
+// Any 1 to 1 with source stride (2 rows of source). Outputs an interleaved
+// 2-byte-per-pair plane (dst_vu).
+// Generates NAMEANY, a wrapper that lets ANY_SIMD handle any width:
+//   1. ANY_SIMD runs directly on the first n = width & ~MASK pixels.
+//   2. The remaining r = width & MASK pixels of both source rows are copied
+//      into a stack buffer (row 0 at temp, row 1 at temp + 128).
+//   3. When width is odd and UVSHIFT is 0, the last pixel of each row is
+//      duplicated so the final pair is complete.
+//   4. ANY_SIMD runs on MASK + 1 pixels of temp, writing to temp + 256, and
+//      SS(r, 1) * 2 bytes of that result are copied to the destination.
+// Only the first 256 bytes of temp (the two source rows) are zeroed.
+// 128 byte row allows for 32 avx ARGB pixels.
+// NOTE(review): SS() is defined elsewhere in this file; it is presumably a
+// round-up subsample shift - confirm.
+#define ANY11S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
+ void NAMEANY(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_vu, \
+ int width) { \
+ SIMD_ALIGNED(uint8_t temp[128 * 3]); \
+ memset(temp, 0, 128 * 2); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, src_stride_ptr, dst_vu, n); \
+ } \
+ memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
+ memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \
+ SS(r, UVSHIFT) * BPP); \
+ if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \
+ memcpy(temp + SS(r, UVSHIFT) * BPP, temp + SS(r, UVSHIFT) * BPP - BPP, \
+ BPP); \
+ memcpy(temp + 128 + SS(r, UVSHIFT) * BPP, \
+ temp + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \
+ } \
+ ANY_SIMD(temp, 128, temp + 256, MASK + 1); \
+ memcpy(dst_vu + (n >> 1) * 2, temp + 256, SS(r, 1) * 2); \
+ }
+
+#ifdef HAS_AYUVTOVUROW_NEON
+ANY11S(AYUVToUVRow_Any_NEON, AYUVToUVRow_NEON, 0, 4, 15)
+ANY11S(AYUVToVURow_Any_NEON, AYUVToVURow_NEON, 0, 4, 15)
+#endif
+#undef ANY11S
+
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
diff --git a/files/source/row_common.cc b/files/source/row_common.cc
index 2bbc5adb..8951d003 100644
--- a/files/source/row_common.cc
+++ b/files/source/row_common.cc
@@ -3231,6 +3231,119 @@ void GaussCol_C(const uint16_t* src0,
}
}
+// Pack one row of biplanar NV21 into YUV24.
+// Every output pixel is 3 bytes: the two bytes of the shared VU pair followed
+// by that pixel's Y. Each VU pair is used for two horizontally adjacent
+// pixels; an odd trailing pixel uses the next pair on its own.
+void NV21ToYUV24Row_C(const uint8_t* src_y,
+                      const uint8_t* src_vu,
+                      uint8_t* dst_yuv24,
+                      int width) {
+  int pair;
+  const int full_pairs = width / 2;
+  for (pair = 0; pair < full_pairs; ++pair) {
+    const uint8_t v = src_vu[0];
+    const uint8_t u = src_vu[1];
+    // First pixel of the pair.
+    dst_yuv24[0] = v;
+    dst_yuv24[1] = u;
+    dst_yuv24[2] = src_y[0];
+    // Second pixel of the pair, same chroma.
+    dst_yuv24[3] = v;
+    dst_yuv24[4] = u;
+    dst_yuv24[5] = src_y[1];
+    src_y += 2;
+    src_vu += 2;
+    dst_yuv24 += 6;
+  }
+  if (width & 1) {
+    // Odd width: one final pixel.
+    dst_yuv24[0] = src_vu[0];
+    dst_yuv24[1] = src_vu[1];
+    dst_yuv24[2] = src_y[0];
+  }
+}
+
+// Filter 2 rows of AYUV UV's (444) into UV (420).
+// For every 2x2 block of source pixels, byte 1 of the four pixels is averaged
+// (with rounding) into dst_uv[0] and byte 0 into dst_uv[1]. The second source
+// row is src_stride_ayuv bytes after the first. For an odd width the last
+// column is averaged vertically only. Writes (width + 1) / 2 output pairs.
+void AYUVToUVRow_C(const uint8_t* src_ayuv,
+                   int src_stride_ayuv,
+                   uint8_t* dst_uv,
+                   int width) {
+  // Output a row of UV values, filtering 2x2 rows of AYUV.
+  int x;
+  // Stop before a trailing odd pixel so the loop never reads a pixel beyond
+  // the row; the tail below handles it.
+  for (x = 0; x < width - 1; x += 2) {
+    dst_uv[0] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] +
+                 src_ayuv[src_stride_ayuv + 5] + 2) >>
+                2;
+    dst_uv[1] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] +
+                 src_ayuv[src_stride_ayuv + 4] + 2) >>
+                2;
+    src_ayuv += 8;
+    dst_uv += 2;
+  }
+  if (width & 1) {
+    // Same channel order as the main loop: byte 1 first, then byte 0.
+    dst_uv[0] = (src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] + 1) >> 1;
+    dst_uv[1] = (src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] + 1) >> 1;
+  }
+}
+
+// Filter 2 rows of AYUV UV's (444) into VU (420).
+// For every 2x2 block of source pixels, byte 0 of the four pixels is averaged
+// (with rounding) into dst_vu[0] and byte 1 into dst_vu[1]. The second source
+// row is src_stride_ayuv bytes after the first. For an odd width the last
+// column is averaged vertically only. Writes (width + 1) / 2 output pairs.
+void AYUVToVURow_C(const uint8_t* src_ayuv,
+                   int src_stride_ayuv,
+                   uint8_t* dst_vu,
+                   int width) {
+  // Output a row of VU values, filtering 2x2 rows of AYUV.
+  int x;
+  // Stop before a trailing odd pixel so the loop never reads a pixel beyond
+  // the row; the tail below handles it.
+  for (x = 0; x < width - 1; x += 2) {
+    dst_vu[0] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] +
+                 src_ayuv[src_stride_ayuv + 4] + 2) >>
+                2;
+    dst_vu[1] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] +
+                 src_ayuv[src_stride_ayuv + 5] + 2) >>
+                2;
+    src_ayuv += 8;
+    dst_vu += 2;
+  }
+  if (width & 1) {
+    dst_vu[0] = (src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] + 1) >> 1;
+    dst_vu[1] = (src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] + 1) >> 1;
+  }
+}
+
+// Extract the Y channel from a row of AYUV pixels.
+// Each source pixel is 4 bytes and Y is the byte at offset 2.
+void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
+  int i = 0;
+  while (i < width) {
+    dst_y[i] = src_ayuv[i * 4 + 2];
+    ++i;
+  }
+}
+
+// Swap the two bytes of each of `width` interleaved pairs.
+// Both source bytes of a pair are read before either destination byte is
+// written.
+void UVToVURow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
+  int i;
+  for (i = 0; i < width; ++i) {
+    const uint8_t first = src_uv[2 * i];
+    const uint8_t second = src_uv[2 * i + 1];
+    dst_vu[2 * i] = second;
+    dst_vu[2 * i + 1] = first;
+  }
+}
+
+// Divide each value by its weight, clamp the quotient to a byte, and emit a
+// mask byte per element: 0 where the weight is greater than zero, 0xff
+// otherwise. The division is performed for every element, including those
+// whose weight is not positive.
+void FloatDivToByteRow_C(const float* src_weights,
+                         const float* src_values,
+                         uint8_t* dst_out,
+                         uint8_t* dst_mask,
+                         int width) {
+  int i = 0;
+  while (i < width) {
+    dst_out[i] = Clamp(src_values[i] / src_weights[i]);
+    if (src_weights[i] > 0) {
+      dst_mask[i] = 0;
+    } else {
+      dst_mask[i] = 0xff;
+    }
+    ++i;
+  }
+}
+
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
diff --git a/files/source/row_gcc.cc b/files/source/row_gcc.cc
index 8d3cb81c..decd3d2e 100644
--- a/files/source/row_gcc.cc
+++ b/files/source/row_gcc.cc
@@ -5238,7 +5238,7 @@ void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
,
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
- );
+ );
}
#endif // HAS_ARGBMULTIPLYROW_AVX2
@@ -6669,6 +6669,127 @@ void ARGBLumaColorTableRow_SSSE3(const uint8_t* src_argb,
}
#endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
+#ifdef HAS_NV21TOYUV24ROW_AVX2
+
+// Blend masks and byte-shuffle tables used by NV21ToYUV24Row_AVX2.
+static const ulvec8 kBLEND0 = {0x80, 0x00, 0x80, 0x80, 0x00, 0x80, 0x80, 0x00,
+ 0x80, 0x80, 0x00, 0x80, 0x80, 0x00, 0x80, 0x80,
+ 0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00};
+
+static const ulvec8 kBLEND1 = {0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00,
+ 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00,
+ 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00,
+ 0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80};
+
+static const ulvec8 kBLEND2 = {0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00,
+ 0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80,
+ 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00,
+ 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x80, 0x00};
+
+static const ulvec8 kSHUF0 = {0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80, 0x02, 0x0d,
+ 0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f, 0x80, 0x05,
+ 0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80, 0x02, 0x0d,
+ 0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f, 0x80, 0x05};
+
+static const ulvec8 kSHUF1 = {0x80, 0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80, 0x02,
+ 0x0d, 0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f, 0x80,
+ 0x80, 0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80, 0x02,
+ 0x0d, 0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f, 0x80};
+
+static const ulvec8 kSHUF2 = {0x0a, 0x80, 0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80,
+ 0x02, 0x0d, 0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f,
+ 0x0a, 0x80, 0x00, 0x0b, 0x80, 0x01, 0x0c, 0x80,
+ 0x02, 0x0d, 0x80, 0x03, 0x0e, 0x80, 0x04, 0x0f};
+
+static const ulvec8 kSHUF3 = {0x80, 0x80, 0x06, 0x80, 0x80, 0x07, 0x80, 0x80,
+ 0x08, 0x80, 0x80, 0x09, 0x80, 0x80, 0x0a, 0x80,
+ 0x80, 0x80, 0x06, 0x80, 0x80, 0x07, 0x80, 0x80,
+ 0x08, 0x80, 0x80, 0x09, 0x80, 0x80, 0x0a, 0x80};
+
+static const ulvec8 kSHUF4 = {0x05, 0x80, 0x80, 0x06, 0x80, 0x80, 0x07, 0x80,
+ 0x80, 0x08, 0x80, 0x80, 0x09, 0x80, 0x80, 0x0a,
+ 0x05, 0x80, 0x80, 0x06, 0x80, 0x80, 0x07, 0x80,
+ 0x80, 0x08, 0x80, 0x80, 0x09, 0x80, 0x80, 0x0a};
+
+static const ulvec8 kSHUF5 = {0x80, 0x05, 0x80, 0x80, 0x06, 0x80, 0x80, 0x07,
+ 0x80, 0x80, 0x08, 0x80, 0x80, 0x09, 0x80, 0x80,
+ 0x80, 0x05, 0x80, 0x80, 0x06, 0x80, 0x80, 0x07,
+ 0x80, 0x80, 0x08, 0x80, 0x80, 0x09, 0x80, 0x80};
+
+// NV21ToYUV24Row_AVX2
+// AVX2 row function with the same signature as NV21ToYUV24Row_C.
+// Each loop iteration subtracts 32 from the pixel count, advances the source
+// offset by 32 bytes and the destination pointer by 96 (0x60) bytes, and
+// repeats while the remaining count is greater than zero.
+// The three kBLEND tables are loaded once into ymm0-ymm2 and used as
+// vpblendvb masks; the six kSHUF tables are vpshufb controls.
+// NOTE(review): src_y_ptr is assigned below but is not one of the asm
+// operands - it looks unused; confirm.
+// NOTE(review): the third load reads (%1) without the %4 offset used by the
+// other two loads, and %1 is not advanced inside the loop - confirm this is
+// intended.
+void NV21ToYUV24Row_AVX2(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_yuv24,
+ int width) {
+ uint8_t* src_y_ptr;
+ uint64_t src_offset = 0;
+ uint64_t width64;
+
+ width64 = width;
+ src_y_ptr = (uint8_t*)src_y;
+
+ asm volatile(
+ "vmovdqu %5, %%ymm0 \n" // init blend value
+ "vmovdqu %6, %%ymm1 \n" // init blend value
+ "vmovdqu %7, %%ymm2 \n" // init blend value
+ // "sub $0x20, %3 \n" //sub 32 from width for final loop
+
+ LABELALIGN
+ "1: \n" // label 1
+ "vmovdqu (%0,%4), %%ymm3 \n" // src_y
+ "vmovdqu 1(%1,%4), %%ymm4 \n" // src_uv+1
+ "vmovdqu (%1), %%ymm5 \n" // src_uv
+ "vpshufb %8, %%ymm3, %%ymm13 \n" // y, kSHUF0 for shuf
+ "vpshufb %9, %%ymm4, %%ymm14 \n" // uv+1, kSHUF1 for
+ // shuf
+ "vpshufb %10, %%ymm5, %%ymm15 \n" // uv, kSHUF2 for
+ // shuf
+ "vpshufb %11, %%ymm3, %%ymm3 \n" // y kSHUF3 for shuf
+ "vpshufb %12, %%ymm4, %%ymm4 \n" // uv+1 kSHUF4 for
+ // shuf
+ "vpblendvb %%ymm0, %%ymm14, %%ymm13, %%ymm12 \n" // blend 0
+ "vpblendvb %%ymm0, %%ymm13, %%ymm14, %%ymm14 \n" // blend 0
+ "vpblendvb %%ymm2, %%ymm15, %%ymm12, %%ymm12 \n" // blend 2
+ "vpblendvb %%ymm1, %%ymm15, %%ymm14, %%ymm13 \n" // blend 1
+ "vpshufb %13, %%ymm5, %%ymm15 \n" // shuffle const
+ "vpor %%ymm4, %%ymm3, %%ymm5 \n" // get results
+ "vmovdqu %%ymm12, 0x20(%2) \n" // store dst_yuv+20h
+ "vpor %%ymm15, %%ymm5, %%ymm3 \n" // get results
+ "add $0x20, %4 \n" // add to src buffer
+ // ptr
+ "vinserti128 $0x1, %%xmm3, %%ymm13, %%ymm4 \n" // insert
+ "vperm2i128 $0x31, %%ymm13, %%ymm3, %%ymm5 \n" // insert
+ "vmovdqu %%ymm4, (%2) \n" // store dst_yuv
+ "vmovdqu %%ymm5, 0x40(%2) \n" // store dst_yuv+40h
+ "add $0x60,%2 \n" // add to dst buffer
+ // ptr
+ // "cmp %3, %4 \n" //(width64 -
+ // 32 bytes) and src_offset
+ "sub $0x20,%3 \n" // 32 pixels per loop
+ "jg 1b \n"
+ "vzeroupper \n" // sse-avx2
+ // transitions
+
+ : "+r"(src_y), //%0
+ "+r"(src_vu), //%1
+ "+r"(dst_yuv24), //%2
+ "+r"(width64), //%3
+ "+r"(src_offset) //%4
+ : "m"(kBLEND0), //%5
+ "m"(kBLEND1), //%6
+ "m"(kBLEND2), //%7
+ "m"(kSHUF0), //%8
+ "m"(kSHUF1), //%9
+ "m"(kSHUF2), //%10
+ "m"(kSHUF3), //%11
+ "m"(kSHUF4), //%12
+ "m"(kSHUF5) //%13
+ : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm12",
+ "xmm13", "xmm14", "xmm15");
+}
+#endif // HAS_NV21TOYUV24ROW_AVX2
+
#endif // defined(__x86_64__) || defined(__i386__)
#ifdef __cplusplus
diff --git a/files/source/row_neon.cc b/files/source/row_neon.cc
index ff87e74c..a12fa790 100644
--- a/files/source/row_neon.cc
+++ b/files/source/row_neon.cc
@@ -561,7 +561,7 @@ void SplitUVRow_NEON(const uint8_t* src_uv,
"+r"(width) // %3 // Output registers
: // Input registers
: "cc", "memory", "q0", "q1" // Clobber List
- );
+ );
}
// Reads 16 U's and V's and writes out 16 pairs of UV.
@@ -582,7 +582,7 @@ void MergeUVRow_NEON(const uint8_t* src_u,
"+r"(width) // %3 // Output registers
: // Input registers
: "cc", "memory", "q0", "q1" // Clobber List
- );
+ );
}
// Reads 16 packed RGB and write to planar dst_r, dst_g, dst_b.
@@ -607,7 +607,7 @@ void SplitRGBRow_NEON(const uint8_t* src_rgb,
"+r"(width) // %4
: // Input registers
: "cc", "memory", "d0", "d1", "d2" // Clobber List
- );
+ );
}
// Reads 16 planar R's, G's and B's and writes out 16 packed RGB at a time
@@ -632,7 +632,7 @@ void MergeRGBRow_NEON(const uint8_t* src_r,
"+r"(width) // %4
: // Input registers
: "cc", "memory", "q0", "q1", "q2" // Clobber List
- );
+ );
}
// Copy multiple of 32. vld4.8 allow unaligned and is fastest on a15.
@@ -648,7 +648,7 @@ void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
"+r"(width) // %2 // Output registers
: // Input registers
: "cc", "memory", "q0", "q1" // Clobber List
- );
+ );
}
// SetRow writes 'width' bytes using an 8 bit value repeated.
@@ -761,7 +761,7 @@ void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
"+r"(width) // %2
:
: "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
- );
+ );
}
void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
@@ -778,7 +778,7 @@ void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
"+r"(width) // %2
:
: "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
- );
+ );
}
void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
@@ -795,7 +795,7 @@ void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
"+r"(width) // %2
:
: "cc", "memory", "d1", "d2", "d3" // Clobber List
- );
+ );
}
#define RGB565TOARGB \
@@ -826,7 +826,7 @@ void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
"+r"(width) // %2
:
: "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
- );
+ );
}
#define ARGB1555TOARGB \
@@ -872,7 +872,7 @@ void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555,
"+r"(width) // %2
:
: "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
- );
+ );
}
#define ARGB4444TOARGB \
@@ -901,7 +901,7 @@ void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444,
"+r"(width) // %2
:
: "cc", "memory", "q0", "q1", "q2" // Clobber List
- );
+ );
}
void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
@@ -919,7 +919,7 @@ void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
"+r"(width) // %2
:
: "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
- );
+ );
}
void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
@@ -935,7 +935,7 @@ void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
"+r"(width) // %2
:
: "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List
- );
+ );
}
void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
@@ -950,7 +950,7 @@ void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
"+r"(width) // %2
:
: "cc", "memory", "q0", "q1" // Clobber List
- );
+ );
}
void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
@@ -965,7 +965,7 @@ void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
"+r"(width) // %2
:
: "cc", "memory", "q0", "q1" // Clobber List
- );
+ );
}
void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2,
@@ -985,7 +985,7 @@ void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2,
"+r"(width) // %3
:
: "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List
- );
+ );
}
void UYVYToUV422Row_NEON(const uint8_t* src_uyvy,
@@ -1005,7 +1005,7 @@ void UYVYToUV422Row_NEON(const uint8_t* src_uyvy,
"+r"(width) // %3
:
: "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List
- );
+ );
}
void YUY2ToUVRow_NEON(const uint8_t* src_yuy2,
@@ -1032,7 +1032,7 @@ void YUY2ToUVRow_NEON(const uint8_t* src_yuy2,
:
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6",
"d7" // Clobber List
- );
+ );
}
void UYVYToUVRow_NEON(const uint8_t* src_uyvy,
@@ -1059,7 +1059,7 @@ void UYVYToUVRow_NEON(const uint8_t* src_uyvy,
:
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6",
"d7" // Clobber List
- );
+ );
}
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
@@ -1081,7 +1081,7 @@ void ARGBShuffleRow_NEON(const uint8_t* src_argb,
"+r"(width) // %2
: "r"(shuffler) // %3
: "cc", "memory", "q0", "q1", "q2" // Clobber List
- );
+ );
}
void I422ToYUY2Row_NEON(const uint8_t* src_y,
@@ -1241,7 +1241,7 @@ void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
"+r"(width) // %2
:
: "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List
- );
+ );
}
void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
@@ -2564,7 +2564,7 @@ void SobelXRow_NEON(const uint8_t* src_y0,
: "r"(2), // %5
"r"(6) // %6
: "cc", "memory", "q0", "q1" // Clobber List
- );
+ );
}
// SobelY as a matrix is
@@ -2601,7 +2601,7 @@ void SobelYRow_NEON(const uint8_t* src_y0,
: "r"(1), // %4
"r"(6) // %5
: "cc", "memory", "q0", "q1" // Clobber List
- );
+ );
}
// %y passes a float as a scalar vector for vector * scalar multiply.
@@ -2685,6 +2685,205 @@ void ByteToFloatRow_NEON(const uint8_t* src,
: "cc", "memory", "q1", "q2", "q3");
}
+// filter 5 rows of u16 with 1, 4, 6, 4, 1 coefficients to produce 1 u32 row.
+void GaussCol_NEON(const uint16_t* src0,
+ const uint16_t* src1,
+ const uint16_t* src2,
+ const uint16_t* src3,
+ const uint16_t* src4,
+ uint32_t* dst,
+ int width) {
+ asm volatile(
+ "vmov.u16 d6, #4 \n" // constant 4 in every u16 lane
+ "vmov.u16 d7, #6 \n" // constant 6 in every u16 lane
+
+ "1: \n"
+ "vld1.16 {q1}, [%0]! \n" // load 8 samples of row 0
+ "vld1.16 {q2}, [%4]! \n" // load 8 samples of row 4
+ "vaddl.u16 q0, d2, d4 \n" // * 1: row 0 + row 4 widened, low 4
+ "vaddl.u16 q1, d3, d5 \n" // * 1: high 4 (q1 is reused as sum)
+ "vld1.16 {q2}, [%1]! \n" // load 8 samples of row 1
+ "vmlal.u16 q0, d4, d6 \n" // * 4
+ "vmlal.u16 q1, d5, d6 \n" // * 4
+ "vld1.16 {q2}, [%2]! \n" // load 8 samples of row 2
+ "vmlal.u16 q0, d4, d7 \n" // * 6
+ "vmlal.u16 q1, d5, d7 \n" // * 6
+ "vld1.16 {q2}, [%3]! \n" // load 8 samples of row 3
+ "vmlal.u16 q0, d4, d6 \n" // * 4
+ "vmlal.u16 q1, d5, d6 \n" // * 4
+ "subs %6, %6, #8 \n" // 8 processed per loop
+ "vst1.32 {q0, q1}, [%5]! \n" // store 8 sums as 32 bit values
+ "bgt 1b \n" // loop while width is still > 0
+ : "+r"(src0), // %0
+ "+r"(src1), // %1
+ "+r"(src2), // %2
+ "+r"(src3), // %3
+ "+r"(src4), // %4
+ "+r"(dst), // %5
+ "+r"(width) // %6
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3");
+}
+
+// filter 5 adjacent samples of 1 row with 1, 4, 6, 4, 1 coefficients.
+void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width) {
+ const uint32_t* src1 = src + 1; // tap 1 (* 4)
+ const uint32_t* src2 = src + 2; // tap 2 (* 6), the center sample
+ const uint32_t* src3 = src + 3; // tap 3 (* 4)
+ asm volatile(
+ "vmov.u32 q10, #4 \n" // constant 4
+ "vmov.u32 q11, #6 \n" // constant 6
+
+ "1: \n"
+ "vld1.32 {q0, q1}, [%0]! \n" // load samples 0..7, advance by 8
+ "vld1.32 {q2}, [%0] \n" // peek samples 8..11, no advance
+ "vadd.u32 q0, q0, q1 \n" // * 1: s[i] + s[i + 4], i = 0..3
+ "vadd.u32 q1, q1, q2 \n" // * 1: s[i] + s[i + 4], i = 4..7
+ "vld1.32 {q2, q3}, [%2]! \n" // load 8 center samples
+ "vmla.u32 q0, q2, q11 \n" // * 6
+ "vmla.u32 q1, q3, q11 \n" // * 6
+ "vld1.32 {q2, q3}, [%1]! \n" // load 8 samples at offset 1
+ "vld1.32 {q8, q9}, [%3]! \n" // load 8 samples at offset 3
+ "vadd.u32 q2, q2, q8 \n" // add rows for * 4
+ "vadd.u32 q3, q3, q9 \n"
+ "vmla.u32 q0, q2, q10 \n" // * 4
+ "vmla.u32 q1, q3, q10 \n" // * 4
+ "subs %5, %5, #8 \n" // 8 processed per loop
+ "vqrshrn.u32 d0, q0, #8 \n" // round (+128), >> 8 and pack
+ "vqrshrn.u32 d1, q1, #8 \n" // vqshrn would truncate instead
+ "vst1.u16 {q0}, [%4]! \n" // store 8 samples
+ "bgt 1b \n"
+ : "+r"(src), // %0
+ "+r"(src1), // %1
+ "+r"(src2), // %2
+ "+r"(src3), // %3
+ "+r"(dst), // %4
+ "+r"(width) // %5
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
+}
+
+// Convert biplanar NV21 to packed YUV24 (bytes stored as V, U, Y per pixel).
+void NV21ToYUV24Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_yuv24,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "vld1.8 {q2}, [%0]! \n" // load 16 Y values
+ "vld2.8 {d0, d2}, [%1]! \n" // load 8 VU pairs: V to d0, U to d2
+ "vmov d1, d0 \n" // copy V so it can be zipped with itself
+ "vzip.u8 d0, d1 \n" // VV: each V repeated for 2 pixels
+ "vmov d3, d2 \n" // copy U so it can be zipped with itself
+ "vzip.u8 d2, d3 \n" // UU: each U repeated for 2 pixels
+ "subs %3, %3, #16 \n" // 16 pixels per loop
+ "vst3.8 {d0, d2, d4}, [%2]! \n" // store first 8 VUY pixels
+ "vst3.8 {d1, d3, d5}, [%2]! \n" // store next 8 VUY pixels
+ "bgt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_vu), // %1
+ "+r"(dst_yuv24), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "q0", "q1", "q2");
+}
+
+void AYUVToUVRow_NEON(const uint8_t* src_ayuv,
+ int src_stride_ayuv,
+ uint8_t* dst_uv,
+ int width) {
+ asm volatile(
+ "add %1, %0, %1 \n" // %1 becomes row 1: src_ayuv + stride
+ "1: \n"
+ "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 AYUV pixels.
+ "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 AYUV
+ // pixels. q0 = V, q1 = U.
+ "vpaddl.u8 q0, q0 \n" // V 16 bytes -> 8 shorts.
+ "vpaddl.u8 q1, q1 \n" // U 16 bytes -> 8 shorts.
+ "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 AYUV pixels
+ // of row 1.
+ "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load next 8 AYUV
+ // pixels. q4 = V, q5 = U.
+ "vpadal.u8 q0, q4 \n" // add row 1 V pairs to the 8 shorts.
+ "vpadal.u8 q1, q5 \n" // add row 1 U pairs to the 8 shorts.
+ "vqrshrun.s16 d1, q0, #2 \n" // 2x2 average: V to d1
+ "vqrshrun.s16 d0, q1, #2 \n" // 2x2 average: U to d0
+ "subs %3, %3, #16 \n" // 16 processed per loop.
+ "vst2.8 {d0, d1}, [%2]! \n" // store 8 pixels UV.
+ "bgt 1b \n"
+ : "+r"(src_ayuv), // %0
+ "+r"(src_stride_ayuv), // %1
+ "+r"(dst_uv), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7");
+}
+
+void AYUVToVURow_NEON(const uint8_t* src_ayuv,
+ int src_stride_ayuv,
+ uint8_t* dst_vu,
+ int width) {
+ asm volatile(
+ "add %1, %0, %1 \n" // %1 becomes row 1: src_ayuv + stride
+ "1: \n"
+ "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 AYUV pixels.
+ "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 AYUV
+ // pixels. q0 = V, q1 = U.
+ "vpaddl.u8 q0, q0 \n" // V 16 bytes -> 8 shorts.
+ "vpaddl.u8 q1, q1 \n" // U 16 bytes -> 8 shorts.
+ "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 AYUV pixels
+ // of row 1.
+ "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load next 8 AYUV
+ // pixels. q4 = V, q5 = U.
+ "vpadal.u8 q0, q4 \n" // add row 1 V pairs to the 8 shorts.
+ "vpadal.u8 q1, q5 \n" // add row 1 U pairs to the 8 shorts.
+ "vqrshrun.s16 d0, q0, #2 \n" // 2x2 average: V to d0
+ "vqrshrun.s16 d1, q1, #2 \n" // 2x2 average: U to d1
+ "subs %3, %3, #16 \n" // 16 processed per loop.
+ "vst2.8 {d0, d1}, [%2]! \n" // store 8 pixels VU.
+ "bgt 1b \n"
+ : "+r"(src_ayuv), // %0
+ "+r"(src_stride_ayuv), // %1
+ "+r"(dst_vu), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7");
+}
+
+// Copy row of AYUV Y's into Y. Y is byte 2 of each 4 byte AYUV pixel.
+// Similar to ARGBExtractAlphaRow_NEON
+void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
+ asm volatile(
+ "1: \n"
+ "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 AYUV pixels, Y to d4
+ "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 pixels, Y to d5
+ "subs %2, %2, #16 \n" // 16 processed per loop
+ "vst1.8 {q2}, [%1]! \n" // store 16 Y's (q2 = d4, d5).
+ "bgt 1b \n"
+ : "+r"(src_ayuv), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "q0", "q1", "q2", "q3");
+}
+
+// Convert biplanar UV channel of NV12 to NV21 by swapping each U, V byte pair.
+void UVToVURow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
+ asm volatile(
+ "1: \n"
+ "vld2.8 {d0, d2}, [%0]! \n" // load 8 UV pairs: U to d0, V to d2
+ "vld2.8 {d1, d3}, [%0]! \n" // load 8 more: U to d1, V to d3
+ "vorr.u8 q2, q0, q0 \n" // move U after V
+ "subs %2, %2, #16 \n" // 16 pixels per loop
+ "vst2.8 {q1, q2}, [%1]! \n" // store 16 VU pixels
+ "bgt 1b \n"
+ : "+r"(src_uv), // %0
+ "+r"(dst_vu), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "q0", "q1", "q2");
+}
+
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)..
#ifdef __cplusplus
diff --git a/files/source/row_neon64.cc b/files/source/row_neon64.cc
index 24b4520b..f5cbb470 100644
--- a/files/source/row_neon64.cc
+++ b/files/source/row_neon64.cc
@@ -608,7 +608,7 @@ void SplitUVRow_NEON(const uint8_t* src_uv,
"+r"(width) // %3 // Output registers
: // Input registers
: "cc", "memory", "v0", "v1" // Clobber List
- );
+ );
}
// Reads 16 U's and V's and writes out 16 pairs of UV.
@@ -629,7 +629,7 @@ void MergeUVRow_NEON(const uint8_t* src_u,
"+r"(width) // %3 // Output registers
: // Input registers
: "cc", "memory", "v0", "v1" // Clobber List
- );
+ );
}
// Reads 16 packed RGB and write to planar dst_r, dst_g, dst_b.
@@ -653,7 +653,7 @@ void SplitRGBRow_NEON(const uint8_t* src_rgb,
"+r"(width) // %4
: // Input registers
: "cc", "memory", "v0", "v1", "v2" // Clobber List
- );
+ );
}
// Reads 16 planar R's, G's and B's and writes out 16 packed RGB at a time
@@ -677,7 +677,7 @@ void MergeRGBRow_NEON(const uint8_t* src_r,
"+r"(width) // %4
: // Input registers
: "cc", "memory", "v0", "v1", "v2" // Clobber List
- );
+ );
}
// Copy multiple of 32.
@@ -693,7 +693,7 @@ void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
"+r"(width) // %2 // Output registers
: // Input registers
: "cc", "memory", "v0", "v1" // Clobber List
- );
+ );
}
// SetRow writes 'width' bytes using an 8 bit value repeated.
@@ -800,7 +800,7 @@ void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
"+r"(width) // %2
:
: "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List
- );
+ );
}
void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
@@ -818,7 +818,7 @@ void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
"+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5" // Clobber List
- );
+ );
}
void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
@@ -835,7 +835,7 @@ void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
"+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4" // Clobber List
- );
+ );
}
#define RGB565TOARGB \
@@ -867,7 +867,7 @@ void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
"+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v6" // Clobber List
- );
+ );
}
#define ARGB1555TOARGB \
@@ -924,7 +924,7 @@ void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555,
"+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
- );
+ );
}
#define ARGB4444TOARGB \
@@ -955,7 +955,7 @@ void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444,
"+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4" // Clobber List
- );
+ );
}
void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
@@ -973,7 +973,7 @@ void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
"+r"(width) // %2
:
: "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List
- );
+ );
}
void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
@@ -990,7 +990,7 @@ void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
"+r"(width) // %2
:
: "cc", "memory", "v1", "v2", "v3", "v4", "v5" // Clobber List
- );
+ );
}
void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
@@ -1005,7 +1005,7 @@ void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
"+r"(width) // %2
:
: "cc", "memory", "v0", "v1" // Clobber List
- );
+ );
}
void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
@@ -1020,7 +1020,7 @@ void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
"+r"(width) // %2
:
: "cc", "memory", "v0", "v1" // Clobber List
- );
+ );
}
void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2,
@@ -1040,7 +1040,7 @@ void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2,
"+r"(width) // %3
:
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
- );
+ );
}
void UYVYToUV422Row_NEON(const uint8_t* src_uyvy,
@@ -1060,7 +1060,7 @@ void UYVYToUV422Row_NEON(const uint8_t* src_uyvy,
"+r"(width) // %3
:
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
- );
+ );
}
void YUY2ToUVRow_NEON(const uint8_t* src_yuy2,
@@ -1087,7 +1087,7 @@ void YUY2ToUVRow_NEON(const uint8_t* src_yuy2,
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6",
"v7" // Clobber List
- );
+ );
}
void UYVYToUVRow_NEON(const uint8_t* src_uyvy,
@@ -1114,7 +1114,7 @@ void UYVYToUVRow_NEON(const uint8_t* src_uyvy,
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6",
"v7" // Clobber List
- );
+ );
}
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
@@ -1135,7 +1135,7 @@ void ARGBShuffleRow_NEON(const uint8_t* src_argb,
"+r"(width) // %2
: "r"(shuffler) // %3
: "cc", "memory", "v0", "v1", "v2" // Clobber List
- );
+ );
}
void I422ToYUY2Row_NEON(const uint8_t* src_y,
@@ -1298,7 +1298,7 @@ void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
"+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
- );
+ );
}
void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
@@ -1863,7 +1863,7 @@ void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444,
"v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27",
"v28"
- );
+ );
}
void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
@@ -2611,7 +2611,7 @@ void SobelXRow_NEON(const uint8_t* src_y0,
: "r"(2LL), // %5
"r"(6LL) // %6
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
- );
+ );
}
// SobelY as a matrix is
@@ -2648,7 +2648,7 @@ void SobelYRow_NEON(const uint8_t* src_y0,
: "r"(1LL), // %4
"r"(6LL) // %5
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
- );
+ );
}
// Caveat - rounds float to half float whereas scaling version truncates.
@@ -2876,6 +2876,158 @@ void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width) {
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
}
+// Convert biplanar NV21 to packed YUV24 (bytes stored as V, U, Y per pixel).
+void NV21ToYUV24Row_NEON(const uint8_t* src_y,
+ const uint8_t* src_vu,
+ uint8_t* dst_yuv24,
+ int width) {
+ asm volatile(
+ "1: \n"
+ "ld1 {v2.16b}, [%0], #16 \n" // load 16 Y values
+ "ld2 {v0.8b, v1.8b}, [%1], #16 \n" // load 8 VU pairs: V to v0, U to v1
+ "zip1 v0.16b, v0.16b, v0.16b \n" // replicate V values for 2 pixels
+ "zip1 v1.16b, v1.16b, v1.16b \n" // replicate U values for 2 pixels
+ "subs %w3, %w3, #16 \n" // 16 pixels per loop
+ "st3 {v0.16b,v1.16b,v2.16b}, [%2], #48 \n" // store 16 VUY pixels
+ "b.gt 1b \n"
+ : "+r"(src_y), // %0
+ "+r"(src_vu), // %1
+ "+r"(dst_yuv24), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "v0", "v1", "v2");
+}
+
+void AYUVToUVRow_NEON(const uint8_t* src_ayuv,
+ int src_stride_ayuv,
+ uint8_t* dst_uv,
+ int width) {
+ const uint8_t* src_ayuv_1 = src_ayuv + src_stride_ayuv; // row 1
+ asm volatile(
+ // 2x2 average of V and U over rows 0 and 1, stored as interleaved UV.
+ "1: \n"
+ "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16
+ // pixels. v0 = V, v1 = U.
+ "uaddlp v0.8h, v0.16b \n" // V 16 bytes -> 8 shorts.
+ "uaddlp v1.8h, v1.16b \n" // U 16 bytes -> 8 shorts.
+ "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 of row 1
+ "uadalp v0.8h, v4.16b \n" // add row 1 V pairs to the 8 shorts.
+ "uadalp v1.8h, v5.16b \n" // add row 1 U pairs to the 8 shorts.
+ "uqrshrn v3.8b, v0.8h, #2 \n" // 2x2 average: V to v3
+ "uqrshrn v2.8b, v1.8h, #2 \n" // 2x2 average: U to v2
+ "subs %w3, %w3, #16 \n" // 16 processed per loop.
+ "st2 {v2.8b,v3.8b}, [%2], #16 \n" // store 8 pixels UV.
+ "b.gt 1b \n"
+ : "+r"(src_ayuv), // %0
+ "+r"(src_ayuv_1), // %1
+ "+r"(dst_uv), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
+}
+
+void AYUVToVURow_NEON(const uint8_t* src_ayuv,
+ int src_stride_ayuv,
+ uint8_t* dst_vu,
+ int width) {
+ const uint8_t* src_ayuv_1 = src_ayuv + src_stride_ayuv; // row 1
+ asm volatile(
+ // 2x2 average of V and U over rows 0 and 1, stored as interleaved VU.
+ "1: \n"
+ "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16
+ // pixels. v0 = V, v1 = U.
+ "uaddlp v0.8h, v0.16b \n" // V 16 bytes -> 8 shorts.
+ "uaddlp v1.8h, v1.16b \n" // U 16 bytes -> 8 shorts.
+ "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 of row 1
+ "uadalp v0.8h, v4.16b \n" // add row 1 V pairs to the 8 shorts.
+ "uadalp v1.8h, v5.16b \n" // add row 1 U pairs to the 8 shorts.
+ "uqrshrn v0.8b, v0.8h, #2 \n" // 2x2 average: V to v0
+ "uqrshrn v1.8b, v1.8h, #2 \n" // 2x2 average: U to v1
+ "subs %w3, %w3, #16 \n" // 16 processed per loop.
+ "st2 {v0.8b,v1.8b}, [%2], #16 \n" // store 8 pixels VU.
+ "b.gt 1b \n"
+ : "+r"(src_ayuv), // %0
+ "+r"(src_ayuv_1), // %1
+ "+r"(dst_vu), // %2
+ "+r"(width) // %3
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
+}
+
+// Copy row of AYUV Y's into Y. Y is byte 2 of each 4 byte AYUV pixel.
+void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
+ asm volatile(
+ "1: \n"
+ "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16
+ // pixels, Y lands in v2
+ "subs %w2, %w2, #16 \n" // 16 pixels per loop
+ "st1 {v2.16b}, [%1], #16 \n" // store 16 Y pixels
+ "b.gt 1b \n"
+ : "+r"(src_ayuv), // %0
+ "+r"(dst_y), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3");
+}
+
+void FloatDivToByteRow_NEON(const float* src_weights,
+ const float* src_values,
+ uint8_t* dst_out,
+ uint8_t* dst_mask,
+ int width) {
+ asm volatile(
+ "movi v0.4s, #0 \n" // all zero bits = 0.0f, for the compare
+ // 8 per loop: dst_out = values / weights as bytes, dst_mask = weights > 0.
+ "1: \n"
+ "ld1 {v1.4s,v2.4s}, [%0], #32 \n" // load 8 float weights
+ "ld1 {v3.4s,v4.4s}, [%1], #32 \n" // load 8 float values
+ "subs %w4, %w4, #8 \n" // 8 pixels per loop
+ // Quotients go to v3/v4 so the weights in v1/v2 survive for the compare.
+ "fdiv v3.4s, v3.4s, v1.4s \n" // values / weights
+ "fdiv v4.4s, v4.4s, v2.4s \n"
+
+ "fcvtas v3.4s, v3.4s \n" // float to int
+ "fcvtas v4.4s, v4.4s \n" // float to int
+ "uqxtn v3.4h, v3.4s \n" // 8 shorts
+ "uqxtn2 v3.8h, v4.4s \n"
+ "uqxtn v3.8b, v3.8h \n" // 8 bytes
+
+ "st1 {v3.8b}, [%2], #8 \n" // store 8 byte out
+ // NOTE(review): mask is 0xff where weight > 0, else 0 - confirm polarity.
+ "fcmgt v5.4s, v1.4s, v0.4s \n" // cmp weight to zero
+ "fcmgt v6.4s, v2.4s, v0.4s \n"
+ "uqxtn v5.4h, v5.4s \n" // 8 shorts
+ "uqxtn2 v5.8h, v6.4s \n"
+ "uqxtn v5.8b, v5.8h \n" // 8 bytes
+
+ "st1 {v5.8b}, [%3], #8 \n" // store 8 byte mask
+
+ "b.gt 1b \n"
+ : "+r"(src_weights), // %0
+ "+r"(src_values), // %1
+ "+r"(dst_out), // %2
+ "+r"(dst_mask), // %3
+ "+r"(width) // %4
+ :
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6");
+}
+
+// Convert biplanar UV channel of NV12 to NV21 by swapping each U, V byte pair.
+void UVToVURow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
+ asm volatile(
+ "1: \n"
+ "ld2 {v0.16b, v1.16b}, [%0], #32 \n" // load 16 UV pairs: U v0, V v1
+ "orr v2.16b, v0.16b, v0.16b \n" // move U after V
+ "subs %w2, %w2, #16 \n" // 16 pixels per loop
+ "st2 {v1.16b, v2.16b}, [%1], #32 \n" // store 16 VU pixels
+ "b.gt 1b \n"
+ : "+r"(src_uv), // %0
+ "+r"(dst_vu), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v0", "v1", "v2");
+}
+
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#ifdef __cplusplus
diff --git a/files/source/row_win.cc b/files/source/row_win.cc
index 2042c9c8..27e3da7b 100644
--- a/files/source/row_win.cc
+++ b/files/source/row_win.cc
@@ -4222,7 +4222,7 @@ __declspec(naked) void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,
add ecx, 4 - 1
jl convertloop1b
- // 1 pixel loop.
+ // 1 pixel loop.
convertloop1:
movd xmm3, [eax] // src argb
lea eax, [eax + 4]
@@ -5360,7 +5360,7 @@ void CumulativeSumToAverageRow_SSE2(const int32_t* topleft,
add ecx, 4 - 1
jl l1b
- // 1 pixel loop
+ // 1 pixel loop
l1:
movdqu xmm0, [eax]
psubd xmm0, [eax + edx * 4]
@@ -5448,7 +5448,7 @@ void ComputeCumulativeSumRow_SSE2(const uint8_t* row,
add ecx, 4 - 1
jl l1b
- // 1 pixel loop
+ // 1 pixel loop
l1:
movd xmm2, dword ptr [eax] // 1 argb pixel, 4 bytes.
lea eax, [eax + 4]
@@ -5534,7 +5534,7 @@ __declspec(naked) LIBYUV_API void ARGBAffineRow_SSE2(const uint8_t* src_argb,
add ecx, 4 - 1
jl l1b
- // 1 pixel loop
+ // 1 pixel loop
l1:
cvttps2dq xmm0, xmm2 // x, y float to int
packssdw xmm0, xmm0 // x, y as shorts
diff --git a/files/source/scale.cc b/files/source/scale.cc
index a8db93fd..ab085496 100644
--- a/files/source/scale.cc
+++ b/files/source/scale.cc
@@ -1788,6 +1788,75 @@ int I420Scale_16(const uint16_t* src_y,
return 0;
}
+// Scale an I444 image, where the Y, U and V planes all share one size.
+// Each plane is passed to ScalePlane with the same dimensions and filter.
+// Returns 0 on success or -1 if an argument is rejected.
+LIBYUV_API
+int I444Scale(const uint8_t* src_y,
+              int src_stride_y,
+              const uint8_t* src_u,
+              int src_stride_u,
+              const uint8_t* src_v,
+              int src_stride_v,
+              int src_width,
+              int src_height,
+              uint8_t* dst_y,
+              int dst_stride_y,
+              uint8_t* dst_u,
+              int dst_stride_u,
+              uint8_t* dst_v,
+              int dst_stride_v,
+              int dst_width,
+              int dst_height,
+              enum FilterMode filtering) {
+  const int have_planes = src_y && src_u && src_v && dst_y && dst_u && dst_v;
+  const int src_size_ok = src_width != 0 && src_height != 0 &&
+                          src_width <= 32768 && src_height <= 32768;
+  if (!have_planes || !src_size_ok || dst_width <= 0 || dst_height <= 0) {
+    return -1;
+  }
+  ScalePlane(src_y, src_stride_y, src_width, src_height,
+             dst_y, dst_stride_y, dst_width, dst_height, filtering);
+  ScalePlane(src_u, src_stride_u, src_width, src_height,
+             dst_u, dst_stride_u, dst_width, dst_height, filtering);
+  ScalePlane(src_v, src_stride_v, src_width, src_height,
+             dst_v, dst_stride_v, dst_width, dst_height, filtering);
+  return 0;
+}
+
+LIBYUV_API
+int I444Scale_16(const uint16_t* src_y,
+                 int src_stride_y,
+                 const uint16_t* src_u,
+                 int src_stride_u,
+                 const uint16_t* src_v,
+                 int src_stride_v,
+                 int src_width,
+                 int src_height,
+                 uint16_t* dst_y,
+                 int dst_stride_y,
+                 uint16_t* dst_u,
+                 int dst_stride_u,
+                 uint16_t* dst_v,
+                 int dst_stride_v,
+                 int dst_width,
+                 int dst_height,
+                 enum FilterMode filtering) {
+  const int ok = src_y && src_u && src_v && dst_y && dst_u && dst_v &&
+                 src_width != 0 && src_height != 0 && dst_width > 0 &&
+                 dst_height > 0 && src_width <= 32768 && src_height <= 32768;
+  if (!ok) {
+    return -1;  // null plane or out of range size
+  }
+  ScalePlane_16(src_y, src_stride_y, src_width, src_height,
+                dst_y, dst_stride_y, dst_width, dst_height, filtering);
+  ScalePlane_16(src_u, src_stride_u, src_width, src_height,
+                dst_u, dst_stride_u, dst_width, dst_height, filtering);
+  ScalePlane_16(src_v, src_stride_v, src_width, src_height,
+                dst_v, dst_stride_v, dst_width, dst_height, filtering);
+  return 0;  // Y, U and V planes were all scaled
+}
+
// Deprecated api
LIBYUV_API
int Scale(const uint8_t* src_y,
diff --git a/files/source/scale_gcc.cc b/files/source/scale_gcc.cc
index 312236d2..90a49f30 100644
--- a/files/source/scale_gcc.cc
+++ b/files/source/scale_gcc.cc
@@ -483,7 +483,7 @@ void ScaleRowDown34_SSSE3(const uint8_t* src_ptr,
: "m"(kShuf0), // %0
"m"(kShuf1), // %1
"m"(kShuf2) // %2
- );
+ );
asm volatile(
LABELALIGN
@@ -521,7 +521,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr,
: "m"(kShuf01), // %0
"m"(kShuf11), // %1
"m"(kShuf21) // %2
- );
+ );
asm volatile(
"movdqa %0,%%xmm5 \n" // kMadd01
"movdqa %1,%%xmm0 \n" // kMadd11
@@ -530,7 +530,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr,
: "m"(kMadd01), // %0
"m"(kMadd11), // %1
"m"(kRound34) // %2
- );
+ );
asm volatile(
LABELALIGN
@@ -587,7 +587,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr,
: "m"(kShuf01), // %0
"m"(kShuf11), // %1
"m"(kShuf21) // %2
- );
+ );
asm volatile(
"movdqa %0,%%xmm5 \n" // kMadd01
"movdqa %1,%%xmm0 \n" // kMadd11
@@ -596,7 +596,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr,
: "m"(kMadd01), // %0
"m"(kMadd11), // %1
"m"(kRound34) // %2
- );
+ );
asm volatile(
@@ -690,7 +690,7 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr,
"m"(kShufAb1), // %1
"m"(kShufAb2), // %2
"m"(kScaleAb2) // %3
- );
+ );
asm volatile(
LABELALIGN
@@ -734,7 +734,7 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr,
: "m"(kShufAc), // %0
"m"(kShufAc3), // %1
"m"(kScaleAc33) // %2
- );
+ );
asm volatile(
LABELALIGN
@@ -1272,7 +1272,7 @@ void ScaleARGBFilterCols_SSSE3(uint8_t* dst_argb,
:
: "m"(kShuffleColARGB), // %0
"m"(kShuffleFractions) // %1
- );
+ );
asm volatile(
"movd %5,%%xmm2 \n"
diff --git a/files/source/scale_mmi.cc b/files/source/scale_mmi.cc
index e12c6bb7..990463c2 100644
--- a/files/source/scale_mmi.cc
+++ b/files/source/scale_mmi.cc
@@ -26,6 +26,8 @@ extern "C" {
// This module is for Mips MMI.
#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
+// clang-format off
+
// CPU agnostic row functions
void ScaleRowDown2_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
@@ -1101,6 +1103,8 @@ void ScaleRowUp2_16_MMI(const uint16_t* src_ptr,
: "memory");
}
+// clang-format on
+
#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
#ifdef __cplusplus
diff --git a/files/source/scale_neon.cc b/files/source/scale_neon.cc
index 46f5ba4c..366b155b 100644
--- a/files/source/scale_neon.cc
+++ b/files/source/scale_neon.cc
@@ -40,7 +40,7 @@ void ScaleRowDown2_NEON(const uint8_t* src_ptr,
"+r"(dst_width) // %2
:
: "q0", "q1" // Clobber List
- );
+ );
}
// Read 32x1 average down and write 16x1.
@@ -61,7 +61,7 @@ void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr,
"+r"(dst_width) // %2
:
: "q0", "q1" // Clobber List
- );
+ );
}
// Read 32x2 average down and write 16x1.
@@ -92,7 +92,7 @@ void ScaleRowDown2Box_NEON(const uint8_t* src_ptr,
"+r"(dst_width) // %3
:
: "q0", "q1", "q2", "q3" // Clobber List
- );
+ );
}
void ScaleRowDown4_NEON(const uint8_t* src_ptr,
@@ -523,7 +523,7 @@ void ScaleAddRow_NEON(const uint8_t* src_ptr,
"+r"(src_width) // %2
:
: "memory", "cc", "q0", "q1", "q2" // Clobber List
- );
+ );
}
// TODO(Yang Zhang): Investigate less load instructions for
@@ -705,7 +705,7 @@ void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr,
"+r"(dst_width) // %2
:
: "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List
- );
+ );
}
// 46: f964 018d vld4.32 {d16,d18,d20,d22}, [r4]!
@@ -734,7 +734,7 @@ void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb,
"+r"(dst_width) // %2
:
: "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List
- );
+ );
}
void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr,
diff --git a/files/source/scale_neon64.cc b/files/source/scale_neon64.cc
index f4aed5fc..0a7b80ce 100644
--- a/files/source/scale_neon64.cc
+++ b/files/source/scale_neon64.cc
@@ -38,7 +38,7 @@ void ScaleRowDown2_NEON(const uint8_t* src_ptr,
"+r"(dst_width) // %2
:
: "v0", "v1" // Clobber List
- );
+ );
}
// Read 32x1 average down and write 16x1.
@@ -60,7 +60,7 @@ void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr,
"+r"(dst_width) // %2
:
: "v0", "v1" // Clobber List
- );
+ );
}
// Read 32x2 average down and write 16x1.
@@ -89,7 +89,7 @@ void ScaleRowDown2Box_NEON(const uint8_t* src_ptr,
"+r"(dst_width) // %3
:
: "v0", "v1", "v2", "v3" // Clobber List
- );
+ );
}
void ScaleRowDown4_NEON(const uint8_t* src_ptr,
@@ -534,7 +534,7 @@ void ScaleAddRow_NEON(const uint8_t* src_ptr,
"+r"(src_width) // %2
:
: "memory", "cc", "v0", "v1", "v2" // Clobber List
- );
+ );
}
// TODO(Yang Zhang): Investigate less load instructions for
@@ -719,7 +719,7 @@ void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr,
"+r"(dst_width) // %2
:
: "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List
- );
+ );
}
void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb,
@@ -742,7 +742,7 @@ void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb,
"+r"(dst_width) // %2
:
: "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List
- );
+ );
}
void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr,
@@ -991,7 +991,7 @@ void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
"+r"(dst_width) // %3
:
: "v0", "v1", "v2", "v3" // Clobber List
- );
+ );
}
// Read 8x2 upsample with filtering and write 16x1.
@@ -1041,7 +1041,7 @@ void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
"r"(14LL) // %5
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18",
"v19" // Clobber List
- );
+ );
}
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
diff --git a/files/tools_libyuv/autoroller/roll_deps.py b/files/tools_libyuv/autoroller/roll_deps.py
index ea8f69f1..8359d309 100755
--- a/files/tools_libyuv/autoroller/roll_deps.py
+++ b/files/tools_libyuv/autoroller/roll_deps.py
@@ -298,9 +298,6 @@ def GenerateCommitMessage(current_cr_rev, new_cr_rev, current_commit_pos,
commit_msg.append('Change log: %s' % (CHROMIUM_LOG_TEMPLATE % rev_interval))
commit_msg.append('Full diff: %s\n' % (CHROMIUM_COMMIT_TEMPLATE %
rev_interval))
- # TBR field will be empty unless in some custom cases, where some engineers
- # are added.
- tbr_authors = ''
if changed_deps_list:
commit_msg.append('Changed dependencies:')
@@ -322,7 +319,11 @@ def GenerateCommitMessage(current_cr_rev, new_cr_rev, current_commit_pos,
else:
commit_msg.append('No update to Clang.\n')
- commit_msg.append('TBR=%s' % tbr_authors)
+ # TBR needs to be non-empty for Gerrit to process it.
+ git_author = _RunCommand(['git', 'config', 'user.email'],
+ working_dir=CHECKOUT_SRC_DIR)[0].strip()
+ commit_msg.append('TBR=%s' % git_author)
+
commit_msg.append('BUG=None')
return '\n'.join(commit_msg)
diff --git a/files/unit_test/convert_test.cc b/files/unit_test/convert_test.cc
index d97b4fc7..32a4cd1c 100644
--- a/files/unit_test/convert_test.cc
+++ b/files/unit_test/convert_test.cc
@@ -311,10 +311,10 @@ int I400ToNV21(const uint8_t* src_y,
SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
OFF); \
align_buffer_page_end(dst_y_c, kWidth* kHeight); \
- align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \
+ align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
- align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \
+ align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
for (int i = 0; i < kHeight; ++i) \
for (int j = 0; j < kWidth; ++j) \
@@ -329,21 +329,21 @@ int I400ToNV21(const uint8_t* src_y,
} \
memset(dst_y_c, 1, kWidth* kHeight); \
memset(dst_uv_c, 2, \
- SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
memset(dst_y_opt, 101, kWidth* kHeight); \
memset(dst_uv_opt, 102, \
- SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
MaskCpuFlags(disable_cpu_flags_); \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_c, kWidth, \
- dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X), kWidth, NEG kHeight); \
+ dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_opt, kWidth, \
- dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X), kWidth, NEG kHeight); \
+ dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \
} \
int max_diff = 0; \
for (int i = 0; i < kHeight; ++i) { \
@@ -357,12 +357,12 @@ int I400ToNV21(const uint8_t* src_y,
} \
EXPECT_LE(max_diff, 1); \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
- for (int j = 0; j < SUBSAMPLE(kWidth * 2, SUBSAMP_X); ++j) { \
+ for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \
int abs_diff = \
abs(static_cast<int>( \
- dst_uv_c[i * SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j]) - \
+ dst_uv_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]) - \
static_cast<int>( \
- dst_uv_opt[i * SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j])); \
+ dst_uv_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j])); \
if (abs_diff > max_diff) { \
max_diff = abs_diff; \
} \
@@ -395,6 +395,99 @@ TESTPLANARTOBP(I422, 2, 1, NV21, 2, 2)
TESTPLANARTOBP(I444, 1, 1, NV21, 2, 2)
TESTPLANARTOBP(I400, 2, 2, NV21, 2, 2)
+// Defines one biplanar-to-biplanar conversion test. The conversion function
+// SRC_FMT_PLANAR##To##FMT_PLANAR is run once after
+// MaskCpuFlags(disable_cpu_flags_) and benchmark_iterations_ times after
+// MaskCpuFlags(benchmark_cpu_info_); the Y and UV outputs of the two runs may
+// differ by at most 1.
+//   SRC_SUBSAMP_X/Y, SUBSAMP_X/Y: chroma subsampling of source / destination.
+//   W1280: frame width (clamped to at least 1).  N: test name suffix.
+//   NEG: sign applied to the height argument.
+//   OFF: byte offset added to both source pointers.
+#define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                          FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, \
+                          OFF) \
+  TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
+    const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
+    const int kHeight = benchmark_height_; \
+    align_buffer_page_end(src_y, kWidth * kHeight + OFF); \
+    align_buffer_page_end(src_uv, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * 2 * \
+                                          SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
+                                      OFF); \
+    align_buffer_page_end(dst_y_c, kWidth * kHeight); \
+    align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
+                                        SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+    align_buffer_page_end(dst_y_opt, kWidth * kHeight); \
+    align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
+                                          SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+    for (int i = 0; i < kHeight; ++i) \
+      for (int j = 0; j < kWidth; ++j) \
+        src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
+    for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
+      for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
+        /* Each chroma sample is an interleaved 2-byte pair, so the column */ \
+        /* index must be scaled by 2 together with the row index;         */ \
+        /* otherwise most of every row is left unwritten.                 */ \
+        src_uv[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X) + j) * 2 + 0 + OFF] = \
+            (fastrand() & 0xff); \
+        src_uv[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X) + j) * 2 + 1 + OFF] = \
+            (fastrand() & 0xff); \
+      } \
+    } \
+    /* Different fill values so untouched output bytes show up as a diff. */ \
+    memset(dst_y_c, 1, kWidth * kHeight); \
+    memset(dst_uv_c, 2, \
+           SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+    memset(dst_y_opt, 101, kWidth * kHeight); \
+    memset(dst_uv_opt, 102, \
+           SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+    MaskCpuFlags(disable_cpu_flags_); \
+    SRC_FMT_PLANAR##To##FMT_PLANAR( \
+        src_y + OFF, kWidth, src_uv + OFF, \
+        SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * 2, dst_y_c, kWidth, dst_uv_c, \
+        SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \
+    MaskCpuFlags(benchmark_cpu_info_); \
+    for (int i = 0; i < benchmark_iterations_; ++i) { \
+      SRC_FMT_PLANAR##To##FMT_PLANAR( \
+          src_y + OFF, kWidth, src_uv + OFF, \
+          SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * 2, dst_y_opt, kWidth, dst_uv_opt, \
+          SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \
+    } \
+    int max_diff = 0; \
+    for (int i = 0; i < kHeight; ++i) { \
+      for (int j = 0; j < kWidth; ++j) { \
+        int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
+                           static_cast<int>(dst_y_opt[i * kWidth + j])); \
+        if (abs_diff > max_diff) { \
+          max_diff = abs_diff; \
+        } \
+      } \
+    } \
+    EXPECT_LE(max_diff, 1); \
+    for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
+      for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \
+        int abs_diff = \
+            abs(static_cast<int>( \
+                    dst_uv_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]) - \
+                static_cast<int>( \
+                    dst_uv_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j])); \
+        if (abs_diff > max_diff) { \
+          max_diff = abs_diff; \
+        } \
+      } \
+    } \
+    EXPECT_LE(max_diff, 1); \
+    free_aligned_buffer_page_end(dst_y_c); \
+    free_aligned_buffer_page_end(dst_uv_c); \
+    free_aligned_buffer_page_end(dst_y_opt); \
+    free_aligned_buffer_page_end(dst_uv_opt); \
+    free_aligned_buffer_page_end(src_y); \
+    free_aligned_buffer_page_end(src_uv); \
+  }
+
+// Instantiates TESTBIPLANARTOBPI for one format pair in four variants:
+//   _Any       width reduced by 4,
+//   _Unaligned source pointers offset by 1 byte,
+//   _Invert    negated height,
+//   _Opt       full benchmark width.
+// Each suffix is used exactly once so the generated test names are unique.
+#define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
+                         FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
+  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
+                    SUBSAMP_X, SUBSAMP_Y, benchmark_width_ - 4, _Any, +, 0) \
+  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
+                    SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Unaligned, +, 1) \
+  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
+                    SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, -, 0) \
+  TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
+                    SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0)
+
+// TODO(fbarchard): Fix msan on this unittest
+// TESTBIPLANARTOBP(NV21, 2, 2, NV12, 2, 2)
+
#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF, \
DOY) \
@@ -680,8 +773,8 @@ TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1)
TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1, 2)
TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2)
-#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
- W1280, DIFF, N, NEG, OFF) \
+#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, \
+ BPP_B, W1280, DIFF, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = benchmark_height_; \
@@ -716,9 +809,9 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2)
align_buffer_page_end(dst_argb32_opt, kWidth * 4 * kHeight); \
memset(dst_argb32_c, 2, kWidth * 4 * kHeight); \
memset(dst_argb32_opt, 102, kWidth * 4 * kHeight); \
- FMT_B##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth, \
+ FMT_C##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth, \
kHeight); \
- FMT_B##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth, \
+ FMT_C##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth, \
kHeight); \
int max_diff = 0; \
for (int i = 0; i < kHeight; ++i) { \
@@ -740,25 +833,27 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2)
free_aligned_buffer_page_end(dst_argb32_opt); \
}
-#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, DIFF) \
- TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
- benchmark_width_ - 4, DIFF, _Any, +, 0) \
- TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
- benchmark_width_, DIFF, _Unaligned, +, 1) \
- TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
- benchmark_width_, DIFF, _Invert, -, 0) \
- TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
+#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
+ DIFF) \
+ TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
+ benchmark_width_ - 4, DIFF, _Any, +, 0) \
+ TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
+ benchmark_width_, DIFF, _Unaligned, +, 1) \
+ TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
+ benchmark_width_, DIFF, _Invert, -, 0) \
+ TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
benchmark_width_, DIFF, _Opt, +, 0)
-TESTBIPLANARTOB(NV12, 2, 2, ARGB, 4, 2)
-TESTBIPLANARTOB(NV21, 2, 2, ARGB, 4, 2)
-TESTBIPLANARTOB(NV12, 2, 2, ABGR, 4, 2)
-TESTBIPLANARTOB(NV21, 2, 2, ABGR, 4, 2)
-TESTBIPLANARTOB(NV12, 2, 2, RGB24, 3, 2)
-TESTBIPLANARTOB(NV21, 2, 2, RGB24, 3, 2)
-TESTBIPLANARTOB(NV12, 2, 2, RAW, 3, 2)
-TESTBIPLANARTOB(NV21, 2, 2, RAW, 3, 2)
-TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9)
+TESTBIPLANARTOB(NV12, 2, 2, ARGB, ARGB, 4, 2)
+TESTBIPLANARTOB(NV21, 2, 2, ARGB, ARGB, 4, 2)
+TESTBIPLANARTOB(NV12, 2, 2, ABGR, ABGR, 4, 2)
+TESTBIPLANARTOB(NV21, 2, 2, ABGR, ABGR, 4, 2)
+TESTBIPLANARTOB(NV12, 2, 2, RGB24, RGB24, 3, 2)
+TESTBIPLANARTOB(NV21, 2, 2, RGB24, RGB24, 3, 2)
+TESTBIPLANARTOB(NV12, 2, 2, RAW, RAW, 3, 2)
+TESTBIPLANARTOB(NV21, 2, 2, RAW, RAW, 3, 2)
+TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2, 9)
+TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3, 2)
#ifdef DO_THREE_PLANES
// Do 3 allocations for yuv. conventional but slower.
@@ -885,26 +980,27 @@ TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9)
TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
benchmark_width_, DIFF, _Opt, +, 0)
+TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2, 4)
TESTATOPLANAR(ARGB, 4, 1, I420, 2, 2, 4)
+TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1, 2)
+TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 2)
TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, ARM_YUV_ERROR)
TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, ARM_YUV_ERROR)
+TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15)
+TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17)
TESTATOPLANAR(BGRA, 4, 1, I420, 2, 2, 4)
-TESTATOPLANAR(ABGR, 4, 1, I420, 2, 2, 4)
-TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4)
+TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 2)
+TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 2)
TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4)
TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4)
+// TODO(fbarchard): Investigate J420 error of 11 on Windows.
+TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2, 11)
TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5)
-// TODO(fbarchard): Make 1555 neon work same as C code, reduce to diff 9.
-TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15)
-TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17)
-TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1, 2)
-TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 2)
-TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2, 2)
+TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4)
TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2, 2)
-TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 2)
TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1, 2)
-TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 2)
-TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 2)
+TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2, 2)
+TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1, 2)
#define TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, \
SUBSAMP_Y, W1280, N, NEG, OFF) \
@@ -978,6 +1074,8 @@ TESTATOBIPLANAR(ARGB, 1, 4, NV12, 2, 2)
TESTATOBIPLANAR(ARGB, 1, 4, NV21, 2, 2)
TESTATOBIPLANAR(YUY2, 2, 4, NV12, 2, 2)
TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2)
+TESTATOBIPLANAR(AYUV, 1, 4, NV12, 2, 2)
+TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
#define TESTATOBI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
HEIGHT_B, W1280, DIFF, N, NEG, OFF) \
@@ -1069,45 +1167,46 @@ TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2)
HEIGHT_B, DIFF)
// TODO(fbarchard): make ARM version of C code that matches NEON.
+TESTATOB(AB30, 4, 4, 1, ABGR, 4, 4, 1, 0)
+TESTATOB(AB30, 4, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(ABGR, 4, 4, 1, AR30, 4, 4, 1, 0)
+TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(AR30, 4, 4, 1, AB30, 4, 4, 1, 0)
+TESTATOB(AR30, 4, 4, 1, ABGR, 4, 4, 1, 0)
+TESTATOB(AR30, 4, 4, 1, AR30, 4, 4, 1, 0)
+TESTATOB(AR30, 4, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, AR30, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, BGRA, 4, 4, 1, 0)
-TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1, 0)
-TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0)
+TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1, 2)
+TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1, 2)
TESTATOB(ARGB, 4, 4, 1, RAW, 3, 3, 1, 0)
TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0)
TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
-TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0)
-TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0)
-TESTATOB(ABGR, 4, 4, 1, AR30, 4, 4, 1, 0)
-TESTATOB(ARGB, 4, 4, 1, AR30, 4, 4, 1, 0)
-TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4)
+TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 4)
-TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1, 2)
-TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1, 2)
+TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4)
+TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1, 0)
TESTATOB(BGRA, 4, 4, 1, ARGB, 4, 4, 1, 0)
-TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0)
-TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0)
-TESTATOB(AR30, 4, 4, 1, AR30, 4, 4, 1, 0)
+TESTATOB(I400, 1, 1, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(I400, 1, 1, 1, I400, 1, 1, 1, 0)
+TESTATOB(I400, 1, 1, 1, I400Mirror, 1, 1, 1, 0)
+TESTATOB(J400, 1, 1, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(J400, 1, 1, 1, J400, 1, 1, 1, 0)
TESTATOB(RAW, 3, 3, 1, ARGB, 4, 4, 1, 0)
TESTATOB(RAW, 3, 3, 1, RGB24, 3, 3, 1, 0)
TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0)
+TESTATOB(RGB24, 3, 3, 1, J400, 1, 1, 1, 0)
TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0)
-TESTATOB(ARGB1555, 2, 2, 1, ARGB, 4, 4, 1, 0)
-TESTATOB(ARGB4444, 2, 2, 1, ARGB, 4, 4, 1, 0)
-TESTATOB(AR30, 4, 4, 1, ARGB, 4, 4, 1, 0)
-TESTATOB(AR30, 4, 4, 1, ABGR, 4, 4, 1, 0)
-TESTATOB(AB30, 4, 4, 1, ARGB, 4, 4, 1, 0)
-TESTATOB(AB30, 4, 4, 1, ABGR, 4, 4, 1, 0)
-TESTATOB(AR30, 4, 4, 1, AB30, 4, 4, 1, 0)
-TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR)
+TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0)
TESTATOB(UYVY, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR)
+TESTATOB(YUY2, 2, 4, 1, ARGB, 4, 4, 1, ARM_YUV_ERROR)
TESTATOB(YUY2, 2, 4, 1, Y, 1, 1, 1, 0)
-TESTATOB(I400, 1, 1, 1, ARGB, 4, 4, 1, 0)
-TESTATOB(J400, 1, 1, 1, ARGB, 4, 4, 1, 0)
-TESTATOB(I400, 1, 1, 1, I400, 1, 1, 1, 0)
-TESTATOB(J400, 1, 1, 1, J400, 1, 1, 1, 0)
-TESTATOB(I400, 1, 1, 1, I400Mirror, 1, 1, 1, 0)
-TESTATOB(ARGB, 4, 4, 1, ARGBMirror, 4, 4, 1, 0)
#define TESTATOBDI(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
HEIGHT_B, W1280, DIFF, N, NEG, OFF) \
@@ -1291,6 +1390,7 @@ TEST_F(LibYUVConvertTest, ValidateJpeg) {
// EOI, SOI. Expect pass.
orig_pixels[0] = 0xff;
orig_pixels[1] = 0xd8; // SOI.
+ orig_pixels[2] = 0xff;
orig_pixels[kSize - kOff + 0] = 0xff;
orig_pixels[kSize - kOff + 1] = 0xd9; // EOI.
for (int times = 0; times < benchmark_iterations_; ++times) {
@@ -1317,6 +1417,7 @@ TEST_F(LibYUVConvertTest, ValidateJpegLarge) {
// EOI, SOI. Expect pass.
orig_pixels[0] = 0xff;
orig_pixels[1] = 0xd8; // SOI.
+ orig_pixels[2] = 0xff;
orig_pixels[kSize - kOff + 0] = 0xff;
orig_pixels[kSize - kOff + 1] = 0xd9; // EOI.
for (int times = 0; times < benchmark_iterations_; ++times) {
@@ -1350,6 +1451,7 @@ TEST_F(LibYUVConvertTest, InvalidateJpeg) {
// SOI but no EOI. Expect fail.
orig_pixels[0] = 0xff;
orig_pixels[1] = 0xd8; // SOI.
+ orig_pixels[2] = 0xff;
for (int times = 0; times < benchmark_iterations_; ++times) {
EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize));
}
@@ -1367,22 +1469,24 @@ TEST_F(LibYUVConvertTest, InvalidateJpeg) {
TEST_F(LibYUVConvertTest, FuzzJpeg) {
// SOI but no EOI. Expect fail.
for (int times = 0; times < benchmark_iterations_; ++times) {
- const int kSize = fastrand() % 5000 + 2;
+ const int kSize = fastrand() % 5000 + 3;
align_buffer_page_end(orig_pixels, kSize);
MemRandomize(orig_pixels, kSize);
// Add SOI so frame will be scanned.
orig_pixels[0] = 0xff;
orig_pixels[1] = 0xd8; // SOI.
+ orig_pixels[2] = 0xff;
orig_pixels[kSize - 1] = 0xff;
- ValidateJpeg(orig_pixels, kSize); // Failure normally expected.
+ ValidateJpeg(orig_pixels,
+ kSize); // Failure normally expected.
free_aligned_buffer_page_end(orig_pixels);
}
}
-// Test data created in GIMP. In export jpeg, disable thumbnails etc,
-// choose a subsampling, and use low quality (50) to keep size small.
-// Generated with xxd -i test.jpg
+// Test data created in GIMP. In export jpeg, disable
+// thumbnails etc, choose a subsampling, and use low quality
+// (50) to keep size small. Generated with xxd -i test.jpg
// test 0 is J400
static const uint8_t kTest0Jpg[] = {
0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01,
@@ -1984,8 +2088,8 @@ TEST_F(LibYUVConvertTest, TestMJPGInfo) {
EXPECT_EQ(1, ShowJPegInfo(kTest1Jpg, kTest1JpgLen));
EXPECT_EQ(1, ShowJPegInfo(kTest2Jpg, kTest2JpgLen));
EXPECT_EQ(1, ShowJPegInfo(kTest3Jpg, kTest3JpgLen));
- EXPECT_EQ(1,
- ShowJPegInfo(kTest4Jpg, kTest4JpgLen)); // Valid but unsupported.
+ EXPECT_EQ(1, ShowJPegInfo(kTest4Jpg,
+ kTest4JpgLen)); // Valid but unsupported.
}
#endif // HAVE_JPEG
@@ -2903,7 +3007,8 @@ TEST_F(LibYUVConvertTest, TestH010ToARGB) {
}
// Test 10 bit YUV to 10 bit RGB
-// Caveat: Result is near due to float rounding in expected result.
+// Caveat: Result is near due to float rounding in expected
+// result.
TEST_F(LibYUVConvertTest, TestH010ToAR30) {
const int kSize = 1024;
int histogram_b[1024];
@@ -2966,7 +3071,8 @@ TEST_F(LibYUVConvertTest, TestH010ToAR30) {
}
// Test 10 bit YUV to 10 bit RGB
-// Caveat: Result is near due to float rounding in expected result.
+// Caveat: Result is near due to float rounding in expected
+// result.
TEST_F(LibYUVConvertTest, TestH010ToAB30) {
const int kSize = 1024;
int histogram_b[1024];
diff --git a/files/unit_test/planar_test.cc b/files/unit_test/planar_test.cc
index 75608955..70f8966e 100644
--- a/files/unit_test/planar_test.cc
+++ b/files/unit_test/planar_test.cc
@@ -3186,7 +3186,8 @@ TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) {
}
GaussRow_C(&orig_pixels[0], &dst_pixels_c[0], 640);
for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) {
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+#if !defined(LIBYUV_DISABLE_NEON) && \
+ (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
int has_neon = TestCpuFlag(kCpuHasNEON);
if (has_neon) {
GaussRow_NEON(&orig_pixels[0], &dst_pixels_opt[0], 640);
@@ -3239,7 +3240,8 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
&orig_pixels[640 * 3], &orig_pixels[640 * 4], &dst_pixels_c[0],
640);
for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) {
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+#if !defined(LIBYUV_DISABLE_NEON) && \
+ (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
int has_neon = TestCpuFlag(kCpuHasNEON);
if (has_neon) {
GaussCol_NEON(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2],
@@ -3267,4 +3269,104 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
EXPECT_EQ(dst_pixels_c[639], static_cast<uint32_t>(30704));
}
+// Compares two runs of the FloatDivToByte row function.
+// The reference run always uses FloatDivToByteRow_C. The second run is
+// repeated benchmark_iterations times and uses FloatDivToByteRow_NEON when
+// opt is true and HAS_FLOATDIVTOBYTEROW_NEON is defined, otherwise the C row.
+// Every weight is set to scale; the values are a sine wave.
+// Returns the largest per-element sum of the absolute output difference and
+// the absolute mask difference (0 = exact match).
+float TestFloatDivToByte(int benchmark_width,
+                         int benchmark_height,
+                         int benchmark_iterations,
+                         float scale,
+                         bool opt) {
+  int i, j;
+  // NEON does multiple of 8, so round count up
+  const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
+  align_buffer_page_end(src_weights, kPixels * 4);
+  align_buffer_page_end(src_values, kPixels * 4);
+  align_buffer_page_end(dst_out_c, kPixels);
+  align_buffer_page_end(dst_out_opt, kPixels);
+  align_buffer_page_end(dst_mask_c, kPixels);
+  align_buffer_page_end(dst_mask_opt, kPixels);
+
+  // Random fill (MemRandomize) works but may contain some denormals affecting
+  // performance, and large values are problematic: audio is really -1 to 1.
+  // Use a constant weight and a sine wave instead.
+  for (i = 0; i < kPixels; ++i) {
+    (reinterpret_cast<float*>(src_weights))[i] = scale;
+    (reinterpret_cast<float*>(src_values))[i] =
+        sinf(static_cast<float>(i) * 0.1f);
+  }
+  // Distinct fill values so an unwritten output byte shows up as a difference.
+  memset(dst_out_c, 0, kPixels);
+  memset(dst_out_opt, 1, kPixels);
+  memset(dst_mask_c, 2, kPixels);
+  memset(dst_mask_opt, 3, kPixels);
+
+  FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights),
+                      reinterpret_cast<float*>(src_values), dst_out_c,
+                      dst_mask_c, kPixels);
+
+  for (j = 0; j < benchmark_iterations; j++) {
+    if (opt) {
+#ifdef HAS_FLOATDIVTOBYTEROW_NEON
+      FloatDivToByteRow_NEON(reinterpret_cast<float*>(src_weights),
+                             reinterpret_cast<float*>(src_values), dst_out_opt,
+                             dst_mask_opt, kPixels);
+#else
+      FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights),
+                          reinterpret_cast<float*>(src_values), dst_out_opt,
+                          dst_mask_opt, kPixels);
+#endif
+    } else {
+      FloatDivToByteRow_C(reinterpret_cast<float*>(src_weights),
+                          reinterpret_cast<float*>(src_values), dst_out_opt,
+                          dst_mask_opt, kPixels);
+    }
+  }
+
+  // Accumulate in int: the sum of two byte differences can reach 510, which a
+  // uint8_t would wrap (e.g. 256 becomes 0) and so hide a mismatch.
+  int max_diff = 0;
+  for (i = 0; i < kPixels; ++i) {
+    int abs_diff = abs(dst_out_c[i] - dst_out_opt[i]) +
+                   abs(dst_mask_c[i] - dst_mask_opt[i]);
+    if (abs_diff > max_diff) {
+      max_diff = abs_diff;
+    }
+  }
+
+  free_aligned_buffer_page_end(src_weights);
+  free_aligned_buffer_page_end(src_values);
+  free_aligned_buffer_page_end(dst_out_c);
+  free_aligned_buffer_page_end(dst_out_opt);
+  free_aligned_buffer_page_end(dst_mask_c);
+  free_aligned_buffer_page_end(dst_mask_opt);
+
+  return static_cast<float>(max_diff);
+}
+
+// Runs TestFloatDivToByte with opt disabled and expects no difference.
+TEST_F(LibYUVPlanarTest, TestFloatDivToByte_C) {
+  const float kScale = 1.2f;
+  const bool kUseOpt = false;
+  float diff = TestFloatDivToByte(benchmark_width_, benchmark_height_,
+                                  benchmark_iterations_, kScale, kUseOpt);
+  EXPECT_EQ(0, diff);
+}
+
+// Runs TestFloatDivToByte with opt enabled and expects no difference from the
+// C reference.
+TEST_F(LibYUVPlanarTest, TestFloatDivToByte_Opt) {
+  const float kScale = 1.2f;
+  const bool kUseOpt = true;
+  float diff = TestFloatDivToByte(benchmark_width_, benchmark_height_,
+                                  benchmark_iterations_, kScale, kUseOpt);
+  EXPECT_EQ(0, diff);
+}
+
+// Checks that UVToVURow_C swaps the two bytes of every 2-byte pair: for each
+// pair, output byte 0 must equal input byte 1 and vice versa.
+// NOTE(review): the source buffer is named *_vu and the destination *_uv,
+// which reads as the reverse of the function name; the swap is symmetric so
+// the check is unaffected.
+TEST_F(LibYUVPlanarTest, UVToVURow) {
+ const int kPixels = benchmark_width_ * benchmark_height_;
+ // Two bytes per pixel pair in both buffers.
+ align_buffer_page_end(src_pixels_vu, kPixels * 2);
+ align_buffer_page_end(dst_pixels_uv, kPixels * 2);
+
+ MemRandomize(src_pixels_vu, kPixels * 2);
+ // Pre-fill the destination so bytes the row function fails to write are
+ // unlikely to match the randomized source.
+ memset(dst_pixels_uv, 1, kPixels * 2);
+
+ UVToVURow_C(src_pixels_vu, dst_pixels_uv, kPixels);
+
+ for (int i = 0; i < kPixels; ++i) {
+ EXPECT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]);
+ EXPECT_EQ(dst_pixels_uv[i * 2 + 1], src_pixels_vu[i * 2 + 0]);
+ }
+
+ free_aligned_buffer_page_end(src_pixels_vu);
+ free_aligned_buffer_page_end(dst_pixels_uv);
+}
+
} // namespace libyuv
diff --git a/files/unit_test/rotate_test.cc b/files/unit_test/rotate_test.cc
index d04b96e9..61941e63 100644
--- a/files/unit_test/rotate_test.cc
+++ b/files/unit_test/rotate_test.cc
@@ -135,6 +135,123 @@ TEST_F(LibYUVRotateTest, DISABLED_I420Rotate270_Odd) {
benchmark_cpu_info_);
}
+// Rotates a randomly filled I444 frame with I444Rotate twice and expects the
+// results to be byte-identical: once after MaskCpuFlags(disable_cpu_flags),
+// and benchmark_iterations times after MaskCpuFlags(benchmark_cpu_info).
+// The three planes are stored back to back in one buffer (Y, then U, then V),
+// each with a stride equal to the frame width.
+static void I444TestRotate(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ libyuv::RotationMode mode,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ // Clamp dimensions so no plane is empty.
+ if (src_width < 1) {
+ src_width = 1;
+ }
+ // Only an exact 0 is replaced; negative heights are kept and sized via Abs().
+ // NOTE(review): presumably a negative src_height requests a vertically
+ // flipped source - confirm against I444Rotate.
+ if (src_height == 0) {
+ src_height = 1;
+ }
+ if (dst_width < 1) {
+ dst_width = 1;
+ }
+ if (dst_height < 1) {
+ dst_height = 1;
+ }
+ // U and V planes are sized the same as the Y plane.
+ int src_i444_y_size = src_width * Abs(src_height);
+ int src_i444_uv_size = src_width * Abs(src_height);
+ int src_i444_size = src_i444_y_size + src_i444_uv_size * 2;
+ align_buffer_page_end(src_i444, src_i444_size);
+ for (int i = 0; i < src_i444_size; ++i) {
+ src_i444[i] = fastrand() & 0xff;
+ }
+
+ int dst_i444_y_size = dst_width * dst_height;
+ int dst_i444_uv_size = dst_width * dst_height;
+ int dst_i444_size = dst_i444_y_size + dst_i444_uv_size * 2;
+ align_buffer_page_end(dst_i444_c, dst_i444_size);
+ align_buffer_page_end(dst_i444_opt, dst_i444_size);
+ // Different fill values so bytes left unwritten by either run mismatch.
+ memset(dst_i444_c, 2, dst_i444_size);
+ memset(dst_i444_opt, 3, dst_i444_size);
+
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ I444Rotate(src_i444, src_width, src_i444 + src_i444_y_size, src_width,
+ src_i444 + src_i444_y_size + src_i444_uv_size, src_width,
+ dst_i444_c, dst_width, dst_i444_c + dst_i444_y_size, dst_width,
+ dst_i444_c + dst_i444_y_size + dst_i444_uv_size, dst_width,
+ src_width, src_height, mode);
+
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ for (int i = 0; i < benchmark_iterations; ++i) {
+ I444Rotate(src_i444, src_width, src_i444 + src_i444_y_size, src_width,
+ src_i444 + src_i444_y_size + src_i444_uv_size, src_width,
+ dst_i444_opt, dst_width, dst_i444_opt + dst_i444_y_size,
+ dst_width, dst_i444_opt + dst_i444_y_size + dst_i444_uv_size,
+ dst_width, src_width, src_height, mode);
+ }
+
+ // Rotation should be exact.
+ for (int i = 0; i < dst_i444_size; ++i) {
+ EXPECT_EQ(dst_i444_c[i], dst_i444_opt[i]);
+ }
+
+ free_aligned_buffer_page_end(dst_i444_c);
+ free_aligned_buffer_page_end(dst_i444_opt);
+ free_aligned_buffer_page_end(src_i444);
+}
+
+// kRotate0 at benchmark size; the destination keeps the source dimensions.
+TEST_F(LibYUVRotateTest, I444Rotate0_Opt) {
+  const int kSrcWidth = benchmark_width_;
+  const int kSrcHeight = benchmark_height_;
+  I444TestRotate(kSrcWidth, kSrcHeight, /*dst_width=*/kSrcWidth,
+                 /*dst_height=*/kSrcHeight, kRotate0, benchmark_iterations_,
+                 disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+// kRotate90 at benchmark size; the destination swaps width and height.
+TEST_F(LibYUVRotateTest, I444Rotate90_Opt) {
+  const int kSrcWidth = benchmark_width_;
+  const int kSrcHeight = benchmark_height_;
+  I444TestRotate(kSrcWidth, kSrcHeight, /*dst_width=*/kSrcHeight,
+                 /*dst_height=*/kSrcWidth, kRotate90, benchmark_iterations_,
+                 disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+// kRotate180 at benchmark size; the destination keeps the source dimensions.
+TEST_F(LibYUVRotateTest, I444Rotate180_Opt) {
+  const int kSrcWidth = benchmark_width_;
+  const int kSrcHeight = benchmark_height_;
+  I444TestRotate(kSrcWidth, kSrcHeight, /*dst_width=*/kSrcWidth,
+                 /*dst_height=*/kSrcHeight, kRotate180, benchmark_iterations_,
+                 disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+// kRotate270 at benchmark size; the destination swaps width and height.
+TEST_F(LibYUVRotateTest, I444Rotate270_Opt) {
+  const int kSrcWidth = benchmark_width_;
+  const int kSrcHeight = benchmark_height_;
+  I444TestRotate(kSrcWidth, kSrcHeight, /*dst_width=*/kSrcHeight,
+                 /*dst_height=*/kSrcWidth, kRotate270, benchmark_iterations_,
+                 disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+// TODO(fbarchard): Remove odd width tests.
+// Odd width tests work but disabled because they use C code and can be
+// tested by passing an odd width command line or environment variable.
+// Disabled: kRotate0 with width reduced by 3 and height reduced by 1.
+TEST_F(LibYUVRotateTest, DISABLED_I444Rotate0_Odd) {
+  const int kSrcWidth = benchmark_width_ - 3;
+  const int kSrcHeight = benchmark_height_ - 1;
+  I444TestRotate(kSrcWidth, kSrcHeight, /*dst_width=*/kSrcWidth,
+                 /*dst_height=*/kSrcHeight, kRotate0, benchmark_iterations_,
+                 disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+// Disabled: kRotate90 with width reduced by 3 and height reduced by 1; the
+// destination swaps the two.
+TEST_F(LibYUVRotateTest, DISABLED_I444Rotate90_Odd) {
+  const int kSrcWidth = benchmark_width_ - 3;
+  const int kSrcHeight = benchmark_height_ - 1;
+  I444TestRotate(kSrcWidth, kSrcHeight, /*dst_width=*/kSrcHeight,
+                 /*dst_height=*/kSrcWidth, kRotate90, benchmark_iterations_,
+                 disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+// Disabled: kRotate180 with width reduced by 3 and height reduced by 1.
+TEST_F(LibYUVRotateTest, DISABLED_I444Rotate180_Odd) {
+  const int kSrcWidth = benchmark_width_ - 3;
+  const int kSrcHeight = benchmark_height_ - 1;
+  I444TestRotate(kSrcWidth, kSrcHeight, /*dst_width=*/kSrcWidth,
+                 /*dst_height=*/kSrcHeight, kRotate180, benchmark_iterations_,
+                 disable_cpu_flags_, benchmark_cpu_info_);
+}
+
+// Disabled: kRotate270 with width reduced by 3 and height reduced by 1; the
+// destination swaps the two.
+TEST_F(LibYUVRotateTest, DISABLED_I444Rotate270_Odd) {
+  const int kSrcWidth = benchmark_width_ - 3;
+  const int kSrcHeight = benchmark_height_ - 1;
+  I444TestRotate(kSrcWidth, kSrcHeight, /*dst_width=*/kSrcHeight,
+                 /*dst_height=*/kSrcWidth, kRotate270, benchmark_iterations_,
+                 disable_cpu_flags_, benchmark_cpu_info_);
+}
+
static void NV12TestRotate(int src_width,
int src_height,
int dst_width,
diff --git a/files/unit_test/scale_test.cc b/files/unit_test/scale_test.cc
index d97d54a8..811b2d04 100644
--- a/files/unit_test/scale_test.cc
+++ b/files/unit_test/scale_test.cc
@@ -22,14 +22,14 @@
namespace libyuv {
// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
-static int TestFilter(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- FilterMode f,
- int benchmark_iterations,
- int disable_cpu_flags,
- int benchmark_cpu_info) {
+static int I420TestFilter(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ FilterMode f,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
return 0;
}
@@ -141,14 +141,14 @@ static int TestFilter(int src_width,
// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
// 0 = exact.
-static int TestFilter_16(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- FilterMode f,
- int benchmark_iterations,
- int disable_cpu_flags,
- int benchmark_cpu_info) {
+static int I420TestFilter_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ FilterMode f,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
return 0;
}
@@ -256,6 +256,241 @@ static int TestFilter_16(int src_width,
return max_diff;
}
+// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
+// Scales randomized Y, U and V planes with I444Scale once after
+// MaskCpuFlags(disable_cpu_flags) and benchmark_iterations times after
+// MaskCpuFlags(benchmark_cpu_info), prints the timing of both, and returns
+// the largest absolute byte difference found across the three planes.
+// Returns 0 without testing when SizeValid rejects the dimensions or when an
+// allocation check fails.
+static int I444TestFilter(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ FilterMode f,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
+ return 0;
+ }
+
+ int i, j;
+ // Chroma planes are given the same dimensions as the luma plane.
+ int src_width_uv = Abs(src_width);
+ int src_height_uv = Abs(src_height);
+
+ // NOTE(review): the products below are computed in int and only then
+ // widened to int64_t - confirm SizeValid bounds them to the int range.
+ int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
+ int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
+
+ int src_stride_y = Abs(src_width);
+ int src_stride_uv = src_width_uv;
+
+ align_buffer_page_end(src_y, src_y_plane_size);
+ align_buffer_page_end(src_u, src_uv_plane_size);
+ align_buffer_page_end(src_v, src_uv_plane_size);
+ // NOTE(review): this early return does not call
+ // free_aligned_buffer_page_end on buffers that were allocated - confirm
+ // whether that matters for the test binary.
+ if (!src_y || !src_u || !src_v) {
+ printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
+ return 0;
+ }
+ MemRandomize(src_y, src_y_plane_size);
+ MemRandomize(src_u, src_uv_plane_size);
+ MemRandomize(src_v, src_uv_plane_size);
+
+ int dst_width_uv = dst_width;
+ int dst_height_uv = dst_height;
+
+ int64_t dst_y_plane_size = (dst_width) * (dst_height);
+ int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
+
+ int dst_stride_y = dst_width;
+ int dst_stride_uv = dst_width_uv;
+
+ align_buffer_page_end(dst_y_c, dst_y_plane_size);
+ align_buffer_page_end(dst_u_c, dst_uv_plane_size);
+ align_buffer_page_end(dst_v_c, dst_uv_plane_size);
+ align_buffer_page_end(dst_y_opt, dst_y_plane_size);
+ align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
+ align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
+ if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
+ !dst_v_opt) {
+ printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
+ return 0;
+ }
+
+ // Reference run: a single call, timed on its own.
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ double c_time = get_time();
+ I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
+ src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
+ dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
+ c_time = (get_time() - c_time);
+
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ double opt_time = get_time();
+ for (i = 0; i < benchmark_iterations; ++i) {
+ I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
+ src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
+ dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
+ f);
+ }
+ // Average time per iteration; assumes benchmark_iterations is non-zero.
+ opt_time = (get_time() - opt_time) / benchmark_iterations;
+ // Report performance of C vs OPT.
+ printf("filter %d - %8d us C - %8d us OPT\n", f,
+ static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
+
+ // C version may be a little off from the optimized. Order of
+ // operations may introduce rounding somewhere. So do a difference
+ // of the buffers and look to see that the max difference is not
+ // over 3.
+ // (The threshold itself is applied by the caller; this function only
+ // returns the maximum.)
+ int max_diff = 0;
+ for (i = 0; i < (dst_height); ++i) {
+ for (j = 0; j < (dst_width); ++j) {
+ int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
+ dst_y_opt[(i * dst_stride_y) + j]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ }
+ }
+
+ // U and V are compared in the same pass and share max_diff with Y.
+ for (i = 0; i < (dst_height_uv); ++i) {
+ for (j = 0; j < (dst_width_uv); ++j) {
+ int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
+ dst_u_opt[(i * dst_stride_uv) + j]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
+ dst_v_opt[(i * dst_stride_uv) + j]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ }
+ }
+
+ free_aligned_buffer_page_end(dst_y_c);
+ free_aligned_buffer_page_end(dst_u_c);
+ free_aligned_buffer_page_end(dst_v_c);
+ free_aligned_buffer_page_end(dst_y_opt);
+ free_aligned_buffer_page_end(dst_u_opt);
+ free_aligned_buffer_page_end(dst_v_opt);
+ free_aligned_buffer_page_end(src_y);
+ free_aligned_buffer_page_end(src_u);
+ free_aligned_buffer_page_end(src_v);
+
+ return max_diff;
+}
+
+// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
+// 0 = exact.
+// The 8-bit planes are randomized and copied value-for-value into 16-bit
+// planes. I444Scale runs once on the 8-bit data after
+// MaskCpuFlags(disable_cpu_flags); I444Scale_16 runs benchmark_iterations
+// times on the 16-bit data after MaskCpuFlags(benchmark_cpu_info).
+// Returns 0 without testing when SizeValid rejects the dimensions or when the
+// source allocation check fails.
+static int I444TestFilter_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ FilterMode f,
+ int benchmark_iterations,
+ int disable_cpu_flags,
+ int benchmark_cpu_info) {
+ if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
+ return 0;
+ }
+
+ int i;
+ // Chroma planes are given the same dimensions as the luma plane.
+ int src_width_uv = Abs(src_width);
+ int src_height_uv = Abs(src_height);
+
+ // NOTE(review): the products below are computed in int and only then
+ // widened to int64_t - confirm SizeValid bounds them to the int range.
+ int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
+ int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
+
+ int src_stride_y = Abs(src_width);
+ int src_stride_uv = src_width_uv;
+
+ align_buffer_page_end(src_y, src_y_plane_size);
+ align_buffer_page_end(src_u, src_uv_plane_size);
+ align_buffer_page_end(src_v, src_uv_plane_size);
+ // 16-bit copies need two bytes per sample.
+ align_buffer_page_end(src_y_16, src_y_plane_size * 2);
+ align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
+ align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
+ // NOTE(review): this early return does not call
+ // free_aligned_buffer_page_end on buffers that were allocated.
+ if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
+ printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
+ return 0;
+ }
+ uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
+ uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
+ uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);
+
+ MemRandomize(src_y, src_y_plane_size);
+ MemRandomize(src_u, src_uv_plane_size);
+ MemRandomize(src_v, src_uv_plane_size);
+
+ // Widen each 8-bit sample into the matching 16-bit plane.
+ for (i = 0; i < src_y_plane_size; ++i) {
+ p_src_y_16[i] = src_y[i];
+ }
+ for (i = 0; i < src_uv_plane_size; ++i) {
+ p_src_u_16[i] = src_u[i];
+ p_src_v_16[i] = src_v[i];
+ }
+
+ int dst_width_uv = dst_width;
+ int dst_height_uv = dst_height;
+
+ int dst_y_plane_size = (dst_width) * (dst_height);
+ int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
+
+ int dst_stride_y = dst_width;
+ int dst_stride_uv = dst_width_uv;
+
+ // NOTE(review): unlike the source buffers, the destination buffers are not
+ // checked for allocation failure here.
+ align_buffer_page_end(dst_y_8, dst_y_plane_size);
+ align_buffer_page_end(dst_u_8, dst_uv_plane_size);
+ align_buffer_page_end(dst_v_8, dst_uv_plane_size);
+ align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
+ align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
+ align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
+
+ uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
+ uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
+ uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);
+
+ // Strides are passed unchanged to both calls: the 16-bit call receives
+ // uint16_t pointers, so its strides are in samples rather than bytes.
+ MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
+ I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
+ src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
+ dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
+ MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
+ for (i = 0; i < benchmark_iterations; ++i) {
+ I444Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
+ p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
+ dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
+ dst_stride_uv, dst_width, dst_height, f);
+ }
+
+ // Expect an exact match.
+ // (The threshold is applied by the caller; this function only returns the
+ // maximum difference.)
+ int max_diff = 0;
+ for (i = 0; i < dst_y_plane_size; ++i) {
+ int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ }
+ for (i = 0; i < dst_uv_plane_size; ++i) {
+ int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
+ if (abs_diff > max_diff) {
+ max_diff = abs_diff;
+ }
+ }
+
+ free_aligned_buffer_page_end(dst_y_8);
+ free_aligned_buffer_page_end(dst_u_8);
+ free_aligned_buffer_page_end(dst_v_8);
+ free_aligned_buffer_page_end(dst_y_16);
+ free_aligned_buffer_page_end(dst_u_16);
+ free_aligned_buffer_page_end(dst_v_16);
+ free_aligned_buffer_page_end(src_y);
+ free_aligned_buffer_page_end(src_u);
+ free_aligned_buffer_page_end(src_v);
+ free_aligned_buffer_page_end(src_y_16);
+ free_aligned_buffer_page_end(src_u_16);
+ free_aligned_buffer_page_end(src_v_16);
+
+ return max_diff;
+}
+
// The following adjustments in dimensions ensure the scale factor will be
// exactly achieved.
// 2 is chroma subsample.
@@ -263,16 +498,32 @@ static int TestFilter_16(int src_width,
#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
- TEST_F(LibYUVScaleTest, ScaleDownBy##name##_##filter) { \
- int diff = TestFilter( \
+ TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter) { \
+ int diff = I420TestFilter( \
+ SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
+ DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
+ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter) { \
+ int diff = I444TestFilter( \
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
- TEST_F(LibYUVScaleTest, ScaleDownBy##name##_##filter##_16) { \
- int diff = TestFilter_16( \
+ TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter##_16) { \
+ int diff = I420TestFilter_16( \
+ SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
+ DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
+ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter##_16) { \
+ int diff = I444TestFilter_16( \
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
@@ -300,30 +551,58 @@ TEST_FACTOR(3, 1, 3, 0)
#undef DX
#define TEST_SCALETO1(name, width, height, filter, max_diff) \
- TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \
- int diff = TestFilter(benchmark_width_, benchmark_height_, width, height, \
- kFilter##filter, benchmark_iterations_, \
- disable_cpu_flags_, benchmark_cpu_info_); \
+ TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter) { \
+ int diff = I420TestFilter(benchmark_width_, benchmark_height_, width, \
+ height, kFilter##filter, benchmark_iterations_, \
+ disable_cpu_flags_, benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter) { \
+ int diff = I444TestFilter(benchmark_width_, benchmark_height_, width, \
+ height, kFilter##filter, benchmark_iterations_, \
+ disable_cpu_flags_, benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter##_16) { \
+ int diff = I420TestFilter_16( \
+ benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
+ benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter##_16) { \
+ int diff = I444TestFilter_16( \
+ benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
+ benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
+ EXPECT_LE(diff, max_diff); \
+ } \
+ TEST_F(LibYUVScaleTest, I420##name##From##width##x##height##_##filter) { \
+ int diff = I420TestFilter(width, height, Abs(benchmark_width_), \
+ Abs(benchmark_height_), kFilter##filter, \
+ benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
- TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \
- int diff = TestFilter(width, height, Abs(benchmark_width_), \
- Abs(benchmark_height_), kFilter##filter, \
- benchmark_iterations_, disable_cpu_flags_, \
- benchmark_cpu_info_); \
+ TEST_F(LibYUVScaleTest, I444##name##From##width##x##height##_##filter) { \
+ int diff = I444TestFilter(width, height, Abs(benchmark_width_), \
+ Abs(benchmark_height_), kFilter##filter, \
+ benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
- TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter##_16) { \
- int diff = TestFilter_16(benchmark_width_, benchmark_height_, width, \
- height, kFilter##filter, benchmark_iterations_, \
- disable_cpu_flags_, benchmark_cpu_info_); \
+ TEST_F(LibYUVScaleTest, \
+ I420##name##From##width##x##height##_##filter##_16) { \
+ int diff = I420TestFilter_16(width, height, Abs(benchmark_width_), \
+ Abs(benchmark_height_), kFilter##filter, \
+ benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
- TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter##_16) { \
- int diff = TestFilter_16(width, height, Abs(benchmark_width_), \
- Abs(benchmark_height_), kFilter##filter, \
- benchmark_iterations_, disable_cpu_flags_, \
- benchmark_cpu_info_); \
+ TEST_F(LibYUVScaleTest, \
+ I444##name##From##width##x##height##_##filter##_16) { \
+ int diff = I444TestFilter_16(width, height, Abs(benchmark_width_), \
+ Abs(benchmark_height_), kFilter##filter, \
+ benchmark_iterations_, disable_cpu_flags_, \
+ benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
}
diff --git a/files/util/psnr.cc b/files/util/psnr.cc
index f54015ba..c7bee7f9 100644
--- a/files/util/psnr.cc
+++ b/files/util/psnr.cc
@@ -189,7 +189,7 @@ static uint32_t SumSquareError_SSE2(const uint8_t* src_a,
,
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
- ); // NOLINT
+ ); // NOLINT
return sse;
}
#endif // LIBYUV_DISABLE_X86 etc