diff options
author | Frank Barchard <fbarchard@google.com> | 2022-03-17 15:50:29 -0700 |
---|---|---|
committer | libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2022-03-18 07:22:36 +0000 |
commit | 95b14b24462d67aede96e30243694732f9471e63 (patch) | |
tree | 0cbcd60504d41cf6d588ca59d59fe6ee31d7901d /source/convert.cc | |
parent | 3aebf69d668177e7ee6dbbe0025e5c3dbb525ff2 (diff) | |
download | libyuv-95b14b24462d67aede96e30243694732f9471e63.tar.gz |
RAWToJ400 faster version for ARM
- Unrolled to 16 pixels per loop iteration
- Take constants via a structure, allowing different colorspaces and channel orders
- Use ADDHN to add 16.5 and take the upper 8 bits of the 16-bit values, narrowing to 8 bits
- clang-format applied, affecting MIPS code
On Cortex A510:
Was: RAWToJ400_Opt (1623 ms)
Now: RAWToJ400_Opt (862 ms)
C: RAWToJ400_Opt (1627 ms)
Bug: b/220171611
Change-Id: I06a9baf9650ebe2802fb6ff6dfbd524e2c06ada0
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3534023
Reviewed-by: Wan-Teh Chang <wtc@google.com>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Diffstat (limited to 'source/convert.cc')
-rw-r--r-- | source/convert.cc | 80 |
1 files changed, 36 insertions, 44 deletions
diff --git a/source/convert.cc b/source/convert.cc index 8f02636d..45590a7b 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -1422,7 +1422,7 @@ int ARGBToI420(const uint8_t* src_argb, #if defined(HAS_ARGBTOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToYRow = ARGBToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { + if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_NEON; } } @@ -1658,7 +1658,7 @@ int ABGRToI420(const uint8_t* src_abgr, #if defined(HAS_ABGRTOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ABGRToYRow = ABGRToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { + if (IS_ALIGNED(width, 16)) { ABGRToYRow = ABGRToYRow_NEON; } } @@ -1754,7 +1754,7 @@ int RGBAToI420(const uint8_t* src_rgba, #if defined(HAS_RGBATOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { RGBAToYRow = RGBAToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { + if (IS_ALIGNED(width, 16)) { RGBAToYRow = RGBAToYRow_NEON; } } @@ -1855,11 +1855,9 @@ int RGB24ToI420(const uint8_t* src_rgb24, if (TestCpuFlag(kCpuHasNEON)) { RGB24ToUVRow = RGB24ToUVRow_Any_NEON; RGB24ToYRow = RGB24ToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { + if (IS_ALIGNED(width, 16)) { RGB24ToYRow = RGB24ToYRow_NEON; - if (IS_ALIGNED(width, 16)) { - RGB24ToUVRow = RGB24ToUVRow_NEON; - } + RGB24ToUVRow = RGB24ToUVRow_NEON; } } #endif @@ -2031,11 +2029,9 @@ int RGB24ToJ420(const uint8_t* src_rgb24, if (TestCpuFlag(kCpuHasNEON)) { RGB24ToUVJRow = RGB24ToUVJRow_Any_NEON; RGB24ToYJRow = RGB24ToYJRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { + if (IS_ALIGNED(width, 16)) { RGB24ToYJRow = RGB24ToYJRow_NEON; - if (IS_ALIGNED(width, 16)) { - RGB24ToUVJRow = RGB24ToUVJRow_NEON; - } + RGB24ToUVJRow = RGB24ToUVJRow_NEON; } } #endif @@ -2095,18 +2091,18 @@ int RGB24ToJ420(const uint8_t* src_rgb24, #endif #endif // HAS_RGB24TOYJROW - { +{ #if !defined(HAS_RGB24TOYJROW) - // Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + // Allocate 2 rows of ARGB. 
+ const int kRowSize = (width * 4 + 31) & ~31; + align_buffer_64(row, kRowSize * 2); #endif - for (y = 0; y < height - 1; y += 2) { + for (y = 0; y < height - 1; y += 2) { #if defined(HAS_RGB24TOYJROW) - RGB24ToUVJRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width); - RGB24ToYJRow(src_rgb24, dst_y, width); - RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); + RGB24ToUVJRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width); + RGB24ToYJRow(src_rgb24, dst_y, width); + RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); #else RGB24ToARGBRow(src_rgb24, row, width); RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width); @@ -2114,26 +2110,26 @@ int RGB24ToJ420(const uint8_t* src_rgb24, ARGBToYJRow(row, dst_y, width); ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width); #endif - src_rgb24 += src_stride_rgb24 * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { + src_rgb24 += src_stride_rgb24 * 2; + dst_y += dst_stride_y * 2; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + } + if (height & 1) { #if defined(HAS_RGB24TOYJROW) - RGB24ToUVJRow(src_rgb24, 0, dst_u, dst_v, width); - RGB24ToYJRow(src_rgb24, dst_y, width); + RGB24ToUVJRow(src_rgb24, 0, dst_u, dst_v, width); + RGB24ToYJRow(src_rgb24, dst_y, width); #else RGB24ToARGBRow(src_rgb24, row, width); ARGBToUVJRow(row, 0, dst_u, dst_v, width); ARGBToYJRow(row, dst_y, width); #endif - } + } #if !defined(HAS_RGB24TOYJROW) - free_aligned_buffer_64(row); + free_aligned_buffer_64(row); #endif - } - return 0; +} +return 0; } #undef HAS_RGB24TOYJROW @@ -2187,11 +2183,9 @@ int RAWToI420(const uint8_t* src_raw, if (TestCpuFlag(kCpuHasNEON)) { RAWToUVRow = RAWToUVRow_Any_NEON; RAWToYRow = RAWToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { + if (IS_ALIGNED(width, 16)) { RAWToYRow = RAWToYRow_NEON; - if (IS_ALIGNED(width, 16)) { - RAWToUVRow = RAWToUVRow_NEON; - } + RAWToUVRow = RAWToUVRow_NEON; } } #endif @@ 
-2363,11 +2357,9 @@ int RAWToJ420(const uint8_t* src_raw, if (TestCpuFlag(kCpuHasNEON)) { RAWToUVJRow = RAWToUVJRow_Any_NEON; RAWToYJRow = RAWToYJRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { + if (IS_ALIGNED(width, 16)) { RAWToYJRow = RAWToYJRow_NEON; - if (IS_ALIGNED(width, 16)) { - RAWToUVJRow = RAWToUVJRow_NEON; - } + RAWToUVJRow = RAWToUVJRow_NEON; } } #endif @@ -2521,8 +2513,8 @@ int RGB565ToI420(const uint8_t* src_rgb565, } } // MSA version does direct RGB565 to YUV. -#elif (defined(HAS_RGB565TOYROW_MSA) || defined(HAS_RGB565TOYROW_LSX) \ - || defined(HAS_RGB565TOYROW_LASX)) +#elif (defined(HAS_RGB565TOYROW_MSA) || defined(HAS_RGB565TOYROW_LSX) || \ + defined(HAS_RGB565TOYROW_LASX)) #if defined(HAS_RGB565TOYROW_MSA) && defined(HAS_RGB565TOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RGB565ToUVRow = RGB565ToUVRow_Any_MSA; @@ -2701,8 +2693,8 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, } } // MSA version does direct ARGB1555 to YUV. -#elif (defined(HAS_ARGB1555TOYROW_MSA) || defined(HAS_ARGB1555TOYROW_LSX) \ - || defined(HAS_ARGB1555TOYROW_LASX)) +#elif (defined(HAS_ARGB1555TOYROW_MSA) || defined(HAS_ARGB1555TOYROW_LSX) || \ + defined(HAS_ARGB1555TOYROW_LASX)) #if defined(HAS_ARGB1555TOYROW_MSA) && defined(HAS_ARGB1555TOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGB1555ToUVRow = ARGB1555ToUVRow_Any_MSA; @@ -3067,7 +3059,7 @@ int RGB24ToJ400(const uint8_t* src_rgb24, #if defined(HAS_RGB24TOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { RGB24ToYJRow = RGB24ToYJRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { + if (IS_ALIGNED(width, 16)) { RGB24ToYJRow = RGB24ToYJRow_NEON; } } |