aboutsummaryrefslogtreecommitdiff
path: root/source
diff options
context:
space:
mode:
author Frank Barchard <fbarchard@google.com> 2023-06-06 15:05:32 -0700
committer libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> 2023-06-07 00:02:40 +0000
commit b08ccb6a83f5c76d5a9e181b0f65efd33ce8262d (patch)
tree 955ccd5af95f4edc619ed1a79ed7e9ad5979a21d /source
parent 1602e4c607f3268685eff6ed56b3a0d994f5b3fc (diff)
download libyuv-b08ccb6a83f5c76d5a9e181b0f65efd33ce8262d.tar.gz
FP16 to FP32 float conversion row function
Bug: None
Change-Id: I97aab6aafd41c3bf36bfbf33fdcc424e5b3fd6e3
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4590225
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Wan-Teh Chang <wtc@google.com>
Diffstat (limited to 'source')
-rw-r--r-- source/row_neon64.cc 40
1 files changed, 40 insertions, 0 deletions
diff --git a/source/row_neon64.cc b/source/row_neon64.cc
index 74190d61..a341dc13 100644
--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@@ -3960,6 +3960,46 @@ void ByteToFloatRow_NEON(const uint8_t* src,
: "cc", "memory", "v1", "v2", "v3");
}
+// Convert FP16 Half Floats to FP32 Floats, 8 values per loop iteration.
+void ConvertFP16ToFP32Row_NEON(const uint16_t* src, // fp16 values carried as uint16_t
+ float* dst, // receives 8 floats (32 bytes) per iteration
+ int width) { // element count; presumably a positive multiple of 8 - TODO confirm against callers
+ asm volatile(
+ "1: \n" // loop head; the body always runs at least once
+ "ld1 {v1.8h}, [%0], #16 \n" // load 8 halffloats, src += 16 bytes
+ "subs %w2, %w2, #8 \n" // 8 floats per loop; sets flags for b.gt below
+ "prfm pldl1keep, [%0, 448] \n" // prefetch 448 bytes past the advanced src
+ "fcvtl v2.4s, v1.4h \n" // low 4 halffloats -> 4 floats
+ "fcvtl2 v3.4s, v1.8h \n" // high 4 halffloats -> 4 floats
+ "stp q2, q3, [%1], #32 \n" // store 8 floats, dst += 32 bytes
+ "b.gt 1b \n" // repeat while remaining width > 0
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v1", "v2", "v3");
+}
+
+// Convert FP32 Floats to FP16 Half Floats, 8 values per loop iteration.
+void ConvertFP32ToFP16Row_NEON(const float* src,
+ uint16_t* dst, // fp16 values carried as uint16_t
+ int width) { // element count; presumably a positive multiple of 8 - TODO confirm against callers
+ asm volatile(
+ "1: \n" // loop head; the body always runs at least once
+ "ldp q2, q3, [%0], #32 \n" // load 8 floats, src += 32 bytes
+ "subs %w2, %w2, #8 \n" // 8 floats per loop; sets flags for b.gt below
+ "prfm pldl1keep, [%0, 448] \n" // prefetch 448 bytes past the advanced src
+ "fcvtn v1.4h, v2.4s \n" // first 4 floats -> low 4 halffloats of v1
+ "fcvtn2 v1.8h, v3.4s \n" // next 4 floats -> high 4 halffloats of v1
+ "str q1, [%1], #16 \n" // store 8 fp16 halffloats, dst += 16 bytes
+ "b.gt 1b \n" // repeat while remaining width > 0
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(width) // %2
+ :
+ : "cc", "memory", "v1", "v2", "v3");
+}
+
float ScaleMaxSamples_NEON(const float* src,
float* dst,
float scale,