diff options
Diffstat (limited to 'source/scale_neon64.cc')
-rw-r--r-- | source/scale_neon64.cc | 95 |
1 files changed, 0 insertions, 95 deletions
diff --git a/source/scale_neon64.cc b/source/scale_neon64.cc index ad06ee83..7c072380 100644 --- a/source/scale_neon64.cc +++ b/source/scale_neon64.cc @@ -1118,101 +1118,6 @@ void ScaleFilterCols_NEON(uint8_t* dst_ptr, #undef LOAD2_DATA8_LANE -// 16x2 -> 16x1 -void ScaleFilterRows_NEON(uint8_t* dst_ptr, - const uint8_t* src_ptr, - ptrdiff_t src_stride, - int dst_width, - int source_y_fraction) { - int y_fraction = 256 - source_y_fraction; - asm volatile( - "cmp %w4, #0 \n" - "b.eq 100f \n" - "add %2, %2, %1 \n" - "cmp %w4, #64 \n" - "b.eq 75f \n" - "cmp %w4, #128 \n" - "b.eq 50f \n" - "cmp %w4, #192 \n" - "b.eq 25f \n" - - "dup v5.8b, %w4 \n" - "dup v4.8b, %w5 \n" - // General purpose row blend. - "1: \n" - "ld1 {v0.16b}, [%1], #16 \n" - "ld1 {v1.16b}, [%2], #16 \n" - "subs %w3, %w3, #16 \n" - "umull v6.8h, v0.8b, v4.8b \n" - "umull2 v7.8h, v0.16b, v4.16b \n" - "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead - "umlal v6.8h, v1.8b, v5.8b \n" - "umlal2 v7.8h, v1.16b, v5.16b \n" - "prfm pldl1keep, [%2, 448] \n" - "rshrn v0.8b, v6.8h, #8 \n" - "rshrn2 v0.16b, v7.8h, #8 \n" - "st1 {v0.16b}, [%0], #16 \n" - "b.gt 1b \n" - "b 99f \n" - - // Blend 25 / 75. - "25: \n" - "ld1 {v0.16b}, [%1], #16 \n" - "ld1 {v1.16b}, [%2], #16 \n" - "subs %w3, %w3, #16 \n" - "urhadd v0.16b, v0.16b, v1.16b \n" - "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead - "urhadd v0.16b, v0.16b, v1.16b \n" - "prfm pldl1keep, [%2, 448] \n" - "st1 {v0.16b}, [%0], #16 \n" - "b.gt 25b \n" - "b 99f \n" - - // Blend 50 / 50. - "50: \n" - "ld1 {v0.16b}, [%1], #16 \n" - "ld1 {v1.16b}, [%2], #16 \n" - "subs %w3, %w3, #16 \n" - "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead - "urhadd v0.16b, v0.16b, v1.16b \n" - "prfm pldl1keep, [%2, 448] \n" - "st1 {v0.16b}, [%0], #16 \n" - "b.gt 50b \n" - "b 99f \n" - - // Blend 75 / 25. - "75: \n" - "ld1 {v1.16b}, [%1], #16 \n" - "ld1 {v0.16b}, [%2], #16 \n" - "subs %w3, %w3, #16 \n" - "urhadd v0.16b, v0.16b, v1.16b \n" - "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead - "urhadd v0.16b, v0.16b, v1.16b \n" - "prfm pldl1keep, [%2, 448] \n" - "st1 {v0.16b}, [%0], #16 \n" - "b.gt 75b \n" - "b 99f \n" - - // Blend 100 / 0 - Copy row unchanged. - "100: \n" - "ld1 {v0.16b}, [%1], #16 \n" - "subs %w3, %w3, #16 \n" - "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead - "st1 {v0.16b}, [%0], #16 \n" - "b.gt 100b \n" - - "99: \n" - "st1 {v0.b}[15], [%0] \n" - : "+r"(dst_ptr), // %0 - "+r"(src_ptr), // %1 - "+r"(src_stride), // %2 - "+r"(dst_width), // %3 - "+r"(source_y_fraction), // %4 - "+r"(y_fraction) // %5 - : - : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "memory", "cc"); -} - void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, |