about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author    Nathan E. Egge <unlord@xiph.org>  2024-02-24 07:46:53 -0500
committer Nathan E. Egge <unlord@xiph.org>  2024-02-27 04:47:36 -0500
commit    afeeb3cc901a94ded8e20086a06beb45c728fbf0 (patch)
tree      2b3d74aedfb32103f3aa6842431b6e468659fb64
parent    52948bbfcc6903fdac0184a1a8f18236a0b34242 (diff)
download  libdav1d-afeeb3cc901a94ded8e20086a06beb45c728fbf0.tar.gz
riscv64/itx: Add 4x16 8bpc eob test
Kendryte K230
                                                    Before          After
inv_txfm_add_4x16_adst_adst_0_8bpc_rvv:         429.9 (7.45x)   381.3 (8.58x)
inv_txfm_add_4x16_adst_adst_1_8bpc_rvv:         430.0 (7.45x)   381.3 (8.57x)
inv_txfm_add_4x16_adst_dct_0_8bpc_rvv:          381.0 (8.01x)   332.5 (9.19x)
inv_txfm_add_4x16_adst_dct_1_8bpc_rvv:          381.0 (8.00x)   332.5 (9.19x)
inv_txfm_add_4x16_adst_flipadst_0_8bpc_rvv:     432.8 (7.42x)   384.5 (8.52x)
inv_txfm_add_4x16_adst_flipadst_1_8bpc_rvv:     432.8 (7.42x)   384.4 (8.52x)
inv_txfm_add_4x16_adst_identity_0_8bpc_rvv:     304.6 (7.32x)   249.8 (9.18x)
inv_txfm_add_4x16_adst_identity_1_8bpc_rvv:     304.5 (7.32x)   249.8 (9.18x)
inv_txfm_add_4x16_dct_adst_0_8bpc_rvv:          407.2 (7.68x)   371.4 (8.57x)
inv_txfm_add_4x16_dct_adst_1_8bpc_rvv:          407.1 (7.68x)   371.5 (8.58x)
inv_txfm_add_4x16_dct_dct_0_8bpc_rvv:           357.9 (1.27x)   323.1 (1.41x)
inv_txfm_add_4x16_dct_dct_1_8bpc_rvv:           357.9 (8.29x)   322.9 (9.16x)
inv_txfm_add_4x16_dct_flipadst_0_8bpc_rvv:      410.0 (7.62x)   376.6 (8.45x)
inv_txfm_add_4x16_dct_flipadst_1_8bpc_rvv:      410.0 (7.62x)   376.5 (8.47x)
inv_txfm_add_4x16_dct_identity_0_8bpc_rvv:      275.2 (7.79x)   240.5 (9.21x)
inv_txfm_add_4x16_dct_identity_1_8bpc_rvv:      275.3 (7.78x)   240.6 (9.19x)
inv_txfm_add_4x16_flipadst_adst_0_8bpc_rvv:     430.5 (7.51x)   382.6 (8.60x)
inv_txfm_add_4x16_flipadst_adst_1_8bpc_rvv:     430.1 (7.52x)   382.8 (8.60x)
inv_txfm_add_4x16_flipadst_dct_0_8bpc_rvv:      381.1 (8.09x)   333.8 (9.21x)
inv_txfm_add_4x16_flipadst_dct_1_8bpc_rvv:      381.0 (8.08x)   333.7 (9.21x)
inv_txfm_add_4x16_flipadst_flipadst_0_8bpc_rvv: 433.0 (7.48x)   385.7 (8.55x)
inv_txfm_add_4x16_flipadst_flipadst_1_8bpc_rvv: 433.0 (7.48x)   385.7 (8.55x)
inv_txfm_add_4x16_flipadst_identity_0_8bpc_rvv: 298.6 (7.57x)   250.8 (9.28x)
inv_txfm_add_4x16_flipadst_identity_1_8bpc_rvv: 298.6 (7.57x)   250.9 (9.27x)
inv_txfm_add_4x16_identity_adst_0_8bpc_rvv:     361.5 (7.93x)   347.3 (8.35x)
inv_txfm_add_4x16_identity_adst_1_8bpc_rvv:     361.4 (7.93x)   347.4 (8.35x)
inv_txfm_add_4x16_identity_dct_0_8bpc_rvv:      310.9 (8.69x)   297.8 (9.02x)
inv_txfm_add_4x16_identity_dct_1_8bpc_rvv:      311.0 (8.69x)   297.8 (9.02x)
inv_txfm_add_4x16_identity_flipadst_0_8bpc_rvv: 364.1 (7.88x)   350.5 (8.29x)
inv_txfm_add_4x16_identity_flipadst_1_8bpc_rvv: 364.2 (7.88x)   350.4 (8.31x)
inv_txfm_add_4x16_identity_identity_0_8bpc_rvv: 229.7 (8.22x)   211.4 (9.11x)
inv_txfm_add_4x16_identity_identity_1_8bpc_rvv: 229.7 (8.21x)   211.2 (9.12x)
-rw-r--r--  src/riscv/64/itx.S  47
1 file changed, 30 insertions, 17 deletions
diff --git a/src/riscv/64/itx.S b/src/riscv/64/itx.S
index 4ba4c1a..5ed3c90 100644
--- a/src/riscv/64/itx.S
+++ b/src/riscv/64/itx.S
@@ -1567,6 +1567,8 @@ function inv_txfm_\variant\()add_4x16_rvv, export=1, ext=v
vsetivli zero, 8, e16, m1, ta, ma
+ blt a3, a6, 1f
+
addi t0, a2, 16
vle16.v v0, (t0)
addi t0, t0, 32
@@ -1595,6 +1597,14 @@ function inv_txfm_\variant\()add_4x16_rvv, export=1, ext=v
vssra.vi v7, v3, 1
.endif
+ j 2f
+
+1:
+.irp i, 4, 5, 6, 7
+ vmv.v.x v\i, zero
+.endr
+
+2:
vle16.v v0, (a2)
addi t0, a2, 32
vle16.v v1, (t0)
@@ -1805,7 +1815,7 @@ endfunc
def_fn_416_base identity_
def_fn_416_base
-.macro def_fn_416 w, h, txfm1, txfm2
+.macro def_fn_416 w, h, txfm1, txfm2, eob_half
function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_8bpc_rvv, export=1
.if \w == 4 && (\txfm1 == adst || \txfm1 == flipadst)
la a4, inv_\txfm1\()_e16_x\w\()w_rvv
@@ -1817,6 +1827,9 @@ function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_8bpc_rvv, export=1
.else
la a5, inv_\txfm2\()_e16_x\h\()_rvv
.endif
+.if \w == 4
+ li a6, \eob_half
+.endif
.ifc \txfm1, identity
j inv_txfm_identity_add_\w\()x\h\()_rvv
.else
@@ -1826,22 +1839,22 @@ endfunc
.endm
.macro def_fns_416 w, h
-def_fn_416 \w, \h, dct, dct
-def_fn_416 \w, \h, identity, identity
-def_fn_416 \w, \h, dct, adst
-def_fn_416 \w, \h, dct, flipadst
-def_fn_416 \w, \h, dct, identity
-def_fn_416 \w, \h, adst, dct
-def_fn_416 \w, \h, adst, adst
-def_fn_416 \w, \h, adst, flipadst
-def_fn_416 \w, \h, flipadst, dct
-def_fn_416 \w, \h, flipadst, adst
-def_fn_416 \w, \h, flipadst, flipadst
-def_fn_416 \w, \h, identity, dct
-def_fn_416 \w, \h, adst, identity
-def_fn_416 \w, \h, flipadst, identity
-def_fn_416 \w, \h, identity, adst
-def_fn_416 \w, \h, identity, flipadst
+def_fn_416 \w, \h, dct, dct, 29
+def_fn_416 \w, \h, identity, identity, 29
+def_fn_416 \w, \h, dct, adst, 29
+def_fn_416 \w, \h, dct, flipadst, 29
+def_fn_416 \w, \h, dct, identity, 8
+def_fn_416 \w, \h, adst, dct, 29
+def_fn_416 \w, \h, adst, adst, 29
+def_fn_416 \w, \h, adst, flipadst, 29
+def_fn_416 \w, \h, flipadst, dct, 29
+def_fn_416 \w, \h, flipadst, adst, 29
+def_fn_416 \w, \h, flipadst, flipadst, 29
+def_fn_416 \w, \h, identity, dct, 32
+def_fn_416 \w, \h, adst, identity, 8
+def_fn_416 \w, \h, flipadst, identity, 8
+def_fn_416 \w, \h, identity, adst, 32
+def_fn_416 \w, \h, identity, flipadst, 32
.endm
def_fns_416 4, 16