diff options
author | Nathan E. Egge <unlord@xiph.org> | 2024-02-24 07:46:53 -0500 |
---|---|---|
committer | Nathan E. Egge <unlord@xiph.org> | 2024-02-27 04:47:36 -0500 |
commit | afeeb3cc901a94ded8e20086a06beb45c728fbf0 (patch) | |
tree | 2b3d74aedfb32103f3aa6842431b6e468659fb64 | |
parent | 52948bbfcc6903fdac0184a1a8f18236a0b34242 (diff) | |
download | libdav1d-afeeb3cc901a94ded8e20086a06beb45c728fbf0.tar.gz |
riscv64/itx: Add 4x16 8bpc eob test
Kendryte K230 Before After
inv_txfm_add_4x16_adst_adst_0_8bpc_rvv: 429.9 (7.45x) 381.3 (8.58x)
inv_txfm_add_4x16_adst_adst_1_8bpc_rvv: 430.0 (7.45x) 381.3 (8.57x)
inv_txfm_add_4x16_adst_dct_0_8bpc_rvv: 381.0 (8.01x) 332.5 (9.19x)
inv_txfm_add_4x16_adst_dct_1_8bpc_rvv: 381.0 (8.00x) 332.5 (9.19x)
inv_txfm_add_4x16_adst_flipadst_0_8bpc_rvv: 432.8 (7.42x) 384.5 (8.52x)
inv_txfm_add_4x16_adst_flipadst_1_8bpc_rvv: 432.8 (7.42x) 384.4 (8.52x)
inv_txfm_add_4x16_adst_identity_0_8bpc_rvv: 304.6 (7.32x) 249.8 (9.18x)
inv_txfm_add_4x16_adst_identity_1_8bpc_rvv: 304.5 (7.32x) 249.8 (9.18x)
inv_txfm_add_4x16_dct_adst_0_8bpc_rvv: 407.2 (7.68x) 371.4 (8.57x)
inv_txfm_add_4x16_dct_adst_1_8bpc_rvv: 407.1 (7.68x) 371.5 (8.58x)
inv_txfm_add_4x16_dct_dct_0_8bpc_rvv: 357.9 (1.27x) 323.1 (1.41x)
inv_txfm_add_4x16_dct_dct_1_8bpc_rvv: 357.9 (8.29x) 322.9 (9.16x)
inv_txfm_add_4x16_dct_flipadst_0_8bpc_rvv: 410.0 (7.62x) 376.6 (8.45x)
inv_txfm_add_4x16_dct_flipadst_1_8bpc_rvv: 410.0 (7.62x) 376.5 (8.47x)
inv_txfm_add_4x16_dct_identity_0_8bpc_rvv: 275.2 (7.79x) 240.5 (9.21x)
inv_txfm_add_4x16_dct_identity_1_8bpc_rvv: 275.3 (7.78x) 240.6 (9.19x)
inv_txfm_add_4x16_flipadst_adst_0_8bpc_rvv: 430.5 (7.51x) 382.6 (8.60x)
inv_txfm_add_4x16_flipadst_adst_1_8bpc_rvv: 430.1 (7.52x) 382.8 (8.60x)
inv_txfm_add_4x16_flipadst_dct_0_8bpc_rvv: 381.1 (8.09x) 333.8 (9.21x)
inv_txfm_add_4x16_flipadst_dct_1_8bpc_rvv: 381.0 (8.08x) 333.7 (9.21x)
inv_txfm_add_4x16_flipadst_flipadst_0_8bpc_rvv: 433.0 (7.48x) 385.7 (8.55x)
inv_txfm_add_4x16_flipadst_flipadst_1_8bpc_rvv: 433.0 (7.48x) 385.7 (8.55x)
inv_txfm_add_4x16_flipadst_identity_0_8bpc_rvv: 298.6 (7.57x) 250.8 (9.28x)
inv_txfm_add_4x16_flipadst_identity_1_8bpc_rvv: 298.6 (7.57x) 250.9 (9.27x)
inv_txfm_add_4x16_identity_adst_0_8bpc_rvv: 361.5 (7.93x) 347.3 (8.35x)
inv_txfm_add_4x16_identity_adst_1_8bpc_rvv: 361.4 (7.93x) 347.4 (8.35x)
inv_txfm_add_4x16_identity_dct_0_8bpc_rvv: 310.9 (8.69x) 297.8 (9.02x)
inv_txfm_add_4x16_identity_dct_1_8bpc_rvv: 311.0 (8.69x) 297.8 (9.02x)
inv_txfm_add_4x16_identity_flipadst_0_8bpc_rvv: 364.1 (7.88x) 350.5 (8.29x)
inv_txfm_add_4x16_identity_flipadst_1_8bpc_rvv: 364.2 (7.88x) 350.4 (8.31x)
inv_txfm_add_4x16_identity_identity_0_8bpc_rvv: 229.7 (8.22x) 211.4 (9.11x)
inv_txfm_add_4x16_identity_identity_1_8bpc_rvv: 229.7 (8.21x) 211.2 (9.12x)
-rw-r--r-- | src/riscv/64/itx.S | 47 |
1 files changed, 30 insertions, 17 deletions
```diff
diff --git a/src/riscv/64/itx.S b/src/riscv/64/itx.S
index 4ba4c1a..5ed3c90 100644
--- a/src/riscv/64/itx.S
+++ b/src/riscv/64/itx.S
@@ -1567,6 +1567,8 @@ function inv_txfm_\variant\()add_4x16_rvv, export=1, ext=v
 
   vsetivli zero, 8, e16, m1, ta, ma
 
+  blt a3, a6, 1f
+
   addi t0, a2, 16
   vle16.v v0, (t0)
   addi t0, t0, 32
@@ -1595,6 +1597,14 @@ function inv_txfm_\variant\()add_4x16_rvv, export=1, ext=v
   vssra.vi v7, v3, 1
 .endif
 
+  j 2f
+
+1:
+.irp i, 4, 5, 6, 7
+  vmv.v.x v\i, zero
+.endr
+
+2:
   vle16.v v0, (a2)
   addi t0, a2, 32
   vle16.v v1, (t0)
@@ -1805,7 +1815,7 @@ endfunc
 def_fn_416_base identity_
 def_fn_416_base
 
-.macro def_fn_416 w, h, txfm1, txfm2
+.macro def_fn_416 w, h, txfm1, txfm2, eob_half
 function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_8bpc_rvv, export=1
 .if \w == 4 && (\txfm1 == adst || \txfm1 == flipadst)
   la a4, inv_\txfm1\()_e16_x\w\()w_rvv
@@ -1817,6 +1827,9 @@ function inv_txfm_add_\txfm1\()_\txfm2\()_\w\()x\h\()_8bpc_rvv, export=1
 .else
   la a5, inv_\txfm2\()_e16_x\h\()_rvv
 .endif
+.if \w == 4
+  li a6, \eob_half
+.endif
 .ifc \txfm1, identity
   j inv_txfm_identity_add_\w\()x\h\()_rvv
 .else
@@ -1826,22 +1839,22 @@ endfunc
 .endm
 
 .macro def_fns_416 w, h
-def_fn_416 \w, \h, dct, dct
-def_fn_416 \w, \h, identity, identity
-def_fn_416 \w, \h, dct, adst
-def_fn_416 \w, \h, dct, flipadst
-def_fn_416 \w, \h, dct, identity
-def_fn_416 \w, \h, adst, dct
-def_fn_416 \w, \h, adst, adst
-def_fn_416 \w, \h, adst, flipadst
-def_fn_416 \w, \h, flipadst, dct
-def_fn_416 \w, \h, flipadst, adst
-def_fn_416 \w, \h, flipadst, flipadst
-def_fn_416 \w, \h, identity, dct
-def_fn_416 \w, \h, adst, identity
-def_fn_416 \w, \h, flipadst, identity
-def_fn_416 \w, \h, identity, adst
-def_fn_416 \w, \h, identity, flipadst
+def_fn_416 \w, \h, dct, dct, 29
+def_fn_416 \w, \h, identity, identity, 29
+def_fn_416 \w, \h, dct, adst, 29
+def_fn_416 \w, \h, dct, flipadst, 29
+def_fn_416 \w, \h, dct, identity, 8
+def_fn_416 \w, \h, adst, dct, 29
+def_fn_416 \w, \h, adst, adst, 29
+def_fn_416 \w, \h, adst, flipadst, 29
+def_fn_416 \w, \h, flipadst, dct, 29
+def_fn_416 \w, \h, flipadst, adst, 29
+def_fn_416 \w, \h, flipadst, flipadst, 29
+def_fn_416 \w, \h, identity, dct, 32
+def_fn_416 \w, \h, adst, identity, 8
+def_fn_416 \w, \h, flipadst, identity, 8
+def_fn_416 \w, \h, identity, adst, 32
+def_fn_416 \w, \h, identity, flipadst, 32
 .endm
 
 def_fns_416 4, 16
```