aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/x86/refmvs.asm47
1 files changed, 36 insertions, 11 deletions
diff --git a/src/x86/refmvs.asm b/src/x86/refmvs.asm
index 7988aa6..085c9b3 100644
--- a/src/x86/refmvs.asm
+++ b/src/x86/refmvs.asm
@@ -92,6 +92,31 @@ JMP_TABLE splat_mv_avx2, 1, 2, 4, 8, 16, 32
JMP_TABLE splat_mv_sse2, 1, 2, 4, 8, 16, 32
+struc rf
+ .frm_hdr: resq 1
+ .iw4: resd 1
+ .ih4: resd 1
+ .iw8: resd 1
+ .ih8: resd 1
+ .sbsz: resd 1
+ .use_rf_mvs: resd 1
+ .sign_bias: resb 7
+ .mfmv_sign: resb 7
+ .pocdiff: resb 7
+ .mfmv_ref: resb 3
+ .mfmv_ref2cur: resd 3
+ .mfmv_ref2ref: resd 3*7
+ .n_mfmvs: resd 1
+ .n_blocks: resd 1
+ .rp: resq 1
+ .rp_ref: resq 1
+ .rp_proj: resq 1
+ .rp_stride: resq 1
+ .r: resq 1
+ .n_tile_threads: resd 1
+ .n_frame_threads: resd 1
+endstruc
+
SECTION .text
%macro movif32 2
@@ -341,16 +366,16 @@ cglobal load_tmvs, 6, 15, 4, -0x50, rf, tridx, xstart, xend, ystart, yend, \
stride, rp_proj, roff, troff, \
xendi, xstarti, iw8, ih8, dst
xor r14d, r14d
- cmp dword [rfq+200], 1 ; n_tile_threads
- mov ih8d, [rfq+20] ; rf->ih8
- mov iw8d, [rfq+16] ; rf->iw8
+ cmp dword [rfq+rf.n_tile_threads], 1
+ mov ih8d, [rfq+rf.ih8]
+ mov iw8d, [rfq+rf.iw8]
mov xstartd, xstartd
mov xendd, xendd
cmove tridxd, r14d
lea xstartid, [xstartq-8]
lea xendid, [xendq+8]
- mov strideq, [rfq+184]
- mov rp_projq, [rfq+176]
+ mov strideq, [rfq+rf.rp_stride]
+ mov rp_projq, [rfq+rf.rp_proj]
cmp ih8d, yendd
mov [rsp+0x30], strideq
cmovs yendd, ih8d
@@ -397,7 +422,7 @@ cglobal load_tmvs, 6, 15, 4, -0x50, rf, tridx, xstart, xend, ystart, yend, \
jg .init_xloop_start
DEFINE_ARGS rf, _, xstart, xend, ystart, yend, n7, stride, \
_, _, xendi, xstarti, stride5, _, n
- mov r13d, [rfq+152] ; rf->n_mfmvs
+ mov r13d, [rfq+rf.n_mfmvs]
test r13d, r13d
jz .ret
mov [rsp+0x0c], r13d
@@ -418,14 +443,14 @@ cglobal load_tmvs, 6, 15, 4, -0x50, rf, tridx, xstart, xend, ystart, yend, \
DEFINE_ARGS y, off, xstart, xend, ystart, rf, n7, refsign, \
ref, rp_ref, xendi, xstarti, _, _, n
mov rfq, [rsp+0x48]
- mov refd, [rfq+56+nq*4] ; ref2cur
+ mov refd, [rfq+rf.mfmv_ref2cur+nq*4]
cmp refd, 0x80000000
je .next_n
mov [rsp+0x40], refd
mov offq, [rsp+0x00] ; ystart * stride * 5
- movzx refd, byte [rfq+53+nq] ; rf->mfmv_ref[n]
+ movzx refd, byte [rfq+rf.mfmv_ref+nq]
lea refsignq, [refq-4]
- mov rp_refq, [rfq+168]
+ mov rp_refq, [rfq+rf.rp_ref]
movq m2, refsignq
add offq, [rp_refq+refq*8] ; r = rp_ref[ref] + row_offset
mov [rsp+0x14], nd
@@ -452,8 +477,8 @@ cglobal load_tmvs, 6, 15, 4, -0x50, rf, tridx, xstart, xend, ystart, yend, \
test refd, refd
jz .next_x_bad_ref
mov rfq, [rsp+0x48]
- lea r14d, [16+n7q+refq]
- mov ref2refd, [rfq+r14*4] ; rf->mfmv_ref2ref[n][b_ref-1]
+ lea ref2refd, [(rf.mfmv_ref2ref/4)+n7q+refq-1]
+ mov ref2refd, [rfq+ref2refq*4] ; rf->mfmv_ref2ref[n][b_ref-1]
test ref2refd, ref2refd
jz .next_x_bad_ref
lea fracq, [mv_proj]