From: Kostya Shishkov Date: Wed, 16 Jul 2025 04:08:15 +0000 (+0200) Subject: hwdec-vaapi: add SSE branch for copy_luma() X-Git-Url: https://git.nihav.org/?a=commitdiff_plain;h=889807f40b27bdc52fc71c3ef62a56effa20544d;p=nihav-player.git hwdec-vaapi: add SSE branch for copy_luma() --- diff --git a/hwdec-vaapi/src/lib.rs b/hwdec-vaapi/src/lib.rs index 378289b..85749d4 100644 --- a/hwdec-vaapi/src/lib.rs +++ b/hwdec-vaapi/src/lib.rs @@ -347,6 +347,42 @@ fn copy_luma(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usiz out("ymm1") _, ); } + } else if dst.as_ptr().align_offset(16) == 0 && src.as_ptr().align_offset(16) == 0 && + (w % 64) == 0 && ((dstride | sstride) % 16) == 0 { + unsafe { + asm!( + "2:", + " mov {x}, {w}", + " 3:", + " movdqa xmm0, [{src}]", + " movdqa xmm1, [{src}+16]", + " movdqa xmm2, [{src}+32]", + " movdqa xmm3, [{src}+48]", + " movdqa [{dst}], xmm0", + " movdqa [{dst}+16], xmm1", + " movdqa [{dst}+32], xmm2", + " movdqa [{dst}+48], xmm3", + " add {src}, 64", + " add {dst}, 64", + " sub {x}, 64", + " jnz 3b", + " add {src}, {sstep}", + " add {dst}, {dstep}", + " dec {h}", + " jnz 2b", + dst = inout(reg) dst.as_mut_ptr() => _, + src = inout(reg) src.as_ptr() => _, + sstep = in(reg) sstride - w, + dstep = in(reg) dstride - w, + w = in(reg) w, + h = inout(reg) h => _, + x = out(reg) _, + out("xmm0") _, + out("xmm1") _, + out("xmm2") _, + out("xmm3") _, + ); + } } else { let copy_len = dstride.min(w); for (dline, sline) in dst.chunks_mut(dstride)