tb_den: u32,
}
+#[cfg(not(target_arch="x86_64"))]
/// Copies the leading `w` bytes of each row from `src` into `dst`.
///
/// `dst` is walked in consecutive `dstride`-byte rows and `src` in consecutive
/// `sstride`-byte rows; at most `h` row pairs are processed, fewer if either
/// buffer runs out of rows first. Bytes past the first `w` of each destination
/// row are left untouched.
///
/// # Panics
///
/// Panics if either stride is zero or if a visited row is shorter than `w`.
fn copy_luma(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    let row_pairs = dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(h);
    row_pairs.for_each(|(out_row, in_row)| out_row[..w].copy_from_slice(&in_row[..w]));
}
#[cfg(not(target_arch="x86_64"))]
fn deint_chroma(frm: NASimpleVideoFrame<u8>, src: &[u8], sstride: usize) {
let mut uoff = frm.offset[1];
#[cfg(target_arch="x86_64")]
use std::arch::asm;
#[cfg(target_arch="x86_64")]
/// Copies a `w` x `h` block of bytes from `src` to `dst`, row by row.
///
/// `dstride` and `sstride` are the distances in bytes between the starts of
/// consecutive rows in `dst` and `src`.
///
/// An AVX loop that moves 64 bytes per iteration is used only when all of the
/// following hold; otherwise rows are copied with `copy_from_slice`:
///
/// * the CPU reports AVX support at run time,
/// * both base pointers and both strides are 32-byte aligned,
/// * `w` is a non-zero multiple of 64 and `h` is non-zero,
/// * `w` does not exceed either stride and both slices hold `h` full rows.
///
/// # Panics
///
/// The fallback path panics if a stride is zero or if a row it visits is
/// shorter than `w`.
fn copy_luma(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    // True when `h` rows of `w` bytes spaced `stride` apart fit in `len` bytes.
    // Yields false for `h == 0` and on arithmetic overflow.
    let fits = |len: usize, stride: usize| {
        h.checked_sub(1)
            .and_then(|rows| rows.checked_mul(stride))
            .and_then(|last_row| last_row.checked_add(w))
            .map_or(false, |needed| needed <= len)
    };

    // The loop counters below are decremented before being tested, so a zero
    // width or height would wrap around instead of terminating.
    let use_avx = w != 0
        && h != 0
        && w % 64 == 0
        && (dstride | sstride) % 32 == 0
        && w <= dstride
        && w <= sstride
        && dst.as_ptr().align_offset(32) == 0
        && src.as_ptr().align_offset(32) == 0
        && fits(dst.len(), dstride)
        && fits(src.len(), sstride)
        && is_x86_feature_detected!("avx");

    if use_avx {
        // SAFETY:
        // * AVX availability was checked at run time.
        // * Every row start is 32-byte aligned (aligned base, stride multiple
        //   of 32) and `w` is a multiple of 64, so each `vmovdqa` is aligned.
        // * `fits` guarantees that all `h` rows of `w` bytes lie inside both
        //   slices, and `w <= stride` keeps the `stride - w` steps from
        //   underflowing.
        // * `dst` and `src` cannot overlap because `dst` is a unique borrow.
        unsafe {
            asm!(
                "2:",
                "    mov {x}, {w}",
                "  3:",
                "    vmovdqa ymm0, [{src}]",
                "    vmovdqa ymm1, [{src}+32]",
                "    vmovdqa [{dst}],    ymm0",
                "    vmovdqa [{dst}+32], ymm1",
                "    add {src}, 64",
                "    add {dst}, 64",
                "    sub {x}, 64",
                "    jnz 3b",
                "  add {src}, {sstep}",
                "  add {dst}, {dstep}",
                "  dec {h}",
                "  jnz 2b",
                dst = inout(reg) dst.as_mut_ptr() => _,
                src = inout(reg) src.as_ptr() => _,
                sstep = in(reg) sstride - w,
                dstep = in(reg) dstride - w,
                w = in(reg) w,
                // The row counter is modified by `dec`, so it must not be a
                // plain `in` operand (input registers have to be preserved).
                h = inout(reg) h => _,
                x = out(reg) _,
                out("ymm0") _,
                out("ymm1") _,
                options(nostack),
            );
        }
    } else {
        for (dline, sline) in dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(h) {
            dline[..w].copy_from_slice(&sline[..w]);
        }
    }
}
+#[cfg(target_arch="x86_64")]
fn deint_chroma(frm: NASimpleVideoFrame<u8>, src: &[u8], sstride: usize) {
unsafe {
let width = frm.width[1];
validate!(iimg.width == (frm.width[0] as u16));
validate!(iimg.height == (frm.height[0] as u16));
- for (dline, sline) in frm.data[frm.offset[0]..].chunks_mut(frm.stride[0])
- .zip(imgdata[iimg.offsets[0] as usize..].chunks(iimg.pitches[0] as usize))
- .take(frm.height[0]) {
- dline[..frm.width[0]].copy_from_slice(&sline[..frm.width[0]]);
- }
+ copy_luma(&mut frm.data[frm.offset[0]..], frm.stride[0], &imgdata[iimg.offsets[0] as usize..], iimg.pitches[0] as usize, frm.width[0], frm.height[0]);
deint_chroma(frm, &imgdata[iimg.offsets[1] as usize..], iimg.pitches[1] as usize);
},