tb_den: u32,
}
+/// Splits interleaved chroma byte pairs from `src` into planes 1 and 2 of `frm`
+/// (portable variant, built on every target except x86_64).
+///
+/// `src` is walked in rows of `sstride` bytes. For each row, the first byte of
+/// every pair is stored in plane 1 and the second byte in plane 2, for at most
+/// `frm.width[1]` pairs per row and at most `frm.height[1]` rows.
+#[cfg(not(target_arch="x86_64"))]
+fn deint_chroma(frm: NASimpleVideoFrame<u8>, src: &[u8], sstride: usize) {
+    let (cols, rows) = (frm.width[1], frm.height[1]);
+    let (u_base, v_base) = (frm.offset[1], frm.offset[2]);
+    let (u_pitch, v_pitch) = (frm.stride[1], frm.stride[2]);
+    for (row, line) in src.chunks(sstride).take(rows).enumerate() {
+        // Start of the current output row inside each destination plane.
+        let u_row = u_base + row * u_pitch;
+        let v_row = v_base + row * v_pitch;
+        for (col, uv) in line.chunks_exact(2).take(cols).enumerate() {
+            frm.data[u_row + col] = uv[0];
+            frm.data[v_row + col] = uv[1];
+        }
+    }
+}
+
+#[cfg(target_arch="x86_64")]
+use std::arch::asm;
+/// Splits interleaved chroma byte pairs from `src` into planes 1 and 2 of `frm`
+/// (x86_64 variant using SSE2 inline assembly).
+///
+/// `src` is read in rows of `sstride` bytes. The first byte of every pair goes
+/// to plane 1 and the second byte to plane 2, for `frm.width[1]` pairs per row
+/// and `frm.height[1]` rows.
+///
+/// The assembly only handles the leading multiple-of-8 part of each row, and
+/// only after the geometry has been checked against the lengths of `src` and
+/// `frm.data`. The remaining `width % 8` samples per row, or the whole frame if
+/// the geometry check fails, go through the bounds-checked scalar loop, which
+/// behaves like the non-x86_64 variant.
+#[cfg(target_arch="x86_64")]
+fn deint_chroma(frm: NASimpleVideoFrame<u8>, src: &[u8], sstride: usize) {
+    let width = frm.width[1];
+    let height = frm.height[1];
+    if width == 0 || height == 0 {
+        // Nothing to copy; the row/column counters in the assembly must never start at zero.
+        return;
+    }
+    let (ustride, vstride) = (frm.stride[1], frm.stride[2]);
+    let dst_len = frm.data.len();
+    // True when the last row (starting at `start + (height - 1) * stride` and
+    // `row_len` bytes long) ends inside a buffer of `len` bytes, without overflow.
+    let span_fits = |start: usize, stride: usize, row_len: usize, len: usize| {
+        (height - 1)
+            .checked_mul(stride)
+            .and_then(|v| v.checked_add(row_len))
+            .and_then(|v| v.checked_add(start))
+            .map_or(false, |end| end <= len)
+    };
+    let fits = width.checked_mul(2).map_or(false, |row_bytes| {
+        sstride >= row_bytes && span_fits(0, sstride, row_bytes, src.len())
+    }) && ustride >= width
+        && vstride >= width
+        && span_fits(frm.offset[1], ustride, width, dst_len)
+        && span_fits(frm.offset[2], vstride, width, dst_len);
+    // Number of leading samples per row handled by the assembly (a multiple of 8).
+    let vec_w = if fits { width & !7 } else { 0 };
+    if vec_w != 0 {
+        // SAFETY: `fits` guarantees that every row of `2 * width` source bytes lies
+        // inside `src` and every row of `width` destination bytes lies inside
+        // `frm.data` for both planes. The assembly touches only the first
+        // `vec_w <= width` samples of each of the `height` rows. `vec_w` is a
+        // non-zero multiple of 8 and `height` is non-zero, so both counters reach
+        // zero exactly. All loads are unaligned (`movups`), so no alignment of
+        // `src` is required.
+        unsafe {
+            let dst = frm.data.as_mut_ptr();
+            asm!(
+                "2:",
+                "  mov {tmp}, {width}",
+                "  test {width}, 8",
+                "  jz 3f",
+                // One block of 8 samples when the row length is an odd multiple of 8.
+                "  movups xmm0, [{src}]",
+                "  movaps xmm1, xmm0",
+                "  psllw xmm0, 8",
+                "  psrlw xmm1, 8",
+                "  psrlw xmm0, 8",
+                "  packuswb xmm1, xmm1",
+                "  packuswb xmm0, xmm0",
+                "  movq [{vdst}], xmm1",
+                "  movq [{udst}], xmm0",
+                "  add {src}, 16",
+                "  add {vdst}, 8",
+                "  add {udst}, 8",
+                "  sub {tmp}, 8",
+                // Row was exactly 8 samples long: skip the 16-sample loop.
+                "  jz 4f",
+                "3:",
+                "  movups xmm0, [{src}]",
+                "  movups xmm1, [{src} + 16]",
+                "  movaps xmm2, xmm0",
+                "  movaps xmm3, xmm1",
+                "  psllw xmm0, 8",
+                "  psllw xmm1, 8",
+                "  psrlw xmm2, 8",
+                "  psrlw xmm3, 8",
+                "  psrlw xmm0, 8",
+                "  psrlw xmm1, 8",
+                "  packuswb xmm2, xmm3",
+                "  packuswb xmm0, xmm1",
+                "  movups [{vdst}], xmm2",
+                "  movups [{udst}], xmm0",
+                "  add {src}, 32",
+                "  add {vdst}, 16",
+                "  add {udst}, 16",
+                "  sub {tmp}, 16",
+                "  jnz 3b",
+                "4:",
+                "  add {udst}, {ustep}",
+                "  add {vdst}, {vstep}",
+                "  add {src}, {sstep}",
+                "  dec {height}",
+                "  jnz 2b",
+                src = inout(reg) src.as_ptr() => _,
+                udst = inout(reg) dst.add(frm.offset[1]) => _,
+                vdst = inout(reg) dst.add(frm.offset[2]) => _,
+                width = in(reg) vec_w,
+                height = inout(reg) height => _,
+                // Each plane advances by its own stride.
+                ustep = in(reg) ustride - vec_w,
+                vstep = in(reg) vstride - vec_w,
+                sstep = in(reg) sstride - vec_w * 2,
+                tmp = out(reg) _,
+                out("xmm0") _,
+                out("xmm1") _,
+                out("xmm2") _,
+                out("xmm3") _,
+            );
+        }
+    }
+    if vec_w != width {
+        // Bounds-checked scalar path for the columns the assembly did not cover.
+        let mut uoff = frm.offset[1];
+        let mut voff = frm.offset[2];
+        for cline in src.chunks(sstride).take(height) {
+            for (x, pair) in cline.chunks_exact(2).take(width).enumerate().skip(vec_w) {
+                frm.data[uoff + x] = pair[0];
+                frm.data[voff + x] = pair[1];
+            }
+            uoff += ustride;
+            voff += vstride;
+        }
+    }
+}
+
fn fill_frame(ifmt: VAImageFormat, pic: &Picture<PictureSync>, frm: &mut NABufferType) -> DecoderResult<()> {
let mut vbuf = frm.get_vbuf().unwrap();
let (w, h) = pic.surface_size();
dline[..frm.width[0]].copy_from_slice(&sline[..frm.width[0]]);
}
- let mut uoff = frm.offset[1];
- let mut voff = frm.offset[2];
- for cline in imgdata[iimg.offsets[1] as usize..].chunks(iimg.pitches[1] as usize).take(frm.height[1]) {
- for (x, pair) in cline.chunks_exact(2).take(frm.width[1]).enumerate() {
- frm.data[uoff + x] = pair[0];
- frm.data[voff + x] = pair[1];
- }
- uoff += frm.stride[1];
- voff += frm.stride[2];
- }
+ deint_chroma(frm, &imgdata[iimg.offsets[1] as usize..], iimg.pitches[1] as usize);
},
_ => unimplemented!(),
};