X-Git-Url: https://git.nihav.org/?a=blobdiff_plain;f=hwdec-vaapi%2Fsrc%2Flib.rs;h=cdc8b080a52f3df32df7fc5a920c80cda04f42a0;hb=HEAD;hp=40e14f445fc17e24793419ae76b82cc1d2b2eba0;hpb=25685ca46df6f9f0d446a2033b1e582985839803;p=nihav-player.git diff --git a/hwdec-vaapi/src/lib.rs b/hwdec-vaapi/src/lib.rs index 40e14f4..9dca7b1 100644 --- a/hwdec-vaapi/src/lib.rs +++ b/hwdec-vaapi/src/lib.rs @@ -273,6 +273,7 @@ struct VaapiInternals { pub struct VaapiH264Decoder { info: NACodecInfoRef, vaapi: Option, + needs_derive: bool, spses: Vec, ppses: Vec, frame_refs: FrameRefs, @@ -283,8 +284,7 @@ pub struct VaapiH264Decoder { tb_den: u32, } -#[cfg(not(target_arch="x86_64"))] -fn copy_luma(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) { +fn copy_luma_default(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) { for (dline, sline) in dst.chunks_mut(dstride) .zip(src.chunks(sstride)) .take(h) { @@ -292,6 +292,10 @@ fn copy_luma(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usiz } } #[cfg(not(target_arch="x86_64"))] +fn copy_luma(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) { + copy_luma_default(dst, dstride, src, sstride, w, h); +} +#[cfg(not(target_arch="x86_64"))] fn deint_chroma(frm: NASimpleVideoFrame, src: &[u8], sstride: usize) { let mut uoff = frm.offset[1]; let mut voff = frm.offset[2]; @@ -309,6 +313,10 @@ fn deint_chroma(frm: NASimpleVideoFrame, src: &[u8], sstride: usize) { use std::arch::asm; #[cfg(target_arch="x86_64")] fn copy_luma(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) { + if !is_x86_feature_detected!("avx") { + copy_luma_default(dst, dstride, src, sstride, w, h); + return; + } if dst.as_ptr().align_offset(32) == 0 && src.as_ptr().align_offset(32) == 0 && (w % 64) == 0 && ((dstride | sstride) % 32) == 0 { unsafe { @@ -340,18 +348,19 @@ fn copy_luma(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usiz ); } } else { + let copy_len = dstride.min(w); for (dline, sline) in dst.chunks_mut(dstride) .zip(src.chunks(sstride)) .take(h) { - dline[..w].copy_from_slice(&sline[..w]); + dline[..copy_len].copy_from_slice(&sline[..copy_len]); } } } #[cfg(target_arch="x86_64")] fn deint_chroma(frm: NASimpleVideoFrame, src: &[u8], sstride: usize) { unsafe { - let width = frm.width[1]; - let height = frm.height[1]; + let width = (frm.width[1] + 7) & !7; + let height = (frm.height[1] + 7) & !7; let dst = frm.data.as_mut_ptr(); let udst = dst.add(frm.offset[1]); let vdst = dst.add(frm.offset[2]); @@ -416,12 +425,12 @@ fn deint_chroma(frm: NASimpleVideoFrame, src: &[u8], sstride: usize) { } } -fn fill_frame(ifmt: VAImageFormat, pic: &Picture, frm: &mut NABufferType) -> DecoderResult<()> { +fn fill_frame(ifmt: VAImageFormat, pic: &Picture, frm: &mut NABufferType, needs_derive: bool) -> DecoderResult<()> { let mut vbuf = frm.get_vbuf().unwrap(); let (w, h) = pic.surface_size(); //let cur_ts = pic.timestamp(); - let img = Image::new(pic, ifmt, w, h, true).expect("get image"); + let img = Image::new(pic, ifmt, w, h, !needs_derive).expect("get image"); let iimg = img.image(); let imgdata: &[u8] = img.as_ref(); @@ -429,13 +438,22 @@ fn fill_frame(ifmt: VAImageFormat, pic: &Picture, frm: &mut NABuffe match iimg.format.fourcc().map_err(|_| DecoderError::InvalidData)? { VAFourcc::NV12 => { let frm = NASimpleVideoFrame::from_video_buf(&mut vbuf).unwrap(); - validate!(iimg.width == (frm.width[0] as u16)); - validate!(iimg.height == (frm.height[0] as u16)); + validate!(iimg.width == (((frm.width[0] + 15) & !15) as u16)); + validate!(iimg.height == (((frm.height[0] + 15) & !15) as u16)); - copy_luma(&mut frm.data[frm.offset[0]..], frm.stride[0], &imgdata[iimg.offsets[0] as usize..], iimg.pitches[0] as usize, frm.width[0], frm.height[0]); + copy_luma(&mut frm.data[frm.offset[0]..], frm.stride[0], &imgdata[iimg.offsets[0] as usize..], iimg.pitches[0] as usize, (frm.width[0] + 15) & !15, (frm.height[0] + 15) & !15); deint_chroma(frm, &imgdata[iimg.offsets[1] as usize..], iimg.pitches[1] as usize); }, + VAFourcc::YV12 => { + let frm = NASimpleVideoFrame::from_video_buf(&mut vbuf).unwrap(); + validate!(iimg.width == (((frm.width[0] + 15) & !15) as u16)); + validate!(iimg.height == (((frm.height[0] + 15) & !15) as u16)); + + copy_luma(&mut frm.data[frm.offset[0]..], frm.stride[0], &imgdata[iimg.offsets[0] as usize..], iimg.pitches[0] as usize, (frm.width[0] + 15) & !15, (frm.height[0] + 15) & !15); + copy_luma(&mut frm.data[frm.offset[2]..], frm.stride[2], &imgdata[iimg.offsets[1] as usize..], iimg.pitches[1] as usize, (frm.width[1] + 15) & !15, (frm.height[1] + 15) & !15); + copy_luma(&mut frm.data[frm.offset[1]..], frm.stride[1], &imgdata[iimg.offsets[2] as usize..], iimg.pitches[2] as usize, (frm.width[2] + 15) & !15, (frm.height[2] + 15) & !15); + }, _ => unimplemented!(), }; Ok(()) @@ -446,6 +464,7 @@ impl Default for VaapiH264Decoder { Self { info: NACodecInfoRef::default(), vaapi: None, + needs_derive: false, spses: Vec::with_capacity(1), ppses: Vec::with_capacity(4), frame_refs: FrameRefs::new(), @@ -477,10 +496,10 @@ impl VaapiH264Decoder { let _compatibility = br.read_byte()?; let _level = br.read_byte()?; let b = br.read_byte()?; - validate!((b & 0xFC) == 0xFC); + //validate!((b & 0xFC) == 0xFC); self.nal_len = (b & 3) + 1; let b = br.read_byte()?; - validate!((b & 0xE0) == 0xE0); + //validate!((b & 0xE0) == 0xE0); let num_sps = (b & 0x1F) as usize; for _ in 0..num_sps { let len = br.read_u16be()? as usize; @@ -584,6 +603,10 @@ println!("no decoding support for this profile"); return Err(DecoderError::Bug); } + let needs_derive= if let Ok(vendor) = display.query_vendor_string() { + vendor.contains("Kaby Lake") + } else { false }; + let config = display.create_config(vec![ VAConfigAttrib { type_: VAConfigAttribType::VAConfigAttribRTFormat, value: RTFormat::YUV420.into() }, ], va_profile, VAEntrypoint::VAEntrypointVLD).map_err(|_| { @@ -606,8 +629,9 @@ println!("config creation failed!"); } self.vaapi = Some(VaapiInternals { display, context, ref_pics, surfaces, ifmt }); + self.needs_derive = needs_derive; - let vinfo = NAVideoInfo::new(width, height, false, YUV420_FORMAT); + let vinfo = NAVideoInfo::new(vinfo.get_width(), vinfo.get_height(), false, YUV420_FORMAT); self.info = NACodecInfo::new_ref(info.get_name(), NACodecTypeInfo::Video(vinfo), info.get_extradata()).into_ref(); self.out_frm = alloc_video_buffer(vinfo, 4)?; @@ -1033,7 +1057,7 @@ panic!("ran out of free surfaces"); let is_ref = frm.is_ref; let ftype = frm.ftype; if let Ok(pic) = frm.pic.sync() { - let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm); + let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm, self.needs_derive); if !is_ref { if let Ok(surf) = pic.take_surface() { @@ -1066,7 +1090,7 @@ panic!("ran out of free surfaces"); let is_ref = frm.is_ref; let ftype = frm.ftype; if let Ok(pic) = frm.pic.sync() { - let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm); + let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm, self.needs_derive); if !is_ref { if let Ok(surf) = pic.take_surface() {