From: Kostya Shishkov Date: Tue, 24 Jun 2025 15:39:44 +0000 (+0200) Subject: hwdec-vaapi: switch to derived surfaces for Kaby Lake GPU X-Git-Url: https://git.nihav.org/?a=commitdiff_plain;h=fa346f32b78f5bacea74421f28ac8fabc5c58e1e;p=nihav-player.git hwdec-vaapi: switch to derived surfaces for Kaby Lake GPU Apparently on that GPU it's much faster than using native surfaces. --- diff --git a/hwdec-vaapi/src/lib.rs b/hwdec-vaapi/src/lib.rs index d7c1c9d..27142e8 100644 --- a/hwdec-vaapi/src/lib.rs +++ b/hwdec-vaapi/src/lib.rs @@ -273,6 +273,7 @@ struct VaapiInternals { pub struct VaapiH264Decoder { info: NACodecInfoRef, vaapi: Option, + needs_derive: bool, spses: Vec, ppses: Vec, frame_refs: FrameRefs, @@ -423,12 +424,12 @@ fn deint_chroma(frm: NASimpleVideoFrame, src: &[u8], sstride: usize) { } } -fn fill_frame(ifmt: VAImageFormat, pic: &Picture, frm: &mut NABufferType) -> DecoderResult<()> { +fn fill_frame(ifmt: VAImageFormat, pic: &Picture, frm: &mut NABufferType, needs_derive: bool) -> DecoderResult<()> { let mut vbuf = frm.get_vbuf().unwrap(); let (w, h) = pic.surface_size(); //let cur_ts = pic.timestamp(); - let img = Image::new(pic, ifmt, w, h, true).expect("get image"); + let img = Image::new(pic, ifmt, w, h, !needs_derive).expect("get image"); let iimg = img.image(); let imgdata: &[u8] = img.as_ref(); @@ -443,6 +444,15 @@ fn fill_frame(ifmt: VAImageFormat, pic: &Picture, frm: &mut NABuffe deint_chroma(frm, &imgdata[iimg.offsets[1] as usize..], iimg.pitches[1] as usize); }, + VAFourcc::YV12 => { + let frm = NASimpleVideoFrame::from_video_buf(&mut vbuf).unwrap(); + validate!(iimg.width == (((frm.width[0] + 15) & !15) as u16)); + validate!(iimg.height == (((frm.height[0] + 15) & !15) as u16)); + + copy_luma(&mut frm.data[frm.offset[0]..], frm.stride[0], &imgdata[iimg.offsets[0] as usize..], iimg.pitches[0] as usize, (frm.width[0] + 15) & !15, (frm.height[0] + 15) & !15); + copy_luma(&mut frm.data[frm.offset[2]..], frm.stride[2], &imgdata[iimg.offsets[1] as usize..], iimg.pitches[1] as usize, (frm.width[1] + 15) & !15, (frm.height[1] + 15) & !15); + copy_luma(&mut frm.data[frm.offset[1]..], frm.stride[1], &imgdata[iimg.offsets[2] as usize..], iimg.pitches[2] as usize, (frm.width[2] + 15) & !15, (frm.height[2] + 15) & !15); + }, _ => unimplemented!(), }; Ok(()) @@ -453,6 +463,7 @@ impl Default for VaapiH264Decoder { Self { info: NACodecInfoRef::default(), vaapi: None, + needs_derive: false, spses: Vec::with_capacity(1), ppses: Vec::with_capacity(4), frame_refs: FrameRefs::new(), @@ -591,6 +602,10 @@ println!("no decoding support for this profile"); return Err(DecoderError::Bug); } + let needs_derive= if let Ok(vendor) = display.query_vendor_string() { + vendor.contains("Kaby Lake") + } else { false }; + let config = display.create_config(vec![ VAConfigAttrib { type_: VAConfigAttribType::VAConfigAttribRTFormat, value: RTFormat::YUV420.into() }, ], va_profile, VAEntrypoint::VAEntrypointVLD).map_err(|_| { @@ -613,6 +628,7 @@ println!("config creation failed!"); } self.vaapi = Some(VaapiInternals { display, context, ref_pics, surfaces, ifmt }); + self.needs_derive = needs_derive; let vinfo = NAVideoInfo::new(vinfo.get_width(), vinfo.get_height(), false, YUV420_FORMAT); self.info = NACodecInfo::new_ref(info.get_name(), NACodecTypeInfo::Video(vinfo), info.get_extradata()).into_ref(); @@ -1040,7 +1056,7 @@ panic!("ran out of free surfaces"); let is_ref = frm.is_ref; let ftype = frm.ftype; if let Ok(pic) = frm.pic.sync() { - let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm); + let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm, self.needs_derive); if !is_ref { if let Ok(surf) = pic.take_surface() { @@ -1073,7 +1089,7 @@ panic!("ran out of free surfaces"); let is_ref = frm.is_ref; let ftype = frm.ftype; if let Ok(pic) = frm.pic.sync() { - let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm); + let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm, self.needs_derive); if !is_ref { if let Ok(surf) = pic.take_surface() {