hwdec-vaapi: switch to derived surfaces for Kaby Lake GPU

author Kostya Shishkov <kostya.shishkov@gmail.com>

Tue, 24 Jun 2025 15:39:44 +0000 (17:39 +0200)

committer Kostya Shishkov <kostya.shishkov@gmail.com>

Tue, 24 Jun 2025 15:39:44 +0000 (17:39 +0200)
author Kostya Shishkov <kostya.shishkov@gmail.com>
Tue, 24 Jun 2025 15:39:44 +0000 (17:39 +0200)
committer Kostya Shishkov <kostya.shishkov@gmail.com>
Tue, 24 Jun 2025 15:39:44 +0000 (17:39 +0200)
diff --git a/hwdec-vaapi/src/lib.rs b/hwdec-vaapi/src/lib.rs

index d7c1c9dd2e062f83497f539d2b3d88acfd66ce9e..27142e88e417877ee69ca584d7fda81a1a7f3925 100644 (file)
--- a/hwdec-vaapi/src/lib.rs
+++ b/hwdec-vaapi/src/lib.rs
@@ -273,6 +273,7 @@ struct VaapiInternals {
  pub struct VaapiH264Decoder {
      info:           NACodecInfoRef,
      vaapi:          Option<VaapiInternals>,
+    needs_derive:   bool,
      spses:          Vec<SeqParameterSet>,
      ppses:          Vec<PicParameterSet>,
      frame_refs:     FrameRefs,
@@ -423,12 +424,12 @@ fn deint_chroma(frm: NASimpleVideoFrame<u8>, src: &[u8], sstride: usize) {
      }
  }
  
-fn fill_frame(ifmt: VAImageFormat, pic: &Picture<PictureSync>, frm: &mut NABufferType) -> DecoderResult<()> {
+fn fill_frame(ifmt: VAImageFormat, pic: &Picture<PictureSync>, frm: &mut NABufferType, needs_derive: bool) -> DecoderResult<()> {
      let mut vbuf = frm.get_vbuf().unwrap();
      let (w, h) = pic.surface_size();
      //let cur_ts = pic.timestamp();
  
-    let img = Image::new(pic, ifmt, w, h, true).expect("get image");
+    let img = Image::new(pic, ifmt, w, h, !needs_derive).expect("get image");
  
      let iimg = img.image();
      let imgdata: &[u8] = img.as_ref();
@@ -443,6 +444,15 @@ fn fill_frame(ifmt: VAImageFormat, pic: &Picture<PictureSync>, frm: &mut NABuffe
  
              deint_chroma(frm, &imgdata[iimg.offsets[1] as usize..], iimg.pitches[1] as usize);
          },
+        VAFourcc::YV12 => {
+            let frm = NASimpleVideoFrame::from_video_buf(&mut vbuf).unwrap();
+            validate!(iimg.width  == (((frm.width[0]  + 15) & !15) as u16));
+            validate!(iimg.height == (((frm.height[0] + 15) & !15) as u16));
+
+            copy_luma(&mut frm.data[frm.offset[0]..], frm.stride[0], &imgdata[iimg.offsets[0] as usize..], iimg.pitches[0] as usize, (frm.width[0] + 15) & !15, (frm.height[0] + 15) & !15);
+            copy_luma(&mut frm.data[frm.offset[2]..], frm.stride[2], &imgdata[iimg.offsets[1] as usize..], iimg.pitches[1] as usize, (frm.width[1] + 15) & !15, (frm.height[1] + 15) & !15);
+            copy_luma(&mut frm.data[frm.offset[1]..], frm.stride[1], &imgdata[iimg.offsets[2] as usize..], iimg.pitches[2] as usize, (frm.width[2] + 15) & !15, (frm.height[2] + 15) & !15);
+        },
          _ => unimplemented!(),
      };
      Ok(())
@@ -453,6 +463,7 @@ impl Default for VaapiH264Decoder {
          Self {
              info:           NACodecInfoRef::default(),
              vaapi:          None,
+            needs_derive:   false,
              spses:          Vec::with_capacity(1),
              ppses:          Vec::with_capacity(4),
              frame_refs:     FrameRefs::new(),
@@ -591,6 +602,10 @@ println!("no decoding support for this profile");
                  return Err(DecoderError::Bug);
              }
  
+            let needs_derive= if let Ok(vendor) = display.query_vendor_string() {
+                    vendor.contains("Kaby Lake")
+                } else { false };
+
              let config = display.create_config(vec![
                      VAConfigAttrib { type_: VAConfigAttribType::VAConfigAttribRTFormat, value: RTFormat::YUV420.into() },
                  ], va_profile, VAEntrypoint::VAEntrypointVLD).map_err(|_| {
@@ -613,6 +628,7 @@ println!("config creation failed!");
              }
  
              self.vaapi = Some(VaapiInternals { display, context, ref_pics, surfaces, ifmt });
+            self.needs_derive = needs_derive;
  
              let vinfo = NAVideoInfo::new(vinfo.get_width(), vinfo.get_height(), false, YUV420_FORMAT);
              self.info = NACodecInfo::new_ref(info.get_name(), NACodecTypeInfo::Video(vinfo), info.get_extradata()).into_ref();
@@ -1040,7 +1056,7 @@ panic!("ran out of free surfaces");
                  let is_ref = frm.is_ref;
                  let ftype = frm.ftype;
                  if let Ok(pic) = frm.pic.sync() {
-                    let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm);
+                    let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm, self.needs_derive);
  
                      if !is_ref {
                          if let Ok(surf) = pic.take_surface() {
@@ -1073,7 +1089,7 @@ panic!("ran out of free surfaces");
                  let is_ref = frm.is_ref;
                  let ftype = frm.ftype;
                  if let Ok(pic) = frm.pic.sync() {
-                    let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm);
+                    let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm, self.needs_derive);
  
                      if !is_ref {
                          if let Ok(surf) = pic.take_surface() {
author	Kostya Shishkov <kostya.shishkov@gmail.com>
	Tue, 24 Jun 2025 15:39:44 +0000 (17:39 +0200)
committer	Kostya Shishkov <kostya.shishkov@gmail.com>
	Tue, 24 Jun 2025 15:39:44 +0000 (17:39 +0200)