pub struct VaapiH264Decoder {
info: NACodecInfoRef,
vaapi: Option<VaapiInternals>,
+ needs_derive: bool,
spses: Vec<SeqParameterSet>,
ppses: Vec<PicParameterSet>,
frame_refs: FrameRefs,
tb_den: u32,
}
-#[cfg(not(target_arch="x86_64"))]
-fn copy_luma(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
+fn copy_luma_default(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
for (dline, sline) in dst.chunks_mut(dstride)
.zip(src.chunks(sstride))
.take(h) {
}
}
#[cfg(not(target_arch="x86_64"))]
+fn copy_luma(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
+ copy_luma_default(dst, dstride, src, sstride, w, h);
+}
+#[cfg(not(target_arch="x86_64"))]
fn deint_chroma(frm: NASimpleVideoFrame<u8>, src: &[u8], sstride: usize) {
let mut uoff = frm.offset[1];
let mut voff = frm.offset[2];
use std::arch::asm;
#[cfg(target_arch="x86_64")]
fn copy_luma(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
+ if !is_x86_feature_detected!("avx") {
+ copy_luma_default(dst, dstride, src, sstride, w, h);
+ return;
+ }
if dst.as_ptr().align_offset(32) == 0 && src.as_ptr().align_offset(32) == 0 &&
(w % 64) == 0 && ((dstride | sstride) % 32) == 0 {
unsafe {
);
}
} else {
+ let copy_len = dstride.min(w);
for (dline, sline) in dst.chunks_mut(dstride)
.zip(src.chunks(sstride))
.take(h) {
- dline[..w].copy_from_slice(&sline[..w]);
+ dline[..copy_len].copy_from_slice(&sline[..copy_len]);
}
}
}
#[cfg(target_arch="x86_64")]
fn deint_chroma(frm: NASimpleVideoFrame<u8>, src: &[u8], sstride: usize) {
unsafe {
- let width = frm.width[1];
- let height = frm.height[1];
+ let width = (frm.width[1] + 7) & !7;
+ let height = (frm.height[1] + 7) & !7;
let dst = frm.data.as_mut_ptr();
let udst = dst.add(frm.offset[1]);
let vdst = dst.add(frm.offset[2]);
}
}
-fn fill_frame(ifmt: VAImageFormat, pic: &Picture<PictureSync>, frm: &mut NABufferType) -> DecoderResult<()> {
+fn fill_frame(ifmt: VAImageFormat, pic: &Picture<PictureSync>, frm: &mut NABufferType, needs_derive: bool) -> DecoderResult<()> {
let mut vbuf = frm.get_vbuf().unwrap();
let (w, h) = pic.surface_size();
//let cur_ts = pic.timestamp();
- let img = Image::new(pic, ifmt, w, h, true).expect("get image");
+ let img = Image::new(pic, ifmt, w, h, !needs_derive).expect("get image");
let iimg = img.image();
let imgdata: &[u8] = img.as_ref();
match iimg.format.fourcc().map_err(|_| DecoderError::InvalidData)? {
VAFourcc::NV12 => {
let frm = NASimpleVideoFrame::from_video_buf(&mut vbuf).unwrap();
- validate!(iimg.width == (frm.width[0] as u16));
- validate!(iimg.height == (frm.height[0] as u16));
+ validate!(iimg.width == (((frm.width[0] + 15) & !15) as u16));
+ validate!(iimg.height == (((frm.height[0] + 15) & !15) as u16));
- copy_luma(&mut frm.data[frm.offset[0]..], frm.stride[0], &imgdata[iimg.offsets[0] as usize..], iimg.pitches[0] as usize, frm.width[0], frm.height[0]);
+ copy_luma(&mut frm.data[frm.offset[0]..], frm.stride[0], &imgdata[iimg.offsets[0] as usize..], iimg.pitches[0] as usize, (frm.width[0] + 15) & !15, (frm.height[0] + 15) & !15);
deint_chroma(frm, &imgdata[iimg.offsets[1] as usize..], iimg.pitches[1] as usize);
},
+ VAFourcc::YV12 => {
+ let frm = NASimpleVideoFrame::from_video_buf(&mut vbuf).unwrap();
+ validate!(iimg.width == (((frm.width[0] + 15) & !15) as u16));
+ validate!(iimg.height == (((frm.height[0] + 15) & !15) as u16));
+
+ copy_luma(&mut frm.data[frm.offset[0]..], frm.stride[0], &imgdata[iimg.offsets[0] as usize..], iimg.pitches[0] as usize, (frm.width[0] + 15) & !15, (frm.height[0] + 15) & !15);
+ copy_luma(&mut frm.data[frm.offset[2]..], frm.stride[2], &imgdata[iimg.offsets[1] as usize..], iimg.pitches[1] as usize, (frm.width[1] + 15) & !15, (frm.height[1] + 15) & !15);
+ copy_luma(&mut frm.data[frm.offset[1]..], frm.stride[1], &imgdata[iimg.offsets[2] as usize..], iimg.pitches[2] as usize, (frm.width[2] + 15) & !15, (frm.height[2] + 15) & !15);
+ },
_ => unimplemented!(),
};
Ok(())
Self {
info: NACodecInfoRef::default(),
vaapi: None,
+ needs_derive: false,
spses: Vec::with_capacity(1),
ppses: Vec::with_capacity(4),
frame_refs: FrameRefs::new(),
let _compatibility = br.read_byte()?;
let _level = br.read_byte()?;
let b = br.read_byte()?;
- validate!((b & 0xFC) == 0xFC);
+ //validate!((b & 0xFC) == 0xFC);
self.nal_len = (b & 3) + 1;
let b = br.read_byte()?;
- validate!((b & 0xE0) == 0xE0);
+ //validate!((b & 0xE0) == 0xE0);
let num_sps = (b & 0x1F) as usize;
for _ in 0..num_sps {
let len = br.read_u16be()? as usize;
return Err(DecoderError::Bug);
}
+ let needs_derive= if let Ok(vendor) = display.query_vendor_string() {
+ vendor.contains("Kaby Lake")
+ } else { false };
+
let config = display.create_config(vec![
VAConfigAttrib { type_: VAConfigAttribType::VAConfigAttribRTFormat, value: RTFormat::YUV420.into() },
], va_profile, VAEntrypoint::VAEntrypointVLD).map_err(|_| {
}
self.vaapi = Some(VaapiInternals { display, context, ref_pics, surfaces, ifmt });
+ self.needs_derive = needs_derive;
- let vinfo = NAVideoInfo::new(width, height, false, YUV420_FORMAT);
+ let vinfo = NAVideoInfo::new(vinfo.get_width(), vinfo.get_height(), false, YUV420_FORMAT);
self.info = NACodecInfo::new_ref(info.get_name(), NACodecTypeInfo::Video(vinfo), info.get_extradata()).into_ref();
self.out_frm = alloc_video_buffer(vinfo, 4)?;
let is_ref = frm.is_ref;
let ftype = frm.ftype;
if let Ok(pic) = frm.pic.sync() {
- let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm);
+ let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm, self.needs_derive);
if !is_ref {
if let Ok(surf) = pic.take_surface() {
let is_ref = frm.is_ref;
let ftype = frm.ftype;
if let Ok(pic) = frm.pic.sync() {
- let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm);
+ let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm, self.needs_derive);
if !is_ref {
if let Ok(surf) = pic.take_surface() {
--- /dev/null
+/// Kernel DRM buffer memory type.
+pub const VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM: u32 = 0x10000000;
+/// DRM PRIME memory type (old version).
+///
+/// This supports only single objects with restricted memory layout.
+/// Used with `VASurfaceAttribExternalBuffers`.
+pub const VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME: u32 = 0x20000000;
+/// DRM PRIME memory type (version 2).
+///
+/// Used with [`VADRMPRIMESurfaceDescriptor`].
+pub const VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2: u32 = 0x40000000;
+/// DRM PRIME memory type (version 3).
+///
+/// Used with [`VADRMPRIME3SurfaceDescriptor`].
+pub const VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_3: u32 = 0x08000000;
+
+/// Description of one DRM PRIME object backing a surface.
+#[derive(Debug,Clone,Copy,Default)]
+#[repr(C)]
+pub struct VADRMPrimeObject {
+ /// DRM PRIME file descriptor for this object.
+ pub fd: i32,
+ /// Total size of this object (may include regions which are not part of the surface).
+ pub size: u32,
+ /// Format modifier applied to this object.
+ pub drm_format_modifier: u64,
+}
+
+/// Description of one layer of a surface.
+#[derive(Debug,Clone,Copy,Default)]
+#[repr(C)]
+pub struct VADRMPrimeLayer {
+ /// DRM format fourcc of this layer (DRM_FOURCC_*).
+ pub drm_format: u32,
+ /// Number of planes in this layer.
+ pub num_planes: u32,
+ /// For each plane, the index in the objects array of the object containing it.
+ pub object_index: [u32; 4],
+ /// For each plane, its offset within the containing object.
+ pub offset: [u32; 4],
+ /// For each plane, its pitch.
+ pub pitch: [u32; 4],
+}
+
+/**
+ * External buffer descriptor for a DRM PRIME surface.
+ *
+ * For export, call vaExportSurfaceHandle() with mem_type set to
+ * VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2 and pass a pointer to an
+ * instance of this structure to fill.
+ * If VA_EXPORT_SURFACE_SEPARATE_LAYERS is specified on export, each
+ * layer will contain exactly one plane. For example, an NV12
+ * surface will be exported as two layers, one of DRM_FORMAT_R8 and
+ * one of DRM_FORMAT_GR88.
+ * If VA_EXPORT_SURFACE_COMPOSED_LAYERS is specified on export,
+ * there will be exactly one layer.
+ *
+ * For import, call vaCreateSurfaces() with the MemoryType attribute
+ * set to VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2 and the
+ * ExternalBufferDescriptor attribute set to point to an array of
+ * num_surfaces instances of this structure.
+ * The number of planes which need to be provided for a given layer
+ * depends on both the format and the format modifier used for
+ * the objects containing it. For example, the format DRM_FORMAT_RGBA
+ * normally requires one plane, but with the format modifier
+ * I915_FORMAT_MOD_Y_TILED_CCS it requires two planes - the first
+ * being the main data plane and the second containing the color
+ * control surface.
+ * Note that a given driver may only support a subset of possible
+ * representations of a particular format. For example, it may only
+ * support NV12 surfaces when they are contained within a single DRM
+ * object, and therefore fail to create such surfaces if the two
+ * planes are in different DRM objects.
+ * Note that the backend driver will retrieve the resource represented by
+ * the fd and generate a valid surface ID. The backend driver will not close
+ * the file descriptor; the application is responsible for releasing the fd.
+ * Releasing the fd will not impact the existence of the surface.
+ */
+#[derive(Debug,Clone,Copy,Default)]
+#[repr(C)]
+pub struct VADRMPRIMESurfaceDescriptor {
+ /// Pixel format fourcc of the whole surface (VA_FOURCC_*).
+ pub fourcc: u32,
+ /// Width of the surface in pixels.
+ pub width: u32,
+ /// Height of the surface in pixels.
+ pub height: u32,
+ /// Number of distinct DRM objects making up the surface.
+ pub num_objects: u32,
+ /// Description of each object.
+ pub objects: [VADRMPrimeObject; 4],
+ /// Number of layers making up the surface.
+ pub num_layers: u32,
+ /// Description of each layer in the surface.
+ pub layers: [VADRMPrimeLayer; 4],
+}
+
+/**
+ * External buffer descriptor for a DRM PRIME surface with flags.
+ *
+ * This structure is an extension of VADRMPRIMESurfaceDescriptor;
+ * it behaves the same as that structure used with VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2.
+ *
+ * The field "flags" is added, see "Surface external buffer descriptor flags".
+ * To use this structure, use VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_3 instead.
+ */
+#[derive(Debug,Clone,Copy,Default)]
+#[repr(C)]
+pub struct VADRMPRIME3SurfaceDescriptor {
+ /// Pixel format fourcc of the whole surface (VA_FOURCC_*).
+ pub fourcc: u32,
+ /// Width of the surface in pixels.
+ pub width: u32,
+ /// Height of the surface in pixels.
+ pub height: u32,
+ /// Number of distinct DRM objects making up the surface.
+ pub num_objects: u32,
+ /// Description of each object.
+ pub objects: [VADRMPrimeObject; 4],
+ /// Number of layers making up the surface.
+ pub num_layers: u32,
+ /// Description of each layer in the surface.
+ pub layers: [VADRMPrimeLayer; 4],
+ /// Flags. See "Surface external buffer descriptor flags".
+ pub flags: u32,
+ /// Reserved padding, must be zero.
+ reserved: [u32; 8 - 1],
+}