1 use std::collections::VecDeque;
2 use std::convert::TryInto;
5 use nihav_core::codecs::*;
6 use nihav_core::io::byteio::*;
7 use nihav_core::io::bitreader::*;
8 use nihav_core::io::intcode::*;
// Validation macro used throughout the decoder: bails out of the calling
// function with DecoderError::InvalidData when the condition fails.
// The debug-build variant additionally prints the failing file:line so
// malformed-bitstream checks can be located quickly; the release variant
// fails silently. (The closing braces of both macro definitions fall
// outside this sampled view.)
12 #[cfg(debug_assertions)]
13 macro_rules! validate {
14 ($a:expr) => { if !$a { println!("check failed at {}:{}", file!(), line!()); return Err(DecoderError::InvalidData); } };
16 #[cfg(not(debug_assertions))]
17 macro_rules! validate {
18 ($a:expr) => { if !$a { return Err(DecoderError::InvalidData); } };
// Lint allowances for items whose declarations are not visible in this view.
23 #[allow(clippy::manual_range_contains)]
24 #[allow(clippy::needless_range_loop)]
27 #[allow(clippy::manual_range_contains)]
// Exp-Golomb bitstream reading extension trait (the `trait ReadUE` header
// itself lies outside this view). Provides the H.264 ue(v)/te(v)/se(v)
// entropy-coded value readers on top of a bit reader.
// Unsigned Exp-Golomb code, ue(v) in the H.264 spec.
32 fn read_ue(&mut self) -> DecoderResult<u32>;
// Truncated Exp-Golomb code, te(v): one inverted bit when range == 1,
// otherwise a full ue(v) bounded by `range`.
33 fn read_te(&mut self, range: u32) -> DecoderResult<u32>;
// ue(v) with an upper bound; errors out on values above `max_val`.
34 fn read_ue_lim(&mut self, max_val: u32) -> DecoderResult<u32> {
35 let val = self.read_ue()?;
36 validate!(val <= max_val);
// Signed Exp-Golomb, se(v): code k maps to +((k+1)/2) for odd k and
// -(k/2) for even k (the branching lines are missing from this view).
39 fn read_se(&mut self) -> DecoderResult<i32> {
40 let val = self.read_ue()?;
42 Ok (((val >> 1) as i32) + 1)
44 Ok (-((val >> 1) as i32))
// Concrete ReadUE implementation on top of nihav's BitReader.
49 impl<'a> ReadUE for BitReader<'a> {
50 fn read_ue(&mut self) -> DecoderResult<u32> {
// GammaP is the 1-based Elias gamma' code; subtracting 1 yields ue(v).
51 Ok(self.read_code(UintCodeType::GammaP)? - 1)
53 fn read_te(&mut self, range: u32) -> DecoderResult<u32> {
// Single-bit case (range == 1 branch; surrounding lines not visible here).
55 if self.read_bool()? {
61 let val = self.read_ue()?;
62 validate!(val <= range);
// Extracts the long-term reference index assigned to the current picture,
// if any: for IDR frames it comes from the long_term_reference flag; for
// other frames it is scanned from the adaptive memory-management control
// operations (MMCO) in the slice header. Returns None when the picture is
// not marked as a long-term reference.
68 fn get_long_term_id(is_idr: bool, slice_hdr: &SliceHeader) -> Option<usize> {
69 if is_idr && !slice_hdr.long_term_reference {
72 let marking = &slice_hdr.adaptive_ref_pic_marking;
// Walk the MMCO list; only the first `num_ops` entries are valid.
// (The `op` match that selects the long-term-assignment opcode is not
// visible in this view.)
73 for (&op, &arg) in marking.memory_management_control_op.iter().zip(marking.operation_arg.iter()).take(marking.num_ops) {
75 return Some(arg as usize);
// Removes H.264 emulation-prevention bytes (0x03 inserted after 0x00 0x00)
// from a NAL unit, writing the unescaped payload into `dst` and returning
// its size. Also appears to detect start codes (00 00 01 / zero-run + 01)
// — the loop body handling those cases is only partially visible here.
82 fn unescape_nal(src: &[u8], dst: &mut Vec<u8>) -> usize {
// Worst case the output is as large as the input.
86 dst.reserve(src.len());
87 while off < src.len() {
// Two zero bytes followed by 0x03: the 0x03 is an escape byte to skip.
93 if zrun == 2 && off + 1 < src.len() && src[off + 1] == 0x03 {
// Three or more zeros followed by 0x01: an (unexpected) start code.
97 if zrun >= 3 && off + 1 < src.len() && src[off + 1] == 0x01 {
// Builds the "invalid picture" placeholder VA-API expects in unused slots
// of the 16/32-entry reference picture arrays.
108 fn make_dummy_h264_pic() -> PictureH264 {
109 PictureH264::new(VA_INVALID_ID, 0, H264PictureFlag::Invalid.into(), 0, 0)
// Trait method (enclosing `trait MakePicH264` header is outside this view):
// converts a decoder-side picture record into a VA-API PictureH264.
113 fn make_pic(&self) -> PictureH264;
// Maps the decoder's PictureInfo onto VA-API's PictureH264: long-term
// references carry their long-term index, short-term references carry the
// frame number, and the top/bottom field order counts are passed through.
116 impl MakePicH264 for PictureInfo {
117 fn make_pic(&self) -> PictureH264 {
118 let mut flags = H264PictureFlags::default();
119 let frame_idx = if let Some(id) = self.long_term {
120 flags |= H264PictureFlag::LongTermReference;
// Short-term branch (the `} else {` and frame_num lines are not visible).
124 flags |= H264PictureFlag::ShortTermReference;
128 PictureH264::new(self.surface_id, frame_idx, flags, self.top_id as i32, self.bot_id as i32)
// Converts a decoder reference list (possibly shorter than 32 and with
// holes) into the fixed 32-entry array VA-API slice parameters require,
// padding unused slots with dummy/invalid pictures.
132 fn map_ref_list(refs: &[Option<PictureInfo>]) -> [PictureH264; 32] {
133 let mut ref_list = Vec::with_capacity(32);
135 for rpic in refs.iter() {
136 ref_list.push(rpic.as_ref().map_or_else(make_dummy_h264_pic, |pic| pic.make_pic()));
139 while ref_list.len() < 32 {
140 ref_list.push(make_dummy_h264_pic());
// try_into cannot actually fail here (length is exactly 32), hence the
// unconditional panic in the fallback arm.
142 if let Ok(ret) = ref_list.try_into() {
145 panic!("can't convert");
// Human-readable names for VA-API profile IDs, used in diagnostic prints.
// NOTE(review): the "H.EVC Main" and "H.EVC Main10" strings below look like
// typos for "HEVC Main" / "HEVC Main10" (compare the other HEVC arms) —
// display-only, but worth fixing in a dedicated change.
// (The default `_ =>` arm and closing brace are outside this sampled view.)
149 fn profile_name(profile: VAProfile::Type) -> &'static str {
151 VAProfile::VAProfileMPEG2Simple => "MPEG2 Simple",
152 VAProfile::VAProfileMPEG2Main => "MPEG2 Main",
153 VAProfile::VAProfileMPEG4Simple => "MPEG4 Simple",
154 VAProfile::VAProfileMPEG4AdvancedSimple => "MPEG4 Advanced Simple",
155 VAProfile::VAProfileMPEG4Main => "MPEG4 Main",
156 VAProfile::VAProfileH264Baseline => "H264 Baseline",
157 VAProfile::VAProfileH264Main => "H264 Main",
158 VAProfile::VAProfileH264High => "H264 High",
159 VAProfile::VAProfileVC1Simple => "VC1 Simple",
160 VAProfile::VAProfileVC1Main => "VC1 Main",
161 VAProfile::VAProfileVC1Advanced => "VC1 Advanced",
162 VAProfile::VAProfileH263Baseline => "H263 Baseline",
163 VAProfile::VAProfileJPEGBaseline => "JPEG Baseline",
164 VAProfile::VAProfileH264ConstrainedBaseline => "H264 Constrained Baseline",
165 VAProfile::VAProfileVP8Version0_3 => "VP8",
166 VAProfile::VAProfileH264MultiviewHigh => "H.264 Multiview High",
167 VAProfile::VAProfileH264StereoHigh => "H264 Stereo High",
168 VAProfile::VAProfileHEVCMain => "H.EVC Main",
169 VAProfile::VAProfileHEVCMain10 => "H.EVC Main10",
170 VAProfile::VAProfileVP9Profile0 => "VP9 Profile 0",
171 VAProfile::VAProfileVP9Profile1 => "VP9 Profile 1",
172 VAProfile::VAProfileVP9Profile2 => "VP9 Profile 2",
173 VAProfile::VAProfileVP9Profile3 => "VP9 Profile 3",
174 VAProfile::VAProfileHEVCMain12 => "HEVC Main12",
175 VAProfile::VAProfileHEVCMain422_10 => "HEVC Main10 4:2:2",
176 VAProfile::VAProfileHEVCMain422_12 => "HEVC Main12 4:2:2",
177 VAProfile::VAProfileHEVCMain444 => "HEVC Main 4:4:4",
178 VAProfile::VAProfileHEVCMain444_10 => "HEVC Main10 4:4:4",
179 VAProfile::VAProfileHEVCMain444_12 => "HEVC Main12 4:4:4",
180 VAProfile::VAProfileHEVCSccMain => "HEVC SCC Main",
181 VAProfile::VAProfileHEVCSccMain10 => "HEVC SCC Main10",
182 VAProfile::VAProfileHEVCSccMain444 => "HEVC SCC Main 4:4:4",
183 VAProfile::VAProfileAV1Profile0 => "AV1 Profile 0",
184 VAProfile::VAProfileAV1Profile1 => "AV1 Profile 1",
185 VAProfile::VAProfileHEVCSccMain444_10 => "HEVC SCC Main10 4:4:4",
// Size of the VA-API H.264 reference picture array (DPB slots).
190 const NUM_REF_PICS: usize = 16;
// A decoded-but-not-yet-output frame queued for display reordering.
192 struct WaitingFrame {
// VA-API picture handle after rendering has been submitted.
194 pic: Picture<PictureEnd>,
// Reorderer state (struct header not visible here): DTS of the most recent
// reference frame, plus the queue of frames awaiting output.
201 last_ref_dts: Option<u64>,
203 frames: VecDeque<WaitingFrame>,
206 impl Default for Reorderer {
207 fn default() -> Self {
// Pre-size for a typical DPB worth of frames.
211 frames: VecDeque::with_capacity(16),
// Inserts a frame into the reorder queue. Non-reference frames are placed
// in timestamp order among the queued frames; reference frames are appended
// and everything up to the previous reference frame becomes ready for
// output (classic B-frame reordering on DTS).
217 fn add_frame(&mut self, new_frame: WaitingFrame) {
218 if !new_frame.is_ref {
219 if self.frames.is_empty() {
220 self.frames.push_back(new_frame);
222 let new_dts = new_frame.ts;
// Find the first queued frame with a later timestamp and insert before it.
224 for (i, frm) in self.frames.iter().enumerate() {
226 if frm.ts > new_dts {
230 self.frames.insert(idx, new_frame);
// Reference frame: frames up to and including the previous reference are
// safe to emit, so advance ready_idx past it.
233 for (i, frm) in self.frames.iter().enumerate() {
234 if Some(frm.ts) == self.last_ref_dts {
235 self.ready_idx = i + 1;
238 self.last_ref_dts = Some(new_frame.ts);
239 self.frames.push_back(new_frame);
// Pops the next displayable frame, but only if the hardware has finished
// with its surface (or rendering is still in flight, in which case None).
242 fn get_frame(&mut self) -> Option<WaitingFrame> {
243 if self.ready_idx > 0 {
244 match self.frames[0].pic.query_status() {
// Escape hatch: with more than 16 frames queued, output regardless of
// surface status to avoid unbounded growth.
245 _ if self.ready_idx > 16 => {},
246 Ok(VASurfaceStatus::Ready) => {},
247 Ok(VASurfaceStatus::Rendering) => return None,
253 self.frames.pop_front()
// Drops reordering state; queued frames are handled by the caller.
258 fn flush(&mut self) {
259 self.last_ref_dts = None;
// Live VA-API objects: the display/context pair, pictures still referenced
// by the DPB (kept alive until no longer used as references), and the pool
// of free surfaces to render into.
265 struct VaapiInternals {
266 display: Rc<Display>,
267 context: Rc<Context>,
268 ref_pics: Vec<(Picture<PictureSync>, VASurfaceID)>,
269 surfaces: Vec<Surface>,
// The decoder proper: codec info, optional VA-API state (None until init),
// parsed parameter sets, reference-frame bookkeeping, a reusable output
// buffer and the display reorderer. (Several fields, e.g. nal_len and the
// timebase, are declared on lines outside this view.)
273 pub struct VaapiH264Decoder {
274 info: NACodecInfoRef,
275 vaapi: Option<VaapiInternals>,
277 spses: Vec<SeqParameterSet>,
278 ppses: Vec<PicParameterSet>,
279 frame_refs: FrameRefs,
281 out_frm: NABufferType,
282 reorderer: Reorderer,
// Plain row-by-row copy of a luma plane between buffers with differing
// strides; `w` bytes per row are copied. (The `.take(h)` limiter, if any,
// is on a line not visible in this view.)
287 fn copy_luma_default(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
288 for (dline, sline) in dst.chunks_mut(dstride)
289 .zip(src.chunks(sstride))
291 dline[..w].copy_from_slice(&sline[..w]);
// Non-x86_64 fallback: no SIMD path, just the default copy.
294 #[cfg(not(target_arch="x86_64"))]
295 fn copy_luma(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
296 copy_luma_default(dst, dstride, src, sstride, w, h);
// Non-x86_64 chroma deinterleave: splits NV12-style interleaved UVUV rows
// into the separate U and V planes of the destination frame.
298 #[cfg(not(target_arch="x86_64"))]
299 fn deint_chroma(frm: NASimpleVideoFrame<u8>, src: &[u8], sstride: usize) {
300 let mut uoff = frm.offset[1];
301 let mut voff = frm.offset[2];
302 for cline in src.chunks(sstride).take(frm.height[1]) {
303 for (x, pair) in cline.chunks_exact(2).take(frm.width[1]).enumerate() {
304 frm.data[uoff + x] = pair[0];
305 frm.data[voff + x] = pair[1];
307 uoff += frm.stride[1];
308 voff += frm.stride[2];
312 #[cfg(target_arch="x86_64")]
// x86_64 luma copy: uses an AVX 64-bytes-per-iteration aligned copy when
// both pointers are 32-byte aligned, the width is a multiple of 64 and the
// strides are multiples of 32; otherwise falls back to a scalar row copy.
// NOTE(review): the asm path uses vmovdqa (alignment-faulting moves), so
// the alignment precondition above is load-bearing.
314 #[cfg(target_arch="x86_64")]
315 fn copy_luma(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
// Runtime CPU feature check — without AVX use the portable copy.
316 if !is_x86_feature_detected!("avx") {
317 copy_luma_default(dst, dstride, src, sstride, w, h);
320 if dst.as_ptr().align_offset(32) == 0 && src.as_ptr().align_offset(32) == 0 &&
321 (w % 64) == 0 && ((dstride | sstride) % 32) == 0 {
// Inner copy: two 32-byte aligned loads/stores per iteration (64 bytes).
327 " vmovdqa ymm0, [{src}]",
328 " vmovdqa ymm1, [{src}+32]",
329 " vmovdqa [{dst}], ymm0",
330 " vmovdqa [{dst}+32], ymm1",
// End of row: skip the stride remainder on both pointers.
335 " add {src}, {sstep}",
336 " add {dst}, {dstep}",
339 dst = inout(reg) dst.as_mut_ptr() => _,
340 src = inout(reg) src.as_ptr() => _,
341 sstep = in(reg) sstride - w,
342 dstep = in(reg) dstride - w,
// Scalar fallback; clamp the copy length to the destination stride.
351 let copy_len = dstride.min(w);
352 for (dline, sline) in dst.chunks_mut(dstride)
353 .zip(src.chunks(sstride))
355 dline[..copy_len].copy_from_slice(&sline[..copy_len]);
// x86_64 chroma deinterleave: SSE assembly splitting interleaved UV rows
// into separate U and V planes. Width/height are rounded up to multiples
// of 8; two inner loops handle 8-pixel and 16-pixel batches. Raw pointers
// are derived from the frame buffer, so the rounded-up width must stay
// within the plane strides (guaranteed by the 16-pixel-aligned allocation
// used elsewhere in this file).
359 #[cfg(target_arch="x86_64")]
360 fn deint_chroma(frm: NASimpleVideoFrame<u8>, src: &[u8], sstride: usize) {
362 let width = (frm.width[1] + 7) & !7;
363 let height = (frm.height[1] + 7) & !7;
364 let dst = frm.data.as_mut_ptr();
365 let udst = dst.add(frm.offset[1]);
366 let vdst = dst.add(frm.offset[2]);
// Per-row pointer advance past the processed width.
367 let dstep = frm.stride[1] - width;
368 let sstep = sstride - width * 2;
371 " mov {tmp}, {width}",
// 8-pixel tail path: one 16-byte load = 8 UV pairs.
374 " movaps xmm0, [{src}]",
375 " movaps xmm1, xmm0",
379 " packuswb xmm1, xmm1",
380 " packuswb xmm0, xmm0",
381 " movq [{vdst}], xmm1",
382 " movq [{udst}], xmm0",
// 16-pixel main path: two 16-byte loads = 16 UV pairs.
388 " movaps xmm0, [{src}]",
389 " movaps xmm1, [{src} + 16]",
390 " movaps xmm2, xmm0",
391 " movaps xmm3, xmm1",
398 " packuswb xmm2, xmm3",
399 " packuswb xmm0, xmm1",
400 " movups [{vdst}], xmm2",
401 " movups [{udst}], xmm0",
// Row epilogue: advance all three pointers by their strides.
407 " add {udst}, {dstep}",
408 " add {vdst}, {dstep}",
409 " add {src}, {sstep}",
412 src = inout(reg) src.as_ptr() => _,
413 udst = inout(reg) udst => _,
414 vdst = inout(reg) vdst => _,
415 width = in(reg) width,
416 height = inout(reg) height => _,
417 dstep = in(reg) dstep,
418 sstep = in(reg) sstep,
// Downloads a finished VA-API surface into the decoder's output video
// buffer, dispatching on the image FOURCC: an NV12-style layout gets a luma
// copy plus chroma deinterleave, a planar layout gets three plane copies
// (note the U/V plane swap: dst offset[2] gets src plane 1 and vice versa).
// `needs_derive` toggles vaDeriveImage vs. vaGetImage behaviour inside
// Image::new (workaround for certain drivers, selected at init time).
428 fn fill_frame(ifmt: VAImageFormat, pic: &Picture<PictureSync>, frm: &mut NABufferType, needs_derive: bool) -> DecoderResult<()> {
429 let mut vbuf = frm.get_vbuf().unwrap();
430 let (w, h) = pic.surface_size();
431 //let cur_ts = pic.timestamp();
433 let img = Image::new(pic, ifmt, w, h, !needs_derive).expect("get image");
435 let iimg = img.image();
436 let imgdata: &[u8] = img.as_ref();
438 match iimg.format.fourcc().map_err(|_| DecoderError::InvalidData)? {
// NV12-style case (the FOURCC arm itself is on a line not visible here).
440 let frm = NASimpleVideoFrame::from_video_buf(&mut vbuf).unwrap();
// Surfaces are allocated at macroblock (16-pixel) granularity; verify the
// VA image matches the rounded-up dimensions before copying.
441 validate!(iimg.width == (((frm.width[0] + 15) & !15) as u16));
442 validate!(iimg.height == (((frm.height[0] + 15) & !15) as u16));
444 copy_luma(&mut frm.data[frm.offset[0]..], frm.stride[0], &imgdata[iimg.offsets[0] as usize..], iimg.pitches[0] as usize, (frm.width[0] + 15) & !15, (frm.height[0] + 15) & !15);
446 deint_chroma(frm, &imgdata[iimg.offsets[1] as usize..], iimg.pitches[1] as usize);
// Planar case: copy luma and both (swapped) chroma planes separately.
449 let frm = NASimpleVideoFrame::from_video_buf(&mut vbuf).unwrap();
450 validate!(iimg.width == (((frm.width[0] + 15) & !15) as u16));
451 validate!(iimg.height == (((frm.height[0] + 15) & !15) as u16));
453 copy_luma(&mut frm.data[frm.offset[0]..], frm.stride[0], &imgdata[iimg.offsets[0] as usize..], iimg.pitches[0] as usize, (frm.width[0] + 15) & !15, (frm.height[0] + 15) & !15);
454 copy_luma(&mut frm.data[frm.offset[2]..], frm.stride[2], &imgdata[iimg.offsets[1] as usize..], iimg.pitches[1] as usize, (frm.width[1] + 15) & !15, (frm.height[1] + 15) & !15);
455 copy_luma(&mut frm.data[frm.offset[1]..], frm.stride[1], &imgdata[iimg.offsets[2] as usize..], iimg.pitches[2] as usize, (frm.width[2] + 15) & !15, (frm.height[2] + 15) & !15);
// Any other FOURCC is unsupported and aborts.
457 _ => unimplemented!(),
// Inert decoder state: no VA-API context yet (created later in init()),
// empty parameter-set tables and a fresh reorderer.
462 impl Default for VaapiH264Decoder {
463 fn default() -> Self {
465 info: NACodecInfoRef::default(),
// Streams normally carry a single SPS and a handful of PPSes.
468 spses: Vec::with_capacity(1),
469 ppses: Vec::with_capacity(4),
470 frame_refs: FrameRefs::new(),
472 out_frm: NABufferType::None,
473 reorderer: Reorderer::default(),
480 impl VaapiH264Decoder {
481 pub fn new() -> Self { Self::default() }
// Initialises the decoder from codec extradata:
// * parses the avcC configuration record (NAL length size, SPS/PPS sets,
//   and for High profiles the extra chroma/bit-depth bytes),
// * picks and validates a matching VA-API profile and VLD entrypoint,
// * allocates surfaces/context and selects an NV12-ish image format.
// Returns NotImplemented for unsupported profiles, InvalidData for broken
// extradata.
482 pub fn init(&mut self, info: NACodecInfoRef) -> DecoderResult<()> {
483 if let NACodecTypeInfo::Video(vinfo) = info.get_properties() {
484 let edata = info.get_extradata().unwrap();
485 //print!("edata:"); for &el in edata.iter() { print!(" {:02X}", el); } println!();
487 let mut nal_buf = Vec::with_capacity(1024);
// avcC layout (ISO 14496-15): version byte, profile/compat/level, NAL
// length size, then counted SPS and PPS arrays.
488 if edata.len() > 11 && &edata[0..4] == b"avcC" {
489 let mut mr = MemoryReader::new_read(edata.as_slice());
490 let mut br = ByteReader::new(&mut mr);
493 let version = br.read_byte()?;
494 validate!(version == 1);
495 profile = br.read_byte()?;
496 let _compatibility = br.read_byte()?;
497 let _level = br.read_byte()?;
498 let b = br.read_byte()?;
499 //validate!((b & 0xFC) == 0xFC);
// Low two bits encode (NAL length size - 1).
500 self.nal_len = (b & 3) + 1;
501 let b = br.read_byte()?;
502 //validate!((b & 0xE0) == 0xE0);
503 let num_sps = (b & 0x1F) as usize;
504 for _ in 0..num_sps {
505 let len = br.read_u16be()? as usize;
506 let offset = br.tell() as usize;
// NAL type must be 7 (SPS).
507 validate!((br.peek_byte()? & 0x1F) == 7);
508 let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf);
510 let sps = parse_sps(&nal_buf[1..])?;
511 self.spses.push(sps);
513 let num_pps = br.read_byte()? as usize;
514 for _ in 0..num_pps {
515 let len = br.read_u16be()? as usize;
516 let offset = br.tell() as usize;
// NAL type must be 8 (PPS).
517 validate!((br.peek_byte()? & 0x1F) == 8);
518 let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf);
// PPS parsing needs the exact payload bit size: strip the trailing
// rbsp_stop_one_bit and any zero padding.
522 let mut full_size = src.len() * 8;
523 for &byte in src.iter().rev() {
527 full_size -= (byte.trailing_zeros() + 1) as usize;
531 validate!(full_size > 0);
533 let pps = parse_pps(&src[1..], &self.spses, full_size - 8)?;
// Replace an existing PPS with the same id rather than duplicating it.
534 let mut found = false;
535 for stored_pps in self.ppses.iter_mut() {
536 if stored_pps.pic_parameter_set_id == pps.pic_parameter_set_id {
537 *stored_pps = pps.clone();
543 self.ppses.push(pps);
// High-profile avcC extension: chroma format, luma/chroma depth and
// optional SPS extension NALs.
548 100 | 110 | 122 | 144 => {
549 let b = br.read_byte()?;
550 // some encoders put something different here
551 if (b & 0xFC) != 0xFC {
554 // b & 3 -> chroma format
555 let b = br.read_byte()?;
556 validate!((b & 0xF8) == 0xF8);
557 // b & 7 -> luma depth minus 8
558 let b = br.read_byte()?;
559 validate!((b & 0xF8) == 0xF8);
560 // b & 7 -> chroma depth minus 8
561 let num_spsext = br.read_byte()? as usize;
562 for _ in 0..num_spsext {
563 let len = br.read_u16be()? as usize;
// Non-avcC (Annex B style) extradata is not supported.
572 return Err(DecoderError::NotImplemented);
575 validate!(profile > 0);
// VA-API surfaces are allocated at macroblock granularity.
576 let width = (vinfo.get_width() + 15) & !15;
577 let height = (vinfo.get_height() + 15) & !15;
// NOTE(review): panics (expect) when no VA display is available rather
// than returning an error — init cannot fail gracefully here.
579 let display = Display::open_silently().expect("open display");
// DPB size plus generous slack for in-flight and reordered frames.
581 let num_surfaces = self.spses[0].num_ref_frames + 4 + 64;
583 let va_profile = match profile {
584 66 => VAProfile::VAProfileH264ConstrainedBaseline,
585 77 => VAProfile::VAProfileH264Main,
586 88 | 100 | 110 | 122 => VAProfile::VAProfileH264High,
587 _ => return Err(DecoderError::NotImplemented),
589 if let Ok(profiles) = display.query_config_profiles() {
590 if !profiles.contains(&va_profile) {
591 println!("Profile {} ({}) not supported", profile, profile_name(va_profile));
592 return Err(DecoderError::NotImplemented);
595 return Err(DecoderError::Bug);
597 if let Ok(points) = display.query_config_entrypoints(va_profile) {
598 if !points.contains(&VAEntrypoint::VAEntrypointVLD) {
599 println!("no decoding support for this profile");
600 return Err(DecoderError::NotImplemented);
603 return Err(DecoderError::Bug);
// Driver quirk: Kaby Lake needs vaDeriveImage-based readback (see
// fill_frame). Detected via the vendor string.
606 let needs_derive= if let Ok(vendor) = display.query_vendor_string() {
607 vendor.contains("Kaby Lake")
610 let config = display.create_config(vec![
611 VAConfigAttrib { type_: VAConfigAttribType::VAConfigAttribRTFormat, value: RTFormat::YUV420.into() },
612 ], va_profile, VAEntrypoint::VAEntrypointVLD).map_err(|_| {
613 println!("config creation failed!");
616 let surfaces = display.create_surfaces(RTFormat::YUV420, None, width as u32, height as u32, Some(UsageHint::Decoder.into()), num_surfaces as u32).map_err(|_| DecoderError::AllocError)?;
617 let context = display.create_context(&config, width as i32, height as i32, Some(&surfaces), true).map_err(|_| DecoderError::Bug)?;
619 let ref_pics = Vec::new();
// Prefer a 12bpp (YUV 4:2:0) image format for readback.
621 let image_formats = display.query_image_formats().map_err(|_| DecoderError::Bug)?;
622 validate!(!image_formats.is_empty());
623 let mut ifmt = image_formats[0];
624 for fmt in image_formats.iter() {
625 if fmt.bits_per_pixel == 12 {
631 self.vaapi = Some(VaapiInternals { display, context, ref_pics, surfaces, ifmt });
632 self.needs_derive = needs_derive;
// Output buffer uses the original (un-rounded) dimensions.
634 let vinfo = NAVideoInfo::new(vinfo.get_width(), vinfo.get_height(), false, YUV420_FORMAT);
635 self.info = NACodecInfo::new_ref(info.get_name(), NACodecTypeInfo::Video(vinfo), info.get_extradata()).into_ref();
636 self.out_frm = alloc_video_buffer(vinfo, 4)?;
// Non-video codec info is rejected.
640 Err(DecoderError::InvalidData)
// Decodes one packet: walks the length-prefixed NAL units, parses slice
// headers, maintains the reference picture state, fills the VA-API
// picture/IQ-matrix/slice parameter buffers (including explicit and
// implicit weighted prediction tables) and submits the picture for
// hardware decoding, finally queueing it into the display reorderer.
643 fn decode(&mut self, pkt: &NAPacket) -> DecoderResult<()> {
644 let src = pkt.get_buffer();
645 let vactx = if let Some(ref mut ctx) = self.vaapi { ctx } else { return Err(DecoderError::Bug) };
// Prefer DTS, fall back to PTS, then 0.
647 let timestamp = pkt.get_dts().unwrap_or_else(|| pkt.get_pts().unwrap_or(0));
// NOTE(review): deliberately panics instead of returning AllocError (the
// error return is commented out below) — surface exhaustion is treated as
// an internal invariant violation.
649 if vactx.surfaces.is_empty() {
650 panic!("ran out of free surfaces");
651 // return Err(DecoderError::AllocError);
653 let surface = vactx.surfaces.pop().unwrap();
654 let surface_id = surface.id();
655 let mut pic = Picture::new(timestamp, vactx.context.clone(), surface);
656 let mut is_ref = false;
657 let mut is_keyframe = false;
// Remember the stream timebase for timestamping output frames.
659 self.tb_num = pkt.ts.tb_num;
660 self.tb_den = pkt.ts.tb_den;
662 let mut mr = MemoryReader::new_read(&src);
663 let mut br = ByteReader::new(&mut mr);
664 let mut frame_type = FrameType::I;
665 let mut nal_buf = Vec::with_capacity(1024);
// Iterate the packet's NAL units, each prefixed by a 1..4 byte length
// (size configured from avcC in init()).
666 while br.left() > 0 {
667 let size = match self.nal_len {
668 1 => br.read_byte()? as usize,
669 2 => br.read_u16be()? as usize,
670 3 => br.read_u24be()? as usize,
671 4 => br.read_u32be()? as usize,
674 validate!(br.left() >= (size as i64));
675 let offset = br.tell() as usize;
676 let raw_nal = &src[offset..][..size];
677 let _size = unescape_nal(raw_nal, &mut nal_buf);
// NAL header: forbidden_zero_bit, nal_ref_idc, nal_unit_type.
680 validate!((src[0] & 0x80) == 0);
681 let nal_ref_idc = src[0] >> 5;
682 let nal_unit_type = src[0] & 0x1F;
// Compute the exact RBSP bit length by stripping the stop bit and
// trailing zero padding.
684 let mut full_size = src.len() * 8;
685 for &byte in src.iter().rev() {
689 full_size -= (byte.trailing_zeros() + 1) as usize;
693 validate!(full_size > 0);
695 match nal_unit_type {
// Coded slice (types 1 and 5; type 5 is IDR).
697 let is_idr = nal_unit_type == 5;
698 is_ref |= nal_ref_idc != 0;
699 is_keyframe |= is_idr;
700 let mut br = BitReader::new(&src[..(full_size + 7)/8], BitReaderMode::BE);
703 let slice_hdr = parse_slice_header(&mut br, &self.spses, &self.ppses, is_idr, nal_ref_idc)?;
// Frame type escalates: any B slice makes the frame B, else P wins over I.
704 match slice_hdr.slice_type {
705 SliceType::P if frame_type != FrameType::B => frame_type = FrameType::P,
706 SliceType::SP if frame_type != FrameType::B => frame_type = FrameType::P,
707 SliceType::B => frame_type = FrameType::B,
// Resolve the PPS named by the slice, then the SPS named by that PPS.
712 let mut pps_found = false;
713 for (i, pps) in self.ppses.iter().enumerate() {
714 if pps.pic_parameter_set_id == slice_hdr.pic_parameter_set_id {
720 validate!(pps_found);
721 let mut sps_found = false;
722 for (i, sps) in self.spses.iter().enumerate() {
723 if sps.seq_parameter_set_id == self.ppses[cur_pps].seq_parameter_set_id {
729 validate!(sps_found);
730 let sps = &self.spses[cur_sps];
731 let pps = &self.ppses[cur_pps];
// Per-picture setup happens only on the first slice of the frame.
733 if slice_hdr.first_mb_in_slice == 0 {
734 let (top_id, bot_id) = self.frame_refs.calc_picture_num(&slice_hdr, is_idr, nal_ref_idc, sps);
// IDR: drop all references and recycle their surfaces.
736 self.frame_refs.clear_refs();
737 for (pic, _) in vactx.ref_pics.drain(..) {
738 if let Ok(surf) = pic.take_surface() {
739 vactx.surfaces.push(surf);
741 panic!("can't take surface");
745 self.frame_refs.select_refs(sps, &slice_hdr, top_id);
// Build the DPB array for the picture parameter buffer (newest first).
746 let mut pic_refs = Vec::with_capacity(NUM_REF_PICS);
747 for pic in self.frame_refs.ref_pics.iter().rev().take(NUM_REF_PICS) {
748 pic_refs.push(pic.make_pic());
// MMCO operations are applied after capturing the reference snapshot.
750 if slice_hdr.adaptive_ref_pic_marking_mode {
751 self.frame_refs.apply_adaptive_marking(&slice_hdr.adaptive_ref_pic_marking, slice_hdr.frame_num, 1 << sps.log2_max_frame_num)?;
754 while pic_refs.len() < NUM_REF_PICS {
755 pic_refs.push(make_dummy_h264_pic());
// Flags/index for the current picture itself.
758 let mut flags = H264PictureFlags::default();
759 let frame_idx = if let Some(id) = get_long_term_id(is_idr, &slice_hdr) {
760 flags |= H264PictureFlag::LongTermReference;
763 if nal_ref_idc != 0 {
764 flags |= H264PictureFlag::ShortTermReference;
766 u32::from(slice_hdr.frame_num)
768 let pic_refs: [PictureH264; NUM_REF_PICS] = pic_refs.try_into().unwrap_or_else(|_| panic!("can't convert"));
770 let h264pic = PictureH264::new(surface_id, frame_idx, flags, top_id as i32, bot_id as i32);
// Sequence-level fields for VAPictureParameterBufferH264.
772 let seq_fields = H264SeqFields::new(
773 u32::from(sps.chroma_format_idc),
774 u32::from(sps.separate_colour_plane),
775 u32::from(sps.gaps_in_frame_num_value_allowed),
776 u32::from(sps.frame_mbs_only),
777 u32::from(sps.mb_adaptive_frame_field),
778 u32::from(sps.direct_8x8_inference),
779 u32::from(sps.level_idc >= 31),
// NOTE(review): plain `- 4` here vs. `.wrapping_sub(4)` below — the lsb
// field can legitimately be absent/zero for pic_order_cnt_type != 0,
// hence the wrapping form; confirm log2_max_frame_num is always >= 4.
780 u32::from(sps.log2_max_frame_num) - 4,
781 u32::from(sps.pic_order_cnt_type),
782 u32::from(sps.log2_max_pic_order_cnt_lsb).wrapping_sub(4),
783 u32::from(sps.delta_pic_order_always_zero)
// Picture-level fields.
785 let pic_fields = H264PicFields::new(
786 u32::from(pps.entropy_coding_mode),
787 u32::from(pps.weighted_pred),
788 u32::from(pps.weighted_bipred_idc),
789 u32::from(pps.transform_8x8_mode),
790 u32::from(slice_hdr.field_pic),
791 u32::from(pps.constrained_intra_pred),
792 u32::from(pps.pic_order_present),
793 u32::from(pps.deblocking_filter_control_present),
794 u32::from(pps.redundant_pic_cnt_present),
795 u32::from(nal_ref_idc != 0)
797 let ppd = PictureParameterBufferH264::new(
800 sps.pic_width_in_mbs as u16 - 1,
801 sps.pic_height_in_mbs as u16 - 1,
802 sps.bit_depth_luma - 8,
803 sps.bit_depth_chroma - 8,
804 sps.num_ref_frames as u8,
806 pps.num_slice_groups as u8 - 1, // should be 0
807 pps.slice_group_map_type, // should be 0
808 0, //pps.slice_group_change_rate as u16 - 1,
809 pps.pic_init_qp as i8 - 26,
810 pps.pic_init_qs as i8 - 26,
811 pps.chroma_qp_index_offset,
812 pps.second_chroma_qp_index_offset,
816 let pic_param = BufferType::PictureParameter(PictureParameter::H264(ppd));
817 let buf = vactx.context.create_buffer(pic_param).map_err(|_| DecoderError::Bug)?;
// VA-API only takes two 8x8 scaling lists (luma/chroma intra indices 0
// and 3 of the PPS table).
820 let mut scaling_list_8x8 = [[0; 64]; 2];
821 scaling_list_8x8[0].copy_from_slice(&pps.scaling_list_8x8[0]);
822 scaling_list_8x8[1].copy_from_slice(&pps.scaling_list_8x8[3]);
823 let iqmatrix = BufferType::IQMatrix(IQMatrix::H264(IQMatrixBufferH264::new(pps.scaling_list_4x4, scaling_list_8x8)));
824 let buf = vactx.context.create_buffer(iqmatrix).map_err(|_| DecoderError::Bug)?;
// Register the current picture in the decoder-side reference tracking.
827 let cpic = PictureInfo {
828 id: slice_hdr.frame_num,
832 //pic_type: slice_hdr.slice_type.to_frame_type(),
835 long_term: get_long_term_id(is_idr, &slice_hdr),
838 self.frame_refs.add_short_term(cpic.clone(), sps.num_ref_frames);
840 if let Some(lt_idx) = cpic.long_term {
841 self.frame_refs.add_long_term(lt_idx, cpic);
// Weighted-prediction tables for the slice parameter buffer. Defaults
// are all-zero with the slice header's denominators.
845 let mut luma_weight_l0 = [0i16; 32];
846 let mut luma_offset_l0 = [0i16; 32];
847 let mut chroma_weight_l0 = [[0i16; 2]; 32];
848 let mut chroma_offset_l0 = [[0i16; 2]; 32];
849 let mut luma_weight_l1 = [0i16; 32];
850 let mut luma_offset_l1 = [0i16; 32];
851 let mut chroma_weight_l1 = [[0i16; 2]; 32];
852 let mut chroma_offset_l1 = [[0i16; 2]; 32];
853 let mut luma_weighted_l0 = false;
854 let mut chroma_weighted_l0 = false;
855 let mut luma_weighted_l1 = false;
856 let mut chroma_weighted_l1 = false;
857 let mut luma_log2_weight_denom = slice_hdr.luma_log2_weight_denom;
858 let mut chroma_log2_weight_denom = slice_hdr.chroma_log2_weight_denom;
// Explicit weighting, list 0: P/B slices with weighted_pred, or B slices
// with weighted_bipred_idc == 1. Entries without per-ref weights get the
// identity weight (1 << denom) and zero offsets.
860 if (pps.weighted_pred && matches!(slice_hdr.slice_type, SliceType::P | SliceType::B)) || (pps.weighted_bipred_idc == 1 && slice_hdr.slice_type == SliceType::B) {
861 luma_weighted_l0 = true;
862 chroma_weighted_l0 = false;
863 for (i, winfo) in slice_hdr.weights_l0.iter().enumerate().take(slice_hdr.num_ref_idx_l0_active) {
864 if winfo.luma_weighted {
865 luma_weight_l0[i] = winfo.luma_weight.into();
866 luma_offset_l0[i] = winfo.luma_offset.into();
868 luma_weight_l0[i] = 1 << slice_hdr.luma_log2_weight_denom;
870 if winfo.chroma_weighted {
871 chroma_weight_l0[i][0] = winfo.chroma_weight[0].into();
872 chroma_weight_l0[i][1] = winfo.chroma_weight[1].into();
873 chroma_offset_l0[i][0] = winfo.chroma_offset[0].into();
874 chroma_offset_l0[i][1] = winfo.chroma_offset[1].into();
876 chroma_weight_l0[i][0] = 1 << slice_hdr.chroma_log2_weight_denom;
877 chroma_weight_l0[i][1] = 1 << slice_hdr.chroma_log2_weight_denom;
878 chroma_offset_l0[i][0] = 0;
879 chroma_offset_l0[i][1] = 0;
881 chroma_weighted_l0 |= winfo.chroma_weighted;
// Explicit weighting, list 1 (B slices only).
884 if pps.weighted_bipred_idc == 1 && slice_hdr.slice_type == SliceType::B {
885 luma_weighted_l1 = true;
886 chroma_weighted_l1 = sps.chroma_format_idc != 0;
887 for (i, winfo) in slice_hdr.weights_l1.iter().enumerate().take(slice_hdr.num_ref_idx_l1_active) {
888 if winfo.luma_weighted {
889 luma_weight_l1[i] = winfo.luma_weight.into();
890 luma_offset_l1[i] = winfo.luma_offset.into();
892 luma_weight_l1[i] = 1 << slice_hdr.luma_log2_weight_denom;
894 if chroma_weighted_l1 && winfo.chroma_weighted {
895 chroma_weight_l1[i][0] = winfo.chroma_weight[0].into();
896 chroma_weight_l1[i][1] = winfo.chroma_weight[1].into();
897 chroma_offset_l1[i][0] = winfo.chroma_offset[0].into();
898 chroma_offset_l1[i][1] = winfo.chroma_offset[1].into();
900 chroma_weight_l1[i][0] = 1 << slice_hdr.chroma_log2_weight_denom;
901 chroma_weight_l1[i][1] = 1 << slice_hdr.chroma_log2_weight_denom;
902 chroma_offset_l1[i][0] = 0;
903 chroma_offset_l1[i][1] = 0;
// Implicit weighting (idc == 2): only the single-ref case is handled
// specially; otherwise fall back to unweighted tables with denom 5.
907 if pps.weighted_bipred_idc == 2 && slice_hdr.slice_type == SliceType::B {
908 let num_l0 = slice_hdr.num_ref_idx_l0_active;
909 let num_l1 = slice_hdr.num_ref_idx_l1_active;
910 if num_l0 != 1 || num_l1 != 1 { //xxx: also exclude symmetric case
911 luma_weighted_l0 = false;
912 luma_weighted_l1 = false;
913 chroma_weighted_l0 = false;
914 chroma_weighted_l1 = false;
915 luma_log2_weight_denom = 5;
916 chroma_log2_weight_denom = 5;
918 for w in luma_weight_l0.iter_mut() {
921 for w in luma_weight_l1.iter_mut() {
924 for w in chroma_weight_l0.iter_mut() {
927 for w in chroma_weight_l1.iter_mut() {
// Reference lists for the slice parameter buffer.
933 let ref_pic_list_0 = map_ref_list(&self.frame_refs.cur_refs.ref_list0);
934 let ref_pic_list_1 = map_ref_list(&self.frame_refs.cur_refs.ref_list1);
936 let slice_param = SliceParameterBufferH264::new(
937 raw_nal.len() as u32,
939 VASliceDataFlag::All,
941 slice_hdr.first_mb_in_slice as u16,
942 match slice_hdr.slice_type {
949 slice_hdr.direct_spatial_mv_pred as u8,
// VA-API wants num_ref_idx_active minus one; saturate at 0 for I slices.
950 (slice_hdr.num_ref_idx_l0_active as u8).saturating_sub(1),
951 (slice_hdr.num_ref_idx_l1_active as u8).saturating_sub(1),
952 slice_hdr.cabac_init_idc,
953 slice_hdr.slice_qp_delta as i8,
954 slice_hdr.disable_deblocking_filter_idc,
955 slice_hdr.slice_alpha_c0_offset / 2,
956 slice_hdr.slice_beta_offset / 2,
959 luma_log2_weight_denom,
960 chroma_log2_weight_denom,
961 luma_weighted_l0 as u8, luma_weight_l0, luma_offset_l0,
962 chroma_weighted_l0 as u8, chroma_weight_l0, chroma_offset_l0,
963 luma_weighted_l1 as u8, luma_weight_l1, luma_offset_l1,
964 chroma_weighted_l1 as u8, chroma_weight_l1, chroma_offset_l1,
966 let slc_param = BufferType::SliceParameter(SliceParameter::H264(slice_param));
967 let buf = vactx.context.create_buffer(slc_param).map_err(|_| DecoderError::Bug)?;
// Slice data is fed escaped (raw), as VA-API expects.
970 let slc_data = BufferType::SliceData(raw_nal.to_vec());
971 let buf = vactx.context.create_buffer(slc_data).map_err(|_| DecoderError::Bug)?;
// Slice data partitioning (NAL types 2-4) is not implemented.
974 2 => { // slice data partition A
976 //slice id = read_ue()
977 //cat 2 slice data (all but MB layer residual)
978 return Err(DecoderError::NotImplemented);
980 3 => { // slice data partition B
981 //slice id = read_ue()
982 //if pps.redundant_pic_cnt_present { redundant_pic_cnt = read_ue() }
983 //cat 3 slice data (MB layer residual)
984 return Err(DecoderError::NotImplemented);
986 4 => { // slice data partition C
987 //slice id = read_ue()
988 //if pps.redundant_pic_cnt_present { redundant_pic_cnt = read_ue() }
989 //cat 4 slice data (MB layer residual)
990 return Err(DecoderError::NotImplemented);
// In-band SPS (type 7): appended, not deduplicated like PPS.
994 let sps = parse_sps(&src[1..])?;
995 self.spses.push(sps);
// In-band PPS (type 8): replaces an existing set with the same id.
998 validate!(full_size >= 8 + 16);
999 let pps = parse_pps(&src[1..], &self.spses, full_size - 8)?;
1000 let mut found = false;
1001 for stored_pps in self.ppses.iter_mut() {
1002 if stored_pps.pic_parameter_set_id == pps.pic_parameter_set_id {
1003 *stored_pps = pps.clone();
1009 self.ppses.push(pps);
1012 9 => { // access unit delimiter
1014 10 => {}, //end of sequence
1015 11 => {}, //end of stream
// Skip past the NAL payload in the byte reader.
1020 br.read_skip(size)?;
// Submit the assembled picture to the hardware.
1023 let bpic = pic.begin().expect("begin");
1024 let rpic = bpic.render().expect("render");
1025 let epic = rpic.end().expect("end");
1027 self.reorderer.add_frame(WaitingFrame {
1029 is_idr: is_keyframe,
// Recycle surfaces of pictures that are no longer referenced by the DPB.
1036 while idx < vactx.ref_pics.len() {
1037 let cur_surf_id = vactx.ref_pics[idx].1;
1038 if self.frame_refs.ref_pics.iter().any(|fref| fref.surface_id == cur_surf_id) {
1041 let (pic, _) = vactx.ref_pics.remove(idx);
1042 if let Ok(surf) = pic.take_surface() {
1043 vactx.surfaces.push(surf);
1045 panic!("can't take surface");
// Pops the next frame the reorderer considers displayable, syncs its
// surface, copies the pixels into out_frm and wraps it as an NAFrame.
// Non-reference pictures return their surface to the free pool; reference
// pictures are parked in ref_pics until the DPB drops them.
1052 fn get_frame(&mut self) -> Option<NAFrameRef> {
1053 if let Some(ref mut vactx) = self.vaapi {
1054 if let Some(frm) = self.reorderer.get_frame() {
1056 let is_idr = frm.is_idr;
1057 let is_ref = frm.is_ref;
1058 let ftype = frm.ftype;
1059 if let Ok(pic) = frm.pic.sync() {
// Readback errors are deliberately ignored (best effort).
1060 let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm, self.needs_derive);
1063 if let Ok(surf) = pic.take_surface() {
1064 vactx.surfaces.push(surf);
1066 panic!("can't take surface");
1069 let id = pic.surface_id();
1070 vactx.ref_pics.push((pic, id));
// Output carries PTS only; DTS/duration are left unset.
1073 let ts = NATimeInfo::new(None, Some(ts), None, self.tb_num, self.tb_den);
1074 Some(NAFrame::new(ts, ftype, is_idr, self.info.clone(), self.out_frm.clone()).into_ref())
1076 panic!("can't sync");
// End-of-stream drain: like get_frame() but bypasses readiness checks and
// pops straight from the reorder queue so remaining frames are emitted.
1085 fn get_last_frames(&mut self) -> Option<NAFrameRef> {
1086 if let Some(ref mut vactx) = self.vaapi {
1087 if let Some(frm) = self.reorderer.frames.pop_front() {
1089 let is_idr = frm.is_idr;
1090 let is_ref = frm.is_ref;
1091 let ftype = frm.ftype;
1092 if let Ok(pic) = frm.pic.sync() {
1093 let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm, self.needs_derive);
// Same surface recycling policy as get_frame().
1096 if let Ok(surf) = pic.take_surface() {
1097 vactx.surfaces.push(surf);
1099 panic!("can't take surface");
1102 let id = pic.surface_id();
1103 vactx.ref_pics.push((pic, id));
1106 let ts = NATimeInfo::new(None, Some(ts), None, self.tb_num, self.tb_den);
1107 Some(NAFrame::new(ts, ftype, is_idr, self.info.clone(), self.out_frm.clone()).into_ref())
1109 panic!("can't sync");
// Seek/reset: drops all reference state, drains and discards queued frames
// (their surfaces are reclaimed without readback) and recycles the parked
// reference pictures.
1118 fn flush(&mut self) {
1119 self.frame_refs.clear_refs();
1120 if let Some(ref mut vactx) = self.vaapi {
1121 for frm in self.reorderer.frames.drain(..) {
1122 if let Ok(pic) = frm.pic.sync() {
1123 if let Ok(surf) = pic.take_surface() {
1124 vactx.surfaces.push(surf);
1126 panic!("can't take surface");
1129 panic!("can't sync");
1132 self.reorderer.flush();
1133 for (pic, _) in vactx.ref_pics.drain(..) {
1134 if let Ok(surf) = pic.take_surface() {
1135 vactx.surfaces.push(surf);
1137 panic!("can't take surface");
// No runtime-configurable options: all three handler methods are no-ops.
1144 impl NAOptionHandler for VaapiH264Decoder {
1145 fn get_supported_options(&self) -> &[NAOptionDefinition] { &[] }
1146 fn set_options(&mut self, _options: &[NAOption]) {}
1147 fn query_option_value(&self, _name: &str) -> Option<NAValue> { None }
1151 use std::sync::mpsc::*;
// Fragment of the DecMessage enum (header outside this view): messages the
// wrapper thread understands — Init, Decode, GetFrame, GetLastFrames,
// Flush, End.
1154 Init(NACodecInfoRef),
// Thread-safe facade over a hardware decoder: mirrors the synchronous
// decoder API and is implemented by HWWrapper via message passing.
1169 pub trait HWDecoder {
1170 fn init(&mut self, info: NACodecInfoRef) -> DecoderResult<()>;
1171 fn queue_pkt(&mut self, pkt: &NAPacket) -> DecoderResult<()>;
1172 fn get_frame(&mut self) -> Option<NAFrameRef>;
1173 fn get_last_frames(&mut self) -> Option<NAFrameRef>;
1174 fn flush(&mut self);
// Runs a VaapiH264Decoder on a dedicated worker thread, communicating over
// a pair of rendezvous (capacity-1) channels; the JoinHandle is kept so
// Drop can join the worker.
1177 pub struct HWWrapper {
1178 handle: Option<JoinHandle<DecoderResult<()>>>,
1179 send: SyncSender<DecMessage>,
1180 recv: Receiver<DecResponse>,
1183 #[allow(clippy::new_without_default)]
// Spawns the decoder worker thread. The worker owns the actual decoder,
// services one message at a time and replies on the out channel; it exits
// cleanly on DecMessage::End and with DecoderError::Bug if the request
// channel closes unexpectedly.
1185 pub fn new() -> Self {
1186 let (in_send, in_recv) = sync_channel(1);
1187 let (out_send, out_recv) = sync_channel(1);
1188 let handle = std::thread::spawn(move || {
1189 let receiver = in_recv;
1190 let sender = out_send;
1191 let mut dec = VaapiH264Decoder::new();
1192 while let Ok(msg) = receiver.recv() {
1194 DecMessage::Init(info) => {
1195 let msg = if let Err(err) = dec.init(info) {
1196 DecResponse::Err(err)
// A closed response channel means the owner is gone: bail with Bug.
1200 sender.send(msg).map_err(|_| DecoderError::Bug)?;
1202 DecMessage::Decode(pkt) => {
1203 let msg = match dec.decode(&pkt) {
1204 Ok(()) => DecResponse::Ok,
1205 Err(err) => DecResponse::Err(err),
1207 sender.send(msg).map_err(|_| DecoderError::Bug)?;
1209 DecMessage::GetFrame => {
1210 let msg = match dec.get_frame() {
1211 Some(frm) => DecResponse::Frame(frm),
1212 None => DecResponse::Nothing,
1214 sender.send(msg).map_err(|_| DecoderError::Bug)?;
1216 DecMessage::GetLastFrames => {
1217 let msg = match dec.get_last_frames() {
1218 Some(frm) => DecResponse::Frame(frm),
1219 None => DecResponse::Nothing,
1221 sender.send(msg).map_err(|_| DecoderError::Bug)?;
// Flush has no reply; End terminates the worker loop.
1223 DecMessage::Flush => dec.flush(),
1224 DecMessage::End => return Ok(()),
// Reached only if the request channel closed without an End message.
1227 Err(DecoderError::Bug)
1231 handle: Some(handle),
1238 impl HWDecoder for HWWrapper {
1239 fn init(&mut self, info: NACodecInfoRef) -> DecoderResult<()> {
1240 if self.send.send(DecMessage::Init(info)).is_ok() {
1241 match self.recv.recv() {
1242 Ok(DecResponse::Ok) => Ok(()),
1243 Ok(DecResponse::Err(err)) => Err(err),
1244 Err(_) => Err(DecoderError::Bug),
1245 _ => unreachable!(),
1248 Err(DecoderError::Bug)
1251 fn queue_pkt(&mut self, pkt: &NAPacket) -> DecoderResult<()> {
1252 let pkt2 = NAPacket::new_from_refbuf(pkt.get_stream(), pkt.ts, pkt.keyframe, pkt.get_buffer());
1253 if self.send.send(DecMessage::Decode(pkt2)).is_ok() {
1254 match self.recv.recv() {
1255 Ok(DecResponse::Ok) => Ok(()),
1256 Ok(DecResponse::Err(err)) => Err(err),
1257 Err(_) => Err(DecoderError::Bug),
1258 _ => unreachable!(),
1261 Err(DecoderError::Bug)
1264 fn get_frame(&mut self) -> Option<NAFrameRef> {
1265 if self.send.send(DecMessage::GetFrame).is_ok() {
1266 match self.recv.recv() {
1267 Ok(DecResponse::Frame(frm)) => Some(frm),
1268 Ok(DecResponse::Nothing) => None,
1270 _ => unreachable!(),
1276 fn get_last_frames(&mut self) -> Option<NAFrameRef> {
1277 if self.send.send(DecMessage::GetLastFrames).is_ok() {
1278 match self.recv.recv() {
1279 Ok(DecResponse::Frame(frm)) => Some(frm),
1280 Ok(DecResponse::Nothing) => None,
1282 _ => unreachable!(),
1288 fn flush(&mut self) {
1289 let _ = self.send.send(DecMessage::Flush);
1293 impl Drop for HWWrapper {
1294 fn drop(&mut self) {
1295 if self.send.send(DecMessage::End).is_ok() {
1296 let mut handle = None;
1297 std::mem::swap(&mut handle, &mut self.handle);
1298 if let Some(hdl) = handle {
1305 impl NAOptionHandler for HWWrapper {
1306 fn get_supported_options(&self) -> &[NAOptionDefinition] { &[] }
1307 fn set_options(&mut self, _options: &[NAOption]) {}
1308 fn query_option_value(&self, _name: &str) -> Option<NAValue> { None }
1311 pub fn new_h264_hwdec() -> Box<dyn HWDecoder + Send> {
1312 Box::new(HWWrapper::new())
1317 use nihav_core::codecs::*;
1318 use nihav_core::io::byteio::*;
1319 use nihav_core::demuxers::{RegisteredDemuxers, create_demuxer};
1320 use nihav_commonfmt::generic_register_all_demuxers;
1321 use super::VaapiH264Decoder;
1322 use std::io::prelude::*;
1324 fn decode_h264(name: &str, dname: &str, dmx_reg: &RegisteredDemuxers, opfx: &str) -> DecoderResult<()> {
1325 let dmx_f = dmx_reg.find_demuxer(dname).expect("demuxer exists");
1326 let file = std::fs::File::open(name).expect("file exists");
1327 let mut fr = FileReader::new_read(file);
1328 let mut br = ByteReader::new(&mut fr);
1329 let mut dmx = create_demuxer(dmx_f, &mut br).expect("create demuxer");
1331 let mut vstream_id = 0;
1332 let mut dec = VaapiH264Decoder::new();
1333 for stream in dmx.get_streams() {
1334 if stream.get_media_type() == StreamType::Video {
1335 dec.init(stream.get_info()).expect("inited");
1336 vstream_id = stream.get_id();
1341 let mut frameno = 0;
1342 while let Ok(pkt) = dmx.get_frame() {
1343 if pkt.get_stream().get_id() != vstream_id {
1346 dec.decode(&pkt).expect("decoded");
1347 let frm = dec.get_last_frames().expect("get frame");
1348 let timestamp = frm.get_dts().unwrap_or_else(|| frm.get_pts().unwrap_or(0));
1350 let pic = frm.get_buffer().get_vbuf().expect("got picture");
1352 let nname = format!("assets/test_out/{}{:06}_{}.pgm", opfx, timestamp, frameno);
1354 let mut file = std::fs::File::create(&nname).expect("create file");
1355 let (w, h) = pic.get_dimensions(0);
1356 file.write_all(format!("P5\n{} {}\n255\n", w, h * 3 / 2).as_bytes()).expect("header written");
1357 let data = pic.get_data();
1358 for yline in data.chunks(pic.get_stride(0)).take(h) {
1359 file.write_all(&yline[..w]).expect("Y line written");
1361 for (uline, vline) in data[pic.get_offset(1)..].chunks(pic.get_stride(1))
1362 .zip(data[pic.get_offset(2)..].chunks(pic.get_stride(2))).take(h / 2) {
1363 file.write_all(&uline[..w / 2]).expect("U line written");
1364 file.write_all(&vline[..w / 2]).expect("V line written");
// Unless noted otherwise, the test samples come from the H.264 conformance suite.
/// Smoke test: decode a known MP4 sample end to end through the VA-API path.
#[test]
fn test_h264_simple() {
    let mut dmx_reg = RegisteredDemuxers::new();
    generic_register_all_demuxers(&mut dmx_reg);

    decode_h264("assets/ITU/DimpledSpanishCuckoo-mobile.mp4", "mov", &dmx_reg, "hw").unwrap();
}