From: Kostya Shishkov Date: Fri, 20 Oct 2023 16:37:37 +0000 (+0200) Subject: add a crate for VAAPI-based H.264 decoding X-Git-Url: https://git.nihav.org/?p=nihav-player.git;a=commitdiff_plain;h=e5ccd68db9e8cf512c1506e8769ca2e0a07d0b0e add a crate for VAAPI-based H.264 decoding --- diff --git a/hwdec-vaapi/Cargo.toml b/hwdec-vaapi/Cargo.toml new file mode 100644 index 0000000..df79b33 --- /dev/null +++ b/hwdec-vaapi/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "hwdec_vaapi" +version = "0.1.0" +authors = ["Kostya Shishkov "] +edition = "2018" +rust-version = "1.69" + +[dependencies.libva] +package = "nihed-cros-libva" +version = "0.0.4" +path = "../nihed-cros-libva" + +[dependencies.nihav_core] +path = "../../nihav-core" + +[dev-dependencies] +nihav_commonfmt = { path = "../../nihav-commonfmt", default-features=false, features = ["all_demuxers"] } + +[features] +default = [] diff --git a/hwdec-vaapi/src/lib.rs b/hwdec-vaapi/src/lib.rs new file mode 100644 index 0000000..ca843c2 --- /dev/null +++ b/hwdec-vaapi/src/lib.rs @@ -0,0 +1,1233 @@ +use std::collections::VecDeque; +use std::convert::TryInto; +use std::rc::Rc; + +use nihav_core::codecs::*; +use nihav_core::io::byteio::*; +use nihav_core::io::bitreader::*; +use nihav_core::io::intcode::*; + +use libva::*; + +#[cfg(debug_assertions)] +macro_rules! validate { + ($a:expr) => { if !$a { println!("check failed at {}:{}", file!(), line!()); return Err(DecoderError::InvalidData); } }; +} +#[cfg(not(debug_assertions))] +macro_rules! validate { + ($a:expr) => { if !$a { return Err(DecoderError::InvalidData); } }; +} + +mod pic_ref; +pub use pic_ref::*; +#[allow(clippy::manual_range_contains)] +#[allow(clippy::needless_range_loop)] +mod sets; +use sets::*; +#[allow(clippy::manual_range_contains)] +mod slice; +use slice::*; + +trait ReadUE { + fn read_ue(&mut self) -> DecoderResult; + fn read_te(&mut self, range: u32) -> DecoderResult; + fn read_ue_lim(&mut self, max_val: u32) -> DecoderResult { + let val = self.read_ue()?; + validate!(val <= max_val); + Ok(val) + } + fn read_se(&mut self) -> DecoderResult { + let val = self.read_ue()?; + if (val & 1) != 0 { + Ok (((val >> 1) as i32) + 1) + } else { + Ok (-((val >> 1) as i32)) + } + } +} + +impl<'a> ReadUE for BitReader<'a> { + fn read_ue(&mut self) -> DecoderResult { + Ok(self.read_code(UintCodeType::GammaP)? - 1) + } + fn read_te(&mut self, range: u32) -> DecoderResult { + if range == 1 { + if self.read_bool()? 
{ + Ok(0) + } else { + Ok(1) + } + } else { + let val = self.read_ue()?; + validate!(val <= range); + Ok(val) + } + } +} + +fn get_long_term_id(is_idr: bool, slice_hdr: &SliceHeader) -> Option { + if is_idr && !slice_hdr.long_term_reference { + None + } else { + let marking = &slice_hdr.adaptive_ref_pic_marking; + for (&op, &arg) in marking.memory_management_control_op.iter().zip(marking.operation_arg.iter()).take(marking.num_ops) { + if op == 6 { + return Some(arg as usize); + } + } + None + } +} + +fn unescape_nal(src: &[u8], dst: &mut Vec) -> usize { + let mut off = 0; + let mut zrun = 0; + dst.clear(); + dst.reserve(src.len()); + while off < src.len() { + dst.push(src[off]); + if src[off] != 0 { + zrun = 0; + } else { + zrun += 1; + if zrun == 2 && off + 1 < src.len() && src[off + 1] == 0x03 { + zrun = 0; + off += 1; + } + if zrun >= 3 && off + 1 < src.len() && src[off + 1] == 0x01 { + off -= 3; + dst.truncate(off); + break; + } + } + off += 1; + } + off +} + +fn make_dummy_h264_pic() -> PictureH264 { + PictureH264::new(VA_INVALID_ID, 0, H264PictureFlag::Invalid.into(), 0, 0) +} + +trait MakePicH264 { + fn make_pic(&self) -> PictureH264; +} + +impl MakePicH264 for PictureInfo { + fn make_pic(&self) -> PictureH264 { + let mut flags = H264PictureFlags::default(); + let frame_idx = if let Some(id) = self.long_term { + flags |= H264PictureFlag::LongTermReference; + id as u32 + } else { + if self.is_ref { + flags |= H264PictureFlag::ShortTermReference; + } + u32::from(self.id) + }; + PictureH264::new(self.surface_id, frame_idx, flags, self.top_id as i32, self.bot_id as i32) + } +} + +fn map_ref_list(refs: &[Option]) -> [PictureH264; 32] { + let mut ref_list = Vec::with_capacity(32); + + for rpic in refs.iter() { + ref_list.push(rpic.as_ref().map_or_else(make_dummy_h264_pic, |pic| pic.make_pic())); + } + + while ref_list.len() < 32 { + ref_list.push(make_dummy_h264_pic()); + } + if let Ok(ret) = ref_list.try_into() { + ret + } else { + panic!("can't convert"); + } +} + +fn profile_name(profile: VAProfile::Type) -> &'static str { + match profile { + VAProfile::VAProfileMPEG2Simple => "MPEG2 Simple", + VAProfile::VAProfileMPEG2Main => "MPEG2 Main", + VAProfile::VAProfileMPEG4Simple => "MPEG4 Simple", + VAProfile::VAProfileMPEG4AdvancedSimple => "MPEG4 Advanced Simple", + VAProfile::VAProfileMPEG4Main => "MPEG4 Main", + VAProfile::VAProfileH264Baseline => "H264 Baseline", + VAProfile::VAProfileH264Main => "H264 Main", + VAProfile::VAProfileH264High => "H264 High", + VAProfile::VAProfileVC1Simple => "VC1 Simple", + VAProfile::VAProfileVC1Main => "VC1 Main", + VAProfile::VAProfileVC1Advanced => "VC1 Advanced", + VAProfile::VAProfileH263Baseline => "H263 Baseline", + VAProfile::VAProfileJPEGBaseline => "JPEG Baseline", + VAProfile::VAProfileH264ConstrainedBaseline => "H264 Constrained Baseline", + VAProfile::VAProfileVP8Version0_3 => "VP8", + VAProfile::VAProfileH264MultiviewHigh => "H.264 Multiview High", + VAProfile::VAProfileH264StereoHigh => "H264 Stereo High", + VAProfile::VAProfileHEVCMain => "H.EVC Main", + VAProfile::VAProfileHEVCMain10 => "H.EVC Main10", + VAProfile::VAProfileVP9Profile0 => "VP9 Profile 0", + VAProfile::VAProfileVP9Profile1 => "VP9 Profile 1", + VAProfile::VAProfileVP9Profile2 => "VP9 Profile 2", + VAProfile::VAProfileVP9Profile3 => "VP9 Profile 3", + VAProfile::VAProfileHEVCMain12 => "HEVC Main12", + VAProfile::VAProfileHEVCMain422_10 => "HEVC Main10 4:2:2", + VAProfile::VAProfileHEVCMain422_12 => "HEVC Main12 4:2:2", + VAProfile::VAProfileHEVCMain444 => "HEVC Main 
4:4:4", + VAProfile::VAProfileHEVCMain444_10 => "HEVC Main10 4:4:4", + VAProfile::VAProfileHEVCMain444_12 => "HEVC Main12 4:4:4", + VAProfile::VAProfileHEVCSccMain => "HEVC SCC Main", + VAProfile::VAProfileHEVCSccMain10 => "HEVC SCC Main10", + VAProfile::VAProfileHEVCSccMain444 => "HEVC SCC Main 4:4:4", + VAProfile::VAProfileAV1Profile0 => "AV1 Profile 0", + VAProfile::VAProfileAV1Profile1 => "AV1 Profile 1", + VAProfile::VAProfileHEVCSccMain444_10 => "HEVC SCC Main10 4:4:4", + _ => "unknown", + } +} + +const NUM_REF_PICS: usize = 16; + +struct WaitingFrame { + ts: u64, + pic: Picture, + is_idr: bool, + is_ref: bool, + ftype: FrameType, +} + +struct Reorderer { + last_ref_dts: Option, + ready_idx: usize, + frames: VecDeque, +} + +impl Default for Reorderer { + fn default() -> Self { + Self { + last_ref_dts: None, + ready_idx: 0, + frames: VecDeque::with_capacity(16), + } + } +} + +impl Reorderer { + fn add_frame(&mut self, new_frame: WaitingFrame) { + if !new_frame.is_ref { + if self.frames.is_empty() { + self.frames.push_back(new_frame); + } else { + let new_dts = new_frame.ts; + let mut idx = 0; + for (i, frm) in self.frames.iter().enumerate() { + idx = i; + if frm.ts > new_dts { + break; + } + } + self.frames.insert(idx, new_frame); + } + } else { + for (i, frm) in self.frames.iter().enumerate() { + if Some(frm.ts) == self.last_ref_dts { + self.ready_idx = i + 1; + } + } + self.last_ref_dts = Some(new_frame.ts); + self.frames.push_back(new_frame); + } + } + fn get_frame(&mut self) -> Option { + if self.ready_idx > 0 { + match self.frames[0].pic.query_status() { + _ if self.ready_idx > 16 => {}, + Ok(VASurfaceStatus::Ready) => {}, + Ok(VASurfaceStatus::Rendering) => return None, + _ => { + unimplemented!(); + }, + }; + self.ready_idx -= 1; + self.frames.pop_front() + } else { + None + } + } + fn flush(&mut self) { + self.last_ref_dts = None; + self.ready_idx = 0; + } +} + +#[allow(dead_code)] +struct VaapiInternals { + display: Rc, + context: Rc, + ref_pics: Vec<(Picture, VASurfaceID)>, + surfaces: Vec, + ifmt: VAImageFormat, +} + +pub struct VaapiH264Decoder { + info: NACodecInfoRef, + vaapi: Option, + spses: Vec, + ppses: Vec, + frame_refs: FrameRefs, + nal_len: u8, + out_frm: NABufferType, + reorderer: Reorderer, + tb_num: u32, + tb_den: u32, +} + +fn fill_frame(ifmt: VAImageFormat, pic: &Picture, frm: &mut NABufferType) -> DecoderResult<()> { + let mut vbuf = frm.get_vbuf().unwrap(); + let (w, h) = pic.surface_size(); + //let cur_ts = pic.timestamp(); + + let img = Image::new(pic, ifmt, w, h, true).expect("get image"); + + let iimg = img.image(); + let imgdata: &[u8] = img.as_ref(); + + match iimg.format.fourcc().map_err(|_| DecoderError::InvalidData)? 
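+ // only NV12 images are handled below: the luma plane is copied line by line
+ // and the interleaved chroma plane is split into separate planar U and V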
{ + VAFourcc::NV12 => { + let frm = NASimpleVideoFrame::from_video_buf(&mut vbuf).unwrap(); + validate!(iimg.width == (frm.width[0] as u16)); + validate!(iimg.height == (frm.height[0] as u16)); + + for (dline, sline) in frm.data[frm.offset[0]..].chunks_mut(frm.stride[0]) + .zip(imgdata[iimg.offsets[0] as usize..].chunks(iimg.pitches[0] as usize)) + .take(frm.height[0]) { + dline[..frm.width[0]].copy_from_slice(&sline[..frm.width[0]]); + } + + let mut uoff = frm.offset[1]; + let mut voff = frm.offset[2]; + for cline in imgdata[iimg.offsets[1] as usize..].chunks(iimg.pitches[1] as usize).take(frm.height[1]) { + for (x, pair) in cline.chunks_exact(2).take(frm.width[1]).enumerate() { + frm.data[uoff + x] = pair[0]; + frm.data[voff + x] = pair[1]; + } + uoff += frm.stride[1]; + voff += frm.stride[2]; + } + }, + _ => unimplemented!(), + }; + Ok(()) +} + +impl Default for VaapiH264Decoder { + fn default() -> Self { + Self { + info: NACodecInfoRef::default(), + vaapi: None, + spses: Vec::with_capacity(1), + ppses: Vec::with_capacity(4), + frame_refs: FrameRefs::new(), + nal_len: 0, + out_frm: NABufferType::None, + reorderer: Reorderer::default(), + tb_num: 0, + tb_den: 0, + } + } +} + +impl VaapiH264Decoder { + pub fn new() -> Self { Self::default() } + pub fn init(&mut self, info: NACodecInfoRef) -> DecoderResult<()> { + if let NACodecTypeInfo::Video(vinfo) = info.get_properties() { + let edata = info.get_extradata().unwrap(); +//print!("edata:"); for &el in edata.iter() { print!(" {:02X}", el); } println!(); + let profile; + let mut nal_buf = Vec::with_capacity(1024); + if edata.len() > 11 && &edata[0..4] == b"avcC" { + let mut mr = MemoryReader::new_read(edata.as_slice()); + let mut br = ByteReader::new(&mut mr); + + br.read_skip(4)?; + let version = br.read_byte()?; + validate!(version == 1); + profile = br.read_byte()?; + let _compatibility = br.read_byte()?; + let _level = br.read_byte()?; + let b = br.read_byte()?; + validate!((b & 0xFC) == 0xFC); + self.nal_len = (b & 3) + 1; + let b = br.read_byte()?; + validate!((b & 0xE0) == 0xE0); + let num_sps = (b & 0x1F) as usize; + for _ in 0..num_sps { + let len = br.read_u16be()? as usize; + let offset = br.tell() as usize; + validate!((br.peek_byte()? & 0x1F) == 7); + let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf); + br.read_skip(len)?; + let sps = parse_sps(&nal_buf[1..])?; + self.spses.push(sps); + } + let num_pps = br.read_byte()? as usize; + for _ in 0..num_pps { + let len = br.read_u16be()? as usize; + let offset = br.tell() as usize; + validate!((br.peek_byte()? 
& 0x1F) == 8); + let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf); + br.read_skip(len)?; + let src = &nal_buf; + + let mut full_size = src.len() * 8; + for &byte in src.iter().rev() { + if byte == 0 { + full_size -= 8; + } else { + full_size -= (byte.trailing_zeros() + 1) as usize; + break; + } + } + validate!(full_size > 0); + + let pps = parse_pps(&src[1..], &self.spses, full_size - 8)?; + let mut found = false; + for stored_pps in self.ppses.iter_mut() { + if stored_pps.pic_parameter_set_id == pps.pic_parameter_set_id { + *stored_pps = pps.clone(); + found = true; + break; + } + } + if !found { + self.ppses.push(pps); + } + } + if br.left() > 0 { + match profile { + 100 | 110 | 122 | 144 => { + let b = br.read_byte()?; + validate!((b & 0xFC) == 0xFC); + // b & 3 -> chroma format + let b = br.read_byte()?; + validate!((b & 0xF8) == 0xF8); + // b & 7 -> luma depth minus 8 + let b = br.read_byte()?; + validate!((b & 0xF8) == 0xF8); + // b & 7 -> chroma depth minus 8 + let num_spsext = br.read_byte()? as usize; + for _ in 0..num_spsext { + let len = br.read_u16be()? as usize; + // parse spsext + br.read_skip(len)?; + } + }, + _ => {}, + }; + } + } else { + return Err(DecoderError::NotImplemented); + } + + validate!(profile > 0); + let width = (vinfo.get_width() + 15) & !15; + let height = (vinfo.get_height() + 15) & !15; + + let display = Display::open_silently().expect("open display"); + + let num_surfaces = self.spses[0].num_ref_frames + 4 + 64; + + let va_profile = match profile { + 66 => VAProfile::VAProfileH264ConstrainedBaseline, + 77 => VAProfile::VAProfileH264Main, + 88 | 100 | 110 | 122 => VAProfile::VAProfileH264High, + _ => return Err(DecoderError::NotImplemented), + }; + if let Ok(profiles) = display.query_config_profiles() { + if !profiles.contains(&va_profile) { +println!("Profile {} ({}) not supported", profile, profile_name(va_profile)); + return Err(DecoderError::NotImplemented); + } + } else { + return Err(DecoderError::Bug); + } + if let Ok(points) = display.query_config_entrypoints(va_profile) { + if !points.contains(&VAEntrypoint::VAEntrypointVLD) { +println!("no decoding support for this profile"); + return Err(DecoderError::NotImplemented); + } + } else { + return Err(DecoderError::Bug); + } + + let config = display.create_config(vec![ + VAConfigAttrib { type_: VAConfigAttribType::VAConfigAttribRTFormat, value: RTFormat::YUV420.into() }, + ], va_profile, VAEntrypoint::VAEntrypointVLD).map_err(|_| { +println!("config creation failed!"); + DecoderError::Bug + })?; + let surfaces = display.create_surfaces(RTFormat::YUV420, None, width as u32, height as u32, Some(UsageHint::Decoder.into()), num_surfaces as u32).map_err(|_| DecoderError::AllocError)?; + let context = display.create_context(&config, width as i32, height as i32, Some(&surfaces), true).map_err(|_| DecoderError::Bug)?; + + let ref_pics = Vec::new(); + + let image_formats = display.query_image_formats().map_err(|_| DecoderError::Bug)?; + validate!(!image_formats.is_empty()); + let mut ifmt = image_formats[0]; + for fmt in image_formats.iter() { + if fmt.bits_per_pixel == 12 { + ifmt = *fmt; + break; + } + } + + self.vaapi = Some(VaapiInternals { display, context, ref_pics, surfaces, ifmt }); + + let vinfo = NAVideoInfo::new(width, height, false, YUV420_FORMAT); + self.info = NACodecInfo::new_ref(info.get_name(), NACodecTypeInfo::Video(vinfo), info.get_extradata()).into_ref(); + self.out_frm = alloc_video_buffer(vinfo, 4)?; + + Ok(()) + } else { + Err(DecoderError::InvalidData) + } + } + fn 
decode(&mut self, pkt: &NAPacket) -> DecoderResult<()> { + let src = pkt.get_buffer(); + let vactx = if let Some(ref mut ctx) = self.vaapi { ctx } else { return Err(DecoderError::Bug) }; + + let timestamp = pkt.get_dts().unwrap_or_else(|| pkt.get_pts().unwrap_or(0)); + + if vactx.surfaces.is_empty() { +panic!("ran out of free surfaces"); +// return Err(DecoderError::AllocError); + } + let surface = vactx.surfaces.pop().unwrap(); + let surface_id = surface.id(); + let mut pic = Picture::new(timestamp, vactx.context.clone(), surface); + let mut is_ref = false; + let mut is_keyframe = false; + + self.tb_num = pkt.ts.tb_num; + self.tb_den = pkt.ts.tb_den; + + let mut mr = MemoryReader::new_read(&src); + let mut br = ByteReader::new(&mut mr); + let mut frame_type = FrameType::I; + let mut nal_buf = Vec::with_capacity(1024); + while br.left() > 0 { + let size = match self.nal_len { + 1 => br.read_byte()? as usize, + 2 => br.read_u16be()? as usize, + 3 => br.read_u24be()? as usize, + 4 => br.read_u32be()? as usize, + _ => unreachable!(), + }; + validate!(br.left() >= (size as i64)); + let offset = br.tell() as usize; + let raw_nal = &src[offset..][..size]; + let _size = unescape_nal(raw_nal, &mut nal_buf); + + let src = &nal_buf; + validate!((src[0] & 0x80) == 0); + let nal_ref_idc = src[0] >> 5; + let nal_unit_type = src[0] & 0x1F; + + let mut full_size = src.len() * 8; + for &byte in src.iter().rev() { + if byte == 0 { + full_size -= 8; + } else { + full_size -= (byte.trailing_zeros() + 1) as usize; + break; + } + } + validate!(full_size > 0); + + match nal_unit_type { + 1 | 5 => { + let is_idr = nal_unit_type == 5; + is_ref |= nal_ref_idc != 0; + is_keyframe |= is_idr; + let mut br = BitReader::new(&src[..(full_size + 7)/8], BitReaderMode::BE); + br.skip(8)?; + + let slice_hdr = parse_slice_header(&mut br, &self.spses, &self.ppses, is_idr, nal_ref_idc)?; + match slice_hdr.slice_type { + SliceType::P if frame_type != FrameType::B => frame_type = FrameType::P, + SliceType::SP if frame_type != FrameType::B => frame_type = FrameType::P, + SliceType::B => frame_type = FrameType::B, + _ => {}, + }; + let mut cur_sps = 0; + let mut cur_pps = 0; + let mut pps_found = false; + for (i, pps) in self.ppses.iter().enumerate() { + if pps.pic_parameter_set_id == slice_hdr.pic_parameter_set_id { + cur_pps = i; + pps_found = true; + break; + } + } + validate!(pps_found); + let mut sps_found = false; + for (i, sps) in self.spses.iter().enumerate() { + if sps.seq_parameter_set_id == self.ppses[cur_pps].seq_parameter_set_id { + cur_sps = i; + sps_found = true; + break; + } + } + validate!(sps_found); + let sps = &self.spses[cur_sps]; + let pps = &self.ppses[cur_pps]; + + if slice_hdr.first_mb_in_slice == 0 { + let (top_id, bot_id) = self.frame_refs.calc_picture_num(&slice_hdr, is_idr, nal_ref_idc, sps); + if is_idr { + self.frame_refs.clear_refs(); + for (pic, _) in vactx.ref_pics.drain(..) 
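+ // an IDR picture invalidates all previous references,
+ // so every surface still held for referencing can be returned to the free pool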
{ + if let Ok(surf) = pic.take_surface() { + vactx.surfaces.push(surf); + } else { + panic!("can't take surface"); + } + } + } + self.frame_refs.select_refs(sps, &slice_hdr, top_id); + let mut pic_refs = Vec::with_capacity(NUM_REF_PICS); + for pic in self.frame_refs.ref_pics.iter().rev().take(NUM_REF_PICS) { + pic_refs.push(pic.make_pic()); + } + if slice_hdr.adaptive_ref_pic_marking_mode { + self.frame_refs.apply_adaptive_marking(&slice_hdr.adaptive_ref_pic_marking, slice_hdr.frame_num, 1 << sps.log2_max_frame_num)?; + } + + while pic_refs.len() < NUM_REF_PICS { + pic_refs.push(make_dummy_h264_pic()); + } + + let mut flags = H264PictureFlags::default(); + let frame_idx = if let Some(id) = get_long_term_id(is_idr, &slice_hdr) { + flags |= H264PictureFlag::LongTermReference; + id as u32 + } else { + if nal_ref_idc != 0 { + flags |= H264PictureFlag::ShortTermReference; + } + u32::from(slice_hdr.frame_num) + }; + let pic_refs: [PictureH264; NUM_REF_PICS] = pic_refs.try_into().unwrap_or_else(|_| panic!("can't convert")); + + let h264pic = PictureH264::new(surface_id, frame_idx, flags, top_id as i32, bot_id as i32); + + let seq_fields = H264SeqFields::new( + u32::from(sps.chroma_format_idc), + u32::from(sps.separate_colour_plane), + u32::from(sps.gaps_in_frame_num_value_allowed), + u32::from(sps.frame_mbs_only), + u32::from(sps.mb_adaptive_frame_field), + u32::from(sps.direct_8x8_inference), + u32::from(sps.level_idc >= 31), + u32::from(sps.log2_max_frame_num) - 4, + u32::from(sps.pic_order_cnt_type), + u32::from(sps.log2_max_pic_order_cnt_lsb).wrapping_sub(4), + u32::from(sps.delta_pic_order_always_zero) + ); + let pic_fields = H264PicFields::new( + u32::from(pps.entropy_coding_mode), + u32::from(pps.weighted_pred), + u32::from(pps.weighted_bipred_idc), + u32::from(pps.transform_8x8_mode), + u32::from(slice_hdr.field_pic), + u32::from(pps.constrained_intra_pred), + u32::from(pps.pic_order_present), + u32::from(pps.deblocking_filter_control_present), + u32::from(pps.redundant_pic_cnt_present), + u32::from(nal_ref_idc != 0) + ); + let ppd = PictureParameterBufferH264::new( + h264pic, + pic_refs, + sps.pic_width_in_mbs as u16 - 1, + sps.pic_height_in_mbs as u16 - 1, + sps.bit_depth_luma - 8, + sps.bit_depth_chroma - 8, + sps.num_ref_frames as u8, + &seq_fields, + pps.num_slice_groups as u8 - 1, // should be 0 + pps.slice_group_map_type, // should be 0 + 0, //pps.slice_group_change_rate as u16 - 1, + pps.pic_init_qp as i8 - 26, + pps.pic_init_qs as i8 - 26, + pps.chroma_qp_index_offset, + pps.second_chroma_qp_index_offset, + &pic_fields, + slice_hdr.frame_num + ); + let pic_param = BufferType::PictureParameter(PictureParameter::H264(ppd)); + let buf = vactx.context.create_buffer(pic_param).map_err(|_| DecoderError::Bug)?; + pic.add_buffer(buf); + + let mut scaling_list_8x8 = [[0; 64]; 2]; + scaling_list_8x8[0].copy_from_slice(&pps.scaling_list_8x8[0]); + scaling_list_8x8[1].copy_from_slice(&pps.scaling_list_8x8[3]); + let iqmatrix = BufferType::IQMatrix(IQMatrix::H264(IQMatrixBufferH264::new(pps.scaling_list_4x4, scaling_list_8x8))); + let buf = vactx.context.create_buffer(iqmatrix).map_err(|_| DecoderError::Bug)?; + pic.add_buffer(buf); + + let cpic = PictureInfo { + id: slice_hdr.frame_num, + full_id: top_id, + surface_id, + top_id, bot_id, + //pic_type: slice_hdr.slice_type.to_frame_type(), + is_ref, + is_idr, + long_term: get_long_term_id(is_idr, &slice_hdr), + }; + if cpic.is_ref { + self.frame_refs.add_short_term(cpic.clone(), sps.num_ref_frames); + } + if let Some(lt_idx) = cpic.long_term { 
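+ // the slice header also marked this picture as a long-term reference,
+ // so store it under the assigned long-term index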
+ self.frame_refs.add_long_term(lt_idx, cpic); + } + } + + let mut luma_weight_l0 = [0i16; 32]; + let mut luma_offset_l0 = [0i16; 32]; + let mut chroma_weight_l0 = [[0i16; 2]; 32]; + let mut chroma_offset_l0 = [[0i16; 2]; 32]; + let mut luma_weight_l1 = [0i16; 32]; + let mut luma_offset_l1 = [0i16; 32]; + let mut chroma_weight_l1 = [[0i16; 2]; 32]; + let mut chroma_offset_l1 = [[0i16; 2]; 32]; + let mut luma_weighted_l0 = false; + let mut chroma_weighted_l0 = false; + let mut luma_weighted_l1 = false; + let mut chroma_weighted_l1 = false; + let mut luma_log2_weight_denom = slice_hdr.luma_log2_weight_denom; + let mut chroma_log2_weight_denom = slice_hdr.chroma_log2_weight_denom; + + if (pps.weighted_pred && matches!(slice_hdr.slice_type, SliceType::P | SliceType::B)) || (pps.weighted_bipred_idc == 1 && slice_hdr.slice_type == SliceType::B) { + luma_weighted_l0 = true; + chroma_weighted_l0 = false; + for (i, winfo) in slice_hdr.weights_l0.iter().enumerate().take(slice_hdr.num_ref_idx_l0_active) { + if winfo.luma_weighted { + luma_weight_l0[i] = winfo.luma_weight.into(); + luma_offset_l0[i] = winfo.luma_offset.into(); + } else { + luma_weight_l0[i] = 1 << slice_hdr.luma_log2_weight_denom; + } + if winfo.chroma_weighted { + chroma_weight_l0[i][0] = winfo.chroma_weight[0].into(); + chroma_weight_l0[i][1] = winfo.chroma_weight[1].into(); + chroma_offset_l0[i][0] = winfo.chroma_offset[0].into(); + chroma_offset_l0[i][1] = winfo.chroma_offset[1].into(); + } else { + chroma_weight_l0[i][0] = 1 << slice_hdr.chroma_log2_weight_denom; + chroma_weight_l0[i][1] = 1 << slice_hdr.chroma_log2_weight_denom; + chroma_offset_l0[i][0] = 0; + chroma_offset_l0[i][1] = 0; + } + chroma_weighted_l0 |= winfo.chroma_weighted; + } + } + if pps.weighted_bipred_idc == 1 && slice_hdr.slice_type == SliceType::B { + luma_weighted_l1 = true; + chroma_weighted_l1 = sps.chroma_format_idc != 0; + for (i, winfo) in slice_hdr.weights_l1.iter().enumerate().take(slice_hdr.num_ref_idx_l1_active) { + if winfo.luma_weighted { + luma_weight_l1[i] = winfo.luma_weight.into(); + luma_offset_l1[i] = winfo.luma_offset.into(); + } else { + luma_weight_l1[i] = 1 << slice_hdr.luma_log2_weight_denom; + } + if chroma_weighted_l1 && winfo.chroma_weighted { + chroma_weight_l1[i][0] = winfo.chroma_weight[0].into(); + chroma_weight_l1[i][1] = winfo.chroma_weight[1].into(); + chroma_offset_l1[i][0] = winfo.chroma_offset[0].into(); + chroma_offset_l1[i][1] = winfo.chroma_offset[1].into(); + } else { + chroma_weight_l1[i][0] = 1 << slice_hdr.chroma_log2_weight_denom; + chroma_weight_l1[i][1] = 1 << slice_hdr.chroma_log2_weight_denom; + chroma_offset_l1[i][0] = 0; + chroma_offset_l1[i][1] = 0; + } + } + } + if pps.weighted_bipred_idc == 2 && slice_hdr.slice_type == SliceType::B { + let num_l0 = slice_hdr.num_ref_idx_l0_active; + let num_l1 = slice_hdr.num_ref_idx_l1_active; + if num_l0 != 1 || num_l1 != 1 { //xxx: also exclude symmetric case + luma_weighted_l0 = false; + luma_weighted_l1 = false; + chroma_weighted_l0 = false; + chroma_weighted_l1 = false; + luma_log2_weight_denom = 5; + chroma_log2_weight_denom = 5; + + for w in luma_weight_l0.iter_mut() { + *w = 32; + } + for w in luma_weight_l1.iter_mut() { + *w = 32; + } + for w in chroma_weight_l0.iter_mut() { + *w = [32; 2]; + } + for w in chroma_weight_l1.iter_mut() { + *w = [32; 2]; + } + } + } + + let ref_pic_list_0 = map_ref_list(&self.frame_refs.cur_refs.ref_list0); + let ref_pic_list_1 = map_ref_list(&self.frame_refs.cur_refs.ref_list1); + + let slice_param = SliceParameterBufferH264::new( + 
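+ // the size of the original (still escaped) NAL unit is passed as the slice data size,
+ // since the unmodified bitstream bytes are submitted as the slice data buffer below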
raw_nal.len() as u32, + 0, // no offset + VASliceDataFlag::All, + br.tell() as u16, + slice_hdr.first_mb_in_slice as u16, + match slice_hdr.slice_type { + SliceType::I => 2, + SliceType::P => 0, + SliceType::B => 1, + SliceType::SI => 4, + SliceType::SP => 3, + }, + slice_hdr.direct_spatial_mv_pred as u8, + (slice_hdr.num_ref_idx_l0_active as u8).saturating_sub(1), + (slice_hdr.num_ref_idx_l1_active as u8).saturating_sub(1), + slice_hdr.cabac_init_idc, + slice_hdr.slice_qp_delta as i8, + slice_hdr.disable_deblocking_filter_idc, + slice_hdr.slice_alpha_c0_offset / 2, + slice_hdr.slice_beta_offset / 2, + ref_pic_list_0, + ref_pic_list_1, + luma_log2_weight_denom, + chroma_log2_weight_denom, + luma_weighted_l0 as u8, luma_weight_l0, luma_offset_l0, + chroma_weighted_l0 as u8, chroma_weight_l0, chroma_offset_l0, + luma_weighted_l1 as u8, luma_weight_l1, luma_offset_l1, + chroma_weighted_l1 as u8, chroma_weight_l1, chroma_offset_l1, + ); + let slc_param = BufferType::SliceParameter(SliceParameter::H264(slice_param)); + let buf = vactx.context.create_buffer(slc_param).map_err(|_| DecoderError::Bug)?; + pic.add_buffer(buf); + + let slc_data = BufferType::SliceData(raw_nal.to_vec()); + let buf = vactx.context.create_buffer(slc_data).map_err(|_| DecoderError::Bug)?; + pic.add_buffer(buf); + }, + 2 => { // slice data partition A + //slice header + //slice id = read_ue() + //cat 2 slice data (all but MB layer residual) + return Err(DecoderError::NotImplemented); + }, + 3 => { // slice data partition B + //slice id = read_ue() + //if pps.redundant_pic_cnt_present { redundant_pic_cnt = read_ue() } + //cat 3 slice data (MB layer residual) + return Err(DecoderError::NotImplemented); + }, + 4 => { // slice data partition C + //slice id = read_ue() + //if pps.redundant_pic_cnt_present { redundant_pic_cnt = read_ue() } + //cat 4 slice data (MB layer residual) + return Err(DecoderError::NotImplemented); + }, + 6 => {}, //SEI + 7 => { + let sps = parse_sps(&src[1..])?; + self.spses.push(sps); + }, + 8 => { + validate!(full_size >= 8 + 16); + let pps = parse_pps(&src[1..], &self.spses, full_size - 8)?; + let mut found = false; + for stored_pps in self.ppses.iter_mut() { + if stored_pps.pic_parameter_set_id == pps.pic_parameter_set_id { + *stored_pps = pps.clone(); + found = true; + break; + } + } + if !found { + self.ppses.push(pps); + } + }, + 9 => { // access unit delimiter + }, + 10 => {}, //end of sequence + 11 => {}, //end of stream + 12 => {}, //filler + _ => {}, + }; + + br.read_skip(size)?; + } + + let bpic = pic.begin().expect("begin"); + let rpic = bpic.render().expect("render"); + let epic = rpic.end().expect("end"); + + self.reorderer.add_frame(WaitingFrame { + pic: epic, + is_idr: is_keyframe, + is_ref, + ftype: frame_type, + ts: timestamp, + }); + + let mut idx = 0; + while idx < vactx.ref_pics.len() { + let cur_surf_id = vactx.ref_pics[idx].1; + if self.frame_refs.ref_pics.iter().any(|fref| fref.surface_id == cur_surf_id) { + idx += 1; + } else { + let (pic, _) = vactx.ref_pics.remove(idx); + if let Ok(surf) = pic.take_surface() { + vactx.surfaces.push(surf); + } else { + panic!("can't take surface"); + } + } + } + + Ok(()) + } + fn get_frame(&mut self) -> Option { + if let Some(ref mut vactx) = self.vaapi { + if let Some(frm) = self.reorderer.get_frame() { + let ts = frm.ts; + let is_idr = frm.is_idr; + let is_ref = frm.is_ref; + let ftype = frm.ftype; + if let Ok(pic) = frm.pic.sync() { + let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm); + + if !is_ref { + if let Ok(surf) = 
pic.take_surface() { + vactx.surfaces.push(surf); + } else { + panic!("can't take surface"); + } + } else { + let id = pic.surface_id(); + vactx.ref_pics.push((pic, id)); + } + + let ts = NATimeInfo::new(None, Some(ts), None, self.tb_num, self.tb_den); + Some(NAFrame::new(ts, ftype, is_idr, self.info.clone(), self.out_frm.clone()).into_ref()) + } else { + panic!("can't sync"); + } + } else { + None + } + } else { + None + } + } + fn get_last_frames(&mut self) -> Option { + if let Some(ref mut vactx) = self.vaapi { + if let Some(frm) = self.reorderer.frames.pop_front() { + let ts = frm.ts; + let is_idr = frm.is_idr; + let is_ref = frm.is_ref; + let ftype = frm.ftype; + if let Ok(pic) = frm.pic.sync() { + let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm); + + if !is_ref { + if let Ok(surf) = pic.take_surface() { + vactx.surfaces.push(surf); + } else { + panic!("can't take surface"); + } + } else { + let id = pic.surface_id(); + vactx.ref_pics.push((pic, id)); + } + + let ts = NATimeInfo::new(None, Some(ts), None, self.tb_num, self.tb_den); + Some(NAFrame::new(ts, ftype, is_idr, self.info.clone(), self.out_frm.clone()).into_ref()) + } else { + panic!("can't sync"); + } + } else { + None + } + } else { + None + } + } + fn flush(&mut self) { + self.frame_refs.clear_refs(); + if let Some(ref mut vactx) = self.vaapi { + for frm in self.reorderer.frames.drain(..) { + if let Ok(pic) = frm.pic.sync() { + if let Ok(surf) = pic.take_surface() { + vactx.surfaces.push(surf); + } else { + panic!("can't take surface"); + } + } else { + panic!("can't sync"); + } + } + self.reorderer.flush(); + for (pic, _) in vactx.ref_pics.drain(..) { + if let Ok(surf) = pic.take_surface() { + vactx.surfaces.push(surf); + } else { + panic!("can't take surface"); + } + } + } + } +} + +impl NAOptionHandler for VaapiH264Decoder { + fn get_supported_options(&self) -> &[NAOptionDefinition] { &[] } + fn set_options(&mut self, _options: &[NAOption]) {} + fn query_option_value(&self, _name: &str) -> Option { None } +} + +use std::thread::*; +use std::sync::mpsc::*; + +enum DecMessage { + Init(NACodecInfoRef), + Decode(NAPacket), + Flush, + GetFrame, + GetLastFrames, + End +} + +enum DecResponse { + Ok, + Nothing, + Err(DecoderError), + Frame(NAFrameRef), +} + +pub trait HWDecoder { + fn init(&mut self, info: NACodecInfoRef) -> DecoderResult<()>; + fn queue_pkt(&mut self, pkt: &NAPacket) -> DecoderResult<()>; + fn get_frame(&mut self) -> Option; + fn get_last_frames(&mut self) -> Option; + fn flush(&mut self); +} + +pub struct HWWrapper { + handle: Option>>, + send: SyncSender, + recv: Receiver, +} + +#[allow(clippy::new_without_default)] +impl HWWrapper { + pub fn new() -> Self { + let (in_send, in_recv) = sync_channel(1); + let (out_send, out_recv) = sync_channel(1); + let handle = std::thread::spawn(move || { + let receiver = in_recv; + let sender = out_send; + let mut dec = VaapiH264Decoder::new(); + while let Ok(msg) = receiver.recv() { + match msg { + DecMessage::Init(info) => { + let msg = if let Err(err) = dec.init(info) { + DecResponse::Err(err) + } else { + DecResponse::Ok + }; + sender.send(msg).map_err(|_| DecoderError::Bug)?; + }, + DecMessage::Decode(pkt) => { + let msg = match dec.decode(&pkt) { + Ok(()) => DecResponse::Ok, + Err(err) => DecResponse::Err(err), + }; + sender.send(msg).map_err(|_| DecoderError::Bug)?; + }, + DecMessage::GetFrame => { + let msg = match dec.get_frame() { + Some(frm) => DecResponse::Frame(frm), + None => DecResponse::Nothing, + }; + sender.send(msg).map_err(|_| DecoderError::Bug)?; 
+ }, + DecMessage::GetLastFrames => { + let msg = match dec.get_last_frames() { + Some(frm) => DecResponse::Frame(frm), + None => DecResponse::Nothing, + }; + sender.send(msg).map_err(|_| DecoderError::Bug)?; + }, + DecMessage::Flush => dec.flush(), + DecMessage::End => return Ok(()), + }; + } + Err(DecoderError::Bug) + }); + + Self { + handle: Some(handle), + send: in_send, + recv: out_recv, + } + } +} + +impl HWDecoder for HWWrapper { + fn init(&mut self, info: NACodecInfoRef) -> DecoderResult<()> { + if self.send.send(DecMessage::Init(info)).is_ok() { + match self.recv.recv() { + Ok(DecResponse::Ok) => Ok(()), + Ok(DecResponse::Err(err)) => Err(err), + Err(_) => Err(DecoderError::Bug), + _ => unreachable!(), + } + } else { + Err(DecoderError::Bug) + } + } + fn queue_pkt(&mut self, pkt: &NAPacket) -> DecoderResult<()> { + let pkt2 = NAPacket::new_from_refbuf(pkt.get_stream(), pkt.ts, pkt.keyframe, pkt.get_buffer()); + if self.send.send(DecMessage::Decode(pkt2)).is_ok() { + match self.recv.recv() { + Ok(DecResponse::Ok) => Ok(()), + Ok(DecResponse::Err(err)) => Err(err), + Err(_) => Err(DecoderError::Bug), + _ => unreachable!(), + } + } else { + Err(DecoderError::Bug) + } + } + fn get_frame(&mut self) -> Option { + if self.send.send(DecMessage::GetFrame).is_ok() { + match self.recv.recv() { + Ok(DecResponse::Frame(frm)) => Some(frm), + Ok(DecResponse::Nothing) => None, + Err(_) => None, + _ => unreachable!(), + } + } else { + None + } + } + fn get_last_frames(&mut self) -> Option { + if self.send.send(DecMessage::GetLastFrames).is_ok() { + match self.recv.recv() { + Ok(DecResponse::Frame(frm)) => Some(frm), + Ok(DecResponse::Nothing) => None, + Err(_) => None, + _ => unreachable!(), + } + } else { + None + } + } + fn flush(&mut self) { + let _ = self.send.send(DecMessage::Flush); + } +} + +impl Drop for HWWrapper { + fn drop(&mut self) { + if self.send.send(DecMessage::End).is_ok() { + let mut handle = None; + std::mem::swap(&mut handle, &mut self.handle); + if let Some(hdl) = handle { + let _ = hdl.join(); + } + } + } +} + +impl NAOptionHandler for HWWrapper { + fn get_supported_options(&self) -> &[NAOptionDefinition] { &[] } + fn set_options(&mut self, _options: &[NAOption]) {} + fn query_option_value(&self, _name: &str) -> Option { None } +} + +pub fn new_h264_hwdec() -> Box { + Box::new(HWWrapper::new()) +} + +#[cfg(test)] +mod test { + use nihav_core::codecs::*; + use nihav_core::io::byteio::*; + use nihav_core::demuxers::{RegisteredDemuxers, create_demuxer}; + use nihav_commonfmt::generic_register_all_demuxers; + use super::VaapiH264Decoder; + use std::io::prelude::*; + + fn decode_h264(name: &str, dname: &str, dmx_reg: &RegisteredDemuxers, opfx: &str) -> DecoderResult<()> { + let dmx_f = dmx_reg.find_demuxer(dname).expect("demuxer exists"); + let file = std::fs::File::open(name).expect("file exists"); + let mut fr = FileReader::new_read(file); + let mut br = ByteReader::new(&mut fr); + let mut dmx = create_demuxer(dmx_f, &mut br).expect("create demuxer"); + + let mut vstream_id = 0; + let mut dec = VaapiH264Decoder::new(); + for stream in dmx.get_streams() { + if stream.get_media_type() == StreamType::Video { + dec.init(stream.get_info()).expect("inited"); + vstream_id = stream.get_id(); + break; + } + } + + let mut frameno = 0; + while let Ok(pkt) = dmx.get_frame() { + if pkt.get_stream().get_id() != vstream_id { + continue; + } + dec.decode(&pkt).expect("decoded"); + let frm = dec.get_last_frames().expect("get frame"); + let timestamp = frm.get_dts().unwrap_or_else(|| 
frm.get_pts().unwrap_or(0)); + + let pic = frm.get_buffer().get_vbuf().expect("got picture"); + + let nname = format!("assets/test_out/{}{:06}_{}.pgm", opfx, timestamp, frameno); + frameno += 1; + let mut file = std::fs::File::create(&nname).expect("create file"); + let (w, h) = pic.get_dimensions(0); + file.write_all(format!("P5\n{} {}\n255\n", w, h * 3 / 2).as_bytes()).expect("header written"); + let data = pic.get_data(); + for yline in data.chunks(pic.get_stride(0)).take(h) { + file.write_all(&yline[..w]).expect("Y line written"); + } + for (uline, vline) in data[pic.get_offset(1)..].chunks(pic.get_stride(1)) + .zip(data[pic.get_offset(2)..].chunks(pic.get_stride(2))).take(h / 2) { + file.write_all(&uline[..w / 2]).expect("U line written"); + file.write_all(&vline[..w / 2]).expect("V line written"); + } + } + Ok(()) + } + + + // samples if not specified otherwise come from H.264 conformance suite + + #[test] + fn test_h264_simple() { + let mut dmx_reg = RegisteredDemuxers::new(); + generic_register_all_demuxers(&mut dmx_reg); + + decode_h264("assets/ITU/DimpledSpanishCuckoo-mobile.mp4", "mov", &dmx_reg, "hw").unwrap(); + } +} diff --git a/hwdec-vaapi/src/pic_ref.rs b/hwdec-vaapi/src/pic_ref.rs new file mode 100644 index 0000000..8d5c982 --- /dev/null +++ b/hwdec-vaapi/src/pic_ref.rs @@ -0,0 +1,411 @@ +use nihav_core::codecs::DecoderResult; +//use nihav_core::frame::{FrameType, NAVideoBufferRef, NATimeInfo}; +use super::sets::SeqParameterSet; +use super::slice::*; + +use libva::VASurfaceID; + +pub const MISSING_POC: u16 = 0xFFFF; + +#[derive(Clone)] +pub struct PictureInfo { + pub id: u16, + pub full_id: u32, + pub top_id: u32, + pub bot_id: u32, +// pub pic_type: FrameType, + pub is_ref: bool, + pub is_idr: bool, + pub long_term: Option, + pub surface_id: VASurfaceID, +} + +#[derive(Clone)] +pub struct SliceRefs { + pub ref_list0: Vec>, + pub ref_list1: Vec>, + pub cur_id: u32, +} + +#[allow(dead_code)] +impl SliceRefs { + pub fn get_ref_id(&self, list_id: u8, ref_id: usize) -> Option { + let ref_list = if list_id == 0 { &self.ref_list0 } else { &self.ref_list1 }; + if ref_list.len() > ref_id { + ref_list[ref_id].as_ref().map(|pic| pic.full_id) + } else { + None + } + } +} + +pub struct FrameRefs { + pub ref_pics: Vec, + pub cur_refs: SliceRefs, + pub long_term: Vec>, + + prev_poc_msb: u32, + prev_poc_lsb: u16, + prev_ref_poc_lsb: u16, + prev_frame_num: u16, + frame_num_offset: u32, + max_frame_num: i32, +} + +#[allow(clippy::new_without_default)] +impl FrameRefs { + pub fn new() -> Self { + Self { + ref_pics: Vec::with_capacity(16), + cur_refs: SliceRefs { + ref_list0: Vec::with_capacity(3), + ref_list1: Vec::with_capacity(3), + cur_id: 0, + }, + long_term: Vec::new(), + + prev_poc_msb: 0, + prev_poc_lsb: 0, + prev_ref_poc_lsb: 0, + prev_frame_num: 0, + frame_num_offset: 0, + max_frame_num: 0, + } + } + pub fn fill_ref_nums(&self, dst: &mut Vec) { + for pic in self.ref_pics.iter() { + if !dst.contains(&pic.full_id) { + dst.push(pic.full_id); + } + } + for pic in self.long_term.iter().flatten() { + if !dst.contains(&pic.full_id) { + dst.push(pic.full_id); + } + } + } + pub fn calc_picture_num(&mut self, slice_hdr: &SliceHeader, is_idr: bool, ref_id: u8, sps: &SeqParameterSet) -> (u32, u32) { + self.max_frame_num = 1 << sps.log2_max_frame_num; + match sps.pic_order_cnt_type { + 0 => { + if is_idr { + self.prev_poc_msb = 0; + self.prev_poc_lsb = 0; + } else { + self.prev_poc_lsb = self.prev_ref_poc_lsb; + } + let max_poc_lsb = 1 << sps.log2_max_pic_order_cnt_lsb; + let half_max_poc_lsb = 
1 << (sps.log2_max_pic_order_cnt_lsb - 1); + let cur_lsb = slice_hdr.pic_order_cnt_lsb; + let poc_msb = if cur_lsb < self.prev_poc_lsb && (self.prev_poc_lsb - cur_lsb >= half_max_poc_lsb) { + self.prev_poc_msb + max_poc_lsb + } else if cur_lsb > self.prev_poc_lsb && (cur_lsb - self.prev_poc_lsb > half_max_poc_lsb) { + self.prev_poc_msb.wrapping_sub(max_poc_lsb) + } else { + self.prev_poc_msb + }; + let poc = poc_msb + u32::from(cur_lsb); + if ref_id != 0 { + self.prev_ref_poc_lsb = slice_hdr.pic_order_cnt_lsb; + self.prev_poc_msb = poc_msb; + } + (poc, poc) + }, + 1 => { + let off = if self.prev_frame_num > slice_hdr.frame_num { + self.frame_num_offset + (1 << sps.log2_max_frame_num) + } else { + self.frame_num_offset + }; + let mut anum = if sps.num_ref_frames_in_pic_order_cnt_cycle != 0 { + (off as i32) + i32::from(slice_hdr.frame_num) + } else { + 0 + }; + if ref_id == 0 && anum > 0 { + anum -= 1; + } + let (poc_cycle_cnt, fno_in_poc_cycle) = if anum > 0 { + let nrf = sps.num_ref_frames_in_pic_order_cnt_cycle as i32; + ((anum - 1) / nrf, (anum - 1) % nrf) + } else { + (0, 0) + }; + let mut expected_delta = 0; + for &offset in sps.offset_for_ref_frame[..sps.num_ref_frames_in_pic_order_cnt_cycle].iter() { + expected_delta += offset; + } + let mut expected_poc = if anum > 0 { + let mut sum = poc_cycle_cnt * expected_delta; + for &offset in sps.offset_for_ref_frame[..=fno_in_poc_cycle as usize].iter() { + sum += offset; + } + sum + } else { + 0 + }; + if ref_id == 0 { + expected_poc += sps.offset_for_non_ref_pic; + } + let (top_id, bot_id) = if !slice_hdr.field_pic { + let top_id = expected_poc + slice_hdr.delta_pic_order_cnt[0]; + let bot_id = top_id + sps.offset_for_top_to_bottom_field + slice_hdr.delta_pic_order_cnt[1]; + (top_id, bot_id) + } else if !slice_hdr.bottom_field { + (expected_poc + slice_hdr.delta_pic_order_cnt[0], 0) + } else { + (0, sps.offset_for_top_to_bottom_field + slice_hdr.delta_pic_order_cnt[1]) + }; + self.prev_frame_num = slice_hdr.frame_num; + self.frame_num_offset = off; + (top_id as u32, bot_id as u32) + }, + _ => { + if slice_hdr.frame_num < self.prev_frame_num { + self.frame_num_offset += 1 << sps.log2_max_frame_num; + } + self.prev_frame_num = slice_hdr.frame_num; + let poc = self.frame_num_offset + u32::from(slice_hdr.frame_num); + (poc, poc) + }, + } + } + pub fn apply_adaptive_marking(&mut self, marking: &AdaptiveMarking, cur_id: u16, max_id: u16) -> DecoderResult<()> { + let all_ref_pics = self.ref_pics.clone(); + + for (&op, (&arg1, &arg2)) in marking.memory_management_control_op.iter().zip(marking.operation_arg.iter().zip(marking.operation_arg2.iter())).take(marking.num_ops) { + match op { + 1 => { + let src_id = cur_id.wrapping_sub(arg1) & (max_id - 1); + let mut found = false; + let mut idx = 0; + for (i, pic) in self.ref_pics.iter().enumerate() { + if pic.id == src_id { + found = true; + idx = i; + break; + } + } + if found { + self.ref_pics.remove(idx); + } + }, + 2 => { // mark long term picture as unused + let idx = arg1 as usize; + if idx < self.long_term.len() { + self.long_term[idx] = None; + } + }, + 3 => { + let src_id = cur_id.wrapping_sub(arg1) & (max_id - 1); + + let didx = arg2 as usize; + for pic in all_ref_pics.iter() { + if pic.id == src_id { + if didx < self.long_term.len() { + self.long_term[didx] = Some(pic.clone()); + } + break; + } + } + }, + 4 => { + self.long_term.resize(arg1 as usize, None); + }, + 5 => { + self.ref_pics.clear(); + self.long_term.clear(); + }, + 6 => { + // assign an long term index to current pic - done 
elsewhere + }, + _ => {}, + }; + } + Ok(()) + } + pub fn clear_refs(&mut self) { + self.ref_pics.clear(); + self.long_term.clear(); + } + #[allow(clippy::cognitive_complexity)] + pub fn select_refs(&mut self, sps: &SeqParameterSet, slice_hdr: &SliceHeader, cur_id: u32) { + self.cur_refs.cur_id = cur_id; + self.cur_refs.ref_list0.clear(); + self.cur_refs.ref_list1.clear(); + let pic_num_mask = if sps.log2_max_frame_num == 16 { + 0xFFFF + } else { + (1 << sps.log2_max_frame_num) - 1 + }; + + if !slice_hdr.slice_type.is_intra() { + let has_reordering = slice_hdr.ref_pic_list_reordering_l0; + if !has_reordering { + let num_ref = slice_hdr.num_ref_idx_l0_active; + if slice_hdr.slice_type.is_p() { + if !self.ref_pics.is_empty() { + for pic in self.ref_pics.iter().rev().take(num_ref) { + self.cur_refs.ref_list0.push(Some(pic.clone())); + } + } + } else { + let mut pivot = 0; + for (i, pic) in self.ref_pics.iter().enumerate() { + pivot = i; + if pic.full_id > cur_id { + break; + } + } + for pic in self.ref_pics[..pivot].iter().rev() { + if self.cur_refs.ref_list0.len() >= num_ref { + break; + } + self.cur_refs.ref_list0.push(Some(pic.clone())); + } + for pic in self.ref_pics.iter().skip(pivot) { + if self.cur_refs.ref_list0.len() >= num_ref { + break; + } + self.cur_refs.ref_list0.push(Some(pic.clone())); + } + } + if !self.long_term.is_empty() && self.cur_refs.ref_list0.len() < num_ref { + let copy_size = num_ref - self.cur_refs.ref_list0.len(); + for ltpic in self.long_term.iter().take(copy_size) { + self.cur_refs.ref_list0.push(ltpic.clone()); + } + } + } else { + form_ref_list(&mut self.cur_refs.ref_list0, + &self.ref_pics, &self.long_term, + &slice_hdr.reordering_list_l0, + slice_hdr.frame_num, pic_num_mask); + } + if slice_hdr.slice_type.is_b() { + let has_reordering = slice_hdr.ref_pic_list_reordering_l1; + if !has_reordering { + let num_ref = slice_hdr.num_ref_idx_l1_active; + let mut pivot = 0; + for (i, pic) in self.ref_pics.iter().enumerate() { + pivot = i; + if pic.full_id > cur_id { + break; + } + } + for pic in self.ref_pics.iter().skip(pivot) { + if self.cur_refs.ref_list1.len() >= num_ref { + break; + } + self.cur_refs.ref_list1.push(Some(pic.clone())); + } + for pic in self.ref_pics[..pivot].iter().rev() { + if self.cur_refs.ref_list1.len() >= num_ref { + break; + } + self.cur_refs.ref_list1.push(Some(pic.clone())); + } + if !self.long_term.is_empty() && self.cur_refs.ref_list1.len() < num_ref { + let copy_size = num_ref - self.cur_refs.ref_list1.len(); + for ltpic in self.long_term.iter().take(copy_size) { + self.cur_refs.ref_list1.push(ltpic.clone()); + } + } + if self.cur_refs.ref_list1.len() > 1 && self.cur_refs.ref_list0.len() == self.cur_refs.ref_list1.len() { + let mut equal = true; + for (pic1, pic2) in self.cur_refs.ref_list0.iter().zip(self.cur_refs.ref_list1.iter()) { + match (pic1, pic2) { + (Some(p1), Some(p2)) => { + if p1.full_id != p2.full_id { + equal = false; + break; + } + }, + (None, None) => {}, + _ => { + equal = false; + break; + }, + }; + } + if equal { + self.cur_refs.ref_list1.swap(0, 1); + } + } + } else { + form_ref_list(&mut self.cur_refs.ref_list1, + &self.ref_pics, &self.long_term, + &slice_hdr.reordering_list_l1, + slice_hdr.frame_num, pic_num_mask); + } + } + } + } + pub fn add_short_term(&mut self, cpic: PictureInfo, num_ref_frames: usize) { + if !self.ref_pics.is_empty() && self.ref_pics.len() >= num_ref_frames { + let base_id = i32::from(cpic.id); + let mut min_id = base_id; + let mut min_idx = 0; + for (i, pic) in 
self.ref_pics.iter().enumerate() { + let mut pic_id = i32::from(pic.id); + if pic_id > base_id { + pic_id -= self.max_frame_num; + } + if pic_id < min_id { + min_id = pic_id; + min_idx = i; + } + } + self.ref_pics.remove(min_idx); + } + if self.ref_pics.is_empty() || self.ref_pics.last().unwrap().full_id < cpic.full_id { + self.ref_pics.push(cpic); + } else { + let mut idx = 0; + for (i, pic) in self.ref_pics.iter().enumerate() { + if pic.full_id < cpic.full_id { + idx = i; + } else { + break; + } + } + self.ref_pics.insert(idx + 1, cpic); + } + } + pub fn add_long_term(&mut self, lt_idx: usize, cpic: PictureInfo) { + if lt_idx < self.long_term.len() { + self.long_term[lt_idx] = Some(cpic); + } + } +} + +fn form_ref_list(ref_list: &mut Vec>, ref_pics: &[PictureInfo], long_term: &[Option], reord_info: &ReorderingInfo, cur_id: u16, pic_num_mask: u16) { + let mut ref_pic_id = cur_id; + for (&op, &num) in reord_info.reordering_of_pic_nums_idc.iter().zip(reord_info.abs_diff_or_num.iter()).take(reord_info.num_ops) { + if op < 2 { + if op == 0 { + ref_pic_id = ref_pic_id.wrapping_sub(num) & pic_num_mask; + } else { + ref_pic_id = ref_pic_id.wrapping_add(num) & pic_num_mask; + } + let mut found = false; + for pic in ref_pics.iter() { + if pic.id == ref_pic_id { + ref_list.push(Some(pic.clone())); + found = true; + break; + } + } + if !found { + ref_list.push(None); + } + } else { + let idx = num as usize; + if idx < long_term.len() { + ref_list.push(long_term[idx].clone()); + } else { + ref_list.push(None); + } + } + } +} diff --git a/hwdec-vaapi/src/sets.rs b/hwdec-vaapi/src/sets.rs new file mode 100644 index 0000000..7fd090b --- /dev/null +++ b/hwdec-vaapi/src/sets.rs @@ -0,0 +1,425 @@ +use nihav_core::codecs::{DecoderResult, DecoderError}; +use nihav_core::io::bitreader::*; + +use super::ReadUE; + +#[derive(Clone)] +pub struct SeqParameterSet { + pub profile_idc: u8, + pub high_profile: bool, + pub constraint_set0: bool, + pub constraint_set1: bool, + pub constraint_set2: bool, + pub level_idc: u8, + pub seq_parameter_set_id: u32, + pub chroma_format_idc: u8, + pub separate_colour_plane: bool, + pub bit_depth_luma: u8, + pub bit_depth_chroma: u8, + pub qpprime_y_zero_transform_bypass: bool, + pub seq_scaling_matrix_present: bool, + pub scaling_list_4x4: [[u8; 16]; 6], + pub scaling_list_8x8: [[u8; 64]; 6], + pub log2_max_frame_num: u8, + pub pic_order_cnt_type: u8, + pub log2_max_pic_order_cnt_lsb: u8, + pub delta_pic_order_always_zero: bool, + pub offset_for_non_ref_pic: i32, + pub offset_for_top_to_bottom_field: i32, + pub num_ref_frames_in_pic_order_cnt_cycle: usize, + pub offset_for_ref_frame: [i32; 256], + pub num_ref_frames: usize, + pub gaps_in_frame_num_value_allowed: bool, + pub pic_width_in_mbs: usize, + pub pic_height_in_mbs: usize, + pub frame_mbs_only: bool, + pub mb_adaptive_frame_field: bool, + pub direct_8x8_inference: bool, + pub frame_cropping: bool, + pub frame_crop_left_offset: usize, + pub frame_crop_right_offset: usize, + pub frame_crop_top_offset: usize, + pub frame_crop_bottom_offset: usize, + pub vui_parameters_present: bool, +} + +pub fn is_high_profile(profile: u8) -> bool { + matches!(profile, 100 | 110 | 122 | 244 | 44 | 83 | 86 | 118 | 128 | 138 | 139 | 134 | 125) +} + +#[allow(clippy::cognitive_complexity)] +pub fn parse_sps(src: &[u8]) -> DecoderResult { + let mut br = BitReader::new(src, BitReaderMode::BE); + let mut sps: SeqParameterSet = unsafe { std::mem::zeroed() }; + + sps.profile_idc = br.read(8)? 
as u8; + sps.constraint_set0 = br.read_bool()?; + sps.constraint_set1 = br.read_bool()?; + sps.constraint_set2 = br.read_bool()?; + let reserved = br.read(5)?; + validate!(reserved == 0); + sps.level_idc = br.read(8)? as u8; + sps.seq_parameter_set_id = br.read_ue()?; + sps.high_profile = is_high_profile(sps.profile_idc); + if sps.high_profile { + sps.chroma_format_idc = br.read_ue_lim(3)? as u8; + if sps.chroma_format_idc == 3 { + sps.separate_colour_plane = br.read_bool()?; + } + sps.bit_depth_luma = br.read_ue_lim(6)? as u8 + 8; + sps.bit_depth_chroma = br.read_ue_lim(6)? as u8 + 8; + sps.qpprime_y_zero_transform_bypass = br.read_bool()?; + sps.seq_scaling_matrix_present = br.read_bool()?; + if sps.seq_scaling_matrix_present { + let mut slist_present = [false; 6]; + for (i, slist) in sps.scaling_list_4x4.iter_mut().enumerate() { + slist_present[i] = br.read_bool()?; + if slist_present[i] { + parse_scaling_list(&mut br, slist, i < 3)?; + } + } + for i in 1..6 { + if i == 3 { + continue; + } + if !slist_present[i] { + sps.scaling_list_4x4[i] = sps.scaling_list_4x4[i - 1]; + } + } + + let mut slist_present = [false; 6]; + let num_8x8 = if sps.chroma_format_idc != 3 { 2 } else { 6 }; + for (i, slist) in sps.scaling_list_8x8.iter_mut().take(num_8x8).enumerate() { + slist_present[i] = br.read_bool()?; + if slist_present[i] { + parse_scaling_list(&mut br, slist, (i & 1) == 0)?; + } + } + if num_8x8 > 2 { + for i in 2..6 { + if !slist_present[i] { + sps.scaling_list_8x8[i] = sps.scaling_list_8x8[i - 2]; + } + } + } + } else { + sps.scaling_list_4x4 = [[16; 16]; 6]; + sps.scaling_list_8x8 = [[16; 64]; 6]; + } + } else { + sps.chroma_format_idc = 1; + sps.bit_depth_luma = 8; + sps.bit_depth_chroma = 8; + sps.scaling_list_4x4 = [[16; 16]; 6]; + sps.scaling_list_8x8 = [[16; 64]; 6]; + } + sps.log2_max_frame_num = (br.read_ue_lim(12)? + 4) as u8; + sps.pic_order_cnt_type = br.read_ue_lim(2)? as u8; + match sps.pic_order_cnt_type { + 0 => { + sps.log2_max_pic_order_cnt_lsb = (br.read_ue_lim(12)? + 4) as u8; + }, + 1 => { + sps.delta_pic_order_always_zero = br.read_bool()?; + sps.offset_for_non_ref_pic = br.read_se()?; + sps.offset_for_top_to_bottom_field = br.read_se()?; + sps.num_ref_frames_in_pic_order_cnt_cycle = br.read_ue_lim(255)? as usize; + for offset in sps.offset_for_ref_frame[..sps.num_ref_frames_in_pic_order_cnt_cycle].iter_mut() { + *offset = br.read_se()?; + } + }, + _ => {}, + }; + sps.num_ref_frames = br.read_ue()? as usize; + validate!(sps.num_ref_frames <= super::slice::MAX_FRAMES); + sps.gaps_in_frame_num_value_allowed = br.read_bool()?; + sps.pic_width_in_mbs = (br.read_ue()? + 1) as usize; + sps.pic_height_in_mbs = (br.read_ue()? + 1) as usize; + validate!(sps.pic_width_in_mbs <= 1024 && sps.pic_height_in_mbs <= 1024); + sps.frame_mbs_only = br.read_bool()?; + if !sps.frame_mbs_only { + sps.mb_adaptive_frame_field = br.read_bool()?; + } + sps.direct_8x8_inference = br.read_bool()?; + sps.frame_cropping = br.read_bool()?; + if sps.frame_cropping { + sps.frame_crop_left_offset = br.read_ue()? as usize; + sps.frame_crop_right_offset = br.read_ue()? as usize; + sps.frame_crop_top_offset = br.read_ue()? as usize; + sps.frame_crop_bottom_offset = br.read_ue()? 
as usize; + let l = sps.frame_crop_left_offset * 2; + let r = sps.pic_width_in_mbs * 16 - sps.frame_crop_right_offset * 2; + let t = sps.frame_crop_top_offset * 2; + let d = sps.pic_height_in_mbs * 16 - sps.frame_crop_bottom_offset * 2; + validate!(l < r && t < d); + } + sps.vui_parameters_present = br.read_bool()?; + if sps.vui_parameters_present { + // xxx: vui is ignored for now + if br.read_bool()? { + let idc = br.read(8)?; + if idc == 255 { + br.read(16)?; + br.read(16)?; + } + } + if br.read_bool()? { + br.read_bool()?; + } + if br.read_bool()? { + br.read(3)?; + br.read_bool()?; + if br.read_bool()? { + br.read(8)?; + br.read(8)?; + br.read(8)?; + } + } + if br.read_bool()? { + br.read_ue()?; + br.read_ue()?; + } + if br.read_bool()? { + br.read(32)?; + br.read(32)?; + br.read_bool()?; + } + let nal_hrd_parameters_present = br.read_bool()?; + if nal_hrd_parameters_present { + skip_hrd_parameters(&mut br)?; + } + let vcl_hrd_parameters_present = br.read_bool()?; + if vcl_hrd_parameters_present { + skip_hrd_parameters(&mut br)?; + } + if nal_hrd_parameters_present || vcl_hrd_parameters_present { + br.read_bool()?; + } + br.read_bool()?; + if br.read_bool()? { + br.read_bool()?; + br.read_ue()?; + br.read_ue()?; + br.read_ue()?; + br.read_ue()?; + br.read_ue()?; + br.read_ue()?; + } + } + + Ok(sps) +} + +fn parse_scaling_list(br: &mut BitReader, slist: &mut[u8], is_intra: bool) -> DecoderResult<()> { + const DEFAULT_INTRA_4X4: [u8; 16] = [ + 6, 13, 13, 20, 20, 20, 28, 28, 28, 28, 32, 32, 32, 37, 37, 42 + ]; + const DEFAULT_INTER_4X4: [u8; 16] = [ + 10, 14, 14, 20, 20, 20, 24, 24, 24, 24, 27, 27, 27, 30, 30, 34 + ]; + const DEFAULT_INTRA_8X8: [u8; 64] = [ + 6, 10, 10, 13, 11, 13, 16, 16, 16, 16, 18, 18, 18, 18, 18, 23, + 23, 23, 23, 23, 23, 25, 25, 25, 25, 25, 25, 25, 27, 27, 27, 27, + 27, 27, 27, 27, 29, 29, 29, 29, 29, 29, 29, 31, 31, 31, 31, 31, + 31, 33, 33, 33, 33, 33, 36, 36, 36, 36, 38, 38, 38, 40, 40, 42 + ]; + const DEFAULT_INTER_8X8: [u8; 64] = [ + 9, 13, 13, 15, 13, 15, 17, 17, 17, 17, 19, 19, 19, 19, 19, 21, + 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 24, 24, 24, 24, + 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 27, 27, 27, 27, 27, + 27, 28, 28, 28, 28, 28, 30, 30, 30, 30, 32, 32, 32, 33, 33, 35 + ]; + let mut last_scale = 8u8; + let mut next_scale = 8u8; + let mut use_default = false; + for (j, elem) in slist.iter_mut().enumerate() { + if next_scale != 0 { + let delta = br.read_se()?; + next_scale = last_scale.wrapping_add(delta as u8); + if (j == 0) && (next_scale == 0) { + use_default = true; + break; + } + } + *elem = if next_scale == 0 { last_scale } else { next_scale }; + last_scale = *elem; + } + if use_default { + match (slist.len(), is_intra) { + (16, true) => slist.copy_from_slice(&DEFAULT_INTRA_4X4), + (16, false) => slist.copy_from_slice(&DEFAULT_INTER_4X4), + (64, true) => slist.copy_from_slice(&DEFAULT_INTRA_8X8), + (64, false) => slist.copy_from_slice(&DEFAULT_INTER_8X8), + _ => unreachable!(), + }; + } + Ok(()) +} + +fn skip_hrd_parameters(br: &mut BitReader) -> DecoderResult<()> { + let cpb_cnt = br.read_ue()? 
as usize + 1; + br.read(4)?; + br.read(4)?; + for _ in 0..cpb_cnt { + br.read_ue()?; + br.read_ue()?; + br.read_bool()?; + } + br.read(5)?; + br.read(5)?; + br.read(5)?; + br.read(5)?; + Ok(()) +} + +const MAX_SLICE_GROUPS: usize = 8; + +#[derive(Clone)] +pub struct PicParameterSet { + pub pic_parameter_set_id: u32, + pub seq_parameter_set_id: u32, + pub entropy_coding_mode: bool, + pub pic_order_present: bool, + pub num_slice_groups: usize, + pub slice_group_map_type: u8, + pub run_length: [u32; MAX_SLICE_GROUPS], + pub top_left: [u32; MAX_SLICE_GROUPS], + pub bottom_right: [u32; MAX_SLICE_GROUPS], + pub slice_group_change_direction: bool, + pub slice_group_change_rate: u32, + pub pic_size_in_map_units: u32, + pub num_ref_idx_l0_active: usize, + pub num_ref_idx_l1_active: usize, + pub weighted_pred: bool, + pub weighted_bipred_idc: u8, + pub pic_init_qp: u8, + pub pic_init_qs: u8, + pub chroma_qp_index_offset: i8, + pub deblocking_filter_control_present: bool, + pub constrained_intra_pred: bool, + pub redundant_pic_cnt_present: bool, + pub transform_8x8_mode: bool, + pub pic_scaling_matrix_present: bool, + pub scaling_list_4x4: [[u8; 16]; 6], + pub scaling_list_8x8: [[u8; 64]; 6], + pub second_chroma_qp_index_offset: i8, +} + +pub fn parse_pps(src: &[u8], sps_arr: &[SeqParameterSet], full_size: usize) -> DecoderResult { + let mut br = BitReader::new(src, BitReaderMode::BE); + let mut pps: PicParameterSet = unsafe { std::mem::zeroed() }; + + pps.pic_parameter_set_id = br.read_ue()?; + pps.seq_parameter_set_id = br.read_ue()?; + let mut found = false; + let mut cur_sps = None; + for sps in sps_arr.iter() { + if sps.seq_parameter_set_id == pps.seq_parameter_set_id { + found = true; + cur_sps = Some(sps); + break; + } + } + validate!(found); + let sps = cur_sps.unwrap(); + pps.entropy_coding_mode = br.read_bool()?; + pps.pic_order_present = br.read_bool()?; + pps.num_slice_groups = (br.read_ue()? + 1) as usize; + validate!(pps.num_slice_groups <= MAX_SLICE_GROUPS); + if pps.num_slice_groups > 1 { + let smtype = br.read_ue()?; + validate!(smtype <= 6); + pps.slice_group_map_type = smtype as u8; + match pps.slice_group_map_type { + 0 => { + for elem in pps.run_length[..pps.num_slice_groups].iter_mut() { + *elem = br.read_ue()?; + } + }, + 2 => { + for i in 0..pps.num_slice_groups - 1 { + pps.top_left[i] = br.read_ue()?; + pps.bottom_right[i] = br.read_ue()?; + } + }, + 3 | 4 | 5 => { + pps.slice_group_change_direction = br.read_bool()?; + pps.slice_group_change_rate = br.read_ue()?; + }, + 6 => { + pps.pic_size_in_map_units = br.read_ue()? + 1; + for _ in 0..pps.pic_size_in_map_units { + let _slice_group_id = br.read_ue()?; + } + }, + _ => {}, + }; +println!("slice mode!"); + return Err(DecoderError::NotImplemented); + } + pps.num_ref_idx_l0_active = (br.read_ue()? + 1) as usize; + pps.num_ref_idx_l1_active = (br.read_ue()? + 1) as usize; + pps.weighted_pred = br.read_bool()?; + pps.weighted_bipred_idc = br.read(2)? as u8; + let qp = br.read_se()? + 26; + validate!(qp > 0 && qp < 52); + pps.pic_init_qp = qp as u8; + let qs = br.read_se()? 
+ 26; + validate!(qs > 0 && qs < 52); + pps.pic_init_qs = qs as u8; + let off = br.read_se()?; + validate!(off >= -12 && off <= 12); + pps.chroma_qp_index_offset = off as i8; + pps.deblocking_filter_control_present = br.read_bool()?; + pps.constrained_intra_pred = br.read_bool()?; + pps.redundant_pic_cnt_present = br.read_bool()?; + if br.tell() < full_size { + pps.transform_8x8_mode = br.read_bool()?; + pps.pic_scaling_matrix_present = br.read_bool()?; + if pps.pic_scaling_matrix_present { + let mut slist_present = [false; 6]; + for (i, slist) in pps.scaling_list_4x4.iter_mut().enumerate() { + slist_present[i] = br.read_bool()?; + if slist_present[i] { + parse_scaling_list(&mut br, slist, i < 3)?; + } + } + for i in 1..6 { + if i == 3 { + continue; + } + if !slist_present[i] { + pps.scaling_list_4x4[i] = pps.scaling_list_4x4[i - 1]; + } + } + + let mut slist_present = [false; 6]; + let num_8x8 = if !pps.transform_8x8_mode { 0 } else if sps.chroma_format_idc != 3 { 2 } else { 6 }; + for (i, slist) in pps.scaling_list_8x8.iter_mut().take(num_8x8).enumerate() { + slist_present[i] = br.read_bool()?; + if slist_present[i] { + parse_scaling_list(&mut br, slist, (i & 1) == 0)?; + } + } + if num_8x8 > 2 { + for i in 2..6 { + if !slist_present[i] { + pps.scaling_list_8x8[i] = pps.scaling_list_8x8[i - 2]; + } + } + } + } else { + pps.scaling_list_4x4 = sps.scaling_list_4x4; + pps.scaling_list_8x8 = sps.scaling_list_8x8; + } + let off = br.read_se()?; + validate!(off >= -12 && off <= 12); + pps.second_chroma_qp_index_offset = off as i8; + } else { + pps.second_chroma_qp_index_offset = pps.chroma_qp_index_offset; + pps.scaling_list_4x4 = sps.scaling_list_4x4; + pps.scaling_list_8x8 = sps.scaling_list_8x8; + } + + Ok(pps) +} diff --git a/hwdec-vaapi/src/slice.rs b/hwdec-vaapi/src/slice.rs new file mode 100644 index 0000000..692b50f --- /dev/null +++ b/hwdec-vaapi/src/slice.rs @@ -0,0 +1,430 @@ +use nihav_core::codecs::{DecoderResult, DecoderError}; +use nihav_core::frame::FrameType; +use nihav_core::io::bitreader::*; + +use super::ReadUE; +use super::sets::*; + +pub const MAX_FRAMES: usize = 32; + +#[derive(Clone,Copy,Debug,PartialEq)] +pub enum SliceType { + I, + P, + B, + SI, + SP, +} + +impl SliceType { + pub fn is_intra(self) -> bool { + matches!(self, SliceType::I | SliceType::SI) + } + pub fn is_p(self) -> bool { + matches!(self, SliceType::P | SliceType::SP) + } + pub fn is_b(self) -> bool { self == SliceType::B } + pub fn is_s(self) -> bool { + matches!(self, SliceType::SI | SliceType::SP) + } + pub fn to_frame_type(self) -> FrameType { + match self { + SliceType::I | SliceType::SI => FrameType::I, + SliceType::P | SliceType::SP => FrameType::P, + SliceType::B => FrameType::B, + } + } +} + +const SLICE_TYPES: [SliceType; 10] = [ + SliceType::P, SliceType::B, SliceType::I, SliceType::SP, SliceType::SI, + SliceType::P, SliceType::B, SliceType::I, SliceType::SP, SliceType::SI, +]; + +#[derive(Clone,Copy,Default)] +pub struct WeightInfo { + pub luma_weighted: bool, + pub luma_weight: i8, + pub luma_offset: i8, + pub luma_shift: u8, + pub chroma_weighted: bool, + pub chroma_weight: [i8; 2], + pub chroma_offset: [i8; 2], + pub chroma_shift: u8, +} + +impl WeightInfo { + pub fn is_weighted(&self) -> bool { + self.luma_weighted || self.chroma_weighted + } +} + +#[derive(Clone,Copy)] +pub struct ReorderingInfo { + pub reordering_of_pic_nums_idc: [u8; MAX_FRAMES], + pub abs_diff_or_num: [u16; MAX_FRAMES], + pub num_ops: usize, +} + +#[derive(Clone,Copy)] +pub struct AdaptiveMarking { + pub 
memory_management_control_op: [u8; MAX_FRAMES],
+    pub operation_arg: [u16; MAX_FRAMES],
+    pub operation_arg2: [u16; MAX_FRAMES],
+    pub num_ops: usize,
+}
+
+#[derive(Clone)]
+pub struct SliceHeader {
+    pub first_mb_in_slice: usize,
+    pub slice_type: SliceType,
+    pub same_slice_type: bool,
+    pub pic_parameter_set_id: u32,
+    pub frame_num: u16,
+    pub field_pic: bool,
+    pub bottom_field: bool,
+    pub idr_pic_id: u16,
+    pub pic_order_cnt_lsb: u16,
+    pub delta_pic_order_cnt_bottom: i32,
+    pub delta_pic_order_cnt: [i32; 2],
+    pub redundant_pic_cnt: u8,
+    pub direct_spatial_mv_pred: bool,
+    pub num_ref_idx_active_override: bool,
+    pub num_ref_idx_l0_active: usize,
+    pub num_ref_idx_l1_active: usize,
+    pub ref_pic_list_reordering_l0: bool,
+    pub reordering_list_l0: ReorderingInfo,
+    pub ref_pic_list_reordering_l1: bool,
+    pub reordering_list_l1: ReorderingInfo,
+    pub luma_log2_weight_denom: u8,
+    pub chroma_log2_weight_denom: u8,
+    pub weights_l0: [WeightInfo; MAX_FRAMES],
+    pub weights_l1: [WeightInfo; MAX_FRAMES],
+    pub no_output_of_prior_pics: bool,
+    pub long_term_reference: bool,
+    pub adaptive_ref_pic_marking_mode: bool,
+    pub adaptive_ref_pic_marking: AdaptiveMarking,
+    pub cabac_init_idc: u8,
+    pub slice_qp_delta: i32,
+    pub slice_qp: u8,
+    pub sp_for_switch: bool,
+    pub slice_qs_delta: i32,
+    pub slice_qs: u8,
+    pub disable_deblocking_filter_idc: u8,
+    pub slice_alpha_c0_offset: i8,
+    pub slice_beta_offset: i8,
+    pub slice_group_change_cycle: u32,
+}
+
+pub const DEF_WEIGHT_INFO: WeightInfo = WeightInfo {
+    luma_weighted: false,
+    luma_weight: 0,
+    luma_offset: 0,
+    luma_shift: 0,
+    chroma_weighted: false,
+    chroma_weight: [0; 2],
+    chroma_offset: [0; 2],
+    chroma_shift: 0,
+};
+
+impl SliceHeader {
+    #[allow(clippy::collapsible_else_if)]
+    pub fn get_weight(&self, list_id: u8, idx: usize) -> WeightInfo {
+        if list_id == 0 {
+            if idx < self.num_ref_idx_l0_active {
+                self.weights_l0[idx]
+            } else {
+                DEF_WEIGHT_INFO
+            }
+        } else {
+            if idx < self.num_ref_idx_l1_active {
+                self.weights_l1[idx]
+            } else {
+                DEF_WEIGHT_INFO
+            }
+        }
+    }
+}
+
+/*pub fn parse_slice_header_minimal(br: &mut BitReader) -> DecoderResult<(usize, SliceType)> {
+    let first_mb_in_slice = br.read_ue()? as usize;
+    let stype = br.read_ue_lim(SLICE_TYPES.len() as u32 - 1)?;
+    let slice_type = SLICE_TYPES[stype as usize];
+    Ok((first_mb_in_slice, slice_type))
+}*/
+
+#[allow(clippy::cognitive_complexity)]
+#[allow(clippy::manual_range_contains)]
+pub fn parse_slice_header(br: &mut BitReader, sps_arr: &[SeqParameterSet], pps_arr: &[PicParameterSet], is_idr: bool, nal_ref_idc: u8) -> DecoderResult<SliceHeader> {
+    let mut hdr: SliceHeader = unsafe { std::mem::zeroed() };
+
+    hdr.first_mb_in_slice = br.read_ue()? as usize;
+    let stype = br.read_ue_lim(SLICE_TYPES.len() as u32 - 1)?;
+    hdr.slice_type = SLICE_TYPES[stype as usize];
+    hdr.same_slice_type = stype >= 5;
+    hdr.pic_parameter_set_id = br.read_ue()?;
+
+    let mut pps_ptr = None;
+    for pps in pps_arr.iter() {
+        if pps.pic_parameter_set_id == hdr.pic_parameter_set_id {
+            pps_ptr = Some(pps);
+            break;
+        }
+    }
+    validate!(pps_ptr.is_some());
+    let pps = pps_ptr.unwrap();
+    let mut sps_ptr = None;
+    for sps in sps_arr.iter() {
+        if sps.seq_parameter_set_id == pps.seq_parameter_set_id {
+            sps_ptr = Some(sps);
+            break;
+        }
+    }
+    validate!(sps_ptr.is_some());
+    let sps = sps_ptr.unwrap();
+
+    hdr.frame_num = br.read(sps.log2_max_frame_num)?
as u16; + if !sps.frame_mbs_only { + hdr.field_pic = br.read_bool()?; + if hdr.field_pic { + hdr.bottom_field = br.read_bool()?; + } + } + + if is_idr { + hdr.idr_pic_id = br.read_ue_lim(65535)? as u16; + } + if sps.pic_order_cnt_type == 0 { + hdr.pic_order_cnt_lsb = br.read(sps.log2_max_pic_order_cnt_lsb)? as u16; + if pps.pic_order_present && !hdr.field_pic { + hdr.delta_pic_order_cnt_bottom = br.read_se()?; + } + } else if sps.pic_order_cnt_type == 1 && !sps.delta_pic_order_always_zero { + hdr.delta_pic_order_cnt[0] = br.read_se()?; + if pps.pic_order_present && !hdr.field_pic { + hdr.delta_pic_order_cnt[1] = br.read_se()?; + } + } + if pps.redundant_pic_cnt_present { + hdr.redundant_pic_cnt = br.read_ue_lim(127)? as u8; + } + if hdr.slice_type.is_b() { + hdr.direct_spatial_mv_pred = br.read_bool()?; + } + if !hdr.slice_type.is_intra() { + hdr.num_ref_idx_active_override = br.read_bool()?; + if hdr.num_ref_idx_active_override { + hdr.num_ref_idx_l0_active = (br.read_ue_lim(15)? + 1) as usize; + if hdr.slice_type.is_b() { + hdr.num_ref_idx_l1_active = (br.read_ue_lim(15)? + 1) as usize; + } + } else { + hdr.num_ref_idx_l0_active = pps.num_ref_idx_l0_active; + if hdr.slice_type.is_b() { + hdr.num_ref_idx_l1_active = pps.num_ref_idx_l1_active; + } + } + } + parse_ref_pic_list_reordering(&mut hdr, br)?; + if (pps.weighted_pred && hdr.slice_type.is_p()) || + (pps.weighted_bipred_idc == 1 && hdr.slice_type.is_b()) { + parse_pred_weight_table(&mut hdr, br)?; + } else { + for weight in hdr.weights_l0[..hdr.num_ref_idx_l0_active].iter_mut() { + weight.luma_weighted = false; + weight.chroma_weighted = false; + } + for weight in hdr.weights_l1[..hdr.num_ref_idx_l1_active].iter_mut() { + weight.luma_weighted = false; + weight.chroma_weighted = false; + } + } + if nal_ref_idc != 0 { + if is_idr { + hdr.no_output_of_prior_pics = br.read_bool()?; + hdr.long_term_reference = br.read_bool()?; + } else { + hdr.adaptive_ref_pic_marking_mode = br.read_bool()?; + if hdr.adaptive_ref_pic_marking_mode { + let mark_info = &mut hdr.adaptive_ref_pic_marking; + loop { + let memory_management_control_op = br.read_ue_lim(6)? as u8; + if memory_management_control_op == 0 { + break; + } + if mark_info.num_ops >= mark_info.memory_management_control_op.len() { + return Err(DecoderError::NotImplemented); + } + mark_info.memory_management_control_op[mark_info.num_ops] = memory_management_control_op; + mark_info.operation_arg[mark_info.num_ops] = match memory_management_control_op { + 1 | 3 => { + let difference_of_pic_nums = br.read_ue()? + 1; + difference_of_pic_nums as u16 + }, + 2 => { + let long_term_pic_num = br.read_ue_lim(65535)?; + long_term_pic_num as u16 + }, + 6 => { + let long_term_frame_idx = br.read_ue_lim(65536)?; + long_term_frame_idx as u16 + }, + 4 => { + let max_long_term_frame_idx_plus1 = br.read_ue()?; + max_long_term_frame_idx_plus1 as u16 + }, + _ => 0, + }; + mark_info.operation_arg2[mark_info.num_ops] = if memory_management_control_op == 3 { + let long_term_frame_idx = br.read_ue_lim(65536)?; + long_term_frame_idx as u16 + } else { + 0 + }; + mark_info.num_ops += 1; + } + } + } + } + if pps.entropy_coding_mode && !hdr.slice_type.is_intra() { + hdr.cabac_init_idc = br.read_ue_lim(2)? 
as u8; + } + hdr.slice_qp_delta = br.read_se()?; + let qp = i32::from(pps.pic_init_qp) + hdr.slice_qp_delta; + validate!(qp >= 0 && qp <= 51); + hdr.slice_qp = qp as u8; + if hdr.slice_type.is_s() { + if hdr.slice_type == SliceType::SP { + hdr.sp_for_switch = br.read_bool()?; + } + hdr.slice_qs_delta = br.read_se()?; + let qs = i32::from(pps.pic_init_qs) + hdr.slice_qs_delta; + validate!(qs >= 0 && qs <= 51); + hdr.slice_qs = qs as u8; + } + if pps.deblocking_filter_control_present { + hdr.disable_deblocking_filter_idc = br.read_ue_lim(2)? as u8; + if hdr.disable_deblocking_filter_idc != 1 { + let val = br.read_se()?; + validate!(val >= -6 && val <= 6); + hdr.slice_alpha_c0_offset = val as i8 * 2; + let val = br.read_se()?; + validate!(val >= -6 && val <= 6); + hdr.slice_beta_offset = val as i8 * 2; + } + } + if pps.num_slice_groups > 1 && pps.slice_group_map_type >= 3 && pps.slice_group_map_type <= 5 { + hdr.slice_group_change_cycle = br.read_ue()?; + } + + Ok(hdr) +} + +fn parse_ref_pic_list_reordering(hdr: &mut SliceHeader, br: &mut BitReader) -> DecoderResult<()> { + if !hdr.slice_type.is_intra() { + hdr.ref_pic_list_reordering_l0 = br.read_bool()?; + let reord_list = &mut hdr.reordering_list_l0; + reord_list.num_ops = 0; + if hdr.ref_pic_list_reordering_l0 { + loop { + let reordering_of_pic_nums_idc = br.read_ue_lim(3)?; + if reordering_of_pic_nums_idc == 3 { + break; + } + validate!(reord_list.num_ops < MAX_FRAMES); + reord_list.reordering_of_pic_nums_idc[reord_list.num_ops] = reordering_of_pic_nums_idc as u8; + if reordering_of_pic_nums_idc != 2 { + let abs_diff_pic_num = br.read_ue()? + 1; + reord_list.abs_diff_or_num[reord_list.num_ops] = abs_diff_pic_num as u16; + } else { + let long_term_pic_num = br.read_ue()?; + reord_list.abs_diff_or_num[reord_list.num_ops] = long_term_pic_num as u16; + } + reord_list.num_ops += 1; + } + validate!(reord_list.num_ops > 0); + } + } + if hdr.slice_type.is_b() { + hdr.ref_pic_list_reordering_l1 = br.read_bool()?; + let reord_list = &mut hdr.reordering_list_l1; + reord_list.num_ops = 0; + if hdr.ref_pic_list_reordering_l1 { + loop { + let reordering_of_pic_nums_idc = br.read_ue_lim(3)?; + if reordering_of_pic_nums_idc == 3 { + break; + } + validate!(reord_list.num_ops < MAX_FRAMES); + reord_list.reordering_of_pic_nums_idc[reord_list.num_ops] = reordering_of_pic_nums_idc as u8; + if reordering_of_pic_nums_idc != 2 { + let abs_diff_pic_num = br.read_ue()? + 1; + reord_list.abs_diff_or_num[reord_list.num_ops] = abs_diff_pic_num as u16; + } else { + let long_term_pic_num = br.read_ue()?; + reord_list.abs_diff_or_num[reord_list.num_ops] = long_term_pic_num as u16; + } + reord_list.num_ops += 1; + } + validate!(reord_list.num_ops > 0); + } + } + Ok(()) +} + +fn parse_pred_weight_table(hdr: &mut SliceHeader, br: &mut BitReader) -> DecoderResult<()> { + hdr.luma_log2_weight_denom = br.read_ue_lim(7)? as u8; + hdr.chroma_log2_weight_denom = br.read_ue_lim(7)? 
as u8; + for weight in hdr.weights_l0[..hdr.num_ref_idx_l0_active].iter_mut() { + weight.luma_weighted = br.read_bool()?; + if weight.luma_weighted { + let w = br.read_se()?; + validate!(w >= -128 && w <= 127); + weight.luma_weight = w as i8; + let offset = br.read_se()?; + validate!(offset >= -128 && offset <= 127); + weight.luma_offset = offset as i8; + } + weight.luma_shift = hdr.luma_log2_weight_denom; + + weight.chroma_weighted = br.read_bool()?; + if weight.chroma_weighted { + for i in 0..2 { + let w = br.read_se()?; + validate!(w >= -128 && w <= 127); + weight.chroma_weight[i] = w as i8; + let offset = br.read_se()?; + validate!(offset >= -128 && offset <= 127); + weight.chroma_offset[i] = offset as i8; + } + } + weight.chroma_shift = hdr.chroma_log2_weight_denom; + } + for weight in hdr.weights_l1[..hdr.num_ref_idx_l1_active].iter_mut() { + weight.luma_weighted = br.read_bool()?; + if weight.luma_weighted { + let w = br.read_se()?; + validate!(w >= -128 && w <= 127); + weight.luma_weight = w as i8; + let offset = br.read_se()?; + validate!(offset >= -128 && offset <= 127); + weight.luma_offset = offset as i8; + } + weight.luma_shift = hdr.luma_log2_weight_denom; + + weight.chroma_weighted = br.read_bool()?; + if weight.chroma_weighted { + for i in 0..2 { + let w = br.read_se()?; + validate!(w >= -128 && w <= 127); + weight.chroma_weight[i] = w as i8; + let offset = br.read_se()?; + validate!(offset >= -128 && offset <= 127); + weight.chroma_offset[i] = offset as i8; + } + } + weight.chroma_shift = hdr.chroma_log2_weight_denom; + } + + Ok(()) +}
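
A minimal, self-contained sketch of the delta_scale rule that parse_scaling_list in sets.rs implements: each scaling value is the previous one plus an se(v) delta taken modulo 256, a running value of 0 means "repeat the last value for the rest of the list", and a first delta that lands on 0 selects the default matrix. The sketch consumes a slice of already-decoded deltas instead of a BitReader, so the names apply_scaling_deltas and fallback are illustrative assumptions and not part of the patch.

fn apply_scaling_deltas(deltas: &[i32], fallback: &[u8], out: &mut [u8]) {
    let mut last_scale = 8u8;
    let mut next_scale = 8u8;
    let mut use_default = false;
    let mut delta_iter = deltas.iter();
    for (j, elem) in out.iter_mut().enumerate() {
        if next_scale != 0 {
            // delta_scale is accumulated modulo 256, like last_scale.wrapping_add() above
            let delta = *delta_iter.next().unwrap_or(&0);
            next_scale = last_scale.wrapping_add(delta as u8);
            if j == 0 && next_scale == 0 {
                // a first delta that produces 0 selects the default (fall-back) matrix
                use_default = true;
                break;
            }
        }
        // once next_scale reaches 0 the previous value is repeated to the end of the list
        *elem = if next_scale == 0 { last_scale } else { next_scale };
        last_scale = *elem;
    }
    if use_default {
        out.copy_from_slice(fallback);
    }
}

fn main() {
    let fallback = [16u8; 16];
    let mut list = [0u8; 16];
    // two explicit deltas; the second drives next_scale to 0, so 10 repeats afterwards
    apply_scaling_deltas(&[2, -10], &fallback, &mut list);
    println!("{:?}", list);
}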
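
The slice-level values derived near the end of parse_slice_header follow a similar pattern: slice_qp is pic_init_qp plus slice_qp_delta and must stay within 0..=51, and the deblocking offsets are signalled divided by two in the -6..=6 range and doubled when stored. A small sketch under those assumptions; derive_slice_qp and derive_deblock_offset are illustrative names, not crate API.

fn derive_slice_qp(pic_init_qp: u8, slice_qp_delta: i32) -> Option<u8> {
    let qp = i32::from(pic_init_qp) + slice_qp_delta;
    // parse_slice_header rejects the slice when the result leaves 0..=51
    if (0..=51).contains(&qp) { Some(qp as u8) } else { None }
}

fn derive_deblock_offset(coded_div2: i32) -> Option<i8> {
    // slice_alpha_c0_offset_div2 / slice_beta_offset_div2 are coded halved and doubled on read
    if (-6..=6).contains(&coded_div2) { Some(coded_div2 as i8 * 2) } else { None }
}

fn main() {
    assert_eq!(derive_slice_qp(26, -3), Some(23));
    assert_eq!(derive_slice_qp(51, 1), None);
    assert_eq!(derive_deblock_offset(-2), Some(-4));
}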
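
In the adaptive reference picture marking loop, operation_arg holds a different syntax element depending on the opcode, and operation_arg2 is only used by opcode 3. A hedged sketch of that mapping; the MmcoArg enum and classify function are purely illustrative and not part of the crate.

#[derive(Debug)]
enum MmcoArg {
    DifferenceOfPicNums(u16),      // ops 1 and 3: difference_of_pic_nums (coded minus 1)
    LongTermPicNum(u16),           // op 2
    MaxLongTermFrameIdxPlus1(u16), // op 4
    LongTermFrameIdx(u16),         // op 6; for op 3 this value goes into operation_arg2
    None,                          // op 5 carries no argument
}

fn classify(op: u8, arg: u16) -> MmcoArg {
    match op {
        1 | 3 => MmcoArg::DifferenceOfPicNums(arg),
        2 => MmcoArg::LongTermPicNum(arg),
        4 => MmcoArg::MaxLongTermFrameIdxPlus1(arg),
        6 => MmcoArg::LongTermFrameIdx(arg),
        _ => MmcoArg::None,
    }
}

fn main() {
    println!("{:?}", classify(3, 7));
}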