add a crate for VAAPI-based H.264 decoding
author Kostya Shishkov <kostya.shishkov@gmail.com>
Fri, 20 Oct 2023 16:37:37 +0000 (18:37 +0200)
committer Kostya Shishkov <kostya.shishkov@gmail.com>
Fri, 20 Oct 2023 16:37:37 +0000 (18:37 +0200)
hwdec-vaapi/Cargo.toml [new file with mode: 0644]
hwdec-vaapi/src/lib.rs [new file with mode: 0644]
hwdec-vaapi/src/pic_ref.rs [new file with mode: 0644]
hwdec-vaapi/src/sets.rs [new file with mode: 0644]
hwdec-vaapi/src/slice.rs [new file with mode: 0644]

diff --git a/hwdec-vaapi/Cargo.toml b/hwdec-vaapi/Cargo.toml
new file mode 100644 (file)
index 0000000..df79b33
--- /dev/null
@@ -0,0 +1,20 @@
+[package]
+name = "hwdec_vaapi"
+version = "0.1.0"
+authors = ["Kostya Shishkov <kostya.shishkov@gmail.com>"]
+edition = "2018"
+rust-version = "1.69"
+
+[dependencies.libva]
+package = "nihed-cros-libva"
+version = "0.0.4"
+path = "../nihed-cros-libva"
+
+[dependencies.nihav_core]
+path = "../../nihav-core"
+
+[dev-dependencies]
+nihav_commonfmt = { path = "../../nihav-commonfmt", default-features=false, features = ["all_demuxers"] }
+
+[features]
+default = []
diff --git a/hwdec-vaapi/src/lib.rs b/hwdec-vaapi/src/lib.rs
new file mode 100644 (file)
index 0000000..ca843c2
--- /dev/null
@@ -0,0 +1,1233 @@
+use std::collections::VecDeque;
+use std::convert::TryInto;
+use std::rc::Rc;
+
+use nihav_core::codecs::*;
+use nihav_core::io::byteio::*;
+use nihav_core::io::bitreader::*;
+use nihav_core::io::intcode::*;
+
+use libva::*;
+
+#[cfg(debug_assertions)]
+macro_rules! validate {
+    ($a:expr) => { if !$a { println!("check failed at {}:{}", file!(), line!()); return Err(DecoderError::InvalidData); } };
+}
+#[cfg(not(debug_assertions))]
+macro_rules! validate {
+    ($a:expr) => { if !$a { return Err(DecoderError::InvalidData); } };
+}
+
+mod pic_ref;
+pub use pic_ref::*;
+#[allow(clippy::manual_range_contains)]
+#[allow(clippy::needless_range_loop)]
+mod sets;
+use sets::*;
+#[allow(clippy::manual_range_contains)]
+mod slice;
+use slice::*;
+
+trait ReadUE {
+    fn read_ue(&mut self) -> DecoderResult<u32>;
+    fn read_te(&mut self, range: u32) -> DecoderResult<u32>;
+    fn read_ue_lim(&mut self, max_val: u32) -> DecoderResult<u32> {
+        let val = self.read_ue()?;
+        validate!(val <= max_val);
+        Ok(val)
+    }
+    fn read_se(&mut self) -> DecoderResult<i32> {
+        let val = self.read_ue()?;
+        if (val & 1) != 0 {
+            Ok (((val >> 1) as i32) + 1)
+        } else {
+            Ok (-((val >> 1) as i32))
+        }
+    }
+}
+
+impl<'a> ReadUE for BitReader<'a> {
+    fn read_ue(&mut self) -> DecoderResult<u32> {
+        Ok(self.read_code(UintCodeType::GammaP)? - 1)
+    }
+    fn read_te(&mut self, range: u32) -> DecoderResult<u32> {
+        if range == 1 {
+            if self.read_bool()? {
+                Ok(0)
+            } else {
+                Ok(1)
+            }
+        } else {
+            let val = self.read_ue()?;
+            validate!(val <= range);
+            Ok(val)
+        }
+    }
+}
+
+fn get_long_term_id(is_idr: bool, slice_hdr: &SliceHeader) -> Option<usize> {
+    if is_idr && !slice_hdr.long_term_reference {
+        None
+    } else {
+        let marking = &slice_hdr.adaptive_ref_pic_marking;
+        for (&op, &arg) in marking.memory_management_control_op.iter().zip(marking.operation_arg.iter()).take(marking.num_ops) {
+            if op == 6 {
+                return Some(arg as usize);
+            }
+        }
+        None
+    }
+}
+
+fn unescape_nal(src: &[u8], dst: &mut Vec<u8>) -> usize {
+    let mut off = 0;
+    let mut zrun = 0;
+    dst.clear();
+    dst.reserve(src.len());
+    while off < src.len() {
+        dst.push(src[off]);
+        if src[off] != 0 {
+            zrun = 0;
+        } else {
+            zrun += 1;
+            if zrun == 2 && off + 1 < src.len() && src[off + 1] == 0x03 {
+                zrun = 0;
+                off += 1;
+            }
+            if zrun >= 3 && off + 1 < src.len() && src[off + 1] == 0x01 {
+                off -= 3;
+                dst.truncate(off);
+                break;
+            }
+        }
+        off += 1;
+    }
+    off
+}
+
+fn make_dummy_h264_pic() -> PictureH264 {
+    PictureH264::new(VA_INVALID_ID, 0, H264PictureFlag::Invalid.into(), 0, 0)
+}
+
+/// Conversion of a picture description into the `PictureH264` form.
+trait MakePicH264 {
+    /// Builds the `PictureH264` describing `self`.
+    fn make_pic(&self) -> PictureH264;
+}
+
+impl MakePicH264 for PictureInfo {
+    fn make_pic(&self) -> PictureH264 {
+        let mut flags = H264PictureFlags::default();
+        let frame_idx = if let Some(id) = self.long_term {
+                flags |= H264PictureFlag::LongTermReference;
+                id as u32
+            } else {
+                if self.is_ref {
+                    flags |= H264PictureFlag::ShortTermReference;
+                }
+                u32::from(self.id)
+            };
+        PictureH264::new(self.surface_id, frame_idx, flags, self.top_id as i32, self.bot_id as i32)
+    }
+}
+
+fn map_ref_list(refs: &[Option<PictureInfo>]) -> [PictureH264; 32] {
+    let mut ref_list = Vec::with_capacity(32);
+
+    for rpic in refs.iter() {
+        ref_list.push(rpic.as_ref().map_or_else(make_dummy_h264_pic, |pic| pic.make_pic()));
+    }
+
+    while ref_list.len() < 32 {
+        ref_list.push(make_dummy_h264_pic());
+    }
+    if let Ok(ret) = ref_list.try_into() {
+        ret
+    } else {
+        panic!("can't convert");
+    }
+}
+
+fn profile_name(profile: VAProfile::Type) -> &'static str {
+    match profile {
+        VAProfile::VAProfileMPEG2Simple => "MPEG2 Simple",
+        VAProfile::VAProfileMPEG2Main => "MPEG2 Main",
+        VAProfile::VAProfileMPEG4Simple => "MPEG4 Simple",
+        VAProfile::VAProfileMPEG4AdvancedSimple => "MPEG4 Advanced Simple",
+        VAProfile::VAProfileMPEG4Main => "MPEG4 Main",
+        VAProfile::VAProfileH264Baseline => "H264 Baseline",
+        VAProfile::VAProfileH264Main => "H264 Main",
+        VAProfile::VAProfileH264High => "H264 High",
+        VAProfile::VAProfileVC1Simple => "VC1 Simple",
+        VAProfile::VAProfileVC1Main => "VC1 Main",
+        VAProfile::VAProfileVC1Advanced => "VC1 Advanced",
+        VAProfile::VAProfileH263Baseline => "H263 Baseline",
+        VAProfile::VAProfileJPEGBaseline => "JPEG Baseline",
+        VAProfile::VAProfileH264ConstrainedBaseline => "H264 Constrained Baseline",
+        VAProfile::VAProfileVP8Version0_3 => "VP8",
+        VAProfile::VAProfileH264MultiviewHigh => "H.264 Multiview High",
+        VAProfile::VAProfileH264StereoHigh => "H264 Stereo High",
+        VAProfile::VAProfileHEVCMain => "H.EVC Main",
+        VAProfile::VAProfileHEVCMain10 => "H.EVC Main10",
+        VAProfile::VAProfileVP9Profile0 => "VP9 Profile 0",
+        VAProfile::VAProfileVP9Profile1 => "VP9 Profile 1",
+        VAProfile::VAProfileVP9Profile2 => "VP9 Profile 2",
+        VAProfile::VAProfileVP9Profile3 => "VP9 Profile 3",
+        VAProfile::VAProfileHEVCMain12 => "HEVC Main12",
+        VAProfile::VAProfileHEVCMain422_10 => "HEVC Main10 4:2:2",
+        VAProfile::VAProfileHEVCMain422_12 => "HEVC Main12 4:2:2",
+        VAProfile::VAProfileHEVCMain444 => "HEVC Main 4:4:4",
+        VAProfile::VAProfileHEVCMain444_10 => "HEVC Main10 4:4:4",
+        VAProfile::VAProfileHEVCMain444_12 => "HEVC Main12 4:4:4",
+        VAProfile::VAProfileHEVCSccMain => "HEVC SCC Main",
+        VAProfile::VAProfileHEVCSccMain10 => "HEVC SCC Main10",
+        VAProfile::VAProfileHEVCSccMain444 => "HEVC SCC Main 4:4:4",
+        VAProfile::VAProfileAV1Profile0 => "AV1 Profile 0",
+        VAProfile::VAProfileAV1Profile1 => "AV1 Profile 1",
+        VAProfile::VAProfileHEVCSccMain444_10 => "HEVC SCC Main10 4:4:4",
+        _ => "unknown",
+    }
+}
+
+const NUM_REF_PICS: usize = 16;
+
+/// A frame queued in the `Reorderer`.
+struct WaitingFrame {
+    /// timestamp that `Reorderer::add_frame` compares when queueing
+    ts:     u64,
+    /// picture whose surface status `Reorderer::get_frame` polls
+    pic:    Picture<PictureEnd>,
+    is_idr: bool,
+    /// reference frames are appended to the queue, other frames are inserted by timestamp
+    is_ref: bool,
+    ftype:  FrameType,
+}
+
+/// Queue that holds frames until `get_frame` may hand them out.
+struct Reorderer {
+    /// timestamp of the most recently added reference frame
+    last_ref_dts:   Option<u64>,
+    /// number of frames at the front of `frames` that `get_frame` may return
+    ready_idx:      usize,
+    /// queued frames; the front one is returned first
+    frames:         VecDeque<WaitingFrame>,
+}
+
+impl Default for Reorderer {
+    fn default() -> Self {
+        Self {
+            last_ref_dts:   None,
+            ready_idx:      0,
+            frames:         VecDeque::with_capacity(16),
+        }
+    }
+}
+
+impl Reorderer {
+    /// Queues a frame.
+    ///
+    /// A non-reference frame is inserted in front of the first queued frame
+    /// with a larger timestamp. A reference frame first marks everything up to
+    /// and including the previous reference frame as ready, then is appended.
+    fn add_frame(&mut self, new_frame: WaitingFrame) {
+        if !new_frame.is_ref {
+            if self.frames.is_empty() {
+                self.frames.push_back(new_frame);
+            } else {
+                let new_dts = new_frame.ts;
+                let mut idx = 0;
+                // When no queued frame has a larger timestamp the loop ends
+                // with `idx` at the last index, so the new frame is placed
+                // just before the last queued frame rather than appended.
+                for (i, frm) in self.frames.iter().enumerate() {
+                    idx = i;
+                    if frm.ts > new_dts {
+                        break;
+                    }
+                }
+                self.frames.insert(idx, new_frame);
+            }
+        } else {
+            // find the previous reference frame; the last match wins
+            for (i, frm) in self.frames.iter().enumerate() {
+                if Some(frm.ts) == self.last_ref_dts {
+                    self.ready_idx = i + 1;
+                }
+            }
+            self.last_ref_dts = Some(new_frame.ts);
+            self.frames.push_back(new_frame);
+        }
+    }
+    /// Returns the front frame if it is marked ready.
+    ///
+    /// Returns `None` when nothing is ready or the front surface is still
+    /// rendering. With more than 16 ready frames the front frame is returned
+    /// whatever its status. Panics via `unimplemented!()` on any other status
+    /// or on a failed status query.
+    fn get_frame(&mut self) -> Option<WaitingFrame> {
+        if self.ready_idx > 0 {
+            match self.frames[0].pic.query_status() {
+                // the guard arm comes first, so it overrides every status below
+                _ if self.ready_idx > 16 => {},
+                Ok(VASurfaceStatus::Ready) => {},
+                Ok(VASurfaceStatus::Rendering) => return None,
+                _ => {
+                    unimplemented!();
+                },
+            };
+            self.ready_idx -= 1;
+            self.frames.pop_front()
+        } else {
+            None
+        }
+    }
+    /// Forgets the last reference timestamp and the ready count.
+    /// The queued frames themselves are left in place.
+    fn flush(&mut self) {
+        self.last_ref_dts = None;
+        self.ready_idx = 0;
+    }
+}
+
+/// VA-API objects created by `VaapiH264Decoder::init`.
+#[allow(dead_code)]
+struct VaapiInternals {
+    display:        Rc<Display>,
+    /// context used to create the pictures and parameter buffers
+    context:        Rc<Context>,
+    /// pictures paired with a surface id; drained on an IDR slice, which
+    /// returns their surfaces to `surfaces`
+    ref_pics:       Vec<(Picture<PictureSync>, VASurfaceID)>,
+    /// free surfaces; decoding pops one per packet
+    surfaces:       Vec<Surface>,
+    /// image format picked in `init` (one with 12 bits per pixel when available)
+    ifmt:           VAImageFormat,
+}
+
+/// H.264 decoder that submits its work through VA-API.
+pub struct VaapiH264Decoder {
+    info:           NACodecInfoRef,
+    /// VA-API state; `None` until `init` has succeeded
+    vaapi:          Option<VaapiInternals>,
+    /// sequence parameter sets parsed from the extradata
+    spses:          Vec<SeqParameterSet>,
+    /// picture parameter sets parsed from the extradata, one per id
+    ppses:          Vec<PicParameterSet>,
+    frame_refs:     FrameRefs,
+    /// size in bytes of the length prefix in front of each NAL unit
+    nal_len:        u8,
+    /// output buffer allocated in `init`
+    out_frm:        NABufferType,
+    reorderer:      Reorderer,
+    /// timebase numerator copied from the packet being decoded
+    tb_num:         u32,
+    /// timebase denominator copied from the packet being decoded
+    tb_den:         u32,
+}
+
+fn fill_frame(ifmt: VAImageFormat, pic: &Picture<PictureSync>, frm: &mut NABufferType) -> DecoderResult<()> {
+    let mut vbuf = frm.get_vbuf().unwrap();
+    let (w, h) = pic.surface_size();
+    //let cur_ts = pic.timestamp();
+
+    let img = Image::new(pic, ifmt, w, h, true).expect("get image");
+
+    let iimg = img.image();
+    let imgdata: &[u8] = img.as_ref();
+
+    match iimg.format.fourcc().map_err(|_| DecoderError::InvalidData)? {
+        VAFourcc::NV12 => {
+            let frm = NASimpleVideoFrame::from_video_buf(&mut vbuf).unwrap();
+            validate!(iimg.width == (frm.width[0] as u16));
+            validate!(iimg.height == (frm.height[0] as u16));
+
+            for (dline, sline) in frm.data[frm.offset[0]..].chunks_mut(frm.stride[0])
+                    .zip(imgdata[iimg.offsets[0] as usize..].chunks(iimg.pitches[0] as usize))
+                    .take(frm.height[0]) {
+                dline[..frm.width[0]].copy_from_slice(&sline[..frm.width[0]]);
+            }
+
+            let mut uoff = frm.offset[1];
+            let mut voff = frm.offset[2];
+            for cline in imgdata[iimg.offsets[1] as usize..].chunks(iimg.pitches[1] as usize).take(frm.height[1]) {
+                for (x, pair) in cline.chunks_exact(2).take(frm.width[1]).enumerate() {
+                    frm.data[uoff + x] = pair[0];
+                    frm.data[voff + x] = pair[1];
+                }
+                uoff += frm.stride[1];
+                voff += frm.stride[2];
+            }
+        },
+        _ => unimplemented!(),
+    };
+    Ok(())
+}
+
+impl Default for VaapiH264Decoder {
+    fn default() -> Self {
+        Self {
+            info:           NACodecInfoRef::default(),
+            vaapi:          None,
+            spses:          Vec::with_capacity(1),
+            ppses:          Vec::with_capacity(4),
+            frame_refs:     FrameRefs::new(),
+            nal_len:        0,
+            out_frm:        NABufferType::None,
+            reorderer:      Reorderer::default(),
+            tb_num:         0,
+            tb_den:         0,
+        }
+    }
+}
+
+impl VaapiH264Decoder {
+    /// Creates a decoder in its default state; same as `Self::default()`.
+    pub fn new() -> Self { Self::default() }
+    pub fn init(&mut self, info: NACodecInfoRef) -> DecoderResult<()> {
+        if let NACodecTypeInfo::Video(vinfo) = info.get_properties() {
+            let edata = info.get_extradata().unwrap();
+//print!("edata:"); for &el in edata.iter() { print!(" {:02X}", el); } println!();
+            let profile;
+            let mut nal_buf = Vec::with_capacity(1024);
+            if edata.len() > 11 && &edata[0..4] == b"avcC" {
+                let mut mr = MemoryReader::new_read(edata.as_slice());
+                let mut br = ByteReader::new(&mut mr);
+
+                                          br.read_skip(4)?;
+                let version             = br.read_byte()?;
+                validate!(version == 1);
+                profile                 = br.read_byte()?;
+                let _compatibility      = br.read_byte()?;
+                let _level              = br.read_byte()?;
+                let b                   = br.read_byte()?;
+                validate!((b & 0xFC) == 0xFC);
+                self.nal_len            = (b & 3) + 1;
+                let b                   = br.read_byte()?;
+                validate!((b & 0xE0) == 0xE0);
+                let num_sps = (b & 0x1F) as usize;
+                for _ in 0..num_sps {
+                    let len             = br.read_u16be()? as usize;
+                    let offset = br.tell() as usize;
+                    validate!((br.peek_byte()? & 0x1F) == 7);
+                    let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf);
+                                          br.read_skip(len)?;
+                    let sps = parse_sps(&nal_buf[1..])?;
+                    self.spses.push(sps);
+                }
+                let num_pps             = br.read_byte()? as usize;
+                for _ in 0..num_pps {
+                    let len             = br.read_u16be()? as usize;
+                    let offset = br.tell() as usize;
+                    validate!((br.peek_byte()? & 0x1F) == 8);
+                    let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf);
+                                          br.read_skip(len)?;
+                    let src = &nal_buf;
+
+                    let mut full_size = src.len() * 8;
+                    for &byte in src.iter().rev() {
+                        if byte == 0 {
+                            full_size -= 8;
+                        } else {
+                            full_size -= (byte.trailing_zeros() + 1) as usize;
+                            break;
+                        }
+                    }
+                    validate!(full_size > 0);
+
+                    let pps = parse_pps(&src[1..], &self.spses, full_size - 8)?;
+                    let mut found = false;
+                    for stored_pps in self.ppses.iter_mut() {
+                        if stored_pps.pic_parameter_set_id == pps.pic_parameter_set_id {
+                            *stored_pps = pps.clone();
+                            found = true;
+                            break;
+                        }
+                    }
+                    if !found {
+                        self.ppses.push(pps);
+                    }
+                }
+                if br.left() > 0 {
+                    match profile {
+                        100 | 110 | 122 | 144 => {
+                            let b       = br.read_byte()?;
+                            validate!((b & 0xFC) == 0xFC);
+                            // b & 3 -> chroma format
+                            let b       = br.read_byte()?;
+                            validate!((b & 0xF8) == 0xF8);
+                            // b & 7 -> luma depth minus 8
+                            let b       = br.read_byte()?;
+                            validate!((b & 0xF8) == 0xF8);
+                            // b & 7 -> chroma depth minus 8
+                            let num_spsext  = br.read_byte()? as usize;
+                            for _ in 0..num_spsext {
+                                let len = br.read_u16be()? as usize;
+                                // parse spsext
+                                          br.read_skip(len)?;
+                            }
+                        },
+                        _ => {},
+                    };
+                }
+            } else {
+                return Err(DecoderError::NotImplemented);
+            }
+
+            validate!(profile > 0);
+            let width  = (vinfo.get_width()  + 15) & !15;
+            let height = (vinfo.get_height() + 15) & !15;
+
+            let display = Display::open_silently().expect("open display");
+
+            let num_surfaces = self.spses[0].num_ref_frames + 4 + 64;
+
+            let va_profile = match profile {
+                    66 => VAProfile::VAProfileH264ConstrainedBaseline,
+                    77 => VAProfile::VAProfileH264Main,
+                    88 | 100 | 110 | 122 => VAProfile::VAProfileH264High,
+                    _ => return Err(DecoderError::NotImplemented),
+                };
+            if let Ok(profiles) = display.query_config_profiles() {
+                if !profiles.contains(&va_profile) {
+println!("Profile {} ({}) not supported", profile, profile_name(va_profile));
+                    return Err(DecoderError::NotImplemented);
+                }
+            } else {
+                return Err(DecoderError::Bug);
+            }
+            if let Ok(points) = display.query_config_entrypoints(va_profile) {
+                if !points.contains(&VAEntrypoint::VAEntrypointVLD) {
+println!("no decoding support for this profile");
+                    return Err(DecoderError::NotImplemented);
+                }
+            } else {
+                return Err(DecoderError::Bug);
+            }
+
+            let config = display.create_config(vec![
+                    VAConfigAttrib { type_: VAConfigAttribType::VAConfigAttribRTFormat, value: RTFormat::YUV420.into() },
+                ], va_profile, VAEntrypoint::VAEntrypointVLD).map_err(|_| {
+println!("config creation failed!");
+                    DecoderError::Bug
+                })?;
+            let surfaces = display.create_surfaces(RTFormat::YUV420, None, width as u32, height as u32, Some(UsageHint::Decoder.into()), num_surfaces as u32).map_err(|_| DecoderError::AllocError)?;
+            let context = display.create_context(&config, width as i32, height as i32, Some(&surfaces), true).map_err(|_| DecoderError::Bug)?;
+
+            let ref_pics = Vec::new();
+
+            let image_formats = display.query_image_formats().map_err(|_| DecoderError::Bug)?;
+            validate!(!image_formats.is_empty());
+            let mut ifmt = image_formats[0];
+            for fmt in image_formats.iter() {
+                if fmt.bits_per_pixel == 12 {
+                    ifmt = *fmt;
+                    break;
+                }
+            }
+
+            self.vaapi = Some(VaapiInternals { display, context, ref_pics, surfaces, ifmt });
+
+            let vinfo = NAVideoInfo::new(width, height, false, YUV420_FORMAT);
+            self.info = NACodecInfo::new_ref(info.get_name(), NACodecTypeInfo::Video(vinfo), info.get_extradata()).into_ref();
+            self.out_frm = alloc_video_buffer(vinfo, 4)?;
+
+            Ok(())
+        } else {
+            Err(DecoderError::InvalidData)
+        }
+    }
+    fn decode(&mut self, pkt: &NAPacket) -> DecoderResult<()> {
+        let src = pkt.get_buffer();
+        let vactx = if let Some(ref mut ctx) = self.vaapi { ctx } else { return Err(DecoderError::Bug) };
+
+        let timestamp = pkt.get_dts().unwrap_or_else(|| pkt.get_pts().unwrap_or(0));
+
+        if vactx.surfaces.is_empty() {
+panic!("ran out of free surfaces");
+//            return Err(DecoderError::AllocError);
+        }
+        let surface = vactx.surfaces.pop().unwrap();
+        let surface_id = surface.id();
+        let mut pic = Picture::new(timestamp, vactx.context.clone(), surface);
+        let mut is_ref = false;
+        let mut is_keyframe = false;
+
+        self.tb_num = pkt.ts.tb_num;
+        self.tb_den = pkt.ts.tb_den;
+
+        let mut mr = MemoryReader::new_read(&src);
+        let mut br = ByteReader::new(&mut mr);
+        let mut frame_type = FrameType::I;
+        let mut nal_buf = Vec::with_capacity(1024);
+        while br.left() > 0 {
+            let size = match self.nal_len {
+                    1 => br.read_byte()? as usize,
+                    2 => br.read_u16be()? as usize,
+                    3 => br.read_u24be()? as usize,
+                    4 => br.read_u32be()? as usize,
+                    _ => unreachable!(),
+                };
+            validate!(br.left() >= (size as i64));
+            let offset = br.tell() as usize;
+            let raw_nal = &src[offset..][..size];
+            let _size = unescape_nal(raw_nal, &mut nal_buf);
+
+            let src = &nal_buf;
+            validate!((src[0] & 0x80) == 0);
+            let nal_ref_idc   = src[0] >> 5;
+            let nal_unit_type = src[0] & 0x1F;
+
+            let mut full_size = src.len() * 8;
+            for &byte in src.iter().rev() {
+                if byte == 0 {
+                    full_size -= 8;
+                } else {
+                    full_size -= (byte.trailing_zeros() + 1) as usize;
+                    break;
+                }
+            }
+            validate!(full_size > 0);
+
+            match nal_unit_type {
+                 1 | 5 => {
+                    let is_idr = nal_unit_type == 5;
+                    is_ref |= nal_ref_idc != 0;
+                    is_keyframe |= is_idr;
+                    let mut br = BitReader::new(&src[..(full_size + 7)/8], BitReaderMode::BE);
+                                                        br.skip(8)?;
+
+                    let slice_hdr = parse_slice_header(&mut br, &self.spses, &self.ppses, is_idr, nal_ref_idc)?;
+                    match slice_hdr.slice_type {
+                        SliceType::P if frame_type != FrameType::B => frame_type = FrameType::P,
+                        SliceType::SP if frame_type != FrameType::B => frame_type = FrameType::P,
+                        SliceType::B => frame_type = FrameType::B,
+                        _ => {},
+                    };
+                    let mut cur_sps = 0;
+                    let mut cur_pps = 0;
+                    let mut pps_found = false;
+                    for (i, pps) in self.ppses.iter().enumerate() {
+                        if pps.pic_parameter_set_id == slice_hdr.pic_parameter_set_id {
+                            cur_pps = i;
+                            pps_found = true;
+                            break;
+                        }
+                    }
+                    validate!(pps_found);
+                    let mut sps_found = false;
+                    for (i, sps) in self.spses.iter().enumerate() {
+                        if sps.seq_parameter_set_id == self.ppses[cur_pps].seq_parameter_set_id {
+                            cur_sps = i;
+                            sps_found = true;
+                            break;
+                        }
+                    }
+                    validate!(sps_found);
+                    let sps = &self.spses[cur_sps];
+                    let pps = &self.ppses[cur_pps];
+
+                    if slice_hdr.first_mb_in_slice == 0 {
+                        let (top_id, bot_id) = self.frame_refs.calc_picture_num(&slice_hdr, is_idr, nal_ref_idc, sps);
+                        if is_idr {
+                            self.frame_refs.clear_refs();
+                            for (pic, _) in vactx.ref_pics.drain(..) {
+                                if let Ok(surf) = pic.take_surface() {
+                                    vactx.surfaces.push(surf);
+                                } else {
+                                    panic!("can't take surface");
+                                }
+                            }
+                        }
+                        self.frame_refs.select_refs(sps, &slice_hdr, top_id);
+                        let mut pic_refs = Vec::with_capacity(NUM_REF_PICS);
+                        for pic in self.frame_refs.ref_pics.iter().rev().take(NUM_REF_PICS) {
+                            pic_refs.push(pic.make_pic());
+                        }
+                        if slice_hdr.adaptive_ref_pic_marking_mode {
+                            self.frame_refs.apply_adaptive_marking(&slice_hdr.adaptive_ref_pic_marking, slice_hdr.frame_num, 1 << sps.log2_max_frame_num)?;
+                        }
+
+                        while pic_refs.len() < NUM_REF_PICS {
+                            pic_refs.push(make_dummy_h264_pic());
+                        }
+
+                        let mut flags = H264PictureFlags::default();
+                        let frame_idx = if let Some(id) = get_long_term_id(is_idr, &slice_hdr) {
+                                flags |= H264PictureFlag::LongTermReference;
+                                id as u32
+                            } else {
+                                if nal_ref_idc != 0 {
+                                    flags |= H264PictureFlag::ShortTermReference;
+                                }
+                                u32::from(slice_hdr.frame_num)
+                            };
+                        let pic_refs: [PictureH264; NUM_REF_PICS] = pic_refs.try_into().unwrap_or_else(|_| panic!("can't convert"));
+
+                        let h264pic = PictureH264::new(surface_id, frame_idx, flags, top_id as i32, bot_id as i32);
+
+                        let seq_fields = H264SeqFields::new(
+                                u32::from(sps.chroma_format_idc),
+                                u32::from(sps.separate_colour_plane),
+                                u32::from(sps.gaps_in_frame_num_value_allowed),
+                                u32::from(sps.frame_mbs_only),
+                                u32::from(sps.mb_adaptive_frame_field),
+                                u32::from(sps.direct_8x8_inference),
+                                u32::from(sps.level_idc >= 31),
+                                u32::from(sps.log2_max_frame_num) - 4,
+                                u32::from(sps.pic_order_cnt_type),
+                                u32::from(sps.log2_max_pic_order_cnt_lsb).wrapping_sub(4),
+                                u32::from(sps.delta_pic_order_always_zero)
+                            );
+                        let pic_fields = H264PicFields::new(
+                                u32::from(pps.entropy_coding_mode),
+                                u32::from(pps.weighted_pred),
+                                u32::from(pps.weighted_bipred_idc),
+                                u32::from(pps.transform_8x8_mode),
+                                u32::from(slice_hdr.field_pic),
+                                u32::from(pps.constrained_intra_pred),
+                                u32::from(pps.pic_order_present),
+                                u32::from(pps.deblocking_filter_control_present),
+                                u32::from(pps.redundant_pic_cnt_present),
+                                u32::from(nal_ref_idc != 0)
+                            );
+                        let ppd = PictureParameterBufferH264::new(
+                                h264pic,
+                                pic_refs,
+                                sps.pic_width_in_mbs as u16 - 1,
+                                sps.pic_height_in_mbs as u16 - 1,
+                                sps.bit_depth_luma - 8,
+                                sps.bit_depth_chroma - 8,
+                                sps.num_ref_frames as u8,
+                                &seq_fields,
+                                pps.num_slice_groups as u8 - 1, // should be 0
+                                pps.slice_group_map_type, // should be 0
+                                0, //pps.slice_group_change_rate as u16 - 1,
+                                pps.pic_init_qp as i8 - 26,
+                                pps.pic_init_qs as i8 - 26,
+                                pps.chroma_qp_index_offset,
+                                pps.second_chroma_qp_index_offset,
+                                &pic_fields,
+                                slice_hdr.frame_num
+                            );
+                        let pic_param = BufferType::PictureParameter(PictureParameter::H264(ppd));
+                        let buf = vactx.context.create_buffer(pic_param).map_err(|_| DecoderError::Bug)?;
+                        pic.add_buffer(buf);
+
+                        let mut scaling_list_8x8 = [[0; 64]; 2];
+                        scaling_list_8x8[0].copy_from_slice(&pps.scaling_list_8x8[0]);
+                        scaling_list_8x8[1].copy_from_slice(&pps.scaling_list_8x8[3]);
+                        let iqmatrix = BufferType::IQMatrix(IQMatrix::H264(IQMatrixBufferH264::new(pps.scaling_list_4x4, scaling_list_8x8)));
+                        let buf = vactx.context.create_buffer(iqmatrix).map_err(|_| DecoderError::Bug)?;
+                        pic.add_buffer(buf);
+
+                        let cpic = PictureInfo {
+                                id: slice_hdr.frame_num,
+                                full_id: top_id,
+                                surface_id,
+                                top_id, bot_id,
+                                //pic_type: slice_hdr.slice_type.to_frame_type(),
+                                is_ref,
+                                is_idr,
+                                long_term: get_long_term_id(is_idr, &slice_hdr),
+                            };
+                        if cpic.is_ref {
+                            self.frame_refs.add_short_term(cpic.clone(), sps.num_ref_frames);
+                        }
+                        if let Some(lt_idx) = cpic.long_term {
+                            self.frame_refs.add_long_term(lt_idx, cpic);
+                        }
+                    }
+
+                    let mut luma_weight_l0 = [0i16; 32];
+                    let mut luma_offset_l0 = [0i16; 32];
+                    let mut chroma_weight_l0 = [[0i16; 2]; 32];
+                    let mut chroma_offset_l0 = [[0i16; 2]; 32];
+                    let mut luma_weight_l1 = [0i16; 32];
+                    let mut luma_offset_l1 = [0i16; 32];
+                    let mut chroma_weight_l1 = [[0i16; 2]; 32];
+                    let mut chroma_offset_l1 = [[0i16; 2]; 32];
+                    let mut luma_weighted_l0 = false;
+                    let mut chroma_weighted_l0 = false;
+                    let mut luma_weighted_l1 = false;
+                    let mut chroma_weighted_l1 = false;
+                    let mut luma_log2_weight_denom = slice_hdr.luma_log2_weight_denom;
+                    let mut chroma_log2_weight_denom = slice_hdr.chroma_log2_weight_denom;
+
+                    if (pps.weighted_pred && matches!(slice_hdr.slice_type, SliceType::P | SliceType::B)) || (pps.weighted_bipred_idc == 1 && slice_hdr.slice_type == SliceType::B) {
+                        luma_weighted_l0 = true;
+                        chroma_weighted_l0 = false;
+                        for (i, winfo) in slice_hdr.weights_l0.iter().enumerate().take(slice_hdr.num_ref_idx_l0_active) {
+                            if winfo.luma_weighted {
+                                luma_weight_l0[i] = winfo.luma_weight.into();
+                                luma_offset_l0[i] = winfo.luma_offset.into();
+                            } else {
+                                luma_weight_l0[i] = 1 << slice_hdr.luma_log2_weight_denom;
+                            }
+                            if winfo.chroma_weighted {
+                                chroma_weight_l0[i][0] = winfo.chroma_weight[0].into();
+                                chroma_weight_l0[i][1] = winfo.chroma_weight[1].into();
+                                chroma_offset_l0[i][0] = winfo.chroma_offset[0].into();
+                                chroma_offset_l0[i][1] = winfo.chroma_offset[1].into();
+                            } else {
+                                chroma_weight_l0[i][0] = 1 << slice_hdr.chroma_log2_weight_denom;
+                                chroma_weight_l0[i][1] = 1 << slice_hdr.chroma_log2_weight_denom;
+                                chroma_offset_l0[i][0] = 0;
+                                chroma_offset_l0[i][1] = 0;
+                            }
+                            chroma_weighted_l0 |= winfo.chroma_weighted;
+                        }
+                    }
+                    if pps.weighted_bipred_idc == 1 && slice_hdr.slice_type == SliceType::B {
+                        luma_weighted_l1 = true;
+                        chroma_weighted_l1 = sps.chroma_format_idc != 0;
+                        for (i, winfo) in slice_hdr.weights_l1.iter().enumerate().take(slice_hdr.num_ref_idx_l1_active) {
+                            if winfo.luma_weighted {
+                                luma_weight_l1[i] = winfo.luma_weight.into();
+                                luma_offset_l1[i] = winfo.luma_offset.into();
+                            } else {
+                                luma_weight_l1[i] = 1 << slice_hdr.luma_log2_weight_denom;
+                            }
+                            if chroma_weighted_l1 && winfo.chroma_weighted {
+                                chroma_weight_l1[i][0] = winfo.chroma_weight[0].into();
+                                chroma_weight_l1[i][1] = winfo.chroma_weight[1].into();
+                                chroma_offset_l1[i][0] = winfo.chroma_offset[0].into();
+                                chroma_offset_l1[i][1] = winfo.chroma_offset[1].into();
+                            } else {
+                                chroma_weight_l1[i][0] = 1 << slice_hdr.chroma_log2_weight_denom;
+                                chroma_weight_l1[i][1] = 1 << slice_hdr.chroma_log2_weight_denom;
+                                chroma_offset_l1[i][0] = 0;
+                                chroma_offset_l1[i][1] = 0;
+                            }
+                        }
+                    }
+                    if pps.weighted_bipred_idc == 2 && slice_hdr.slice_type == SliceType::B {
+                        let num_l0 = slice_hdr.num_ref_idx_l0_active;
+                        let num_l1 = slice_hdr.num_ref_idx_l1_active;
+                        if num_l0 != 1 || num_l1 != 1 { //xxx: also exclude symmetric case
+                            luma_weighted_l0 = false;
+                            luma_weighted_l1 = false;
+                            chroma_weighted_l0 = false;
+                            chroma_weighted_l1 = false;
+                            luma_log2_weight_denom = 5;
+                            chroma_log2_weight_denom = 5;
+
+                            for w in luma_weight_l0.iter_mut() {
+                                *w = 32;
+                            }
+                            for w in luma_weight_l1.iter_mut() {
+                                *w = 32;
+                            }
+                            for w in chroma_weight_l0.iter_mut() {
+                                *w = [32; 2];
+                            }
+                            for w in chroma_weight_l1.iter_mut() {
+                                *w = [32; 2];
+                            }
+                        }
+                    }
+
+                    let ref_pic_list_0 = map_ref_list(&self.frame_refs.cur_refs.ref_list0);
+                    let ref_pic_list_1 = map_ref_list(&self.frame_refs.cur_refs.ref_list1);
+
+                    let slice_param = SliceParameterBufferH264::new(
+                            raw_nal.len() as u32,
+                            0, // no offset
+                            VASliceDataFlag::All,
+                            br.tell() as u16,
+                            slice_hdr.first_mb_in_slice as u16,
+                            match slice_hdr.slice_type {
+                                SliceType::I => 2,
+                                SliceType::P => 0,
+                                SliceType::B => 1,
+                                SliceType::SI => 4,
+                                SliceType::SP => 3,
+                            },
+                            slice_hdr.direct_spatial_mv_pred as u8,
+                            (slice_hdr.num_ref_idx_l0_active as u8).saturating_sub(1),
+                            (slice_hdr.num_ref_idx_l1_active as u8).saturating_sub(1),
+                            slice_hdr.cabac_init_idc,
+                            slice_hdr.slice_qp_delta as i8,
+                            slice_hdr.disable_deblocking_filter_idc,
+                            slice_hdr.slice_alpha_c0_offset / 2,
+                            slice_hdr.slice_beta_offset / 2,
+                            ref_pic_list_0,
+                            ref_pic_list_1,
+                            luma_log2_weight_denom,
+                            chroma_log2_weight_denom,
+                            luma_weighted_l0 as u8, luma_weight_l0, luma_offset_l0,
+                            chroma_weighted_l0 as u8, chroma_weight_l0, chroma_offset_l0,
+                            luma_weighted_l1 as u8, luma_weight_l1, luma_offset_l1,
+                            chroma_weighted_l1 as u8, chroma_weight_l1, chroma_offset_l1,
+                        );
+                    let slc_param = BufferType::SliceParameter(SliceParameter::H264(slice_param));
+                    let buf = vactx.context.create_buffer(slc_param).map_err(|_| DecoderError::Bug)?;
+                    pic.add_buffer(buf);
+
+                    let slc_data = BufferType::SliceData(raw_nal.to_vec());
+                    let buf = vactx.context.create_buffer(slc_data).map_err(|_| DecoderError::Bug)?;
+                    pic.add_buffer(buf);
+                },
+                 2 => { // slice data partition A
+                    //slice header
+                    //slice id = read_ue()
+                    //cat 2 slice data (all but MB layer residual)
+                    return Err(DecoderError::NotImplemented);
+                },
+                 3 => { // slice data partition B
+                    //slice id = read_ue()
+                    //if pps.redundant_pic_cnt_present { redundant_pic_cnt = read_ue() }
+                    //cat 3 slice data (MB layer residual)
+                    return Err(DecoderError::NotImplemented);
+                },
+                 4 => { // slice data partition C
+                    //slice id = read_ue()
+                    //if pps.redundant_pic_cnt_present { redundant_pic_cnt = read_ue() }
+                    //cat 4 slice data (MB layer residual)
+                    return Err(DecoderError::NotImplemented);
+                },
+                 6 => {}, //SEI
+                 7 => {
+                    let sps = parse_sps(&src[1..])?;
+                    self.spses.push(sps);
+                },
+                 8 => {
+                    validate!(full_size >= 8 + 16);
+                    let pps = parse_pps(&src[1..], &self.spses, full_size - 8)?;
+                    let mut found = false;
+                    for stored_pps in self.ppses.iter_mut() {
+                        if stored_pps.pic_parameter_set_id == pps.pic_parameter_set_id {
+                            *stored_pps = pps.clone();
+                            found = true;
+                            break;
+                        }
+                    }
+                    if !found {
+                        self.ppses.push(pps);
+                    }
+                },
+                 9 => { // access unit delimiter
+                },
+                10 => {}, //end of sequence
+                11 => {}, //end of stream
+                12 => {}, //filler
+                _  => {},
+            };
+
+            br.read_skip(size)?;
+        }
+
+        let bpic = pic.begin().expect("begin");
+        let rpic = bpic.render().expect("render");
+        let epic = rpic.end().expect("end");
+
+        self.reorderer.add_frame(WaitingFrame {
+                pic:    epic,
+                is_idr: is_keyframe,
+                is_ref,
+                ftype:  frame_type,
+                ts:     timestamp,
+            });
+
+        let mut idx = 0;
+        while idx < vactx.ref_pics.len() {
+            let cur_surf_id = vactx.ref_pics[idx].1;
+            if self.frame_refs.ref_pics.iter().any(|fref| fref.surface_id == cur_surf_id) {
+                idx += 1;
+            } else {
+                let (pic, _) = vactx.ref_pics.remove(idx);
+                if let Ok(surf) = pic.take_surface() {
+                    vactx.surfaces.push(surf);
+                } else {
+                    panic!("can't take surface");
+                }
+            }
+        }
+
+        Ok(())
+    }
+    fn get_frame(&mut self) -> Option<NAFrameRef> {
+        if let Some(ref mut vactx) = self.vaapi {
+            if let Some(frm) = self.reorderer.get_frame() {
+                let ts = frm.ts;
+                let is_idr = frm.is_idr;
+                let is_ref = frm.is_ref;
+                let ftype = frm.ftype;
+                if let Ok(pic) = frm.pic.sync() {
+                    let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm);
+
+                    if !is_ref {
+                        if let Ok(surf) = pic.take_surface() {
+                            vactx.surfaces.push(surf);
+                        } else {
+                            panic!("can't take surface");
+                        }
+                    } else {
+                        let id = pic.surface_id();
+                        vactx.ref_pics.push((pic, id));
+                    }
+
+                    let ts = NATimeInfo::new(None, Some(ts), None, self.tb_num, self.tb_den);
+                    Some(NAFrame::new(ts, ftype, is_idr, self.info.clone(), self.out_frm.clone()).into_ref())
+                } else {
+                    panic!("can't sync");
+                }
+            } else {
+                None
+            }
+        } else {
+            None
+        }
+    }
+    fn get_last_frames(&mut self) -> Option<NAFrameRef> {
+        if let Some(ref mut vactx) = self.vaapi {
+            if let Some(frm) = self.reorderer.frames.pop_front() {
+                let ts = frm.ts;
+                let is_idr = frm.is_idr;
+                let is_ref = frm.is_ref;
+                let ftype = frm.ftype;
+                if let Ok(pic) = frm.pic.sync() {
+                    let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm);
+
+                    if !is_ref {
+                        if let Ok(surf) = pic.take_surface() {
+                            vactx.surfaces.push(surf);
+                        } else {
+                            panic!("can't take surface");
+                        }
+                    } else {
+                        let id = pic.surface_id();
+                        vactx.ref_pics.push((pic, id));
+                    }
+
+                    let ts = NATimeInfo::new(None, Some(ts), None, self.tb_num, self.tb_den);
+                    Some(NAFrame::new(ts, ftype, is_idr, self.info.clone(), self.out_frm.clone()).into_ref())
+                } else {
+                    panic!("can't sync");
+                }
+            } else {
+                None
+            }
+        } else {
+            None
+        }
+    }
+    fn flush(&mut self) {
+        self.frame_refs.clear_refs();
+        if let Some(ref mut vactx) = self.vaapi {
+            for frm in self.reorderer.frames.drain(..) {
+                if let Ok(pic) = frm.pic.sync() {
+                    if let Ok(surf) = pic.take_surface() {
+                        vactx.surfaces.push(surf);
+                    } else {
+                        panic!("can't take surface");
+                    }
+                } else {
+                    panic!("can't sync");
+                }
+            }
+            self.reorderer.flush();
+            for (pic, _) in vactx.ref_pics.drain(..) {
+                if let Ok(surf) = pic.take_surface() {
+                    vactx.surfaces.push(surf);
+                } else {
+                    panic!("can't take surface");
+                }
+            }
+        }
+    }
+}
+
+impl NAOptionHandler for VaapiH264Decoder { // this decoder exposes no configurable options
+    fn get_supported_options(&self) -> &[NAOptionDefinition] { &[] } // always an empty list
+    fn set_options(&mut self, _options: &[NAOption]) {} // every supplied option is ignored
+    fn query_option_value(&self, _name: &str) -> Option<NAValue> { None } // no option has a value to report
+}
+
+use std::thread::*;
+use std::sync::mpsc::*;
+
+enum DecMessage { // requests sent from HWWrapper to the decoder thread
+    Init(NACodecInfoRef), // run init() with this codec info; answered with Ok or Err
+    Decode(NAPacket), // run decode() on this packet; answered with Ok or Err
+    Flush, // run flush(); the thread sends no response for it
+    GetFrame, // run get_frame(); answered with Frame or Nothing
+    GetLastFrames, // run get_last_frames(); answered with Frame or Nothing
+    End // makes the decoder thread return Ok(())
+}
+
+enum DecResponse { // replies sent from the decoder thread back to HWWrapper
+    Ok, // Init or Decode succeeded
+    Nothing, // GetFrame or GetLastFrames had no frame to return
+    Err(DecoderError), // Init or Decode failed with this error
+    Frame(NAFrameRef), // frame produced by GetFrame or GetLastFrames
+}
+
+pub trait HWDecoder { // public interface of a hardware decoder; HWWrapper implements it
+    fn init(&mut self, info: NACodecInfoRef) -> DecoderResult<()>; // set the decoder up from the given codec info
+    fn queue_pkt(&mut self, pkt: &NAPacket) -> DecoderResult<()>; // submit one packet for decoding
+    fn get_frame(&mut self) -> Option<NAFrameRef>; // next frame that is ready for output, if any
+    fn get_last_frames(&mut self) -> Option<NAFrameRef>; // NOTE(review): apparently meant for draining the remaining queued frames - confirm with callers
+    fn flush(&mut self); // drop queued frames and references; HWWrapper does not wait for a reply here
+}
+
+pub struct HWWrapper { // front end that talks to a VaapiH264Decoder running on its own thread
+    handle:     Option<JoinHandle<DecoderResult<()>>>, // decoder thread handle; taken and joined in Drop
+    send:       SyncSender<DecMessage>, // request channel towards the decoder thread
+    recv:       Receiver<DecResponse>, // response channel from the decoder thread
+}
+
+#[allow(clippy::new_without_default)]
+impl HWWrapper {
+    pub fn new() -> Self {
+        let (in_send, in_recv) = sync_channel(1);
+        let (out_send, out_recv) = sync_channel(1);
+        let handle = std::thread::spawn(move || {
+                let receiver = in_recv;
+                let sender = out_send;
+                let mut dec = VaapiH264Decoder::new();
+                while let Ok(msg) = receiver.recv() {
+                    match msg {
+                        DecMessage::Init(info) => {
+                            let msg = if let Err(err) = dec.init(info) {
+                                    DecResponse::Err(err)
+                                } else {
+                                    DecResponse::Ok
+                                };
+                            sender.send(msg).map_err(|_| DecoderError::Bug)?;
+                        },
+                        DecMessage::Decode(pkt) => {
+                            let msg = match dec.decode(&pkt) {
+                                    Ok(()) => DecResponse::Ok,
+                                    Err(err) => DecResponse::Err(err),
+                                };
+                            sender.send(msg).map_err(|_| DecoderError::Bug)?;
+                        },
+                        DecMessage::GetFrame => {
+                            let msg = match dec.get_frame() {
+                                    Some(frm) => DecResponse::Frame(frm),
+                                    None => DecResponse::Nothing,
+                                };
+                            sender.send(msg).map_err(|_| DecoderError::Bug)?;
+                        },
+                        DecMessage::GetLastFrames => {
+                            let msg = match dec.get_last_frames() {
+                                    Some(frm) => DecResponse::Frame(frm),
+                                    None => DecResponse::Nothing,
+                                };
+                            sender.send(msg).map_err(|_| DecoderError::Bug)?;
+                        },
+                        DecMessage::Flush => dec.flush(),
+                        DecMessage::End => return Ok(()),
+                    };
+                }
+                Err(DecoderError::Bug)
+            });
+
+        Self {
+            handle:     Some(handle),
+            send:       in_send,
+            recv:       out_recv,
+        }
+    }
+}
+
+impl HWDecoder for HWWrapper {
+    fn init(&mut self, info: NACodecInfoRef) -> DecoderResult<()> {
+        if self.send.send(DecMessage::Init(info)).is_ok() {
+            match self.recv.recv() {
+                Ok(DecResponse::Ok) => Ok(()),
+                Ok(DecResponse::Err(err)) => Err(err),
+                Err(_) => Err(DecoderError::Bug),
+                _ => unreachable!(),
+            }
+        } else {
+            Err(DecoderError::Bug)
+        }
+    }
+    fn queue_pkt(&mut self, pkt: &NAPacket) -> DecoderResult<()> {
+        let pkt2 = NAPacket::new_from_refbuf(pkt.get_stream(), pkt.ts, pkt.keyframe, pkt.get_buffer());
+        if self.send.send(DecMessage::Decode(pkt2)).is_ok() {
+            match self.recv.recv() {
+                Ok(DecResponse::Ok) => Ok(()),
+                Ok(DecResponse::Err(err)) => Err(err),
+                Err(_) => Err(DecoderError::Bug),
+                _ => unreachable!(),
+            }
+        } else {
+            Err(DecoderError::Bug)
+        }
+    }
+    fn get_frame(&mut self) -> Option<NAFrameRef> {
+        if self.send.send(DecMessage::GetFrame).is_ok() {
+            match self.recv.recv() {
+                Ok(DecResponse::Frame(frm)) => Some(frm),
+                Ok(DecResponse::Nothing) => None,
+                Err(_) => None,
+                _ => unreachable!(),
+            }
+        } else {
+            None
+        }
+    }
+    fn get_last_frames(&mut self) -> Option<NAFrameRef> {
+        if self.send.send(DecMessage::GetLastFrames).is_ok() {
+            match self.recv.recv() {
+                Ok(DecResponse::Frame(frm)) => Some(frm),
+                Ok(DecResponse::Nothing) => None,
+                Err(_) => None,
+                _ => unreachable!(),
+            }
+        } else {
+            None
+        }
+    }
+    fn flush(&mut self) {
+        let _ = self.send.send(DecMessage::Flush);
+    }
+}
+
+impl Drop for HWWrapper {
+    fn drop(&mut self) {
+        if self.send.send(DecMessage::End).is_ok() {
+            let mut handle = None;
+            std::mem::swap(&mut handle, &mut self.handle);
+            if let Some(hdl) = handle {
+                let _ = hdl.join();
+            }
+        }
+    }
+}
+
+impl NAOptionHandler for HWWrapper { // the wrapper exposes no configurable options
+    fn get_supported_options(&self) -> &[NAOptionDefinition] { &[] } // always an empty list
+    fn set_options(&mut self, _options: &[NAOption]) {} // every supplied option is ignored
+    fn query_option_value(&self, _name: &str) -> Option<NAValue> { None } // no option has a value to report
+}
+
+pub fn new_h264_hwdec() -> Box<dyn HWDecoder + Send> {
+    Box::new(HWWrapper::new())
+}
+
+#[cfg(test)]
+mod test {
+    use nihav_core::codecs::*;
+    use nihav_core::io::byteio::*;
+    use nihav_core::demuxers::{RegisteredDemuxers, create_demuxer};
+    use nihav_commonfmt::generic_register_all_demuxers;
+    use super::VaapiH264Decoder;
+    use std::io::prelude::*;
+
+    fn decode_h264(name: &str, dname: &str, dmx_reg: &RegisteredDemuxers, opfx: &str) -> DecoderResult<()> { // decodes the first video stream of `name` and dumps every frame as a PGM file
+        let dmx_f = dmx_reg.find_demuxer(dname).expect("demuxer exists");
+        let file = std::fs::File::open(name).expect("file exists");
+        let mut fr = FileReader::new_read(file);
+        let mut br = ByteReader::new(&mut fr);
+        let mut dmx = create_demuxer(dmx_f, &mut br).expect("create demuxer");
+
+        let mut vstream_id = 0;
+        let mut dec = VaapiH264Decoder::new(); // the decoder is driven directly here, not through HWWrapper
+        for stream in dmx.get_streams() {
+            if stream.get_media_type() == StreamType::Video {
+                dec.init(stream.get_info()).expect("inited");
+                vstream_id = stream.get_id();
+                break; // only the first video stream is used
+            }
+        }
+
+        let mut frameno = 0;
+        while let Ok(pkt) = dmx.get_frame() {
+            if pkt.get_stream().get_id() != vstream_id {
+                continue;
+            }
+            dec.decode(&pkt).expect("decoded");
+            let frm = dec.get_last_frames().expect("get frame"); // takes the queued frame straight away, so frames come out in decoding order
+            let timestamp = frm.get_dts().unwrap_or_else(|| frm.get_pts().unwrap_or(0));
+
+            let pic = frm.get_buffer().get_vbuf().expect("got picture");
+
+            let nname = format!("assets/test_out/{}{:06}_{}.pgm", opfx, timestamp, frameno);
+            frameno += 1;
+            let mut file = std::fs::File::create(&nname).expect("create file");
+            let (w, h) = pic.get_dimensions(0);
+            file.write_all(format!("P5\n{} {}\n255\n", w, h * 3 / 2).as_bytes()).expect("header written"); // image is h luma rows followed by h/2 rows of chroma
+            let data = pic.get_data();
+            for yline in data.chunks(pic.get_stride(0)).take(h) {
+                file.write_all(&yline[..w]).expect("Y line written");
+            }
+            for (uline, vline) in data[pic.get_offset(1)..].chunks(pic.get_stride(1))
+                    .zip(data[pic.get_offset(2)..].chunks(pic.get_stride(2))).take(h / 2) {
+                file.write_all(&uline[..w / 2]).expect("U line written"); // each chroma row holds w/2 U samples...
+                file.write_all(&vline[..w / 2]).expect("V line written"); // ...followed by w/2 V samples
+            }
+        }
+        Ok(())
+    }
+
+
+    // samples if not specified otherwise come from H.264 conformance suite
+
+    #[test]
+    fn test_h264_simple() { // passes if the sample decodes without panicking; the written files are not compared with anything
+        let mut dmx_reg = RegisteredDemuxers::new();
+        generic_register_all_demuxers(&mut dmx_reg);
+
+        decode_h264("assets/ITU/DimpledSpanishCuckoo-mobile.mp4", "mov", &dmx_reg, "hw").unwrap();
+    }
+}
diff --git a/hwdec-vaapi/src/pic_ref.rs b/hwdec-vaapi/src/pic_ref.rs
new file mode 100644 (file)
index 0000000..8d5c982
--- /dev/null
@@ -0,0 +1,411 @@
+use nihav_core::codecs::DecoderResult;
+//use nihav_core::frame::{FrameType, NAVideoBufferRef, NATimeInfo};
+use super::sets::SeqParameterSet;
+use super::slice::*;
+
+use libva::VASurfaceID;
+
+pub const MISSING_POC: u16 = 0xFFFF; // NOTE(review): presumably a sentinel for an absent picture; not referenced in the visible part of this file - confirm
+
+#[derive(Clone)]
+pub struct PictureInfo { // description of one decoded picture as tracked for reference handling
+    pub id:         u16, // matched against frame-number based IDs in apply_adaptive_marking()
+    pub full_id:    u32, // compared with the current picture ID when select_refs() orders references
+    pub top_id:     u32, // NOTE(review): presumably the top field order count - confirm
+    pub bot_id:     u32, // NOTE(review): presumably the bottom field order count - confirm
+//    pub pic_type:   FrameType,
+    pub is_ref:     bool, // when set the decoder adds the picture to the short-term reference list
+    pub is_idr:     bool, // NOTE(review): presumably set for IDR pictures - confirm
+    pub long_term:  Option<usize>, // long-term index passed to add_long_term() when present
+    pub surface_id: VASurfaceID, // VA surface of the picture; the decoder uses it to tell which surfaces are still referenced
+}
+
+#[derive(Clone)]
+pub struct SliceRefs { // reference lists built by select_refs() for the current slice
+    pub ref_list0:  Vec<Option<PictureInfo>>, // list 0; an entry may be None
+    pub ref_list1:  Vec<Option<PictureInfo>>, // list 1; an entry may be None
+    pub cur_id:     u32, // ID of the current picture as given to select_refs()
+}
+
+#[allow(dead_code)]
+impl SliceRefs {
+    pub fn get_ref_id(&self, list_id: u8, ref_id: usize) -> Option<u32> {
+        let ref_list = if list_id == 0 { &self.ref_list0 } else { &self.ref_list1 };
+        if ref_list.len() > ref_id {
+            ref_list[ref_id].as_ref().map(|pic| pic.full_id)
+        } else {
+            None
+        }
+    }
+}
+
+pub struct FrameRefs { // reference picture bookkeeping plus the state needed to derive picture order counts
+    pub ref_pics:   Vec<PictureInfo>, // short-term reference pictures
+    pub cur_refs:   SliceRefs, // lists selected for the current slice by select_refs()
+    pub long_term:  Vec<Option<PictureInfo>>, // long-term reference pictures, indexed by long-term index
+
+    prev_poc_msb:       u32, // POC type 0: MSB part stored after the last reference picture
+    prev_poc_lsb:       u16, // POC type 0: LSB the current picture is compared against
+    prev_ref_poc_lsb:   u16, // POC type 0: LSB stored after the last reference picture
+    prev_frame_num:     u16, // POC types 1 and 2: frame_num of the previous picture
+    frame_num_offset:   u32, // POC types 1 and 2: offset grown each time frame_num goes down
+    max_frame_num:      i32, // 1 << log2_max_frame_num, refreshed by calc_picture_num()
+}
+
+#[allow(clippy::new_without_default)]
+impl FrameRefs {
+    pub fn new() -> Self {
+        Self {
+            ref_pics:   Vec::with_capacity(16),
+            cur_refs:   SliceRefs {
+                            ref_list0:  Vec::with_capacity(3),
+                            ref_list1:  Vec::with_capacity(3),
+                            cur_id:     0,
+                        },
+            long_term:  Vec::new(),
+
+            prev_poc_msb:       0,
+            prev_poc_lsb:       0,
+            prev_ref_poc_lsb:   0,
+            prev_frame_num:     0,
+            frame_num_offset:   0,
+            max_frame_num:      0,
+        }
+    }
+    pub fn fill_ref_nums(&self, dst: &mut Vec<u32>) {
+        for pic in self.ref_pics.iter() {
+            if !dst.contains(&pic.full_id) {
+                dst.push(pic.full_id);
+            }
+        }
+        for pic in self.long_term.iter().flatten() {
+            if !dst.contains(&pic.full_id) {
+                dst.push(pic.full_id);
+            }
+        }
+    }
+    pub fn calc_picture_num(&mut self, slice_hdr: &SliceHeader, is_idr: bool, ref_id: u8, sps: &SeqParameterSet) -> (u32, u32) {
+        self.max_frame_num = 1 << sps.log2_max_frame_num;
+        match sps.pic_order_cnt_type {
+            0 => {
+                if is_idr {
+                    self.prev_poc_msb = 0;
+                    self.prev_poc_lsb = 0;
+                } else {
+                    self.prev_poc_lsb = self.prev_ref_poc_lsb;
+                }
+                let max_poc_lsb = 1 << sps.log2_max_pic_order_cnt_lsb;
+                let half_max_poc_lsb = 1 << (sps.log2_max_pic_order_cnt_lsb - 1);
+                let cur_lsb = slice_hdr.pic_order_cnt_lsb;
+                let poc_msb = if cur_lsb < self.prev_poc_lsb && (self.prev_poc_lsb - cur_lsb >= half_max_poc_lsb) {
+                        self.prev_poc_msb + max_poc_lsb
+                    } else if cur_lsb > self.prev_poc_lsb && (cur_lsb - self.prev_poc_lsb > half_max_poc_lsb) {
+                        self.prev_poc_msb.wrapping_sub(max_poc_lsb)
+                    } else {
+                        self.prev_poc_msb
+                    };
+                let poc = poc_msb + u32::from(cur_lsb);
+                if ref_id != 0 {
+                    self.prev_ref_poc_lsb = slice_hdr.pic_order_cnt_lsb;
+                    self.prev_poc_msb = poc_msb;
+                }
+                (poc, poc)
+            },
+            1 => {
+                let off = if self.prev_frame_num > slice_hdr.frame_num {
+                        self.frame_num_offset + (1 << sps.log2_max_frame_num)
+                    } else {
+                        self.frame_num_offset
+                    };
+                let mut anum = if sps.num_ref_frames_in_pic_order_cnt_cycle != 0 {
+                        (off as i32) + i32::from(slice_hdr.frame_num)
+                    } else {
+                        0
+                    };
+                if ref_id == 0 && anum > 0 {
+                    anum -= 1;
+                }
+                let (poc_cycle_cnt, fno_in_poc_cycle) = if anum > 0 {
+                        let nrf = sps.num_ref_frames_in_pic_order_cnt_cycle as i32;
+                        ((anum - 1) / nrf, (anum - 1) % nrf)
+                    } else {
+                        (0, 0)
+                    };
+                let mut expected_delta = 0;
+                for &offset in sps.offset_for_ref_frame[..sps.num_ref_frames_in_pic_order_cnt_cycle].iter() {
+                    expected_delta += offset;
+                }
+                let mut expected_poc = if anum > 0 {
+                        let mut sum = poc_cycle_cnt * expected_delta;
+                        for &offset in sps.offset_for_ref_frame[..=fno_in_poc_cycle as usize].iter() {
+                            sum += offset;
+                        }
+                        sum
+                    } else {
+                        0
+                    };
+                if ref_id == 0 {
+                    expected_poc += sps.offset_for_non_ref_pic;
+                }
+                let (top_id, bot_id) = if !slice_hdr.field_pic {
+                        let top_id = expected_poc + slice_hdr.delta_pic_order_cnt[0];
+                        let bot_id = top_id + sps.offset_for_top_to_bottom_field + slice_hdr.delta_pic_order_cnt[1];
+                        (top_id, bot_id)
+                    } else if !slice_hdr.bottom_field {
+                        (expected_poc + slice_hdr.delta_pic_order_cnt[0], 0)
+                    } else {
+                        (0, sps.offset_for_top_to_bottom_field + slice_hdr.delta_pic_order_cnt[1])
+                    };
+                self.prev_frame_num = slice_hdr.frame_num;
+                self.frame_num_offset = off;
+                (top_id as u32, bot_id as u32)
+            },
+            _ => {
+                if slice_hdr.frame_num < self.prev_frame_num {
+                    self.frame_num_offset   += 1 << sps.log2_max_frame_num;
+                }
+                self.prev_frame_num = slice_hdr.frame_num;
+                let poc = self.frame_num_offset + u32::from(slice_hdr.frame_num);
+                (poc, poc)
+            },
+        }
+    }
+    pub fn apply_adaptive_marking(&mut self, marking: &AdaptiveMarking, cur_id: u16, max_id: u16) -> DecoderResult<()> {
+        let all_ref_pics = self.ref_pics.clone();
+
+        for (&op, (&arg1, &arg2)) in marking.memory_management_control_op.iter().zip(marking.operation_arg.iter().zip(marking.operation_arg2.iter())).take(marking.num_ops) {
+            match op {
+                1 => {
+                    let src_id = cur_id.wrapping_sub(arg1) & (max_id - 1);
+                    let mut found = false;
+                    let mut idx = 0;
+                    for (i, pic) in self.ref_pics.iter().enumerate() {
+                        if pic.id == src_id {
+                            found = true;
+                            idx = i;
+                            break;
+                        }
+                    }
+                    if found {
+                        self.ref_pics.remove(idx);
+                    }
+                },
+                2 => { // mark long term picture as unused
+                    let idx = arg1 as usize;
+                    if idx < self.long_term.len() {
+                        self.long_term[idx] = None;
+                    }
+                },
+                3 => {
+                    let src_id = cur_id.wrapping_sub(arg1) & (max_id - 1);
+
+                    let didx = arg2 as usize;
+                    for pic in all_ref_pics.iter() {
+                        if pic.id == src_id {
+                            if didx < self.long_term.len() {
+                                self.long_term[didx] = Some(pic.clone());
+                            }
+                            break;
+                        }
+                    }
+                },
+                4 => {
+                    self.long_term.resize(arg1 as usize, None);
+                },
+                5 => {
+                    self.ref_pics.clear();
+                    self.long_term.clear();
+                },
+                6 => {
+                    // assign an long term index to current pic - done elsewhere
+                },
+                _ => {},
+            };
+        }
+        Ok(())
+    }
+    pub fn clear_refs(&mut self) {
+        self.ref_pics.clear();
+        self.long_term.clear();
+    }
+    #[allow(clippy::cognitive_complexity)]
+    pub fn select_refs(&mut self, sps: &SeqParameterSet, slice_hdr: &SliceHeader, cur_id: u32) {
+        self.cur_refs.cur_id = cur_id;
+        self.cur_refs.ref_list0.clear();
+        self.cur_refs.ref_list1.clear();
+        let pic_num_mask = if sps.log2_max_frame_num == 16 {
+                0xFFFF
+            } else {
+                (1 << sps.log2_max_frame_num) - 1
+            };
+
+        if !slice_hdr.slice_type.is_intra() {
+            let has_reordering = slice_hdr.ref_pic_list_reordering_l0;
+            if !has_reordering {
+                let num_ref = slice_hdr.num_ref_idx_l0_active;
+                if slice_hdr.slice_type.is_p() {
+                    if !self.ref_pics.is_empty() {
+                        for pic in self.ref_pics.iter().rev().take(num_ref) {
+                            self.cur_refs.ref_list0.push(Some(pic.clone()));
+                        }
+                    }
+                } else {
+                    let mut pivot = 0;
+                    for (i, pic) in self.ref_pics.iter().enumerate() {
+                        pivot = i;
+                        if pic.full_id > cur_id {
+                            break;
+                        }
+                    }
+                    for pic in self.ref_pics[..pivot].iter().rev() {
+                        if self.cur_refs.ref_list0.len() >= num_ref {
+                            break;
+                        }
+                        self.cur_refs.ref_list0.push(Some(pic.clone()));
+                    }
+                    for pic in self.ref_pics.iter().skip(pivot) {
+                        if self.cur_refs.ref_list0.len() >= num_ref {
+                            break;
+                        }
+                        self.cur_refs.ref_list0.push(Some(pic.clone()));
+                    }
+                }
+                if !self.long_term.is_empty() && self.cur_refs.ref_list0.len() < num_ref {
+                    let copy_size = num_ref - self.cur_refs.ref_list0.len();
+                    for ltpic in self.long_term.iter().take(copy_size) {
+                        self.cur_refs.ref_list0.push(ltpic.clone());
+                    }
+                }
+            } else {
+                form_ref_list(&mut self.cur_refs.ref_list0,
+                              &self.ref_pics, &self.long_term,
+                              &slice_hdr.reordering_list_l0,
+                              slice_hdr.frame_num, pic_num_mask);
+            }
+            if slice_hdr.slice_type.is_b() {
+                let has_reordering = slice_hdr.ref_pic_list_reordering_l1;
+                if !has_reordering {
+                    let num_ref = slice_hdr.num_ref_idx_l1_active;
+                    let mut pivot = 0;
+                    for (i, pic) in self.ref_pics.iter().enumerate() {
+                        pivot = i;
+                        if pic.full_id > cur_id {
+                            break;
+                        }
+                    }
+                    for pic in self.ref_pics.iter().skip(pivot) {
+                        if self.cur_refs.ref_list1.len() >= num_ref {
+                            break;
+                        }
+                        self.cur_refs.ref_list1.push(Some(pic.clone()));
+                    }
+                    for pic in self.ref_pics[..pivot].iter().rev() {
+                        if self.cur_refs.ref_list1.len() >= num_ref {
+                            break;
+                        }
+                        self.cur_refs.ref_list1.push(Some(pic.clone()));
+                    }
+                    if !self.long_term.is_empty() && self.cur_refs.ref_list1.len() < num_ref {
+                        let copy_size = num_ref - self.cur_refs.ref_list1.len();
+                        for ltpic in self.long_term.iter().take(copy_size) {
+                            self.cur_refs.ref_list1.push(ltpic.clone());
+                        }
+                    }
+                    if self.cur_refs.ref_list1.len() > 1 && self.cur_refs.ref_list0.len() == self.cur_refs.ref_list1.len() {
+                        let mut equal = true;
+                        for (pic1, pic2) in self.cur_refs.ref_list0.iter().zip(self.cur_refs.ref_list1.iter()) {
+                            match (pic1, pic2) {
+                                (Some(p1), Some(p2)) => {
+                                    if p1.full_id != p2.full_id {
+                                        equal = false;
+                                        break;
+                                    }
+                                },
+                                (None, None) => {},
+                                _ => {
+                                    equal = false;
+                                    break;
+                                },
+                            };
+                        }
+                        if equal {
+                            self.cur_refs.ref_list1.swap(0, 1);
+                        }
+                    }
+                } else {
+                    form_ref_list(&mut self.cur_refs.ref_list1,
+                                  &self.ref_pics, &self.long_term,
+                                  &slice_hdr.reordering_list_l1,
+                                  slice_hdr.frame_num, pic_num_mask);
+                }
+            }
+        }
+    }
+    pub fn add_short_term(&mut self, cpic: PictureInfo, num_ref_frames: usize) {
+        if !self.ref_pics.is_empty() && self.ref_pics.len() >= num_ref_frames {
+            let base_id = i32::from(cpic.id);
+            let mut min_id  = base_id;
+            let mut min_idx = 0;
+            for (i, pic) in self.ref_pics.iter().enumerate() {
+                let mut pic_id = i32::from(pic.id);
+                if pic_id > base_id {
+                    pic_id -= self.max_frame_num;
+                }
+                if pic_id < min_id {
+                    min_id = pic_id;
+                    min_idx = i;
+                }
+            }
+            self.ref_pics.remove(min_idx);
+        }
+        if self.ref_pics.is_empty() || self.ref_pics.last().unwrap().full_id < cpic.full_id {
+            self.ref_pics.push(cpic);
+        } else {
+            let mut idx = 0;
+            for (i, pic) in self.ref_pics.iter().enumerate() {
+                if pic.full_id < cpic.full_id {
+                    idx = i;
+                } else {
+                    break;
+                }
+            }
+            self.ref_pics.insert(idx + 1, cpic);
+        }
+    }
+    pub fn add_long_term(&mut self, lt_idx: usize, cpic: PictureInfo) {
+        if lt_idx < self.long_term.len() {
+            self.long_term[lt_idx] = Some(cpic);
+        }
+    }
+}
+
+fn form_ref_list(ref_list: &mut Vec<Option<PictureInfo>>, ref_pics: &[PictureInfo], long_term: &[Option<PictureInfo>], reord_info: &ReorderingInfo, cur_id: u16, pic_num_mask: u16) {
+    let mut ref_pic_id = cur_id;
+    for (&op, &num) in reord_info.reordering_of_pic_nums_idc.iter().zip(reord_info.abs_diff_or_num.iter()).take(reord_info.num_ops) {
+        if op < 2 {
+            if op == 0 {
+                ref_pic_id = ref_pic_id.wrapping_sub(num) & pic_num_mask;
+            } else {
+                ref_pic_id = ref_pic_id.wrapping_add(num) & pic_num_mask;
+            }
+            let mut found = false;
+            for pic in ref_pics.iter() {
+                if pic.id == ref_pic_id {
+                    ref_list.push(Some(pic.clone()));
+                    found = true;
+                    break;
+                }
+            }
+            if !found {
+                ref_list.push(None);
+            }
+        } else {
+            let idx = num as usize;
+            if idx < long_term.len() {
+                ref_list.push(long_term[idx].clone());
+            } else {
+                ref_list.push(None);
+            }
+        }
+    }
+}
diff --git a/hwdec-vaapi/src/sets.rs b/hwdec-vaapi/src/sets.rs
new file mode 100644 (file)
index 0000000..7fd090b
--- /dev/null
@@ -0,0 +1,425 @@
+use nihav_core::codecs::{DecoderResult, DecoderError};
+use nihav_core::io::bitreader::*;
+
+use super::ReadUE;
+
+/// Sequence parameter set contents as produced by `parse_sps`.
+///
+/// Fields that are not coded for a particular stream keep the zero/`false`
+/// value the parser starts with unless noted otherwise.
+#[derive(Clone)]
+pub struct SeqParameterSet {
+    pub profile_idc:                        u8,
+    /// Result of `is_high_profile(profile_idc)`.
+    pub high_profile:                       bool,
+    pub constraint_set0:                    bool,
+    pub constraint_set1:                    bool,
+    pub constraint_set2:                    bool,
+    pub level_idc:                          u8,
+    pub seq_parameter_set_id:               u32,
+    /// Coded only for high profiles (0..=3), otherwise set to 1.
+    pub chroma_format_idc:                  u8,
+    pub separate_colour_plane:              bool,
+    /// Stored as the coded value plus 8; set to 8 for non-high profiles.
+    pub bit_depth_luma:                     u8,
+    /// Stored as the coded value plus 8; set to 8 for non-high profiles.
+    pub bit_depth_chroma:                   u8,
+    pub qpprime_y_zero_transform_bypass:    bool,
+    pub seq_scaling_matrix_present:         bool,
+    /// All entries are 16 when no scaling matrix is coded.
+    pub scaling_list_4x4:                   [[u8; 16]; 6],
+    /// All entries are 16 when no scaling matrix is coded.
+    pub scaling_list_8x8:                   [[u8; 64]; 6],
+    /// Stored as the coded value plus 4.
+    pub log2_max_frame_num:                 u8,
+    pub pic_order_cnt_type:                 u8,
+    /// Stored as the coded value plus 4; read only when `pic_order_cnt_type` is 0.
+    pub log2_max_pic_order_cnt_lsb:         u8,
+    pub delta_pic_order_always_zero:        bool,
+    pub offset_for_non_ref_pic:             i32,
+    pub offset_for_top_to_bottom_field:     i32,
+    pub num_ref_frames_in_pic_order_cnt_cycle:  usize,
+    /// Only the first `num_ref_frames_in_pic_order_cnt_cycle` entries are filled.
+    pub offset_for_ref_frame:               [i32; 256],
+    /// Validated not to exceed `MAX_FRAMES`.
+    pub num_ref_frames:                     usize,
+    pub gaps_in_frame_num_value_allowed:    bool,
+    /// Stored as the coded value plus 1; validated to be at most 1024.
+    pub pic_width_in_mbs:                   usize,
+    /// Stored as the coded value plus 1; validated to be at most 1024.
+    pub pic_height_in_mbs:                  usize,
+    pub frame_mbs_only:                     bool,
+    /// Read only when `frame_mbs_only` is not set.
+    pub mb_adaptive_frame_field:            bool,
+    pub direct_8x8_inference:               bool,
+    pub frame_cropping:                     bool,
+    pub frame_crop_left_offset:             usize,
+    pub frame_crop_right_offset:            usize,
+    pub frame_crop_top_offset:              usize,
+    pub frame_crop_bottom_offset:           usize,
+    /// The VUI payload itself is parsed over but not stored.
+    pub vui_parameters_present:             bool,
+}
+
+pub fn is_high_profile(profile: u8) -> bool {
+    matches!(profile, 100 | 110 | 122 | 244 | 44 | 83 | 86 | 118 | 128 | 138 | 139 | 134 | 125)
+}
+
+#[allow(clippy::cognitive_complexity)]
+pub fn parse_sps(src: &[u8]) -> DecoderResult<SeqParameterSet> {
+    let mut br = BitReader::new(src, BitReaderMode::BE);
+    let mut sps: SeqParameterSet = unsafe { std::mem::zeroed() };
+
+    sps.profile_idc                                 = br.read(8)? as u8;
+    sps.constraint_set0                             = br.read_bool()?;
+    sps.constraint_set1                             = br.read_bool()?;
+    sps.constraint_set2                             = br.read_bool()?;
+    let reserved                                    = br.read(5)?;
+    validate!(reserved == 0);
+    sps.level_idc                                   = br.read(8)? as u8;
+    sps.seq_parameter_set_id                        = br.read_ue()?;
+    sps.high_profile = is_high_profile(sps.profile_idc);
+    if sps.high_profile {
+        sps.chroma_format_idc                       = br.read_ue_lim(3)? as u8;
+        if sps.chroma_format_idc == 3 {
+            sps.separate_colour_plane               = br.read_bool()?;
+        }
+        sps.bit_depth_luma                          = br.read_ue_lim(6)? as u8 + 8;
+        sps.bit_depth_chroma                        = br.read_ue_lim(6)? as u8 + 8;
+        sps.qpprime_y_zero_transform_bypass         = br.read_bool()?;
+        sps.seq_scaling_matrix_present              = br.read_bool()?;
+        if sps.seq_scaling_matrix_present {
+            let mut slist_present = [false; 6];
+            for (i, slist) in sps.scaling_list_4x4.iter_mut().enumerate() {
+                slist_present[i]                    = br.read_bool()?;
+                if slist_present[i] {
+                    parse_scaling_list(&mut br, slist, i < 3)?;
+                }
+            }
+            for i in 1..6 {
+                if i == 3 {
+                    continue;
+                }
+                if !slist_present[i] {
+                    sps.scaling_list_4x4[i] = sps.scaling_list_4x4[i - 1];
+                }
+            }
+
+            let mut slist_present = [false; 6];
+            let num_8x8 = if sps.chroma_format_idc != 3 { 2 } else { 6 };
+            for (i, slist) in sps.scaling_list_8x8.iter_mut().take(num_8x8).enumerate() {
+                slist_present[i]                    = br.read_bool()?;
+                if slist_present[i] {
+                    parse_scaling_list(&mut br, slist, (i & 1) == 0)?;
+                }
+            }
+            if num_8x8 > 2 {
+                for i in 2..6 {
+                    if !slist_present[i] {
+                        sps.scaling_list_8x8[i] = sps.scaling_list_8x8[i - 2];
+                    }
+                }
+            }
+        } else {
+            sps.scaling_list_4x4 = [[16; 16]; 6];
+            sps.scaling_list_8x8 = [[16; 64]; 6];
+        }
+    } else {
+        sps.chroma_format_idc = 1;
+        sps.bit_depth_luma = 8;
+        sps.bit_depth_chroma = 8;
+        sps.scaling_list_4x4 = [[16; 16]; 6];
+        sps.scaling_list_8x8 = [[16; 64]; 6];
+    }
+    sps.log2_max_frame_num                          = (br.read_ue_lim(12)? + 4) as u8;
+    sps.pic_order_cnt_type                          = br.read_ue_lim(2)? as u8;
+    match sps.pic_order_cnt_type {
+        0 => {
+            sps.log2_max_pic_order_cnt_lsb          = (br.read_ue_lim(12)? + 4) as u8;
+        },
+        1 => {
+            sps.delta_pic_order_always_zero         = br.read_bool()?;
+            sps.offset_for_non_ref_pic              = br.read_se()?;
+            sps.offset_for_top_to_bottom_field      = br.read_se()?;
+            sps.num_ref_frames_in_pic_order_cnt_cycle   = br.read_ue_lim(255)? as usize;
+            for offset in sps.offset_for_ref_frame[..sps.num_ref_frames_in_pic_order_cnt_cycle].iter_mut() {
+                *offset                             = br.read_se()?;
+            }
+        },
+        _ => {},
+    };
+    sps.num_ref_frames                              = br.read_ue()? as usize;
+    validate!(sps.num_ref_frames <= super::slice::MAX_FRAMES);
+    sps.gaps_in_frame_num_value_allowed             = br.read_bool()?;
+    sps.pic_width_in_mbs                            = (br.read_ue()? + 1) as usize;
+    sps.pic_height_in_mbs                           = (br.read_ue()? + 1) as usize;
+    validate!(sps.pic_width_in_mbs <= 1024 && sps.pic_height_in_mbs <= 1024);
+    sps.frame_mbs_only                              = br.read_bool()?;
+    if !sps.frame_mbs_only {
+        sps.mb_adaptive_frame_field                 = br.read_bool()?;
+    }
+    sps.direct_8x8_inference                        = br.read_bool()?;
+    sps.frame_cropping                              = br.read_bool()?;
+    if sps.frame_cropping {
+        sps.frame_crop_left_offset                  = br.read_ue()? as usize;
+        sps.frame_crop_right_offset                 = br.read_ue()? as usize;
+        sps.frame_crop_top_offset                   = br.read_ue()? as usize;
+        sps.frame_crop_bottom_offset                = br.read_ue()? as usize;
+        let l = sps.frame_crop_left_offset * 2;
+        let r = sps.pic_width_in_mbs * 16 - sps.frame_crop_right_offset * 2;
+        let t = sps.frame_crop_top_offset * 2;
+        let d = sps.pic_height_in_mbs * 16 - sps.frame_crop_bottom_offset * 2;
+        validate!(l < r && t < d);
+    }
+    sps.vui_parameters_present                      = br.read_bool()?;
+    if sps.vui_parameters_present {
+        // xxx: vui is ignored for now
+        if br.read_bool()? {
+            let idc = br.read(8)?;
+            if idc == 255 {
+                br.read(16)?;
+                br.read(16)?;
+            }
+        }
+        if br.read_bool()? {
+            br.read_bool()?;
+        }
+        if br.read_bool()? {
+            br.read(3)?;
+            br.read_bool()?;
+            if br.read_bool()? {
+                br.read(8)?;
+                br.read(8)?;
+                br.read(8)?;
+            }
+        }
+        if br.read_bool()? {
+            br.read_ue()?;
+            br.read_ue()?;
+        }
+        if br.read_bool()? {
+            br.read(32)?;
+            br.read(32)?;
+            br.read_bool()?;
+        }
+        let nal_hrd_parameters_present = br.read_bool()?;
+        if nal_hrd_parameters_present {
+            skip_hrd_parameters(&mut br)?;
+        }
+        let vcl_hrd_parameters_present = br.read_bool()?;
+        if vcl_hrd_parameters_present {
+            skip_hrd_parameters(&mut br)?;
+        }
+        if nal_hrd_parameters_present || vcl_hrd_parameters_present {
+            br.read_bool()?;
+        }
+        br.read_bool()?;
+        if br.read_bool()? {
+            br.read_bool()?;
+            br.read_ue()?;
+            br.read_ue()?;
+            br.read_ue()?;
+            br.read_ue()?;
+            br.read_ue()?;
+            br.read_ue()?;
+        }
+    }
+
+    Ok(sps)
+}
+
+fn parse_scaling_list(br: &mut BitReader, slist: &mut[u8], is_intra: bool) -> DecoderResult<()> {
+    const DEFAULT_INTRA_4X4: [u8; 16] = [
+        6, 13, 13, 20, 20, 20, 28, 28, 28, 28, 32, 32, 32, 37, 37, 42
+    ];
+    const DEFAULT_INTER_4X4: [u8; 16] = [
+        10, 14, 14, 20, 20, 20, 24, 24, 24, 24, 27, 27, 27, 30, 30, 34
+    ];
+    const DEFAULT_INTRA_8X8: [u8; 64] = [
+         6, 10, 10, 13, 11, 13, 16, 16, 16, 16, 18, 18, 18, 18, 18, 23,
+        23, 23, 23, 23, 23, 25, 25, 25, 25, 25, 25, 25, 27, 27, 27, 27,
+        27, 27, 27, 27, 29, 29, 29, 29, 29, 29, 29, 31, 31, 31, 31, 31,
+        31, 33, 33, 33, 33, 33, 36, 36, 36, 36, 38, 38, 38, 40, 40, 42
+    ];
+    const DEFAULT_INTER_8X8: [u8; 64] = [
+         9, 13, 13, 15, 13, 15, 17, 17, 17, 17, 19, 19, 19, 19, 19, 21,
+        21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 24, 24, 24, 24,
+        24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 27, 27, 27, 27, 27,
+        27, 28, 28, 28, 28, 28, 30, 30, 30, 30, 32, 32, 32, 33, 33, 35
+    ];
+    let mut last_scale = 8u8;
+    let mut next_scale = 8u8;
+    let mut use_default = false;
+    for (j, elem) in slist.iter_mut().enumerate() {
+        if next_scale != 0 {
+            let delta                   = br.read_se()?;
+            next_scale = last_scale.wrapping_add(delta as u8);
+            if (j == 0) && (next_scale == 0) {
+                use_default = true;
+                break;
+            }
+        }
+        *elem = if next_scale == 0 { last_scale } else { next_scale };
+        last_scale = *elem;
+    }
+    if use_default {
+        match (slist.len(), is_intra) {
+            (16, true)  => slist.copy_from_slice(&DEFAULT_INTRA_4X4),
+            (16, false) => slist.copy_from_slice(&DEFAULT_INTER_4X4),
+            (64, true)  => slist.copy_from_slice(&DEFAULT_INTRA_8X8),
+            (64, false) => slist.copy_from_slice(&DEFAULT_INTER_8X8),
+            _ => unreachable!(),
+        };
+    }
+    Ok(())
+}
+
+fn skip_hrd_parameters(br: &mut BitReader) -> DecoderResult<()> {
+    let cpb_cnt = br.read_ue()? as usize + 1;
+    br.read(4)?;
+    br.read(4)?;
+    for _ in 0..cpb_cnt {
+        br.read_ue()?;
+        br.read_ue()?;
+        br.read_bool()?;
+    }
+    br.read(5)?;
+    br.read(5)?;
+    br.read(5)?;
+    br.read(5)?;
+    Ok(())
+}
+
+/// Largest number of slice groups accepted by `parse_pps`; also the size of
+/// the per-group arrays in `PicParameterSet`.
+const MAX_SLICE_GROUPS: usize = 8;
+
+/// Picture parameter set contents as produced by `parse_pps`.
+#[derive(Clone)]
+pub struct PicParameterSet {
+    pub pic_parameter_set_id:               u32,
+    /// Id of the sequence parameter set this PPS refers to.
+    pub seq_parameter_set_id:               u32,
+    pub entropy_coding_mode:                bool,
+    pub pic_order_present:                  bool,
+    /// Stored as the coded value plus 1; validated not to exceed `MAX_SLICE_GROUPS`.
+    pub num_slice_groups:                   usize,
+    pub slice_group_map_type:               u8,
+    pub run_length:                         [u32; MAX_SLICE_GROUPS],
+    pub top_left:                           [u32; MAX_SLICE_GROUPS],
+    pub bottom_right:                       [u32; MAX_SLICE_GROUPS],
+    pub slice_group_change_direction:       bool,
+    pub slice_group_change_rate:            u32,
+    pub pic_size_in_map_units:              u32,
+    /// Stored as the coded value plus 1.
+    pub num_ref_idx_l0_active:              usize,
+    /// Stored as the coded value plus 1.
+    pub num_ref_idx_l1_active:              usize,
+    pub weighted_pred:                      bool,
+    pub weighted_bipred_idc:                u8,
+    /// Stored as the coded value plus 26.
+    pub pic_init_qp:                        u8,
+    /// Stored as the coded value plus 26.
+    pub pic_init_qs:                        u8,
+    /// Validated to lie in -12..=12.
+    pub chroma_qp_index_offset:             i8,
+    pub deblocking_filter_control_present:  bool,
+    pub constrained_intra_pred:             bool,
+    pub redundant_pic_cnt_present:          bool,
+    pub transform_8x8_mode:                 bool,
+    pub pic_scaling_matrix_present:         bool,
+    /// Copied from the SPS when the PPS carries no scaling matrix of its own.
+    pub scaling_list_4x4:                   [[u8; 16]; 6],
+    /// Copied from the SPS when the PPS carries no scaling matrix of its own.
+    pub scaling_list_8x8:                   [[u8; 64]; 6],
+    /// Equals `chroma_qp_index_offset` when the PPS has no extension data.
+    pub second_chroma_qp_index_offset:      i8,
+}
+
+pub fn parse_pps(src: &[u8], sps_arr: &[SeqParameterSet], full_size: usize) -> DecoderResult<PicParameterSet> {
+    let mut br = BitReader::new(src, BitReaderMode::BE);
+    let mut pps: PicParameterSet = unsafe { std::mem::zeroed() };
+
+    pps.pic_parameter_set_id                        = br.read_ue()?;
+    pps.seq_parameter_set_id                        = br.read_ue()?;
+    let mut found = false;
+    let mut cur_sps = None;
+    for sps in sps_arr.iter() {
+        if sps.seq_parameter_set_id == pps.seq_parameter_set_id {
+            found = true;
+            cur_sps = Some(sps);
+            break;
+        }
+    }
+    validate!(found);
+    let sps = cur_sps.unwrap();
+    pps.entropy_coding_mode                         = br.read_bool()?;
+    pps.pic_order_present                           = br.read_bool()?;
+    pps.num_slice_groups                            = (br.read_ue()? + 1) as usize;
+    validate!(pps.num_slice_groups <= MAX_SLICE_GROUPS);
+    if pps.num_slice_groups > 1 {
+        let smtype                                  = br.read_ue()?;
+        validate!(smtype <= 6);
+        pps.slice_group_map_type = smtype as u8;
+        match pps.slice_group_map_type {
+            0 => {
+                for elem in pps.run_length[..pps.num_slice_groups].iter_mut() {
+                    *elem                           = br.read_ue()?;
+                }
+            },
+            2 => {
+                for i in 0..pps.num_slice_groups - 1 {
+                    pps.top_left[i]                 = br.read_ue()?;
+                    pps.bottom_right[i]             = br.read_ue()?;
+                }
+            },
+            3 | 4 | 5 => {
+                pps.slice_group_change_direction    = br.read_bool()?;
+                pps.slice_group_change_rate         = br.read_ue()?;
+            },
+            6 => {
+                pps.pic_size_in_map_units           = br.read_ue()? + 1;
+                for _ in 0..pps.pic_size_in_map_units {
+                    let _slice_group_id             = br.read_ue()?;
+                }
+            },
+            _ => {},
+        };
+println!("slice mode!");
+        return Err(DecoderError::NotImplemented);
+    }
+    pps.num_ref_idx_l0_active                       = (br.read_ue()? + 1) as usize;
+    pps.num_ref_idx_l1_active                       = (br.read_ue()? + 1) as usize;
+    pps.weighted_pred                               = br.read_bool()?;
+    pps.weighted_bipred_idc                         = br.read(2)? as u8;
+    let qp                                          = br.read_se()? + 26;
+    validate!(qp > 0 && qp < 52);
+    pps.pic_init_qp = qp as u8;
+    let qs                                          = br.read_se()? + 26;
+    validate!(qs > 0 && qs < 52);
+    pps.pic_init_qs = qs as u8;
+    let off                                         = br.read_se()?;
+    validate!(off >= -12 && off <= 12);
+    pps.chroma_qp_index_offset = off as i8;
+    pps.deblocking_filter_control_present           = br.read_bool()?;
+    pps.constrained_intra_pred                      = br.read_bool()?;
+    pps.redundant_pic_cnt_present                   = br.read_bool()?;
+    if br.tell() < full_size {
+        pps.transform_8x8_mode                      = br.read_bool()?;
+        pps.pic_scaling_matrix_present              = br.read_bool()?;
+        if pps.pic_scaling_matrix_present {
+            let mut slist_present = [false; 6];
+            for (i, slist) in pps.scaling_list_4x4.iter_mut().enumerate() {
+                slist_present[i]                    = br.read_bool()?;
+                if slist_present[i] {
+                    parse_scaling_list(&mut br, slist, i < 3)?;
+                }
+            }
+            for i in 1..6 {
+                if i == 3 {
+                    continue;
+                }
+                if !slist_present[i] {
+                    pps.scaling_list_4x4[i] = pps.scaling_list_4x4[i - 1];
+                }
+            }
+
+            let mut slist_present = [false; 6];
+            let num_8x8 = if !pps.transform_8x8_mode { 0 } else if sps.chroma_format_idc != 3 { 2 } else { 6 };
+            for (i, slist) in pps.scaling_list_8x8.iter_mut().take(num_8x8).enumerate() {
+                slist_present[i]                    = br.read_bool()?;
+                if slist_present[i] {
+                    parse_scaling_list(&mut br, slist, (i & 1) == 0)?;
+                }
+            }
+            if num_8x8 > 2 {
+                for i in 2..6 {
+                    if !slist_present[i] {
+                        pps.scaling_list_8x8[i] = pps.scaling_list_8x8[i - 2];
+                    }
+                }
+            }
+        } else {
+            pps.scaling_list_4x4 = sps.scaling_list_4x4;
+            pps.scaling_list_8x8 = sps.scaling_list_8x8;
+        }
+        let off                                     = br.read_se()?;
+        validate!(off >= -12 && off <= 12);
+        pps.second_chroma_qp_index_offset = off as i8;
+    } else {
+        pps.second_chroma_qp_index_offset = pps.chroma_qp_index_offset;
+        pps.scaling_list_4x4 = sps.scaling_list_4x4;
+        pps.scaling_list_8x8 = sps.scaling_list_8x8;
+    }
+
+    Ok(pps)
+}
diff --git a/hwdec-vaapi/src/slice.rs b/hwdec-vaapi/src/slice.rs
new file mode 100644 (file)
index 0000000..692b50f
--- /dev/null
@@ -0,0 +1,430 @@
+use nihav_core::codecs::{DecoderResult, DecoderError};
+use nihav_core::frame::FrameType;
+use nihav_core::io::bitreader::*;
+
+use super::ReadUE;
+use super::sets::*;
+
+/// Capacity of the per-slice reference tables (weights, reordering and marking
+/// operations); `parse_sps` also uses it as the upper limit for `num_ref_frames`.
+pub const MAX_FRAMES: usize = 32;
+
+#[derive(Clone,Copy,Debug,PartialEq)]
+pub enum SliceType {
+    I,
+    P,
+    B,
+    SI,
+    SP,
+}
+
+impl SliceType {
+    pub fn is_intra(self) -> bool {
+        matches!(self, SliceType::I | SliceType::SI)
+    }
+    pub fn is_p(self) -> bool {
+        matches!(self, SliceType::P | SliceType::SP)
+    }
+    pub fn is_b(self) -> bool { self == SliceType::B }
+    pub fn is_s(self) -> bool {
+        matches!(self, SliceType::SI | SliceType::SP)
+    }
+    pub fn to_frame_type(self) -> FrameType {
+        match self {
+            SliceType::I | SliceType::SI => FrameType::I,
+            SliceType::P | SliceType::SP => FrameType::P,
+            SliceType::B                 => FrameType::B,
+        }
+    }
+}
+
+/// Maps the coded slice type value (0..=9) to `SliceType`; values 5..=9 repeat
+/// the types of 0..=4.
+const SLICE_TYPES: [SliceType; 10] = [
+    SliceType::P, SliceType::B, SliceType::I, SliceType::SP, SliceType::SI,
+    SliceType::P, SliceType::B, SliceType::I, SliceType::SP, SliceType::SI,
+];
+
+#[derive(Clone,Copy,Default)]
+pub struct WeightInfo {
+    pub luma_weighted:                      bool,
+    pub luma_weight:                        i8,
+    pub luma_offset:                        i8,
+    pub luma_shift:                         u8,
+    pub chroma_weighted:                    bool,
+    pub chroma_weight:                      [i8; 2],
+    pub chroma_offset:                      [i8; 2],
+    pub chroma_shift:                       u8,
+}
+
+impl WeightInfo {
+    pub fn is_weighted(&self) -> bool {
+        self.luma_weighted || self.chroma_weighted
+    }
+}
+
+/// Reference list reordering commands of one slice; `form_ref_list` executes
+/// the first `num_ops` of them.
+#[derive(Clone,Copy)]
+pub struct ReorderingInfo {
+    /// Command codes: 0 subtracts and 1 adds the argument to the running
+    /// picture id, other values select a long-term picture by index.
+    pub reordering_of_pic_nums_idc:         [u8; MAX_FRAMES],
+    /// Argument belonging to the command with the same index.
+    pub abs_diff_or_num:                    [u16; MAX_FRAMES],
+    /// Number of valid entries in the two arrays.
+    pub num_ops:                            usize,
+}
+
+/// Memory management control operations of one slice, stored as parallel
+/// arrays of operation codes and their (up to two) arguments.
+#[derive(Clone,Copy)]
+pub struct AdaptiveMarking {
+    pub memory_management_control_op:       [u8; MAX_FRAMES],
+    pub operation_arg:                      [u16; MAX_FRAMES],
+    pub operation_arg2:                     [u16; MAX_FRAMES],
+    // presumably the number of valid entries in the arrays above - confirm against the parser
+    pub num_ops:                            usize,
+}
+
+/// Slice header contents as filled in by `parse_slice_header`.
+#[derive(Clone)]
+pub struct SliceHeader {
+    pub first_mb_in_slice:                  usize,
+    /// Looked up in `SLICE_TYPES` from the coded slice type value.
+    pub slice_type:                         SliceType,
+    /// Set when the coded slice type value is 5 or larger.
+    pub same_slice_type:                    bool,
+    pub pic_parameter_set_id:               u32,
+    /// Read with `log2_max_frame_num` bits of the active SPS.
+    pub frame_num:                          u16,
+    /// Read only when the SPS does not signal `frame_mbs_only`.
+    pub field_pic:                          bool,
+    /// Read only when `field_pic` is set.
+    pub bottom_field:                       bool,
+    /// Read only for IDR slices.
+    pub idr_pic_id:                         u16,
+    /// Read only when the SPS uses picture order count type 0.
+    pub pic_order_cnt_lsb:                  u16,
+    pub delta_pic_order_cnt_bottom:         i32,
+    pub delta_pic_order_cnt:                [i32; 2],
+    /// Read only when the PPS signals `redundant_pic_cnt_present`.
+    pub redundant_pic_cnt:                  u8,
+    /// Read only for B slices.
+    pub direct_spatial_mv_pred:             bool,
+    pub num_ref_idx_active_override:        bool,
+    /// Number of active list 0 references; bounds `get_weight` lookups.
+    pub num_ref_idx_l0_active:              usize,
+    /// Number of active list 1 references; bounds `get_weight` lookups.
+    pub num_ref_idx_l1_active:              usize,
+    pub ref_pic_list_reordering_l0:         bool,
+    pub reordering_list_l0:                 ReorderingInfo,
+    pub ref_pic_list_reordering_l1:         bool,
+    pub reordering_list_l1:                 ReorderingInfo,
+    pub luma_log2_weight_denom:             u8,
+    pub chroma_log2_weight_denom:           u8,
+    pub weights_l0:                         [WeightInfo; MAX_FRAMES],
+    pub weights_l1:                         [WeightInfo; MAX_FRAMES],
+    pub no_output_of_prior_pics:            bool,
+    pub long_term_reference:                bool,
+    pub adaptive_ref_pic_marking_mode:      bool,
+    pub adaptive_ref_pic_marking:           AdaptiveMarking,
+    pub cabac_init_idc:                     u8,
+    pub slice_qp_delta:                     i32,
+    pub slice_qp:                           u8,
+    pub sp_for_switch:                      bool,
+    pub slice_qs_delta:                     i32,
+    pub slice_qs:                           u8,
+    pub disable_deblocking_filter_idc:      u8,
+    pub slice_alpha_c0_offset:              i8,
+    pub slice_beta_offset:                  i8,
+    pub slice_group_change_cycle:           u32,
+}
+
+/// Weight information with both weighting flags cleared and all values zero;
+/// `SliceHeader::get_weight` returns it for indices outside the active range.
+pub const DEF_WEIGHT_INFO: WeightInfo = WeightInfo {
+    luma_weighted:                      false,
+    luma_weight:                        0,
+    luma_offset:                        0,
+    luma_shift:                         0,
+    chroma_weighted:                    false,
+    chroma_weight:                      [0; 2],
+    chroma_offset:                      [0; 2],
+    chroma_shift:                       0,
+};
+
+impl SliceHeader {
+    #[allow(clippy::collapsible_else_if)]
+    pub fn get_weight(&self, list_id: u8, idx: usize) -> WeightInfo {
+        if list_id == 0 {
+            if idx < self.num_ref_idx_l0_active {
+                self.weights_l0[idx]
+            } else {
+                DEF_WEIGHT_INFO
+            }
+        } else {
+            if idx < self.num_ref_idx_l1_active {
+                self.weights_l1[idx]
+            } else {
+                DEF_WEIGHT_INFO
+            }
+        }
+    }
+}
+
+/*pub fn parse_slice_header_minimal(br: &mut BitReader) -> DecoderResult<(usize, SliceType)> {
+    let first_mb_in_slice                           = br.read_ue()? as usize;
+    let stype                                       = br.read_ue_lim(SLICE_TYPES.len() as u32 - 1)?;
+    let slice_type = SLICE_TYPES[stype as usize];
+    Ok((first_mb_in_slice, slice_type))
+}*/
+
+#[allow(clippy::cognitive_complexity)]
+#[allow(clippy::manual_range_contains)]
+pub fn parse_slice_header(br: &mut BitReader, sps_arr: &[SeqParameterSet], pps_arr: &[PicParameterSet], is_idr: bool, nal_ref_idc: u8) -> DecoderResult<SliceHeader> {
+    let mut hdr: SliceHeader = unsafe { std::mem::zeroed() };
+
+    hdr.first_mb_in_slice                           = br.read_ue()? as usize;
+    let stype                                       = br.read_ue_lim(SLICE_TYPES.len() as u32 - 1)?;
+    hdr.slice_type = SLICE_TYPES[stype as usize];
+    hdr.same_slice_type = stype >= 5;
+    hdr.pic_parameter_set_id                        = br.read_ue()?;
+
+    let mut pps_ptr = None;
+    for pps in pps_arr.iter() {
+        if pps.pic_parameter_set_id == hdr.pic_parameter_set_id {
+            pps_ptr = Some(pps);
+            break;
+        }
+    }
+    validate!(pps_ptr.is_some());
+    let pps = pps_ptr.unwrap();
+    let mut sps_ptr = None;
+    for sps in sps_arr.iter() {
+        if sps.seq_parameter_set_id == pps.seq_parameter_set_id {
+            sps_ptr = Some(sps);
+            break;
+        }
+    }
+    validate!(sps_ptr.is_some());
+    let sps = sps_ptr.unwrap();
+
+    hdr.frame_num                                   = br.read(sps.log2_max_frame_num)? as u16;
+    if !sps.frame_mbs_only {
+        hdr.field_pic                               = br.read_bool()?;
+        if hdr.field_pic {
+            hdr.bottom_field                        = br.read_bool()?;
+        }
+    }
+
+    if is_idr {
+        hdr.idr_pic_id                              = br.read_ue_lim(65535)? as u16;
+    }
+    if sps.pic_order_cnt_type == 0 {
+        hdr.pic_order_cnt_lsb                       = br.read(sps.log2_max_pic_order_cnt_lsb)? as u16;
+        if pps.pic_order_present && !hdr.field_pic {
+            hdr.delta_pic_order_cnt_bottom          = br.read_se()?;
+        }
+    } else if sps.pic_order_cnt_type == 1 && !sps.delta_pic_order_always_zero {
+        hdr.delta_pic_order_cnt[0]                  = br.read_se()?;
+        if pps.pic_order_present && !hdr.field_pic {
+            hdr.delta_pic_order_cnt[1]              = br.read_se()?;
+        }
+    }
+    if pps.redundant_pic_cnt_present {
+        hdr.redundant_pic_cnt                       = br.read_ue_lim(127)? as u8;
+    }
+    if hdr.slice_type.is_b() {
+        hdr.direct_spatial_mv_pred                  = br.read_bool()?;
+    }
+    if !hdr.slice_type.is_intra() {
+        hdr.num_ref_idx_active_override             = br.read_bool()?;
+        if hdr.num_ref_idx_active_override {
+            hdr.num_ref_idx_l0_active               = (br.read_ue_lim(15)? + 1) as usize;
+            if hdr.slice_type.is_b() {
+                hdr.num_ref_idx_l1_active           = (br.read_ue_lim(15)? + 1) as usize;
+            }
+        } else {
+            hdr.num_ref_idx_l0_active = pps.num_ref_idx_l0_active;
+            if hdr.slice_type.is_b() {
+                hdr.num_ref_idx_l1_active = pps.num_ref_idx_l1_active;
+            }
+        }
+    }
+    parse_ref_pic_list_reordering(&mut hdr, br)?;
+    if (pps.weighted_pred && hdr.slice_type.is_p()) ||
+        (pps.weighted_bipred_idc == 1 && hdr.slice_type.is_b()) {
+        parse_pred_weight_table(&mut hdr, br)?;
+    } else {
+        for weight in hdr.weights_l0[..hdr.num_ref_idx_l0_active].iter_mut() {
+            weight.luma_weighted = false;
+            weight.chroma_weighted = false;
+        }
+        for weight in hdr.weights_l1[..hdr.num_ref_idx_l1_active].iter_mut() {
+            weight.luma_weighted = false;
+            weight.chroma_weighted = false;
+        }
+    }
+    if nal_ref_idc != 0 {
+        if is_idr {
+            hdr.no_output_of_prior_pics             = br.read_bool()?;
+            hdr.long_term_reference                 = br.read_bool()?;
+        } else {
+            hdr.adaptive_ref_pic_marking_mode       = br.read_bool()?;
+            if hdr.adaptive_ref_pic_marking_mode {
+                let mark_info = &mut hdr.adaptive_ref_pic_marking;
+                loop {
+                    let memory_management_control_op = br.read_ue_lim(6)? as u8;
+                    if memory_management_control_op == 0 {
+                        break;
+                    }
+                    if mark_info.num_ops >= mark_info.memory_management_control_op.len() {
+                        return Err(DecoderError::NotImplemented);
+                    }
+                    mark_info.memory_management_control_op[mark_info.num_ops] = memory_management_control_op;
+                    mark_info.operation_arg[mark_info.num_ops] = match memory_management_control_op {
+                            1 | 3 => {
+                                let difference_of_pic_nums = br.read_ue()? + 1;
+                                difference_of_pic_nums as u16
+                            },
+                            2 => {
+                                let long_term_pic_num = br.read_ue_lim(65535)?;
+                                long_term_pic_num as u16
+                            },
+                            6 => {
+                                let long_term_frame_idx = br.read_ue_lim(65536)?;
+                                long_term_frame_idx as u16
+                            },
+                            4 => {
+                                let max_long_term_frame_idx_plus1 = br.read_ue()?;
+                                max_long_term_frame_idx_plus1 as u16
+                            },
+                            _ => 0,
+                        };
+                    mark_info.operation_arg2[mark_info.num_ops] = if memory_management_control_op == 3 {
+                            let long_term_frame_idx = br.read_ue_lim(65536)?;
+                            long_term_frame_idx as u16
+                        } else {
+                            0
+                        };
+                    mark_info.num_ops += 1;
+                }
+            }
+        }
+    }
+    if pps.entropy_coding_mode && !hdr.slice_type.is_intra() {
+        hdr.cabac_init_idc                          = br.read_ue_lim(2)? as u8;
+    }
+    hdr.slice_qp_delta                              = br.read_se()?;
+    let qp = i32::from(pps.pic_init_qp) + hdr.slice_qp_delta;
+    validate!(qp >= 0 && qp <= 51);
+    hdr.slice_qp = qp as u8;
+    if hdr.slice_type.is_s() {
+        if hdr.slice_type == SliceType::SP {
+            hdr.sp_for_switch                       = br.read_bool()?;
+        }
+        hdr.slice_qs_delta                          = br.read_se()?;
+        let qs = i32::from(pps.pic_init_qs) + hdr.slice_qs_delta;
+        validate!(qs >= 0 && qs <= 51);
+        hdr.slice_qs = qs as u8;
+    }
+    if pps.deblocking_filter_control_present {
+        hdr.disable_deblocking_filter_idc           = br.read_ue_lim(2)? as u8;
+        if hdr.disable_deblocking_filter_idc != 1 {
+            let val                                 = br.read_se()?;
+            validate!(val >= -6 && val <= 6);
+            hdr.slice_alpha_c0_offset = val as i8 * 2;
+            let val                                 = br.read_se()?;
+            validate!(val >= -6 && val <= 6);
+            hdr.slice_beta_offset = val as i8 * 2;
+        }
+    }
+    if pps.num_slice_groups > 1 && pps.slice_group_map_type >= 3 && pps.slice_group_map_type <= 5 {
+        hdr.slice_group_change_cycle                = br.read_ue()?;
+    }
+
+    Ok(hdr)
+}
+
+fn parse_ref_pic_list_reordering(hdr: &mut SliceHeader, br: &mut BitReader) -> DecoderResult<()> {
+    if !hdr.slice_type.is_intra() {
+        hdr.ref_pic_list_reordering_l0              = br.read_bool()?;
+        let reord_list = &mut hdr.reordering_list_l0;
+        reord_list.num_ops = 0;
+        if hdr.ref_pic_list_reordering_l0 {
+            loop {
+                let reordering_of_pic_nums_idc      = br.read_ue_lim(3)?;
+                if reordering_of_pic_nums_idc == 3 {
+                    break;
+                }
+                validate!(reord_list.num_ops < MAX_FRAMES);
+                reord_list.reordering_of_pic_nums_idc[reord_list.num_ops] = reordering_of_pic_nums_idc as u8;
+                if reordering_of_pic_nums_idc != 2 {
+                    let abs_diff_pic_num            = br.read_ue()? + 1;
+                    reord_list.abs_diff_or_num[reord_list.num_ops] = abs_diff_pic_num as u16;
+                } else {
+                    let long_term_pic_num           = br.read_ue()?;
+                    reord_list.abs_diff_or_num[reord_list.num_ops] = long_term_pic_num as u16;
+                }
+                reord_list.num_ops += 1;
+            }
+            validate!(reord_list.num_ops > 0);
+        }
+    }
+    if hdr.slice_type.is_b() {
+        hdr.ref_pic_list_reordering_l1              = br.read_bool()?;
+        let reord_list = &mut hdr.reordering_list_l1;
+        reord_list.num_ops = 0;
+        if hdr.ref_pic_list_reordering_l1 {
+            loop {
+                let reordering_of_pic_nums_idc      = br.read_ue_lim(3)?;
+                if reordering_of_pic_nums_idc == 3 {
+                    break;
+                }
+                validate!(reord_list.num_ops < MAX_FRAMES);
+                reord_list.reordering_of_pic_nums_idc[reord_list.num_ops] = reordering_of_pic_nums_idc as u8;
+                if reordering_of_pic_nums_idc != 2 {
+                    let abs_diff_pic_num            = br.read_ue()? + 1;
+                    reord_list.abs_diff_or_num[reord_list.num_ops] = abs_diff_pic_num as u16;
+                } else {
+                    let long_term_pic_num           = br.read_ue()?;
+                    reord_list.abs_diff_or_num[reord_list.num_ops] = long_term_pic_num as u16;
+                }
+                reord_list.num_ops += 1;
+            }
+            validate!(reord_list.num_ops > 0);
+        }
+    }
+    Ok(())
+}
+
+fn parse_pred_weight_table(hdr: &mut SliceHeader, br: &mut BitReader) -> DecoderResult<()> {
+    hdr.luma_log2_weight_denom                      = br.read_ue_lim(7)? as u8;
+    hdr.chroma_log2_weight_denom                    = br.read_ue_lim(7)? as u8;
+    for weight in hdr.weights_l0[..hdr.num_ref_idx_l0_active].iter_mut() {
+        weight.luma_weighted                        = br.read_bool()?;
+        if weight.luma_weighted {
+            let w                                   = br.read_se()?;
+            validate!(w >= -128 && w <= 127);
+            weight.luma_weight = w as i8;
+            let offset                              = br.read_se()?;
+            validate!(offset >= -128 && offset <= 127);
+            weight.luma_offset = offset as i8;
+        }
+        weight.luma_shift = hdr.luma_log2_weight_denom;
+
+        weight.chroma_weighted                      = br.read_bool()?;
+        if weight.chroma_weighted {
+            for i in 0..2 {
+                let w                               = br.read_se()?;
+                validate!(w >= -128 && w <= 127);
+                weight.chroma_weight[i] = w as i8;
+                let offset                          = br.read_se()?;
+                validate!(offset >= -128 && offset <= 127);
+                weight.chroma_offset[i] = offset as i8;
+            }
+        }
+        weight.chroma_shift = hdr.chroma_log2_weight_denom;
+    }
+    for weight in hdr.weights_l1[..hdr.num_ref_idx_l1_active].iter_mut() {
+        weight.luma_weighted                        = br.read_bool()?;
+        if weight.luma_weighted {
+            let w                                   = br.read_se()?;
+            validate!(w >= -128 && w <= 127);
+            weight.luma_weight = w as i8;
+            let offset                              = br.read_se()?;
+            validate!(offset >= -128 && offset <= 127);
+            weight.luma_offset = offset as i8;
+        }
+        weight.luma_shift = hdr.luma_log2_weight_denom;
+
+        weight.chroma_weighted                      = br.read_bool()?;
+        if weight.chroma_weighted {
+            for i in 0..2 {
+                let w                               = br.read_se()?;
+                validate!(w >= -128 && w <= 127);
+                weight.chroma_weight[i] = w as i8;
+                let offset                          = br.read_se()?;
+                validate!(offset >= -128 && offset <= 127);
+                weight.chroma_offset[i] = offset as i8;
+            }
+        }
+        weight.chroma_shift = hdr.chroma_log2_weight_denom;
+    }
+
+    Ok(())
+}