use NAPacketiser::attach_stream() where appropriate
[nihav-player.git] / hwdec-vaapi / src / lib.rs
1 use std::collections::VecDeque;
2 use std::convert::TryInto;
3 use std::rc::Rc;
4
5 use nihav_core::codecs::*;
6 use nihav_core::io::byteio::*;
7 use nihav_core::io::bitreader::*;
8 use nihav_core::io::intcode::*;
9
10 use libva::*;
11
/// Checks a bitstream/parameter invariant, returning `DecoderError::InvalidData`
/// from the enclosing function when it does not hold.
/// The debug-build variant also prints the failing source location to ease
/// tracking down malformed streams.
#[cfg(debug_assertions)]
macro_rules! validate {
    ($a:expr) => { if !$a { println!("check failed at {}:{}", file!(), line!()); return Err(DecoderError::InvalidData); } };
}
/// Release-build variant: same early return, no diagnostic output.
#[cfg(not(debug_assertions))]
macro_rules! validate {
    ($a:expr) => { if !$a { return Err(DecoderError::InvalidData); } };
}
20
21 mod pic_ref;
22 pub use pic_ref::*;
23 #[allow(clippy::manual_range_contains)]
24 #[allow(clippy::needless_range_loop)]
25 mod sets;
26 use sets::*;
27 #[allow(clippy::manual_range_contains)]
28 mod slice;
29 use slice::*;
30
31 trait ReadUE {
32 fn read_ue(&mut self) -> DecoderResult<u32>;
33 fn read_te(&mut self, range: u32) -> DecoderResult<u32>;
34 fn read_ue_lim(&mut self, max_val: u32) -> DecoderResult<u32> {
35 let val = self.read_ue()?;
36 validate!(val <= max_val);
37 Ok(val)
38 }
39 fn read_se(&mut self) -> DecoderResult<i32> {
40 let val = self.read_ue()?;
41 if (val & 1) != 0 {
42 Ok (((val >> 1) as i32) + 1)
43 } else {
44 Ok (-((val >> 1) as i32))
45 }
46 }
47 }
48
49 impl<'a> ReadUE for BitReader<'a> {
50 fn read_ue(&mut self) -> DecoderResult<u32> {
51 Ok(self.read_code(UintCodeType::GammaP)? - 1)
52 }
53 fn read_te(&mut self, range: u32) -> DecoderResult<u32> {
54 if range == 1 {
55 if self.read_bool()? {
56 Ok(0)
57 } else {
58 Ok(1)
59 }
60 } else {
61 let val = self.read_ue()?;
62 validate!(val <= range);
63 Ok(val)
64 }
65 }
66 }
67
68 fn get_long_term_id(is_idr: bool, slice_hdr: &SliceHeader) -> Option<usize> {
69 if is_idr && !slice_hdr.long_term_reference {
70 None
71 } else {
72 let marking = &slice_hdr.adaptive_ref_pic_marking;
73 for (&op, &arg) in marking.memory_management_control_op.iter().zip(marking.operation_arg.iter()).take(marking.num_ops) {
74 if op == 6 {
75 return Some(arg as usize);
76 }
77 }
78 None
79 }
80 }
81
/// Copies a NAL unit into `dst` with emulation-prevention bytes
/// (`00 00 03`) removed, stopping early if a four-byte start code
/// (`00 00 00 01`) is encountered. Returns the number of source bytes
/// consumed.
///
/// NOTE(review): on the start-code path `dst.truncate(pos)` uses a
/// source-based index, which looks one byte short when escapes were
/// removed earlier — confirm; in length-prefixed streams this path
/// should not normally trigger.
fn unescape_nal(src: &[u8], dst: &mut Vec<u8>) -> usize {
    dst.clear();
    dst.reserve(src.len());
    let mut pos = 0;
    let mut zeroes = 0;
    while pos < src.len() {
        let byte = src[pos];
        dst.push(byte);
        if byte == 0 {
            zeroes += 1;
            // drop the emulation-prevention byte after two zeroes
            if zeroes == 2 && src.get(pos + 1) == Some(&0x03) {
                zeroes = 0;
                pos += 1;
            }
            // a run of three zeroes followed by 0x01 marks the next start code
            if zeroes >= 3 && src.get(pos + 1) == Some(&0x01) {
                pos -= 3;
                dst.truncate(pos);
                break;
            }
        } else {
            zeroes = 0;
        }
        pos += 1;
    }
    pos
}
107
108 fn make_dummy_h264_pic() -> PictureH264 {
109 PictureH264::new(VA_INVALID_ID, 0, H264PictureFlag::Invalid.into(), 0, 0)
110 }
111
/// Conversion of an internal reference-picture record into the libva
/// H.264 picture descriptor.
trait MakePicH264 {
    fn make_pic(&self) -> PictureH264;
}
115
116 impl MakePicH264 for PictureInfo {
117 fn make_pic(&self) -> PictureH264 {
118 let mut flags = H264PictureFlags::default();
119 let frame_idx = if let Some(id) = self.long_term {
120 flags |= H264PictureFlag::LongTermReference;
121 id as u32
122 } else {
123 if self.is_ref {
124 flags |= H264PictureFlag::ShortTermReference;
125 }
126 u32::from(self.id)
127 };
128 PictureH264::new(self.surface_id, frame_idx, flags, self.top_id as i32, self.bot_id as i32)
129 }
130 }
131
132 fn map_ref_list(refs: &[Option<PictureInfo>]) -> [PictureH264; 32] {
133 let mut ref_list = Vec::with_capacity(32);
134
135 for rpic in refs.iter() {
136 ref_list.push(rpic.as_ref().map_or_else(make_dummy_h264_pic, |pic| pic.make_pic()));
137 }
138
139 while ref_list.len() < 32 {
140 ref_list.push(make_dummy_h264_pic());
141 }
142 if let Ok(ret) = ref_list.try_into() {
143 ret
144 } else {
145 panic!("can't convert");
146 }
147 }
148
149 fn profile_name(profile: VAProfile::Type) -> &'static str {
150 match profile {
151 VAProfile::VAProfileMPEG2Simple => "MPEG2 Simple",
152 VAProfile::VAProfileMPEG2Main => "MPEG2 Main",
153 VAProfile::VAProfileMPEG4Simple => "MPEG4 Simple",
154 VAProfile::VAProfileMPEG4AdvancedSimple => "MPEG4 Advanced Simple",
155 VAProfile::VAProfileMPEG4Main => "MPEG4 Main",
156 VAProfile::VAProfileH264Baseline => "H264 Baseline",
157 VAProfile::VAProfileH264Main => "H264 Main",
158 VAProfile::VAProfileH264High => "H264 High",
159 VAProfile::VAProfileVC1Simple => "VC1 Simple",
160 VAProfile::VAProfileVC1Main => "VC1 Main",
161 VAProfile::VAProfileVC1Advanced => "VC1 Advanced",
162 VAProfile::VAProfileH263Baseline => "H263 Baseline",
163 VAProfile::VAProfileJPEGBaseline => "JPEG Baseline",
164 VAProfile::VAProfileH264ConstrainedBaseline => "H264 Constrained Baseline",
165 VAProfile::VAProfileVP8Version0_3 => "VP8",
166 VAProfile::VAProfileH264MultiviewHigh => "H.264 Multiview High",
167 VAProfile::VAProfileH264StereoHigh => "H264 Stereo High",
168 VAProfile::VAProfileHEVCMain => "H.EVC Main",
169 VAProfile::VAProfileHEVCMain10 => "H.EVC Main10",
170 VAProfile::VAProfileVP9Profile0 => "VP9 Profile 0",
171 VAProfile::VAProfileVP9Profile1 => "VP9 Profile 1",
172 VAProfile::VAProfileVP9Profile2 => "VP9 Profile 2",
173 VAProfile::VAProfileVP9Profile3 => "VP9 Profile 3",
174 VAProfile::VAProfileHEVCMain12 => "HEVC Main12",
175 VAProfile::VAProfileHEVCMain422_10 => "HEVC Main10 4:2:2",
176 VAProfile::VAProfileHEVCMain422_12 => "HEVC Main12 4:2:2",
177 VAProfile::VAProfileHEVCMain444 => "HEVC Main 4:4:4",
178 VAProfile::VAProfileHEVCMain444_10 => "HEVC Main10 4:4:4",
179 VAProfile::VAProfileHEVCMain444_12 => "HEVC Main12 4:4:4",
180 VAProfile::VAProfileHEVCSccMain => "HEVC SCC Main",
181 VAProfile::VAProfileHEVCSccMain10 => "HEVC SCC Main10",
182 VAProfile::VAProfileHEVCSccMain444 => "HEVC SCC Main 4:4:4",
183 VAProfile::VAProfileAV1Profile0 => "AV1 Profile 0",
184 VAProfile::VAProfileAV1Profile1 => "AV1 Profile 1",
185 VAProfile::VAProfileHEVCSccMain444_10 => "HEVC SCC Main10 4:4:4",
186 _ => "unknown",
187 }
188 }
189
190 const NUM_REF_PICS: usize = 16;
191
/// A submitted picture waiting in the reorder queue until it can be output.
struct WaitingFrame {
    /// Packet timestamp (DTS, falling back to PTS) of the picture.
    ts: u64,
    /// Picture whose decode has been submitted to the hardware.
    pic: Picture<PictureEnd>,
    /// True when the picture came from an IDR NAL unit.
    is_idr: bool,
    /// True when any slice had nal_ref_idc != 0 (picture is a reference).
    is_ref: bool,
    /// Overall frame type derived from the slice types.
    ftype: FrameType,
}
199
/// Puts decoded frames back into presentation order before output.
struct Reorderer {
    /// Timestamp of the most recently queued reference frame, if any.
    last_ref_dts: Option<u64>,
    /// Number of frames (from the queue front) that are ready for output.
    ready_idx: usize,
    /// Frames awaiting output.
    frames: VecDeque<WaitingFrame>,
}
205
206 impl Default for Reorderer {
207 fn default() -> Self {
208 Self {
209 last_ref_dts: None,
210 ready_idx: 0,
211 frames: VecDeque::with_capacity(16),
212 }
213 }
214 }
215
impl Reorderer {
    /// Queues a freshly submitted frame.
    ///
    /// Non-reference frames are inserted among the pending frames by
    /// timestamp; reference frames are appended, and every pending frame up
    /// to (and including) the previously seen reference frame becomes ready.
    fn add_frame(&mut self, new_frame: WaitingFrame) {
        if !new_frame.is_ref {
            if self.frames.is_empty() {
                self.frames.push_back(new_frame);
            } else {
                let new_dts = new_frame.ts;
                let mut idx = 0;
                // find the first queued frame with a later timestamp
                for (i, frm) in self.frames.iter().enumerate() {
                    idx = i;
                    if frm.ts > new_dts {
                        break;
                    }
                }
                // NOTE(review): if every queued frame has ts <= new_dts, `idx`
                // stops at the last element and the frame is inserted before
                // it rather than appended — confirm this is intended.
                self.frames.insert(idx, new_frame);
            }
        } else {
            // everything up to the previous reference frame may be output
            for (i, frm) in self.frames.iter().enumerate() {
                if Some(frm.ts) == self.last_ref_dts {
                    self.ready_idx = i + 1;
                }
            }
            self.last_ref_dts = Some(new_frame.ts);
            self.frames.push_back(new_frame);
        }
    }
    /// Pops the next ready frame once its surface has finished rendering.
    fn get_frame(&mut self) -> Option<WaitingFrame> {
        if self.ready_idx > 0 {
            match self.frames[0].pic.query_status() {
                // queue grew too deep — output regardless of surface status
                _ if self.ready_idx > 16 => {},
                Ok(VASurfaceStatus::Ready) => {},
                Ok(VASurfaceStatus::Rendering) => return None,
                _ => {
                    // other statuses (errors, displaying) are not handled yet
                    unimplemented!();
                },
            };
            self.ready_idx -= 1;
            self.frames.pop_front()
        } else {
            None
        }
    }
    /// Resets reorder bookkeeping.
    /// NOTE(review): queued frames are not dropped here — presumably they are
    /// drained by the caller (see `get_last_frames`); confirm.
    fn flush(&mut self) {
        self.last_ref_dts = None;
        self.ready_idx = 0;
    }
}
263
/// Handles to the VA-API objects backing the decoder.
#[allow(dead_code)]
struct VaapiInternals {
    /// Open VA display connection.
    display: Rc<Display>,
    /// Decoding context created for the selected profile.
    context: Rc<Context>,
    /// Synced pictures still used as references, with their surface IDs.
    ref_pics: Vec<(Picture<PictureSync>, VASurfaceID)>,
    /// Pool of currently unused surfaces.
    surfaces: Vec<Surface>,
    /// Image format used when downloading decoded surfaces (12 bpp preferred).
    ifmt: VAImageFormat,
}
272
/// Hardware-accelerated H.264 decoder built on top of VA-API.
pub struct VaapiH264Decoder {
    /// Codec information attached to produced frames.
    info: NACodecInfoRef,
    /// VA-API state; `None` until `init()` succeeds.
    vaapi: Option<VaapiInternals>,
    /// Parsed sequence parameter sets.
    spses: Vec<SeqParameterSet>,
    /// Parsed picture parameter sets.
    ppses: Vec<PicParameterSet>,
    /// Reference-picture bookkeeping.
    frame_refs: FrameRefs,
    /// Size in bytes of the NAL length prefix (1-4, from the avcC header).
    nal_len: u8,
    /// Reused output frame buffer.
    out_frm: NABufferType,
    /// Presentation-order reorder queue.
    reorderer: Reorderer,
    /// Timebase numerator of the last decoded packet.
    tb_num: u32,
    /// Timebase denominator of the last decoded packet.
    tb_den: u32,
}
285
/// Copies `h` rows of `w` luma bytes from `src` to `dst`, honouring each
/// buffer's own stride.
fn copy_luma_default(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    let rows = dst.chunks_mut(dstride).zip(src.chunks(sstride)).take(h);
    for (drow, srow) in rows {
        drow[..w].copy_from_slice(&srow[..w]);
    }
}
/// Scalar luma-plane copy used on architectures without the AVX fast path.
#[cfg(not(target_arch="x86_64"))]
fn copy_luma(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    copy_luma_default(dst, dstride, src, sstride, w, h);
}
/// Splits the interleaved NV12 chroma plane (UVUV...) from `src` into the
/// frame's separate U and V planes.
#[cfg(not(target_arch="x86_64"))]
fn deint_chroma(frm: NASimpleVideoFrame<u8>, src: &[u8], sstride: usize) {
    let (mut u_pos, mut v_pos) = (frm.offset[1], frm.offset[2]);
    for row in src.chunks(sstride).take(frm.height[1]) {
        let pairs = row.chunks_exact(2).take(frm.width[1]);
        for (x, uv) in pairs.enumerate() {
            frm.data[u_pos + x] = uv[0];
            frm.data[v_pos + x] = uv[1];
        }
        u_pos += frm.stride[1];
        v_pos += frm.stride[2];
    }
}
310
311 #[cfg(target_arch="x86_64")]
312 use std::arch::asm;
/// Copies the luma plane, using an AVX copy loop when the runtime CPU
/// supports AVX and the buffers satisfy its alignment/size constraints;
/// otherwise falls back to the scalar row copy.
#[cfg(target_arch="x86_64")]
fn copy_luma(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    if !is_x86_feature_detected!("avx") {
        copy_luma_default(dst, dstride, src, sstride, w, h);
        return;
    }
    // Fast path needs 32-byte aligned pointers, width a multiple of 64
    // and both strides multiples of 32 (vmovdqa requires alignment).
    if dst.as_ptr().align_offset(32) == 0 && src.as_ptr().align_offset(32) == 0 &&
        (w % 64) == 0 && ((dstride | sstride) % 32) == 0 {
        // SAFETY: both pointers are 32-byte aligned (checked above); the loop
        // advances by `sstride`/`dstride` per row for `h` rows — assumes the
        // caller's slices cover h full rows; TODO confirm at call sites.
        unsafe {
            asm!(
                "2:",
                "    mov {x}, {w}",
                "3:",
                "    vmovdqa ymm0, [{src}]",
                "    vmovdqa ymm1, [{src}+32]",
                "    vmovdqa [{dst}], ymm0",
                "    vmovdqa [{dst}+32], ymm1",
                "    add {src}, 64",
                "    add {dst}, 64",
                "    sub {x}, 64",
                "    jnz 3b",
                "    add {src}, {sstep}",
                "    add {dst}, {dstep}",
                "    dec {h}",
                "    jnz 2b",
                dst = inout(reg) dst.as_mut_ptr() => _,
                src = inout(reg) src.as_ptr() => _,
                sstep = in(reg) sstride - w,
                dstep = in(reg) dstride - w,
                w = in(reg) w,
                h = in(reg) h,
                x = out(reg) _,
                out("ymm0") _,
                out("ymm1") _,
            );
        }
    } else {
        for (dline, sline) in dst.chunks_mut(dstride)
                .zip(src.chunks(sstride))
                .take(h) {
            dline[..w].copy_from_slice(&sline[..w]);
        }
    }
}
/// SSE de-interleave of the NV12 chroma plane into separate U and V planes.
///
/// Processes 8 pixel pairs first when the width is an odd multiple of 8,
/// then 16 pairs per iteration.
/// NOTE(review): the 16-wide loop body is entered unconditionally, so chroma
/// widths below 16 (after the optional 8-step) would over-read — presumably
/// macroblock-aligned dimensions guarantee width >= 16; confirm.
#[cfg(target_arch="x86_64")]
fn deint_chroma(frm: NASimpleVideoFrame<u8>, src: &[u8], sstride: usize) {
    // SAFETY: relies on `src` holding `height` rows of `sstride` bytes and on
    // the U/V planes holding `height` rows of `stride[1]` bytes each — TODO
    // confirm the callers guarantee this.
    unsafe {
        let width = frm.width[1];
        let height = frm.height[1];
        let dst = frm.data.as_mut_ptr();
        let udst = dst.add(frm.offset[1]);
        let vdst = dst.add(frm.offset[2]);
        let dstep = frm.stride[1] - width;
        let sstep = sstride - width * 2;
        asm!(
            "2:",
            "    mov {tmp}, {width}",
            "    test {width}, 8",
            "    jz 3f",
            // handle 8 UV pairs: split bytes via shifts and pack back
            "    movaps xmm0, [{src}]",
            "    movaps xmm1, xmm0",
            "    psllw xmm0, 8",
            "    psrlw xmm1, 8",
            "    psrlw xmm0, 8",
            "    packuswb xmm1, xmm1",
            "    packuswb xmm0, xmm0",
            "    movq [{vdst}], xmm1",
            "    movq [{udst}], xmm0",
            "    add {src}, 16",
            "    add {vdst}, 8",
            "    add {udst}, 8",
            "    sub {tmp}, 8",
            "3:",
            // main loop: 16 UV pairs per iteration
            "    movaps xmm0, [{src}]",
            "    movaps xmm1, [{src} + 16]",
            "    movaps xmm2, xmm0",
            "    movaps xmm3, xmm1",
            "    psllw xmm0, 8",
            "    psllw xmm1, 8",
            "    psrlw xmm2, 8",
            "    psrlw xmm3, 8",
            "    psrlw xmm0, 8",
            "    psrlw xmm1, 8",
            "    packuswb xmm2, xmm3",
            "    packuswb xmm0, xmm1",
            "    movups [{vdst}], xmm2",
            "    movups [{udst}], xmm0",
            "    add {src}, 32",
            "    add {vdst}, 16",
            "    add {udst}, 16",
            "    sub {tmp}, 16",
            "    jnz 3b",
            "    add {udst}, {dstep}",
            "    add {vdst}, {dstep}",
            "    add {src}, {sstep}",
            "    dec {height}",
            "    jnz 2b",
            src = inout(reg) src.as_ptr() => _,
            udst = inout(reg) udst => _,
            vdst = inout(reg) vdst => _,
            width = in(reg) width,
            height = inout(reg) height => _,
            dstep = in(reg) dstep,
            sstep = in(reg) sstep,
            tmp = out(reg) _,
            out("xmm0") _,
            out("xmm1") _,
            out("xmm2") _,
            out("xmm3") _,
        );
    }
}
425
/// Downloads a synced VA surface into the decoder's output video buffer.
///
/// Only NV12 surfaces are handled: the luma plane is copied as-is and the
/// interleaved chroma plane is split into separate U and V planes; any
/// other fourcc aborts via `unimplemented!()`.
fn fill_frame(ifmt: VAImageFormat, pic: &Picture<PictureSync>, frm: &mut NABufferType) -> DecoderResult<()> {
    let mut vbuf = frm.get_vbuf().unwrap();
    let (w, h) = pic.surface_size();
    //let cur_ts = pic.timestamp();

    // map the surface into a CPU-accessible image
    let img = Image::new(pic, ifmt, w, h, true).expect("get image");

    let iimg = img.image();
    let imgdata: &[u8] = img.as_ref();

    match iimg.format.fourcc().map_err(|_| DecoderError::InvalidData)? {
        VAFourcc::NV12 => {
            let frm = NASimpleVideoFrame::from_video_buf(&mut vbuf).unwrap();
            // surface dimensions must match the output buffer
            validate!(iimg.width == (frm.width[0] as u16));
            validate!(iimg.height == (frm.height[0] as u16));

            copy_luma(&mut frm.data[frm.offset[0]..], frm.stride[0], &imgdata[iimg.offsets[0] as usize..], iimg.pitches[0] as usize, frm.width[0], frm.height[0]);

            deint_chroma(frm, &imgdata[iimg.offsets[1] as usize..], iimg.pitches[1] as usize);
        },
        _ => unimplemented!(),
    };
    Ok(())
}
450
451 impl Default for VaapiH264Decoder {
452 fn default() -> Self {
453 Self {
454 info: NACodecInfoRef::default(),
455 vaapi: None,
456 spses: Vec::with_capacity(1),
457 ppses: Vec::with_capacity(4),
458 frame_refs: FrameRefs::new(),
459 nal_len: 0,
460 out_frm: NABufferType::None,
461 reorderer: Reorderer::default(),
462 tb_num: 0,
463 tb_den: 0,
464 }
465 }
466 }
467
468 impl VaapiH264Decoder {
469 pub fn new() -> Self { Self::default() }
    /// Parses the `avcC` extradata, checks that the hardware supports the
    /// stream's profile and sets up the VA-API display, config, context,
    /// surface pool and output buffer.
    ///
    /// Returns `DecoderError::NotImplemented` for raw (non-avcC) extradata or
    /// unsupported profiles, and `DecoderError::InvalidData` for non-video
    /// codec info.
    pub fn init(&mut self, info: NACodecInfoRef) -> DecoderResult<()> {
        if let NACodecTypeInfo::Video(vinfo) = info.get_properties() {
            let edata = info.get_extradata().unwrap();
            //print!("edata:"); for &el in edata.iter() { print!(" {:02X}", el); } println!();
            let profile;
            let mut nal_buf = Vec::with_capacity(1024);
            if edata.len() > 11 && &edata[0..4] == b"avcC" {
                let mut mr = MemoryReader::new_read(edata.as_slice());
                let mut br = ByteReader::new(&mut mr);

                br.read_skip(4)?;
                let version = br.read_byte()?;
                validate!(version == 1);
                profile = br.read_byte()?;
                let _compatibility = br.read_byte()?;
                let _level = br.read_byte()?;
                let b = br.read_byte()?;
                validate!((b & 0xFC) == 0xFC);
                // low two bits hold the NAL length field size minus one
                self.nal_len = (b & 3) + 1;
                let b = br.read_byte()?;
                validate!((b & 0xE0) == 0xE0);
                let num_sps = (b & 0x1F) as usize;
                for _ in 0..num_sps {
                    let len = br.read_u16be()? as usize;
                    let offset = br.tell() as usize;
                    // NAL unit type 7 = sequence parameter set
                    validate!((br.peek_byte()? & 0x1F) == 7);
                    let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf);
                    br.read_skip(len)?;
                    let sps = parse_sps(&nal_buf[1..])?;
                    self.spses.push(sps);
                }
                let num_pps = br.read_byte()? as usize;
                for _ in 0..num_pps {
                    let len = br.read_u16be()? as usize;
                    let offset = br.tell() as usize;
                    // NAL unit type 8 = picture parameter set
                    validate!((br.peek_byte()? & 0x1F) == 8);
                    let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf);
                    br.read_skip(len)?;
                    let src = &nal_buf;

                    // payload size in bits without the rbsp_stop_one_bit and
                    // trailing zero bits
                    let mut full_size = src.len() * 8;
                    for &byte in src.iter().rev() {
                        if byte == 0 {
                            full_size -= 8;
                        } else {
                            full_size -= (byte.trailing_zeros() + 1) as usize;
                            break;
                        }
                    }
                    validate!(full_size > 0);

                    let pps = parse_pps(&src[1..], &self.spses, full_size - 8)?;
                    // replace a stored PPS with the same ID or add a new one
                    let mut found = false;
                    for stored_pps in self.ppses.iter_mut() {
                        if stored_pps.pic_parameter_set_id == pps.pic_parameter_set_id {
                            *stored_pps = pps.clone();
                            found = true;
                            break;
                        }
                    }
                    if !found {
                        self.ppses.push(pps);
                    }
                }
                // optional trailing data for high profiles: chroma format,
                // bit depths and SPS extensions
                if br.left() > 0 {
                    match profile {
                        100 | 110 | 122 | 144 => {
                            let b = br.read_byte()?;
                            // some encoders put something different here
                            if (b & 0xFC) != 0xFC {
                                return Ok(());
                            }
                            // b & 3 -> chroma format
                            let b = br.read_byte()?;
                            validate!((b & 0xF8) == 0xF8);
                            // b & 7 -> luma depth minus 8
                            let b = br.read_byte()?;
                            validate!((b & 0xF8) == 0xF8);
                            // b & 7 -> chroma depth minus 8
                            let num_spsext = br.read_byte()? as usize;
                            for _ in 0..num_spsext {
                                let len = br.read_u16be()? as usize;
                                // parse spsext
                                br.read_skip(len)?;
                            }
                        },
                        _ => {},
                    };
                }
            } else {
                return Err(DecoderError::NotImplemented);
            }

            validate!(profile > 0);
            // round dimensions up to whole macroblocks
            let width = (vinfo.get_width() + 15) & !15;
            let height = (vinfo.get_height() + 15) & !15;

            let display = Display::open_silently().expect("open display");

            // NOTE(review): 64 extra surfaces on top of the DPB size looks
            // generous — presumably to cover deep reordering plus frames held
            // by the caller; confirm.
            let num_surfaces = self.spses[0].num_ref_frames + 4 + 64;

            let va_profile = match profile {
                66 => VAProfile::VAProfileH264ConstrainedBaseline,
                77 => VAProfile::VAProfileH264Main,
                88 | 100 | 110 | 122 => VAProfile::VAProfileH264High,
                _ => return Err(DecoderError::NotImplemented),
            };
            if let Ok(profiles) = display.query_config_profiles() {
                if !profiles.contains(&va_profile) {
                    println!("Profile {} ({}) not supported", profile, profile_name(va_profile));
                    return Err(DecoderError::NotImplemented);
                }
            } else {
                return Err(DecoderError::Bug);
            }
            if let Ok(points) = display.query_config_entrypoints(va_profile) {
                if !points.contains(&VAEntrypoint::VAEntrypointVLD) {
                    println!("no decoding support for this profile");
                    return Err(DecoderError::NotImplemented);
                }
            } else {
                return Err(DecoderError::Bug);
            }

            let config = display.create_config(vec![
                VAConfigAttrib { type_: VAConfigAttribType::VAConfigAttribRTFormat, value: RTFormat::YUV420.into() },
            ], va_profile, VAEntrypoint::VAEntrypointVLD).map_err(|_| {
                println!("config creation failed!");
                DecoderError::Bug
            })?;
            let surfaces = display.create_surfaces(RTFormat::YUV420, None, width as u32, height as u32, Some(UsageHint::Decoder.into()), num_surfaces as u32).map_err(|_| DecoderError::AllocError)?;
            let context = display.create_context(&config, width as i32, height as i32, Some(&surfaces), true).map_err(|_| DecoderError::Bug)?;

            let ref_pics = Vec::new();

            // pick a 12 bpp image format (NV12-sized) for surface downloads,
            // falling back to the first reported format
            let image_formats = display.query_image_formats().map_err(|_| DecoderError::Bug)?;
            validate!(!image_formats.is_empty());
            let mut ifmt = image_formats[0];
            for fmt in image_formats.iter() {
                if fmt.bits_per_pixel == 12 {
                    ifmt = *fmt;
                    break;
                }
            }

            self.vaapi = Some(VaapiInternals { display, context, ref_pics, surfaces, ifmt });

            let vinfo = NAVideoInfo::new(width, height, false, YUV420_FORMAT);
            self.info = NACodecInfo::new_ref(info.get_name(), NACodecTypeInfo::Video(vinfo), info.get_extradata()).into_ref();
            self.out_frm = alloc_video_buffer(vinfo, 4)?;

            Ok(())
        } else {
            Err(DecoderError::InvalidData)
        }
    }
    /// Decodes one packet: splits it into length-prefixed NAL units, parses
    /// parameter sets and slice headers, submits picture/IQ-matrix/slice
    /// buffers to VA-API and queues the resulting picture for reordering.
    fn decode(&mut self, pkt: &NAPacket) -> DecoderResult<()> {
        let src = pkt.get_buffer();
        let vactx = if let Some(ref mut ctx) = self.vaapi { ctx } else { return Err(DecoderError::Bug) };

        // prefer DTS for queue ordering, falling back to PTS or zero
        let timestamp = pkt.get_dts().unwrap_or_else(|| pkt.get_pts().unwrap_or(0));

        // NOTE(review): surface exhaustion currently aborts instead of
        // reporting AllocError — confirm whether this should stay a panic.
        if vactx.surfaces.is_empty() {
            panic!("ran out of free surfaces");
//            return Err(DecoderError::AllocError);
        }
        let surface = vactx.surfaces.pop().unwrap();
        let surface_id = surface.id();
        let mut pic = Picture::new(timestamp, vactx.context.clone(), surface);
        let mut is_ref = false;
        let mut is_keyframe = false;

        self.tb_num = pkt.ts.tb_num;
        self.tb_den = pkt.ts.tb_den;

        let mut mr = MemoryReader::new_read(&src);
        let mut br = ByteReader::new(&mut mr);
        let mut frame_type = FrameType::I;
        let mut nal_buf = Vec::with_capacity(1024);
        while br.left() > 0 {
            // NAL units use an avcC-style length prefix of `nal_len` bytes
            let size = match self.nal_len {
                1 => br.read_byte()? as usize,
                2 => br.read_u16be()? as usize,
                3 => br.read_u24be()? as usize,
                4 => br.read_u32be()? as usize,
                _ => unreachable!(),
            };
            validate!(br.left() >= (size as i64));
            let offset = br.tell() as usize;
            let raw_nal = &src[offset..][..size];
            let _size = unescape_nal(raw_nal, &mut nal_buf);

            let src = &nal_buf;
            // forbidden_zero_bit must be zero
            validate!((src[0] & 0x80) == 0);
            let nal_ref_idc = src[0] >> 5;
            let nal_unit_type = src[0] & 0x1F;

            // payload size in bits without the rbsp_stop_one_bit and
            // trailing zero bits
            let mut full_size = src.len() * 8;
            for &byte in src.iter().rev() {
                if byte == 0 {
                    full_size -= 8;
                } else {
                    full_size -= (byte.trailing_zeros() + 1) as usize;
                    break;
                }
            }
            validate!(full_size > 0);

            match nal_unit_type {
                1 | 5 => { // coded slice (type 5 = IDR slice)
                    let is_idr = nal_unit_type == 5;
                    is_ref |= nal_ref_idc != 0;
                    is_keyframe |= is_idr;
                    let mut br = BitReader::new(&src[..(full_size + 7)/8], BitReaderMode::BE);
                    br.skip(8)?;

                    let slice_hdr = parse_slice_header(&mut br, &self.spses, &self.ppses, is_idr, nal_ref_idc)?;
                    // overall frame type: B wins over P which wins over I
                    match slice_hdr.slice_type {
                        SliceType::P if frame_type != FrameType::B => frame_type = FrameType::P,
                        SliceType::SP if frame_type != FrameType::B => frame_type = FrameType::P,
                        SliceType::B => frame_type = FrameType::B,
                        _ => {},
                    };
                    // locate the PPS referenced by the slice and its SPS
                    let mut cur_sps = 0;
                    let mut cur_pps = 0;
                    let mut pps_found = false;
                    for (i, pps) in self.ppses.iter().enumerate() {
                        if pps.pic_parameter_set_id == slice_hdr.pic_parameter_set_id {
                            cur_pps = i;
                            pps_found = true;
                            break;
                        }
                    }
                    validate!(pps_found);
                    let mut sps_found = false;
                    for (i, sps) in self.spses.iter().enumerate() {
                        if sps.seq_parameter_set_id == self.ppses[cur_pps].seq_parameter_set_id {
                            cur_sps = i;
                            sps_found = true;
                            break;
                        }
                    }
                    validate!(sps_found);
                    let sps = &self.spses[cur_sps];
                    let pps = &self.ppses[cur_pps];

                    // the first slice of a picture supplies the data for the
                    // VA picture parameter and IQ matrix buffers
                    if slice_hdr.first_mb_in_slice == 0 {
                        let (top_id, bot_id) = self.frame_refs.calc_picture_num(&slice_hdr, is_idr, nal_ref_idc, sps);
                        if is_idr {
                            // IDR resets the DPB: recycle all reference surfaces
                            self.frame_refs.clear_refs();
                            for (pic, _) in vactx.ref_pics.drain(..) {
                                if let Ok(surf) = pic.take_surface() {
                                    vactx.surfaces.push(surf);
                                } else {
                                    panic!("can't take surface");
                                }
                            }
                        }
                        self.frame_refs.select_refs(sps, &slice_hdr, top_id);
                        let mut pic_refs = Vec::with_capacity(NUM_REF_PICS);
                        for pic in self.frame_refs.ref_pics.iter().rev().take(NUM_REF_PICS) {
                            pic_refs.push(pic.make_pic());
                        }
                        if slice_hdr.adaptive_ref_pic_marking_mode {
                            self.frame_refs.apply_adaptive_marking(&slice_hdr.adaptive_ref_pic_marking, slice_hdr.frame_num, 1 << sps.log2_max_frame_num)?;
                        }

                        // pad the reference array with invalid entries
                        while pic_refs.len() < NUM_REF_PICS {
                            pic_refs.push(make_dummy_h264_pic());
                        }

                        let mut flags = H264PictureFlags::default();
                        let frame_idx = if let Some(id) = get_long_term_id(is_idr, &slice_hdr) {
                            flags |= H264PictureFlag::LongTermReference;
                            id as u32
                        } else {
                            if nal_ref_idc != 0 {
                                flags |= H264PictureFlag::ShortTermReference;
                            }
                            u32::from(slice_hdr.frame_num)
                        };
                        let pic_refs: [PictureH264; NUM_REF_PICS] = pic_refs.try_into().unwrap_or_else(|_| panic!("can't convert"));

                        let h264pic = PictureH264::new(surface_id, frame_idx, flags, top_id as i32, bot_id as i32);

                        let seq_fields = H264SeqFields::new(
                            u32::from(sps.chroma_format_idc),
                            u32::from(sps.separate_colour_plane),
                            u32::from(sps.gaps_in_frame_num_value_allowed),
                            u32::from(sps.frame_mbs_only),
                            u32::from(sps.mb_adaptive_frame_field),
                            u32::from(sps.direct_8x8_inference),
                            u32::from(sps.level_idc >= 31),
                            u32::from(sps.log2_max_frame_num) - 4,
                            u32::from(sps.pic_order_cnt_type),
                            u32::from(sps.log2_max_pic_order_cnt_lsb).wrapping_sub(4),
                            u32::from(sps.delta_pic_order_always_zero)
                        );
                        let pic_fields = H264PicFields::new(
                            u32::from(pps.entropy_coding_mode),
                            u32::from(pps.weighted_pred),
                            u32::from(pps.weighted_bipred_idc),
                            u32::from(pps.transform_8x8_mode),
                            u32::from(slice_hdr.field_pic),
                            u32::from(pps.constrained_intra_pred),
                            u32::from(pps.pic_order_present),
                            u32::from(pps.deblocking_filter_control_present),
                            u32::from(pps.redundant_pic_cnt_present),
                            u32::from(nal_ref_idc != 0)
                        );
                        let ppd = PictureParameterBufferH264::new(
                            h264pic,
                            pic_refs,
                            sps.pic_width_in_mbs as u16 - 1,
                            sps.pic_height_in_mbs as u16 - 1,
                            sps.bit_depth_luma - 8,
                            sps.bit_depth_chroma - 8,
                            sps.num_ref_frames as u8,
                            &seq_fields,
                            pps.num_slice_groups as u8 - 1, // should be 0
                            pps.slice_group_map_type, // should be 0
                            0, //pps.slice_group_change_rate as u16 - 1,
                            pps.pic_init_qp as i8 - 26,
                            pps.pic_init_qs as i8 - 26,
                            pps.chroma_qp_index_offset,
                            pps.second_chroma_qp_index_offset,
                            &pic_fields,
                            slice_hdr.frame_num
                        );
                        let pic_param = BufferType::PictureParameter(PictureParameter::H264(ppd));
                        let buf = vactx.context.create_buffer(pic_param).map_err(|_| DecoderError::Bug)?;
                        pic.add_buffer(buf);

                        // only two of the 8x8 scaling lists are passed on
                        // (entries 0 and 3)
                        let mut scaling_list_8x8 = [[0; 64]; 2];
                        scaling_list_8x8[0].copy_from_slice(&pps.scaling_list_8x8[0]);
                        scaling_list_8x8[1].copy_from_slice(&pps.scaling_list_8x8[3]);
                        let iqmatrix = BufferType::IQMatrix(IQMatrix::H264(IQMatrixBufferH264::new(pps.scaling_list_4x4, scaling_list_8x8)));
                        let buf = vactx.context.create_buffer(iqmatrix).map_err(|_| DecoderError::Bug)?;
                        pic.add_buffer(buf);

                        // record this picture for future reference lists
                        let cpic = PictureInfo {
                            id: slice_hdr.frame_num,
                            full_id: top_id,
                            surface_id,
                            top_id, bot_id,
                            //pic_type: slice_hdr.slice_type.to_frame_type(),
                            is_ref,
                            is_idr,
                            long_term: get_long_term_id(is_idr, &slice_hdr),
                        };
                        if cpic.is_ref {
                            self.frame_refs.add_short_term(cpic.clone(), sps.num_ref_frames);
                        }
                        if let Some(lt_idx) = cpic.long_term {
                            self.frame_refs.add_long_term(lt_idx, cpic);
                        }
                    }

                    // build explicit weighted-prediction tables (defaults are
                    // filled in for entries the slice header did not signal)
                    let mut luma_weight_l0 = [0i16; 32];
                    let mut luma_offset_l0 = [0i16; 32];
                    let mut chroma_weight_l0 = [[0i16; 2]; 32];
                    let mut chroma_offset_l0 = [[0i16; 2]; 32];
                    let mut luma_weight_l1 = [0i16; 32];
                    let mut luma_offset_l1 = [0i16; 32];
                    let mut chroma_weight_l1 = [[0i16; 2]; 32];
                    let mut chroma_offset_l1 = [[0i16; 2]; 32];
                    let mut luma_weighted_l0 = false;
                    let mut chroma_weighted_l0 = false;
                    let mut luma_weighted_l1 = false;
                    let mut chroma_weighted_l1 = false;
                    let mut luma_log2_weight_denom = slice_hdr.luma_log2_weight_denom;
                    let mut chroma_log2_weight_denom = slice_hdr.chroma_log2_weight_denom;

                    if (pps.weighted_pred && matches!(slice_hdr.slice_type, SliceType::P | SliceType::B)) || (pps.weighted_bipred_idc == 1 && slice_hdr.slice_type == SliceType::B) {
                        luma_weighted_l0 = true;
                        chroma_weighted_l0 = false;
                        for (i, winfo) in slice_hdr.weights_l0.iter().enumerate().take(slice_hdr.num_ref_idx_l0_active) {
                            if winfo.luma_weighted {
                                luma_weight_l0[i] = winfo.luma_weight.into();
                                luma_offset_l0[i] = winfo.luma_offset.into();
                            } else {
                                luma_weight_l0[i] = 1 << slice_hdr.luma_log2_weight_denom;
                            }
                            if winfo.chroma_weighted {
                                chroma_weight_l0[i][0] = winfo.chroma_weight[0].into();
                                chroma_weight_l0[i][1] = winfo.chroma_weight[1].into();
                                chroma_offset_l0[i][0] = winfo.chroma_offset[0].into();
                                chroma_offset_l0[i][1] = winfo.chroma_offset[1].into();
                            } else {
                                chroma_weight_l0[i][0] = 1 << slice_hdr.chroma_log2_weight_denom;
                                chroma_weight_l0[i][1] = 1 << slice_hdr.chroma_log2_weight_denom;
                                chroma_offset_l0[i][0] = 0;
                                chroma_offset_l0[i][1] = 0;
                            }
                            chroma_weighted_l0 |= winfo.chroma_weighted;
                        }
                    }
                    if pps.weighted_bipred_idc == 1 && slice_hdr.slice_type == SliceType::B {
                        luma_weighted_l1 = true;
                        chroma_weighted_l1 = sps.chroma_format_idc != 0;
                        for (i, winfo) in slice_hdr.weights_l1.iter().enumerate().take(slice_hdr.num_ref_idx_l1_active) {
                            if winfo.luma_weighted {
                                luma_weight_l1[i] = winfo.luma_weight.into();
                                luma_offset_l1[i] = winfo.luma_offset.into();
                            } else {
                                luma_weight_l1[i] = 1 << slice_hdr.luma_log2_weight_denom;
                            }
                            if chroma_weighted_l1 && winfo.chroma_weighted {
                                chroma_weight_l1[i][0] = winfo.chroma_weight[0].into();
                                chroma_weight_l1[i][1] = winfo.chroma_weight[1].into();
                                chroma_offset_l1[i][0] = winfo.chroma_offset[0].into();
                                chroma_offset_l1[i][1] = winfo.chroma_offset[1].into();
                            } else {
                                chroma_weight_l1[i][0] = 1 << slice_hdr.chroma_log2_weight_denom;
                                chroma_weight_l1[i][1] = 1 << slice_hdr.chroma_log2_weight_denom;
                                chroma_offset_l1[i][0] = 0;
                                chroma_offset_l1[i][1] = 0;
                            }
                        }
                    }
                    // implicit weighting: outside the single-ref-per-list case
                    // fall back to equal 32/32 weights with denominator 5
                    if pps.weighted_bipred_idc == 2 && slice_hdr.slice_type == SliceType::B {
                        let num_l0 = slice_hdr.num_ref_idx_l0_active;
                        let num_l1 = slice_hdr.num_ref_idx_l1_active;
                        if num_l0 != 1 || num_l1 != 1 { //xxx: also exclude symmetric case
                            luma_weighted_l0 = false;
                            luma_weighted_l1 = false;
                            chroma_weighted_l0 = false;
                            chroma_weighted_l1 = false;
                            luma_log2_weight_denom = 5;
                            chroma_log2_weight_denom = 5;

                            for w in luma_weight_l0.iter_mut() {
                                *w = 32;
                            }
                            for w in luma_weight_l1.iter_mut() {
                                *w = 32;
                            }
                            for w in chroma_weight_l0.iter_mut() {
                                *w = [32; 2];
                            }
                            for w in chroma_weight_l1.iter_mut() {
                                *w = [32; 2];
                            }
                        }
                    }

                    let ref_pic_list_0 = map_ref_list(&self.frame_refs.cur_refs.ref_list0);
                    let ref_pic_list_1 = map_ref_list(&self.frame_refs.cur_refs.ref_list1);

                    let slice_param = SliceParameterBufferH264::new(
                        raw_nal.len() as u32,
                        0, // no offset
                        VASliceDataFlag::All,
                        br.tell() as u16, // bit position after the slice header
                        slice_hdr.first_mb_in_slice as u16,
                        match slice_hdr.slice_type {
                            SliceType::I => 2,
                            SliceType::P => 0,
                            SliceType::B => 1,
                            SliceType::SI => 4,
                            SliceType::SP => 3,
                        },
                        slice_hdr.direct_spatial_mv_pred as u8,
                        (slice_hdr.num_ref_idx_l0_active as u8).saturating_sub(1),
                        (slice_hdr.num_ref_idx_l1_active as u8).saturating_sub(1),
                        slice_hdr.cabac_init_idc,
                        slice_hdr.slice_qp_delta as i8,
                        slice_hdr.disable_deblocking_filter_idc,
                        slice_hdr.slice_alpha_c0_offset / 2,
                        slice_hdr.slice_beta_offset / 2,
                        ref_pic_list_0,
                        ref_pic_list_1,
                        luma_log2_weight_denom,
                        chroma_log2_weight_denom,
                        luma_weighted_l0 as u8, luma_weight_l0, luma_offset_l0,
                        chroma_weighted_l0 as u8, chroma_weight_l0, chroma_offset_l0,
                        luma_weighted_l1 as u8, luma_weight_l1, luma_offset_l1,
                        chroma_weighted_l1 as u8, chroma_weight_l1, chroma_offset_l1,
                    );
                    let slc_param = BufferType::SliceParameter(SliceParameter::H264(slice_param));
                    let buf = vactx.context.create_buffer(slc_param).map_err(|_| DecoderError::Bug)?;
                    pic.add_buffer(buf);

                    // the slice data buffer gets the raw, still-escaped NAL bytes
                    let slc_data = BufferType::SliceData(raw_nal.to_vec());
                    let buf = vactx.context.create_buffer(slc_data).map_err(|_| DecoderError::Bug)?;
                    pic.add_buffer(buf);
                },
                2 => { // slice data partition A
                    //slice header
                    //slice id = read_ue()
                    //cat 2 slice data (all but MB layer residual)
                    return Err(DecoderError::NotImplemented);
                },
                3 => { // slice data partition B
                    //slice id = read_ue()
                    //if pps.redundant_pic_cnt_present { redundant_pic_cnt = read_ue() }
                    //cat 3 slice data (MB layer residual)
                    return Err(DecoderError::NotImplemented);
                },
                4 => { // slice data partition C
                    //slice id = read_ue()
                    //if pps.redundant_pic_cnt_present { redundant_pic_cnt = read_ue() }
                    //cat 4 slice data (MB layer residual)
                    return Err(DecoderError::NotImplemented);
                },
                6 => {}, //SEI
                7 => { // sequence parameter set
                    let sps = parse_sps(&src[1..])?;
                    self.spses.push(sps);
                },
                8 => { // picture parameter set: replace same-ID PPS or add new
                    validate!(full_size >= 8 + 16);
                    let pps = parse_pps(&src[1..], &self.spses, full_size - 8)?;
                    let mut found = false;
                    for stored_pps in self.ppses.iter_mut() {
                        if stored_pps.pic_parameter_set_id == pps.pic_parameter_set_id {
                            *stored_pps = pps.clone();
                            found = true;
                            break;
                        }
                    }
                    if !found {
                        self.ppses.push(pps);
                    }
                },
                9 => { // access unit delimiter
                },
                10 => {}, //end of sequence
                11 => {}, //end of stream
                12 => {}, //filler
                _ => {},
            };

            br.read_skip(size)?;
        }

        // submit the picture for decoding and queue it for reordering
        let bpic = pic.begin().expect("begin");
        let rpic = bpic.render().expect("render");
        let epic = rpic.end().expect("end");

        self.reorderer.add_frame(WaitingFrame {
            pic: epic,
            is_idr: is_keyframe,
            is_ref,
            ftype: frame_type,
            ts: timestamp,
        });

        // recycle surfaces of pictures that are no longer referenced
        let mut idx = 0;
        while idx < vactx.ref_pics.len() {
            let cur_surf_id = vactx.ref_pics[idx].1;
            if self.frame_refs.ref_pics.iter().any(|fref| fref.surface_id == cur_surf_id) {
                idx += 1;
            } else {
                let (pic, _) = vactx.ref_pics.remove(idx);
                if let Ok(surf) = pic.take_surface() {
                    vactx.surfaces.push(surf);
                } else {
                    panic!("can't take surface");
                }
            }
        }

        Ok(())
    }
1035 fn get_frame(&mut self) -> Option<NAFrameRef> {
1036 if let Some(ref mut vactx) = self.vaapi {
1037 if let Some(frm) = self.reorderer.get_frame() {
1038 let ts = frm.ts;
1039 let is_idr = frm.is_idr;
1040 let is_ref = frm.is_ref;
1041 let ftype = frm.ftype;
1042 if let Ok(pic) = frm.pic.sync() {
1043 let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm);
1044
1045 if !is_ref {
1046 if let Ok(surf) = pic.take_surface() {
1047 vactx.surfaces.push(surf);
1048 } else {
1049 panic!("can't take surface");
1050 }
1051 } else {
1052 let id = pic.surface_id();
1053 vactx.ref_pics.push((pic, id));
1054 }
1055
1056 let ts = NATimeInfo::new(None, Some(ts), None, self.tb_num, self.tb_den);
1057 Some(NAFrame::new(ts, ftype, is_idr, self.info.clone(), self.out_frm.clone()).into_ref())
1058 } else {
1059 panic!("can't sync");
1060 }
1061 } else {
1062 None
1063 }
1064 } else {
1065 None
1066 }
1067 }
1068 fn get_last_frames(&mut self) -> Option<NAFrameRef> {
1069 if let Some(ref mut vactx) = self.vaapi {
1070 if let Some(frm) = self.reorderer.frames.pop_front() {
1071 let ts = frm.ts;
1072 let is_idr = frm.is_idr;
1073 let is_ref = frm.is_ref;
1074 let ftype = frm.ftype;
1075 if let Ok(pic) = frm.pic.sync() {
1076 let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm);
1077
1078 if !is_ref {
1079 if let Ok(surf) = pic.take_surface() {
1080 vactx.surfaces.push(surf);
1081 } else {
1082 panic!("can't take surface");
1083 }
1084 } else {
1085 let id = pic.surface_id();
1086 vactx.ref_pics.push((pic, id));
1087 }
1088
1089 let ts = NATimeInfo::new(None, Some(ts), None, self.tb_num, self.tb_den);
1090 Some(NAFrame::new(ts, ftype, is_idr, self.info.clone(), self.out_frm.clone()).into_ref())
1091 } else {
1092 panic!("can't sync");
1093 }
1094 } else {
1095 None
1096 }
1097 } else {
1098 None
1099 }
1100 }
1101 fn flush(&mut self) {
1102 self.frame_refs.clear_refs();
1103 if let Some(ref mut vactx) = self.vaapi {
1104 for frm in self.reorderer.frames.drain(..) {
1105 if let Ok(pic) = frm.pic.sync() {
1106 if let Ok(surf) = pic.take_surface() {
1107 vactx.surfaces.push(surf);
1108 } else {
1109 panic!("can't take surface");
1110 }
1111 } else {
1112 panic!("can't sync");
1113 }
1114 }
1115 self.reorderer.flush();
1116 for (pic, _) in vactx.ref_pics.drain(..) {
1117 if let Ok(surf) = pic.take_surface() {
1118 vactx.surfaces.push(surf);
1119 } else {
1120 panic!("can't take surface");
1121 }
1122 }
1123 }
1124 }
1125 }
1126
1127 impl NAOptionHandler for VaapiH264Decoder {
1128 fn get_supported_options(&self) -> &[NAOptionDefinition] { &[] }
1129 fn set_options(&mut self, _options: &[NAOption]) {}
1130 fn query_option_value(&self, _name: &str) -> Option<NAValue> { None }
1131 }
1132
1133 use std::thread::*;
1134 use std::sync::mpsc::*;
1135
/// Commands sent from `HWWrapper` to the decoder worker thread.
enum DecMessage {
    /// Initialise the decoder with the given stream information.
    Init(NACodecInfoRef),
    /// Decode one packet.
    Decode(NAPacket),
    /// Discard all internal decoder state (no reply is sent).
    Flush,
    /// Request the next frame in display order.
    GetFrame,
    /// Request a remaining frame during end-of-stream draining.
    GetLastFrames,
    /// Shut the worker thread down.
    End
}
1144
/// Replies sent from the decoder worker thread back to `HWWrapper`.
enum DecResponse {
    /// The requested operation succeeded.
    Ok,
    /// No frame is currently available.
    Nothing,
    /// The requested operation failed with this error.
    Err(DecoderError),
    /// A decoded frame.
    Frame(NAFrameRef),
}
1151
/// Interface for hardware-accelerated decoders.
pub trait HWDecoder {
    /// Initialises the decoder for the given stream information.
    fn init(&mut self, info: NACodecInfoRef) -> DecoderResult<()>;
    /// Submits a packet for decoding.
    fn queue_pkt(&mut self, pkt: &NAPacket) -> DecoderResult<()>;
    /// Returns the next decoded frame in display order, if one is ready.
    fn get_frame(&mut self) -> Option<NAFrameRef>;
    /// Returns a remaining frame while draining at the end of the stream.
    fn get_last_frames(&mut self) -> Option<NAFrameRef>;
    /// Discards all internal decoder state.
    fn flush(&mut self);
}
1159
/// Runs `VaapiH264Decoder` on a dedicated worker thread and forwards
/// requests to it over bounded channels.
pub struct HWWrapper {
    /// Worker thread handle; taken out and joined on drop.
    handle: Option<JoinHandle<DecoderResult<()>>>,
    /// Command channel to the worker thread.
    send: SyncSender<DecMessage>,
    /// Response channel from the worker thread.
    recv: Receiver<DecResponse>,
}
1165
1166 #[allow(clippy::new_without_default)]
1167 impl HWWrapper {
1168 pub fn new() -> Self {
1169 let (in_send, in_recv) = sync_channel(1);
1170 let (out_send, out_recv) = sync_channel(1);
1171 let handle = std::thread::spawn(move || {
1172 let receiver = in_recv;
1173 let sender = out_send;
1174 let mut dec = VaapiH264Decoder::new();
1175 while let Ok(msg) = receiver.recv() {
1176 match msg {
1177 DecMessage::Init(info) => {
1178 let msg = if let Err(err) = dec.init(info) {
1179 DecResponse::Err(err)
1180 } else {
1181 DecResponse::Ok
1182 };
1183 sender.send(msg).map_err(|_| DecoderError::Bug)?;
1184 },
1185 DecMessage::Decode(pkt) => {
1186 let msg = match dec.decode(&pkt) {
1187 Ok(()) => DecResponse::Ok,
1188 Err(err) => DecResponse::Err(err),
1189 };
1190 sender.send(msg).map_err(|_| DecoderError::Bug)?;
1191 },
1192 DecMessage::GetFrame => {
1193 let msg = match dec.get_frame() {
1194 Some(frm) => DecResponse::Frame(frm),
1195 None => DecResponse::Nothing,
1196 };
1197 sender.send(msg).map_err(|_| DecoderError::Bug)?;
1198 },
1199 DecMessage::GetLastFrames => {
1200 let msg = match dec.get_last_frames() {
1201 Some(frm) => DecResponse::Frame(frm),
1202 None => DecResponse::Nothing,
1203 };
1204 sender.send(msg).map_err(|_| DecoderError::Bug)?;
1205 },
1206 DecMessage::Flush => dec.flush(),
1207 DecMessage::End => return Ok(()),
1208 };
1209 }
1210 Err(DecoderError::Bug)
1211 });
1212
1213 Self {
1214 handle: Some(handle),
1215 send: in_send,
1216 recv: out_recv,
1217 }
1218 }
1219 }
1220
1221 impl HWDecoder for HWWrapper {
1222 fn init(&mut self, info: NACodecInfoRef) -> DecoderResult<()> {
1223 if self.send.send(DecMessage::Init(info)).is_ok() {
1224 match self.recv.recv() {
1225 Ok(DecResponse::Ok) => Ok(()),
1226 Ok(DecResponse::Err(err)) => Err(err),
1227 Err(_) => Err(DecoderError::Bug),
1228 _ => unreachable!(),
1229 }
1230 } else {
1231 Err(DecoderError::Bug)
1232 }
1233 }
1234 fn queue_pkt(&mut self, pkt: &NAPacket) -> DecoderResult<()> {
1235 let pkt2 = NAPacket::new_from_refbuf(pkt.get_stream(), pkt.ts, pkt.keyframe, pkt.get_buffer());
1236 if self.send.send(DecMessage::Decode(pkt2)).is_ok() {
1237 match self.recv.recv() {
1238 Ok(DecResponse::Ok) => Ok(()),
1239 Ok(DecResponse::Err(err)) => Err(err),
1240 Err(_) => Err(DecoderError::Bug),
1241 _ => unreachable!(),
1242 }
1243 } else {
1244 Err(DecoderError::Bug)
1245 }
1246 }
1247 fn get_frame(&mut self) -> Option<NAFrameRef> {
1248 if self.send.send(DecMessage::GetFrame).is_ok() {
1249 match self.recv.recv() {
1250 Ok(DecResponse::Frame(frm)) => Some(frm),
1251 Ok(DecResponse::Nothing) => None,
1252 Err(_) => None,
1253 _ => unreachable!(),
1254 }
1255 } else {
1256 None
1257 }
1258 }
1259 fn get_last_frames(&mut self) -> Option<NAFrameRef> {
1260 if self.send.send(DecMessage::GetLastFrames).is_ok() {
1261 match self.recv.recv() {
1262 Ok(DecResponse::Frame(frm)) => Some(frm),
1263 Ok(DecResponse::Nothing) => None,
1264 Err(_) => None,
1265 _ => unreachable!(),
1266 }
1267 } else {
1268 None
1269 }
1270 }
1271 fn flush(&mut self) {
1272 let _ = self.send.send(DecMessage::Flush);
1273 }
1274 }
1275
1276 impl Drop for HWWrapper {
1277 fn drop(&mut self) {
1278 if self.send.send(DecMessage::End).is_ok() {
1279 let mut handle = None;
1280 std::mem::swap(&mut handle, &mut self.handle);
1281 if let Some(hdl) = handle {
1282 let _ = hdl.join();
1283 }
1284 }
1285 }
1286 }
1287
1288 impl NAOptionHandler for HWWrapper {
1289 fn get_supported_options(&self) -> &[NAOptionDefinition] { &[] }
1290 fn set_options(&mut self, _options: &[NAOption]) {}
1291 fn query_option_value(&self, _name: &str) -> Option<NAValue> { None }
1292 }
1293
1294 pub fn new_h264_hwdec() -> Box<dyn HWDecoder + Send> {
1295 Box::new(HWWrapper::new())
1296 }
1297
#[cfg(test)]
mod test {
    use nihav_core::codecs::*;
    use nihav_core::io::byteio::*;
    use nihav_core::demuxers::{RegisteredDemuxers, create_demuxer};
    use nihav_commonfmt::generic_register_all_demuxers;
    use super::VaapiH264Decoder;
    use std::io::prelude::*;

    /// Decodes the video stream of the given file with `VaapiH264Decoder`
    /// and dumps every output frame into `assets/test_out/` as a PGM image
    /// (all three planes stacked vertically) for visual inspection.
    ///
    /// `dname` selects the demuxer, `opfx` is the output file name prefix.
    fn decode_h264(name: &str, dname: &str, dmx_reg: &RegisteredDemuxers, opfx: &str) -> DecoderResult<()> {
        let dmx_f = dmx_reg.find_demuxer(dname).expect("demuxer exists");
        let file = std::fs::File::open(name).expect("file exists");
        let mut fr = FileReader::new_read(file);
        let mut br = ByteReader::new(&mut fr);
        let mut dmx = create_demuxer(dmx_f, &mut br).expect("create demuxer");

        // initialise the decoder with the first video stream found
        let mut vstream_id = 0;
        let mut dec = VaapiH264Decoder::new();
        for stream in dmx.get_streams() {
            if stream.get_media_type() == StreamType::Video {
                dec.init(stream.get_info()).expect("inited");
                vstream_id = stream.get_id();
                break;
            }
        }

        let mut frameno = 0;
        while let Ok(pkt) = dmx.get_frame() {
            if pkt.get_stream().get_id() != vstream_id {
                continue;
            }
            dec.decode(&pkt).expect("decoded");
            // drain one frame per packet straight from the reorder queue
            let frm = dec.get_last_frames().expect("get frame");
            let timestamp = frm.get_dts().unwrap_or_else(|| frm.get_pts().unwrap_or(0));

            let pic = frm.get_buffer().get_vbuf().expect("got picture");

            let nname = format!("assets/test_out/{}{:06}_{}.pgm", opfx, timestamp, frameno);
            frameno += 1;
            let mut file = std::fs::File::create(&nname).expect("create file");
            let (w, h) = pic.get_dimensions(0);
            // single grey PGM holding luma plus both half-height chroma planes,
            // hence the h * 3 / 2 image height
            file.write_all(format!("P5\n{} {}\n255\n", w, h * 3 / 2).as_bytes()).expect("header written");
            let data = pic.get_data();
            for yline in data.chunks(pic.get_stride(0)).take(h) {
                file.write_all(&yline[..w]).expect("Y line written");
            }
            // chroma lines are interleaved U/V so both planes fit side by side
            for (uline, vline) in data[pic.get_offset(1)..].chunks(pic.get_stride(1))
                .zip(data[pic.get_offset(2)..].chunks(pic.get_stride(2))).take(h / 2) {
                file.write_all(&uline[..w / 2]).expect("U line written");
                file.write_all(&vline[..w / 2]).expect("V line written");
            }
        }
        Ok(())
    }


    // samples if not specified otherwise come from H.264 conformance suite

    #[test]
    fn test_h264_simple() {
        let mut dmx_reg = RegisteredDemuxers::new();
        generic_register_all_demuxers(&mut dmx_reg);

        decode_h264("assets/ITU/DimpledSpanishCuckoo-mobile.mp4", "mov", &dmx_reg, "hw").unwrap();
    }
}