]>
Commit | Line | Data |
---|---|---|
e5ccd68d KS |
1 | use std::collections::VecDeque; |
2 | use std::convert::TryInto; | |
3 | use std::rc::Rc; | |
4 | ||
5 | use nihav_core::codecs::*; | |
6 | use nihav_core::io::byteio::*; | |
7 | use nihav_core::io::bitreader::*; | |
8 | use nihav_core::io::intcode::*; | |
9 | ||
10 | use libva::*; | |
11 | ||
// Bails out of the enclosing function with `DecoderError::InvalidData` when the
// condition is false. Debug builds additionally print where the check failed.
#[cfg(debug_assertions)]
macro_rules! validate {
    ($a:expr) => {
        if !$a {
            println!("check failed at {}:{}", file!(), line!());
            return Err(DecoderError::InvalidData);
        }
    };
}
#[cfg(not(debug_assertions))]
macro_rules! validate {
    ($a:expr) => {
        if !$a {
            return Err(DecoderError::InvalidData);
        }
    };
}
20 | ||
21 | mod pic_ref; | |
22 | pub use pic_ref::*; | |
23 | #[allow(clippy::manual_range_contains)] | |
24 | #[allow(clippy::needless_range_loop)] | |
25 | mod sets; | |
26 | use sets::*; | |
27 | #[allow(clippy::manual_range_contains)] | |
28 | mod slice; | |
29 | use slice::*; | |
30 | ||
31 | trait ReadUE { | |
32 | fn read_ue(&mut self) -> DecoderResult<u32>; | |
33 | fn read_te(&mut self, range: u32) -> DecoderResult<u32>; | |
34 | fn read_ue_lim(&mut self, max_val: u32) -> DecoderResult<u32> { | |
35 | let val = self.read_ue()?; | |
36 | validate!(val <= max_val); | |
37 | Ok(val) | |
38 | } | |
39 | fn read_se(&mut self) -> DecoderResult<i32> { | |
40 | let val = self.read_ue()?; | |
41 | if (val & 1) != 0 { | |
42 | Ok (((val >> 1) as i32) + 1) | |
43 | } else { | |
44 | Ok (-((val >> 1) as i32)) | |
45 | } | |
46 | } | |
47 | } | |
48 | ||
49 | impl<'a> ReadUE for BitReader<'a> { | |
50 | fn read_ue(&mut self) -> DecoderResult<u32> { | |
51 | Ok(self.read_code(UintCodeType::GammaP)? - 1) | |
52 | } | |
53 | fn read_te(&mut self, range: u32) -> DecoderResult<u32> { | |
54 | if range == 1 { | |
55 | if self.read_bool()? { | |
56 | Ok(0) | |
57 | } else { | |
58 | Ok(1) | |
59 | } | |
60 | } else { | |
61 | let val = self.read_ue()?; | |
62 | validate!(val <= range); | |
63 | Ok(val) | |
64 | } | |
65 | } | |
66 | } | |
67 | ||
68 | fn get_long_term_id(is_idr: bool, slice_hdr: &SliceHeader) -> Option<usize> { | |
69 | if is_idr && !slice_hdr.long_term_reference { | |
70 | None | |
71 | } else { | |
72 | let marking = &slice_hdr.adaptive_ref_pic_marking; | |
73 | for (&op, &arg) in marking.memory_management_control_op.iter().zip(marking.operation_arg.iter()).take(marking.num_ops) { | |
74 | if op == 6 { | |
75 | return Some(arg as usize); | |
76 | } | |
77 | } | |
78 | None | |
79 | } | |
80 | } | |
81 | ||
/// Copies a NAL unit from `src` into `dst` while removing emulation prevention bytes.
///
/// `dst` is cleared first. Every `00 00 03` sequence in the input is written as
/// `00 00`. Copying stops when a `00 00 00 01` start code is met: the three zero
/// bytes of that start code are removed from `dst` again and the function
/// returns the offset in `src` of the first of them. If no start code is found
/// the whole input is consumed and `src.len()` is returned.
fn unescape_nal(src: &[u8], dst: &mut Vec<u8>) -> usize {
    let mut off = 0;
    let mut zrun = 0;
    dst.clear();
    dst.reserve(src.len());
    while off < src.len() {
        dst.push(src[off]);
        if src[off] != 0 {
            zrun = 0;
        } else {
            zrun += 1;
            if zrun == 2 && off + 1 < src.len() && src[off + 1] == 0x03 {
                // skip the emulation prevention byte
                zrun = 0;
                off += 1;
            }
            if zrun >= 3 && off + 1 < src.len() && src[off + 1] == 0x01 {
                // `off` points at the third zero of the start code, so the first
                // one is at `off - 2` (cannot underflow since zrun >= 3).
                off -= 2;
                // All zeroes of the current run have been pushed, so `dst` holds
                // at least three bytes. Its length must be used here instead of
                // the source offset, since the two differ once a 0x03 was dropped.
                dst.truncate(dst.len() - 3);
                break;
            }
        }
        off += 1;
    }
    off
}
107 | ||
108 | fn make_dummy_h264_pic() -> PictureH264 { | |
109 | PictureH264::new(VA_INVALID_ID, 0, H264PictureFlag::Invalid.into(), 0, 0) | |
110 | } | |
111 | ||
112 | trait MakePicH264 { | |
113 | fn make_pic(&self) -> PictureH264; | |
114 | } | |
115 | ||
116 | impl MakePicH264 for PictureInfo { | |
117 | fn make_pic(&self) -> PictureH264 { | |
118 | let mut flags = H264PictureFlags::default(); | |
119 | let frame_idx = if let Some(id) = self.long_term { | |
120 | flags |= H264PictureFlag::LongTermReference; | |
121 | id as u32 | |
122 | } else { | |
123 | if self.is_ref { | |
124 | flags |= H264PictureFlag::ShortTermReference; | |
125 | } | |
126 | u32::from(self.id) | |
127 | }; | |
128 | PictureH264::new(self.surface_id, frame_idx, flags, self.top_id as i32, self.bot_id as i32) | |
129 | } | |
130 | } | |
131 | ||
132 | fn map_ref_list(refs: &[Option<PictureInfo>]) -> [PictureH264; 32] { | |
133 | let mut ref_list = Vec::with_capacity(32); | |
134 | ||
135 | for rpic in refs.iter() { | |
136 | ref_list.push(rpic.as_ref().map_or_else(make_dummy_h264_pic, |pic| pic.make_pic())); | |
137 | } | |
138 | ||
139 | while ref_list.len() < 32 { | |
140 | ref_list.push(make_dummy_h264_pic()); | |
141 | } | |
142 | if let Ok(ret) = ref_list.try_into() { | |
143 | ret | |
144 | } else { | |
145 | panic!("can't convert"); | |
146 | } | |
147 | } | |
148 | ||
149 | fn profile_name(profile: VAProfile::Type) -> &'static str { | |
150 | match profile { | |
151 | VAProfile::VAProfileMPEG2Simple => "MPEG2 Simple", | |
152 | VAProfile::VAProfileMPEG2Main => "MPEG2 Main", | |
153 | VAProfile::VAProfileMPEG4Simple => "MPEG4 Simple", | |
154 | VAProfile::VAProfileMPEG4AdvancedSimple => "MPEG4 Advanced Simple", | |
155 | VAProfile::VAProfileMPEG4Main => "MPEG4 Main", | |
156 | VAProfile::VAProfileH264Baseline => "H264 Baseline", | |
157 | VAProfile::VAProfileH264Main => "H264 Main", | |
158 | VAProfile::VAProfileH264High => "H264 High", | |
159 | VAProfile::VAProfileVC1Simple => "VC1 Simple", | |
160 | VAProfile::VAProfileVC1Main => "VC1 Main", | |
161 | VAProfile::VAProfileVC1Advanced => "VC1 Advanced", | |
162 | VAProfile::VAProfileH263Baseline => "H263 Baseline", | |
163 | VAProfile::VAProfileJPEGBaseline => "JPEG Baseline", | |
164 | VAProfile::VAProfileH264ConstrainedBaseline => "H264 Constrained Baseline", | |
165 | VAProfile::VAProfileVP8Version0_3 => "VP8", | |
166 | VAProfile::VAProfileH264MultiviewHigh => "H.264 Multiview High", | |
167 | VAProfile::VAProfileH264StereoHigh => "H264 Stereo High", | |
168 | VAProfile::VAProfileHEVCMain => "H.EVC Main", | |
169 | VAProfile::VAProfileHEVCMain10 => "H.EVC Main10", | |
170 | VAProfile::VAProfileVP9Profile0 => "VP9 Profile 0", | |
171 | VAProfile::VAProfileVP9Profile1 => "VP9 Profile 1", | |
172 | VAProfile::VAProfileVP9Profile2 => "VP9 Profile 2", | |
173 | VAProfile::VAProfileVP9Profile3 => "VP9 Profile 3", | |
174 | VAProfile::VAProfileHEVCMain12 => "HEVC Main12", | |
175 | VAProfile::VAProfileHEVCMain422_10 => "HEVC Main10 4:2:2", | |
176 | VAProfile::VAProfileHEVCMain422_12 => "HEVC Main12 4:2:2", | |
177 | VAProfile::VAProfileHEVCMain444 => "HEVC Main 4:4:4", | |
178 | VAProfile::VAProfileHEVCMain444_10 => "HEVC Main10 4:4:4", | |
179 | VAProfile::VAProfileHEVCMain444_12 => "HEVC Main12 4:4:4", | |
180 | VAProfile::VAProfileHEVCSccMain => "HEVC SCC Main", | |
181 | VAProfile::VAProfileHEVCSccMain10 => "HEVC SCC Main10", | |
182 | VAProfile::VAProfileHEVCSccMain444 => "HEVC SCC Main 4:4:4", | |
183 | VAProfile::VAProfileAV1Profile0 => "AV1 Profile 0", | |
184 | VAProfile::VAProfileAV1Profile1 => "AV1 Profile 1", | |
185 | VAProfile::VAProfileHEVCSccMain444_10 => "HEVC SCC Main10 4:4:4", | |
186 | _ => "unknown", | |
187 | } | |
188 | } | |
189 | ||
/// Number of reference picture entries passed along with the picture parameters;
/// shorter lists are padded with dummy pictures up to this size.
const NUM_REF_PICS: usize = 16;
191 | ||
192 | struct WaitingFrame { | |
193 | ts: u64, | |
194 | pic: Picture<PictureEnd>, | |
195 | is_idr: bool, | |
196 | is_ref: bool, | |
197 | ftype: FrameType, | |
198 | } | |
199 | ||
200 | struct Reorderer { | |
201 | last_ref_dts: Option<u64>, | |
202 | ready_idx: usize, | |
203 | frames: VecDeque<WaitingFrame>, | |
204 | } | |
205 | ||
206 | impl Default for Reorderer { | |
207 | fn default() -> Self { | |
208 | Self { | |
209 | last_ref_dts: None, | |
210 | ready_idx: 0, | |
211 | frames: VecDeque::with_capacity(16), | |
212 | } | |
213 | } | |
214 | } | |
215 | ||
216 | impl Reorderer { | |
217 | fn add_frame(&mut self, new_frame: WaitingFrame) { | |
218 | if !new_frame.is_ref { | |
219 | if self.frames.is_empty() { | |
220 | self.frames.push_back(new_frame); | |
221 | } else { | |
222 | let new_dts = new_frame.ts; | |
223 | let mut idx = 0; | |
224 | for (i, frm) in self.frames.iter().enumerate() { | |
225 | idx = i; | |
226 | if frm.ts > new_dts { | |
227 | break; | |
228 | } | |
229 | } | |
230 | self.frames.insert(idx, new_frame); | |
231 | } | |
232 | } else { | |
233 | for (i, frm) in self.frames.iter().enumerate() { | |
234 | if Some(frm.ts) == self.last_ref_dts { | |
235 | self.ready_idx = i + 1; | |
236 | } | |
237 | } | |
238 | self.last_ref_dts = Some(new_frame.ts); | |
239 | self.frames.push_back(new_frame); | |
240 | } | |
241 | } | |
242 | fn get_frame(&mut self) -> Option<WaitingFrame> { | |
243 | if self.ready_idx > 0 { | |
244 | match self.frames[0].pic.query_status() { | |
245 | _ if self.ready_idx > 16 => {}, | |
246 | Ok(VASurfaceStatus::Ready) => {}, | |
247 | Ok(VASurfaceStatus::Rendering) => return None, | |
248 | _ => { | |
249 | unimplemented!(); | |
250 | }, | |
251 | }; | |
252 | self.ready_idx -= 1; | |
253 | self.frames.pop_front() | |
254 | } else { | |
255 | None | |
256 | } | |
257 | } | |
258 | fn flush(&mut self) { | |
259 | self.last_ref_dts = None; | |
260 | self.ready_idx = 0; | |
261 | } | |
262 | } | |
263 | ||
264 | #[allow(dead_code)] | |
265 | struct VaapiInternals { | |
266 | display: Rc<Display>, | |
267 | context: Rc<Context>, | |
268 | ref_pics: Vec<(Picture<PictureSync>, VASurfaceID)>, | |
269 | surfaces: Vec<Surface>, | |
270 | ifmt: VAImageFormat, | |
271 | } | |
272 | ||
273 | pub struct VaapiH264Decoder { | |
274 | info: NACodecInfoRef, | |
275 | vaapi: Option<VaapiInternals>, | |
276 | spses: Vec<SeqParameterSet>, | |
277 | ppses: Vec<PicParameterSet>, | |
278 | frame_refs: FrameRefs, | |
279 | nal_len: u8, | |
280 | out_frm: NABufferType, | |
281 | reorderer: Reorderer, | |
282 | tb_num: u32, | |
283 | tb_den: u32, | |
284 | } | |
285 | ||
25685ca4 KS |
/// Copies up to `h` rows of `w` bytes from `src` to `dst`.
///
/// Rows start every `sstride` bytes in the source and every `dstride` bytes in
/// the destination. Copying ends early when either buffer runs out of rows.
///
/// # Panics
///
/// Panics if a stride is zero or if a visited row is shorter than `w`.
#[cfg(not(target_arch="x86_64"))]
fn copy_luma(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    let dst_rows = dst.chunks_mut(dstride);
    let src_rows = src.chunks(sstride);
    dst_rows.zip(src_rows).take(h).for_each(|(drow, srow)| {
        drow[..w].copy_from_slice(&srow[..w]);
    });
}
a439fb0b KS |
/// Splits interleaved chroma rows from `src` into planes 1 and 2 of `frm`.
///
/// Source rows start every `sstride` bytes. In every byte pair the first byte
/// goes to plane 1 and the second to plane 2. At most `frm.height[1]` rows and
/// `frm.width[1]` pairs per row are processed.
#[cfg(not(target_arch="x86_64"))]
fn deint_chroma(frm: NASimpleVideoFrame<u8>, src: &[u8], sstride: usize) {
    let rows = frm.height[1];
    let (mut u_row, mut v_row) = (frm.offset[1], frm.offset[2]);
    for cline in src.chunks(sstride).take(rows) {
        let pairs = cline.chunks_exact(2).take(frm.width[1]);
        for (x, uv) in pairs.enumerate() {
            frm.data[u_row + x] = uv[0];
            frm.data[v_row + x] = uv[1];
        }
        u_row += frm.stride[1];
        v_row += frm.stride[2];
    }
}
307 | ||
308 | #[cfg(target_arch="x86_64")] | |
309 | use std::arch::asm; | |
/// Copies up to `h` rows of `w` bytes from `src` to `dst`.
///
/// Rows start every `sstride` bytes in the source and every `dstride` bytes in
/// the destination. An AVX copy loop is used when the CPU supports it, both
/// buffers and strides are 32-byte aligned, `w` is a non-zero multiple of 64
/// and both buffers are large enough for all `h` rows. In all other cases the
/// rows are copied with plain slice operations.
///
/// # Panics
///
/// The slice-based path panics if a stride is zero or if a visited row is
/// shorter than `w`.
#[cfg(target_arch="x86_64")]
fn copy_luma(dst: &mut [u8], dstride: usize, src: &[u8], sstride: usize, w: usize, h: usize) {
    /// Offset just past the last byte of the last row, `None` for zero rows or on overflow.
    fn span(stride: usize, row_len: usize, rows: usize) -> Option<usize> {
        rows.checked_sub(1)?.checked_mul(stride)?.checked_add(row_len)
    }

    let simd_ok = w > 0
        && (w % 64) == 0
        && w <= dstride
        && w <= sstride
        && ((dstride | sstride) % 32) == 0
        && dst.as_ptr().align_offset(32) == 0
        && src.as_ptr().align_offset(32) == 0
        && span(dstride, w, h).map_or(false, |end| end <= dst.len())
        && span(sstride, w, h).map_or(false, |end| end <= src.len())
        && is_x86_feature_detected!("avx");

    if simd_ok {
        // SAFETY: AVX availability has been checked at run time. Row starts are
        // 32-byte aligned (aligned base pointers and strides) and `w` is a
        // multiple of 64, so every aligned 32-byte access is valid. `w` and `h`
        // are non-zero, so both loops terminate, and the span checks above
        // guarantee that all accesses stay inside the two slices.
        unsafe {
            asm!(
                "2:",
                "  mov {x}, {w}",
                "  3:",
                "    vmovdqa ymm0, [{src}]",
                "    vmovdqa ymm1, [{src}+32]",
                "    vmovdqa [{dst}], ymm0",
                "    vmovdqa [{dst}+32], ymm1",
                "    add {src}, 64",
                "    add {dst}, 64",
                "    sub {x}, 64",
                "    jnz 3b",
                "  add {src}, {sstep}",
                "  add {dst}, {dstep}",
                "  dec {h}",
                "  jnz 2b",
                dst = inout(reg) dst.as_mut_ptr() => _,
                src = inout(reg) src.as_ptr() => _,
                sstep = in(reg) sstride - w,
                dstep = in(reg) dstride - w,
                w = in(reg) w,
                // the row counter is decremented, so it must not be a plain input
                h = inout(reg) h => _,
                x = out(reg) _,
                out("ymm0") _,
                out("ymm1") _,
            );
        }
    } else {
        for (dline, sline) in dst.chunks_mut(dstride)
                                 .zip(src.chunks(sstride))
                                 .take(h) {
            dline[..w].copy_from_slice(&sline[..w]);
        }
    }
}
350 | #[cfg(target_arch="x86_64")] | |
a439fb0b KS |
351 | fn deint_chroma(frm: NASimpleVideoFrame<u8>, src: &[u8], sstride: usize) { |
352 | unsafe { | |
353 | let width = frm.width[1]; | |
354 | let height = frm.height[1]; | |
355 | let dst = frm.data.as_mut_ptr(); | |
356 | let udst = dst.add(frm.offset[1]); | |
357 | let vdst = dst.add(frm.offset[2]); | |
358 | let dstep = frm.stride[1] - width; | |
359 | let sstep = sstride - width * 2; | |
360 | asm!( | |
361 | "2:", | |
362 | " mov {tmp}, {width}", | |
363 | " test {width}, 8", | |
364 | " jz 3f", | |
365 | " movaps xmm0, [{src}]", | |
366 | " movaps xmm1, xmm0", | |
367 | " psllw xmm0, 8", | |
368 | " psrlw xmm1, 8", | |
369 | " psrlw xmm0, 8", | |
370 | " packuswb xmm1, xmm1", | |
371 | " packuswb xmm0, xmm0", | |
372 | " movq [{vdst}], xmm1", | |
373 | " movq [{udst}], xmm0", | |
374 | " add {src}, 16", | |
375 | " add {vdst}, 8", | |
376 | " add {udst}, 8", | |
377 | " sub {tmp}, 8", | |
378 | " 3:", | |
379 | " movaps xmm0, [{src}]", | |
380 | " movaps xmm1, [{src} + 16]", | |
381 | " movaps xmm2, xmm0", | |
382 | " movaps xmm3, xmm1", | |
383 | " psllw xmm0, 8", | |
384 | " psllw xmm1, 8", | |
385 | " psrlw xmm2, 8", | |
386 | " psrlw xmm3, 8", | |
387 | " psrlw xmm0, 8", | |
388 | " psrlw xmm1, 8", | |
389 | " packuswb xmm2, xmm3", | |
390 | " packuswb xmm0, xmm1", | |
391 | " movups [{vdst}], xmm2", | |
392 | " movups [{udst}], xmm0", | |
393 | " add {src}, 32", | |
394 | " add {vdst}, 16", | |
395 | " add {udst}, 16", | |
396 | " sub {tmp}, 16", | |
397 | " jnz 3b", | |
398 | " add {udst}, {dstep}", | |
399 | " add {vdst}, {dstep}", | |
400 | " add {src}, {sstep}", | |
401 | " dec {height}", | |
402 | " jnz 2b", | |
403 | src = inout(reg) src.as_ptr() => _, | |
404 | udst = inout(reg) udst => _, | |
405 | vdst = inout(reg) vdst => _, | |
406 | width = in(reg) width, | |
407 | height = inout(reg) height => _, | |
408 | dstep = in(reg) dstep, | |
409 | sstep = in(reg) sstep, | |
410 | tmp = out(reg) _, | |
411 | out("xmm0") _, | |
412 | out("xmm1") _, | |
413 | out("xmm2") _, | |
414 | out("xmm3") _, | |
415 | ); | |
416 | } | |
417 | } | |
418 | ||
e5ccd68d KS |
419 | fn fill_frame(ifmt: VAImageFormat, pic: &Picture<PictureSync>, frm: &mut NABufferType) -> DecoderResult<()> { |
420 | let mut vbuf = frm.get_vbuf().unwrap(); | |
421 | let (w, h) = pic.surface_size(); | |
422 | //let cur_ts = pic.timestamp(); | |
423 | ||
424 | let img = Image::new(pic, ifmt, w, h, true).expect("get image"); | |
425 | ||
426 | let iimg = img.image(); | |
427 | let imgdata: &[u8] = img.as_ref(); | |
428 | ||
429 | match iimg.format.fourcc().map_err(|_| DecoderError::InvalidData)? { | |
430 | VAFourcc::NV12 => { | |
431 | let frm = NASimpleVideoFrame::from_video_buf(&mut vbuf).unwrap(); | |
432 | validate!(iimg.width == (frm.width[0] as u16)); | |
433 | validate!(iimg.height == (frm.height[0] as u16)); | |
434 | ||
25685ca4 | 435 | copy_luma(&mut frm.data[frm.offset[0]..], frm.stride[0], &imgdata[iimg.offsets[0] as usize..], iimg.pitches[0] as usize, frm.width[0], frm.height[0]); |
e5ccd68d | 436 | |
a439fb0b | 437 | deint_chroma(frm, &imgdata[iimg.offsets[1] as usize..], iimg.pitches[1] as usize); |
e5ccd68d KS |
438 | }, |
439 | _ => unimplemented!(), | |
440 | }; | |
441 | Ok(()) | |
442 | } | |
443 | ||
444 | impl Default for VaapiH264Decoder { | |
445 | fn default() -> Self { | |
446 | Self { | |
447 | info: NACodecInfoRef::default(), | |
448 | vaapi: None, | |
449 | spses: Vec::with_capacity(1), | |
450 | ppses: Vec::with_capacity(4), | |
451 | frame_refs: FrameRefs::new(), | |
452 | nal_len: 0, | |
453 | out_frm: NABufferType::None, | |
454 | reorderer: Reorderer::default(), | |
455 | tb_num: 0, | |
456 | tb_den: 0, | |
457 | } | |
458 | } | |
459 | } | |
460 | ||
461 | impl VaapiH264Decoder { | |
462 | pub fn new() -> Self { Self::default() } | |
463 | pub fn init(&mut self, info: NACodecInfoRef) -> DecoderResult<()> { | |
464 | if let NACodecTypeInfo::Video(vinfo) = info.get_properties() { | |
465 | let edata = info.get_extradata().unwrap(); | |
466 | //print!("edata:"); for &el in edata.iter() { print!(" {:02X}", el); } println!(); | |
467 | let profile; | |
468 | let mut nal_buf = Vec::with_capacity(1024); | |
469 | if edata.len() > 11 && &edata[0..4] == b"avcC" { | |
470 | let mut mr = MemoryReader::new_read(edata.as_slice()); | |
471 | let mut br = ByteReader::new(&mut mr); | |
472 | ||
473 | br.read_skip(4)?; | |
474 | let version = br.read_byte()?; | |
475 | validate!(version == 1); | |
476 | profile = br.read_byte()?; | |
477 | let _compatibility = br.read_byte()?; | |
478 | let _level = br.read_byte()?; | |
479 | let b = br.read_byte()?; | |
480 | validate!((b & 0xFC) == 0xFC); | |
481 | self.nal_len = (b & 3) + 1; | |
482 | let b = br.read_byte()?; | |
483 | validate!((b & 0xE0) == 0xE0); | |
484 | let num_sps = (b & 0x1F) as usize; | |
485 | for _ in 0..num_sps { | |
486 | let len = br.read_u16be()? as usize; | |
487 | let offset = br.tell() as usize; | |
488 | validate!((br.peek_byte()? & 0x1F) == 7); | |
489 | let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf); | |
490 | br.read_skip(len)?; | |
491 | let sps = parse_sps(&nal_buf[1..])?; | |
492 | self.spses.push(sps); | |
493 | } | |
494 | let num_pps = br.read_byte()? as usize; | |
495 | for _ in 0..num_pps { | |
496 | let len = br.read_u16be()? as usize; | |
497 | let offset = br.tell() as usize; | |
498 | validate!((br.peek_byte()? & 0x1F) == 8); | |
499 | let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf); | |
500 | br.read_skip(len)?; | |
501 | let src = &nal_buf; | |
502 | ||
503 | let mut full_size = src.len() * 8; | |
504 | for &byte in src.iter().rev() { | |
505 | if byte == 0 { | |
506 | full_size -= 8; | |
507 | } else { | |
508 | full_size -= (byte.trailing_zeros() + 1) as usize; | |
509 | break; | |
510 | } | |
511 | } | |
512 | validate!(full_size > 0); | |
513 | ||
514 | let pps = parse_pps(&src[1..], &self.spses, full_size - 8)?; | |
515 | let mut found = false; | |
516 | for stored_pps in self.ppses.iter_mut() { | |
517 | if stored_pps.pic_parameter_set_id == pps.pic_parameter_set_id { | |
518 | *stored_pps = pps.clone(); | |
519 | found = true; | |
520 | break; | |
521 | } | |
522 | } | |
523 | if !found { | |
524 | self.ppses.push(pps); | |
525 | } | |
526 | } | |
527 | if br.left() > 0 { | |
528 | match profile { | |
529 | 100 | 110 | 122 | 144 => { | |
530 | let b = br.read_byte()?; | |
bf56f153 KS |
531 | // some encoders put something different here |
532 | if (b & 0xFC) != 0xFC { | |
533 | return Ok(()); | |
534 | } | |
e5ccd68d KS |
535 | // b & 3 -> chroma format |
536 | let b = br.read_byte()?; | |
537 | validate!((b & 0xF8) == 0xF8); | |
538 | // b & 7 -> luma depth minus 8 | |
539 | let b = br.read_byte()?; | |
540 | validate!((b & 0xF8) == 0xF8); | |
541 | // b & 7 -> chroma depth minus 8 | |
542 | let num_spsext = br.read_byte()? as usize; | |
543 | for _ in 0..num_spsext { | |
544 | let len = br.read_u16be()? as usize; | |
545 | // parse spsext | |
546 | br.read_skip(len)?; | |
547 | } | |
548 | }, | |
549 | _ => {}, | |
550 | }; | |
551 | } | |
552 | } else { | |
553 | return Err(DecoderError::NotImplemented); | |
554 | } | |
555 | ||
556 | validate!(profile > 0); | |
557 | let width = (vinfo.get_width() + 15) & !15; | |
558 | let height = (vinfo.get_height() + 15) & !15; | |
559 | ||
560 | let display = Display::open_silently().expect("open display"); | |
561 | ||
562 | let num_surfaces = self.spses[0].num_ref_frames + 4 + 64; | |
563 | ||
564 | let va_profile = match profile { | |
565 | 66 => VAProfile::VAProfileH264ConstrainedBaseline, | |
566 | 77 => VAProfile::VAProfileH264Main, | |
567 | 88 | 100 | 110 | 122 => VAProfile::VAProfileH264High, | |
568 | _ => return Err(DecoderError::NotImplemented), | |
569 | }; | |
570 | if let Ok(profiles) = display.query_config_profiles() { | |
571 | if !profiles.contains(&va_profile) { | |
572 | println!("Profile {} ({}) not supported", profile, profile_name(va_profile)); | |
573 | return Err(DecoderError::NotImplemented); | |
574 | } | |
575 | } else { | |
576 | return Err(DecoderError::Bug); | |
577 | } | |
578 | if let Ok(points) = display.query_config_entrypoints(va_profile) { | |
579 | if !points.contains(&VAEntrypoint::VAEntrypointVLD) { | |
580 | println!("no decoding support for this profile"); | |
581 | return Err(DecoderError::NotImplemented); | |
582 | } | |
583 | } else { | |
584 | return Err(DecoderError::Bug); | |
585 | } | |
586 | ||
587 | let config = display.create_config(vec![ | |
588 | VAConfigAttrib { type_: VAConfigAttribType::VAConfigAttribRTFormat, value: RTFormat::YUV420.into() }, | |
589 | ], va_profile, VAEntrypoint::VAEntrypointVLD).map_err(|_| { | |
590 | println!("config creation failed!"); | |
591 | DecoderError::Bug | |
592 | })?; | |
593 | let surfaces = display.create_surfaces(RTFormat::YUV420, None, width as u32, height as u32, Some(UsageHint::Decoder.into()), num_surfaces as u32).map_err(|_| DecoderError::AllocError)?; | |
594 | let context = display.create_context(&config, width as i32, height as i32, Some(&surfaces), true).map_err(|_| DecoderError::Bug)?; | |
595 | ||
596 | let ref_pics = Vec::new(); | |
597 | ||
598 | let image_formats = display.query_image_formats().map_err(|_| DecoderError::Bug)?; | |
599 | validate!(!image_formats.is_empty()); | |
600 | let mut ifmt = image_formats[0]; | |
601 | for fmt in image_formats.iter() { | |
602 | if fmt.bits_per_pixel == 12 { | |
603 | ifmt = *fmt; | |
604 | break; | |
605 | } | |
606 | } | |
607 | ||
608 | self.vaapi = Some(VaapiInternals { display, context, ref_pics, surfaces, ifmt }); | |
609 | ||
610 | let vinfo = NAVideoInfo::new(width, height, false, YUV420_FORMAT); | |
611 | self.info = NACodecInfo::new_ref(info.get_name(), NACodecTypeInfo::Video(vinfo), info.get_extradata()).into_ref(); | |
612 | self.out_frm = alloc_video_buffer(vinfo, 4)?; | |
613 | ||
614 | Ok(()) | |
615 | } else { | |
616 | Err(DecoderError::InvalidData) | |
617 | } | |
618 | } | |
619 | fn decode(&mut self, pkt: &NAPacket) -> DecoderResult<()> { | |
620 | let src = pkt.get_buffer(); | |
621 | let vactx = if let Some(ref mut ctx) = self.vaapi { ctx } else { return Err(DecoderError::Bug) }; | |
622 | ||
623 | let timestamp = pkt.get_dts().unwrap_or_else(|| pkt.get_pts().unwrap_or(0)); | |
624 | ||
625 | if vactx.surfaces.is_empty() { | |
626 | panic!("ran out of free surfaces"); | |
627 | // return Err(DecoderError::AllocError); | |
628 | } | |
629 | let surface = vactx.surfaces.pop().unwrap(); | |
630 | let surface_id = surface.id(); | |
631 | let mut pic = Picture::new(timestamp, vactx.context.clone(), surface); | |
632 | let mut is_ref = false; | |
633 | let mut is_keyframe = false; | |
634 | ||
635 | self.tb_num = pkt.ts.tb_num; | |
636 | self.tb_den = pkt.ts.tb_den; | |
637 | ||
638 | let mut mr = MemoryReader::new_read(&src); | |
639 | let mut br = ByteReader::new(&mut mr); | |
640 | let mut frame_type = FrameType::I; | |
641 | let mut nal_buf = Vec::with_capacity(1024); | |
642 | while br.left() > 0 { | |
643 | let size = match self.nal_len { | |
644 | 1 => br.read_byte()? as usize, | |
645 | 2 => br.read_u16be()? as usize, | |
646 | 3 => br.read_u24be()? as usize, | |
647 | 4 => br.read_u32be()? as usize, | |
648 | _ => unreachable!(), | |
649 | }; | |
650 | validate!(br.left() >= (size as i64)); | |
651 | let offset = br.tell() as usize; | |
652 | let raw_nal = &src[offset..][..size]; | |
653 | let _size = unescape_nal(raw_nal, &mut nal_buf); | |
654 | ||
655 | let src = &nal_buf; | |
656 | validate!((src[0] & 0x80) == 0); | |
657 | let nal_ref_idc = src[0] >> 5; | |
658 | let nal_unit_type = src[0] & 0x1F; | |
659 | ||
660 | let mut full_size = src.len() * 8; | |
661 | for &byte in src.iter().rev() { | |
662 | if byte == 0 { | |
663 | full_size -= 8; | |
664 | } else { | |
665 | full_size -= (byte.trailing_zeros() + 1) as usize; | |
666 | break; | |
667 | } | |
668 | } | |
669 | validate!(full_size > 0); | |
670 | ||
671 | match nal_unit_type { | |
672 | 1 | 5 => { | |
673 | let is_idr = nal_unit_type == 5; | |
674 | is_ref |= nal_ref_idc != 0; | |
675 | is_keyframe |= is_idr; | |
676 | let mut br = BitReader::new(&src[..(full_size + 7)/8], BitReaderMode::BE); | |
677 | br.skip(8)?; | |
678 | ||
679 | let slice_hdr = parse_slice_header(&mut br, &self.spses, &self.ppses, is_idr, nal_ref_idc)?; | |
680 | match slice_hdr.slice_type { | |
681 | SliceType::P if frame_type != FrameType::B => frame_type = FrameType::P, | |
682 | SliceType::SP if frame_type != FrameType::B => frame_type = FrameType::P, | |
683 | SliceType::B => frame_type = FrameType::B, | |
684 | _ => {}, | |
685 | }; | |
686 | let mut cur_sps = 0; | |
687 | let mut cur_pps = 0; | |
688 | let mut pps_found = false; | |
689 | for (i, pps) in self.ppses.iter().enumerate() { | |
690 | if pps.pic_parameter_set_id == slice_hdr.pic_parameter_set_id { | |
691 | cur_pps = i; | |
692 | pps_found = true; | |
693 | break; | |
694 | } | |
695 | } | |
696 | validate!(pps_found); | |
697 | let mut sps_found = false; | |
698 | for (i, sps) in self.spses.iter().enumerate() { | |
699 | if sps.seq_parameter_set_id == self.ppses[cur_pps].seq_parameter_set_id { | |
700 | cur_sps = i; | |
701 | sps_found = true; | |
702 | break; | |
703 | } | |
704 | } | |
705 | validate!(sps_found); | |
706 | let sps = &self.spses[cur_sps]; | |
707 | let pps = &self.ppses[cur_pps]; | |
708 | ||
709 | if slice_hdr.first_mb_in_slice == 0 { | |
710 | let (top_id, bot_id) = self.frame_refs.calc_picture_num(&slice_hdr, is_idr, nal_ref_idc, sps); | |
711 | if is_idr { | |
712 | self.frame_refs.clear_refs(); | |
713 | for (pic, _) in vactx.ref_pics.drain(..) { | |
714 | if let Ok(surf) = pic.take_surface() { | |
715 | vactx.surfaces.push(surf); | |
716 | } else { | |
717 | panic!("can't take surface"); | |
718 | } | |
719 | } | |
720 | } | |
721 | self.frame_refs.select_refs(sps, &slice_hdr, top_id); | |
722 | let mut pic_refs = Vec::with_capacity(NUM_REF_PICS); | |
723 | for pic in self.frame_refs.ref_pics.iter().rev().take(NUM_REF_PICS) { | |
724 | pic_refs.push(pic.make_pic()); | |
725 | } | |
726 | if slice_hdr.adaptive_ref_pic_marking_mode { | |
727 | self.frame_refs.apply_adaptive_marking(&slice_hdr.adaptive_ref_pic_marking, slice_hdr.frame_num, 1 << sps.log2_max_frame_num)?; | |
728 | } | |
729 | ||
730 | while pic_refs.len() < NUM_REF_PICS { | |
731 | pic_refs.push(make_dummy_h264_pic()); | |
732 | } | |
733 | ||
734 | let mut flags = H264PictureFlags::default(); | |
735 | let frame_idx = if let Some(id) = get_long_term_id(is_idr, &slice_hdr) { | |
736 | flags |= H264PictureFlag::LongTermReference; | |
737 | id as u32 | |
738 | } else { | |
739 | if nal_ref_idc != 0 { | |
740 | flags |= H264PictureFlag::ShortTermReference; | |
741 | } | |
742 | u32::from(slice_hdr.frame_num) | |
743 | }; | |
744 | let pic_refs: [PictureH264; NUM_REF_PICS] = pic_refs.try_into().unwrap_or_else(|_| panic!("can't convert")); | |
745 | ||
746 | let h264pic = PictureH264::new(surface_id, frame_idx, flags, top_id as i32, bot_id as i32); | |
747 | ||
748 | let seq_fields = H264SeqFields::new( | |
749 | u32::from(sps.chroma_format_idc), | |
750 | u32::from(sps.separate_colour_plane), | |
751 | u32::from(sps.gaps_in_frame_num_value_allowed), | |
752 | u32::from(sps.frame_mbs_only), | |
753 | u32::from(sps.mb_adaptive_frame_field), | |
754 | u32::from(sps.direct_8x8_inference), | |
755 | u32::from(sps.level_idc >= 31), | |
756 | u32::from(sps.log2_max_frame_num) - 4, | |
757 | u32::from(sps.pic_order_cnt_type), | |
758 | u32::from(sps.log2_max_pic_order_cnt_lsb).wrapping_sub(4), | |
759 | u32::from(sps.delta_pic_order_always_zero) | |
760 | ); | |
761 | let pic_fields = H264PicFields::new( | |
762 | u32::from(pps.entropy_coding_mode), | |
763 | u32::from(pps.weighted_pred), | |
764 | u32::from(pps.weighted_bipred_idc), | |
765 | u32::from(pps.transform_8x8_mode), | |
766 | u32::from(slice_hdr.field_pic), | |
767 | u32::from(pps.constrained_intra_pred), | |
768 | u32::from(pps.pic_order_present), | |
769 | u32::from(pps.deblocking_filter_control_present), | |
770 | u32::from(pps.redundant_pic_cnt_present), | |
771 | u32::from(nal_ref_idc != 0) | |
772 | ); | |
773 | let ppd = PictureParameterBufferH264::new( | |
774 | h264pic, | |
775 | pic_refs, | |
776 | sps.pic_width_in_mbs as u16 - 1, | |
777 | sps.pic_height_in_mbs as u16 - 1, | |
778 | sps.bit_depth_luma - 8, | |
779 | sps.bit_depth_chroma - 8, | |
780 | sps.num_ref_frames as u8, | |
781 | &seq_fields, | |
782 | pps.num_slice_groups as u8 - 1, // should be 0 | |
783 | pps.slice_group_map_type, // should be 0 | |
784 | 0, //pps.slice_group_change_rate as u16 - 1, | |
785 | pps.pic_init_qp as i8 - 26, | |
786 | pps.pic_init_qs as i8 - 26, | |
787 | pps.chroma_qp_index_offset, | |
788 | pps.second_chroma_qp_index_offset, | |
789 | &pic_fields, | |
790 | slice_hdr.frame_num | |
791 | ); | |
792 | let pic_param = BufferType::PictureParameter(PictureParameter::H264(ppd)); | |
793 | let buf = vactx.context.create_buffer(pic_param).map_err(|_| DecoderError::Bug)?; | |
794 | pic.add_buffer(buf); | |
795 | ||
796 | let mut scaling_list_8x8 = [[0; 64]; 2]; | |
797 | scaling_list_8x8[0].copy_from_slice(&pps.scaling_list_8x8[0]); | |
798 | scaling_list_8x8[1].copy_from_slice(&pps.scaling_list_8x8[3]); | |
799 | let iqmatrix = BufferType::IQMatrix(IQMatrix::H264(IQMatrixBufferH264::new(pps.scaling_list_4x4, scaling_list_8x8))); | |
800 | let buf = vactx.context.create_buffer(iqmatrix).map_err(|_| DecoderError::Bug)?; | |
801 | pic.add_buffer(buf); | |
802 | ||
803 | let cpic = PictureInfo { | |
804 | id: slice_hdr.frame_num, | |
805 | full_id: top_id, | |
806 | surface_id, | |
807 | top_id, bot_id, | |
808 | //pic_type: slice_hdr.slice_type.to_frame_type(), | |
809 | is_ref, | |
810 | is_idr, | |
811 | long_term: get_long_term_id(is_idr, &slice_hdr), | |
812 | }; | |
813 | if cpic.is_ref { | |
814 | self.frame_refs.add_short_term(cpic.clone(), sps.num_ref_frames); | |
815 | } | |
816 | if let Some(lt_idx) = cpic.long_term { | |
817 | self.frame_refs.add_long_term(lt_idx, cpic); | |
818 | } | |
819 | } | |
820 | ||
821 | let mut luma_weight_l0 = [0i16; 32]; | |
822 | let mut luma_offset_l0 = [0i16; 32]; | |
823 | let mut chroma_weight_l0 = [[0i16; 2]; 32]; | |
824 | let mut chroma_offset_l0 = [[0i16; 2]; 32]; | |
825 | let mut luma_weight_l1 = [0i16; 32]; | |
826 | let mut luma_offset_l1 = [0i16; 32]; | |
827 | let mut chroma_weight_l1 = [[0i16; 2]; 32]; | |
828 | let mut chroma_offset_l1 = [[0i16; 2]; 32]; | |
829 | let mut luma_weighted_l0 = false; | |
830 | let mut chroma_weighted_l0 = false; | |
831 | let mut luma_weighted_l1 = false; | |
832 | let mut chroma_weighted_l1 = false; | |
833 | let mut luma_log2_weight_denom = slice_hdr.luma_log2_weight_denom; | |
834 | let mut chroma_log2_weight_denom = slice_hdr.chroma_log2_weight_denom; | |
835 | ||
836 | if (pps.weighted_pred && matches!(slice_hdr.slice_type, SliceType::P | SliceType::B)) || (pps.weighted_bipred_idc == 1 && slice_hdr.slice_type == SliceType::B) { | |
837 | luma_weighted_l0 = true; | |
838 | chroma_weighted_l0 = false; | |
839 | for (i, winfo) in slice_hdr.weights_l0.iter().enumerate().take(slice_hdr.num_ref_idx_l0_active) { | |
840 | if winfo.luma_weighted { | |
841 | luma_weight_l0[i] = winfo.luma_weight.into(); | |
842 | luma_offset_l0[i] = winfo.luma_offset.into(); | |
843 | } else { | |
844 | luma_weight_l0[i] = 1 << slice_hdr.luma_log2_weight_denom; | |
845 | } | |
846 | if winfo.chroma_weighted { | |
847 | chroma_weight_l0[i][0] = winfo.chroma_weight[0].into(); | |
848 | chroma_weight_l0[i][1] = winfo.chroma_weight[1].into(); | |
849 | chroma_offset_l0[i][0] = winfo.chroma_offset[0].into(); | |
850 | chroma_offset_l0[i][1] = winfo.chroma_offset[1].into(); | |
851 | } else { | |
852 | chroma_weight_l0[i][0] = 1 << slice_hdr.chroma_log2_weight_denom; | |
853 | chroma_weight_l0[i][1] = 1 << slice_hdr.chroma_log2_weight_denom; | |
854 | chroma_offset_l0[i][0] = 0; | |
855 | chroma_offset_l0[i][1] = 0; | |
856 | } | |
857 | chroma_weighted_l0 |= winfo.chroma_weighted; | |
858 | } | |
859 | } | |
860 | if pps.weighted_bipred_idc == 1 && slice_hdr.slice_type == SliceType::B { | |
861 | luma_weighted_l1 = true; | |
862 | chroma_weighted_l1 = sps.chroma_format_idc != 0; | |
863 | for (i, winfo) in slice_hdr.weights_l1.iter().enumerate().take(slice_hdr.num_ref_idx_l1_active) { | |
864 | if winfo.luma_weighted { | |
865 | luma_weight_l1[i] = winfo.luma_weight.into(); | |
866 | luma_offset_l1[i] = winfo.luma_offset.into(); | |
867 | } else { | |
868 | luma_weight_l1[i] = 1 << slice_hdr.luma_log2_weight_denom; | |
869 | } | |
870 | if chroma_weighted_l1 && winfo.chroma_weighted { | |
871 | chroma_weight_l1[i][0] = winfo.chroma_weight[0].into(); | |
872 | chroma_weight_l1[i][1] = winfo.chroma_weight[1].into(); | |
873 | chroma_offset_l1[i][0] = winfo.chroma_offset[0].into(); | |
874 | chroma_offset_l1[i][1] = winfo.chroma_offset[1].into(); | |
875 | } else { | |
876 | chroma_weight_l1[i][0] = 1 << slice_hdr.chroma_log2_weight_denom; | |
877 | chroma_weight_l1[i][1] = 1 << slice_hdr.chroma_log2_weight_denom; | |
878 | chroma_offset_l1[i][0] = 0; | |
879 | chroma_offset_l1[i][1] = 0; | |
880 | } | |
881 | } | |
882 | } | |
883 | if pps.weighted_bipred_idc == 2 && slice_hdr.slice_type == SliceType::B { | |
884 | let num_l0 = slice_hdr.num_ref_idx_l0_active; | |
885 | let num_l1 = slice_hdr.num_ref_idx_l1_active; | |
886 | if num_l0 != 1 || num_l1 != 1 { //xxx: also exclude symmetric case | |
887 | luma_weighted_l0 = false; | |
888 | luma_weighted_l1 = false; | |
889 | chroma_weighted_l0 = false; | |
890 | chroma_weighted_l1 = false; | |
891 | luma_log2_weight_denom = 5; | |
892 | chroma_log2_weight_denom = 5; | |
893 | ||
894 | for w in luma_weight_l0.iter_mut() { | |
895 | *w = 32; | |
896 | } | |
897 | for w in luma_weight_l1.iter_mut() { | |
898 | *w = 32; | |
899 | } | |
900 | for w in chroma_weight_l0.iter_mut() { | |
901 | *w = [32; 2]; | |
902 | } | |
903 | for w in chroma_weight_l1.iter_mut() { | |
904 | *w = [32; 2]; | |
905 | } | |
906 | } | |
907 | } | |
908 | ||
909 | let ref_pic_list_0 = map_ref_list(&self.frame_refs.cur_refs.ref_list0); | |
910 | let ref_pic_list_1 = map_ref_list(&self.frame_refs.cur_refs.ref_list1); | |
911 | ||
912 | let slice_param = SliceParameterBufferH264::new( | |
913 | raw_nal.len() as u32, | |
914 | 0, // no offset | |
915 | VASliceDataFlag::All, | |
916 | br.tell() as u16, | |
917 | slice_hdr.first_mb_in_slice as u16, | |
918 | match slice_hdr.slice_type { | |
919 | SliceType::I => 2, | |
920 | SliceType::P => 0, | |
921 | SliceType::B => 1, | |
922 | SliceType::SI => 4, | |
923 | SliceType::SP => 3, | |
924 | }, | |
925 | slice_hdr.direct_spatial_mv_pred as u8, | |
926 | (slice_hdr.num_ref_idx_l0_active as u8).saturating_sub(1), | |
927 | (slice_hdr.num_ref_idx_l1_active as u8).saturating_sub(1), | |
928 | slice_hdr.cabac_init_idc, | |
929 | slice_hdr.slice_qp_delta as i8, | |
930 | slice_hdr.disable_deblocking_filter_idc, | |
931 | slice_hdr.slice_alpha_c0_offset / 2, | |
932 | slice_hdr.slice_beta_offset / 2, | |
933 | ref_pic_list_0, | |
934 | ref_pic_list_1, | |
935 | luma_log2_weight_denom, | |
936 | chroma_log2_weight_denom, | |
937 | luma_weighted_l0 as u8, luma_weight_l0, luma_offset_l0, | |
938 | chroma_weighted_l0 as u8, chroma_weight_l0, chroma_offset_l0, | |
939 | luma_weighted_l1 as u8, luma_weight_l1, luma_offset_l1, | |
940 | chroma_weighted_l1 as u8, chroma_weight_l1, chroma_offset_l1, | |
941 | ); | |
942 | let slc_param = BufferType::SliceParameter(SliceParameter::H264(slice_param)); | |
943 | let buf = vactx.context.create_buffer(slc_param).map_err(|_| DecoderError::Bug)?; | |
944 | pic.add_buffer(buf); | |
945 | ||
946 | let slc_data = BufferType::SliceData(raw_nal.to_vec()); | |
947 | let buf = vactx.context.create_buffer(slc_data).map_err(|_| DecoderError::Bug)?; | |
948 | pic.add_buffer(buf); | |
949 | }, | |
950 | 2 => { // slice data partition A | |
951 | //slice header | |
952 | //slice id = read_ue() | |
953 | //cat 2 slice data (all but MB layer residual) | |
954 | return Err(DecoderError::NotImplemented); | |
955 | }, | |
956 | 3 => { // slice data partition B | |
957 | //slice id = read_ue() | |
958 | //if pps.redundant_pic_cnt_present { redundant_pic_cnt = read_ue() } | |
959 | //cat 3 slice data (MB layer residual) | |
960 | return Err(DecoderError::NotImplemented); | |
961 | }, | |
962 | 4 => { // slice data partition C | |
963 | //slice id = read_ue() | |
964 | //if pps.redundant_pic_cnt_present { redundant_pic_cnt = read_ue() } | |
965 | //cat 4 slice data (MB layer residual) | |
966 | return Err(DecoderError::NotImplemented); | |
967 | }, | |
968 | 6 => {}, //SEI | |
969 | 7 => { | |
970 | let sps = parse_sps(&src[1..])?; | |
971 | self.spses.push(sps); | |
972 | }, | |
973 | 8 => { | |
974 | validate!(full_size >= 8 + 16); | |
975 | let pps = parse_pps(&src[1..], &self.spses, full_size - 8)?; | |
976 | let mut found = false; | |
977 | for stored_pps in self.ppses.iter_mut() { | |
978 | if stored_pps.pic_parameter_set_id == pps.pic_parameter_set_id { | |
979 | *stored_pps = pps.clone(); | |
980 | found = true; | |
981 | break; | |
982 | } | |
983 | } | |
984 | if !found { | |
985 | self.ppses.push(pps); | |
986 | } | |
987 | }, | |
988 | 9 => { // access unit delimiter | |
989 | }, | |
990 | 10 => {}, //end of sequence | |
991 | 11 => {}, //end of stream | |
992 | 12 => {}, //filler | |
993 | _ => {}, | |
994 | }; | |
995 | ||
996 | br.read_skip(size)?; | |
997 | } | |
998 | ||
999 | let bpic = pic.begin().expect("begin"); | |
1000 | let rpic = bpic.render().expect("render"); | |
1001 | let epic = rpic.end().expect("end"); | |
1002 | ||
1003 | self.reorderer.add_frame(WaitingFrame { | |
1004 | pic: epic, | |
1005 | is_idr: is_keyframe, | |
1006 | is_ref, | |
1007 | ftype: frame_type, | |
1008 | ts: timestamp, | |
1009 | }); | |
1010 | ||
1011 | let mut idx = 0; | |
1012 | while idx < vactx.ref_pics.len() { | |
1013 | let cur_surf_id = vactx.ref_pics[idx].1; | |
1014 | if self.frame_refs.ref_pics.iter().any(|fref| fref.surface_id == cur_surf_id) { | |
1015 | idx += 1; | |
1016 | } else { | |
1017 | let (pic, _) = vactx.ref_pics.remove(idx); | |
1018 | if let Ok(surf) = pic.take_surface() { | |
1019 | vactx.surfaces.push(surf); | |
1020 | } else { | |
1021 | panic!("can't take surface"); | |
1022 | } | |
1023 | } | |
1024 | } | |
1025 | ||
1026 | Ok(()) | |
1027 | } | |
1028 | fn get_frame(&mut self) -> Option<NAFrameRef> { | |
1029 | if let Some(ref mut vactx) = self.vaapi { | |
1030 | if let Some(frm) = self.reorderer.get_frame() { | |
1031 | let ts = frm.ts; | |
1032 | let is_idr = frm.is_idr; | |
1033 | let is_ref = frm.is_ref; | |
1034 | let ftype = frm.ftype; | |
1035 | if let Ok(pic) = frm.pic.sync() { | |
1036 | let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm); | |
1037 | ||
1038 | if !is_ref { | |
1039 | if let Ok(surf) = pic.take_surface() { | |
1040 | vactx.surfaces.push(surf); | |
1041 | } else { | |
1042 | panic!("can't take surface"); | |
1043 | } | |
1044 | } else { | |
1045 | let id = pic.surface_id(); | |
1046 | vactx.ref_pics.push((pic, id)); | |
1047 | } | |
1048 | ||
1049 | let ts = NATimeInfo::new(None, Some(ts), None, self.tb_num, self.tb_den); | |
1050 | Some(NAFrame::new(ts, ftype, is_idr, self.info.clone(), self.out_frm.clone()).into_ref()) | |
1051 | } else { | |
1052 | panic!("can't sync"); | |
1053 | } | |
1054 | } else { | |
1055 | None | |
1056 | } | |
1057 | } else { | |
1058 | None | |
1059 | } | |
1060 | } | |
1061 | fn get_last_frames(&mut self) -> Option<NAFrameRef> { | |
1062 | if let Some(ref mut vactx) = self.vaapi { | |
1063 | if let Some(frm) = self.reorderer.frames.pop_front() { | |
1064 | let ts = frm.ts; | |
1065 | let is_idr = frm.is_idr; | |
1066 | let is_ref = frm.is_ref; | |
1067 | let ftype = frm.ftype; | |
1068 | if let Ok(pic) = frm.pic.sync() { | |
1069 | let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm); | |
1070 | ||
1071 | if !is_ref { | |
1072 | if let Ok(surf) = pic.take_surface() { | |
1073 | vactx.surfaces.push(surf); | |
1074 | } else { | |
1075 | panic!("can't take surface"); | |
1076 | } | |
1077 | } else { | |
1078 | let id = pic.surface_id(); | |
1079 | vactx.ref_pics.push((pic, id)); | |
1080 | } | |
1081 | ||
1082 | let ts = NATimeInfo::new(None, Some(ts), None, self.tb_num, self.tb_den); | |
1083 | Some(NAFrame::new(ts, ftype, is_idr, self.info.clone(), self.out_frm.clone()).into_ref()) | |
1084 | } else { | |
1085 | panic!("can't sync"); | |
1086 | } | |
1087 | } else { | |
1088 | None | |
1089 | } | |
1090 | } else { | |
1091 | None | |
1092 | } | |
1093 | } | |
1094 | fn flush(&mut self) { | |
1095 | self.frame_refs.clear_refs(); | |
1096 | if let Some(ref mut vactx) = self.vaapi { | |
1097 | for frm in self.reorderer.frames.drain(..) { | |
1098 | if let Ok(pic) = frm.pic.sync() { | |
1099 | if let Ok(surf) = pic.take_surface() { | |
1100 | vactx.surfaces.push(surf); | |
1101 | } else { | |
1102 | panic!("can't take surface"); | |
1103 | } | |
1104 | } else { | |
1105 | panic!("can't sync"); | |
1106 | } | |
1107 | } | |
1108 | self.reorderer.flush(); | |
1109 | for (pic, _) in vactx.ref_pics.drain(..) { | |
1110 | if let Ok(surf) = pic.take_surface() { | |
1111 | vactx.surfaces.push(surf); | |
1112 | } else { | |
1113 | panic!("can't take surface"); | |
1114 | } | |
1115 | } | |
1116 | } | |
1117 | } | |
1118 | } | |
1119 | ||
1120 | impl NAOptionHandler for VaapiH264Decoder { | |
1121 | fn get_supported_options(&self) -> &[NAOptionDefinition] { &[] } | |
1122 | fn set_options(&mut self, _options: &[NAOption]) {} | |
1123 | fn query_option_value(&self, _name: &str) -> Option<NAValue> { None } | |
1124 | } | |
1125 | ||
1126 | use std::thread::*; | |
1127 | use std::sync::mpsc::*; | |
1128 | ||
/// Requests sent from `HWWrapper` to the decoder worker thread.
enum DecMessage {
    /// Initialise the decoder with the given codec information; answered with `Ok` or `Err`.
    Init(NACodecInfoRef),
    /// Decode one packet; answered with `Ok` or `Err`.
    Decode(NAPacket),
    /// Flush the decoder; the worker sends no response for this request.
    Flush,
    /// Ask for the next output frame; answered with `Frame` or `Nothing`.
    GetFrame,
    /// Ask for a frame still queued in the decoder; answered with `Frame` or `Nothing`.
    GetLastFrames,
    /// Make the worker thread finish successfully.
    End
}
1137 | ||
/// Replies sent from the decoder worker thread back to `HWWrapper`.
enum DecResponse {
    /// The `Init` or `Decode` request succeeded.
    Ok,
    /// No frame was available for a `GetFrame` or `GetLastFrames` request.
    Nothing,
    /// The `Init` or `Decode` request failed with the enclosed error.
    Err(DecoderError),
    /// A decoded frame produced for a `GetFrame` or `GetLastFrames` request.
    Frame(NAFrameRef),
}
1144 | ||
/// Interface of a hardware-accelerated decoder as seen by its user.
pub trait HWDecoder {
    /// Initialises the decoder using the provided codec information.
    fn init(&mut self, info: NACodecInfoRef) -> DecoderResult<()>;
    /// Submits a packet for decoding.
    fn queue_pkt(&mut self, pkt: &NAPacket) -> DecoderResult<()>;
    /// Returns the next decoded frame if one is available.
    fn get_frame(&mut self) -> Option<NAFrameRef>;
    /// Returns a frame still held by the decoder, if any
    /// (presumably meant for draining at the end of stream -- confirm with callers).
    fn get_last_frames(&mut self) -> Option<NAFrameRef>;
    /// Resets the decoder state.
    fn flush(&mut self);
}
1152 | ||
/// Front end for a `VaapiH264Decoder` running in its own thread.
pub struct HWWrapper {
    /// Join handle of the worker thread; taken out when the wrapper is dropped.
    handle: Option<JoinHandle<DecoderResult<()>>>,
    /// Channel for sending requests to the worker thread.
    send: SyncSender<DecMessage>,
    /// Channel for receiving the worker thread's replies.
    recv: Receiver<DecResponse>,
}
1158 | ||
1159 | #[allow(clippy::new_without_default)] | |
1160 | impl HWWrapper { | |
1161 | pub fn new() -> Self { | |
1162 | let (in_send, in_recv) = sync_channel(1); | |
1163 | let (out_send, out_recv) = sync_channel(1); | |
1164 | let handle = std::thread::spawn(move || { | |
1165 | let receiver = in_recv; | |
1166 | let sender = out_send; | |
1167 | let mut dec = VaapiH264Decoder::new(); | |
1168 | while let Ok(msg) = receiver.recv() { | |
1169 | match msg { | |
1170 | DecMessage::Init(info) => { | |
1171 | let msg = if let Err(err) = dec.init(info) { | |
1172 | DecResponse::Err(err) | |
1173 | } else { | |
1174 | DecResponse::Ok | |
1175 | }; | |
1176 | sender.send(msg).map_err(|_| DecoderError::Bug)?; | |
1177 | }, | |
1178 | DecMessage::Decode(pkt) => { | |
1179 | let msg = match dec.decode(&pkt) { | |
1180 | Ok(()) => DecResponse::Ok, | |
1181 | Err(err) => DecResponse::Err(err), | |
1182 | }; | |
1183 | sender.send(msg).map_err(|_| DecoderError::Bug)?; | |
1184 | }, | |
1185 | DecMessage::GetFrame => { | |
1186 | let msg = match dec.get_frame() { | |
1187 | Some(frm) => DecResponse::Frame(frm), | |
1188 | None => DecResponse::Nothing, | |
1189 | }; | |
1190 | sender.send(msg).map_err(|_| DecoderError::Bug)?; | |
1191 | }, | |
1192 | DecMessage::GetLastFrames => { | |
1193 | let msg = match dec.get_last_frames() { | |
1194 | Some(frm) => DecResponse::Frame(frm), | |
1195 | None => DecResponse::Nothing, | |
1196 | }; | |
1197 | sender.send(msg).map_err(|_| DecoderError::Bug)?; | |
1198 | }, | |
1199 | DecMessage::Flush => dec.flush(), | |
1200 | DecMessage::End => return Ok(()), | |
1201 | }; | |
1202 | } | |
1203 | Err(DecoderError::Bug) | |
1204 | }); | |
1205 | ||
1206 | Self { | |
1207 | handle: Some(handle), | |
1208 | send: in_send, | |
1209 | recv: out_recv, | |
1210 | } | |
1211 | } | |
1212 | } | |
1213 | ||
1214 | impl HWDecoder for HWWrapper { | |
1215 | fn init(&mut self, info: NACodecInfoRef) -> DecoderResult<()> { | |
1216 | if self.send.send(DecMessage::Init(info)).is_ok() { | |
1217 | match self.recv.recv() { | |
1218 | Ok(DecResponse::Ok) => Ok(()), | |
1219 | Ok(DecResponse::Err(err)) => Err(err), | |
1220 | Err(_) => Err(DecoderError::Bug), | |
1221 | _ => unreachable!(), | |
1222 | } | |
1223 | } else { | |
1224 | Err(DecoderError::Bug) | |
1225 | } | |
1226 | } | |
1227 | fn queue_pkt(&mut self, pkt: &NAPacket) -> DecoderResult<()> { | |
1228 | let pkt2 = NAPacket::new_from_refbuf(pkt.get_stream(), pkt.ts, pkt.keyframe, pkt.get_buffer()); | |
1229 | if self.send.send(DecMessage::Decode(pkt2)).is_ok() { | |
1230 | match self.recv.recv() { | |
1231 | Ok(DecResponse::Ok) => Ok(()), | |
1232 | Ok(DecResponse::Err(err)) => Err(err), | |
1233 | Err(_) => Err(DecoderError::Bug), | |
1234 | _ => unreachable!(), | |
1235 | } | |
1236 | } else { | |
1237 | Err(DecoderError::Bug) | |
1238 | } | |
1239 | } | |
1240 | fn get_frame(&mut self) -> Option<NAFrameRef> { | |
1241 | if self.send.send(DecMessage::GetFrame).is_ok() { | |
1242 | match self.recv.recv() { | |
1243 | Ok(DecResponse::Frame(frm)) => Some(frm), | |
1244 | Ok(DecResponse::Nothing) => None, | |
1245 | Err(_) => None, | |
1246 | _ => unreachable!(), | |
1247 | } | |
1248 | } else { | |
1249 | None | |
1250 | } | |
1251 | } | |
1252 | fn get_last_frames(&mut self) -> Option<NAFrameRef> { | |
1253 | if self.send.send(DecMessage::GetLastFrames).is_ok() { | |
1254 | match self.recv.recv() { | |
1255 | Ok(DecResponse::Frame(frm)) => Some(frm), | |
1256 | Ok(DecResponse::Nothing) => None, | |
1257 | Err(_) => None, | |
1258 | _ => unreachable!(), | |
1259 | } | |
1260 | } else { | |
1261 | None | |
1262 | } | |
1263 | } | |
1264 | fn flush(&mut self) { | |
1265 | let _ = self.send.send(DecMessage::Flush); | |
1266 | } | |
1267 | } | |
1268 | ||
1269 | impl Drop for HWWrapper { | |
1270 | fn drop(&mut self) { | |
1271 | if self.send.send(DecMessage::End).is_ok() { | |
1272 | let mut handle = None; | |
1273 | std::mem::swap(&mut handle, &mut self.handle); | |
1274 | if let Some(hdl) = handle { | |
1275 | let _ = hdl.join(); | |
1276 | } | |
1277 | } | |
1278 | } | |
1279 | } | |
1280 | ||
1281 | impl NAOptionHandler for HWWrapper { | |
1282 | fn get_supported_options(&self) -> &[NAOptionDefinition] { &[] } | |
1283 | fn set_options(&mut self, _options: &[NAOption]) {} | |
1284 | fn query_option_value(&self, _name: &str) -> Option<NAValue> { None } | |
1285 | } | |
1286 | ||
1287 | pub fn new_h264_hwdec() -> Box<dyn HWDecoder + Send> { | |
1288 | Box::new(HWWrapper::new()) | |
1289 | } | |
1290 | ||
#[cfg(test)]
mod test {
    use nihav_core::codecs::*;
    use nihav_core::io::byteio::*;
    use nihav_core::demuxers::{RegisteredDemuxers, create_demuxer};
    use nihav_commonfmt::generic_register_all_demuxers;
    use super::VaapiH264Decoder;
    use std::io::prelude::*;

    /// Decodes the first video stream of `name` (demuxed with `dname`) and
    /// dumps every output frame as a PGM file named
    /// `assets/test_out/<opfx><timestamp>_<frame number>.pgm`.
    ///
    /// Each image holds the luma plane followed by rows that pair a
    /// half-width U line with a half-width V line.
    fn decode_h264(name: &str, dname: &str, dmx_reg: &RegisteredDemuxers, opfx: &str) -> DecoderResult<()> {
        let demuxer_factory = dmx_reg.find_demuxer(dname).expect("demuxer exists");
        let input = std::fs::File::open(name).expect("file exists");
        let mut file_reader = FileReader::new_read(input);
        let mut byte_reader = ByteReader::new(&mut file_reader);
        let mut demuxer = create_demuxer(demuxer_factory, &mut byte_reader).expect("create demuxer");

        // pick the first video stream and initialise the decoder with it
        let mut video_id = 0;
        let mut decoder = VaapiH264Decoder::new();
        for stream in demuxer.get_streams() {
            if stream.get_media_type() != StreamType::Video {
                continue;
            }
            decoder.init(stream.get_info()).expect("inited");
            video_id = stream.get_id();
            break;
        }

        let mut frameno = 0;
        loop {
            let pkt = match demuxer.get_frame() {
                Ok(pkt) => pkt,
                Err(_) => break,
            };
            if pkt.get_stream().get_id() != video_id {
                continue;
            }

            decoder.decode(&pkt).expect("decoded");
            let frm = decoder.get_last_frames().expect("get frame");
            let timestamp = frm.get_dts().or_else(|| frm.get_pts()).unwrap_or(0);

            let pic = frm.get_buffer().get_vbuf().expect("got picture");

            let out_name = format!("assets/test_out/{}{:06}_{}.pgm", opfx, timestamp, frameno);
            frameno += 1;
            let mut out = std::fs::File::create(&out_name).expect("create file");

            let (w, h) = pic.get_dimensions(0);
            let header = format!("P5\n{} {}\n255\n", w, h * 3 / 2);
            out.write_all(header.as_bytes()).expect("header written");

            let data = pic.get_data();
            for yline in data.chunks(pic.get_stride(0)).take(h) {
                out.write_all(&yline[..w]).expect("Y line written");
            }

            let u_lines = data[pic.get_offset(1)..].chunks(pic.get_stride(1));
            let v_lines = data[pic.get_offset(2)..].chunks(pic.get_stride(2));
            for (uline, vline) in u_lines.zip(v_lines).take(h / 2) {
                out.write_all(&uline[..w / 2]).expect("U line written");
                out.write_all(&vline[..w / 2]).expect("V line written");
            }
        }
        Ok(())
    }


    // samples if not specified otherwise come from H.264 conformance suite

    #[test]
    fn test_h264_simple() {
        let mut dmx_reg = RegisteredDemuxers::new();
        generic_register_all_demuxers(&mut dmx_reg);

        decode_h264("assets/ITU/DimpledSpanishCuckoo-mobile.mp4", "mov", &dmx_reg, "hw").unwrap();
    }
}