]>
Commit | Line | Data |
---|---|---|
1 | use std::collections::VecDeque; | |
2 | use std::convert::TryInto; | |
3 | use std::rc::Rc; | |
4 | ||
5 | use nihav_core::codecs::*; | |
6 | use nihav_core::io::byteio::*; | |
7 | use nihav_core::io::bitreader::*; | |
8 | use nihav_core::io::intcode::*; | |
9 | ||
10 | use libva::*; | |
11 | ||
/// Validation helper: bails out of the enclosing function with
/// `DecoderError::InvalidData` when the condition is false.
/// The debug-build variant additionally prints the failing file/line.
#[cfg(debug_assertions)]
macro_rules! validate {
    ($a:expr) => { if !$a { println!("check failed at {}:{}", file!(), line!()); return Err(DecoderError::InvalidData); } };
}
// Release-build variant: same early return, no diagnostic output.
#[cfg(not(debug_assertions))]
macro_rules! validate {
    ($a:expr) => { if !$a { return Err(DecoderError::InvalidData); } };
}
20 | ||
// Reference-picture bookkeeping (`FrameRefs`, `PictureInfo` come from here).
mod pic_ref;
pub use pic_ref::*;
// SPS/PPS parsing (`parse_sps`, `parse_pps`, `SeqParameterSet`, `PicParameterSet`).
#[allow(clippy::manual_range_contains)]
#[allow(clippy::needless_range_loop)]
mod sets;
use sets::*;
// Slice-header parsing (`parse_slice_header`, `SliceHeader`, `SliceType`).
#[allow(clippy::manual_range_contains)]
mod slice;
use slice::*;
30 | ||
31 | trait ReadUE { | |
32 | fn read_ue(&mut self) -> DecoderResult<u32>; | |
33 | fn read_te(&mut self, range: u32) -> DecoderResult<u32>; | |
34 | fn read_ue_lim(&mut self, max_val: u32) -> DecoderResult<u32> { | |
35 | let val = self.read_ue()?; | |
36 | validate!(val <= max_val); | |
37 | Ok(val) | |
38 | } | |
39 | fn read_se(&mut self) -> DecoderResult<i32> { | |
40 | let val = self.read_ue()?; | |
41 | if (val & 1) != 0 { | |
42 | Ok (((val >> 1) as i32) + 1) | |
43 | } else { | |
44 | Ok (-((val >> 1) as i32)) | |
45 | } | |
46 | } | |
47 | } | |
48 | ||
49 | impl<'a> ReadUE for BitReader<'a> { | |
50 | fn read_ue(&mut self) -> DecoderResult<u32> { | |
51 | Ok(self.read_code(UintCodeType::GammaP)? - 1) | |
52 | } | |
53 | fn read_te(&mut self, range: u32) -> DecoderResult<u32> { | |
54 | if range == 1 { | |
55 | if self.read_bool()? { | |
56 | Ok(0) | |
57 | } else { | |
58 | Ok(1) | |
59 | } | |
60 | } else { | |
61 | let val = self.read_ue()?; | |
62 | validate!(val <= range); | |
63 | Ok(val) | |
64 | } | |
65 | } | |
66 | } | |
67 | ||
68 | fn get_long_term_id(is_idr: bool, slice_hdr: &SliceHeader) -> Option<usize> { | |
69 | if is_idr && !slice_hdr.long_term_reference { | |
70 | None | |
71 | } else { | |
72 | let marking = &slice_hdr.adaptive_ref_pic_marking; | |
73 | for (&op, &arg) in marking.memory_management_control_op.iter().zip(marking.operation_arg.iter()).take(marking.num_ops) { | |
74 | if op == 6 { | |
75 | return Some(arg as usize); | |
76 | } | |
77 | } | |
78 | None | |
79 | } | |
80 | } | |
81 | ||
/// Copies one NAL unit from `src` into `dst`, dropping 0x03 emulation
/// prevention bytes (00 00 03 -> 00 00). Stops early when a run of three or
/// more zero bytes is followed by 0x01 (the next Annex B start code) and
/// truncates the output accordingly. Returns the source offset reached.
fn unescape_nal(src: &[u8], dst: &mut Vec<u8>) -> usize {
    dst.clear();
    dst.reserve(src.len());
    let mut pos = 0;
    let mut zeroes = 0;
    while pos < src.len() {
        let byte = src[pos];
        dst.push(byte);
        if byte != 0 {
            zeroes = 0;
        } else {
            zeroes += 1;
            // 00 00 03 — skip the escape byte without copying it.
            if zeroes == 2 && pos + 1 < src.len() && src[pos + 1] == 0x03 {
                zeroes = 0;
                pos += 1;
            }
            // Three or more zeroes followed by 0x01: next start code reached.
            if zeroes >= 3 && pos + 1 < src.len() && src[pos + 1] == 0x01 {
                pos -= 3;
                dst.truncate(pos);
                break;
            }
        }
        pos += 1;
    }
    pos
}
107 | ||
108 | fn make_dummy_h264_pic() -> PictureH264 { | |
109 | PictureH264::new(VA_INVALID_ID, 0, H264PictureFlag::Invalid.into(), 0, 0) | |
110 | } | |
111 | ||
/// Conversion of an internal reference-picture record into the libva
/// `PictureH264` representation.
trait MakePicH264 {
    fn make_pic(&self) -> PictureH264;
}
115 | ||
116 | impl MakePicH264 for PictureInfo { | |
117 | fn make_pic(&self) -> PictureH264 { | |
118 | let mut flags = H264PictureFlags::default(); | |
119 | let frame_idx = if let Some(id) = self.long_term { | |
120 | flags |= H264PictureFlag::LongTermReference; | |
121 | id as u32 | |
122 | } else { | |
123 | if self.is_ref { | |
124 | flags |= H264PictureFlag::ShortTermReference; | |
125 | } | |
126 | u32::from(self.id) | |
127 | }; | |
128 | PictureH264::new(self.surface_id, frame_idx, flags, self.top_id as i32, self.bot_id as i32) | |
129 | } | |
130 | } | |
131 | ||
132 | fn map_ref_list(refs: &[Option<PictureInfo>]) -> [PictureH264; 32] { | |
133 | let mut ref_list = Vec::with_capacity(32); | |
134 | ||
135 | for rpic in refs.iter() { | |
136 | ref_list.push(rpic.as_ref().map_or_else(make_dummy_h264_pic, |pic| pic.make_pic())); | |
137 | } | |
138 | ||
139 | while ref_list.len() < 32 { | |
140 | ref_list.push(make_dummy_h264_pic()); | |
141 | } | |
142 | if let Ok(ret) = ref_list.try_into() { | |
143 | ret | |
144 | } else { | |
145 | panic!("can't convert"); | |
146 | } | |
147 | } | |
148 | ||
149 | fn profile_name(profile: VAProfile::Type) -> &'static str { | |
150 | match profile { | |
151 | VAProfile::VAProfileMPEG2Simple => "MPEG2 Simple", | |
152 | VAProfile::VAProfileMPEG2Main => "MPEG2 Main", | |
153 | VAProfile::VAProfileMPEG4Simple => "MPEG4 Simple", | |
154 | VAProfile::VAProfileMPEG4AdvancedSimple => "MPEG4 Advanced Simple", | |
155 | VAProfile::VAProfileMPEG4Main => "MPEG4 Main", | |
156 | VAProfile::VAProfileH264Baseline => "H264 Baseline", | |
157 | VAProfile::VAProfileH264Main => "H264 Main", | |
158 | VAProfile::VAProfileH264High => "H264 High", | |
159 | VAProfile::VAProfileVC1Simple => "VC1 Simple", | |
160 | VAProfile::VAProfileVC1Main => "VC1 Main", | |
161 | VAProfile::VAProfileVC1Advanced => "VC1 Advanced", | |
162 | VAProfile::VAProfileH263Baseline => "H263 Baseline", | |
163 | VAProfile::VAProfileJPEGBaseline => "JPEG Baseline", | |
164 | VAProfile::VAProfileH264ConstrainedBaseline => "H264 Constrained Baseline", | |
165 | VAProfile::VAProfileVP8Version0_3 => "VP8", | |
166 | VAProfile::VAProfileH264MultiviewHigh => "H.264 Multiview High", | |
167 | VAProfile::VAProfileH264StereoHigh => "H264 Stereo High", | |
168 | VAProfile::VAProfileHEVCMain => "H.EVC Main", | |
169 | VAProfile::VAProfileHEVCMain10 => "H.EVC Main10", | |
170 | VAProfile::VAProfileVP9Profile0 => "VP9 Profile 0", | |
171 | VAProfile::VAProfileVP9Profile1 => "VP9 Profile 1", | |
172 | VAProfile::VAProfileVP9Profile2 => "VP9 Profile 2", | |
173 | VAProfile::VAProfileVP9Profile3 => "VP9 Profile 3", | |
174 | VAProfile::VAProfileHEVCMain12 => "HEVC Main12", | |
175 | VAProfile::VAProfileHEVCMain422_10 => "HEVC Main10 4:2:2", | |
176 | VAProfile::VAProfileHEVCMain422_12 => "HEVC Main12 4:2:2", | |
177 | VAProfile::VAProfileHEVCMain444 => "HEVC Main 4:4:4", | |
178 | VAProfile::VAProfileHEVCMain444_10 => "HEVC Main10 4:4:4", | |
179 | VAProfile::VAProfileHEVCMain444_12 => "HEVC Main12 4:4:4", | |
180 | VAProfile::VAProfileHEVCSccMain => "HEVC SCC Main", | |
181 | VAProfile::VAProfileHEVCSccMain10 => "HEVC SCC Main10", | |
182 | VAProfile::VAProfileHEVCSccMain444 => "HEVC SCC Main 4:4:4", | |
183 | VAProfile::VAProfileAV1Profile0 => "AV1 Profile 0", | |
184 | VAProfile::VAProfileAV1Profile1 => "AV1 Profile 1", | |
185 | VAProfile::VAProfileHEVCSccMain444_10 => "HEVC SCC Main10 4:4:4", | |
186 | _ => "unknown", | |
187 | } | |
188 | } | |
189 | ||
/// Number of reference-picture slots in the VA-API H.264 picture parameter
/// buffer (fixed by the libva API).
const NUM_REF_PICS: usize = 16;
191 | ||
/// A frame submitted for decoding and waiting in the reorder queue.
struct WaitingFrame {
    ts: u64,                    // timestamp used for output ordering
    pic: Picture<PictureEnd>,   // the VA-API picture being decoded
    is_idr: bool,
    is_ref: bool,               // whether this frame is a reference frame
    ftype: FrameType,
}
199 | ||
/// Reorders decoded frames from decode order into presentation order.
struct Reorderer {
    last_ref_dts: Option<u64>,     // timestamp of the previous reference frame
    ready_idx: usize,              // count of queued frames ready for output
    frames: VecDeque<WaitingFrame>,
}
205 | ||
206 | impl Default for Reorderer { | |
207 | fn default() -> Self { | |
208 | Self { | |
209 | last_ref_dts: None, | |
210 | ready_idx: 0, | |
211 | frames: VecDeque::with_capacity(16), | |
212 | } | |
213 | } | |
214 | } | |
215 | ||
impl Reorderer {
    /// Queues a freshly decoded frame.
    /// Non-reference frames are inserted in timestamp order; reference frames
    /// are appended and mark everything up to the previous reference frame as
    /// ready for output.
    fn add_frame(&mut self, new_frame: WaitingFrame) {
        if !new_frame.is_ref {
            if self.frames.is_empty() {
                self.frames.push_back(new_frame);
            } else {
                // Insert before the first queued frame with a larger timestamp.
                // NOTE(review): when every queued frame has ts <= new_dts the
                // loop leaves `idx` at the last element, so the frame is
                // inserted *before* the tail rather than appended — confirm
                // this is intended.
                let new_dts = new_frame.ts;
                let mut idx = 0;
                for (i, frm) in self.frames.iter().enumerate() {
                    idx = i;
                    if frm.ts > new_dts {
                        break;
                    }
                }
                self.frames.insert(idx, new_frame);
            }
        } else {
            // Frames up to and including the previous reference frame can no
            // longer be reordered past this new reference — mark them ready.
            for (i, frm) in self.frames.iter().enumerate() {
                if Some(frm.ts) == self.last_ref_dts {
                    self.ready_idx = i + 1;
                }
            }
            self.last_ref_dts = Some(new_frame.ts);
            self.frames.push_back(new_frame);
        }
    }
    /// Pops the oldest ready frame, or `None` if nothing is ready or the
    /// front surface is still being rendered.
    fn get_frame(&mut self) -> Option<WaitingFrame> {
        if self.ready_idx > 0 {
            match self.frames[0].pic.query_status() {
                // Backlog grew too large — emit regardless of surface status.
                _ if self.ready_idx > 16 => {},
                Ok(VASurfaceStatus::Ready) => {},
                Ok(VASurfaceStatus::Rendering) => return None,
                _ => {
                    // Other statuses (e.g. query errors) are not handled yet.
                    unimplemented!();
                },
            };
            self.ready_idx -= 1;
            self.frames.pop_front()
        } else {
            None
        }
    }
    /// Resets the reordering state.
    /// NOTE(review): queued frames are not drained here; the caller appears
    /// to be responsible for emptying `frames` — confirm.
    fn flush(&mut self) {
        self.last_ref_dts = None;
        self.ready_idx = 0;
    }
}
263 | ||
/// VA-API state owned by the decoder once `init()` has succeeded.
#[allow(dead_code)]
struct VaapiInternals {
    display: Rc<Display>,
    context: Rc<Context>,
    // Synced pictures still referenced by the DPB, with their surface ids.
    ref_pics: Vec<(Picture<PictureSync>, VASurfaceID)>,
    // Pool of free surfaces available for new pictures.
    surfaces: Vec<Surface>,
    // Image format used when reading surfaces back (preferably 12 bpp / NV12).
    ifmt: VAImageFormat,
}
272 | ||
/// H.264 decoder backed by VA-API hardware decoding.
pub struct VaapiH264Decoder {
    info: NACodecInfoRef,
    vaapi: Option<VaapiInternals>,   // None until init() succeeds
    spses: Vec<SeqParameterSet>,
    ppses: Vec<PicParameterSet>,
    frame_refs: FrameRefs,           // reference picture bookkeeping
    nal_len: u8,                     // NAL length field size in bytes (1..=4)
    out_frm: NABufferType,           // reusable output frame buffer
    reorderer: Reorderer,
    tb_num: u32,                     // timebase from the last decoded packet
    tb_den: u32,
}
285 | ||
/// Splits interleaved NV12 chroma rows (UVUV…) from `src` into the planar
/// U and V planes of `frm`. Portable fallback for non-x86_64 targets.
#[cfg(not(target_arch="x86_64"))]
fn deint_chroma(frm: NASimpleVideoFrame<u8>, src: &[u8], sstride: usize) {
    for (row, sline) in src.chunks(sstride).take(frm.height[1]).enumerate() {
        let urow = frm.offset[1] + row * frm.stride[1];
        let vrow = frm.offset[2] + row * frm.stride[2];
        for (x, pair) in sline.chunks_exact(2).take(frm.width[1]).enumerate() {
            frm.data[urow + x] = pair[0];
            frm.data[vrow + x] = pair[1];
        }
    }
}
299 | ||
#[cfg(target_arch="x86_64")]
use std::arch::asm;
/// Splits interleaved NV12 chroma rows (UVUV…) from `src` into the planar
/// U and V planes of `frm`, SSE2 version: optionally one 8-pixel chunk when
/// bit 3 of the width is set, then 16 pixels per iteration.
///
/// NOTE(review): `movaps` requires 16-byte-aligned loads, so `src` rows must
/// be 16-byte aligned — confirm libva image buffers guarantee this.
/// NOTE(review): a chroma width of exactly 8 would leave the 16-pixel loop
/// counter at zero before it runs and underflow it — callers seem to use
/// macroblock-aligned widths; confirm.
#[cfg(target_arch="x86_64")]
fn deint_chroma(frm: NASimpleVideoFrame<u8>, src: &[u8], sstride: usize) {
    // SAFETY: relies on `src` holding at least height[1] rows of sstride
    // bytes and the U/V planes in frm.data being large enough for
    // height[1] rows of stride[1]/stride[2] bytes.
    unsafe {
        let width = frm.width[1];
        let height = frm.height[1];
        let dst = frm.data.as_mut_ptr();
        let udst = dst.add(frm.offset[1]);
        let vdst = dst.add(frm.offset[2]);
        // Per-row advance from the end of the copied pixels to the next row.
        let dstep = frm.stride[1] - width;
        let sstep = sstride - width * 2;
        asm!(
            // Outer loop: one chroma row per iteration.
            "2:",
            "   mov {tmp}, {width}",
            // Handle a leading 8-pixel chunk when width % 16 == 8.
            "   test {width}, 8",
            "   jz 3f",
            "   movaps xmm0, [{src}]",
            "   movaps xmm1, xmm0",
            "   psllw xmm0, 8",
            "   psrlw xmm1, 8",
            "   psrlw xmm0, 8",
            "   packuswb xmm1, xmm1",
            "   packuswb xmm0, xmm0",
            "   movq [{vdst}], xmm1",
            "   movq [{udst}], xmm0",
            "   add {src}, 16",
            "   add {vdst}, 8",
            "   add {udst}, 8",
            "   sub {tmp}, 8",
            // Main loop: deinterleave 16 UV pairs (32 source bytes) at a time.
            "   3:",
            "   movaps xmm0, [{src}]",
            "   movaps xmm1, [{src} + 16]",
            "   movaps xmm2, xmm0",
            "   movaps xmm3, xmm1",
            "   psllw xmm0, 8",
            "   psllw xmm1, 8",
            "   psrlw xmm2, 8",
            "   psrlw xmm3, 8",
            "   psrlw xmm0, 8",
            "   psrlw xmm1, 8",
            "   packuswb xmm2, xmm3",
            "   packuswb xmm0, xmm1",
            "   movups [{vdst}], xmm2",
            "   movups [{udst}], xmm0",
            "   add {src}, 32",
            "   add {vdst}, 16",
            "   add {udst}, 16",
            "   sub {tmp}, 16",
            "   jnz 3b",
            // Advance all pointers to the next row.
            "   add {udst}, {dstep}",
            "   add {vdst}, {dstep}",
            "   add {src}, {sstep}",
            "   dec {height}",
            "   jnz 2b",
            src = inout(reg) src.as_ptr() => _,
            udst = inout(reg) udst => _,
            vdst = inout(reg) vdst => _,
            width = in(reg) width,
            height = inout(reg) height => _,
            dstep = in(reg) dstep,
            sstep = in(reg) sstep,
            tmp = out(reg) _,
            out("xmm0") _,
            out("xmm1") _,
            out("xmm2") _,
            out("xmm3") _,
        );
    }
}
370 | ||
371 | fn fill_frame(ifmt: VAImageFormat, pic: &Picture<PictureSync>, frm: &mut NABufferType) -> DecoderResult<()> { | |
372 | let mut vbuf = frm.get_vbuf().unwrap(); | |
373 | let (w, h) = pic.surface_size(); | |
374 | //let cur_ts = pic.timestamp(); | |
375 | ||
376 | let img = Image::new(pic, ifmt, w, h, true).expect("get image"); | |
377 | ||
378 | let iimg = img.image(); | |
379 | let imgdata: &[u8] = img.as_ref(); | |
380 | ||
381 | match iimg.format.fourcc().map_err(|_| DecoderError::InvalidData)? { | |
382 | VAFourcc::NV12 => { | |
383 | let frm = NASimpleVideoFrame::from_video_buf(&mut vbuf).unwrap(); | |
384 | validate!(iimg.width == (frm.width[0] as u16)); | |
385 | validate!(iimg.height == (frm.height[0] as u16)); | |
386 | ||
387 | for (dline, sline) in frm.data[frm.offset[0]..].chunks_mut(frm.stride[0]) | |
388 | .zip(imgdata[iimg.offsets[0] as usize..].chunks(iimg.pitches[0] as usize)) | |
389 | .take(frm.height[0]) { | |
390 | dline[..frm.width[0]].copy_from_slice(&sline[..frm.width[0]]); | |
391 | } | |
392 | ||
393 | deint_chroma(frm, &imgdata[iimg.offsets[1] as usize..], iimg.pitches[1] as usize); | |
394 | }, | |
395 | _ => unimplemented!(), | |
396 | }; | |
397 | Ok(()) | |
398 | } | |
399 | ||
400 | impl Default for VaapiH264Decoder { | |
401 | fn default() -> Self { | |
402 | Self { | |
403 | info: NACodecInfoRef::default(), | |
404 | vaapi: None, | |
405 | spses: Vec::with_capacity(1), | |
406 | ppses: Vec::with_capacity(4), | |
407 | frame_refs: FrameRefs::new(), | |
408 | nal_len: 0, | |
409 | out_frm: NABufferType::None, | |
410 | reorderer: Reorderer::default(), | |
411 | tb_num: 0, | |
412 | tb_den: 0, | |
413 | } | |
414 | } | |
415 | } | |
416 | ||
417 | impl VaapiH264Decoder { | |
    /// Creates a new, uninitialised decoder instance.
    pub fn new() -> Self { Self::default() }
    /// Initialises the decoder: parses the `avcC` extradata (SPS/PPS sets),
    /// verifies VA-API support for the stream's profile, and creates the
    /// VA-API config, surfaces and context.
    ///
    /// Returns `DecoderError::NotImplemented` for non-avcC extradata or
    /// unsupported profiles, `DecoderError::InvalidData` for malformed
    /// extradata, and `DecoderError::Bug`/`AllocError` for VA-API failures.
    pub fn init(&mut self, info: NACodecInfoRef) -> DecoderResult<()> {
        if let NACodecTypeInfo::Video(vinfo) = info.get_properties() {
            let edata = info.get_extradata().unwrap();
            //print!("edata:"); for &el in edata.iter() { print!(" {:02X}", el); } println!();
            let profile;
            let mut nal_buf = Vec::with_capacity(1024);
            // Only AVC decoder configuration records are accepted.
            if edata.len() > 11 && &edata[0..4] == b"avcC" {
                let mut mr = MemoryReader::new_read(edata.as_slice());
                let mut br = ByteReader::new(&mut mr);

                br.read_skip(4)?;
                let version = br.read_byte()?;
                validate!(version == 1);
                profile = br.read_byte()?;
                let _compatibility = br.read_byte()?;
                let _level = br.read_byte()?;
                let b = br.read_byte()?;
                validate!((b & 0xFC) == 0xFC);
                // Low two bits encode the NAL length field size minus one.
                self.nal_len = (b & 3) + 1;
                let b = br.read_byte()?;
                validate!((b & 0xE0) == 0xE0);
                let num_sps = (b & 0x1F) as usize;
                for _ in 0..num_sps {
                    let len = br.read_u16be()? as usize;
                    let offset = br.tell() as usize;
                    // NAL unit type 7 = sequence parameter set.
                    validate!((br.peek_byte()? & 0x1F) == 7);
                    let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf);
                    br.read_skip(len)?;
                    let sps = parse_sps(&nal_buf[1..])?;
                    self.spses.push(sps);
                }
                let num_pps = br.read_byte()? as usize;
                for _ in 0..num_pps {
                    let len = br.read_u16be()? as usize;
                    let offset = br.tell() as usize;
                    // NAL unit type 8 = picture parameter set.
                    validate!((br.peek_byte()? & 0x1F) == 8);
                    let _size = unescape_nal(&edata[offset..][..len], &mut nal_buf);
                    br.read_skip(len)?;
                    let src = &nal_buf;

                    // Compute the payload size in bits by dropping the
                    // rbsp_stop_one_bit and any trailing zero bits.
                    let mut full_size = src.len() * 8;
                    for &byte in src.iter().rev() {
                        if byte == 0 {
                            full_size -= 8;
                        } else {
                            full_size -= (byte.trailing_zeros() + 1) as usize;
                            break;
                        }
                    }
                    validate!(full_size > 0);

                    let pps = parse_pps(&src[1..], &self.spses, full_size - 8)?;
                    // Replace a stored PPS with the same id, otherwise append.
                    let mut found = false;
                    for stored_pps in self.ppses.iter_mut() {
                        if stored_pps.pic_parameter_set_id == pps.pic_parameter_set_id {
                            *stored_pps = pps.clone();
                            found = true;
                            break;
                        }
                    }
                    if !found {
                        self.ppses.push(pps);
                    }
                }
                // High profiles may append chroma format / bit depth bytes and
                // SPS extensions; they are validated and skipped.
                if br.left() > 0 {
                    match profile {
                        100 | 110 | 122 | 144 => {
                            let b = br.read_byte()?;
                            validate!((b & 0xFC) == 0xFC);
                            // b & 3 -> chroma format
                            let b = br.read_byte()?;
                            validate!((b & 0xF8) == 0xF8);
                            // b & 7 -> luma depth minus 8
                            let b = br.read_byte()?;
                            validate!((b & 0xF8) == 0xF8);
                            // b & 7 -> chroma depth minus 8
                            let num_spsext = br.read_byte()? as usize;
                            for _ in 0..num_spsext {
                                let len = br.read_u16be()? as usize;
                                // parse spsext
                                br.read_skip(len)?;
                            }
                        },
                        _ => {},
                    };
                }
            } else {
                return Err(DecoderError::NotImplemented);
            }

            validate!(profile > 0);
            // Round picture dimensions up to whole 16x16 macroblocks.
            let width = (vinfo.get_width() + 15) & !15;
            let height = (vinfo.get_height() + 15) & !15;

            let display = Display::open_silently().expect("open display");

            // NOTE(review): DPB size plus 4 working surfaces plus a generous
            // 64 for frames in flight — confirm whether 64 is required.
            let num_surfaces = self.spses[0].num_ref_frames + 4 + 64;

            let va_profile = match profile {
                66 => VAProfile::VAProfileH264ConstrainedBaseline,
                77 => VAProfile::VAProfileH264Main,
                88 | 100 | 110 | 122 => VAProfile::VAProfileH264High,
                _ => return Err(DecoderError::NotImplemented),
            };
            // The driver must both expose the profile and support VLD decoding.
            if let Ok(profiles) = display.query_config_profiles() {
                if !profiles.contains(&va_profile) {
                    println!("Profile {} ({}) not supported", profile, profile_name(va_profile));
                    return Err(DecoderError::NotImplemented);
                }
            } else {
                return Err(DecoderError::Bug);
            }
            if let Ok(points) = display.query_config_entrypoints(va_profile) {
                if !points.contains(&VAEntrypoint::VAEntrypointVLD) {
                    println!("no decoding support for this profile");
                    return Err(DecoderError::NotImplemented);
                }
            } else {
                return Err(DecoderError::Bug);
            }

            let config = display.create_config(vec![
                    VAConfigAttrib { type_: VAConfigAttribType::VAConfigAttribRTFormat, value: RTFormat::YUV420.into() },
                ], va_profile, VAEntrypoint::VAEntrypointVLD).map_err(|_| {
                    println!("config creation failed!");
                    DecoderError::Bug
                })?;
            let surfaces = display.create_surfaces(RTFormat::YUV420, None, width as u32, height as u32, Some(UsageHint::Decoder.into()), num_surfaces as u32).map_err(|_| DecoderError::AllocError)?;
            let context = display.create_context(&config, width as i32, height as i32, Some(&surfaces), true).map_err(|_| DecoderError::Bug)?;

            let ref_pics = Vec::new();

            // Prefer a 12 bits-per-pixel image format (NV12-style) for
            // surface readback; fall back to the first reported format.
            let image_formats = display.query_image_formats().map_err(|_| DecoderError::Bug)?;
            validate!(!image_formats.is_empty());
            let mut ifmt = image_formats[0];
            for fmt in image_formats.iter() {
                if fmt.bits_per_pixel == 12 {
                    ifmt = *fmt;
                    break;
                }
            }

            self.vaapi = Some(VaapiInternals { display, context, ref_pics, surfaces, ifmt });

            let vinfo = NAVideoInfo::new(width, height, false, YUV420_FORMAT);
            self.info = NACodecInfo::new_ref(info.get_name(), NACodecTypeInfo::Video(vinfo), info.get_extradata()).into_ref();
            self.out_frm = alloc_video_buffer(vinfo, 4)?;

            Ok(())
        } else {
            Err(DecoderError::InvalidData)
        }
    }
572 | fn decode(&mut self, pkt: &NAPacket) -> DecoderResult<()> { | |
573 | let src = pkt.get_buffer(); | |
574 | let vactx = if let Some(ref mut ctx) = self.vaapi { ctx } else { return Err(DecoderError::Bug) }; | |
575 | ||
576 | let timestamp = pkt.get_dts().unwrap_or_else(|| pkt.get_pts().unwrap_or(0)); | |
577 | ||
578 | if vactx.surfaces.is_empty() { | |
579 | panic!("ran out of free surfaces"); | |
580 | // return Err(DecoderError::AllocError); | |
581 | } | |
582 | let surface = vactx.surfaces.pop().unwrap(); | |
583 | let surface_id = surface.id(); | |
584 | let mut pic = Picture::new(timestamp, vactx.context.clone(), surface); | |
585 | let mut is_ref = false; | |
586 | let mut is_keyframe = false; | |
587 | ||
588 | self.tb_num = pkt.ts.tb_num; | |
589 | self.tb_den = pkt.ts.tb_den; | |
590 | ||
591 | let mut mr = MemoryReader::new_read(&src); | |
592 | let mut br = ByteReader::new(&mut mr); | |
593 | let mut frame_type = FrameType::I; | |
594 | let mut nal_buf = Vec::with_capacity(1024); | |
595 | while br.left() > 0 { | |
596 | let size = match self.nal_len { | |
597 | 1 => br.read_byte()? as usize, | |
598 | 2 => br.read_u16be()? as usize, | |
599 | 3 => br.read_u24be()? as usize, | |
600 | 4 => br.read_u32be()? as usize, | |
601 | _ => unreachable!(), | |
602 | }; | |
603 | validate!(br.left() >= (size as i64)); | |
604 | let offset = br.tell() as usize; | |
605 | let raw_nal = &src[offset..][..size]; | |
606 | let _size = unescape_nal(raw_nal, &mut nal_buf); | |
607 | ||
608 | let src = &nal_buf; | |
609 | validate!((src[0] & 0x80) == 0); | |
610 | let nal_ref_idc = src[0] >> 5; | |
611 | let nal_unit_type = src[0] & 0x1F; | |
612 | ||
613 | let mut full_size = src.len() * 8; | |
614 | for &byte in src.iter().rev() { | |
615 | if byte == 0 { | |
616 | full_size -= 8; | |
617 | } else { | |
618 | full_size -= (byte.trailing_zeros() + 1) as usize; | |
619 | break; | |
620 | } | |
621 | } | |
622 | validate!(full_size > 0); | |
623 | ||
624 | match nal_unit_type { | |
625 | 1 | 5 => { | |
626 | let is_idr = nal_unit_type == 5; | |
627 | is_ref |= nal_ref_idc != 0; | |
628 | is_keyframe |= is_idr; | |
629 | let mut br = BitReader::new(&src[..(full_size + 7)/8], BitReaderMode::BE); | |
630 | br.skip(8)?; | |
631 | ||
632 | let slice_hdr = parse_slice_header(&mut br, &self.spses, &self.ppses, is_idr, nal_ref_idc)?; | |
633 | match slice_hdr.slice_type { | |
634 | SliceType::P if frame_type != FrameType::B => frame_type = FrameType::P, | |
635 | SliceType::SP if frame_type != FrameType::B => frame_type = FrameType::P, | |
636 | SliceType::B => frame_type = FrameType::B, | |
637 | _ => {}, | |
638 | }; | |
639 | let mut cur_sps = 0; | |
640 | let mut cur_pps = 0; | |
641 | let mut pps_found = false; | |
642 | for (i, pps) in self.ppses.iter().enumerate() { | |
643 | if pps.pic_parameter_set_id == slice_hdr.pic_parameter_set_id { | |
644 | cur_pps = i; | |
645 | pps_found = true; | |
646 | break; | |
647 | } | |
648 | } | |
649 | validate!(pps_found); | |
650 | let mut sps_found = false; | |
651 | for (i, sps) in self.spses.iter().enumerate() { | |
652 | if sps.seq_parameter_set_id == self.ppses[cur_pps].seq_parameter_set_id { | |
653 | cur_sps = i; | |
654 | sps_found = true; | |
655 | break; | |
656 | } | |
657 | } | |
658 | validate!(sps_found); | |
659 | let sps = &self.spses[cur_sps]; | |
660 | let pps = &self.ppses[cur_pps]; | |
661 | ||
662 | if slice_hdr.first_mb_in_slice == 0 { | |
663 | let (top_id, bot_id) = self.frame_refs.calc_picture_num(&slice_hdr, is_idr, nal_ref_idc, sps); | |
664 | if is_idr { | |
665 | self.frame_refs.clear_refs(); | |
666 | for (pic, _) in vactx.ref_pics.drain(..) { | |
667 | if let Ok(surf) = pic.take_surface() { | |
668 | vactx.surfaces.push(surf); | |
669 | } else { | |
670 | panic!("can't take surface"); | |
671 | } | |
672 | } | |
673 | } | |
674 | self.frame_refs.select_refs(sps, &slice_hdr, top_id); | |
675 | let mut pic_refs = Vec::with_capacity(NUM_REF_PICS); | |
676 | for pic in self.frame_refs.ref_pics.iter().rev().take(NUM_REF_PICS) { | |
677 | pic_refs.push(pic.make_pic()); | |
678 | } | |
679 | if slice_hdr.adaptive_ref_pic_marking_mode { | |
680 | self.frame_refs.apply_adaptive_marking(&slice_hdr.adaptive_ref_pic_marking, slice_hdr.frame_num, 1 << sps.log2_max_frame_num)?; | |
681 | } | |
682 | ||
683 | while pic_refs.len() < NUM_REF_PICS { | |
684 | pic_refs.push(make_dummy_h264_pic()); | |
685 | } | |
686 | ||
687 | let mut flags = H264PictureFlags::default(); | |
688 | let frame_idx = if let Some(id) = get_long_term_id(is_idr, &slice_hdr) { | |
689 | flags |= H264PictureFlag::LongTermReference; | |
690 | id as u32 | |
691 | } else { | |
692 | if nal_ref_idc != 0 { | |
693 | flags |= H264PictureFlag::ShortTermReference; | |
694 | } | |
695 | u32::from(slice_hdr.frame_num) | |
696 | }; | |
697 | let pic_refs: [PictureH264; NUM_REF_PICS] = pic_refs.try_into().unwrap_or_else(|_| panic!("can't convert")); | |
698 | ||
699 | let h264pic = PictureH264::new(surface_id, frame_idx, flags, top_id as i32, bot_id as i32); | |
700 | ||
701 | let seq_fields = H264SeqFields::new( | |
702 | u32::from(sps.chroma_format_idc), | |
703 | u32::from(sps.separate_colour_plane), | |
704 | u32::from(sps.gaps_in_frame_num_value_allowed), | |
705 | u32::from(sps.frame_mbs_only), | |
706 | u32::from(sps.mb_adaptive_frame_field), | |
707 | u32::from(sps.direct_8x8_inference), | |
708 | u32::from(sps.level_idc >= 31), | |
709 | u32::from(sps.log2_max_frame_num) - 4, | |
710 | u32::from(sps.pic_order_cnt_type), | |
711 | u32::from(sps.log2_max_pic_order_cnt_lsb).wrapping_sub(4), | |
712 | u32::from(sps.delta_pic_order_always_zero) | |
713 | ); | |
714 | let pic_fields = H264PicFields::new( | |
715 | u32::from(pps.entropy_coding_mode), | |
716 | u32::from(pps.weighted_pred), | |
717 | u32::from(pps.weighted_bipred_idc), | |
718 | u32::from(pps.transform_8x8_mode), | |
719 | u32::from(slice_hdr.field_pic), | |
720 | u32::from(pps.constrained_intra_pred), | |
721 | u32::from(pps.pic_order_present), | |
722 | u32::from(pps.deblocking_filter_control_present), | |
723 | u32::from(pps.redundant_pic_cnt_present), | |
724 | u32::from(nal_ref_idc != 0) | |
725 | ); | |
726 | let ppd = PictureParameterBufferH264::new( | |
727 | h264pic, | |
728 | pic_refs, | |
729 | sps.pic_width_in_mbs as u16 - 1, | |
730 | sps.pic_height_in_mbs as u16 - 1, | |
731 | sps.bit_depth_luma - 8, | |
732 | sps.bit_depth_chroma - 8, | |
733 | sps.num_ref_frames as u8, | |
734 | &seq_fields, | |
735 | pps.num_slice_groups as u8 - 1, // should be 0 | |
736 | pps.slice_group_map_type, // should be 0 | |
737 | 0, //pps.slice_group_change_rate as u16 - 1, | |
738 | pps.pic_init_qp as i8 - 26, | |
739 | pps.pic_init_qs as i8 - 26, | |
740 | pps.chroma_qp_index_offset, | |
741 | pps.second_chroma_qp_index_offset, | |
742 | &pic_fields, | |
743 | slice_hdr.frame_num | |
744 | ); | |
745 | let pic_param = BufferType::PictureParameter(PictureParameter::H264(ppd)); | |
746 | let buf = vactx.context.create_buffer(pic_param).map_err(|_| DecoderError::Bug)?; | |
747 | pic.add_buffer(buf); | |
748 | ||
749 | let mut scaling_list_8x8 = [[0; 64]; 2]; | |
750 | scaling_list_8x8[0].copy_from_slice(&pps.scaling_list_8x8[0]); | |
751 | scaling_list_8x8[1].copy_from_slice(&pps.scaling_list_8x8[3]); | |
752 | let iqmatrix = BufferType::IQMatrix(IQMatrix::H264(IQMatrixBufferH264::new(pps.scaling_list_4x4, scaling_list_8x8))); | |
753 | let buf = vactx.context.create_buffer(iqmatrix).map_err(|_| DecoderError::Bug)?; | |
754 | pic.add_buffer(buf); | |
755 | ||
756 | let cpic = PictureInfo { | |
757 | id: slice_hdr.frame_num, | |
758 | full_id: top_id, | |
759 | surface_id, | |
760 | top_id, bot_id, | |
761 | //pic_type: slice_hdr.slice_type.to_frame_type(), | |
762 | is_ref, | |
763 | is_idr, | |
764 | long_term: get_long_term_id(is_idr, &slice_hdr), | |
765 | }; | |
766 | if cpic.is_ref { | |
767 | self.frame_refs.add_short_term(cpic.clone(), sps.num_ref_frames); | |
768 | } | |
769 | if let Some(lt_idx) = cpic.long_term { | |
770 | self.frame_refs.add_long_term(lt_idx, cpic); | |
771 | } | |
772 | } | |
773 | ||
774 | let mut luma_weight_l0 = [0i16; 32]; | |
775 | let mut luma_offset_l0 = [0i16; 32]; | |
776 | let mut chroma_weight_l0 = [[0i16; 2]; 32]; | |
777 | let mut chroma_offset_l0 = [[0i16; 2]; 32]; | |
778 | let mut luma_weight_l1 = [0i16; 32]; | |
779 | let mut luma_offset_l1 = [0i16; 32]; | |
780 | let mut chroma_weight_l1 = [[0i16; 2]; 32]; | |
781 | let mut chroma_offset_l1 = [[0i16; 2]; 32]; | |
782 | let mut luma_weighted_l0 = false; | |
783 | let mut chroma_weighted_l0 = false; | |
784 | let mut luma_weighted_l1 = false; | |
785 | let mut chroma_weighted_l1 = false; | |
786 | let mut luma_log2_weight_denom = slice_hdr.luma_log2_weight_denom; | |
787 | let mut chroma_log2_weight_denom = slice_hdr.chroma_log2_weight_denom; | |
788 | ||
789 | if (pps.weighted_pred && matches!(slice_hdr.slice_type, SliceType::P | SliceType::B)) || (pps.weighted_bipred_idc == 1 && slice_hdr.slice_type == SliceType::B) { | |
790 | luma_weighted_l0 = true; | |
791 | chroma_weighted_l0 = false; | |
792 | for (i, winfo) in slice_hdr.weights_l0.iter().enumerate().take(slice_hdr.num_ref_idx_l0_active) { | |
793 | if winfo.luma_weighted { | |
794 | luma_weight_l0[i] = winfo.luma_weight.into(); | |
795 | luma_offset_l0[i] = winfo.luma_offset.into(); | |
796 | } else { | |
797 | luma_weight_l0[i] = 1 << slice_hdr.luma_log2_weight_denom; | |
798 | } | |
799 | if winfo.chroma_weighted { | |
800 | chroma_weight_l0[i][0] = winfo.chroma_weight[0].into(); | |
801 | chroma_weight_l0[i][1] = winfo.chroma_weight[1].into(); | |
802 | chroma_offset_l0[i][0] = winfo.chroma_offset[0].into(); | |
803 | chroma_offset_l0[i][1] = winfo.chroma_offset[1].into(); | |
804 | } else { | |
805 | chroma_weight_l0[i][0] = 1 << slice_hdr.chroma_log2_weight_denom; | |
806 | chroma_weight_l0[i][1] = 1 << slice_hdr.chroma_log2_weight_denom; | |
807 | chroma_offset_l0[i][0] = 0; | |
808 | chroma_offset_l0[i][1] = 0; | |
809 | } | |
810 | chroma_weighted_l0 |= winfo.chroma_weighted; | |
811 | } | |
812 | } | |
813 | if pps.weighted_bipred_idc == 1 && slice_hdr.slice_type == SliceType::B { | |
814 | luma_weighted_l1 = true; | |
815 | chroma_weighted_l1 = sps.chroma_format_idc != 0; | |
816 | for (i, winfo) in slice_hdr.weights_l1.iter().enumerate().take(slice_hdr.num_ref_idx_l1_active) { | |
817 | if winfo.luma_weighted { | |
818 | luma_weight_l1[i] = winfo.luma_weight.into(); | |
819 | luma_offset_l1[i] = winfo.luma_offset.into(); | |
820 | } else { | |
821 | luma_weight_l1[i] = 1 << slice_hdr.luma_log2_weight_denom; | |
822 | } | |
823 | if chroma_weighted_l1 && winfo.chroma_weighted { | |
824 | chroma_weight_l1[i][0] = winfo.chroma_weight[0].into(); | |
825 | chroma_weight_l1[i][1] = winfo.chroma_weight[1].into(); | |
826 | chroma_offset_l1[i][0] = winfo.chroma_offset[0].into(); | |
827 | chroma_offset_l1[i][1] = winfo.chroma_offset[1].into(); | |
828 | } else { | |
829 | chroma_weight_l1[i][0] = 1 << slice_hdr.chroma_log2_weight_denom; | |
830 | chroma_weight_l1[i][1] = 1 << slice_hdr.chroma_log2_weight_denom; | |
831 | chroma_offset_l1[i][0] = 0; | |
832 | chroma_offset_l1[i][1] = 0; | |
833 | } | |
834 | } | |
835 | } | |
836 | if pps.weighted_bipred_idc == 2 && slice_hdr.slice_type == SliceType::B { | |
837 | let num_l0 = slice_hdr.num_ref_idx_l0_active; | |
838 | let num_l1 = slice_hdr.num_ref_idx_l1_active; | |
839 | if num_l0 != 1 || num_l1 != 1 { //xxx: also exclude symmetric case | |
840 | luma_weighted_l0 = false; | |
841 | luma_weighted_l1 = false; | |
842 | chroma_weighted_l0 = false; | |
843 | chroma_weighted_l1 = false; | |
844 | luma_log2_weight_denom = 5; | |
845 | chroma_log2_weight_denom = 5; | |
846 | ||
847 | for w in luma_weight_l0.iter_mut() { | |
848 | *w = 32; | |
849 | } | |
850 | for w in luma_weight_l1.iter_mut() { | |
851 | *w = 32; | |
852 | } | |
853 | for w in chroma_weight_l0.iter_mut() { | |
854 | *w = [32; 2]; | |
855 | } | |
856 | for w in chroma_weight_l1.iter_mut() { | |
857 | *w = [32; 2]; | |
858 | } | |
859 | } | |
860 | } | |
861 | ||
862 | let ref_pic_list_0 = map_ref_list(&self.frame_refs.cur_refs.ref_list0); | |
863 | let ref_pic_list_1 = map_ref_list(&self.frame_refs.cur_refs.ref_list1); | |
864 | ||
865 | let slice_param = SliceParameterBufferH264::new( | |
866 | raw_nal.len() as u32, | |
867 | 0, // no offset | |
868 | VASliceDataFlag::All, | |
869 | br.tell() as u16, | |
870 | slice_hdr.first_mb_in_slice as u16, | |
871 | match slice_hdr.slice_type { | |
872 | SliceType::I => 2, | |
873 | SliceType::P => 0, | |
874 | SliceType::B => 1, | |
875 | SliceType::SI => 4, | |
876 | SliceType::SP => 3, | |
877 | }, | |
878 | slice_hdr.direct_spatial_mv_pred as u8, | |
879 | (slice_hdr.num_ref_idx_l0_active as u8).saturating_sub(1), | |
880 | (slice_hdr.num_ref_idx_l1_active as u8).saturating_sub(1), | |
881 | slice_hdr.cabac_init_idc, | |
882 | slice_hdr.slice_qp_delta as i8, | |
883 | slice_hdr.disable_deblocking_filter_idc, | |
884 | slice_hdr.slice_alpha_c0_offset / 2, | |
885 | slice_hdr.slice_beta_offset / 2, | |
886 | ref_pic_list_0, | |
887 | ref_pic_list_1, | |
888 | luma_log2_weight_denom, | |
889 | chroma_log2_weight_denom, | |
890 | luma_weighted_l0 as u8, luma_weight_l0, luma_offset_l0, | |
891 | chroma_weighted_l0 as u8, chroma_weight_l0, chroma_offset_l0, | |
892 | luma_weighted_l1 as u8, luma_weight_l1, luma_offset_l1, | |
893 | chroma_weighted_l1 as u8, chroma_weight_l1, chroma_offset_l1, | |
894 | ); | |
895 | let slc_param = BufferType::SliceParameter(SliceParameter::H264(slice_param)); | |
896 | let buf = vactx.context.create_buffer(slc_param).map_err(|_| DecoderError::Bug)?; | |
897 | pic.add_buffer(buf); | |
898 | ||
899 | let slc_data = BufferType::SliceData(raw_nal.to_vec()); | |
900 | let buf = vactx.context.create_buffer(slc_data).map_err(|_| DecoderError::Bug)?; | |
901 | pic.add_buffer(buf); | |
902 | }, | |
903 | 2 => { // slice data partition A | |
904 | //slice header | |
905 | //slice id = read_ue() | |
906 | //cat 2 slice data (all but MB layer residual) | |
907 | return Err(DecoderError::NotImplemented); | |
908 | }, | |
909 | 3 => { // slice data partition B | |
910 | //slice id = read_ue() | |
911 | //if pps.redundant_pic_cnt_present { redundant_pic_cnt = read_ue() } | |
912 | //cat 3 slice data (MB layer residual) | |
913 | return Err(DecoderError::NotImplemented); | |
914 | }, | |
915 | 4 => { // slice data partition C | |
916 | //slice id = read_ue() | |
917 | //if pps.redundant_pic_cnt_present { redundant_pic_cnt = read_ue() } | |
918 | //cat 4 slice data (MB layer residual) | |
919 | return Err(DecoderError::NotImplemented); | |
920 | }, | |
921 | 6 => {}, //SEI | |
922 | 7 => { | |
923 | let sps = parse_sps(&src[1..])?; | |
924 | self.spses.push(sps); | |
925 | }, | |
926 | 8 => { | |
927 | validate!(full_size >= 8 + 16); | |
928 | let pps = parse_pps(&src[1..], &self.spses, full_size - 8)?; | |
929 | let mut found = false; | |
930 | for stored_pps in self.ppses.iter_mut() { | |
931 | if stored_pps.pic_parameter_set_id == pps.pic_parameter_set_id { | |
932 | *stored_pps = pps.clone(); | |
933 | found = true; | |
934 | break; | |
935 | } | |
936 | } | |
937 | if !found { | |
938 | self.ppses.push(pps); | |
939 | } | |
940 | }, | |
941 | 9 => { // access unit delimiter | |
942 | }, | |
943 | 10 => {}, //end of sequence | |
944 | 11 => {}, //end of stream | |
945 | 12 => {}, //filler | |
946 | _ => {}, | |
947 | }; | |
948 | ||
949 | br.read_skip(size)?; | |
950 | } | |
951 | ||
952 | let bpic = pic.begin().expect("begin"); | |
953 | let rpic = bpic.render().expect("render"); | |
954 | let epic = rpic.end().expect("end"); | |
955 | ||
956 | self.reorderer.add_frame(WaitingFrame { | |
957 | pic: epic, | |
958 | is_idr: is_keyframe, | |
959 | is_ref, | |
960 | ftype: frame_type, | |
961 | ts: timestamp, | |
962 | }); | |
963 | ||
964 | let mut idx = 0; | |
965 | while idx < vactx.ref_pics.len() { | |
966 | let cur_surf_id = vactx.ref_pics[idx].1; | |
967 | if self.frame_refs.ref_pics.iter().any(|fref| fref.surface_id == cur_surf_id) { | |
968 | idx += 1; | |
969 | } else { | |
970 | let (pic, _) = vactx.ref_pics.remove(idx); | |
971 | if let Ok(surf) = pic.take_surface() { | |
972 | vactx.surfaces.push(surf); | |
973 | } else { | |
974 | panic!("can't take surface"); | |
975 | } | |
976 | } | |
977 | } | |
978 | ||
979 | Ok(()) | |
980 | } | |
981 | fn get_frame(&mut self) -> Option<NAFrameRef> { | |
982 | if let Some(ref mut vactx) = self.vaapi { | |
983 | if let Some(frm) = self.reorderer.get_frame() { | |
984 | let ts = frm.ts; | |
985 | let is_idr = frm.is_idr; | |
986 | let is_ref = frm.is_ref; | |
987 | let ftype = frm.ftype; | |
988 | if let Ok(pic) = frm.pic.sync() { | |
989 | let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm); | |
990 | ||
991 | if !is_ref { | |
992 | if let Ok(surf) = pic.take_surface() { | |
993 | vactx.surfaces.push(surf); | |
994 | } else { | |
995 | panic!("can't take surface"); | |
996 | } | |
997 | } else { | |
998 | let id = pic.surface_id(); | |
999 | vactx.ref_pics.push((pic, id)); | |
1000 | } | |
1001 | ||
1002 | let ts = NATimeInfo::new(None, Some(ts), None, self.tb_num, self.tb_den); | |
1003 | Some(NAFrame::new(ts, ftype, is_idr, self.info.clone(), self.out_frm.clone()).into_ref()) | |
1004 | } else { | |
1005 | panic!("can't sync"); | |
1006 | } | |
1007 | } else { | |
1008 | None | |
1009 | } | |
1010 | } else { | |
1011 | None | |
1012 | } | |
1013 | } | |
1014 | fn get_last_frames(&mut self) -> Option<NAFrameRef> { | |
1015 | if let Some(ref mut vactx) = self.vaapi { | |
1016 | if let Some(frm) = self.reorderer.frames.pop_front() { | |
1017 | let ts = frm.ts; | |
1018 | let is_idr = frm.is_idr; | |
1019 | let is_ref = frm.is_ref; | |
1020 | let ftype = frm.ftype; | |
1021 | if let Ok(pic) = frm.pic.sync() { | |
1022 | let _ = fill_frame(vactx.ifmt, &pic, &mut self.out_frm); | |
1023 | ||
1024 | if !is_ref { | |
1025 | if let Ok(surf) = pic.take_surface() { | |
1026 | vactx.surfaces.push(surf); | |
1027 | } else { | |
1028 | panic!("can't take surface"); | |
1029 | } | |
1030 | } else { | |
1031 | let id = pic.surface_id(); | |
1032 | vactx.ref_pics.push((pic, id)); | |
1033 | } | |
1034 | ||
1035 | let ts = NATimeInfo::new(None, Some(ts), None, self.tb_num, self.tb_den); | |
1036 | Some(NAFrame::new(ts, ftype, is_idr, self.info.clone(), self.out_frm.clone()).into_ref()) | |
1037 | } else { | |
1038 | panic!("can't sync"); | |
1039 | } | |
1040 | } else { | |
1041 | None | |
1042 | } | |
1043 | } else { | |
1044 | None | |
1045 | } | |
1046 | } | |
1047 | fn flush(&mut self) { | |
1048 | self.frame_refs.clear_refs(); | |
1049 | if let Some(ref mut vactx) = self.vaapi { | |
1050 | for frm in self.reorderer.frames.drain(..) { | |
1051 | if let Ok(pic) = frm.pic.sync() { | |
1052 | if let Ok(surf) = pic.take_surface() { | |
1053 | vactx.surfaces.push(surf); | |
1054 | } else { | |
1055 | panic!("can't take surface"); | |
1056 | } | |
1057 | } else { | |
1058 | panic!("can't sync"); | |
1059 | } | |
1060 | } | |
1061 | self.reorderer.flush(); | |
1062 | for (pic, _) in vactx.ref_pics.drain(..) { | |
1063 | if let Ok(surf) = pic.take_surface() { | |
1064 | vactx.surfaces.push(surf); | |
1065 | } else { | |
1066 | panic!("can't take surface"); | |
1067 | } | |
1068 | } | |
1069 | } | |
1070 | } | |
1071 | } | |
1072 | ||
// The VA-API decoder exposes no user-configurable options.
impl NAOptionHandler for VaapiH264Decoder {
    fn get_supported_options(&self) -> &[NAOptionDefinition] { &[] }
    fn set_options(&mut self, _options: &[NAOption]) {}
    fn query_option_value(&self, _name: &str) -> Option<NAValue> { None }
}
1078 | ||
1079 | use std::thread::*; | |
1080 | use std::sync::mpsc::*; | |
1081 | ||
/// Requests sent from the wrapper to the decoder worker thread.
enum DecMessage {
    /// Initialise the decoder with the given codec information.
    Init(NACodecInfoRef),
    /// Decode one input packet.
    Decode(NAPacket),
    /// Discard all queued frames and reference state (no reply is sent).
    Flush,
    /// Request the next reordered output frame.
    GetFrame,
    /// Request a frame while draining at end of stream.
    GetLastFrames,
    /// Shut the worker thread down.
    End
}
1090 | ||
/// Replies sent from the decoder worker thread back to the wrapper.
enum DecResponse {
    /// The requested operation succeeded.
    Ok,
    /// No frame is currently available.
    Nothing,
    /// The requested operation failed with this decoder error.
    Err(DecoderError),
    /// A decoded output frame.
    Frame(NAFrameRef),
}
1097 | ||
/// Common interface for hardware-accelerated decoders.
pub trait HWDecoder {
    /// Initialises the decoder for the given stream information.
    fn init(&mut self, info: NACodecInfoRef) -> DecoderResult<()>;
    /// Submits one packet for decoding.
    fn queue_pkt(&mut self, pkt: &NAPacket) -> DecoderResult<()>;
    /// Returns the next decoded frame if one is ready.
    fn get_frame(&mut self) -> Option<NAFrameRef>;
    /// Returns a remaining frame while draining at end of stream.
    fn get_last_frames(&mut self) -> Option<NAFrameRef>;
    /// Discards all internal decoder state.
    fn flush(&mut self);
}
1105 | ||
/// Wrapper that runs the VA-API H.264 decoder on a dedicated worker thread,
/// communicating with it through a pair of bounded channels.
pub struct HWWrapper {
    // join handle of the worker thread; taken out when the wrapper is dropped
    handle: Option<JoinHandle<DecoderResult<()>>>,
    // request channel towards the worker
    send: SyncSender<DecMessage>,
    // reply channel from the worker
    recv: Receiver<DecResponse>,
}
1111 | ||
1112 | #[allow(clippy::new_without_default)] | |
1113 | impl HWWrapper { | |
1114 | pub fn new() -> Self { | |
1115 | let (in_send, in_recv) = sync_channel(1); | |
1116 | let (out_send, out_recv) = sync_channel(1); | |
1117 | let handle = std::thread::spawn(move || { | |
1118 | let receiver = in_recv; | |
1119 | let sender = out_send; | |
1120 | let mut dec = VaapiH264Decoder::new(); | |
1121 | while let Ok(msg) = receiver.recv() { | |
1122 | match msg { | |
1123 | DecMessage::Init(info) => { | |
1124 | let msg = if let Err(err) = dec.init(info) { | |
1125 | DecResponse::Err(err) | |
1126 | } else { | |
1127 | DecResponse::Ok | |
1128 | }; | |
1129 | sender.send(msg).map_err(|_| DecoderError::Bug)?; | |
1130 | }, | |
1131 | DecMessage::Decode(pkt) => { | |
1132 | let msg = match dec.decode(&pkt) { | |
1133 | Ok(()) => DecResponse::Ok, | |
1134 | Err(err) => DecResponse::Err(err), | |
1135 | }; | |
1136 | sender.send(msg).map_err(|_| DecoderError::Bug)?; | |
1137 | }, | |
1138 | DecMessage::GetFrame => { | |
1139 | let msg = match dec.get_frame() { | |
1140 | Some(frm) => DecResponse::Frame(frm), | |
1141 | None => DecResponse::Nothing, | |
1142 | }; | |
1143 | sender.send(msg).map_err(|_| DecoderError::Bug)?; | |
1144 | }, | |
1145 | DecMessage::GetLastFrames => { | |
1146 | let msg = match dec.get_last_frames() { | |
1147 | Some(frm) => DecResponse::Frame(frm), | |
1148 | None => DecResponse::Nothing, | |
1149 | }; | |
1150 | sender.send(msg).map_err(|_| DecoderError::Bug)?; | |
1151 | }, | |
1152 | DecMessage::Flush => dec.flush(), | |
1153 | DecMessage::End => return Ok(()), | |
1154 | }; | |
1155 | } | |
1156 | Err(DecoderError::Bug) | |
1157 | }); | |
1158 | ||
1159 | Self { | |
1160 | handle: Some(handle), | |
1161 | send: in_send, | |
1162 | recv: out_recv, | |
1163 | } | |
1164 | } | |
1165 | } | |
1166 | ||
1167 | impl HWDecoder for HWWrapper { | |
1168 | fn init(&mut self, info: NACodecInfoRef) -> DecoderResult<()> { | |
1169 | if self.send.send(DecMessage::Init(info)).is_ok() { | |
1170 | match self.recv.recv() { | |
1171 | Ok(DecResponse::Ok) => Ok(()), | |
1172 | Ok(DecResponse::Err(err)) => Err(err), | |
1173 | Err(_) => Err(DecoderError::Bug), | |
1174 | _ => unreachable!(), | |
1175 | } | |
1176 | } else { | |
1177 | Err(DecoderError::Bug) | |
1178 | } | |
1179 | } | |
1180 | fn queue_pkt(&mut self, pkt: &NAPacket) -> DecoderResult<()> { | |
1181 | let pkt2 = NAPacket::new_from_refbuf(pkt.get_stream(), pkt.ts, pkt.keyframe, pkt.get_buffer()); | |
1182 | if self.send.send(DecMessage::Decode(pkt2)).is_ok() { | |
1183 | match self.recv.recv() { | |
1184 | Ok(DecResponse::Ok) => Ok(()), | |
1185 | Ok(DecResponse::Err(err)) => Err(err), | |
1186 | Err(_) => Err(DecoderError::Bug), | |
1187 | _ => unreachable!(), | |
1188 | } | |
1189 | } else { | |
1190 | Err(DecoderError::Bug) | |
1191 | } | |
1192 | } | |
1193 | fn get_frame(&mut self) -> Option<NAFrameRef> { | |
1194 | if self.send.send(DecMessage::GetFrame).is_ok() { | |
1195 | match self.recv.recv() { | |
1196 | Ok(DecResponse::Frame(frm)) => Some(frm), | |
1197 | Ok(DecResponse::Nothing) => None, | |
1198 | Err(_) => None, | |
1199 | _ => unreachable!(), | |
1200 | } | |
1201 | } else { | |
1202 | None | |
1203 | } | |
1204 | } | |
1205 | fn get_last_frames(&mut self) -> Option<NAFrameRef> { | |
1206 | if self.send.send(DecMessage::GetLastFrames).is_ok() { | |
1207 | match self.recv.recv() { | |
1208 | Ok(DecResponse::Frame(frm)) => Some(frm), | |
1209 | Ok(DecResponse::Nothing) => None, | |
1210 | Err(_) => None, | |
1211 | _ => unreachable!(), | |
1212 | } | |
1213 | } else { | |
1214 | None | |
1215 | } | |
1216 | } | |
1217 | fn flush(&mut self) { | |
1218 | let _ = self.send.send(DecMessage::Flush); | |
1219 | } | |
1220 | } | |
1221 | ||
1222 | impl Drop for HWWrapper { | |
1223 | fn drop(&mut self) { | |
1224 | if self.send.send(DecMessage::End).is_ok() { | |
1225 | let mut handle = None; | |
1226 | std::mem::swap(&mut handle, &mut self.handle); | |
1227 | if let Some(hdl) = handle { | |
1228 | let _ = hdl.join(); | |
1229 | } | |
1230 | } | |
1231 | } | |
1232 | } | |
1233 | ||
// The threaded wrapper exposes no user-configurable options either.
impl NAOptionHandler for HWWrapper {
    fn get_supported_options(&self) -> &[NAOptionDefinition] { &[] }
    fn set_options(&mut self, _options: &[NAOption]) {}
    fn query_option_value(&self, _name: &str) -> Option<NAValue> { None }
}
1239 | ||
/// Creates a new hardware-accelerated H.264 decoder running on its own worker thread.
pub fn new_h264_hwdec() -> Box<dyn HWDecoder + Send> {
    Box::new(HWWrapper::new())
}
1243 | ||
#[cfg(test)]
mod test {
    use nihav_core::codecs::*;
    use nihav_core::io::byteio::*;
    use nihav_core::demuxers::{RegisteredDemuxers, create_demuxer};
    use nihav_commonfmt::generic_register_all_demuxers;
    use super::VaapiH264Decoder;
    use std::io::prelude::*;

    /// Decodes `name` with the VA-API H.264 decoder and dumps every frame as
    /// a binary PGM image into assets/test_out/ for visual inspection.
    /// The image is w x (h*3/2): the luma plane on top, then each row holding
    /// the U and V half-width lines side by side (4:2:0 layout).
    fn decode_h264(name: &str, dname: &str, dmx_reg: &RegisteredDemuxers, opfx: &str) -> DecoderResult<()> {
        let dmx_f = dmx_reg.find_demuxer(dname).expect("demuxer exists");
        let file = std::fs::File::open(name).expect("file exists");
        let mut fr = FileReader::new_read(file);
        let mut br = ByteReader::new(&mut fr);
        let mut dmx = create_demuxer(dmx_f, &mut br).expect("create demuxer");

        // find the first video stream and initialise the decoder for it
        let mut vstream_id = 0;
        let mut dec = VaapiH264Decoder::new();
        for stream in dmx.get_streams() {
            if stream.get_media_type() == StreamType::Video {
                dec.init(stream.get_info()).expect("inited");
                vstream_id = stream.get_id();
                break;
            }
        }

        let mut frameno = 0;
        while let Ok(pkt) = dmx.get_frame() {
            // skip packets from non-video streams
            if pkt.get_stream().get_id() != vstream_id {
                continue;
            }
            dec.decode(&pkt).expect("decoded");
            let frm = dec.get_last_frames().expect("get frame");
            // prefer DTS, fall back to PTS, then zero, for the output file name
            let timestamp = frm.get_dts().unwrap_or_else(|| frm.get_pts().unwrap_or(0));

            let pic = frm.get_buffer().get_vbuf().expect("got picture");

            let nname = format!("assets/test_out/{}{:06}_{}.pgm", opfx, timestamp, frameno);
            frameno += 1;
            let mut file = std::fs::File::create(&nname).expect("create file");
            let (w, h) = pic.get_dimensions(0);
            // PGM header; h * 3 / 2 accounts for the appended chroma rows
            file.write_all(format!("P5\n{} {}\n255\n", w, h * 3 / 2).as_bytes()).expect("header written");
            let data = pic.get_data();
            // luma plane, cropped to the visible width
            for yline in data.chunks(pic.get_stride(0)).take(h) {
                file.write_all(&yline[..w]).expect("Y line written");
            }
            // each output row carries one U line followed by one V line,
            // both half width, so together they fill a full w-wide row
            for (uline, vline) in data[pic.get_offset(1)..].chunks(pic.get_stride(1))
                    .zip(data[pic.get_offset(2)..].chunks(pic.get_stride(2))).take(h / 2) {
                file.write_all(&uline[..w / 2]).expect("U line written");
                file.write_all(&vline[..w / 2]).expect("V line written");
            }
        }
        Ok(())
    }


    // samples if not specified otherwise come from H.264 conformance suite

    #[test]
    fn test_h264_simple() {
        let mut dmx_reg = RegisteredDemuxers::new();
        generic_register_all_demuxers(&mut dmx_reg);

        decode_h264("assets/ITU/DimpledSpanishCuckoo-mobile.mp4", "mov", &dmx_reg, "hw").unwrap();
    }
}