1 //! Container format detection.
3 //! Usually the user does not know the container format of the opened file.
4 //! That is why format detection functionality is needed.
5 //! This module contains a set of rules for detecting the container format not merely by file extension but also by file content where possible.
10 //! use nihav_registry::detect::detect_format;
11 //! use std::fs::File;
12 //! use nihav_core::io::byteio::*;
14 //! let name = "mediafile.ogv";
15 //! let mut file = File::open(name).unwrap();
16 //! let mut br = FileReader::new_read(&mut file);
17 //! let result = detect_format(name, &mut br);
18 //! if let Some((name, score)) = result {
19 //! println!("detected format {} with score {:?}", name, score);
22 use std::io::SeekFrom;
23 use nihav_core::io::byteio::ByteIO;
25 /// Format detection score.
26 #[derive(Debug,Clone,Copy,PartialEq)]
27 pub enum DetectionScore {
28 /// Format is not detected.
30 /// Format matched by file extension.
32 /// Format matches by markers inside the file.
37 /// Checks whether current detection score is less than a value it is compared against.
38 pub fn less(self, other: DetectionScore) -> bool {
39 (self as i32) < (other as i32)
57 fn val(&self) -> u64 {
59 Arg::Byte(b) => { u64::from(b) }
60 Arg::U16BE(v) => { u64::from(v) }
61 Arg::U16LE(v) => { u64::from(v) }
62 Arg::U24BE(v) => { u64::from(v) }
63 Arg::U24LE(v) => { u64::from(v) }
64 Arg::U32BE(v) => { u64::from(v) }
65 Arg::U32LE(v) => { u64::from(v) }
66 Arg::U64BE(v) => { v }
67 Arg::U64LE(v) => { v }
70 fn read_val(&self, src: &mut dyn ByteIO) -> Option<u64> {
73 let res = src.peek_byte();
74 if res.is_err() { return None; }
75 Some(u64::from(res.unwrap()))
78 let res = src.peek_u16be();
79 if res.is_err() { return None; }
80 Some(u64::from(res.unwrap()))
83 let res = src.peek_u16le();
84 if res.is_err() { return None; }
85 Some(u64::from(res.unwrap()))
88 let res = src.peek_u24be();
89 if res.is_err() { return None; }
90 Some(u64::from(res.unwrap()))
93 let res = src.peek_u24le();
94 if res.is_err() { return None; }
95 Some(u64::from(res.unwrap()))
98 let res = src.peek_u32be();
99 if res.is_err() { return None; }
100 Some(u64::from(res.unwrap()))
103 let res = src.peek_u32le();
104 if res.is_err() { return None; }
105 Some(u64::from(res.unwrap()))
108 let res = src.peek_u64be();
109 if res.is_err() { return None; }
113 let res = src.peek_u64le();
114 if res.is_err() { return None; }
119 fn eq(&self, src: &mut dyn ByteIO) -> bool {
120 if let Some(rval) = self.read_val(src) {
126 fn ge(&self, src: &mut dyn ByteIO) -> bool {
127 if let Some(rval) = self.read_val(src) {
133 fn gt(&self, src: &mut dyn ByteIO) -> bool {
134 if let Some(rval) = self.read_val(src) {
140 fn le(&self, src: &mut dyn ByteIO) -> bool {
141 if let Some(rval) = self.read_val(src) {
147 fn lt(&self, src: &mut dyn ByteIO) -> bool {
148 if let Some(rval) = self.read_val(src) {
158 Or(&'a CC<'a>, &'a CC<'a>),
169 fn eval(&self, src: &mut dyn ByteIO) -> bool {
171 CC::Or(a, b) => { a.eval(src) || b.eval(src) },
172 CC::Eq(ref arg) => { arg.eq(src) },
173 CC::In(ref a, ref b) => { a.ge(src) && b.le(src) },
174 CC::Lt(ref arg) => { arg.lt(src) },
175 CC::Le(ref arg) => { arg.le(src) },
176 CC::Gt(ref arg) => { arg.gt(src) },
177 CC::Ge(ref arg) => { arg.ge(src) },
179 let mut val: Vec<u8> = vec![0; strng.len()];
180 let res = src.peek_buf(val.as_mut_slice());
181 if res.is_err() { return false; }
188 struct CheckItem<'a> {
194 struct DetectConditions<'a> {
195 demux_name: &'static str,
196 extensions: &'static str,
197 conditions: &'a [CheckItem<'a>],
200 const DETECTORS: &[DetectConditions] = &[
209 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"RIFF"), &CC::Str(b"ON2 ")) },
210 CheckItem{offs: 8, cond: &CC::Or(&CC::Or(&CC::Str(b"AVI LIST"),
211 &CC::Str(b"AVIXLIST")),
212 &CC::Str(b"ON2fLIST")) },
213 CheckItem{offs: 20, cond: &CC::Str(b"hdrlavih")},
217 demux_name: "avi-dib",
219 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"RIFF") },
220 CheckItem{offs: 8, cond: &CC::Str(b"AVI LIST")},
221 CheckItem{offs: 20, cond: &CC::Str(b"hdrlhdra")},
227 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"RIFF") },
228 CheckItem{offs: 8, cond: &CC::Str(b"WAVEfmt ") }
234 conditions: &[CheckItem{offs: 4, cond: &CC::Or(&CC::Or(&CC::Str(b"mdat"),
236 &CC::Str(b"ftyp")) }],
241 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"GIF87a"),
242 &CC::Str(b"GIF89a")) }],
247 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"\x00\x00\x00\x08wide") },
248 CheckItem{offs: 12, cond: &CC::Or(&CC::Or(&CC::Str(b"mdat"),
250 &CC::Str(b"ftyp")) }],
253 demux_name: "mov-macbin",
254 extensions: ".mov,.bin",
255 conditions: &[CheckItem{offs: 0, cond: &CC::Eq(Arg::Byte(0))},
256 CheckItem{offs: 0x41, cond: &CC::Str(b"MooV")},
257 CheckItem{offs: 0x7A, cond: &CC::Eq(Arg::Byte(0x81))},
258 CheckItem{offs: 0x7B, cond: &CC::Eq(Arg::Byte(0x81))},
259 CheckItem{offs: 0x84, cond: &CC::Or(&CC::Str(b"mdat"), &CC::Str(b"moov"))}],
262 demux_name: "mov-macbin",
263 extensions: ".mov,.bin",
264 conditions: &[CheckItem{offs: 0, cond: &CC::Eq(Arg::Byte(0))},
265 CheckItem{offs: 0x41, cond: &CC::Str(b"MooV")},
266 CheckItem{offs: 0x45, cond: &CC::Or(&CC::Str(b"PrMr"), &CC::Str(b"TVOD"))},
267 CheckItem{offs: 0x7A, cond: &CC::Eq(Arg::Byte(0x81))},
268 CheckItem{offs: 0x7B, cond: &CC::Eq(Arg::Byte(0x81))}],
271 demux_name: "mov-resfork",
273 conditions: &[CheckItem{offs: 0, cond: &CC::Eq(Arg::U32BE(0x100))},
274 CheckItem{offs: 0x108, cond: &CC::Str(b"moov")}],
277 demux_name: "yuv4mpeg",
279 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"YUV4MPEG2 ") }],
282 demux_name: "armovie",
284 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"ARMovie\n") }],
289 conditions: &[CheckItem{offs: 0x00, cond: &CC::Str(b"ACEF") },
290 CheckItem{offs: 0x18, cond: &CC::Eq(Arg::U32LE(64))}],
295 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"FLV") },
296 CheckItem{offs: 3, cond: &CC::Le(Arg::Byte(1)) }],
300 extensions: ".avs,.dvi",
301 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"IVDV")},
302 CheckItem{offs: 12, cond: &CC::Str(b"SSVA")}],
307 conditions: &[CheckItem{offs: 0, cond: &CC::Str(&[0x50, 0xEF, 0x81, 0x19, 0xB3, 0xBD, 0xD0, 0x11, 0xA3, 0xE5, 0x00, 0xA0, 0xC9, 0x24, 0x44])},
308 CheckItem{offs: 15, cond: &CC::Or(&CC::Eq(Arg::Byte(0x36)), &CC::Eq(Arg::Byte(0x37)))}],
313 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"DKIF\x00\x00")},
314 CheckItem{offs: 6, cond: &CC::Ge(Arg::U16LE(32))}],
319 conditions: &[CheckItem{offs: 0, cond: &CC::Eq(Arg::U32LE(0x29111994))}],
324 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"ANIM")},
325 CheckItem{offs: 8, cond: &CC::Str(b"AHDR")}],
328 demux_name: "smush-mcmp",
330 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"MCMP")},
331 CheckItem{offs: 6, cond: &CC::Eq(Arg::Byte(0))},
332 CheckItem{offs: 7, cond: &CC::Eq(Arg::Byte(0))}],
337 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"SANM")},
338 CheckItem{offs: 8, cond: &CC::Str(b"SHDR")}],
348 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"IDVCd")}],
351 demux_name: "tealmov",
353 conditions: &[CheckItem{offs: 0x3C, cond: &CC::Str(b"MvieTlMv")}],
356 demux_name: "realaudio",
357 extensions: ".ra,.ram",
358 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b".ra\xFD")}],
361 demux_name: "realmedia",
362 extensions: ".rm,.rmvb,.rma,.ra,.ram",
363 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b".RMF"), &CC::Str(b".RMP")) },
364 CheckItem{offs: 4, cond: &CC::Ge(Arg::U32BE(10))}],
367 demux_name: "real_ivr",
369 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b".R1M"), &CC::Str(b".REC"))}],
373 extensions: ".bik,.bk2",
374 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::In(Arg::U32BE(0x42494B62), // BIKb
375 Arg::U32BE(0x42494B7B)), // BIKz
376 &CC::In(Arg::U32BE(0x4B423261), // KB2a
377 Arg::U32BE(0x4B42327B)))}], // KB2z
380 demux_name: "smacker",
382 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"SMK2"), &CC::Str(b"SMK4"))}],
387 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"MAC ") },
388 CheckItem{offs: 4, cond: &CC::In(Arg::U16LE(3800), Arg::U16LE(3990))}],
393 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"fLaC") }],
398 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"TTA1") }],
401 demux_name: "wavpack",
403 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"wvpk") },
404 CheckItem{offs: 8, cond: &CC::In(Arg::U16LE(0x402), Arg::U16LE(0x410))}],
409 conditions: &[CheckItem{offs: 0, cond: &CC::In(Arg::U16BE(1), Arg::U16BE(0xFF))},
410 CheckItem{offs: 2, cond: &CC::Str(b"\x0D\x0AVersion:Vivo/")}],
415 conditions: &[CheckItem{offs: 0, cond: &CC::In(Arg::U16BE(1), Arg::U16BE(0xFF))},
416 CheckItem{offs: 3, cond: &CC::Str(b"\x0D\x0AVersion:Vivo/")}],
426 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"BMVi") },
427 CheckItem{offs: 32, cond: &CC::Str(b"DATA")}],
431 extensions: ".dtv,.avc",
435 demux_name: "sierra-seq",
446 /// Tries to detect container format.
448 /// This function tries to determine container format using both file extension and checking against container specific markers inside.
449 /// In case of success the function returns short container name and the detection score.
450 /// Result should have the highest detection score among tested.
451 pub fn detect_format(name: &str, src: &mut dyn ByteIO) -> Option<(&'static str, DetectionScore)> {
452 let mut result = None;
453 let lname = name.to_lowercase();
454 for detector in DETECTORS {
455 let mut score = DetectionScore::No;
456 if !name.is_empty() {
457 for ext in detector.extensions.split(',') {
458 if lname.ends_with(ext) {
459 score = DetectionScore::ExtensionMatches;
464 let mut passed = !detector.conditions.is_empty();
465 for ck in detector.conditions {
466 let ret = src.seek(SeekFrom::Start(u64::from(ck.offs)));
471 if !ck.cond.eval(src) {
477 score = DetectionScore::MagicMatches;
479 if score == DetectionScore::MagicMatches {
480 return Some((detector.demux_name, score));
482 if result.is_none() && score != DetectionScore::No {
483 result = Some((detector.demux_name, score));
484 } else if result.is_some() {
485 let (_, oldsc) = result.unwrap();
486 if oldsc.less(score) {
487 result = Some((detector.demux_name, score));
494 /// Tries to detect container format for provided file name.
495 pub fn detect_format_by_name(name: &str) -> Option<&'static str> {
499 let lname = name.to_lowercase();
500 for detector in DETECTORS {
501 for ext in detector.extensions.split(',') {
502 if lname.ends_with(ext) {
503 return Some(detector.demux_name);
514 use nihav_core::io::byteio::*;
517 fn test_avi_detect() {
518 let name = "assets/Indeo/laser05.avi";
519 let mut file = File::open(name).unwrap();
520 let mut br = FileReader::new_read(&mut file);
521 let (name, score) = detect_format(name, &mut br).unwrap();
522 assert_eq!(name, "avi");
523 assert_eq!(score, DetectionScore::MagicMatches);
527 fn test_gdv_detect() {
528 let name = "assets/Game/intro1.gdv";
529 let mut file = File::open(name).unwrap();
530 let mut br = FileReader::new_read(&mut file);
531 let (name, score) = detect_format(name, &mut br).unwrap();
532 assert_eq!(name, "gdv");
533 assert_eq!(score, DetectionScore::MagicMatches);