1 //! Container format detection.
3 //! Usually the user does not know the container format of the opened file.
4 //! That is why format detection functionality is needed.
5 //! This module contains a set of rules to detect the container format not merely by file extension but also by the file content where possible.
10 //! use nihav_registry::detect::detect_format;
11 //! use std::fs::File;
12 //! use nihav_core::io::byteio::*;
14 //! let name = "mediafile.ogv";
15 //! let mut file = File::open(name).unwrap();
16 //! let mut filereader = FileReader::new_read(&mut file);
17 //! let mut br = ByteReader::new(&mut filereader);
18 //! let result = detect_format(name, &mut br);
19 //! if let Some((name, score)) = result {
20 //! println!("detected format {} with score {:?}", name, score);
23 use std::io::SeekFrom;
24 use nihav_core::io::byteio::ByteReader;
26 /// Format detection score.
///
/// Scores are ranked through [`DetectionScore::less`], which compares the `i32` casts of the two values.
27 #[derive(Debug,Clone,Copy,PartialEq)]
28 pub enum DetectionScore {
29 /// Format is not detected.
31 /// Format matched by file extension.
33 /// Format matches by markers inside the file.
38 /// Checks whether current detection score is less than a value it is compared against.
///
/// Returns `true` when `self`, cast to `i32`, is strictly smaller than `other` cast the same way.
39 pub fn less(self, other: DetectionScore) -> bool {
// Both operands are cast to `i32`, so the comparison is purely numeric.
40 (self as i32) < (other as i32)
/// Returns the constant stored in this argument as a `u64`.
///
/// Narrower payloads are widened with `u64::from`; the 64-bit variants are returned unchanged.
/// The BE/LE flavour of a variant makes no difference here: the stored value is returned as is.
58 fn val(&self) -> u64 {
60 Arg::Byte(b) => { u64::from(b) }
61 Arg::U16BE(v) => { u64::from(v) }
62 Arg::U16LE(v) => { u64::from(v) }
63 Arg::U24BE(v) => { u64::from(v) }
64 Arg::U24LE(v) => { u64::from(v) }
65 Arg::U32BE(v) => { u64::from(v) }
66 Arg::U32LE(v) => { u64::from(v) }
67 Arg::U64BE(v) => { v }
68 Arg::U64LE(v) => { v }
/// Peeks a value from `src` and returns it widened to `u64`.
///
/// Returns `None` as soon as the underlying `peek_*` call reports an error.
/// NOTE(review): the `peek_*` readers presumably leave the read position untouched — confirm against `ByteReader`.
71 fn read_val(&self, src: &mut ByteReader) -> Option<u64> {
// Single byte.
74 let res = src.peek_byte();
75 if res.is_err() { return None; }
76 Some(u64::from(res.unwrap()))
// 16-bit, big-endian.
79 let res = src.peek_u16be();
80 if res.is_err() { return None; }
81 Some(u64::from(res.unwrap()))
// 16-bit, little-endian.
84 let res = src.peek_u16le();
85 if res.is_err() { return None; }
86 Some(u64::from(res.unwrap()))
// 24-bit, big-endian.
89 let res = src.peek_u24be();
90 if res.is_err() { return None; }
91 Some(u64::from(res.unwrap()))
// 24-bit, little-endian.
94 let res = src.peek_u24le();
95 if res.is_err() { return None; }
96 Some(u64::from(res.unwrap()))
// 32-bit, big-endian.
99 let res = src.peek_u32be();
100 if res.is_err() { return None; }
101 Some(u64::from(res.unwrap()))
// 32-bit, little-endian.
104 let res = src.peek_u32le();
105 if res.is_err() { return None; }
106 Some(u64::from(res.unwrap()))
// 64-bit, big-endian.
109 let res = src.peek_u64be();
110 if res.is_err() { return None; }
// 64-bit, little-endian.
114 let res = src.peek_u64le();
115 if res.is_err() { return None; }
/// Compares the value obtained from `src` through `read_val` with this argument.
/// NOTE(review): presumably tests `rval == self.val()` and yields `false` when no value could be read — confirm.
120 fn eq(&self, src: &mut ByteReader) -> bool {
121 if let Some(rval) = self.read_val(src) {
/// Compares the value obtained from `src` through `read_val` with this argument.
/// NOTE(review): presumably tests `rval >= self.val()` (source value on the left) and yields `false` when no value could be read — confirm.
127 fn ge(&self, src: &mut ByteReader) -> bool {
128 if let Some(rval) = self.read_val(src) {
/// Compares the value obtained from `src` through `read_val` with this argument.
/// NOTE(review): presumably tests `rval > self.val()` (source value on the left) and yields `false` when no value could be read — confirm.
134 fn gt(&self, src: &mut ByteReader) -> bool {
135 if let Some(rval) = self.read_val(src) {
/// Compares the value obtained from `src` through `read_val` with this argument.
/// NOTE(review): presumably tests `rval <= self.val()` (source value on the left) and yields `false` when no value could be read — confirm.
141 fn le(&self, src: &mut ByteReader) -> bool {
142 if let Some(rval) = self.read_val(src) {
/// Compares the value obtained from `src` through `read_val` with this argument.
/// NOTE(review): presumably tests `rval < self.val()` (source value on the left) and yields `false` when no value could be read — confirm.
148 fn lt(&self, src: &mut ByteReader) -> bool {
149 if let Some(rval) = self.read_val(src) {
/// Holds when either of the two referenced conditions holds (see `eval`).
159 Or(&'a CC<'a>, &'a CC<'a>),
/// Evaluates this condition against the data available from `src`.
///
/// Returns `true` when the condition holds; numeric comparisons are delegated to the matching `Arg` method.
170 fn eval(&self, src: &mut ByteReader) -> bool {
// Logical OR: the second condition is evaluated only when the first one fails.
172 CC::Or(a, b) => { a.eval(src) || b.eval(src) },
173 CC::Eq(ref arg) => { arg.eq(src) },
// Range check: `ge` is applied with the first bound and `le` with the second.
174 CC::In(ref a, ref b) => { a.ge(src) && b.le(src) },
175 CC::Lt(ref arg) => { arg.lt(src) },
176 CC::Le(ref arg) => { arg.le(src) },
177 CC::Gt(ref arg) => { arg.gt(src) },
178 CC::Ge(ref arg) => { arg.ge(src) },
// Peek exactly as many bytes as `strng` is long; a failed peek means no match.
180 let mut val: Vec<u8> = vec![0; strng.len()];
181 let res = src.peek_buf(val.as_mut_slice());
182 if res.is_err() { return false; }
/// One content check: `detect_format` seeks to byte offset `offs` and then evaluates the condition `cond` there.
189 struct CheckItem<'a> {
/// Detection rule for a single container format.
195 struct DetectConditions<'a> {
/// Name returned to the caller when this rule is selected.
196 demux_name: &'static str,
/// Comma-separated list of file name suffixes, e.g. `".mov,.bin"`.
197 extensions: &'static str,
/// Content checks that `detect_format` evaluates in order.
198 conditions: &'a [CheckItem<'a>],
/// Table of detection rules, scanned in order by `detect_format` and `detect_format_by_name`.
///
/// Each entry names a demuxer, lists the file name suffixes it claims, and gives the content checks
/// (offset + condition) for that format.
201 const DETECTORS: &[DetectConditions] = &[
205 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"RIFF"), &CC::Str(b"ON2 ")) },
206 CheckItem{offs: 8, cond: &CC::Or(&CC::Or(&CC::Str(b"AVI LIST"),
207 &CC::Str(b"AVIXLIST")),
208 &CC::Str(b"ON2fLIST")) },
214 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"RIFF") },
215 CheckItem{offs: 8, cond: &CC::Str(b"WAVEfmt ") }
221 conditions: &[CheckItem{offs: 4, cond: &CC::Or(&CC::Or(&CC::Str(b"mdat"),
223 &CC::Str(b"ftyp")) }],
228 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"\x00\x00\x00\x08wide") },
229 CheckItem{offs: 12, cond: &CC::Or(&CC::Or(&CC::Str(b"mdat"),
231 &CC::Str(b"ftyp")) }],
234 demux_name: "mov-macbin",
235 extensions: ".mov,.bin",
236 conditions: &[CheckItem{offs: 0, cond: &CC::Eq(Arg::Byte(0))},
237 CheckItem{offs: 0x41, cond: &CC::Str(b"MooV")},
238 CheckItem{offs: 0x7A, cond: &CC::Eq(Arg::Byte(0x81))},
239 CheckItem{offs: 0x7B, cond: &CC::Eq(Arg::Byte(0x81))},
240 CheckItem{offs: 0x84, cond: &CC::Str(b"mdat")}],
243 demux_name: "yuv4mpeg",
245 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"YUV4MPEG2 ") }],
250 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"FLV") },
251 CheckItem{offs: 3, cond: &CC::Le(Arg::Byte(1)) }],
256 conditions: &[CheckItem{offs: 0, cond: &CC::Str(&[0x50, 0xEF, 0x81, 0x19, 0xB3, 0xBD, 0xD0, 0x11, 0xA3, 0xE5, 0x00, 0xA0, 0xC9, 0x24, 0x44])},
257 CheckItem{offs: 15, cond: &CC::Or(&CC::Eq(Arg::Byte(0x36)), &CC::Eq(Arg::Byte(0x37)))}],
262 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"DKIF\x00\x00")},
263 CheckItem{offs: 6, cond: &CC::Ge(Arg::U16LE(32))}],
266 demux_name: "arxel-cnm",
268 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"CNM UNR\x00")}],
273 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"FCMP")}],
278 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"2TSF")}],
283 conditions: &[CheckItem{offs: 0, cond: &CC::Eq(Arg::U32LE(0x29111994))}],
286 demux_name: "fable-imax",
288 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"IMAX") },
289 CheckItem{offs: 10, cond: &CC::Eq(Arg::U16LE(0x102)) }],
292 demux_name: "hl-fmv",
294 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"FMV*") },
295 CheckItem{offs: 4, cond: &CC::Eq(Arg::U32LE(0)) }],
298 demux_name: "legend-q",
300 conditions: &[CheckItem{offs: 0, cond: &CC::Eq(Arg::U16LE(0x6839))},
301 CheckItem{offs: 2, cond: &CC::In(Arg::Byte(3), Arg::Byte(7))}],
305 extensions: ".vb,.vbc,.fcp,.son",
306 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"SIFF")},
307 CheckItem{offs: 4, cond: &CC::Or(
309 &CC::Str(b"VBV1VBHD"),
310 &CC::Str(b"SOUNSHDR")),
311 &CC::Str(b"FCPKFCHD"))}],
316 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"ANIM")},
317 CheckItem{offs: 8, cond: &CC::Str(b"AHDR")}],
320 demux_name: "smush-mcmp",
322 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"MCMP")},
323 CheckItem{offs: 6, cond: &CC::Eq(Arg::Byte(0))},
324 CheckItem{offs: 7, cond: &CC::Eq(Arg::Byte(0))}],
329 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"SANM")},
330 CheckItem{offs: 8, cond: &CC::Str(b"SHDR")}],
333 demux_name: "realaudio",
334 extensions: ".ra,.ram",
335 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b".ra\xFD")}],
338 demux_name: "realmedia",
339 extensions: ".rm,.rmvb,.rma,.ra,.ram",
340 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b".RMF"), &CC::Str(b".RMP")) },
341 CheckItem{offs: 4, cond: &CC::Ge(Arg::U32BE(10))}],
344 demux_name: "real_ivr",
346 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b".R1M"), &CC::Str(b".REC"))}],
350 extensions: ".bik,.bk2",
// NOTE(review): both upper bounds below end in 0x7B ('{'), one past 'z' (0x7A). `CC::In` is evaluated as
// `a.ge(src) && b.le(src)`, so if `le` is inclusive a '{' version byte is accepted too — confirm whether 0x7A was intended.
351 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::In(Arg::U32BE(0x42494B62), // BIKb
352 Arg::U32BE(0x42494B7B)), // BIK{ (one past BIKz)
353 &CC::In(Arg::U32BE(0x4B423261), // KB2a
354 Arg::U32BE(0x4B42327B)))}], // KB2{ (one past KB2z)
357 demux_name: "smacker",
359 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"SMK2"), &CC::Str(b"SMK4"))}],
364 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"MAC ") },
365 CheckItem{offs: 4, cond: &CC::In(Arg::U16LE(3800), Arg::U16LE(3990))}],
370 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"fLaC") }],
375 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"TTA1") }],
378 demux_name: "wavpack",
380 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"wvpk") },
381 CheckItem{offs: 8, cond: &CC::In(Arg::U16LE(0x402), Arg::U16LE(0x410))}],
386 conditions: &[CheckItem{offs: 0, cond: &CC::In(Arg::U16BE(1), Arg::U16BE(0xFF))},
387 CheckItem{offs: 2, cond: &CC::Str(b"\x0D\x0AVersion:Vivo/")}],
392 conditions: &[CheckItem{offs: 0, cond: &CC::In(Arg::U16BE(1), Arg::U16BE(0xFF))},
393 CheckItem{offs: 3, cond: &CC::Str(b"\x0D\x0AVersion:Vivo/")}],
403 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"BMVi") },
404 CheckItem{offs: 32, cond: &CC::Str(b"DATA")}],
414 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"VXDS") }],
418 /// Tries to detect container format.
420 /// This function tries to determine container format using both file extension and checking against container specific markers inside.
421 /// In case of success the function returns short container name and the detection score.
422 /// Result should have the highest detection score among tested.
///
/// A detector whose score reaches [`DetectionScore::MagicMatches`] is returned immediately; otherwise the
/// best-scoring detector seen so far is remembered in `result`.
423 pub fn detect_format(name: &str, src: &mut ByteReader) -> Option<(&'static str, DetectionScore)> {
424 let mut result = None;
// Extensions are compared against the lower-cased name, so upper-case file names match too.
425 let lname = name.to_lowercase();
426 for detector in DETECTORS {
427 let mut score = DetectionScore::No;
428 if !name.is_empty() {
// `extensions` is a comma-separated list; a suffix match on any entry counts.
429 for ext in detector.extensions.split(',') {
430 if lname.ends_with(ext) {
431 score = DetectionScore::ExtensionMatches;
// `passed` starts out false for a detector that has no content checks.
436 let mut passed = !detector.conditions.is_empty();
437 for ck in detector.conditions {
// Every check is positioned absolutely from the start of `src`.
438 let ret = src.seek(SeekFrom::Start(u64::from(ck.offs)));
443 if !ck.cond.eval(src) {
449 score = DetectionScore::MagicMatches;
// A content match ends the scan right away.
451 if score == DetectionScore::MagicMatches {
452 return Some((detector.demux_name, score));
// Otherwise keep the first detector with a non-`No` score and replace it only by a strictly better one.
454 if result.is_none() && score != DetectionScore::No {
455 result = Some((detector.demux_name, score));
456 } else if result.is_some() {
457 let (_, oldsc) = result.unwrap();
458 if oldsc.less(score) {
459 result = Some((detector.demux_name, score));
466 /// Tries to detect container format for provided file name.
///
/// The name is lower-cased and tested with `ends_with` against every entry of each detector's
/// comma-separated extension list; the `demux_name` of the first detector with a matching entry is returned.
467 pub fn detect_format_by_name(name: &str) -> Option<&'static str> {
471 let lname = name.to_lowercase();
472 for detector in DETECTORS {
473 for ext in detector.extensions.split(',') {
// Suffix match on the whole lower-cased name.
474 if lname.ends_with(ext) {
475 return Some(detector.demux_name);
486 use nihav_core::io::byteio::*;
// Expects the AVI sample to be detected as "avi" with `MagicMatches`, i.e. by its content.
// The sample file must be present at the given relative path: `File::open(..).unwrap()` panics otherwise.
489 fn test_avi_detect() {
490 let name = "assets/Indeo/laser05.avi";
491 let mut file = File::open(name).unwrap();
492 let mut fr = FileReader::new_read(&mut file);
493 let mut br = ByteReader::new(&mut fr);
494 let (name, score) = detect_format(name, &mut br).unwrap();
495 assert_eq!(name, "avi");
496 assert_eq!(score, DetectionScore::MagicMatches);
// Expects the GDV sample to be detected as "gdv" with `MagicMatches`, i.e. by its content.
// The sample file must be present at the given relative path: `File::open(..).unwrap()` panics otherwise.
500 fn test_gdv_detect() {
501 let name = "assets/Game/intro1.gdv";
502 let mut file = File::open(name).unwrap();
503 let mut fr = FileReader::new_read(&mut file);
504 let mut br = ByteReader::new(&mut fr);
505 let (name, score) = detect_format(name, &mut br).unwrap();
506 assert_eq!(name, "gdv");
507 assert_eq!(score, DetectionScore::MagicMatches);