add Arxel Tribe video support
[nihav.git] / nihav-registry / src / detect.rs
1 //! Container format detection.
2 //!
3 //! Usually the user does not know the container format of the file being opened.
4 //! That is why format detection functionality is needed.
5 //! This module contains a set of rules to detect the container format not merely by file extension but also by file content where possible.
6 //!
7 //! # Examples
8 //!
9 //! ```no_run
10 //! use nihav_registry::detect::detect_format;
11 //! use std::fs::File;
12 //! use nihav_core::io::byteio::*;
13 //!
14 //! let name = "mediafile.ogv";
15 //! let mut file = File::open(name).unwrap();
16 //! let mut filereader = FileReader::new_read(&mut file);
17 //! let mut br = ByteReader::new(&mut filereader);
18 //! let result = detect_format(name, &mut br);
19 //! if let Some((name, score)) = result {
20 //! println!("detected format {} with score {:?}", name, score);
21 //! }
22 //! ```
23 use std::io::SeekFrom;
24 use nihav_core::io::byteio::ByteReader;
25
/// Format detection score.
///
/// Variants are declared from the weakest to the strongest evidence, so the
/// derived ordering ranks `No < ExtensionMatches < MagicMatches`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum DetectionScore {
    /// Format is not detected.
    No,
    /// Format matched by file extension.
    ExtensionMatches,
    /// Format matches by markers inside the file.
    MagicMatches,
}

impl DetectionScore {
    /// Checks whether current detection score is less than a value it is compared against.
    ///
    /// Equivalent to `self < other`; kept as a method for existing callers.
    pub fn less(self, other: DetectionScore) -> bool {
        self < other
    }
}
43
/// Reference value for a header check.
///
/// The variant selects the width and byte order used to read the value from
/// the file before it is compared with the payload stored here.
// Not every width is referenced by the detector table in this file, hence the lint override.
#[allow(dead_code)]
enum Arg {
    /// Single byte.
    Byte(u8),
    /// 16-bit value read as big-endian.
    U16BE(u16),
    /// 16-bit value read as little-endian.
    U16LE(u16),
    /// 24-bit value read as big-endian (stored in a `u32`).
    U24BE(u32),
    /// 24-bit value read as little-endian (stored in a `u32`).
    U24LE(u32),
    /// 32-bit value read as big-endian.
    U32BE(u32),
    /// 32-bit value read as little-endian.
    U32LE(u32),
    /// 64-bit value read as big-endian.
    U64BE(u64),
    /// 64-bit value read as little-endian.
    U64LE(u64),
}
56
57 impl Arg {
58 fn val(&self) -> u64 {
59 match *self {
60 Arg::Byte(b) => { u64::from(b) }
61 Arg::U16BE(v) => { u64::from(v) }
62 Arg::U16LE(v) => { u64::from(v) }
63 Arg::U24BE(v) => { u64::from(v) }
64 Arg::U24LE(v) => { u64::from(v) }
65 Arg::U32BE(v) => { u64::from(v) }
66 Arg::U32LE(v) => { u64::from(v) }
67 Arg::U64BE(v) => { v }
68 Arg::U64LE(v) => { v }
69 }
70 }
71 fn read_val(&self, src: &mut ByteReader) -> Option<u64> {
72 match *self {
73 Arg::Byte(_) => {
74 let res = src.peek_byte();
75 if res.is_err() { return None; }
76 Some(u64::from(res.unwrap()))
77 }
78 Arg::U16BE(_) => {
79 let res = src.peek_u16be();
80 if res.is_err() { return None; }
81 Some(u64::from(res.unwrap()))
82 }
83 Arg::U16LE(_) => {
84 let res = src.peek_u16le();
85 if res.is_err() { return None; }
86 Some(u64::from(res.unwrap()))
87 }
88 Arg::U24BE(_) => {
89 let res = src.peek_u24be();
90 if res.is_err() { return None; }
91 Some(u64::from(res.unwrap()))
92 }
93 Arg::U24LE(_) => {
94 let res = src.peek_u24le();
95 if res.is_err() { return None; }
96 Some(u64::from(res.unwrap()))
97 }
98 Arg::U32BE(_) => {
99 let res = src.peek_u32be();
100 if res.is_err() { return None; }
101 Some(u64::from(res.unwrap()))
102 }
103 Arg::U32LE(_) => {
104 let res = src.peek_u32le();
105 if res.is_err() { return None; }
106 Some(u64::from(res.unwrap()))
107 }
108 Arg::U64BE(_) => {
109 let res = src.peek_u64be();
110 if res.is_err() { return None; }
111 Some(res.unwrap())
112 }
113 Arg::U64LE(_) => {
114 let res = src.peek_u64le();
115 if res.is_err() { return None; }
116 Some(res.unwrap())
117 }
118 }
119 }
120 fn eq(&self, src: &mut ByteReader) -> bool {
121 if let Some(rval) = self.read_val(src) {
122 rval == self.val()
123 } else {
124 false
125 }
126 }
127 fn ge(&self, src: &mut ByteReader) -> bool {
128 if let Some(rval) = self.read_val(src) {
129 rval >= self.val()
130 } else {
131 false
132 }
133 }
134 fn gt(&self, src: &mut ByteReader) -> bool {
135 if let Some(rval) = self.read_val(src) {
136 rval > self.val()
137 } else {
138 false
139 }
140 }
141 fn le(&self, src: &mut ByteReader) -> bool {
142 if let Some(rval) = self.read_val(src) {
143 rval <= self.val()
144 } else {
145 false
146 }
147 }
148 fn lt(&self, src: &mut ByteReader) -> bool {
149 if let Some(rval) = self.read_val(src) {
150 rval < self.val()
151 } else {
152 false
153 }
154 }
155 }
156
157 #[allow(dead_code)]
158 enum CC<'a> {
159 Or(&'a CC<'a>, &'a CC<'a>),
160 Eq(Arg),
161 Str(&'static [u8]),
162 In(Arg, Arg),
163 Lt(Arg),
164 Le(Arg),
165 Gt(Arg),
166 Ge(Arg),
167 }
168
169 impl<'a> CC<'a> {
170 fn eval(&self, src: &mut ByteReader) -> bool {
171 match *self {
172 CC::Or(a, b) => { a.eval(src) || b.eval(src) },
173 CC::Eq(ref arg) => { arg.eq(src) },
174 CC::In(ref a, ref b) => { a.ge(src) && b.le(src) },
175 CC::Lt(ref arg) => { arg.lt(src) },
176 CC::Le(ref arg) => { arg.le(src) },
177 CC::Gt(ref arg) => { arg.gt(src) },
178 CC::Ge(ref arg) => { arg.ge(src) },
179 CC::Str(str) => {
180 let mut val: Vec<u8> = vec![0; str.len()];
181 let res = src.peek_buf(val.as_mut_slice());
182 if res.is_err() { return false; }
183 val == str
184 }
185 }
186 }
187 }
188
189 struct CheckItem<'a> {
190 offs: u32,
191 cond: &'a CC<'a>,
192 }
193
194 #[allow(dead_code)]
195 struct DetectConditions<'a> {
196 demux_name: &'static str,
197 extensions: &'static str,
198 conditions: &'a [CheckItem<'a>],
199 }
200
201 const DETECTORS: &[DetectConditions] = &[
202 DetectConditions {
203 demux_name: "avi",
204 extensions: ".avi",
205 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"RIFF"), &CC::Str(b"ON2 ")) },
206 CheckItem{offs: 8, cond: &CC::Or(&CC::Or(&CC::Str(b"AVI LIST"),
207 &CC::Str(b"AVIXLIST")),
208 &CC::Str(b"ON2fLIST")) },
209 ]
210 },
211 DetectConditions {
212 demux_name: "wav",
213 extensions: ".wav",
214 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"RIFF") },
215 CheckItem{offs: 8, cond: &CC::Str(b"WAVEfmt ") }
216 ]
217 },
218 DetectConditions {
219 demux_name: "mov",
220 extensions: ".mov",
221 conditions: &[CheckItem{offs: 4, cond: &CC::Or(&CC::Or(&CC::Str(b"mdat"),
222 &CC::Str(b"moov")),
223 &CC::Str(b"ftyp")) }],
224 },
225 DetectConditions {
226 demux_name: "mov",
227 extensions: ".mov",
228 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"\x00\x00\x00\x08wide") },
229 CheckItem{offs: 12, cond: &CC::Or(&CC::Or(&CC::Str(b"mdat"),
230 &CC::Str(b"moov")),
231 &CC::Str(b"ftyp")) }],
232 },
233 DetectConditions {
234 demux_name: "yuv4mpeg",
235 extensions: ".y4m",
236 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"YUV4MPEG2 ") }],
237 },
238 DetectConditions {
239 demux_name: "flv",
240 extensions: ".flv",
241 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"FLV") },
242 CheckItem{offs: 3, cond: &CC::Le(Arg::Byte(1)) }],
243 },
244 DetectConditions {
245 demux_name: "ivf",
246 extensions: ".ivf",
247 conditions: &[CheckItem{offs: 0, cond: &CC::Str(&[0x50, 0xEF, 0x81, 0x19, 0xB3, 0xBD, 0xD0, 0x11, 0xA3, 0xE5, 0x00, 0xA0, 0xC9, 0x24, 0x44])},
248 CheckItem{offs: 15, cond: &CC::Or(&CC::Eq(Arg::Byte(0x36)), &CC::Eq(Arg::Byte(0x37)))}],
249 },
250 DetectConditions {
251 demux_name: "dkivf",
252 extensions: ".ivf",
253 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"DKIF\x00\x00")},
254 CheckItem{offs: 6, cond: &CC::Ge(Arg::U16LE(32))}],
255 },
256 DetectConditions {
257 demux_name: "arxel-cnm",
258 extensions: ".cnm",
259 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"CNM UNR\x00")}],
260 },
261 DetectConditions {
262 demux_name: "fcmp",
263 extensions: ".cmp",
264 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"FCMP")}],
265 },
266 DetectConditions {
267 demux_name: "fst",
268 extensions: ".fst",
269 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"2TSF")}],
270 },
271 DetectConditions {
272 demux_name: "gdv",
273 extensions: ".gdv",
274 conditions: &[CheckItem{offs: 0, cond: &CC::Eq(Arg::U32LE(0x29111994))}],
275 },
276 DetectConditions {
277 demux_name: "fable-imax",
278 extensions: ".imx",
279 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"IMAX") },
280 CheckItem{offs: 10, cond: &CC::Eq(Arg::U16LE(0x102)) }],
281 },
282 DetectConditions {
283 demux_name: "hl-fmv",
284 extensions: ".fmv",
285 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"FMV*") },
286 CheckItem{offs: 4, cond: &CC::Eq(Arg::U32LE(0)) }],
287 },
288 DetectConditions {
289 demux_name: "legend-q",
290 extensions: ".q",
291 conditions: &[CheckItem{offs: 0, cond: &CC::Eq(Arg::U16LE(0x6839))},
292 CheckItem{offs: 2, cond: &CC::In(Arg::Byte(3), Arg::Byte(7))}],
293 },
294 DetectConditions {
295 demux_name: "siff",
296 extensions: ".vb,.vbc,.fcp,.son",
297 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"SIFF")},
298 CheckItem{offs: 4, cond: &CC::Or(
299 &CC::Or(
300 &CC::Str(b"VBV1VBHD"),
301 &CC::Str(b"SOUNSHDR")),
302 &CC::Str(b"FCPKFCHD"))}],
303 },
304 DetectConditions {
305 demux_name: "smush",
306 extensions: ".san",
307 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"ANIM")},
308 CheckItem{offs: 8, cond: &CC::Str(b"AHDR")}],
309 },
310 DetectConditions {
311 demux_name: "smush-mcmp",
312 extensions: ".imc",
313 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"MCMP")},
314 CheckItem{offs: 6, cond: &CC::Eq(Arg::Byte(0))},
315 CheckItem{offs: 7, cond: &CC::Eq(Arg::Byte(0))}],
316 },
317 DetectConditions {
318 demux_name: "smush",
319 extensions: ".snm",
320 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"SANM")},
321 CheckItem{offs: 8, cond: &CC::Str(b"SHDR")}],
322 },
323 DetectConditions {
324 demux_name: "realaudio",
325 extensions: ".ra,.ram",
326 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b".ra\xFD")}],
327 },
328 DetectConditions {
329 demux_name: "realmedia",
330 extensions: ".rm,.rmvb,.rma,.ra,.ram",
331 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b".RMF"), &CC::Str(b".RMP")) },
332 CheckItem{offs: 4, cond: &CC::Ge(Arg::U32BE(10))}],
333 },
334 DetectConditions {
335 demux_name: "real_ivr",
336 extensions: ".ivr",
337 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b".R1M"), &CC::Str(b".REC"))}],
338 },
339 DetectConditions {
340 demux_name: "bink",
341 extensions: ".bik,.bk2",
342 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::In(Arg::U32BE(0x42494B62), // BIKb
343 Arg::U32BE(0x42494B7B)), // BIKz
344 &CC::In(Arg::U32BE(0x4B423261), // KB2a
345 Arg::U32BE(0x4B42327B)))}], // KB2z
346 },
347 DetectConditions {
348 demux_name: "smacker",
349 extensions: ".smk",
350 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"SMK2"), &CC::Str(b"SMK4"))}],
351 },
352 DetectConditions {
353 demux_name: "ape",
354 extensions: ".ape",
355 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"MAC ") },
356 CheckItem{offs: 4, cond: &CC::In(Arg::U16LE(3800), Arg::U16LE(3990))}],
357 },
358 DetectConditions {
359 demux_name: "flac",
360 extensions: ".flac",
361 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"fLaC") }],
362 },
363 DetectConditions {
364 demux_name: "tta",
365 extensions: ".tta",
366 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"TTA1") }],
367 },
368 DetectConditions {
369 demux_name: "wavpack",
370 extensions: ".wv",
371 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"wvpk") },
372 CheckItem{offs: 8, cond: &CC::In(Arg::U16LE(0x402), Arg::U16LE(0x410))}],
373 },
374 DetectConditions {
375 demux_name: "vivo",
376 extensions: ".viv",
377 conditions: &[CheckItem{offs: 0, cond: &CC::In(Arg::U16BE(1), Arg::U16BE(0xFF))},
378 CheckItem{offs: 2, cond: &CC::Str(b"\x0D\x0AVersion:Vivo/")}],
379 },
380 DetectConditions {
381 demux_name: "vivo",
382 extensions: ".viv",
383 conditions: &[CheckItem{offs: 0, cond: &CC::In(Arg::U16BE(1), Arg::U16BE(0xFF))},
384 CheckItem{offs: 3, cond: &CC::Str(b"\x0D\x0AVersion:Vivo/")}],
385 },
386 DetectConditions {
387 demux_name: "bmv",
388 extensions: ".bmv",
389 conditions: &[],
390 },
391 DetectConditions {
392 demux_name: "bmv3",
393 extensions: ".bmv",
394 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"BMVi") },
395 CheckItem{offs: 32, cond: &CC::Str(b"DATA")}],
396 },
397 DetectConditions {
398 demux_name: "vmd",
399 extensions: ".vmd",
400 conditions: &[],
401 },
402 DetectConditions {
403 demux_name: "vx",
404 extensions: ".vx",
405 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"VXDS") }],
406 },
407 ];
408
409 /// Tries to detect container format.
410 ///
411 /// This function tries to determine container format using both file extension and checking against container specific markers inside.
412 /// In case of success the function returns short container name and the detection score.
413 /// Result should have the highest detection score among tested.
414 pub fn detect_format(name: &str, src: &mut ByteReader) -> Option<(&'static str, DetectionScore)> {
415 let mut result = None;
416 let lname = name.to_lowercase();
417 for detector in DETECTORS {
418 let mut score = DetectionScore::No;
419 if !name.is_empty() {
420 for ext in detector.extensions.split(',') {
421 if lname.ends_with(ext) {
422 score = DetectionScore::ExtensionMatches;
423 break;
424 }
425 }
426 }
427 let mut passed = !detector.conditions.is_empty();
428 for ck in detector.conditions {
429 let ret = src.seek(SeekFrom::Start(u64::from(ck.offs)));
430 if ret.is_err() {
431 passed = false;
432 break;
433 }
434 if !ck.cond.eval(src) {
435 passed = false;
436 break;
437 }
438 }
439 if passed {
440 score = DetectionScore::MagicMatches;
441 }
442 if score == DetectionScore::MagicMatches {
443 return Some((detector.demux_name, score));
444 }
445 if result.is_none() && score != DetectionScore::No {
446 result = Some((detector.demux_name, score));
447 } else if result.is_some() {
448 let (_, oldsc) = result.unwrap();
449 if oldsc.less(score) {
450 result = Some((detector.demux_name, score));
451 }
452 }
453 }
454 result
455 }
456
457 /// Tries to detect container format for provided file name.
458 pub fn detect_format_by_name(name: &str) -> Option<&'static str> {
459 if name.is_empty() {
460 return None;
461 }
462 let lname = name.to_lowercase();
463 for detector in DETECTORS {
464 for ext in detector.extensions.split(',') {
465 if lname.ends_with(ext) {
466 return Some(detector.demux_name);
467 }
468 }
469 }
470 None
471 }
472
#[cfg(test)]
mod test {
    use super::*;
    use std::fs::File;
    use nihav_core::io::byteio::*;

    /// Opens the file at `path`, runs detection on it and asserts that it is
    /// recognised as `expected` by its content markers.
    fn expect_magic_match(path: &str, expected: &str) {
        let mut file = File::open(path).unwrap();
        let mut fr = FileReader::new_read(&mut file);
        let mut br = ByteReader::new(&mut fr);
        let (demux_name, score) = detect_format(path, &mut br).unwrap();
        assert_eq!(demux_name, expected);
        assert_eq!(score, DetectionScore::MagicMatches);
    }

    #[test]
    fn test_avi_detect() {
        expect_magic_match("assets/Indeo/laser05.avi", "avi");
    }

    #[test]
    fn test_gdv_detect() {
        expect_magic_match("assets/Game/intro1.gdv", "gdv");
    }
}