]> git.nihav.org Git - nihav.git/blob - nihav-registry/src/detect.rs
63db8c4e7ccd11bd41f23ea26b09521a5f3b80c2
[nihav.git] / nihav-registry / src / detect.rs
1 //! Container format detection.
2 //!
3 //! Usually the user does not know the container format of the opened file.
4 //! That is why format detection functionality is needed.
5 //! This module contains a set of rules for detecting the container format not merely by file extension but also by file content where possible.
6 //!
7 //! # Examples
8 //!
9 //! ```no_run
10 //! use nihav_registry::detect::detect_format;
11 //! use std::fs::File;
12 //! use nihav_core::io::byteio::*;
13 //!
14 //! let name = "mediafile.ogv";
15 //! let mut file = File::open(name).unwrap();
16 //! let mut br = FileReader::new_read(&mut file);
17 //! let result = detect_format(name, &mut br);
18 //! if let Some((name, score)) = result {
19 //! println!("detected format {} with score {:?}", name, score);
20 //! }
21 //! ```
22 use std::io::SeekFrom;
23 use nihav_core::io::byteio::ByteIO;
24
/// Format detection score.
///
/// Variants are declared from the weakest to the strongest evidence, so the
/// derived ordering ranks `No < ExtensionMatches < MagicMatches`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum DetectionScore {
    /// Format is not detected.
    No,
    /// Format matched by file extension.
    ExtensionMatches,
    /// Format matches by markers inside the file.
    MagicMatches,
}

impl DetectionScore {
    /// Checks whether current detection score is less than a value it is compared against.
    ///
    /// Equivalent to `self < other`; kept as a named method for existing callers.
    pub fn less(self, other: DetectionScore) -> bool {
        self < other
    }
}
42
43 #[allow(dead_code)]
44 enum Arg {
45 Byte(u8),
46 U16BE(u16),
47 U16LE(u16),
48 U24BE(u32),
49 U24LE(u32),
50 U32BE(u32),
51 U32LE(u32),
52 U64BE(u64),
53 U64LE(u64),
54 }
55
56 impl Arg {
57 fn val(&self) -> u64 {
58 match *self {
59 Arg::Byte(b) => { u64::from(b) }
60 Arg::U16BE(v) => { u64::from(v) }
61 Arg::U16LE(v) => { u64::from(v) }
62 Arg::U24BE(v) => { u64::from(v) }
63 Arg::U24LE(v) => { u64::from(v) }
64 Arg::U32BE(v) => { u64::from(v) }
65 Arg::U32LE(v) => { u64::from(v) }
66 Arg::U64BE(v) => { v }
67 Arg::U64LE(v) => { v }
68 }
69 }
70 fn read_val(&self, src: &mut dyn ByteIO) -> Option<u64> {
71 match *self {
72 Arg::Byte(_) => {
73 let res = src.peek_byte();
74 if res.is_err() { return None; }
75 Some(u64::from(res.unwrap()))
76 }
77 Arg::U16BE(_) => {
78 let res = src.peek_u16be();
79 if res.is_err() { return None; }
80 Some(u64::from(res.unwrap()))
81 }
82 Arg::U16LE(_) => {
83 let res = src.peek_u16le();
84 if res.is_err() { return None; }
85 Some(u64::from(res.unwrap()))
86 }
87 Arg::U24BE(_) => {
88 let res = src.peek_u24be();
89 if res.is_err() { return None; }
90 Some(u64::from(res.unwrap()))
91 }
92 Arg::U24LE(_) => {
93 let res = src.peek_u24le();
94 if res.is_err() { return None; }
95 Some(u64::from(res.unwrap()))
96 }
97 Arg::U32BE(_) => {
98 let res = src.peek_u32be();
99 if res.is_err() { return None; }
100 Some(u64::from(res.unwrap()))
101 }
102 Arg::U32LE(_) => {
103 let res = src.peek_u32le();
104 if res.is_err() { return None; }
105 Some(u64::from(res.unwrap()))
106 }
107 Arg::U64BE(_) => {
108 let res = src.peek_u64be();
109 if res.is_err() { return None; }
110 Some(res.unwrap())
111 }
112 Arg::U64LE(_) => {
113 let res = src.peek_u64le();
114 if res.is_err() { return None; }
115 Some(res.unwrap())
116 }
117 }
118 }
119 fn eq(&self, src: &mut dyn ByteIO) -> bool {
120 if let Some(rval) = self.read_val(src) {
121 rval == self.val()
122 } else {
123 false
124 }
125 }
126 fn ge(&self, src: &mut dyn ByteIO) -> bool {
127 if let Some(rval) = self.read_val(src) {
128 rval >= self.val()
129 } else {
130 false
131 }
132 }
133 fn gt(&self, src: &mut dyn ByteIO) -> bool {
134 if let Some(rval) = self.read_val(src) {
135 rval > self.val()
136 } else {
137 false
138 }
139 }
140 fn le(&self, src: &mut dyn ByteIO) -> bool {
141 if let Some(rval) = self.read_val(src) {
142 rval <= self.val()
143 } else {
144 false
145 }
146 }
147 fn lt(&self, src: &mut dyn ByteIO) -> bool {
148 if let Some(rval) = self.read_val(src) {
149 rval < self.val()
150 } else {
151 false
152 }
153 }
154 }
155
156 #[allow(dead_code)]
157 enum CC<'a> {
158 Or(&'a CC<'a>, &'a CC<'a>),
159 Eq(Arg),
160 Str(&'static [u8]),
161 In(Arg, Arg),
162 Lt(Arg),
163 Le(Arg),
164 Gt(Arg),
165 Ge(Arg),
166 }
167
168 impl<'a> CC<'a> {
169 fn eval(&self, src: &mut dyn ByteIO) -> bool {
170 match *self {
171 CC::Or(a, b) => { a.eval(src) || b.eval(src) },
172 CC::Eq(ref arg) => { arg.eq(src) },
173 CC::In(ref a, ref b) => { a.ge(src) && b.le(src) },
174 CC::Lt(ref arg) => { arg.lt(src) },
175 CC::Le(ref arg) => { arg.le(src) },
176 CC::Gt(ref arg) => { arg.gt(src) },
177 CC::Ge(ref arg) => { arg.ge(src) },
178 CC::Str(strng) => {
179 let mut val: Vec<u8> = vec![0; strng.len()];
180 let res = src.peek_buf(val.as_mut_slice());
181 if res.is_err() { return false; }
182 val == strng
183 }
184 }
185 }
186 }
187
188 struct CheckItem<'a> {
189 offs: u32,
190 cond: &'a CC<'a>,
191 }
192
193 #[allow(dead_code)]
194 struct DetectConditions<'a> {
195 demux_name: &'static str,
196 extensions: &'static str,
197 conditions: &'a [CheckItem<'a>],
198 }
199
200 const DETECTORS: &[DetectConditions] = &[
201 DetectConditions {
202 demux_name: "avi",
203 extensions: ".avi",
204 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"RIFF"), &CC::Str(b"ON2 ")) },
205 CheckItem{offs: 8, cond: &CC::Or(&CC::Or(&CC::Str(b"AVI LIST"),
206 &CC::Str(b"AVIXLIST")),
207 &CC::Str(b"ON2fLIST")) },
208 CheckItem{offs: 20, cond: &CC::Str(b"hdrlavih")},
209 ]
210 },
211 DetectConditions {
212 demux_name: "avi-dib",
213 extensions: ".avi",
214 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"RIFF") },
215 CheckItem{offs: 8, cond: &CC::Str(b"AVI LIST")},
216 CheckItem{offs: 20, cond: &CC::Str(b"hdrlhdra")},
217 ]
218 },
219 DetectConditions {
220 demux_name: "wav",
221 extensions: ".wav",
222 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"RIFF") },
223 CheckItem{offs: 8, cond: &CC::Str(b"WAVEfmt ") }
224 ]
225 },
226 DetectConditions {
227 demux_name: "mov",
228 extensions: ".mov",
229 conditions: &[CheckItem{offs: 4, cond: &CC::Or(&CC::Or(&CC::Str(b"mdat"),
230 &CC::Str(b"moov")),
231 &CC::Str(b"ftyp")) }],
232 },
233 DetectConditions {
234 demux_name: "gif",
235 extensions: ".gif",
236 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"GIF87a"),
237 &CC::Str(b"GIF89a")) }],
238 },
239 DetectConditions {
240 demux_name: "mov",
241 extensions: ".mov",
242 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"\x00\x00\x00\x08wide") },
243 CheckItem{offs: 12, cond: &CC::Or(&CC::Or(&CC::Str(b"mdat"),
244 &CC::Str(b"moov")),
245 &CC::Str(b"ftyp")) }],
246 },
247 DetectConditions {
248 demux_name: "mov-macbin",
249 extensions: ".mov,.bin",
250 conditions: &[CheckItem{offs: 0, cond: &CC::Eq(Arg::Byte(0))},
251 CheckItem{offs: 0x41, cond: &CC::Str(b"MooV")},
252 CheckItem{offs: 0x7A, cond: &CC::Eq(Arg::Byte(0x81))},
253 CheckItem{offs: 0x7B, cond: &CC::Eq(Arg::Byte(0x81))},
254 CheckItem{offs: 0x84, cond: &CC::Str(b"mdat")}],
255 },
256 DetectConditions {
257 demux_name: "mov-resfork",
258 extensions: ".mov",
259 conditions: &[CheckItem{offs: 0, cond: &CC::Eq(Arg::U32BE(0x100))},
260 CheckItem{offs: 0x108, cond: &CC::Str(b"moov")}],
261 },
262 DetectConditions {
263 demux_name: "yuv4mpeg",
264 extensions: ".y4m",
265 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"YUV4MPEG2 ") }],
266 },
267 DetectConditions {
268 demux_name: "armovie",
269 extensions: ".rpl",
270 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"ARMovie\n") }],
271 },
272 DetectConditions {
273 demux_name: "tca",
274 extensions: ".tca",
275 conditions: &[CheckItem{offs: 0x00, cond: &CC::Str(b"ACEF") },
276 CheckItem{offs: 0x18, cond: &CC::Eq(Arg::U32LE(64))}],
277 },
278 DetectConditions {
279 demux_name: "flv",
280 extensions: ".flv",
281 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"FLV") },
282 CheckItem{offs: 3, cond: &CC::Le(Arg::Byte(1)) }],
283 },
284 DetectConditions {
285 demux_name: "dvi",
286 extensions: ".avs,.dvi",
287 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"IVDV")},
288 CheckItem{offs: 12, cond: &CC::Str(b"SSVA")}],
289 },
290 DetectConditions {
291 demux_name: "ivf",
292 extensions: ".ivf",
293 conditions: &[CheckItem{offs: 0, cond: &CC::Str(&[0x50, 0xEF, 0x81, 0x19, 0xB3, 0xBD, 0xD0, 0x11, 0xA3, 0xE5, 0x00, 0xA0, 0xC9, 0x24, 0x44])},
294 CheckItem{offs: 15, cond: &CC::Or(&CC::Eq(Arg::Byte(0x36)), &CC::Eq(Arg::Byte(0x37)))}],
295 },
296 DetectConditions {
297 demux_name: "dkivf",
298 extensions: ".ivf",
299 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"DKIF\x00\x00")},
300 CheckItem{offs: 6, cond: &CC::Ge(Arg::U16LE(32))}],
301 },
302 DetectConditions {
303 demux_name: "gdv",
304 extensions: ".gdv",
305 conditions: &[CheckItem{offs: 0, cond: &CC::Eq(Arg::U32LE(0x29111994))}],
306 },
307 DetectConditions {
308 demux_name: "smush",
309 extensions: ".san",
310 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"ANIM")},
311 CheckItem{offs: 8, cond: &CC::Str(b"AHDR")}],
312 },
313 DetectConditions {
314 demux_name: "smush-mcmp",
315 extensions: ".imc",
316 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"MCMP")},
317 CheckItem{offs: 6, cond: &CC::Eq(Arg::Byte(0))},
318 CheckItem{offs: 7, cond: &CC::Eq(Arg::Byte(0))}],
319 },
320 DetectConditions {
321 demux_name: "smush",
322 extensions: ".snm",
323 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"SANM")},
324 CheckItem{offs: 8, cond: &CC::Str(b"SHDR")}],
325 },
326 DetectConditions {
327 demux_name: "mvi",
328 extensions: ".mvi",
329 conditions: &[],
330 },
331 DetectConditions {
332 demux_name: "qpeg",
333 extensions: ".dvc",
334 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"IDVCd")}],
335 },
336 DetectConditions {
337 demux_name: "tealmov",
338 extensions: ".pdb",
339 conditions: &[CheckItem{offs: 0x3C, cond: &CC::Str(b"MvieTlMv")}],
340 },
341 DetectConditions {
342 demux_name: "realaudio",
343 extensions: ".ra,.ram",
344 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b".ra\xFD")}],
345 },
346 DetectConditions {
347 demux_name: "realmedia",
348 extensions: ".rm,.rmvb,.rma,.ra,.ram",
349 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b".RMF"), &CC::Str(b".RMP")) },
350 CheckItem{offs: 4, cond: &CC::Ge(Arg::U32BE(10))}],
351 },
352 DetectConditions {
353 demux_name: "real_ivr",
354 extensions: ".ivr",
355 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b".R1M"), &CC::Str(b".REC"))}],
356 },
357 DetectConditions {
358 demux_name: "bink",
359 extensions: ".bik,.bk2",
360 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::In(Arg::U32BE(0x42494B62), // BIKb
361 Arg::U32BE(0x42494B7B)), // BIKz
362 &CC::In(Arg::U32BE(0x4B423261), // KB2a
363 Arg::U32BE(0x4B42327B)))}], // KB2z
364 },
365 DetectConditions {
366 demux_name: "smacker",
367 extensions: ".smk",
368 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"SMK2"), &CC::Str(b"SMK4"))}],
369 },
370 DetectConditions {
371 demux_name: "ape",
372 extensions: ".ape",
373 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"MAC ") },
374 CheckItem{offs: 4, cond: &CC::In(Arg::U16LE(3800), Arg::U16LE(3990))}],
375 },
376 DetectConditions {
377 demux_name: "flac",
378 extensions: ".flac",
379 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"fLaC") }],
380 },
381 DetectConditions {
382 demux_name: "tta",
383 extensions: ".tta",
384 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"TTA1") }],
385 },
386 DetectConditions {
387 demux_name: "wavpack",
388 extensions: ".wv",
389 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"wvpk") },
390 CheckItem{offs: 8, cond: &CC::In(Arg::U16LE(0x402), Arg::U16LE(0x410))}],
391 },
392 DetectConditions {
393 demux_name: "vivo",
394 extensions: ".viv",
395 conditions: &[CheckItem{offs: 0, cond: &CC::In(Arg::U16BE(1), Arg::U16BE(0xFF))},
396 CheckItem{offs: 2, cond: &CC::Str(b"\x0D\x0AVersion:Vivo/")}],
397 },
398 DetectConditions {
399 demux_name: "vivo",
400 extensions: ".viv",
401 conditions: &[CheckItem{offs: 0, cond: &CC::In(Arg::U16BE(1), Arg::U16BE(0xFF))},
402 CheckItem{offs: 3, cond: &CC::Str(b"\x0D\x0AVersion:Vivo/")}],
403 },
404 DetectConditions {
405 demux_name: "bmv",
406 extensions: ".bmv",
407 conditions: &[],
408 },
409 DetectConditions {
410 demux_name: "bmv3",
411 extensions: ".bmv",
412 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"BMVi") },
413 CheckItem{offs: 32, cond: &CC::Str(b"DATA")}],
414 },
415 DetectConditions {
416 demux_name: "sga",
417 extensions: ".dtv,.avc",
418 conditions: &[],
419 },
420 DetectConditions {
421 demux_name: "sierra-seq",
422 extensions: ".seq",
423 conditions: &[],
424 },
425 DetectConditions {
426 demux_name: "vmd",
427 extensions: ".vmd",
428 conditions: &[],
429 },
430 ];
431
432 /// Tries to detect container format.
433 ///
434 /// This function tries to determine container format using both file extension and checking against container specific markers inside.
435 /// In case of success the function returns short container name and the detection score.
436 /// Result should have the highest detection score among tested.
437 pub fn detect_format(name: &str, src: &mut dyn ByteIO) -> Option<(&'static str, DetectionScore)> {
438 let mut result = None;
439 let lname = name.to_lowercase();
440 for detector in DETECTORS {
441 let mut score = DetectionScore::No;
442 if !name.is_empty() {
443 for ext in detector.extensions.split(',') {
444 if lname.ends_with(ext) {
445 score = DetectionScore::ExtensionMatches;
446 break;
447 }
448 }
449 }
450 let mut passed = !detector.conditions.is_empty();
451 for ck in detector.conditions {
452 let ret = src.seek(SeekFrom::Start(u64::from(ck.offs)));
453 if ret.is_err() {
454 passed = false;
455 break;
456 }
457 if !ck.cond.eval(src) {
458 passed = false;
459 break;
460 }
461 }
462 if passed {
463 score = DetectionScore::MagicMatches;
464 }
465 if score == DetectionScore::MagicMatches {
466 return Some((detector.demux_name, score));
467 }
468 if result.is_none() && score != DetectionScore::No {
469 result = Some((detector.demux_name, score));
470 } else if result.is_some() {
471 let (_, oldsc) = result.unwrap();
472 if oldsc.less(score) {
473 result = Some((detector.demux_name, score));
474 }
475 }
476 }
477 result
478 }
479
480 /// Tries to detect container format for provided file name.
481 pub fn detect_format_by_name(name: &str) -> Option<&'static str> {
482 if name.is_empty() {
483 return None;
484 }
485 let lname = name.to_lowercase();
486 for detector in DETECTORS {
487 for ext in detector.extensions.split(',') {
488 if lname.ends_with(ext) {
489 return Some(detector.demux_name);
490 }
491 }
492 }
493 None
494 }
495
#[cfg(test)]
mod test {
    use super::*;
    use std::fs::File;
    use nihav_core::io::byteio::*;

    /// Runs detection on the sample at `path` and checks that it is recognised
    /// as `expected` by its content markers.
    fn assert_detected_by_magic(path: &str, expected: &str) {
        let mut file = File::open(path).unwrap();
        let mut br = FileReader::new_read(&mut file);
        let (detected, score) = detect_format(path, &mut br).unwrap();
        assert_eq!(detected, expected);
        assert_eq!(score, DetectionScore::MagicMatches);
    }

    #[test]
    fn test_avi_detect() {
        assert_detected_by_magic("assets/Indeo/laser05.avi", "avi");
    }

    #[test]
    fn test_gdv_detect() {
        assert_detected_by_magic("assets/Game/intro1.gdv", "gdv");
    }
}