add LinePack decoder
[nihav.git] / nihav-registry / src / detect.rs
CommitLineData
1b5ebe1d
KS
//! Container format detection.
//!
//! Usually the user does not know the container format of the opened file.
//! That is why format detection functionality is needed.
//! This module contains a set of rules to detect the container not merely by file extension but also by its content if possible.
//!
//! # Examples
//!
//! ```no_run
//! use nihav_registry::detect::detect_format;
//! use std::fs::File;
//! use nihav_core::io::byteio::*;
//!
//! let name = "mediafile.ogv";
//! let mut file = File::open(name).unwrap();
//! let mut filereader = FileReader::new_read(&mut file);
//! let mut br = ByteReader::new(&mut filereader);
//! let result = detect_format(name, &mut br);
//! if let Some((name, score)) = result {
//!     println!("detected format {} with score {:?}", name, score);
//! }
//! ```
d8ce0de0 23use std::io::SeekFrom;
32f7cbe5 24use nihav_core::io::byteio::ByteReader;
d8ce0de0 25
/// Format detection score.
///
/// Variants are declared in order of increasing confidence; [`DetectionScore::less`]
/// relies on that declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DetectionScore {
    /// Format is not detected.
    No,
    /// Format matched by file extension.
    ExtensionMatches,
    /// Format matches by markers inside the file.
    MagicMatches,
}

impl DetectionScore {
    /// Checks whether current detection score is less than a value it is compared against.
    ///
    /// Returns `true` only when `self` is strictly weaker than `other`.
    pub fn less(self, other: DetectionScore) -> bool {
        // Fieldless enum: the discriminants follow the declaration order above.
        (self as i32) < (other as i32)
    }
}
43
/// Reference value for a detection check.
///
/// The variant selects the width and byte order used when the value is
/// peeked from the input; the payload is the value to compare against.
// Not every width/endianness combination is used by the detector table.
#[allow(dead_code)]
enum Arg {
    /// Single byte.
    Byte(u8),
    /// 16-bit big-endian value.
    U16BE(u16),
    /// 16-bit little-endian value.
    U16LE(u16),
    /// 24-bit big-endian value (stored in a `u32`).
    U24BE(u32),
    /// 24-bit little-endian value (stored in a `u32`).
    U24LE(u32),
    /// 32-bit big-endian value.
    U32BE(u32),
    /// 32-bit little-endian value.
    U32LE(u32),
    /// 64-bit big-endian value.
    U64BE(u64),
    /// 64-bit little-endian value.
    U64LE(u64),
}
56
b5bd2ae4 57impl Arg {
d8ce0de0
KS
58 fn val(&self) -> u64 {
59 match *self {
e243ceb4
KS
60 Arg::Byte(b) => { u64::from(b) }
61 Arg::U16BE(v) => { u64::from(v) }
62 Arg::U16LE(v) => { u64::from(v) }
63 Arg::U24BE(v) => { u64::from(v) }
64 Arg::U24LE(v) => { u64::from(v) }
65 Arg::U32BE(v) => { u64::from(v) }
66 Arg::U32LE(v) => { u64::from(v) }
b5bd2ae4
KS
67 Arg::U64BE(v) => { v }
68 Arg::U64LE(v) => { v }
d8ce0de0
KS
69 }
70 }
71 fn read_val(&self, src: &mut ByteReader) -> Option<u64> {
72 match *self {
b5bd2ae4 73 Arg::Byte(_) => {
d8ce0de0 74 let res = src.peek_byte();
e243ceb4
KS
75 if res.is_err() { return None; }
76 Some(u64::from(res.unwrap()))
d8ce0de0 77 }
b5bd2ae4 78 Arg::U16BE(_) => {
d8ce0de0 79 let res = src.peek_u16be();
e243ceb4
KS
80 if res.is_err() { return None; }
81 Some(u64::from(res.unwrap()))
d8ce0de0 82 }
b5bd2ae4 83 Arg::U16LE(_) => {
d8ce0de0 84 let res = src.peek_u16le();
e243ceb4
KS
85 if res.is_err() { return None; }
86 Some(u64::from(res.unwrap()))
d8ce0de0 87 }
b5bd2ae4 88 Arg::U24BE(_) => {
d8ce0de0 89 let res = src.peek_u24be();
e243ceb4
KS
90 if res.is_err() { return None; }
91 Some(u64::from(res.unwrap()))
d8ce0de0 92 }
b5bd2ae4 93 Arg::U24LE(_) => {
d8ce0de0 94 let res = src.peek_u24le();
e243ceb4
KS
95 if res.is_err() { return None; }
96 Some(u64::from(res.unwrap()))
d8ce0de0 97 }
b5bd2ae4 98 Arg::U32BE(_) => {
d8ce0de0 99 let res = src.peek_u32be();
e243ceb4
KS
100 if res.is_err() { return None; }
101 Some(u64::from(res.unwrap()))
d8ce0de0 102 }
b5bd2ae4 103 Arg::U32LE(_) => {
d8ce0de0 104 let res = src.peek_u32le();
e243ceb4
KS
105 if res.is_err() { return None; }
106 Some(u64::from(res.unwrap()))
d8ce0de0 107 }
b5bd2ae4 108 Arg::U64BE(_) => {
d8ce0de0 109 let res = src.peek_u64be();
e243ceb4 110 if res.is_err() { return None; }
d8ce0de0
KS
111 Some(res.unwrap())
112 }
b5bd2ae4 113 Arg::U64LE(_) => {
d8ce0de0 114 let res = src.peek_u64le();
e243ceb4 115 if res.is_err() { return None; }
d8ce0de0
KS
116 Some(res.unwrap())
117 }
118 }
119 }
120 fn eq(&self, src: &mut ByteReader) -> bool {
01f55c6a
KS
121 if let Some(rval) = self.read_val(src) {
122 rval == self.val()
123 } else {
124 false
125 }
d8ce0de0
KS
126 }
127 fn ge(&self, src: &mut ByteReader) -> bool {
01f55c6a
KS
128 if let Some(rval) = self.read_val(src) {
129 rval >= self.val()
130 } else {
131 false
132 }
d8ce0de0
KS
133 }
134 fn gt(&self, src: &mut ByteReader) -> bool {
01f55c6a
KS
135 if let Some(rval) = self.read_val(src) {
136 rval > self.val()
137 } else {
138 false
139 }
d8ce0de0
KS
140 }
141 fn le(&self, src: &mut ByteReader) -> bool {
01f55c6a
KS
142 if let Some(rval) = self.read_val(src) {
143 rval <= self.val()
144 } else {
145 false
146 }
d8ce0de0
KS
147 }
148 fn lt(&self, src: &mut ByteReader) -> bool {
01f55c6a
KS
149 if let Some(rval) = self.read_val(src) {
150 rval < self.val()
151 } else {
152 false
153 }
d8ce0de0
KS
154 }
155}
156
157#[allow(dead_code)]
b5bd2ae4
KS
158enum CC<'a> {
159 Or(&'a CC<'a>, &'a CC<'a>),
160 Eq(Arg),
161 Str(&'static [u8]),
162 In(Arg, Arg),
163 Lt(Arg),
164 Le(Arg),
165 Gt(Arg),
166 Ge(Arg),
d8ce0de0
KS
167}
168
b5bd2ae4 169impl<'a> CC<'a> {
d8ce0de0
KS
170 fn eval(&self, src: &mut ByteReader) -> bool {
171 match *self {
6f263099 172 CC::Or(a, b) => { a.eval(src) || b.eval(src) },
b5bd2ae4 173 CC::Eq(ref arg) => { arg.eq(src) },
4d477e23 174 CC::In(ref a, ref b) => { a.ge(src) && b.le(src) },
b5bd2ae4
KS
175 CC::Lt(ref arg) => { arg.lt(src) },
176 CC::Le(ref arg) => { arg.le(src) },
177 CC::Gt(ref arg) => { arg.gt(src) },
178 CC::Ge(ref arg) => { arg.ge(src) },
405cec9e
KS
179 CC::Str(strng) => {
180 let mut val: Vec<u8> = vec![0; strng.len()];
d8ce0de0 181 let res = src.peek_buf(val.as_mut_slice());
e243ceb4 182 if res.is_err() { return false; }
405cec9e 183 val == strng
d8ce0de0
KS
184 }
185 }
186 }
187}
188
189struct CheckItem<'a> {
190 offs: u32,
b5bd2ae4 191 cond: &'a CC<'a>,
d8ce0de0
KS
192}
193
194#[allow(dead_code)]
195struct DetectConditions<'a> {
196 demux_name: &'static str,
197 extensions: &'static str,
198 conditions: &'a [CheckItem<'a>],
199}
200
201const DETECTORS: &[DetectConditions] = &[
202 DetectConditions {
203 demux_name: "avi",
204 extensions: ".avi",
b5bd2ae4
KS
205 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"RIFF"), &CC::Str(b"ON2 ")) },
206 CheckItem{offs: 8, cond: &CC::Or(&CC::Or(&CC::Str(b"AVI LIST"),
207 &CC::Str(b"AVIXLIST")),
208 &CC::Str(b"ON2fLIST")) },
209 ]
d8ce0de0 210 },
283abfa6
KS
211 DetectConditions {
212 demux_name: "wav",
213 extensions: ".wav",
214 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"RIFF") },
215 CheckItem{offs: 8, cond: &CC::Str(b"WAVEfmt ") }
216 ]
217 },
bdf66deb
KS
218 DetectConditions {
219 demux_name: "mov",
220 extensions: ".mov",
221 conditions: &[CheckItem{offs: 4, cond: &CC::Or(&CC::Or(&CC::Str(b"mdat"),
222 &CC::Str(b"moov")),
223 &CC::Str(b"ftyp")) }],
224 },
fc39649d
KS
225 DetectConditions {
226 demux_name: "gif",
227 extensions: ".gif",
228 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"GIF87a"),
229 &CC::Str(b"GIF89a")) }],
230 },
bdf66deb
KS
231 DetectConditions {
232 demux_name: "mov",
233 extensions: ".mov",
234 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"\x00\x00\x00\x08wide") },
235 CheckItem{offs: 12, cond: &CC::Or(&CC::Or(&CC::Str(b"mdat"),
236 &CC::Str(b"moov")),
237 &CC::Str(b"ftyp")) }],
238 },
88fd1059
KS
239 DetectConditions {
240 demux_name: "mov-macbin",
241 extensions: ".mov,.bin",
242 conditions: &[CheckItem{offs: 0, cond: &CC::Eq(Arg::Byte(0))},
243 CheckItem{offs: 0x41, cond: &CC::Str(b"MooV")},
244 CheckItem{offs: 0x7A, cond: &CC::Eq(Arg::Byte(0x81))},
245 CheckItem{offs: 0x7B, cond: &CC::Eq(Arg::Byte(0x81))},
246 CheckItem{offs: 0x84, cond: &CC::Str(b"mdat")}],
247 },
dbb2cbc9
KS
248 DetectConditions {
249 demux_name: "yuv4mpeg",
250 extensions: ".y4m",
251 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"YUV4MPEG2 ") }],
252 },
2d6ff332
KS
253 DetectConditions {
254 demux_name: "armovie",
255 extensions: ".rpl",
256 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"ARMovie\n") }],
257 },
92d9fb69
KS
258 DetectConditions {
259 demux_name: "flv",
260 extensions: ".flv",
261 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"FLV") },
262 CheckItem{offs: 3, cond: &CC::Le(Arg::Byte(1)) }],
263 },
e01d4987
KS
264 DetectConditions {
265 demux_name: "ivf",
266 extensions: ".ivf",
267 conditions: &[CheckItem{offs: 0, cond: &CC::Str(&[0x50, 0xEF, 0x81, 0x19, 0xB3, 0xBD, 0xD0, 0x11, 0xA3, 0xE5, 0x00, 0xA0, 0xC9, 0x24, 0x44])},
268 CheckItem{offs: 15, cond: &CC::Or(&CC::Eq(Arg::Byte(0x36)), &CC::Eq(Arg::Byte(0x37)))}],
269 },
fd7e6906 270 DetectConditions {
b8e71e0a 271 demux_name: "dkivf",
fd7e6906
KS
272 extensions: ".ivf",
273 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"DKIF\x00\x00")},
274 CheckItem{offs: 6, cond: &CC::Ge(Arg::U16LE(32))}],
275 },
1991b7da
KS
276 DetectConditions {
277 demux_name: "arxel-cnm",
278 extensions: ".cnm",
279 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"CNM UNR\x00")}],
280 },
8d91d85f
KS
281 DetectConditions {
282 demux_name: "fcmp",
283 extensions: ".cmp",
284 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"FCMP")}],
285 },
286 DetectConditions {
287 demux_name: "fst",
288 extensions: ".fst",
289 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"2TSF")}],
290 },
d8ce0de0
KS
291 DetectConditions {
292 demux_name: "gdv",
293 extensions: ".gdv",
b5bd2ae4 294 conditions: &[CheckItem{offs: 0, cond: &CC::Eq(Arg::U32LE(0x29111994))}],
d8ce0de0 295 },
3813fe8a
KS
296 DetectConditions {
297 demux_name: "fable-imax",
298 extensions: ".imx",
299 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"IMAX") },
300 CheckItem{offs: 10, cond: &CC::Eq(Arg::U16LE(0x102)) }],
301 },
ff6a914f
KS
302 DetectConditions {
303 demux_name: "hl-fmv",
304 extensions: ".fmv",
305 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"FMV*") },
306 CheckItem{offs: 4, cond: &CC::Eq(Arg::U32LE(0)) }],
307 },
afe1e5ba
KS
308 DetectConditions {
309 demux_name: "legend-q",
310 extensions: ".q",
311 conditions: &[CheckItem{offs: 0, cond: &CC::Eq(Arg::U16LE(0x6839))},
2826a23f 312 CheckItem{offs: 2, cond: &CC::In(Arg::Byte(3), Arg::Byte(7))}],
afe1e5ba 313 },
561d0f79
KS
314 DetectConditions {
315 demux_name: "siff",
316 extensions: ".vb,.vbc,.fcp,.son",
317 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"SIFF")},
318 CheckItem{offs: 4, cond: &CC::Or(
319 &CC::Or(
320 &CC::Str(b"VBV1VBHD"),
321 &CC::Str(b"SOUNSHDR")),
322 &CC::Str(b"FCPKFCHD"))}],
323 },
c17769db
KS
324 DetectConditions {
325 demux_name: "smush",
326 extensions: ".san",
327 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"ANIM")},
328 CheckItem{offs: 8, cond: &CC::Str(b"AHDR")}],
329 },
9e08bfdd
KS
330 DetectConditions {
331 demux_name: "smush-mcmp",
332 extensions: ".imc",
333 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"MCMP")},
334 CheckItem{offs: 6, cond: &CC::Eq(Arg::Byte(0))},
335 CheckItem{offs: 7, cond: &CC::Eq(Arg::Byte(0))}],
336 },
c17769db
KS
337 DetectConditions {
338 demux_name: "smush",
339 extensions: ".snm",
340 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"SANM")},
341 CheckItem{offs: 8, cond: &CC::Str(b"SHDR")}],
342 },
ce52b3b5
KS
343 DetectConditions {
344 demux_name: "realaudio",
345 extensions: ".ra,.ram",
346 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b".ra\xFD")}],
347 },
348 DetectConditions {
349 demux_name: "realmedia",
350 extensions: ".rm,.rmvb,.rma,.ra,.ram",
db5cc44b 351 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b".RMF"), &CC::Str(b".RMP")) },
ce52b3b5
KS
352 CheckItem{offs: 4, cond: &CC::Ge(Arg::U32BE(10))}],
353 },
354 DetectConditions {
355 demux_name: "real_ivr",
356 extensions: ".ivr",
357 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b".R1M"), &CC::Str(b".REC"))}],
358 },
c6c21059
KS
359 DetectConditions {
360 demux_name: "bink",
4d477e23 361 extensions: ".bik,.bk2",
4998874b
KS
362 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::In(Arg::U32BE(0x42494B62), // BIKb
363 Arg::U32BE(0x42494B7B)), // BIKz
c6c21059
KS
364 &CC::In(Arg::U32BE(0x4B423261), // KB2a
365 Arg::U32BE(0x4B42327B)))}], // KB2z
366 },
606c448e
KS
367 DetectConditions {
368 demux_name: "smacker",
369 extensions: ".smk",
370 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"SMK2"), &CC::Str(b"SMK4"))}],
371 },
87927c57
KS
372 DetectConditions {
373 demux_name: "ape",
374 extensions: ".ape",
375 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"MAC ") },
376 CheckItem{offs: 4, cond: &CC::In(Arg::U16LE(3800), Arg::U16LE(3990))}],
377 },
378 DetectConditions {
379 demux_name: "flac",
380 extensions: ".flac",
381 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"fLaC") }],
382 },
383 DetectConditions {
384 demux_name: "tta",
385 extensions: ".tta",
386 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"TTA1") }],
387 },
388 DetectConditions {
389 demux_name: "wavpack",
390 extensions: ".wv",
391 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"wvpk") },
392 CheckItem{offs: 8, cond: &CC::In(Arg::U16LE(0x402), Arg::U16LE(0x410))}],
393 },
128253cc 394 DetectConditions {
31cf33ac
KS
395 demux_name: "vivo",
396 extensions: ".viv",
397 conditions: &[CheckItem{offs: 0, cond: &CC::In(Arg::U16BE(1), Arg::U16BE(0xFF))},
398 CheckItem{offs: 2, cond: &CC::Str(b"\x0D\x0AVersion:Vivo/")}],
399 },
400 DetectConditions {
401 demux_name: "vivo",
402 extensions: ".viv",
403 conditions: &[CheckItem{offs: 0, cond: &CC::In(Arg::U16BE(1), Arg::U16BE(0xFF))},
404 CheckItem{offs: 3, cond: &CC::Str(b"\x0D\x0AVersion:Vivo/")}],
405 },
406 DetectConditions {
128253cc
KS
407 demux_name: "bmv",
408 extensions: ".bmv",
409 conditions: &[],
410 },
ecda1cc1
KS
411 DetectConditions {
412 demux_name: "bmv3",
413 extensions: ".bmv",
414 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"BMVi") },
415 CheckItem{offs: 32, cond: &CC::Str(b"DATA")}],
416 },
fead60e3
KS
417 DetectConditions {
418 demux_name: "sga",
419 extensions: ".dtv,.avc",
420 conditions: &[],
421 },
9895bd7b
KS
422 DetectConditions {
423 demux_name: "vmd",
424 extensions: ".vmd",
425 conditions: &[],
426 },
55d85231
KS
427 DetectConditions {
428 demux_name: "vx",
429 extensions: ".vx",
430 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"VXDS") }],
431 },
d8ce0de0
KS
432];
433
1b5ebe1d
KS
434/// Tries to detect container format.
435///
436/// This function tries to determine container format using both file extension and checking against container specific markers inside.
437/// In case of success the function returns short container name and the detection score.
438/// Result should have the highest detection score among tested.
d8ce0de0
KS
439pub fn detect_format(name: &str, src: &mut ByteReader) -> Option<(&'static str, DetectionScore)> {
440 let mut result = None;
441 let lname = name.to_lowercase();
442 for detector in DETECTORS {
443 let mut score = DetectionScore::No;
e243ceb4 444 if !name.is_empty() {
d8ce0de0
KS
445 for ext in detector.extensions.split(',') {
446 if lname.ends_with(ext) {
447 score = DetectionScore::ExtensionMatches;
448 break;
449 }
450 }
451 }
e243ceb4 452 let mut passed = !detector.conditions.is_empty();
d8ce0de0 453 for ck in detector.conditions {
e243ceb4
KS
454 let ret = src.seek(SeekFrom::Start(u64::from(ck.offs)));
455 if ret.is_err() {
d8ce0de0
KS
456 passed = false;
457 break;
458 }
459 if !ck.cond.eval(src) {
460 passed = false;
461 break;
462 }
463 }
464 if passed {
465 score = DetectionScore::MagicMatches;
466 }
467 if score == DetectionScore::MagicMatches {
468 return Some((detector.demux_name, score));
469 }
4d477e23 470 if result.is_none() && score != DetectionScore::No {
d8ce0de0 471 result = Some((detector.demux_name, score));
4d477e23 472 } else if result.is_some() {
d8ce0de0
KS
473 let (_, oldsc) = result.unwrap();
474 if oldsc.less(score) {
475 result = Some((detector.demux_name, score));
476 }
477 }
478 }
479 result
480}
481
4b56e8c9 482/// Tries to detect container format for provided file name.
ac818eac 483pub fn detect_format_by_name(name: &str) -> Option<&'static str> {
4b56e8c9
KS
484 if name.is_empty() {
485 return None;
486 }
487 let lname = name.to_lowercase();
488 for detector in DETECTORS {
489 for ext in detector.extensions.split(',') {
490 if lname.ends_with(ext) {
491 return Some(detector.demux_name);
492 }
493 }
494 }
495 None
496}
497
d8ce0de0
KS
#[cfg(test)]
mod test {
    use super::*;
    use std::fs::File;
    use nihav_core::io::byteio::*;

    /// Opens the file at `path` and checks that it is detected as `expected`
    /// by its content markers.
    fn check_magic_detection(path: &str, expected: &str) {
        let mut file = File::open(path).unwrap();
        let mut fr = FileReader::new_read(&mut file);
        let mut br = ByteReader::new(&mut fr);
        let (detected, score) = detect_format(path, &mut br).unwrap();
        assert_eq!(detected, expected);
        assert_eq!(score, DetectionScore::MagicMatches);
    }

    #[test]
    fn test_avi_detect() {
        check_magic_detection("assets/Indeo/laser05.avi", "avi");
    }

    #[test]
    fn test_gdv_detect() {
        check_magic_detection("assets/Game/intro1.gdv", "gdv");
    }
}