]>
Commit | Line | Data |
---|---|---|
1b5ebe1d KS |
1 | //! Container format detection. |
2 | //! | |
3 | //! Usually the user does not know the container format of the opened file. | |
4 | //! That is why format detection functionality is needed. | |
5 | //! This module contains a set of rules to detect the container format not merely by file extension but also, where possible, by file content. | |
6 | //! | |
7 | //! # Examples | |
8 | //! | |
9 | //! ```no_run | |
01f55c6a | 10 | //! use nihav_registry::detect::detect_format; |
1b5ebe1d KS |
11 | //! use std::fs::File; |
12 | //! use nihav_core::io::byteio::*; | |
13 | //! | |
14 | //! let name = "mediafile.ogv"; | |
15 | //! let mut file = File::open(name).unwrap(); | |
16 | //! let mut filereader = FileReader::new_read(&mut file); | |
17 | //! let mut br = ByteReader::new(&mut filereader); | |
18 | //! let result = detect_format(name, &mut br); | |
19 | //! if let Some((name, score)) = result { | |
20 | //! println!("detected format {} with score {:?}", name, score); | |
21 | //! } | |
22 | //! ``` | |
d8ce0de0 | 23 | use std::io::SeekFrom; |
32f7cbe5 | 24 | use nihav_core::io::byteio::ByteReader; |
d8ce0de0 | 25 | |
/// Format detection score.
///
/// Variants are declared from the weakest match to the strongest one, so the
/// derived ordering ranks `No < ExtensionMatches < MagicMatches`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum DetectionScore {
    /// Format is not detected.
    No,
    /// Format matched by file extension.
    ExtensionMatches,
    /// Format matches by markers inside the file.
    MagicMatches,
}
36 | ||
37 | impl DetectionScore { | |
1b5ebe1d | 38 | /// Checks whether current detection score is less than a value it is compared against. |
e243ceb4 KS |
39 | pub fn less(self, other: DetectionScore) -> bool { |
40 | (self as i32) < (other as i32) | |
d8ce0de0 KS |
41 | } |
42 | } | |
43 | ||
/// Reference value for a check.
///
/// The variant selects how many bytes are read from the input and in which
/// byte order; the payload is the value the read data is compared against.
// The detector table in this file does not use every variant.
#[allow(dead_code)]
enum Arg {
    /// Single byte.
    Byte(u8),
    /// 16-bit big-endian value.
    U16BE(u16),
    /// 16-bit little-endian value.
    U16LE(u16),
    /// 24-bit big-endian value.
    U24BE(u32),
    /// 24-bit little-endian value.
    U24LE(u32),
    /// 32-bit big-endian value.
    U32BE(u32),
    /// 32-bit little-endian value.
    U32LE(u32),
    /// 64-bit big-endian value.
    U64BE(u64),
    /// 64-bit little-endian value.
    U64LE(u64),
}
56 | ||
b5bd2ae4 | 57 | impl Arg { |
d8ce0de0 KS |
58 | fn val(&self) -> u64 { |
59 | match *self { | |
e243ceb4 KS |
60 | Arg::Byte(b) => { u64::from(b) } |
61 | Arg::U16BE(v) => { u64::from(v) } | |
62 | Arg::U16LE(v) => { u64::from(v) } | |
63 | Arg::U24BE(v) => { u64::from(v) } | |
64 | Arg::U24LE(v) => { u64::from(v) } | |
65 | Arg::U32BE(v) => { u64::from(v) } | |
66 | Arg::U32LE(v) => { u64::from(v) } | |
b5bd2ae4 KS |
67 | Arg::U64BE(v) => { v } |
68 | Arg::U64LE(v) => { v } | |
d8ce0de0 KS |
69 | } |
70 | } | |
71 | fn read_val(&self, src: &mut ByteReader) -> Option<u64> { | |
72 | match *self { | |
b5bd2ae4 | 73 | Arg::Byte(_) => { |
d8ce0de0 | 74 | let res = src.peek_byte(); |
e243ceb4 KS |
75 | if res.is_err() { return None; } |
76 | Some(u64::from(res.unwrap())) | |
d8ce0de0 | 77 | } |
b5bd2ae4 | 78 | Arg::U16BE(_) => { |
d8ce0de0 | 79 | let res = src.peek_u16be(); |
e243ceb4 KS |
80 | if res.is_err() { return None; } |
81 | Some(u64::from(res.unwrap())) | |
d8ce0de0 | 82 | } |
b5bd2ae4 | 83 | Arg::U16LE(_) => { |
d8ce0de0 | 84 | let res = src.peek_u16le(); |
e243ceb4 KS |
85 | if res.is_err() { return None; } |
86 | Some(u64::from(res.unwrap())) | |
d8ce0de0 | 87 | } |
b5bd2ae4 | 88 | Arg::U24BE(_) => { |
d8ce0de0 | 89 | let res = src.peek_u24be(); |
e243ceb4 KS |
90 | if res.is_err() { return None; } |
91 | Some(u64::from(res.unwrap())) | |
d8ce0de0 | 92 | } |
b5bd2ae4 | 93 | Arg::U24LE(_) => { |
d8ce0de0 | 94 | let res = src.peek_u24le(); |
e243ceb4 KS |
95 | if res.is_err() { return None; } |
96 | Some(u64::from(res.unwrap())) | |
d8ce0de0 | 97 | } |
b5bd2ae4 | 98 | Arg::U32BE(_) => { |
d8ce0de0 | 99 | let res = src.peek_u32be(); |
e243ceb4 KS |
100 | if res.is_err() { return None; } |
101 | Some(u64::from(res.unwrap())) | |
d8ce0de0 | 102 | } |
b5bd2ae4 | 103 | Arg::U32LE(_) => { |
d8ce0de0 | 104 | let res = src.peek_u32le(); |
e243ceb4 KS |
105 | if res.is_err() { return None; } |
106 | Some(u64::from(res.unwrap())) | |
d8ce0de0 | 107 | } |
b5bd2ae4 | 108 | Arg::U64BE(_) => { |
d8ce0de0 | 109 | let res = src.peek_u64be(); |
e243ceb4 | 110 | if res.is_err() { return None; } |
d8ce0de0 KS |
111 | Some(res.unwrap()) |
112 | } | |
b5bd2ae4 | 113 | Arg::U64LE(_) => { |
d8ce0de0 | 114 | let res = src.peek_u64le(); |
e243ceb4 | 115 | if res.is_err() { return None; } |
d8ce0de0 KS |
116 | Some(res.unwrap()) |
117 | } | |
118 | } | |
119 | } | |
120 | fn eq(&self, src: &mut ByteReader) -> bool { | |
01f55c6a KS |
121 | if let Some(rval) = self.read_val(src) { |
122 | rval == self.val() | |
123 | } else { | |
124 | false | |
125 | } | |
d8ce0de0 KS |
126 | } |
127 | fn ge(&self, src: &mut ByteReader) -> bool { | |
01f55c6a KS |
128 | if let Some(rval) = self.read_val(src) { |
129 | rval >= self.val() | |
130 | } else { | |
131 | false | |
132 | } | |
d8ce0de0 KS |
133 | } |
134 | fn gt(&self, src: &mut ByteReader) -> bool { | |
01f55c6a KS |
135 | if let Some(rval) = self.read_val(src) { |
136 | rval > self.val() | |
137 | } else { | |
138 | false | |
139 | } | |
d8ce0de0 KS |
140 | } |
141 | fn le(&self, src: &mut ByteReader) -> bool { | |
01f55c6a KS |
142 | if let Some(rval) = self.read_val(src) { |
143 | rval <= self.val() | |
144 | } else { | |
145 | false | |
146 | } | |
d8ce0de0 KS |
147 | } |
148 | fn lt(&self, src: &mut ByteReader) -> bool { | |
01f55c6a KS |
149 | if let Some(rval) = self.read_val(src) { |
150 | rval < self.val() | |
151 | } else { | |
152 | false | |
153 | } | |
d8ce0de0 KS |
154 | } |
155 | } | |
156 | ||
157 | #[allow(dead_code)] | |
b5bd2ae4 KS |
158 | enum CC<'a> { |
159 | Or(&'a CC<'a>, &'a CC<'a>), | |
160 | Eq(Arg), | |
161 | Str(&'static [u8]), | |
162 | In(Arg, Arg), | |
163 | Lt(Arg), | |
164 | Le(Arg), | |
165 | Gt(Arg), | |
166 | Ge(Arg), | |
d8ce0de0 KS |
167 | } |
168 | ||
b5bd2ae4 | 169 | impl<'a> CC<'a> { |
d8ce0de0 KS |
170 | fn eval(&self, src: &mut ByteReader) -> bool { |
171 | match *self { | |
b5bd2ae4 KS |
172 | CC::Or (ref a, ref b) => { a.eval(src) || b.eval(src) }, |
173 | CC::Eq(ref arg) => { arg.eq(src) }, | |
4d477e23 | 174 | CC::In(ref a, ref b) => { a.ge(src) && b.le(src) }, |
b5bd2ae4 KS |
175 | CC::Lt(ref arg) => { arg.lt(src) }, |
176 | CC::Le(ref arg) => { arg.le(src) }, | |
177 | CC::Gt(ref arg) => { arg.gt(src) }, | |
178 | CC::Ge(ref arg) => { arg.ge(src) }, | |
179 | CC::Str(str) => { | |
e243ceb4 | 180 | let mut val: Vec<u8> = vec![0; str.len()]; |
d8ce0de0 | 181 | let res = src.peek_buf(val.as_mut_slice()); |
e243ceb4 | 182 | if res.is_err() { return false; } |
d8ce0de0 KS |
183 | val == str |
184 | } | |
185 | } | |
186 | } | |
187 | } | |
188 | ||
189 | struct CheckItem<'a> { | |
190 | offs: u32, | |
b5bd2ae4 | 191 | cond: &'a CC<'a>, |
d8ce0de0 KS |
192 | } |
193 | ||
194 | #[allow(dead_code)] | |
195 | struct DetectConditions<'a> { | |
196 | demux_name: &'static str, | |
197 | extensions: &'static str, | |
198 | conditions: &'a [CheckItem<'a>], | |
199 | } | |
200 | ||
201 | const DETECTORS: &[DetectConditions] = &[ | |
202 | DetectConditions { | |
203 | demux_name: "avi", | |
204 | extensions: ".avi", | |
b5bd2ae4 KS |
205 | conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"RIFF"), &CC::Str(b"ON2 ")) }, |
206 | CheckItem{offs: 8, cond: &CC::Or(&CC::Or(&CC::Str(b"AVI LIST"), | |
207 | &CC::Str(b"AVIXLIST")), | |
208 | &CC::Str(b"ON2fLIST")) }, | |
209 | ] | |
d8ce0de0 | 210 | }, |
283abfa6 KS |
211 | DetectConditions { |
212 | demux_name: "wav", | |
213 | extensions: ".wav", | |
214 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"RIFF") }, | |
215 | CheckItem{offs: 8, cond: &CC::Str(b"WAVEfmt ") } | |
216 | ] | |
217 | }, | |
bdf66deb KS |
218 | DetectConditions { |
219 | demux_name: "mov", | |
220 | extensions: ".mov", | |
221 | conditions: &[CheckItem{offs: 4, cond: &CC::Or(&CC::Or(&CC::Str(b"mdat"), | |
222 | &CC::Str(b"moov")), | |
223 | &CC::Str(b"ftyp")) }], | |
224 | }, | |
225 | DetectConditions { | |
226 | demux_name: "mov", | |
227 | extensions: ".mov", | |
228 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"\x00\x00\x00\x08wide") }, | |
229 | CheckItem{offs: 12, cond: &CC::Or(&CC::Or(&CC::Str(b"mdat"), | |
230 | &CC::Str(b"moov")), | |
231 | &CC::Str(b"ftyp")) }], | |
232 | }, | |
dbb2cbc9 KS |
233 | DetectConditions { |
234 | demux_name: "yuv4mpeg", | |
235 | extensions: ".y4m", | |
236 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"YUV4MPEG2 ") }], | |
237 | }, | |
92d9fb69 KS |
238 | DetectConditions { |
239 | demux_name: "flv", | |
240 | extensions: ".flv", | |
241 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"FLV") }, | |
242 | CheckItem{offs: 3, cond: &CC::Le(Arg::Byte(1)) }], | |
243 | }, | |
fd7e6906 KS |
244 | DetectConditions { |
245 | demux_name: "ivf", | |
246 | extensions: ".ivf", | |
247 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"DKIF\x00\x00")}, | |
248 | CheckItem{offs: 6, cond: &CC::Ge(Arg::U16LE(32))}], | |
249 | }, | |
8d91d85f KS |
250 | DetectConditions { |
251 | demux_name: "fcmp", | |
252 | extensions: ".cmp", | |
253 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"FCMP")}], | |
254 | }, | |
255 | DetectConditions { | |
256 | demux_name: "fst", | |
257 | extensions: ".fst", | |
258 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"2TSF")}], | |
259 | }, | |
d8ce0de0 KS |
260 | DetectConditions { |
261 | demux_name: "gdv", | |
262 | extensions: ".gdv", | |
b5bd2ae4 | 263 | conditions: &[CheckItem{offs: 0, cond: &CC::Eq(Arg::U32LE(0x29111994))}], |
d8ce0de0 | 264 | }, |
3813fe8a KS |
265 | DetectConditions { |
266 | demux_name: "fable-imax", | |
267 | extensions: ".imx", | |
268 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"IMAX") }, | |
269 | CheckItem{offs: 10, cond: &CC::Eq(Arg::U16LE(0x102)) }], | |
270 | }, | |
afe1e5ba KS |
271 | DetectConditions { |
272 | demux_name: "legend-q", | |
273 | extensions: ".q", | |
274 | conditions: &[CheckItem{offs: 0, cond: &CC::Eq(Arg::U16LE(0x6839))}, | |
275 | CheckItem{offs: 2, cond: &CC::In(Arg::Byte(3), Arg::Byte(5))}], | |
276 | }, | |
c17769db KS |
277 | DetectConditions { |
278 | demux_name: "smush", | |
279 | extensions: ".san", | |
280 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"ANIM")}, | |
281 | CheckItem{offs: 8, cond: &CC::Str(b"AHDR")}], | |
282 | }, | |
9e08bfdd KS |
283 | DetectConditions { |
284 | demux_name: "smush-mcmp", | |
285 | extensions: ".imc", | |
286 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"MCMP")}, | |
287 | CheckItem{offs: 6, cond: &CC::Eq(Arg::Byte(0))}, | |
288 | CheckItem{offs: 7, cond: &CC::Eq(Arg::Byte(0))}], | |
289 | }, | |
c17769db KS |
290 | DetectConditions { |
291 | demux_name: "smush", | |
292 | extensions: ".snm", | |
293 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"SANM")}, | |
294 | CheckItem{offs: 8, cond: &CC::Str(b"SHDR")}], | |
295 | }, | |
ce52b3b5 KS |
296 | DetectConditions { |
297 | demux_name: "realaudio", | |
298 | extensions: ".ra,.ram", | |
299 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b".ra\xFD")}], | |
300 | }, | |
301 | DetectConditions { | |
302 | demux_name: "realmedia", | |
303 | extensions: ".rm,.rmvb,.rma,.ra,.ram", | |
db5cc44b | 304 | conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b".RMF"), &CC::Str(b".RMP")) }, |
ce52b3b5 KS |
305 | CheckItem{offs: 4, cond: &CC::Ge(Arg::U32BE(10))}], |
306 | }, | |
307 | DetectConditions { | |
308 | demux_name: "real_ivr", | |
309 | extensions: ".ivr", | |
310 | conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b".R1M"), &CC::Str(b".REC"))}], | |
311 | }, | |
c6c21059 KS |
312 | DetectConditions { |
313 | demux_name: "bink", | |
4d477e23 | 314 | extensions: ".bik,.bk2", |
4998874b KS |
315 | conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::In(Arg::U32BE(0x42494B62), // BIKb |
316 | Arg::U32BE(0x42494B7B)), // BIKz | |
c6c21059 KS |
317 | &CC::In(Arg::U32BE(0x4B423261), // KB2a |
318 | Arg::U32BE(0x4B42327B)))}], // KB2z | |
319 | }, | |
606c448e KS |
320 | DetectConditions { |
321 | demux_name: "smacker", | |
322 | extensions: ".smk", | |
323 | conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"SMK2"), &CC::Str(b"SMK4"))}], | |
324 | }, | |
87927c57 KS |
325 | DetectConditions { |
326 | demux_name: "ape", | |
327 | extensions: ".ape", | |
328 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"MAC ") }, | |
329 | CheckItem{offs: 4, cond: &CC::In(Arg::U16LE(3800), Arg::U16LE(3990))}], | |
330 | }, | |
331 | DetectConditions { | |
332 | demux_name: "flac", | |
333 | extensions: ".flac", | |
334 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"fLaC") }], | |
335 | }, | |
336 | DetectConditions { | |
337 | demux_name: "tta", | |
338 | extensions: ".tta", | |
339 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"TTA1") }], | |
340 | }, | |
341 | DetectConditions { | |
342 | demux_name: "wavpack", | |
343 | extensions: ".wv", | |
344 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"wvpk") }, | |
345 | CheckItem{offs: 8, cond: &CC::In(Arg::U16LE(0x402), Arg::U16LE(0x410))}], | |
346 | }, | |
128253cc | 347 | DetectConditions { |
31cf33ac KS |
348 | demux_name: "vivo", |
349 | extensions: ".viv", | |
350 | conditions: &[CheckItem{offs: 0, cond: &CC::In(Arg::U16BE(1), Arg::U16BE(0xFF))}, | |
351 | CheckItem{offs: 2, cond: &CC::Str(b"\x0D\x0AVersion:Vivo/")}], | |
352 | }, | |
353 | DetectConditions { | |
354 | demux_name: "vivo", | |
355 | extensions: ".viv", | |
356 | conditions: &[CheckItem{offs: 0, cond: &CC::In(Arg::U16BE(1), Arg::U16BE(0xFF))}, | |
357 | CheckItem{offs: 3, cond: &CC::Str(b"\x0D\x0AVersion:Vivo/")}], | |
358 | }, | |
359 | DetectConditions { | |
128253cc KS |
360 | demux_name: "bmv", |
361 | extensions: ".bmv", | |
362 | conditions: &[], | |
363 | }, | |
ecda1cc1 KS |
364 | DetectConditions { |
365 | demux_name: "bmv3", | |
366 | extensions: ".bmv", | |
367 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"BMVi") }, | |
368 | CheckItem{offs: 32, cond: &CC::Str(b"DATA")}], | |
369 | }, | |
9895bd7b KS |
370 | DetectConditions { |
371 | demux_name: "vmd", | |
372 | extensions: ".vmd", | |
373 | conditions: &[], | |
374 | }, | |
55d85231 KS |
375 | DetectConditions { |
376 | demux_name: "vx", | |
377 | extensions: ".vx", | |
378 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"VXDS") }], | |
379 | }, | |
d8ce0de0 KS |
380 | ]; |
381 | ||
1b5ebe1d KS |
382 | /// Tries to detect container format. |
383 | /// | |
384 | /// This function tries to determine container format using both file extension and checking against container specific markers inside. | |
385 | /// In case of success the function returns short container name and the detection score. | |
386 | /// Result should have the highest detection score among tested. | |
d8ce0de0 KS |
387 | pub fn detect_format(name: &str, src: &mut ByteReader) -> Option<(&'static str, DetectionScore)> { |
388 | let mut result = None; | |
389 | let lname = name.to_lowercase(); | |
390 | for detector in DETECTORS { | |
391 | let mut score = DetectionScore::No; | |
e243ceb4 | 392 | if !name.is_empty() { |
d8ce0de0 KS |
393 | for ext in detector.extensions.split(',') { |
394 | if lname.ends_with(ext) { | |
395 | score = DetectionScore::ExtensionMatches; | |
396 | break; | |
397 | } | |
398 | } | |
399 | } | |
e243ceb4 | 400 | let mut passed = !detector.conditions.is_empty(); |
d8ce0de0 | 401 | for ck in detector.conditions { |
e243ceb4 KS |
402 | let ret = src.seek(SeekFrom::Start(u64::from(ck.offs))); |
403 | if ret.is_err() { | |
d8ce0de0 KS |
404 | passed = false; |
405 | break; | |
406 | } | |
407 | if !ck.cond.eval(src) { | |
408 | passed = false; | |
409 | break; | |
410 | } | |
411 | } | |
412 | if passed { | |
413 | score = DetectionScore::MagicMatches; | |
414 | } | |
415 | if score == DetectionScore::MagicMatches { | |
416 | return Some((detector.demux_name, score)); | |
417 | } | |
4d477e23 | 418 | if result.is_none() && score != DetectionScore::No { |
d8ce0de0 | 419 | result = Some((detector.demux_name, score)); |
4d477e23 | 420 | } else if result.is_some() { |
d8ce0de0 KS |
421 | let (_, oldsc) = result.unwrap(); |
422 | if oldsc.less(score) { | |
423 | result = Some((detector.demux_name, score)); | |
424 | } | |
425 | } | |
426 | } | |
427 | result | |
428 | } | |
429 | ||
4b56e8c9 | 430 | /// Tries to detect container format for provided file name. |
ac818eac | 431 | pub fn detect_format_by_name(name: &str) -> Option<&'static str> { |
4b56e8c9 KS |
432 | if name.is_empty() { |
433 | return None; | |
434 | } | |
435 | let lname = name.to_lowercase(); | |
436 | for detector in DETECTORS { | |
437 | for ext in detector.extensions.split(',') { | |
438 | if lname.ends_with(ext) { | |
439 | return Some(detector.demux_name); | |
440 | } | |
441 | } | |
442 | } | |
443 | None | |
444 | } | |
445 | ||
d8ce0de0 KS |
#[cfg(test)]
mod test {
    use super::*;
    use std::fs::File;
    use nihav_core::io::byteio::*;

    /// Opens the file at `path` and runs format detection on it.
    ///
    /// Panics when the file cannot be opened or no format is detected.
    fn detect_file(path: &str) -> (&'static str, DetectionScore) {
        let mut file = File::open(path).unwrap();
        let mut fr = FileReader::new_read(&mut file);
        let mut br = ByteReader::new(&mut fr);
        detect_format(path, &mut br).unwrap()
    }

    #[test]
    fn test_avi_detect() {
        let (name, score) = detect_file("assets/Indeo/laser05.avi");
        assert_eq!(name, "avi");
        assert_eq!(score, DetectionScore::MagicMatches);
    }

    #[test]
    fn test_gdv_detect() {
        let (name, score) = detect_file("assets/Game/intro1.gdv");
        assert_eq!(name, "gdv");
        assert_eq!(score, DetectionScore::MagicMatches);
    }
}