]>
Commit | Line | Data |
---|---|---|
1 | //! Container format detection. | |
2 | //! | |
3 | //! Usually the user does not know the container format of the file being opened. | |
4 | //! That is why format detection functionality is needed. | |
5 | //! This module contains a set of rules for detecting the container format not merely by file extension but also, where possible, by file content. | |
6 | //! | |
7 | //! # Examples | |
8 | //! | |
9 | //! ```no_run | |
10 | //! use nihav_core::detect::detect_format; | |
11 | //! use std::fs::File; | |
12 | //! use nihav_core::io::byteio::*; | |
13 | //! | |
14 | //! let name = "mediafile.ogv"; | |
15 | //! let mut file = File::open(name).unwrap(); | |
16 | //! let mut filereader = FileReader::new_read(&mut file); | |
17 | //! let mut br = ByteReader::new(&mut filereader); | |
18 | //! let result = detect_format(name, &mut br); | |
19 | //! if let Some((name, score)) = result { | |
20 | //! println!("detected format {} with score {:?}", name, score); | |
21 | //! } | |
22 | //! ``` | |
23 | use std::io::SeekFrom; | |
24 | use nihav_core::io::byteio::ByteReader; | |
25 | ||
/// Confidence level of a container format detection.
///
/// Variants are declared from the weakest match to the strongest one.
#[derive(Debug,Clone,Copy,PartialEq)]
pub enum DetectionScore {
    /// The format was not detected.
    No,
    /// The format was matched by file extension only.
    ExtensionMatches,
    /// The format was matched by markers found inside the file.
    MagicMatches,
}

impl DetectionScore {
    /// Returns `true` when this score is strictly lower than `other`.
    pub fn less(self, other: DetectionScore) -> bool {
        // Variants are compared by their declaration order.
        let rank = |score: DetectionScore| score as i32;
        rank(self) < rank(other)
    }
}
43 | ||
44 | #[allow(dead_code)] | |
45 | enum Arg { | |
46 | Byte(u8), | |
47 | U16BE(u16), | |
48 | U16LE(u16), | |
49 | U24BE(u32), | |
50 | U24LE(u32), | |
51 | U32BE(u32), | |
52 | U32LE(u32), | |
53 | U64BE(u64), | |
54 | U64LE(u64), | |
55 | } | |
56 | ||
57 | impl Arg { | |
58 | fn val(&self) -> u64 { | |
59 | match *self { | |
60 | Arg::Byte(b) => { u64::from(b) } | |
61 | Arg::U16BE(v) => { u64::from(v) } | |
62 | Arg::U16LE(v) => { u64::from(v) } | |
63 | Arg::U24BE(v) => { u64::from(v) } | |
64 | Arg::U24LE(v) => { u64::from(v) } | |
65 | Arg::U32BE(v) => { u64::from(v) } | |
66 | Arg::U32LE(v) => { u64::from(v) } | |
67 | Arg::U64BE(v) => { v } | |
68 | Arg::U64LE(v) => { v } | |
69 | } | |
70 | } | |
71 | fn read_val(&self, src: &mut ByteReader) -> Option<u64> { | |
72 | match *self { | |
73 | Arg::Byte(_) => { | |
74 | let res = src.peek_byte(); | |
75 | if res.is_err() { return None; } | |
76 | Some(u64::from(res.unwrap())) | |
77 | } | |
78 | Arg::U16BE(_) => { | |
79 | let res = src.peek_u16be(); | |
80 | if res.is_err() { return None; } | |
81 | Some(u64::from(res.unwrap())) | |
82 | } | |
83 | Arg::U16LE(_) => { | |
84 | let res = src.peek_u16le(); | |
85 | if res.is_err() { return None; } | |
86 | Some(u64::from(res.unwrap())) | |
87 | } | |
88 | Arg::U24BE(_) => { | |
89 | let res = src.peek_u24be(); | |
90 | if res.is_err() { return None; } | |
91 | Some(u64::from(res.unwrap())) | |
92 | } | |
93 | Arg::U24LE(_) => { | |
94 | let res = src.peek_u24le(); | |
95 | if res.is_err() { return None; } | |
96 | Some(u64::from(res.unwrap())) | |
97 | } | |
98 | Arg::U32BE(_) => { | |
99 | let res = src.peek_u32be(); | |
100 | if res.is_err() { return None; } | |
101 | Some(u64::from(res.unwrap())) | |
102 | } | |
103 | Arg::U32LE(_) => { | |
104 | let res = src.peek_u32le(); | |
105 | if res.is_err() { return None; } | |
106 | Some(u64::from(res.unwrap())) | |
107 | } | |
108 | Arg::U64BE(_) => { | |
109 | let res = src.peek_u64be(); | |
110 | if res.is_err() { return None; } | |
111 | Some(res.unwrap()) | |
112 | } | |
113 | Arg::U64LE(_) => { | |
114 | let res = src.peek_u64le(); | |
115 | if res.is_err() { return None; } | |
116 | Some(res.unwrap()) | |
117 | } | |
118 | } | |
119 | } | |
120 | fn eq(&self, src: &mut ByteReader) -> bool { | |
121 | let val = self.read_val(src); | |
122 | if val.is_none() { false } | |
123 | else { val.unwrap() == self.val() } | |
124 | } | |
125 | fn ge(&self, src: &mut ByteReader) -> bool { | |
126 | let val = self.read_val(src); | |
127 | if val.is_none() { false } | |
128 | else { val.unwrap() >= self.val() } | |
129 | } | |
130 | fn gt(&self, src: &mut ByteReader) -> bool { | |
131 | let val = self.read_val(src); | |
132 | if val.is_none() { false } | |
133 | else { val.unwrap() > self.val() } | |
134 | } | |
135 | fn le(&self, src: &mut ByteReader) -> bool { | |
136 | let val = self.read_val(src); | |
137 | if val.is_none() { false } | |
138 | else { val.unwrap() <= self.val() } | |
139 | } | |
140 | fn lt(&self, src: &mut ByteReader) -> bool { | |
141 | let val = self.read_val(src); | |
142 | if val.is_none() { false } | |
143 | else { val.unwrap() < self.val() } | |
144 | } | |
145 | } | |
146 | ||
147 | #[allow(dead_code)] | |
148 | enum CC<'a> { | |
149 | Or(&'a CC<'a>, &'a CC<'a>), | |
150 | Eq(Arg), | |
151 | Str(&'static [u8]), | |
152 | In(Arg, Arg), | |
153 | Lt(Arg), | |
154 | Le(Arg), | |
155 | Gt(Arg), | |
156 | Ge(Arg), | |
157 | } | |
158 | ||
159 | impl<'a> CC<'a> { | |
160 | fn eval(&self, src: &mut ByteReader) -> bool { | |
161 | match *self { | |
162 | CC::Or (ref a, ref b) => { a.eval(src) || b.eval(src) }, | |
163 | CC::Eq(ref arg) => { arg.eq(src) }, | |
164 | CC::In(ref a, ref b) => { a.ge(src) && b.le(src) }, | |
165 | CC::Lt(ref arg) => { arg.lt(src) }, | |
166 | CC::Le(ref arg) => { arg.le(src) }, | |
167 | CC::Gt(ref arg) => { arg.gt(src) }, | |
168 | CC::Ge(ref arg) => { arg.ge(src) }, | |
169 | CC::Str(str) => { | |
170 | let mut val: Vec<u8> = vec![0; str.len()]; | |
171 | let res = src.peek_buf(val.as_mut_slice()); | |
172 | if res.is_err() { return false; } | |
173 | val == str | |
174 | } | |
175 | } | |
176 | } | |
177 | } | |
178 | ||
179 | struct CheckItem<'a> { | |
180 | offs: u32, | |
181 | cond: &'a CC<'a>, | |
182 | } | |
183 | ||
184 | #[allow(dead_code)] | |
185 | struct DetectConditions<'a> { | |
186 | demux_name: &'static str, | |
187 | extensions: &'static str, | |
188 | conditions: &'a [CheckItem<'a>], | |
189 | } | |
190 | ||
191 | const DETECTORS: &[DetectConditions] = &[ | |
192 | DetectConditions { | |
193 | demux_name: "avi", | |
194 | extensions: ".avi", | |
195 | conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"RIFF"), &CC::Str(b"ON2 ")) }, | |
196 | CheckItem{offs: 8, cond: &CC::Or(&CC::Or(&CC::Str(b"AVI LIST"), | |
197 | &CC::Str(b"AVIXLIST")), | |
198 | &CC::Str(b"ON2fLIST")) }, | |
199 | ] | |
200 | }, | |
201 | DetectConditions { | |
202 | demux_name: "wav", | |
203 | extensions: ".wav", | |
204 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"RIFF") }, | |
205 | CheckItem{offs: 8, cond: &CC::Str(b"WAVEfmt ") } | |
206 | ] | |
207 | }, | |
208 | DetectConditions { | |
209 | demux_name: "mov", | |
210 | extensions: ".mov", | |
211 | conditions: &[CheckItem{offs: 4, cond: &CC::Or(&CC::Or(&CC::Str(b"mdat"), | |
212 | &CC::Str(b"moov")), | |
213 | &CC::Str(b"ftyp")) }], | |
214 | }, | |
215 | DetectConditions { | |
216 | demux_name: "mov", | |
217 | extensions: ".mov", | |
218 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"\x00\x00\x00\x08wide") }, | |
219 | CheckItem{offs: 12, cond: &CC::Or(&CC::Or(&CC::Str(b"mdat"), | |
220 | &CC::Str(b"moov")), | |
221 | &CC::Str(b"ftyp")) }], | |
222 | }, | |
223 | DetectConditions { | |
224 | demux_name: "gdv", | |
225 | extensions: ".gdv", | |
226 | conditions: &[CheckItem{offs: 0, cond: &CC::Eq(Arg::U32LE(0x29111994))}], | |
227 | }, | |
228 | DetectConditions { | |
229 | demux_name: "realaudio", | |
230 | extensions: ".ra,.ram", | |
231 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b".ra\xFD")}], | |
232 | }, | |
233 | DetectConditions { | |
234 | demux_name: "realmedia", | |
235 | extensions: ".rm,.rmvb,.rma,.ra,.ram", | |
236 | conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b".RMF"), &CC::Str(b".RMP")) }, | |
237 | CheckItem{offs: 4, cond: &CC::Ge(Arg::U32BE(10))}], | |
238 | }, | |
239 | DetectConditions { | |
240 | demux_name: "real_ivr", | |
241 | extensions: ".ivr", | |
242 | conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b".R1M"), &CC::Str(b".REC"))}], | |
243 | }, | |
244 | DetectConditions { | |
245 | demux_name: "bink", | |
246 | extensions: ".bik,.bk2", | |
247 | conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::In(Arg::U32BE(0x42494B62), // BIKb | |
248 | Arg::U32BE(0x42494B7B)), // BIKz | |
249 | &CC::In(Arg::U32BE(0x4B423261), // KB2a | |
250 | Arg::U32BE(0x4B42327B)))}], // KB2z | |
251 | }, | |
252 | DetectConditions { | |
253 | demux_name: "smacker", | |
254 | extensions: ".smk", | |
255 | conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"SMK2"), &CC::Str(b"SMK4"))}], | |
256 | }, | |
257 | DetectConditions { | |
258 | demux_name: "vivo", | |
259 | extensions: ".viv", | |
260 | conditions: &[CheckItem{offs: 0, cond: &CC::In(Arg::U16BE(1), Arg::U16BE(0xFF))}, | |
261 | CheckItem{offs: 2, cond: &CC::Str(b"\x0D\x0AVersion:Vivo/")}], | |
262 | }, | |
263 | DetectConditions { | |
264 | demux_name: "vivo", | |
265 | extensions: ".viv", | |
266 | conditions: &[CheckItem{offs: 0, cond: &CC::In(Arg::U16BE(1), Arg::U16BE(0xFF))}, | |
267 | CheckItem{offs: 3, cond: &CC::Str(b"\x0D\x0AVersion:Vivo/")}], | |
268 | }, | |
269 | DetectConditions { | |
270 | demux_name: "bmv", | |
271 | extensions: ".bmv", | |
272 | conditions: &[], | |
273 | }, | |
274 | DetectConditions { | |
275 | demux_name: "bmv3", | |
276 | extensions: ".bmv", | |
277 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"BMVi") }, | |
278 | CheckItem{offs: 32, cond: &CC::Str(b"DATA")}], | |
279 | }, | |
280 | DetectConditions { | |
281 | demux_name: "vmd", | |
282 | extensions: ".vmd", | |
283 | conditions: &[], | |
284 | }, | |
285 | ]; | |
286 | ||
287 | /// Tries to detect container format. | |
288 | /// | |
289 | /// This function tries to determine container format using both file extension and checking against container specific markers inside. | |
290 | /// In case of success the function returns short container name and the detection score. | |
291 | /// Result should have the highest detection score among tested. | |
292 | pub fn detect_format(name: &str, src: &mut ByteReader) -> Option<(&'static str, DetectionScore)> { | |
293 | let mut result = None; | |
294 | let lname = name.to_lowercase(); | |
295 | for detector in DETECTORS { | |
296 | let mut score = DetectionScore::No; | |
297 | if !name.is_empty() { | |
298 | for ext in detector.extensions.split(',') { | |
299 | if lname.ends_with(ext) { | |
300 | score = DetectionScore::ExtensionMatches; | |
301 | break; | |
302 | } | |
303 | } | |
304 | } | |
305 | let mut passed = !detector.conditions.is_empty(); | |
306 | for ck in detector.conditions { | |
307 | let ret = src.seek(SeekFrom::Start(u64::from(ck.offs))); | |
308 | if ret.is_err() { | |
309 | passed = false; | |
310 | break; | |
311 | } | |
312 | if !ck.cond.eval(src) { | |
313 | passed = false; | |
314 | break; | |
315 | } | |
316 | } | |
317 | if passed { | |
318 | score = DetectionScore::MagicMatches; | |
319 | } | |
320 | if score == DetectionScore::MagicMatches { | |
321 | return Some((detector.demux_name, score)); | |
322 | } | |
323 | if result.is_none() && score != DetectionScore::No { | |
324 | result = Some((detector.demux_name, score)); | |
325 | } else if result.is_some() { | |
326 | let (_, oldsc) = result.unwrap(); | |
327 | if oldsc.less(score) { | |
328 | result = Some((detector.demux_name, score)); | |
329 | } | |
330 | } | |
331 | } | |
332 | result | |
333 | } | |
334 | ||
335 | /// Tries to detect container format for provided file name. | |
336 | pub fn detect_format_by_name(name: &str) -> Option<(&'static str)> { | |
337 | if name.is_empty() { | |
338 | return None; | |
339 | } | |
340 | let lname = name.to_lowercase(); | |
341 | for detector in DETECTORS { | |
342 | for ext in detector.extensions.split(',') { | |
343 | if lname.ends_with(ext) { | |
344 | return Some(detector.demux_name); | |
345 | } | |
346 | } | |
347 | } | |
348 | None | |
349 | } | |
350 | ||
#[cfg(test)]
mod test {
    use super::*;
    use std::fs::File;
    // Same crate path as the module-level `ByteReader` import, so both refer to one `byteio`.
    use nihav_core::io::byteio::*;

    /// Opens the sample at `path` and runs full detection (name and content) on it.
    ///
    /// Panics if the sample cannot be opened or nothing is detected.
    fn detect_file(path: &str) -> (&'static str, DetectionScore) {
        let mut file = File::open(path).unwrap();
        let mut fr = FileReader::new_read(&mut file);
        let mut br = ByteReader::new(&mut fr);
        detect_format(path, &mut br).unwrap()
    }

    #[test]
    fn test_avi_detect() {
        let (name, score) = detect_file("assets/Indeo/laser05.avi");
        assert_eq!(name, "avi");
        assert_eq!(score, DetectionScore::MagicMatches);
    }

    #[test]
    fn test_gdv_detect() {
        let (name, score) = detect_file("assets/Game/intro1.gdv");
        assert_eq!(name, "gdv");
        assert_eq!(score, DetectionScore::MagicMatches);
    }
}