add VivoActive (imperfect) format family support
[nihav.git] / nihav-registry / src / detect.rs
1 //! Container format detection.
2 //!
3 //! Usually the user does not know the container format of the opened file.
4 //! That is why format detection functionality is needed.
5 //! This module contains the set of rules to detect the container format not merely by file extension but also by its content if possible.
6 //!
7 //! # Examples
8 //!
9 //! ```no_run
10 //! use nihav_registry::detect::detect_format;
11 //! use std::fs::File;
12 //! use nihav_core::io::byteio::*;
13 //!
14 //! let name = "mediafile.ogv";
15 //! let mut file = File::open(name).unwrap();
16 //! let mut filereader = FileReader::new_read(&mut file);
17 //! let mut br = ByteReader::new(&mut filereader);
18 //! let result = detect_format(name, &mut br);
19 //! if let Some((name, score)) = result {
20 //! println!("detected format {} with score {:?}", name, score);
21 //! }
22 //! ```
23 use std::io::SeekFrom;
24 use nihav_core::io::byteio::ByteReader;
25
/// Confidence level reported by the format detector.
///
/// Variants are declared from the weakest to the strongest result;
/// [`DetectionScore::less`] compares scores using that declaration order.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum DetectionScore {
    /// Nothing indicates that the input is in this format.
    No,
    /// Only the file extension points to this format.
    ExtensionMatches,
    /// Markers inside the file contents match this format.
    MagicMatches,
}

impl DetectionScore {
    /// Reports whether this score ranks strictly below `other`.
    pub fn less(self, other: DetectionScore) -> bool {
        let mine = self as i32;
        let theirs = other as i32;
        theirs > mine
    }
}
43
/// Reference value used by a detection rule.
///
/// The variant selects how many bytes are peeked from the input and in which
/// byte order they are interpreted; the payload is the value the input is
/// compared against.
// Not every width/endianness combination is used by the rule table.
#[allow(dead_code)]
enum Arg {
    /// Single byte.
    Byte(u8),
    /// 16-bit big-endian value.
    U16BE(u16),
    /// 16-bit little-endian value.
    U16LE(u16),
    /// 24-bit big-endian value (stored in a `u32`).
    U24BE(u32),
    /// 24-bit little-endian value (stored in a `u32`).
    U24LE(u32),
    /// 32-bit big-endian value.
    U32BE(u32),
    /// 32-bit little-endian value.
    U32LE(u32),
    /// 64-bit big-endian value.
    U64BE(u64),
    /// 64-bit little-endian value.
    U64LE(u64),
}
56
57 impl Arg {
58 fn val(&self) -> u64 {
59 match *self {
60 Arg::Byte(b) => { u64::from(b) }
61 Arg::U16BE(v) => { u64::from(v) }
62 Arg::U16LE(v) => { u64::from(v) }
63 Arg::U24BE(v) => { u64::from(v) }
64 Arg::U24LE(v) => { u64::from(v) }
65 Arg::U32BE(v) => { u64::from(v) }
66 Arg::U32LE(v) => { u64::from(v) }
67 Arg::U64BE(v) => { v }
68 Arg::U64LE(v) => { v }
69 }
70 }
71 fn read_val(&self, src: &mut ByteReader) -> Option<u64> {
72 match *self {
73 Arg::Byte(_) => {
74 let res = src.peek_byte();
75 if res.is_err() { return None; }
76 Some(u64::from(res.unwrap()))
77 }
78 Arg::U16BE(_) => {
79 let res = src.peek_u16be();
80 if res.is_err() { return None; }
81 Some(u64::from(res.unwrap()))
82 }
83 Arg::U16LE(_) => {
84 let res = src.peek_u16le();
85 if res.is_err() { return None; }
86 Some(u64::from(res.unwrap()))
87 }
88 Arg::U24BE(_) => {
89 let res = src.peek_u24be();
90 if res.is_err() { return None; }
91 Some(u64::from(res.unwrap()))
92 }
93 Arg::U24LE(_) => {
94 let res = src.peek_u24le();
95 if res.is_err() { return None; }
96 Some(u64::from(res.unwrap()))
97 }
98 Arg::U32BE(_) => {
99 let res = src.peek_u32be();
100 if res.is_err() { return None; }
101 Some(u64::from(res.unwrap()))
102 }
103 Arg::U32LE(_) => {
104 let res = src.peek_u32le();
105 if res.is_err() { return None; }
106 Some(u64::from(res.unwrap()))
107 }
108 Arg::U64BE(_) => {
109 let res = src.peek_u64be();
110 if res.is_err() { return None; }
111 Some(res.unwrap())
112 }
113 Arg::U64LE(_) => {
114 let res = src.peek_u64le();
115 if res.is_err() { return None; }
116 Some(res.unwrap())
117 }
118 }
119 }
120 fn eq(&self, src: &mut ByteReader) -> bool {
121 let val = self.read_val(src);
122 if val.is_none() { false }
123 else { val.unwrap() == self.val() }
124 }
125 fn ge(&self, src: &mut ByteReader) -> bool {
126 let val = self.read_val(src);
127 if val.is_none() { false }
128 else { val.unwrap() >= self.val() }
129 }
130 fn gt(&self, src: &mut ByteReader) -> bool {
131 let val = self.read_val(src);
132 if val.is_none() { false }
133 else { val.unwrap() > self.val() }
134 }
135 fn le(&self, src: &mut ByteReader) -> bool {
136 let val = self.read_val(src);
137 if val.is_none() { false }
138 else { val.unwrap() <= self.val() }
139 }
140 fn lt(&self, src: &mut ByteReader) -> bool {
141 let val = self.read_val(src);
142 if val.is_none() { false }
143 else { val.unwrap() < self.val() }
144 }
145 }
146
147 #[allow(dead_code)]
148 enum CC<'a> {
149 Or(&'a CC<'a>, &'a CC<'a>),
150 Eq(Arg),
151 Str(&'static [u8]),
152 In(Arg, Arg),
153 Lt(Arg),
154 Le(Arg),
155 Gt(Arg),
156 Ge(Arg),
157 }
158
159 impl<'a> CC<'a> {
160 fn eval(&self, src: &mut ByteReader) -> bool {
161 match *self {
162 CC::Or (ref a, ref b) => { a.eval(src) || b.eval(src) },
163 CC::Eq(ref arg) => { arg.eq(src) },
164 CC::In(ref a, ref b) => { a.ge(src) && b.le(src) },
165 CC::Lt(ref arg) => { arg.lt(src) },
166 CC::Le(ref arg) => { arg.le(src) },
167 CC::Gt(ref arg) => { arg.gt(src) },
168 CC::Ge(ref arg) => { arg.ge(src) },
169 CC::Str(str) => {
170 let mut val: Vec<u8> = vec![0; str.len()];
171 let res = src.peek_buf(val.as_mut_slice());
172 if res.is_err() { return false; }
173 val == str
174 }
175 }
176 }
177 }
178
179 struct CheckItem<'a> {
180 offs: u32,
181 cond: &'a CC<'a>,
182 }
183
184 #[allow(dead_code)]
185 struct DetectConditions<'a> {
186 demux_name: &'static str,
187 extensions: &'static str,
188 conditions: &'a [CheckItem<'a>],
189 }
190
191 const DETECTORS: &[DetectConditions] = &[
192 DetectConditions {
193 demux_name: "avi",
194 extensions: ".avi",
195 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"RIFF"), &CC::Str(b"ON2 ")) },
196 CheckItem{offs: 8, cond: &CC::Or(&CC::Or(&CC::Str(b"AVI LIST"),
197 &CC::Str(b"AVIXLIST")),
198 &CC::Str(b"ON2fLIST")) },
199 ]
200 },
201 DetectConditions {
202 demux_name: "mov",
203 extensions: ".mov",
204 conditions: &[CheckItem{offs: 4, cond: &CC::Or(&CC::Or(&CC::Str(b"mdat"),
205 &CC::Str(b"moov")),
206 &CC::Str(b"ftyp")) }],
207 },
208 DetectConditions {
209 demux_name: "mov",
210 extensions: ".mov",
211 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"\x00\x00\x00\x08wide") },
212 CheckItem{offs: 12, cond: &CC::Or(&CC::Or(&CC::Str(b"mdat"),
213 &CC::Str(b"moov")),
214 &CC::Str(b"ftyp")) }],
215 },
216 DetectConditions {
217 demux_name: "gdv",
218 extensions: ".gdv",
219 conditions: &[CheckItem{offs: 0, cond: &CC::Eq(Arg::U32LE(0x29111994))}],
220 },
221 DetectConditions {
222 demux_name: "realaudio",
223 extensions: ".ra,.ram",
224 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b".ra\xFD")}],
225 },
226 DetectConditions {
227 demux_name: "realmedia",
228 extensions: ".rm,.rmvb,.rma,.ra,.ram",
229 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b".RMF"), &CC::Str(b".RMP")) },
230 CheckItem{offs: 4, cond: &CC::Ge(Arg::U32BE(10))}],
231 },
232 DetectConditions {
233 demux_name: "real_ivr",
234 extensions: ".ivr",
235 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b".R1M"), &CC::Str(b".REC"))}],
236 },
237 DetectConditions {
238 demux_name: "bink",
239 extensions: ".bik,.bk2",
240 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::In(Arg::U32BE(0x42494B62), // BIKb
241 Arg::U32BE(0x42494B7B)), // BIKz
242 &CC::In(Arg::U32BE(0x4B423261), // KB2a
243 Arg::U32BE(0x4B42327B)))}], // KB2z
244 },
245 DetectConditions {
246 demux_name: "smacker",
247 extensions: ".smk",
248 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"SMK2"), &CC::Str(b"SMK4"))}],
249 },
250 DetectConditions {
251 demux_name: "vivo",
252 extensions: ".viv",
253 conditions: &[CheckItem{offs: 0, cond: &CC::In(Arg::U16BE(1), Arg::U16BE(0xFF))},
254 CheckItem{offs: 2, cond: &CC::Str(b"\x0D\x0AVersion:Vivo/")}],
255 },
256 DetectConditions {
257 demux_name: "vivo",
258 extensions: ".viv",
259 conditions: &[CheckItem{offs: 0, cond: &CC::In(Arg::U16BE(1), Arg::U16BE(0xFF))},
260 CheckItem{offs: 3, cond: &CC::Str(b"\x0D\x0AVersion:Vivo/")}],
261 },
262 DetectConditions {
263 demux_name: "bmv",
264 extensions: ".bmv",
265 conditions: &[],
266 },
267 DetectConditions {
268 demux_name: "bmv3",
269 extensions: ".bmv",
270 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"BMVi") },
271 CheckItem{offs: 32, cond: &CC::Str(b"DATA")}],
272 },
273 DetectConditions {
274 demux_name: "vmd",
275 extensions: ".vmd",
276 conditions: &[],
277 },
278 ];
279
280 /// Tries to detect container format.
281 ///
282 /// This function tries to determine container format using both file extension and checking against container specific markers inside.
283 /// In case of success the function returns short container name and the detection score.
284 /// Result should have the highest detection score among tested.
285 pub fn detect_format(name: &str, src: &mut ByteReader) -> Option<(&'static str, DetectionScore)> {
286 let mut result = None;
287 let lname = name.to_lowercase();
288 for detector in DETECTORS {
289 let mut score = DetectionScore::No;
290 if !name.is_empty() {
291 for ext in detector.extensions.split(',') {
292 if lname.ends_with(ext) {
293 score = DetectionScore::ExtensionMatches;
294 break;
295 }
296 }
297 }
298 let mut passed = !detector.conditions.is_empty();
299 for ck in detector.conditions {
300 let ret = src.seek(SeekFrom::Start(u64::from(ck.offs)));
301 if ret.is_err() {
302 passed = false;
303 break;
304 }
305 if !ck.cond.eval(src) {
306 passed = false;
307 break;
308 }
309 }
310 if passed {
311 score = DetectionScore::MagicMatches;
312 }
313 if score == DetectionScore::MagicMatches {
314 return Some((detector.demux_name, score));
315 }
316 if result.is_none() && score != DetectionScore::No {
317 result = Some((detector.demux_name, score));
318 } else if result.is_some() {
319 let (_, oldsc) = result.unwrap();
320 if oldsc.less(score) {
321 result = Some((detector.demux_name, score));
322 }
323 }
324 }
325 result
326 }
327
#[cfg(test)]
mod test {
    use super::*;
    use std::fs::File;
    // The byte I/O types come from `nihav_core`, the same crate the parent
    // module imports `ByteReader` from.
    use nihav_core::io::byteio::*;

    /// Opens the sample at `path` and asserts that it is detected as
    /// `expected` by its content markers.
    ///
    /// Panics if the sample file cannot be opened or nothing is detected.
    fn check_magic_detection(path: &str, expected: &str) {
        let mut file = File::open(path).unwrap();
        let mut fr = FileReader::new_read(&mut file);
        let mut br = ByteReader::new(&mut fr);
        let (name, score) = detect_format(path, &mut br).unwrap();
        assert_eq!(name, expected);
        assert_eq!(score, DetectionScore::MagicMatches);
    }

    #[test]
    fn test_avi_detect() {
        check_magic_detection("assets/Indeo/laser05.avi", "avi");
    }

    #[test]
    fn test_gdv_detect() {
        check_magic_detection("assets/Game/intro1.gdv", "gdv");
    }
}