split nihav-registry from nihav-core
[nihav.git] / nihav-registry / src / detect.rs
CommitLineData
1b5ebe1d
KS
1//! Container format detection.
2//!
3//! Usually user does not know the container format of the opened file.
4//! That is why format detection functionality is needed.
5//! This module contains the set of rules to detect container not merely by file extension but also by its content if possible.
6//!
7//! # Examples
8//!
9//! ```no_run
10//! use nihav_registry::detect::detect_format;
11//! use std::fs::File;
12//! use nihav_core::io::byteio::*;
13//!
14//! let name = "mediafile.ogv";
15//! let mut file = File::open(name).unwrap();
16//! let mut filereader = FileReader::new_read(&mut file);
17//! let mut br = ByteReader::new(&mut filereader);
18//! let result = detect_format(name, &mut br);
19//! if let Some((name, score)) = result {
20//! println!("detected format {} with score {:?}", name, score);
21//! }
22//! ```
d8ce0de0 23use std::io::SeekFrom;
32f7cbe5 24use nihav_core::io::byteio::ByteReader;
d8ce0de0 25
/// Confidence level of a container format detection.
///
/// Variants are declared from the weakest to the strongest kind of match.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum DetectionScore {
    /// No sign of the format was found.
    No,
    /// Only the file extension corresponds to the format.
    ExtensionMatches,
    /// Markers inside the file correspond to the format.
    MagicMatches,
}
36
37impl DetectionScore {
1b5ebe1d 38 /// Checks whether current detection score is less than a value it is compared against.
e243ceb4
KS
39 pub fn less(self, other: DetectionScore) -> bool {
40 (self as i32) < (other as i32)
d8ce0de0
KS
41 }
42}
43
/// Reference value for a comparison against file contents.
///
/// The variant selects the width and byte order used when the counterpart
/// value is read from the stream; the payload is the value to compare with.
// Not every width/byte-order combination is used by the detector table.
#[allow(dead_code)]
enum Arg {
    /// Single byte.
    Byte(u8),
    /// 16-bit big-endian value.
    U16BE(u16),
    /// 16-bit little-endian value.
    U16LE(u16),
    /// 24-bit big-endian value (stored in a `u32`).
    U24BE(u32),
    /// 24-bit little-endian value (stored in a `u32`).
    U24LE(u32),
    /// 32-bit big-endian value.
    U32BE(u32),
    /// 32-bit little-endian value.
    U32LE(u32),
    /// 64-bit big-endian value.
    U64BE(u64),
    /// 64-bit little-endian value.
    U64LE(u64),
}
56
b5bd2ae4 57impl Arg {
d8ce0de0
KS
58 fn val(&self) -> u64 {
59 match *self {
e243ceb4
KS
60 Arg::Byte(b) => { u64::from(b) }
61 Arg::U16BE(v) => { u64::from(v) }
62 Arg::U16LE(v) => { u64::from(v) }
63 Arg::U24BE(v) => { u64::from(v) }
64 Arg::U24LE(v) => { u64::from(v) }
65 Arg::U32BE(v) => { u64::from(v) }
66 Arg::U32LE(v) => { u64::from(v) }
b5bd2ae4
KS
67 Arg::U64BE(v) => { v }
68 Arg::U64LE(v) => { v }
d8ce0de0
KS
69 }
70 }
71 fn read_val(&self, src: &mut ByteReader) -> Option<u64> {
72 match *self {
b5bd2ae4 73 Arg::Byte(_) => {
d8ce0de0 74 let res = src.peek_byte();
e243ceb4
KS
75 if res.is_err() { return None; }
76 Some(u64::from(res.unwrap()))
d8ce0de0 77 }
b5bd2ae4 78 Arg::U16BE(_) => {
d8ce0de0 79 let res = src.peek_u16be();
e243ceb4
KS
80 if res.is_err() { return None; }
81 Some(u64::from(res.unwrap()))
d8ce0de0 82 }
b5bd2ae4 83 Arg::U16LE(_) => {
d8ce0de0 84 let res = src.peek_u16le();
e243ceb4
KS
85 if res.is_err() { return None; }
86 Some(u64::from(res.unwrap()))
d8ce0de0 87 }
b5bd2ae4 88 Arg::U24BE(_) => {
d8ce0de0 89 let res = src.peek_u24be();
e243ceb4
KS
90 if res.is_err() { return None; }
91 Some(u64::from(res.unwrap()))
d8ce0de0 92 }
b5bd2ae4 93 Arg::U24LE(_) => {
d8ce0de0 94 let res = src.peek_u24le();
e243ceb4
KS
95 if res.is_err() { return None; }
96 Some(u64::from(res.unwrap()))
d8ce0de0 97 }
b5bd2ae4 98 Arg::U32BE(_) => {
d8ce0de0 99 let res = src.peek_u32be();
e243ceb4
KS
100 if res.is_err() { return None; }
101 Some(u64::from(res.unwrap()))
d8ce0de0 102 }
b5bd2ae4 103 Arg::U32LE(_) => {
d8ce0de0 104 let res = src.peek_u32le();
e243ceb4
KS
105 if res.is_err() { return None; }
106 Some(u64::from(res.unwrap()))
d8ce0de0 107 }
b5bd2ae4 108 Arg::U64BE(_) => {
d8ce0de0 109 let res = src.peek_u64be();
e243ceb4 110 if res.is_err() { return None; }
d8ce0de0
KS
111 Some(res.unwrap())
112 }
b5bd2ae4 113 Arg::U64LE(_) => {
d8ce0de0 114 let res = src.peek_u64le();
e243ceb4 115 if res.is_err() { return None; }
d8ce0de0
KS
116 Some(res.unwrap())
117 }
118 }
119 }
120 fn eq(&self, src: &mut ByteReader) -> bool {
121 let val = self.read_val(src);
e243ceb4 122 if val.is_none() { false }
d8ce0de0
KS
123 else { val.unwrap() == self.val() }
124 }
125 fn ge(&self, src: &mut ByteReader) -> bool {
126 let val = self.read_val(src);
e243ceb4 127 if val.is_none() { false }
d8ce0de0
KS
128 else { val.unwrap() >= self.val() }
129 }
130 fn gt(&self, src: &mut ByteReader) -> bool {
131 let val = self.read_val(src);
e243ceb4 132 if val.is_none() { false }
d8ce0de0
KS
133 else { val.unwrap() > self.val() }
134 }
135 fn le(&self, src: &mut ByteReader) -> bool {
136 let val = self.read_val(src);
e243ceb4 137 if val.is_none() { false }
d8ce0de0
KS
138 else { val.unwrap() <= self.val() }
139 }
140 fn lt(&self, src: &mut ByteReader) -> bool {
141 let val = self.read_val(src);
e243ceb4 142 if val.is_none() { false }
d8ce0de0
KS
143 else { val.unwrap() < self.val() }
144 }
145}
146
147#[allow(dead_code)]
b5bd2ae4
KS
148enum CC<'a> {
149 Or(&'a CC<'a>, &'a CC<'a>),
150 Eq(Arg),
151 Str(&'static [u8]),
152 In(Arg, Arg),
153 Lt(Arg),
154 Le(Arg),
155 Gt(Arg),
156 Ge(Arg),
d8ce0de0
KS
157}
158
b5bd2ae4 159impl<'a> CC<'a> {
d8ce0de0
KS
160 fn eval(&self, src: &mut ByteReader) -> bool {
161 match *self {
b5bd2ae4
KS
162 CC::Or (ref a, ref b) => { a.eval(src) || b.eval(src) },
163 CC::Eq(ref arg) => { arg.eq(src) },
4d477e23 164 CC::In(ref a, ref b) => { a.ge(src) && b.le(src) },
b5bd2ae4
KS
165 CC::Lt(ref arg) => { arg.lt(src) },
166 CC::Le(ref arg) => { arg.le(src) },
167 CC::Gt(ref arg) => { arg.gt(src) },
168 CC::Ge(ref arg) => { arg.ge(src) },
169 CC::Str(str) => {
e243ceb4 170 let mut val: Vec<u8> = vec![0; str.len()];
d8ce0de0 171 let res = src.peek_buf(val.as_mut_slice());
e243ceb4 172 if res.is_err() { return false; }
d8ce0de0
KS
173 val == str
174 }
175 }
176 }
177}
178
179struct CheckItem<'a> {
180 offs: u32,
b5bd2ae4 181 cond: &'a CC<'a>,
d8ce0de0
KS
182}
183
184#[allow(dead_code)]
185struct DetectConditions<'a> {
186 demux_name: &'static str,
187 extensions: &'static str,
188 conditions: &'a [CheckItem<'a>],
189}
190
191const DETECTORS: &[DetectConditions] = &[
192 DetectConditions {
193 demux_name: "avi",
194 extensions: ".avi",
b5bd2ae4
KS
195 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"RIFF"), &CC::Str(b"ON2 ")) },
196 CheckItem{offs: 8, cond: &CC::Or(&CC::Or(&CC::Str(b"AVI LIST"),
197 &CC::Str(b"AVIXLIST")),
198 &CC::Str(b"ON2fLIST")) },
199 ]
d8ce0de0
KS
200 },
201 DetectConditions {
202 demux_name: "gdv",
203 extensions: ".gdv",
b5bd2ae4 204 conditions: &[CheckItem{offs: 0, cond: &CC::Eq(Arg::U32LE(0x29111994))}],
d8ce0de0 205 },
ce52b3b5
KS
206 DetectConditions {
207 demux_name: "realaudio",
208 extensions: ".ra,.ram",
209 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b".ra\xFD")}],
210 },
211 DetectConditions {
212 demux_name: "realmedia",
213 extensions: ".rm,.rmvb,.rma,.ra,.ram",
db5cc44b 214 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b".RMF"), &CC::Str(b".RMP")) },
ce52b3b5
KS
215 CheckItem{offs: 4, cond: &CC::Ge(Arg::U32BE(10))}],
216 },
217 DetectConditions {
218 demux_name: "real_ivr",
219 extensions: ".ivr",
220 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b".R1M"), &CC::Str(b".REC"))}],
221 },
c6c21059
KS
222 DetectConditions {
223 demux_name: "bink",
4d477e23 224 extensions: ".bik,.bk2",
c6c21059
KS
225 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::In(Arg::U32BE(0x32494B62), // BIKb
226 Arg::U32BE(0x32494B7B)), // BIKz
227 &CC::In(Arg::U32BE(0x4B423261), // KB2a
228 Arg::U32BE(0x4B42327B)))}], // KB2z
229 },
606c448e
KS
230 DetectConditions {
231 demux_name: "smacker",
232 extensions: ".smk",
233 conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"SMK2"), &CC::Str(b"SMK4"))}],
234 },
128253cc
KS
235 DetectConditions {
236 demux_name: "bmv",
237 extensions: ".bmv",
238 conditions: &[],
239 },
ecda1cc1
KS
240 DetectConditions {
241 demux_name: "bmv3",
242 extensions: ".bmv",
243 conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"BMVi") },
244 CheckItem{offs: 32, cond: &CC::Str(b"DATA")}],
245 },
9895bd7b
KS
246 DetectConditions {
247 demux_name: "vmd",
248 extensions: ".vmd",
249 conditions: &[],
250 },
d8ce0de0
KS
251];
252
1b5ebe1d
KS
253/// Tries to detect container format.
254///
255/// This function tries to determine container format using both file extension and checking against container specific markers inside.
256/// In case of success the function returns short container name and the detection score.
257/// Result should have the highest detection score among tested.
d8ce0de0
KS
258pub fn detect_format(name: &str, src: &mut ByteReader) -> Option<(&'static str, DetectionScore)> {
259 let mut result = None;
260 let lname = name.to_lowercase();
261 for detector in DETECTORS {
262 let mut score = DetectionScore::No;
e243ceb4 263 if !name.is_empty() {
d8ce0de0
KS
264 for ext in detector.extensions.split(',') {
265 if lname.ends_with(ext) {
266 score = DetectionScore::ExtensionMatches;
267 break;
268 }
269 }
270 }
e243ceb4 271 let mut passed = !detector.conditions.is_empty();
d8ce0de0 272 for ck in detector.conditions {
e243ceb4
KS
273 let ret = src.seek(SeekFrom::Start(u64::from(ck.offs)));
274 if ret.is_err() {
d8ce0de0
KS
275 passed = false;
276 break;
277 }
278 if !ck.cond.eval(src) {
279 passed = false;
280 break;
281 }
282 }
283 if passed {
284 score = DetectionScore::MagicMatches;
285 }
286 if score == DetectionScore::MagicMatches {
287 return Some((detector.demux_name, score));
288 }
4d477e23 289 if result.is_none() && score != DetectionScore::No {
d8ce0de0 290 result = Some((detector.demux_name, score));
4d477e23 291 } else if result.is_some() {
d8ce0de0
KS
292 let (_, oldsc) = result.unwrap();
293 if oldsc.less(score) {
294 result = Some((detector.demux_name, score));
295 }
296 }
297 }
298 result
299}
300
#[cfg(test)]
mod test {
    use super::*;
    use std::fs::File;
    // The byte I/O types live in `nihav_core`, not in this crate.
    use nihav_core::io::byteio::*;

    /// Opens the sample at `path` and checks that it is recognised as
    /// `expected_demuxer` by its in-file markers.
    fn assert_magic_detected(path: &str, expected_demuxer: &str) {
        let mut file = File::open(path).unwrap();
        let mut fr = FileReader::new_read(&mut file);
        let mut br = ByteReader::new(&mut fr);
        let (name, score) = detect_format(path, &mut br).unwrap();
        assert_eq!(name, expected_demuxer);
        assert_eq!(score, DetectionScore::MagicMatches);
    }

    #[test]
    fn test_avi_detect() {
        assert_magic_detected("assets/Indeo/laser05.avi", "avi");
    }

    #[test]
    fn test_gdv_detect() {
        assert_magic_detected("assets/Game/intro1.gdv", "gdv");
    }
}