| 1 | //! Container format detection. |
| 2 | //! |
//! Usually the user does not know the container format of the opened file,
//! which is why format detection functionality is needed.
//! This module contains a set of rules to detect the container not merely by file extension but also by its content where possible.
| 6 | //! |
| 7 | //! # Examples |
| 8 | //! |
| 9 | //! ```no_run |
| 10 | //! use nihav_core::detect::detect_format; |
| 11 | //! use std::fs::File; |
| 12 | //! use nihav_core::io::byteio::*; |
| 13 | //! |
| 14 | //! let name = "mediafile.ogv"; |
| 15 | //! let mut file = File::open(name).unwrap(); |
| 16 | //! let mut filereader = FileReader::new_read(&mut file); |
| 17 | //! let mut br = ByteReader::new(&mut filereader); |
| 18 | //! let result = detect_format(name, &mut br); |
| 19 | //! if let Some((name, score)) = result { |
| 20 | //! println!("detected format {} with score {:?}", name, score); |
| 21 | //! } |
| 22 | //! ``` |
| 23 | use std::io::SeekFrom; |
| 24 | use nihav_core::io::byteio::ByteReader; |
| 25 | |
/// Confidence level of a container format detection.
#[derive(Debug,Clone,Copy,PartialEq)]
pub enum DetectionScore {
    /// The format was not detected at all.
    No,
    /// The format was matched by file extension only.
    ExtensionMatches,
    /// The format was matched by markers found inside the file.
    MagicMatches,
}

impl DetectionScore {
    /// Reports whether this score is strictly lower than `other`.
    ///
    /// Scores are ordered `No` < `ExtensionMatches` < `MagicMatches`.
    pub fn less(self, other: DetectionScore) -> bool {
        // Map every score to its position in the declaration order.
        let rank = |score: DetectionScore| -> u8 {
            match score {
                DetectionScore::No => 0,
                DetectionScore::ExtensionMatches => 1,
                DetectionScore::MagicMatches => 2,
            }
        };
        rank(self) < rank(other)
    }
}
| 43 | |
/// Reference value for a comparison, tagged with the width and byte order
/// that are used to read the value it is compared against from the input.
// Not every variant is referenced by the detector table in this file.
#[allow(dead_code)]
enum Arg {
    /// Single byte.
    Byte(u8),
    /// 16-bit value read as big-endian.
    U16BE(u16),
    /// 16-bit value read as little-endian.
    U16LE(u16),
    /// 24-bit value read as big-endian (carried in a `u32`).
    U24BE(u32),
    /// 24-bit value read as little-endian (carried in a `u32`).
    U24LE(u32),
    /// 32-bit value read as big-endian.
    U32BE(u32),
    /// 32-bit value read as little-endian.
    U32LE(u32),
    /// 64-bit value read as big-endian.
    U64BE(u64),
    /// 64-bit value read as little-endian.
    U64LE(u64),
}
| 56 | |
| 57 | impl Arg { |
| 58 | fn val(&self) -> u64 { |
| 59 | match *self { |
| 60 | Arg::Byte(b) => { u64::from(b) } |
| 61 | Arg::U16BE(v) => { u64::from(v) } |
| 62 | Arg::U16LE(v) => { u64::from(v) } |
| 63 | Arg::U24BE(v) => { u64::from(v) } |
| 64 | Arg::U24LE(v) => { u64::from(v) } |
| 65 | Arg::U32BE(v) => { u64::from(v) } |
| 66 | Arg::U32LE(v) => { u64::from(v) } |
| 67 | Arg::U64BE(v) => { v } |
| 68 | Arg::U64LE(v) => { v } |
| 69 | } |
| 70 | } |
| 71 | fn read_val(&self, src: &mut ByteReader) -> Option<u64> { |
| 72 | match *self { |
| 73 | Arg::Byte(_) => { |
| 74 | let res = src.peek_byte(); |
| 75 | if res.is_err() { return None; } |
| 76 | Some(u64::from(res.unwrap())) |
| 77 | } |
| 78 | Arg::U16BE(_) => { |
| 79 | let res = src.peek_u16be(); |
| 80 | if res.is_err() { return None; } |
| 81 | Some(u64::from(res.unwrap())) |
| 82 | } |
| 83 | Arg::U16LE(_) => { |
| 84 | let res = src.peek_u16le(); |
| 85 | if res.is_err() { return None; } |
| 86 | Some(u64::from(res.unwrap())) |
| 87 | } |
| 88 | Arg::U24BE(_) => { |
| 89 | let res = src.peek_u24be(); |
| 90 | if res.is_err() { return None; } |
| 91 | Some(u64::from(res.unwrap())) |
| 92 | } |
| 93 | Arg::U24LE(_) => { |
| 94 | let res = src.peek_u24le(); |
| 95 | if res.is_err() { return None; } |
| 96 | Some(u64::from(res.unwrap())) |
| 97 | } |
| 98 | Arg::U32BE(_) => { |
| 99 | let res = src.peek_u32be(); |
| 100 | if res.is_err() { return None; } |
| 101 | Some(u64::from(res.unwrap())) |
| 102 | } |
| 103 | Arg::U32LE(_) => { |
| 104 | let res = src.peek_u32le(); |
| 105 | if res.is_err() { return None; } |
| 106 | Some(u64::from(res.unwrap())) |
| 107 | } |
| 108 | Arg::U64BE(_) => { |
| 109 | let res = src.peek_u64be(); |
| 110 | if res.is_err() { return None; } |
| 111 | Some(res.unwrap()) |
| 112 | } |
| 113 | Arg::U64LE(_) => { |
| 114 | let res = src.peek_u64le(); |
| 115 | if res.is_err() { return None; } |
| 116 | Some(res.unwrap()) |
| 117 | } |
| 118 | } |
| 119 | } |
| 120 | fn eq(&self, src: &mut ByteReader) -> bool { |
| 121 | let val = self.read_val(src); |
| 122 | if val.is_none() { false } |
| 123 | else { val.unwrap() == self.val() } |
| 124 | } |
| 125 | fn ge(&self, src: &mut ByteReader) -> bool { |
| 126 | let val = self.read_val(src); |
| 127 | if val.is_none() { false } |
| 128 | else { val.unwrap() >= self.val() } |
| 129 | } |
| 130 | fn gt(&self, src: &mut ByteReader) -> bool { |
| 131 | let val = self.read_val(src); |
| 132 | if val.is_none() { false } |
| 133 | else { val.unwrap() > self.val() } |
| 134 | } |
| 135 | fn le(&self, src: &mut ByteReader) -> bool { |
| 136 | let val = self.read_val(src); |
| 137 | if val.is_none() { false } |
| 138 | else { val.unwrap() <= self.val() } |
| 139 | } |
| 140 | fn lt(&self, src: &mut ByteReader) -> bool { |
| 141 | let val = self.read_val(src); |
| 142 | if val.is_none() { false } |
| 143 | else { val.unwrap() < self.val() } |
| 144 | } |
| 145 | } |
| 146 | |
/// A single content check evaluated against the data at the reader's current position.
// Not every variant is referenced by the detector table in this file.
#[allow(dead_code)]
enum CC<'a> {
    /// Passes when either of the two nested checks passes.
    Or(&'a CC<'a>, &'a CC<'a>),
    /// Passes when the value read equals the argument.
    Eq(Arg),
    /// Passes when the bytes read equal the given byte string.
    Str(&'static [u8]),
    /// Passes when the value read lies within the inclusive range given by the two arguments (lower bound first).
    In(Arg, Arg),
    /// Passes when the value read is strictly less than the argument.
    Lt(Arg),
    /// Passes when the value read is less than or equal to the argument.
    Le(Arg),
    /// Passes when the value read is strictly greater than the argument.
    Gt(Arg),
    /// Passes when the value read is greater than or equal to the argument.
    Ge(Arg),
}
| 158 | |
| 159 | impl<'a> CC<'a> { |
| 160 | fn eval(&self, src: &mut ByteReader) -> bool { |
| 161 | match *self { |
| 162 | CC::Or (ref a, ref b) => { a.eval(src) || b.eval(src) }, |
| 163 | CC::Eq(ref arg) => { arg.eq(src) }, |
| 164 | CC::In(ref a, ref b) => { a.ge(src) && b.le(src) }, |
| 165 | CC::Lt(ref arg) => { arg.lt(src) }, |
| 166 | CC::Le(ref arg) => { arg.le(src) }, |
| 167 | CC::Gt(ref arg) => { arg.gt(src) }, |
| 168 | CC::Ge(ref arg) => { arg.ge(src) }, |
| 169 | CC::Str(str) => { |
| 170 | let mut val: Vec<u8> = vec![0; str.len()]; |
| 171 | let res = src.peek_buf(val.as_mut_slice()); |
| 172 | if res.is_err() { return false; } |
| 173 | val == str |
| 174 | } |
| 175 | } |
| 176 | } |
| 177 | } |
| 178 | |
/// One positioned content check: a condition applied at a fixed file offset.
struct CheckItem<'a> {
    /// Absolute offset from the start of the input where the condition is evaluated.
    offs: u32,
    /// Condition that must hold at that offset.
    cond: &'a CC<'a>,
}
| 183 | |
/// Detection rules for a single container format.
#[allow(dead_code)]
struct DetectConditions<'a> {
    /// Short container (demuxer) name reported on a successful detection.
    demux_name: &'static str,
    /// Comma-separated list of file name suffixes (written lowercase, with the leading dot)
    /// that are matched against the end of the lowercased file name.
    extensions: &'static str,
    /// Content checks that must all pass for a magic match; an empty list means
    /// the format can only be matched by extension.
    conditions: &'a [CheckItem<'a>],
}
| 190 | |
| 191 | const DETECTORS: &[DetectConditions] = &[ |
| 192 | DetectConditions { |
| 193 | demux_name: "avi", |
| 194 | extensions: ".avi", |
| 195 | conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"RIFF"), &CC::Str(b"ON2 ")) }, |
| 196 | CheckItem{offs: 8, cond: &CC::Or(&CC::Or(&CC::Str(b"AVI LIST"), |
| 197 | &CC::Str(b"AVIXLIST")), |
| 198 | &CC::Str(b"ON2fLIST")) }, |
| 199 | ] |
| 200 | }, |
| 201 | DetectConditions { |
| 202 | demux_name: "gdv", |
| 203 | extensions: ".gdv", |
| 204 | conditions: &[CheckItem{offs: 0, cond: &CC::Eq(Arg::U32LE(0x29111994))}], |
| 205 | }, |
| 206 | DetectConditions { |
| 207 | demux_name: "realaudio", |
| 208 | extensions: ".ra,.ram", |
| 209 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b".ra\xFD")}], |
| 210 | }, |
| 211 | DetectConditions { |
| 212 | demux_name: "realmedia", |
| 213 | extensions: ".rm,.rmvb,.rma,.ra,.ram", |
| 214 | conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b".RMF"), &CC::Str(b".RMP")) }, |
| 215 | CheckItem{offs: 4, cond: &CC::Ge(Arg::U32BE(10))}], |
| 216 | }, |
| 217 | DetectConditions { |
| 218 | demux_name: "real_ivr", |
| 219 | extensions: ".ivr", |
| 220 | conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b".R1M"), &CC::Str(b".REC"))}], |
| 221 | }, |
| 222 | DetectConditions { |
| 223 | demux_name: "bink", |
| 224 | extensions: ".bik,.bk2", |
| 225 | conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::In(Arg::U32BE(0x32494B62), // BIKb |
| 226 | Arg::U32BE(0x32494B7B)), // BIKz |
| 227 | &CC::In(Arg::U32BE(0x4B423261), // KB2a |
| 228 | Arg::U32BE(0x4B42327B)))}], // KB2z |
| 229 | }, |
| 230 | DetectConditions { |
| 231 | demux_name: "smacker", |
| 232 | extensions: ".smk", |
| 233 | conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"SMK2"), &CC::Str(b"SMK4"))}], |
| 234 | }, |
| 235 | DetectConditions { |
| 236 | demux_name: "bmv", |
| 237 | extensions: ".bmv", |
| 238 | conditions: &[], |
| 239 | }, |
| 240 | DetectConditions { |
| 241 | demux_name: "bmv3", |
| 242 | extensions: ".bmv", |
| 243 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"BMVi") }, |
| 244 | CheckItem{offs: 32, cond: &CC::Str(b"DATA")}], |
| 245 | }, |
| 246 | DetectConditions { |
| 247 | demux_name: "vmd", |
| 248 | extensions: ".vmd", |
| 249 | conditions: &[], |
| 250 | }, |
| 251 | ]; |
| 252 | |
| 253 | /// Tries to detect container format. |
| 254 | /// |
| 255 | /// This function tries to determine container format using both file extension and checking against container specific markers inside. |
| 256 | /// In case of success the function returns short container name and the detection score. |
| 257 | /// Result should have the highest detection score among tested. |
| 258 | pub fn detect_format(name: &str, src: &mut ByteReader) -> Option<(&'static str, DetectionScore)> { |
| 259 | let mut result = None; |
| 260 | let lname = name.to_lowercase(); |
| 261 | for detector in DETECTORS { |
| 262 | let mut score = DetectionScore::No; |
| 263 | if !name.is_empty() { |
| 264 | for ext in detector.extensions.split(',') { |
| 265 | if lname.ends_with(ext) { |
| 266 | score = DetectionScore::ExtensionMatches; |
| 267 | break; |
| 268 | } |
| 269 | } |
| 270 | } |
| 271 | let mut passed = !detector.conditions.is_empty(); |
| 272 | for ck in detector.conditions { |
| 273 | let ret = src.seek(SeekFrom::Start(u64::from(ck.offs))); |
| 274 | if ret.is_err() { |
| 275 | passed = false; |
| 276 | break; |
| 277 | } |
| 278 | if !ck.cond.eval(src) { |
| 279 | passed = false; |
| 280 | break; |
| 281 | } |
| 282 | } |
| 283 | if passed { |
| 284 | score = DetectionScore::MagicMatches; |
| 285 | } |
| 286 | if score == DetectionScore::MagicMatches { |
| 287 | return Some((detector.demux_name, score)); |
| 288 | } |
| 289 | if result.is_none() && score != DetectionScore::No { |
| 290 | result = Some((detector.demux_name, score)); |
| 291 | } else if result.is_some() { |
| 292 | let (_, oldsc) = result.unwrap(); |
| 293 | if oldsc.less(score) { |
| 294 | result = Some((detector.demux_name, score)); |
| 295 | } |
| 296 | } |
| 297 | } |
| 298 | result |
| 299 | } |
| 300 | |
#[cfg(test)]
mod test {
    use super::*;
    use std::fs::File;
    // NOTE(review): the parent module imports byteio through `nihav_core::` while this
    // import goes through `crate::` - confirm which path is valid for the crate this file lives in.
    use crate::io::byteio::*;

    /// Opens the sample at `path` and checks that it is detected as `expected` by its content markers.
    fn check_magic_detection(path: &str, expected: &str) {
        let mut file = File::open(path).unwrap();
        let mut fr = FileReader::new_read(&mut file);
        let mut br = ByteReader::new(&mut fr);
        let (detected, score) = detect_format(path, &mut br).unwrap();
        assert_eq!(detected, expected);
        assert_eq!(score, DetectionScore::MagicMatches);
    }

    #[test]
    fn test_avi_detect() {
        check_magic_detection("assets/Indeo/laser05.avi", "avi");
    }

    #[test]
    fn test_gdv_detect() {
        check_magic_detection("assets/Game/intro1.gdv", "gdv");
    }
}