| 1 | //! Container format detection. |
| 2 | //! |
| 3 | //! Usually the user does not know the container format of the opened file. |
| 4 | //! That is why format detection functionality is needed. |
| 5 | //! This module contains a set of rules for detecting the container format not merely by file extension but also by file content where possible. |
| 6 | //! |
| 7 | //! # Examples |
| 8 | //! |
| 9 | //! ```no_run |
| 10 | //! use nihav_registry::detect::detect_format; |
| 11 | //! use std::fs::File; |
| 12 | //! use nihav_core::io::byteio::*; |
| 13 | //! |
| 14 | //! let name = "mediafile.ogv"; |
| 15 | //! let mut file = File::open(name).unwrap(); |
| 16 | //! let mut filereader = FileReader::new_read(&mut file); |
| 17 | //! let mut br = ByteReader::new(&mut filereader); |
| 18 | //! let result = detect_format(name, &mut br); |
| 19 | //! if let Some((name, score)) = result { |
| 20 | //! println!("detected format {} with score {:?}", name, score); |
| 21 | //! } |
| 22 | //! ``` |
| 23 | use std::io::SeekFrom; |
| 24 | use nihav_core::io::byteio::ByteReader; |
| 25 | |
/// Format detection score.
///
/// Variants are declared in order of increasing confidence and
/// [`DetectionScore::less`] relies on that declaration order.
#[derive(Debug,Clone,Copy,PartialEq,Eq)]
pub enum DetectionScore {
    /// Format is not detected.
    No,
    /// Format matched by file extension.
    ExtensionMatches,
    /// Format matches by markers inside the file.
    MagicMatches,
}

impl DetectionScore {
    /// Checks whether current detection score is less than a value it is compared against.
    ///
    /// Scores are ranked `No < ExtensionMatches < MagicMatches`.
    pub fn less(self, other: DetectionScore) -> bool {
        // For a fieldless enum the cast yields the declaration index, which serves as the rank.
        let own_rank = self as i32;
        let other_rank = other as i32;
        own_rank < other_rank
    }
}
| 43 | |
| 44 | #[allow(dead_code)] |
| 45 | enum Arg { |
| 46 | Byte(u8), |
| 47 | U16BE(u16), |
| 48 | U16LE(u16), |
| 49 | U24BE(u32), |
| 50 | U24LE(u32), |
| 51 | U32BE(u32), |
| 52 | U32LE(u32), |
| 53 | U64BE(u64), |
| 54 | U64LE(u64), |
| 55 | } |
| 56 | |
| 57 | impl Arg { |
| 58 | fn val(&self) -> u64 { |
| 59 | match *self { |
| 60 | Arg::Byte(b) => { u64::from(b) } |
| 61 | Arg::U16BE(v) => { u64::from(v) } |
| 62 | Arg::U16LE(v) => { u64::from(v) } |
| 63 | Arg::U24BE(v) => { u64::from(v) } |
| 64 | Arg::U24LE(v) => { u64::from(v) } |
| 65 | Arg::U32BE(v) => { u64::from(v) } |
| 66 | Arg::U32LE(v) => { u64::from(v) } |
| 67 | Arg::U64BE(v) => { v } |
| 68 | Arg::U64LE(v) => { v } |
| 69 | } |
| 70 | } |
| 71 | fn read_val(&self, src: &mut ByteReader) -> Option<u64> { |
| 72 | match *self { |
| 73 | Arg::Byte(_) => { |
| 74 | let res = src.peek_byte(); |
| 75 | if res.is_err() { return None; } |
| 76 | Some(u64::from(res.unwrap())) |
| 77 | } |
| 78 | Arg::U16BE(_) => { |
| 79 | let res = src.peek_u16be(); |
| 80 | if res.is_err() { return None; } |
| 81 | Some(u64::from(res.unwrap())) |
| 82 | } |
| 83 | Arg::U16LE(_) => { |
| 84 | let res = src.peek_u16le(); |
| 85 | if res.is_err() { return None; } |
| 86 | Some(u64::from(res.unwrap())) |
| 87 | } |
| 88 | Arg::U24BE(_) => { |
| 89 | let res = src.peek_u24be(); |
| 90 | if res.is_err() { return None; } |
| 91 | Some(u64::from(res.unwrap())) |
| 92 | } |
| 93 | Arg::U24LE(_) => { |
| 94 | let res = src.peek_u24le(); |
| 95 | if res.is_err() { return None; } |
| 96 | Some(u64::from(res.unwrap())) |
| 97 | } |
| 98 | Arg::U32BE(_) => { |
| 99 | let res = src.peek_u32be(); |
| 100 | if res.is_err() { return None; } |
| 101 | Some(u64::from(res.unwrap())) |
| 102 | } |
| 103 | Arg::U32LE(_) => { |
| 104 | let res = src.peek_u32le(); |
| 105 | if res.is_err() { return None; } |
| 106 | Some(u64::from(res.unwrap())) |
| 107 | } |
| 108 | Arg::U64BE(_) => { |
| 109 | let res = src.peek_u64be(); |
| 110 | if res.is_err() { return None; } |
| 111 | Some(res.unwrap()) |
| 112 | } |
| 113 | Arg::U64LE(_) => { |
| 114 | let res = src.peek_u64le(); |
| 115 | if res.is_err() { return None; } |
| 116 | Some(res.unwrap()) |
| 117 | } |
| 118 | } |
| 119 | } |
| 120 | fn eq(&self, src: &mut ByteReader) -> bool { |
| 121 | if let Some(rval) = self.read_val(src) { |
| 122 | rval == self.val() |
| 123 | } else { |
| 124 | false |
| 125 | } |
| 126 | } |
| 127 | fn ge(&self, src: &mut ByteReader) -> bool { |
| 128 | if let Some(rval) = self.read_val(src) { |
| 129 | rval >= self.val() |
| 130 | } else { |
| 131 | false |
| 132 | } |
| 133 | } |
| 134 | fn gt(&self, src: &mut ByteReader) -> bool { |
| 135 | if let Some(rval) = self.read_val(src) { |
| 136 | rval > self.val() |
| 137 | } else { |
| 138 | false |
| 139 | } |
| 140 | } |
| 141 | fn le(&self, src: &mut ByteReader) -> bool { |
| 142 | if let Some(rval) = self.read_val(src) { |
| 143 | rval <= self.val() |
| 144 | } else { |
| 145 | false |
| 146 | } |
| 147 | } |
| 148 | fn lt(&self, src: &mut ByteReader) -> bool { |
| 149 | if let Some(rval) = self.read_val(src) { |
| 150 | rval < self.val() |
| 151 | } else { |
| 152 | false |
| 153 | } |
| 154 | } |
| 155 | } |
| 156 | |
| 157 | #[allow(dead_code)] |
| 158 | enum CC<'a> { |
| 159 | Or(&'a CC<'a>, &'a CC<'a>), |
| 160 | Eq(Arg), |
| 161 | Str(&'static [u8]), |
| 162 | In(Arg, Arg), |
| 163 | Lt(Arg), |
| 164 | Le(Arg), |
| 165 | Gt(Arg), |
| 166 | Ge(Arg), |
| 167 | } |
| 168 | |
| 169 | impl<'a> CC<'a> { |
| 170 | fn eval(&self, src: &mut ByteReader) -> bool { |
| 171 | match *self { |
| 172 | CC::Or(a, b) => { a.eval(src) || b.eval(src) }, |
| 173 | CC::Eq(ref arg) => { arg.eq(src) }, |
| 174 | CC::In(ref a, ref b) => { a.ge(src) && b.le(src) }, |
| 175 | CC::Lt(ref arg) => { arg.lt(src) }, |
| 176 | CC::Le(ref arg) => { arg.le(src) }, |
| 177 | CC::Gt(ref arg) => { arg.gt(src) }, |
| 178 | CC::Ge(ref arg) => { arg.ge(src) }, |
| 179 | CC::Str(str) => { |
| 180 | let mut val: Vec<u8> = vec![0; str.len()]; |
| 181 | let res = src.peek_buf(val.as_mut_slice()); |
| 182 | if res.is_err() { return false; } |
| 183 | val == str |
| 184 | } |
| 185 | } |
| 186 | } |
| 187 | } |
| 188 | |
/// Single content check: a condition applied at a fixed position of the input.
struct CheckItem<'a> {
    /// Absolute offset from the start of the input the reader is moved to before the condition is evaluated.
    offs: u32,
    /// Condition evaluated on the data found at that offset.
    cond: &'a CC<'a>,
}
| 193 | |
/// Detection rules for one container format.
#[allow(dead_code)]
struct DetectConditions<'a> {
    /// Short demuxer name reported to the caller when this entry matches.
    demux_name: &'static str,
    /// Comma-separated list of file name suffixes (dot included, e.g. `".ra,.ram"`)
    /// that are compared against the lowercased file name.
    extensions: &'static str,
    /// Content checks; the entry matches by content only when this list is
    /// non-empty and every check in it passes.
    conditions: &'a [CheckItem<'a>],
}
| 200 | |
| 201 | const DETECTORS: &[DetectConditions] = &[ |
| 202 | DetectConditions { |
| 203 | demux_name: "avi", |
| 204 | extensions: ".avi", |
| 205 | conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"RIFF"), &CC::Str(b"ON2 ")) }, |
| 206 | CheckItem{offs: 8, cond: &CC::Or(&CC::Or(&CC::Str(b"AVI LIST"), |
| 207 | &CC::Str(b"AVIXLIST")), |
| 208 | &CC::Str(b"ON2fLIST")) }, |
| 209 | ] |
| 210 | }, |
| 211 | DetectConditions { |
| 212 | demux_name: "wav", |
| 213 | extensions: ".wav", |
| 214 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"RIFF") }, |
| 215 | CheckItem{offs: 8, cond: &CC::Str(b"WAVEfmt ") } |
| 216 | ] |
| 217 | }, |
| 218 | DetectConditions { |
| 219 | demux_name: "mov", |
| 220 | extensions: ".mov", |
| 221 | conditions: &[CheckItem{offs: 4, cond: &CC::Or(&CC::Or(&CC::Str(b"mdat"), |
| 222 | &CC::Str(b"moov")), |
| 223 | &CC::Str(b"ftyp")) }], |
| 224 | }, |
| 225 | DetectConditions { |
| 226 | demux_name: "mov", |
| 227 | extensions: ".mov", |
| 228 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"\x00\x00\x00\x08wide") }, |
| 229 | CheckItem{offs: 12, cond: &CC::Or(&CC::Or(&CC::Str(b"mdat"), |
| 230 | &CC::Str(b"moov")), |
| 231 | &CC::Str(b"ftyp")) }], |
| 232 | }, |
| 233 | DetectConditions { |
| 234 | demux_name: "yuv4mpeg", |
| 235 | extensions: ".y4m", |
| 236 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"YUV4MPEG2 ") }], |
| 237 | }, |
| 238 | DetectConditions { |
| 239 | demux_name: "flv", |
| 240 | extensions: ".flv", |
| 241 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"FLV") }, |
| 242 | CheckItem{offs: 3, cond: &CC::Le(Arg::Byte(1)) }], |
| 243 | }, |
| 244 | DetectConditions { |
| 245 | demux_name: "dkivf", |
| 246 | extensions: ".ivf", |
| 247 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"DKIF\x00\x00")}, |
| 248 | CheckItem{offs: 6, cond: &CC::Ge(Arg::U16LE(32))}], |
| 249 | }, |
| 250 | DetectConditions { |
| 251 | demux_name: "fcmp", |
| 252 | extensions: ".cmp", |
| 253 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"FCMP")}], |
| 254 | }, |
| 255 | DetectConditions { |
| 256 | demux_name: "fst", |
| 257 | extensions: ".fst", |
| 258 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"2TSF")}], |
| 259 | }, |
| 260 | DetectConditions { |
| 261 | demux_name: "gdv", |
| 262 | extensions: ".gdv", |
| 263 | conditions: &[CheckItem{offs: 0, cond: &CC::Eq(Arg::U32LE(0x29111994))}], |
| 264 | }, |
| 265 | DetectConditions { |
| 266 | demux_name: "fable-imax", |
| 267 | extensions: ".imx", |
| 268 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"IMAX") }, |
| 269 | CheckItem{offs: 10, cond: &CC::Eq(Arg::U16LE(0x102)) }], |
| 270 | }, |
| 271 | DetectConditions { |
| 272 | demux_name: "legend-q", |
| 273 | extensions: ".q", |
| 274 | conditions: &[CheckItem{offs: 0, cond: &CC::Eq(Arg::U16LE(0x6839))}, |
| 275 | CheckItem{offs: 2, cond: &CC::In(Arg::Byte(3), Arg::Byte(7))}], |
| 276 | }, |
| 277 | DetectConditions { |
| 278 | demux_name: "smush", |
| 279 | extensions: ".san", |
| 280 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"ANIM")}, |
| 281 | CheckItem{offs: 8, cond: &CC::Str(b"AHDR")}], |
| 282 | }, |
| 283 | DetectConditions { |
| 284 | demux_name: "smush-mcmp", |
| 285 | extensions: ".imc", |
| 286 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"MCMP")}, |
| 287 | CheckItem{offs: 6, cond: &CC::Eq(Arg::Byte(0))}, |
| 288 | CheckItem{offs: 7, cond: &CC::Eq(Arg::Byte(0))}], |
| 289 | }, |
| 290 | DetectConditions { |
| 291 | demux_name: "smush", |
| 292 | extensions: ".snm", |
| 293 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"SANM")}, |
| 294 | CheckItem{offs: 8, cond: &CC::Str(b"SHDR")}], |
| 295 | }, |
| 296 | DetectConditions { |
| 297 | demux_name: "realaudio", |
| 298 | extensions: ".ra,.ram", |
| 299 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b".ra\xFD")}], |
| 300 | }, |
| 301 | DetectConditions { |
| 302 | demux_name: "realmedia", |
| 303 | extensions: ".rm,.rmvb,.rma,.ra,.ram", |
| 304 | conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b".RMF"), &CC::Str(b".RMP")) }, |
| 305 | CheckItem{offs: 4, cond: &CC::Ge(Arg::U32BE(10))}], |
| 306 | }, |
| 307 | DetectConditions { |
| 308 | demux_name: "real_ivr", |
| 309 | extensions: ".ivr", |
| 310 | conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b".R1M"), &CC::Str(b".REC"))}], |
| 311 | }, |
| 312 | DetectConditions { |
| 313 | demux_name: "bink", |
| 314 | extensions: ".bik,.bk2", |
| 315 | conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::In(Arg::U32BE(0x42494B62), // BIKb |
| 316 | Arg::U32BE(0x42494B7B)), // BIKz |
| 317 | &CC::In(Arg::U32BE(0x4B423261), // KB2a |
| 318 | Arg::U32BE(0x4B42327B)))}], // KB2z |
| 319 | }, |
| 320 | DetectConditions { |
| 321 | demux_name: "smacker", |
| 322 | extensions: ".smk", |
| 323 | conditions: &[CheckItem{offs: 0, cond: &CC::Or(&CC::Str(b"SMK2"), &CC::Str(b"SMK4"))}], |
| 324 | }, |
| 325 | DetectConditions { |
| 326 | demux_name: "ape", |
| 327 | extensions: ".ape", |
| 328 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"MAC ") }, |
| 329 | CheckItem{offs: 4, cond: &CC::In(Arg::U16LE(3800), Arg::U16LE(3990))}], |
| 330 | }, |
| 331 | DetectConditions { |
| 332 | demux_name: "flac", |
| 333 | extensions: ".flac", |
| 334 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"fLaC") }], |
| 335 | }, |
| 336 | DetectConditions { |
| 337 | demux_name: "tta", |
| 338 | extensions: ".tta", |
| 339 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"TTA1") }], |
| 340 | }, |
| 341 | DetectConditions { |
| 342 | demux_name: "wavpack", |
| 343 | extensions: ".wv", |
| 344 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"wvpk") }, |
| 345 | CheckItem{offs: 8, cond: &CC::In(Arg::U16LE(0x402), Arg::U16LE(0x410))}], |
| 346 | }, |
| 347 | DetectConditions { |
| 348 | demux_name: "vivo", |
| 349 | extensions: ".viv", |
| 350 | conditions: &[CheckItem{offs: 0, cond: &CC::In(Arg::U16BE(1), Arg::U16BE(0xFF))}, |
| 351 | CheckItem{offs: 2, cond: &CC::Str(b"\x0D\x0AVersion:Vivo/")}], |
| 352 | }, |
| 353 | DetectConditions { |
| 354 | demux_name: "vivo", |
| 355 | extensions: ".viv", |
| 356 | conditions: &[CheckItem{offs: 0, cond: &CC::In(Arg::U16BE(1), Arg::U16BE(0xFF))}, |
| 357 | CheckItem{offs: 3, cond: &CC::Str(b"\x0D\x0AVersion:Vivo/")}], |
| 358 | }, |
| 359 | DetectConditions { |
| 360 | demux_name: "bmv", |
| 361 | extensions: ".bmv", |
| 362 | conditions: &[], |
| 363 | }, |
| 364 | DetectConditions { |
| 365 | demux_name: "bmv3", |
| 366 | extensions: ".bmv", |
| 367 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"BMVi") }, |
| 368 | CheckItem{offs: 32, cond: &CC::Str(b"DATA")}], |
| 369 | }, |
| 370 | DetectConditions { |
| 371 | demux_name: "vmd", |
| 372 | extensions: ".vmd", |
| 373 | conditions: &[], |
| 374 | }, |
| 375 | DetectConditions { |
| 376 | demux_name: "vx", |
| 377 | extensions: ".vx", |
| 378 | conditions: &[CheckItem{offs: 0, cond: &CC::Str(b"VXDS") }], |
| 379 | }, |
| 380 | ]; |
| 381 | |
| 382 | /// Tries to detect container format. |
| 383 | /// |
| 384 | /// This function tries to determine container format using both file extension and checking against container specific markers inside. |
| 385 | /// In case of success the function returns short container name and the detection score. |
| 386 | /// Result should have the highest detection score among tested. |
| 387 | pub fn detect_format(name: &str, src: &mut ByteReader) -> Option<(&'static str, DetectionScore)> { |
| 388 | let mut result = None; |
| 389 | let lname = name.to_lowercase(); |
| 390 | for detector in DETECTORS { |
| 391 | let mut score = DetectionScore::No; |
| 392 | if !name.is_empty() { |
| 393 | for ext in detector.extensions.split(',') { |
| 394 | if lname.ends_with(ext) { |
| 395 | score = DetectionScore::ExtensionMatches; |
| 396 | break; |
| 397 | } |
| 398 | } |
| 399 | } |
| 400 | let mut passed = !detector.conditions.is_empty(); |
| 401 | for ck in detector.conditions { |
| 402 | let ret = src.seek(SeekFrom::Start(u64::from(ck.offs))); |
| 403 | if ret.is_err() { |
| 404 | passed = false; |
| 405 | break; |
| 406 | } |
| 407 | if !ck.cond.eval(src) { |
| 408 | passed = false; |
| 409 | break; |
| 410 | } |
| 411 | } |
| 412 | if passed { |
| 413 | score = DetectionScore::MagicMatches; |
| 414 | } |
| 415 | if score == DetectionScore::MagicMatches { |
| 416 | return Some((detector.demux_name, score)); |
| 417 | } |
| 418 | if result.is_none() && score != DetectionScore::No { |
| 419 | result = Some((detector.demux_name, score)); |
| 420 | } else if result.is_some() { |
| 421 | let (_, oldsc) = result.unwrap(); |
| 422 | if oldsc.less(score) { |
| 423 | result = Some((detector.demux_name, score)); |
| 424 | } |
| 425 | } |
| 426 | } |
| 427 | result |
| 428 | } |
| 429 | |
| 430 | /// Tries to detect container format for provided file name. |
| 431 | pub fn detect_format_by_name(name: &str) -> Option<&'static str> { |
| 432 | if name.is_empty() { |
| 433 | return None; |
| 434 | } |
| 435 | let lname = name.to_lowercase(); |
| 436 | for detector in DETECTORS { |
| 437 | for ext in detector.extensions.split(',') { |
| 438 | if lname.ends_with(ext) { |
| 439 | return Some(detector.demux_name); |
| 440 | } |
| 441 | } |
| 442 | } |
| 443 | None |
| 444 | } |
| 445 | |
#[cfg(test)]
mod test {
    use super::*;
    use std::fs::File;
    use nihav_core::io::byteio::*;

    /// Runs detection on the file at `path` and checks that it is recognised
    /// as `expected` by its content markers.
    fn expect_magic_match(path: &str, expected: &str) {
        let mut file = File::open(path).unwrap();
        let mut fr = FileReader::new_read(&mut file);
        let mut br = ByteReader::new(&mut fr);
        let (detected, score) = detect_format(path, &mut br).unwrap();
        assert_eq!(detected, expected);
        assert_eq!(score, DetectionScore::MagicMatches);
    }

    #[test]
    fn test_avi_detect() {
        expect_magic_match("assets/Indeo/laser05.avi", "avi");
    }

    #[test]
    fn test_gdv_detect() {
        expect_magic_match("assets/Game/intro1.gdv", "gdv");
    }
}