]> git.nihav.org Git - nihav.git/blobdiff - nihav-commonfmt/src/codecs/cinepakenc.rs
Acorn Super Moving Blocks Decoder
[nihav.git] / nihav-commonfmt / src / codecs / cinepakenc.rs
index 09bb112e87c02d6ecba10dd669c6357dfb4bd316..6905e423504a44c76e345d1f14b26a5ab32acde0 100644 (file)
@@ -169,7 +169,7 @@ impl MaskWriter {
         }
     }
     fn reset(&mut self) {
-        self.masks.truncate(0);
+        self.masks.clear();
         self.mask = 0;
         self.pos = 0;
     }
@@ -214,20 +214,27 @@ impl MaskWriter {
 #[derive(Clone,Copy,PartialEq)]
 enum QuantMode {
     ELBG,
-    Hybrid,
+    Fast,
     MedianCut,
 }
 
-impl QuantMode {
+impl std::string::ToString for QuantMode {
     fn to_string(&self) -> String {
         match *self {
             QuantMode::ELBG => "elbg".to_string(),
-            QuantMode::Hybrid => "hybrid".to_string(),
+            QuantMode::Fast => "fast".to_string(),
             QuantMode::MedianCut => "mediancut".to_string(),
         }
     }
 }
 
+#[derive(Clone,Copy,PartialEq)]
+enum CodingMode {
+    Skip,
+    V1,
+    V4
+}
+
 struct CinepakEncoder {
     stream:     Option<NAStreamRef>,
     lastfrm:    Option<NAVideoBufferRef<u8>>,
@@ -236,13 +243,16 @@ struct CinepakEncoder {
     key_int:    u8,
     qmode:      QuantMode,
     quality:    u8,
+    refine:     bool,
     nstrips:    usize,
+    force_v1:   bool,
+    cur_strip:  usize,
     v1_entries: Vec<YUVCode>,
     v4_entries: Vec<YUVCode>,
-    v1_cb:      [YUVCode; 256],
-    v4_cb:      [YUVCode; 256],
-    v1_cur_cb:  [YUVCode; 256],
-    v4_cur_cb:  [YUVCode; 256],
+    v1_cb:      Vec<[YUVCode; 256]>,
+    v4_cb:      Vec<[YUVCode; 256]>,
+    v1_cur_cb:  Vec<[YUVCode; 256]>,
+    v4_cur_cb:  Vec<[YUVCode; 256]>,
     v1_len:     usize,
     v4_len:     usize,
     v1_idx:     Vec<u8>,
@@ -251,12 +261,24 @@ struct CinepakEncoder {
     rng:        RNG,
     masks:      MaskWriter,
     skip_dist:  Vec<u32>,
+    fst_bins:   [Vec<YUVCode>; 4],
+    v1_cand:    Vec<YUVCode>,
+    v4_cand:    Vec<YUVCode>,
+    cmode:      Vec<CodingMode>,
 }
 
 fn avg4(a: u8, b: u8, c: u8, d: u8) -> u8 {
     ((u16::from(a) + u16::from(b) + u16::from(c) + u16::from(d) + 3) >> 2) as u8
 }
 
+fn variance(a: u8, mean: u8) -> u32 {
+    if a >= mean {
+        u32::from(a - mean) * u32::from(a - mean)
+    } else {
+        u32::from(mean - a) * u32::from(mean - a)
+    }
+}
+
 fn patch_size(bw: &mut ByteWriter, pos: u64) -> EncoderResult<()> {
     let size = bw.tell() - pos;
     bw.seek(SeekFrom::Current(-((size + 3) as i64)))?;
@@ -265,6 +287,51 @@ fn patch_size(bw: &mut ByteWriter, pos: u64) -> EncoderResult<()> {
     Ok(())
 }
 
+fn elbg_quant(entries: &[YUVCode], codebook: &mut [YUVCode]) -> usize {
+    let cb_len = quantise_median_cut::<YUVCode, YUVCodeSum>(entries, codebook);
+    if cb_len < codebook.len() {
+        cb_len
+    } else {
+        let mut elbg: ELBG<YUVCode, YUVCodeSum> = ELBG::new(codebook);
+        elbg.quantise(entries, codebook)
+    }
+}
+
+fn quant_fast(bins: &mut [Vec<YUVCode>; 4], entries: &[YUVCode], codebook: &mut [YUVCode]) -> usize {
+    for bin in bins.iter_mut() {
+        bin.clear();
+    }
+    for &entry in entries.iter() {
+        let y_avg = avg4(entry.y[0], entry.y[1], entry.y[2], entry.y[3]);
+        let dist = entry.y.iter().fold(0u32, |acc, &x| acc + variance(x, y_avg));
+        let ilog = if dist == 0 { 0 } else { 32 - dist.leading_zeros() };
+        let bin = match ilog {
+                0..=3 => &mut bins[0],
+                4..=7 => &mut bins[1],
+                8..=11 => &mut bins[2],
+                _ => &mut bins[3],
+            };
+        bin.push(entry);
+    }
+    let mut free_cw = codebook.len();
+    let mut entries_left = entries.len();
+    let mut offset = 0;
+    for bin in bins.iter() {
+        if bin.is_empty() {
+            continue;
+        }
+        if free_cw == 0 || entries_left == 0 {
+            break;
+        }
+        let target = (free_cw * bin.len() + entries_left - 1) / entries_left;
+        let cur_len = elbg_quant(bin, &mut codebook[offset..][..target]);
+        offset += cur_len;
+        free_cw -= cur_len;
+        entries_left -= bin.len();
+    }
+    offset
+}
+
 impl CinepakEncoder {
     fn new() -> Self {
         Self {
@@ -272,16 +339,19 @@ impl CinepakEncoder {
             pkt:        None,
             lastfrm:    None,
             frmcount:   0,
-            qmode:      QuantMode::MedianCut,
+            qmode:      QuantMode::Fast,
             key_int:    25,
             quality:    0,
+            refine:     false,
             nstrips:    2,
+            force_v1:   false,
+            cur_strip:  0,
             v1_entries: Vec::new(),
             v4_entries: Vec::new(),
-            v1_cb:      [YUVCode::default(); 256],
-            v4_cb:      [YUVCode::default(); 256],
-            v1_cur_cb:  [YUVCode::default(); 256],
-            v4_cur_cb:  [YUVCode::default(); 256],
+            v1_cb:      Vec::with_capacity(2),
+            v4_cb:      Vec::with_capacity(2),
+            v1_cur_cb:  Vec::with_capacity(2),
+            v4_cur_cb:  Vec::with_capacity(2),
             v1_len:     0,
             v4_len:     0,
             grayscale:  false,
@@ -290,6 +360,10 @@ impl CinepakEncoder {
             v4_idx:     Vec::new(),
             masks:      MaskWriter::new(),
             skip_dist:  Vec::new(),
+            fst_bins:   [Vec::new(), Vec::new(), Vec::new(), Vec::new()],
+            v1_cand:    Vec::new(),
+            v4_cand:    Vec::new(),
+            cmode:      Vec::new(),
         }
     }
     fn read_strip(&mut self, in_frm: &NAVideoBuffer<u8>, start: usize, end: usize) {
@@ -301,8 +375,8 @@ impl CinepakEncoder {
         let mut voff = in_frm.get_offset(2) + start / 2 * vstride;
         let (width, _) = in_frm.get_dimensions(0);
         let data = in_frm.get_data();
-        self.v1_entries.truncate(0);
-        self.v4_entries.truncate(0);
+        self.v1_entries.clear();
+        self.v4_entries.clear();
         for _ in (start..end).step_by(4) {
             for x in (0..width).step_by(4) {
                 let mut yblk = [0; 16];
@@ -358,18 +432,18 @@ impl CinepakEncoder {
         }
         (idx as u8, min_dist)
     }
-    fn can_update_cb(new_cb: &[YUVCode; 256], old_cb: &[YUVCode; 256], cb_size: usize) -> bool {
+    fn can_update_cb(new_cb: &[YUVCode], old_cb: &[YUVCode], cb_size: usize) -> bool {
         let mut skip_count = 0;
         for (new, old) in new_cb.iter().zip(old_cb.iter()) {
             if new == old {
                 skip_count += 1;
             }
         }
-        let full_size = cb_size * 256;
-        let upd_size = cb_size * (256 - skip_count) + 64;
+        let full_size = cb_size * new_cb.len();
+        let upd_size = cb_size * (new_cb.len() - skip_count) + (new_cb.len() + 31) / 32 * 4;
         upd_size < full_size
     }
-    fn write_cb(bw: &mut ByteWriter, mut id: u8, new_cb: &[YUVCode; 256], old_cb: &[YUVCode; 256], grayscale: bool, update: bool) -> EncoderResult<()> {
+    fn write_cb(bw: &mut ByteWriter, mut id: u8, new_cb: &[YUVCode], old_cb: &[YUVCode], grayscale: bool, update: bool, num_elem: usize) -> EncoderResult<()> {
         if grayscale {
             id |= 4;
         }
@@ -380,7 +454,7 @@ impl CinepakEncoder {
         bw.write_u24be(0)?;
         let chunk_pos = bw.tell();
         if !update {
-            for entry in new_cb.iter() {
+            for entry in new_cb.iter().take(num_elem) {
                 bw.write_buf(&entry.y)?;
                 if !grayscale {
                     bw.write_byte(entry.u ^ 0x80)?;
@@ -388,8 +462,8 @@ impl CinepakEncoder {
                 }
             }
         } else {
-            let mut end = 256;
-            for (i, (ncw, ocw)) in new_cb.iter().rev().zip(old_cb.iter().rev()).enumerate() {
+            let mut end = num_elem;
+            for (i, (ncw, ocw)) in new_cb.iter().zip(old_cb.iter()).enumerate().take(num_elem).rev() {
                 if ncw == ocw {
                     end = i;
                 } else {
@@ -436,7 +510,7 @@ impl CinepakEncoder {
             for _ in (start..end).step_by(4) {
                 for x in (0..width).step_by(4) {
                     if cur_bit == 0 {
-                        if !intra || self.v1_idx.len() > 0 {
+                        if !intra || !self.v1_idx.is_empty() {
                             cur_mask = *miter.next().unwrap();
                         } else {
                             cur_mask = 0xFFFFFFFF;
@@ -456,7 +530,7 @@ impl CinepakEncoder {
                     }
                     if (cur_mask & cur_bit) == 0 {
                         let idx = *v1_iter.next().unwrap() as usize;
-                        let cb = &self.v1_cur_cb[idx];
+                        let cb = &self.v1_cur_cb[self.cur_strip][idx];
 
                         let mut coff = yoff + x;
                         data[coff]     = cb.y[0]; data[coff + 1] = cb.y[0];
@@ -484,13 +558,13 @@ impl CinepakEncoder {
                         }
                     } else {
                         let idx0 = *v4_iter.next().unwrap() as usize;
-                        let cb0 = &self.v4_cur_cb[idx0];
+                        let cb0 = &self.v4_cur_cb[self.cur_strip][idx0];
                         let idx1 = *v4_iter.next().unwrap() as usize;
-                        let cb1 = &self.v4_cur_cb[idx1];
+                        let cb1 = &self.v4_cur_cb[self.cur_strip][idx1];
                         let idx2 = *v4_iter.next().unwrap() as usize;
-                        let cb2 = &self.v4_cur_cb[idx2];
+                        let cb2 = &self.v4_cur_cb[self.cur_strip][idx2];
                         let idx3 = *v4_iter.next().unwrap() as usize;
-                        let cb3 = &self.v4_cur_cb[idx3];
+                        let cb3 = &self.v4_cur_cb[self.cur_strip][idx3];
 
                         let mut coff = yoff + x;
                         data[coff]     = cb0.y[0]; data[coff + 1] = cb0.y[1];
@@ -528,7 +602,7 @@ impl CinepakEncoder {
         }
     }
     fn calc_skip_dist(&mut self, in_frm: &NAVideoBuffer<u8>, start: usize, end: usize) {
-        self.skip_dist.truncate(0);
+        self.skip_dist.clear();
         if let Some(ref ref_frm) = self.lastfrm {
             let rystride  = ref_frm.get_stride(0);
             let mut ryoff = ref_frm.get_offset(0) + start * rystride;
@@ -602,35 +676,82 @@ impl CinepakEncoder {
     fn quant_vectors(&mut self) {
         match self.qmode {
             QuantMode::ELBG => {
-                let mut elbg_v1: ELBG<YUVCode, YUVCodeSum> = ELBG::new(&self.v1_cb);
-                let mut elbg_v4: ELBG<YUVCode, YUVCodeSum> = ELBG::new(&self.v4_cb);
-
-                for entry in self.v1_cb.iter_mut().skip(self.v1_len) {
-                    self.rng.fill_entry(entry);
+                self.v1_len = elbg_quant(&self.v1_entries, &mut self.v1_cur_cb[self.cur_strip]);
+                self.v4_len = if !self.force_v1 {
+                        elbg_quant(&self.v4_entries, &mut self.v4_cur_cb[self.cur_strip])
+                    } else {
+                        0
+                    };
+            },
+            QuantMode::Fast => {
+                for bin in self.fst_bins.iter_mut() {
+                    bin.clear();
                 }
-                for entry in self.v4_cb.iter_mut().skip(self.v4_len) {
-                    self.rng.fill_entry(entry);
+                self.v1_len = quant_fast(&mut self.fst_bins, &self.v1_entries, &mut self.v1_cur_cb[self.cur_strip]);
+                self.v4_len = if !self.force_v1 {
+                        quant_fast(&mut self.fst_bins, &self.v4_entries, &mut self.v4_cur_cb[self.cur_strip])
+                    } else {
+                        0
+                    };
+            },
+            QuantMode::MedianCut => {
+                self.v1_len = quantise_median_cut::<YUVCode, YUVCodeSum>(&self.v1_entries, &mut self.v1_cur_cb[self.cur_strip]);
+                if !self.force_v1 {
+                    self.v4_len = quantise_median_cut::<YUVCode, YUVCodeSum>(&self.v4_entries, &mut self.v4_cur_cb[self.cur_strip]);
+                } else {
+                    self.v4_len = 0;
                 }
+            },
+        };
 
-                self.v1_len = elbg_v1.quantise(&self.v1_entries, &mut self.v1_cur_cb);
-                self.v4_len = elbg_v4.quantise(&self.v4_entries, &mut self.v4_cur_cb);
+        for e in self.v1_cur_cb[self.cur_strip].iter_mut().skip(self.v1_len) { *e = YUVCode::default(); }
+        for e in self.v4_cur_cb[self.cur_strip].iter_mut().skip(self.v4_len) { *e = YUVCode::default(); }
+    }
+    fn refine_vectors(&mut self) {
+        match self.qmode {
+            QuantMode::ELBG => {
+                self.v1_len = if !self.v1_cand.is_empty() {
+                        elbg_quant(&self.v1_cand, &mut self.v1_cur_cb[self.cur_strip])
+                    } else {
+                        0
+                    };
+                self.v4_len = if !self.force_v1 && !self.v4_cand.is_empty() {
+                        elbg_quant(&self.v4_cand, &mut self.v4_cur_cb[self.cur_strip])
+                    } else {
+                        0
+                    };
             },
-            QuantMode::Hybrid => {
-                quantise_median_cut::<YUVCode, YUVCodeSum>(&self.v1_entries, &mut self.v1_cur_cb);
-                quantise_median_cut::<YUVCode, YUVCodeSum>(&self.v4_entries, &mut self.v4_cur_cb);
-                let mut elbg_v1: ELBG<YUVCode, YUVCodeSum> = ELBG::new(&self.v1_cur_cb);
-                let mut elbg_v4: ELBG<YUVCode, YUVCodeSum> = ELBG::new(&self.v4_cur_cb);
-                self.v1_len = elbg_v1.quantise(&self.v1_entries, &mut self.v1_cur_cb);
-                self.v4_len = elbg_v4.quantise(&self.v4_entries, &mut self.v4_cur_cb);
+            QuantMode::Fast => {
+                for bin in self.fst_bins.iter_mut() {
+                    bin.clear();
+                }
+                self.v1_len = if !self.v1_cand.is_empty() {
+                        quant_fast(&mut self.fst_bins, &self.v1_cand, &mut self.v1_cur_cb[self.cur_strip])
+                    } else {
+                        0
+                    };
+                self.v4_len = if !self.force_v1 && !self.v4_cand.is_empty() {
+                        quant_fast(&mut self.fst_bins, &self.v4_cand, &mut self.v4_cur_cb[self.cur_strip])
+                    } else {
+                        0
+                    };
             },
             QuantMode::MedianCut => {
-                self.v1_len = quantise_median_cut::<YUVCode, YUVCodeSum>(&self.v1_entries, &mut self.v1_cur_cb);
-                self.v4_len = quantise_median_cut::<YUVCode, YUVCodeSum>(&self.v4_entries, &mut self.v4_cur_cb);
+                self.v1_len = if !self.v1_cand.is_empty() {
+                        quantise_median_cut::<YUVCode, YUVCodeSum>(&self.v1_cand, &mut self.v1_cur_cb[self.cur_strip])
+                    } else {
+                        0
+                    };
+                if !self.force_v1 && !self.v4_cand.is_empty() {
+                    self.v4_len = quantise_median_cut::<YUVCode, YUVCodeSum>(&self.v4_cand, &mut self.v4_cur_cb[self.cur_strip]);
+                } else {
+                    self.v4_len = 0;
+                }
             },
         };
 
-        for e in self.v1_cur_cb.iter_mut().skip(self.v1_len) { *e = YUVCode::default(); }
-        for e in self.v4_cur_cb.iter_mut().skip(self.v4_len) { *e = YUVCode::default(); }
+        for e in self.v1_cur_cb[self.cur_strip].iter_mut().skip(self.v1_len) { *e = YUVCode::default(); }
+        for e in self.v4_cur_cb[self.cur_strip].iter_mut().skip(self.v4_len) { *e = YUVCode::default(); }
     }
     fn encode_intra(&mut self, bw: &mut ByteWriter, in_frm: &NAVideoBuffer<u8>) -> EncoderResult<bool> {
         let (width, height) = in_frm.get_dimensions(0);
@@ -649,51 +770,103 @@ impl CinepakEncoder {
         bw.write_u16be(height as u16)?;
         bw.write_u16be(self.nstrips as u16)?;
 
-        for entry in self.v1_cb.iter_mut() {
+        self.cur_strip = 0;
+        for entry in self.v1_cb[self.cur_strip].iter_mut() {
             self.rng.fill_entry(entry);
         }
-        for entry in self.v4_cb.iter_mut() {
+        for entry in self.v4_cb[self.cur_strip].iter_mut() {
             self.rng.fill_entry(entry);
         }
         while start_line < height {
             self.read_strip(in_frm, start_line, end_line);
 
+            if self.cur_strip > 0 {
+                self.v1_cb[self.cur_strip] = self.v1_cb[self.cur_strip - 1];
+                self.v4_cb[self.cur_strip] = self.v4_cb[self.cur_strip - 1];
+            }
             self.quant_vectors();
             if self.grayscale {
-                for cw in self.v1_cur_cb.iter_mut() {
+                for cw in self.v1_cur_cb[self.cur_strip].iter_mut() {
                     cw.u = 128;
                     cw.v = 128;
                 }
-                for cw in self.v4_cur_cb.iter_mut() {
+                for cw in self.v4_cur_cb[self.cur_strip].iter_mut() {
                     cw.u = 128;
                     cw.v = 128;
                 }
             }
 
-            self.v1_idx.truncate(0);
-            self.v4_idx.truncate(0);
+            self.v1_idx.clear();
+            self.v4_idx.clear();
             self.masks.reset();
 
+            self.cmode.clear();
+            self.v1_cand.clear();
+            self.v4_cand.clear();
             for (v1_entry, v4_entries) in self.v1_entries.iter().zip(self.v4_entries.chunks(4)) {
-                let (v1_idx, v1_dist) = Self::find_nearest(&self.v1_cur_cb[..self.v1_len], *v1_entry);
-                if v1_dist == 0 {
-                    self.masks.put_v1();
-                    self.v1_idx.push(v1_idx);
+                let (v1_idx, v1_dist) = Self::find_nearest(&self.v1_cur_cb[self.cur_strip][..self.v1_len], *v1_entry);
+                if v1_dist == 0 || self.force_v1 {
+                    if !self.refine {
+                        self.masks.put_v1();
+                        self.v1_idx.push(v1_idx);
+                    } else {
+                        self.cmode.push(CodingMode::V1);
+                        self.v1_cand.push(*v1_entry);
+                    }
                     continue;
                 }
-                let (v40_idx, v40_dist) = Self::find_nearest(&self.v4_cur_cb[..self.v4_len], v4_entries[0]);
-                let (v41_idx, v41_dist) = Self::find_nearest(&self.v4_cur_cb[..self.v4_len], v4_entries[1]);
-                let (v42_idx, v42_dist) = Self::find_nearest(&self.v4_cur_cb[..self.v4_len], v4_entries[2]);
-                let (v43_idx, v43_dist) = Self::find_nearest(&self.v4_cur_cb[..self.v4_len], v4_entries[3]);
+                let (v40_idx, v40_dist) = Self::find_nearest(&self.v4_cur_cb[self.cur_strip][..self.v4_len], v4_entries[0]);
+                let (v41_idx, v41_dist) = Self::find_nearest(&self.v4_cur_cb[self.cur_strip][..self.v4_len], v4_entries[1]);
+                let (v42_idx, v42_dist) = Self::find_nearest(&self.v4_cur_cb[self.cur_strip][..self.v4_len], v4_entries[2]);
+                let (v43_idx, v43_dist) = Self::find_nearest(&self.v4_cur_cb[self.cur_strip][..self.v4_len], v4_entries[3]);
                 if v40_dist + v41_dist + v42_dist + v43_dist > v1_dist {
-                    self.masks.put_v4();
-                    self.v4_idx.push(v40_idx);
-                    self.v4_idx.push(v41_idx);
-                    self.v4_idx.push(v42_idx);
-                    self.v4_idx.push(v43_idx);
+                    if !self.refine {
+                        self.masks.put_v4();
+                        self.v4_idx.push(v40_idx);
+                        self.v4_idx.push(v41_idx);
+                        self.v4_idx.push(v42_idx);
+                        self.v4_idx.push(v43_idx);
+                    } else {
+                        self.cmode.push(CodingMode::V4);
+                        self.v4_cand.extend_from_slice(v4_entries);
+                    }
                 } else {
-                    self.masks.put_v1();
-                    self.v1_idx.push(v1_idx);
+                    if !self.refine {
+                        self.masks.put_v1();
+                        self.v1_idx.push(v1_idx);
+                    } else {
+                        self.cmode.push(CodingMode::V1);
+                        self.v1_cand.push(*v1_entry);
+                    }
+                }
+            }
+            if self.refine {
+                self.refine_vectors();
+                let mut v1_src = self.v1_cand.iter();
+                let mut v4_src = self.v4_cand.chunks_exact(4);
+                for &cmode in self.cmode.iter() {
+                    match cmode {
+                        CodingMode::Skip => unreachable!(),
+                        CodingMode::V1 => {
+                            let v1_entry = v1_src.next().unwrap();
+                            let (v1_idx, _) = Self::find_nearest(&self.v1_cur_cb[self.cur_strip][..self.v1_len], *v1_entry);
+                            self.masks.put_v1();
+                            self.v1_idx.push(v1_idx);
+                        },
+                        CodingMode::V4 => {
+                            let v4_entries = v4_src.next().unwrap();
+                            let (v40_idx, _) = Self::find_nearest(&self.v4_cur_cb[self.cur_strip][..self.v4_len], v4_entries[0]);
+                            let (v41_idx, _) = Self::find_nearest(&self.v4_cur_cb[self.cur_strip][..self.v4_len], v4_entries[1]);
+                            let (v42_idx, _) = Self::find_nearest(&self.v4_cur_cb[self.cur_strip][..self.v4_len], v4_entries[2]);
+                            let (v43_idx, _) = Self::find_nearest(&self.v4_cur_cb[self.cur_strip][..self.v4_len], v4_entries[3]);
+
+                            self.masks.put_v4();
+                            self.v4_idx.push(v40_idx);
+                            self.v4_idx.push(v41_idx);
+                            self.v4_idx.push(v42_idx);
+                            self.v4_idx.push(v43_idx);
+                        },
+                    };
                 }
             }
             self.masks.end();
@@ -701,8 +874,8 @@ impl CinepakEncoder {
             let mut is_intra_strip = start_line == 0;
             let (upd_v1, upd_v4) = if !is_intra_strip {
                     let cb_size = if self.grayscale { 4 } else { 6 };
-                    (Self::can_update_cb(&self.v1_cur_cb, &self.v1_cb, cb_size),
-                     Self::can_update_cb(&self.v4_cur_cb, &self.v4_cb, cb_size))
+                    (Self::can_update_cb(&self.v1_cur_cb[self.cur_strip][..self.v1_len], &self.v1_cb[self.cur_strip][..self.v1_len], cb_size),
+                     Self::can_update_cb(&self.v4_cur_cb[self.cur_strip][..self.v4_len], &self.v4_cb[self.cur_strip][..self.v4_len], cb_size))
                 } else {
                     (false, false)
                 };
@@ -717,12 +890,12 @@ impl CinepakEncoder {
             bw.write_u16be((end_line - start_line) as u16)?;
             bw.write_u16be(width as u16)?;
 
-            Self::write_cb(bw, 0x20, &self.v4_cur_cb, &self.v4_cb, self.grayscale, upd_v4)?;
-            Self::write_cb(bw, 0x22, &self.v1_cur_cb, &self.v1_cb, self.grayscale, upd_v1)?;
+            Self::write_cb(bw, 0x20, &self.v4_cur_cb[self.cur_strip], &self.v4_cb[self.cur_strip], self.grayscale, upd_v4, self.v4_len)?;
+            Self::write_cb(bw, 0x22, &self.v1_cur_cb[self.cur_strip], &self.v1_cb[self.cur_strip], self.grayscale, upd_v1, self.v1_len)?;
 
             self.render_stripe(true, start_line, end_line);
 
-            if self.v4_idx.len() == 0 {
+            if self.v4_idx.is_empty() {
                 bw.write_byte(0x32)?;
                 bw.write_u24be((self.v1_idx.len() + 4) as u32)?;
                 bw.write_buf(self.v1_idx.as_slice())?;
@@ -759,10 +932,12 @@ impl CinepakEncoder {
 
             patch_size(bw, strip_data_pos)?;
 
-            self.v1_cb.copy_from_slice(&self.v1_cur_cb);
-            self.v4_cb.copy_from_slice(&self.v4_cur_cb);
+            self.v1_cb[self.cur_strip].copy_from_slice(&self.v1_cur_cb[self.cur_strip]);
+            self.v4_cb[self.cur_strip].copy_from_slice(&self.v4_cur_cb[self.cur_strip]);
             start_line = end_line;
             end_line = (end_line + strip_h).min(height);
+
+            self.cur_strip += 1;
         }
         patch_size(bw, frame_data_pos)?;
         Ok(true)
@@ -784,66 +959,127 @@ impl CinepakEncoder {
         bw.write_u16be(height as u16)?;
         bw.write_u16be(self.nstrips as u16)?;
 
+        self.cur_strip = 0;
         while start_line < height {
             self.read_strip(in_frm, start_line, end_line);
             self.calc_skip_dist(in_frm, start_line, end_line);
 
             self.quant_vectors();
             if self.grayscale {
-                for cw in self.v1_cur_cb.iter_mut() {
+                for cw in self.v1_cur_cb[self.cur_strip].iter_mut() {
                     cw.u = 128;
                     cw.v = 128;
                 }
-                for cw in self.v4_cur_cb.iter_mut() {
+                for cw in self.v4_cur_cb[self.cur_strip].iter_mut() {
                     cw.u = 128;
                     cw.v = 128;
                 }
             }
 
-            self.v1_idx.truncate(0);
-            self.v4_idx.truncate(0);
+            self.v1_idx.clear();
+            self.v4_idx.clear();
             self.masks.reset();
 
+            self.cmode.clear();
+            self.v1_cand.clear();
+            self.v4_cand.clear();
+
             let mut skip_iter = self.skip_dist.iter();
             for (v1_entry, v4_entries) in self.v1_entries.iter().zip(self.v4_entries.chunks(4)) {
                 let skip_dist = *skip_iter.next().unwrap();
                 if skip_dist == 0 {
-                    self.masks.put_inter(true);
+                    if !self.refine {
+                        self.masks.put_inter(true);
+                    } else {
+                        self.cmode.push(CodingMode::Skip);
+                    }
                     continue;
                 }
-                let (v1_idx, v1_dist) = Self::find_nearest(&self.v1_cur_cb[..self.v1_len], *v1_entry);
+                let (v1_idx, v1_dist) = Self::find_nearest(&self.v1_cur_cb[self.cur_strip][..self.v1_len], *v1_entry);
                 if skip_dist < v1_dist {
-                    self.masks.put_inter(true);
+                    if !self.refine {
+                        self.masks.put_inter(true);
+                    } else {
+                        self.cmode.push(CodingMode::Skip);
+                    }
                     continue;
-                } else {
+                } else if !self.refine {
                     self.masks.put_inter(false);
                 }
-                if v1_dist == 0 {
-                    self.masks.put_v1();
-                    self.v1_idx.push(v1_idx);
+                if v1_dist == 0 || self.force_v1 {
+                    if !self.refine {
+                        self.masks.put_v1();
+                        self.v1_idx.push(v1_idx);
+                    } else {
+                        self.cmode.push(CodingMode::V1);
+                        self.v1_cand.push(*v1_entry);
+                    }
                     continue;
                 }
-                let (v40_idx, v40_dist) = Self::find_nearest(&self.v4_cur_cb[..self.v4_len], v4_entries[0]);
-                let (v41_idx, v41_dist) = Self::find_nearest(&self.v4_cur_cb[..self.v4_len], v4_entries[1]);
-                let (v42_idx, v42_dist) = Self::find_nearest(&self.v4_cur_cb[..self.v4_len], v4_entries[2]);
-                let (v43_idx, v43_dist) = Self::find_nearest(&self.v4_cur_cb[..self.v4_len], v4_entries[3]);
+                let (v40_idx, v40_dist) = Self::find_nearest(&self.v4_cur_cb[self.cur_strip][..self.v4_len], v4_entries[0]);
+                let (v41_idx, v41_dist) = Self::find_nearest(&self.v4_cur_cb[self.cur_strip][..self.v4_len], v4_entries[1]);
+                let (v42_idx, v42_dist) = Self::find_nearest(&self.v4_cur_cb[self.cur_strip][..self.v4_len], v4_entries[2]);
+                let (v43_idx, v43_dist) = Self::find_nearest(&self.v4_cur_cb[self.cur_strip][..self.v4_len], v4_entries[3]);
                 if v40_dist + v41_dist + v42_dist + v43_dist > v1_dist {
-                    self.masks.put_v4();
-                    self.v4_idx.push(v40_idx);
-                    self.v4_idx.push(v41_idx);
-                    self.v4_idx.push(v42_idx);
-                    self.v4_idx.push(v43_idx);
+                    if !self.refine {
+                        self.masks.put_v4();
+                        self.v4_idx.push(v40_idx);
+                        self.v4_idx.push(v41_idx);
+                        self.v4_idx.push(v42_idx);
+                        self.v4_idx.push(v43_idx);
+                    } else {
+                        self.cmode.push(CodingMode::V4);
+                        self.v4_cand.extend_from_slice(v4_entries);
+                    }
                 } else {
-                    self.masks.put_v1();
-                    self.v1_idx.push(v1_idx);
+                    if !self.refine {
+                        self.masks.put_v1();
+                        self.v1_idx.push(v1_idx);
+                    } else {
+                        self.cmode.push(CodingMode::V1);
+                        self.v1_cand.push(*v1_entry);
+                    }
+                }
+            }
+            if self.refine {
+                self.refine_vectors();
+                let mut v1_src = self.v1_cand.iter();
+                let mut v4_src = self.v4_cand.chunks_exact(4);
+                for &cmode in self.cmode.iter() {
+                    match cmode {
+                        CodingMode::Skip => {
+                            self.masks.put_inter(true);
+                        },
+                        CodingMode::V1 => {
+                            let v1_entry = v1_src.next().unwrap();
+                            let (v1_idx, _) = Self::find_nearest(&self.v1_cur_cb[self.cur_strip][..self.v1_len], *v1_entry);
+                            self.masks.put_inter(false);
+                            self.masks.put_v1();
+                            self.v1_idx.push(v1_idx);
+                        },
+                        CodingMode::V4 => {
+                            let v4_entries = v4_src.next().unwrap();
+                            let (v40_idx, _) = Self::find_nearest(&self.v4_cur_cb[self.cur_strip][..self.v4_len], v4_entries[0]);
+                            let (v41_idx, _) = Self::find_nearest(&self.v4_cur_cb[self.cur_strip][..self.v4_len], v4_entries[1]);
+                            let (v42_idx, _) = Self::find_nearest(&self.v4_cur_cb[self.cur_strip][..self.v4_len], v4_entries[2]);
+                            let (v43_idx, _) = Self::find_nearest(&self.v4_cur_cb[self.cur_strip][..self.v4_len], v4_entries[3]);
+
+                            self.masks.put_inter(false);
+                            self.masks.put_v4();
+                            self.v4_idx.push(v40_idx);
+                            self.v4_idx.push(v41_idx);
+                            self.v4_idx.push(v42_idx);
+                            self.v4_idx.push(v43_idx);
+                        },
+                    };
                 }
             }
             self.masks.end();
 
             let (upd_v1, upd_v4) = {
                     let cb_size = if self.grayscale { 4 } else { 6 };
-                    (Self::can_update_cb(&self.v1_cur_cb, &self.v1_cb, cb_size),
-                     Self::can_update_cb(&self.v4_cur_cb, &self.v4_cb, cb_size))
+                    (Self::can_update_cb(&self.v1_cur_cb[self.cur_strip][..self.v1_len], &self.v1_cb[self.cur_strip][..self.v1_len], cb_size),
+                     Self::can_update_cb(&self.v4_cur_cb[self.cur_strip][..self.v4_len], &self.v4_cb[self.cur_strip][..self.v4_len], cb_size))
                 };
             bw.write_byte(0x11)?;
             bw.write_u24be(0)?; // strip size
@@ -853,8 +1089,8 @@ impl CinepakEncoder {
             bw.write_u16be((end_line - start_line) as u16)?;
             bw.write_u16be(width as u16)?;
 
-            Self::write_cb(bw, 0x20, &self.v4_cur_cb, &self.v4_cb, self.grayscale, upd_v4)?;
-            Self::write_cb(bw, 0x22, &self.v1_cur_cb, &self.v1_cb, self.grayscale, upd_v1)?;
+            Self::write_cb(bw, 0x20, &self.v4_cur_cb[self.cur_strip], &self.v4_cb[self.cur_strip], self.grayscale, upd_v4, self.v4_len)?;
+            Self::write_cb(bw, 0x22, &self.v1_cur_cb[self.cur_strip], &self.v1_cb[self.cur_strip], self.grayscale, upd_v1, self.v1_len)?;
 
             self.render_stripe(false, start_line, end_line);
 
@@ -873,7 +1109,7 @@ impl CinepakEncoder {
             let mut skip = true;
             for mask in self.masks.masks.iter() {
                 bw.write_u32be(*mask)?;
-                if *mask == 0 { continue; }
+                if *mask == 0 && skip { continue; }
                 let mut bit = 1 << 31;
                 while bit > 0 {
                     if skip {
@@ -899,10 +1135,12 @@ impl CinepakEncoder {
 
             patch_size(bw, strip_data_pos)?;
 
-            self.v1_cb.copy_from_slice(&self.v1_cur_cb);
-            self.v4_cb.copy_from_slice(&self.v4_cur_cb);
+            self.v1_cb[self.cur_strip].copy_from_slice(&self.v1_cur_cb[self.cur_strip]);
+            self.v4_cb[self.cur_strip].copy_from_slice(&self.v4_cur_cb[self.cur_strip]);
             start_line = end_line;
             end_line = (end_line + strip_h).min(height);
+
+            self.cur_strip += 1;
         }
         patch_size(bw, frame_data_pos)?;
         Ok(true)
@@ -913,20 +1151,22 @@ impl NAEncoder for CinepakEncoder {
     fn negotiate_format(&self, encinfo: &EncodeParameters) -> EncoderResult<EncodeParameters> {
         match encinfo.format {
             NACodecTypeInfo::None => {
-                let mut ofmt = EncodeParameters::default();
-                ofmt.format = NACodecTypeInfo::Video(NAVideoInfo::new(0, 0, true, YUV420_FORMAT));
-                Ok(ofmt)
+                Ok(EncodeParameters {
+                        format: NACodecTypeInfo::Video(NAVideoInfo::new(0, 0, true, YUV420_FORMAT)),
+                        ..Default::default()
+                    })
             },
-            NACodecTypeInfo::Audio(_) => return Err(EncoderError::FormatError),
+            NACodecTypeInfo::Audio(_) => Err(EncoderError::FormatError),
             NACodecTypeInfo::Video(vinfo) => {
                 let pix_fmt = if vinfo.format == GRAY_FORMAT { GRAY_FORMAT } else { YUV420_FORMAT };
-                let outinfo = NAVideoInfo::new((vinfo.width + 3) & !3, (vinfo.height + 3) & !3, true, pix_fmt);
+                let outinfo = NAVideoInfo::new((vinfo.width + 3) & !3, (vinfo.height + 3) & !3, false, pix_fmt);
                 let mut ofmt = *encinfo;
                 ofmt.format = NACodecTypeInfo::Video(outinfo);
                 Ok(ofmt)
             }
         }
     }
+    fn get_capabilities(&self) -> u64 { 0 }
     fn init(&mut self, stream_id: u32, encinfo: EncodeParameters) -> EncoderResult<NAStreamRef> {
         match encinfo.format {
             NACodecTypeInfo::None => Err(EncoderError::FormatError),
@@ -943,8 +1183,8 @@ impl NAEncoder for CinepakEncoder {
                 }
 
                 let out_info = NAVideoInfo::new(vinfo.width, vinfo.height, false, vinfo.format);
-                let info = NACodecInfo::new("cinepak", NACodecTypeInfo::Video(out_info.clone()), None);
-                let mut stream = NAStream::new(StreamType::Video, stream_id, info, encinfo.tb_num, encinfo.tb_den);
+                let info = NACodecInfo::new("cinepak", NACodecTypeInfo::Video(out_info), None);
+                let mut stream = NAStream::new(StreamType::Video, stream_id, info, encinfo.tb_num, encinfo.tb_den, 0);
                 stream.set_num(stream_id as usize);
                 let stream = stream.into_ref();
 
@@ -968,6 +1208,25 @@ impl NAEncoder for CinepakEncoder {
     fn encode(&mut self, frm: &NAFrame) -> EncoderResult<()> {
         let buf = frm.get_buffer();
         if let Some(ref vbuf) = buf.get_vbuf() {
+            if self.nstrips == 0 {
+                let (w, h) = vbuf.get_dimensions(0);
+                self.nstrips = ((((w * h) >> 4) + 1200) / 2400).max(1).min(3);
+                let strip_h = ((h + self.nstrips - 1) / self.nstrips + 3) & !3;
+                self.nstrips = (h + strip_h - 1) / strip_h;
+            }
+            let cur_strips = self.v1_cb.len();
+            if cur_strips != self.nstrips {
+                self.frmcount = 0;
+            }
+            if cur_strips < self.nstrips {
+                for _ in cur_strips..self.nstrips {
+                    self.v1_cb.push([YUVCode::default(); 256]);
+                    self.v4_cb.push([YUVCode::default(); 256]);
+                    self.v1_cur_cb.push([YUVCode::default(); 256]);
+                    self.v4_cur_cb.push([YUVCode::default(); 256]);
+                }
+            }
+
             let mut dbuf = Vec::with_capacity(4);
             let mut gw   = GrowableMemoryWriter::new_write(&mut dbuf);
             let mut bw   = ByteWriter::new(&mut gw);
@@ -1006,7 +1265,13 @@ const ENCODER_OPTS: &[NAOptionDefinition] = &[
         opt_type: NAOptionDefinitionType::Int(Some(0), Some(16)) },
     NAOptionDefinition {
         name: "quant_mode", description: "Quantisation mode",
-        opt_type: NAOptionDefinitionType::String(Some(&["elbg", "hybrid", "mediancut"])) },
+        opt_type: NAOptionDefinitionType::String(Some(&["elbg", "fast", "mediancut"])) },
+    NAOptionDefinition {
+        name: "force_v1", description: "Force coarse (V1-only) mode",
+        opt_type: NAOptionDefinitionType::Bool },
+    NAOptionDefinition {
+        name: "refine", description: "Try to improve coded picture",
+        opt_type: NAOptionDefinitionType::Bool },
 ];
 
 impl NAOptionHandler for CinepakEncoder {
@@ -1027,15 +1292,25 @@ impl NAOptionHandler for CinepakEncoder {
                             }
                         },
                         "quant_mode" => {
-                            if let NAValue::String(ref str) = option.value {
-                                match str.as_str() {
+                            if let NAValue::String(ref strval) = option.value {
+                                match strval.as_str() {
                                     "elbg"      => self.qmode = QuantMode::ELBG,
-                                    "hybrid"    => self.qmode = QuantMode::Hybrid,
+                                    "fast"      => self.qmode = QuantMode::Fast,
                                     "mediancut" => self.qmode = QuantMode::MedianCut,
                                     _ => {},
                                 };
                             }
                         },
+                        "force_v1" => {
+                            if let NAValue::Bool(val) = option.value {
+                                self.force_v1 = val;
+                            }
+                        },
+                        "refine" => {
+                            if let NAValue::Bool(val) = option.value {
+                                self.refine = val;
+                            }
+                        },
                         _ => {},
                     };
                 }
@@ -1047,6 +1322,8 @@ impl NAOptionHandler for CinepakEncoder {
             KEYFRAME_OPTION => Some(NAValue::Int(i64::from(self.key_int))),
             "nstrips" => Some(NAValue::Int(self.nstrips as i64)),
             "quant_mode" => Some(NAValue::String(self.qmode.to_string())),
+            "force_v1" => Some(NAValue::Bool(self.force_v1)),
+            "refine" => Some(NAValue::Bool(self.refine)),
             _ => None,
         }
     }
@@ -1069,12 +1346,13 @@ mod test {
         let mut dmx_reg = RegisteredDemuxers::new();
         generic_register_all_demuxers(&mut dmx_reg);
         let mut dec_reg = RegisteredDecoders::new();
-        generic_register_all_codecs(&mut dec_reg);
+        generic_register_all_decoders(&mut dec_reg);
         let mut mux_reg = RegisteredMuxers::new();
         generic_register_all_muxers(&mut mux_reg);
         let mut enc_reg = RegisteredEncoders::new();
         generic_register_all_encoders(&mut enc_reg);
 
+        // sample: https://samples.mplayerhq.hu/V-codecs/UCOD/TalkingHead_352x288.avi
         let dec_config = DecoderTestParams {
                 demuxer:        "avi",
                 in_name:        "assets/Misc/TalkingHead_352x288.avi",
@@ -1103,6 +1381,11 @@ mod test {
                 tb_den:  0,
                 flags:   0,
             };
-        test_encoding_to_file(&dec_config, &enc_config, enc_params);
+        let enc_options = &[
+                NAOption { name: "quant_mode", value: NAValue::String("mediancut".to_string()) },
+            ];
+        //test_encoding_to_file(&dec_config, &enc_config, enc_params, enc_options);
+        test_encoding_md5(&dec_config, &enc_config, enc_params, enc_options,
+                          &[0x1d4690c8, 0x3b15b4b3, 0xc2df3c7b, 0x1a25b159]);
     }
 }