]>
Commit | Line | Data |
---|---|---|
5641dccf KS |
1 | use nihav_core::frame::{FrameType, NAVideoBuffer}; |
2 | use nihav_core::codecs::MV; | |
3 | use nihav_core::codecs::blockdsp::edge_emu; | |
47527732 KS |
4 | use super::rv3040::{RV34DSP, RV34MBInfo}; |
5 | ||
/// Saturates a signed 16-bit intermediate value to the `0..=255` range of an 8-bit sample.
fn clip8(a: i16) -> u8 {
    // After clamping the value is guaranteed to fit, so the cast cannot truncate.
    a.max(0).min(255) as u8
}
11 | ||
12 | fn rv3_filter_h(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, bsize: usize, c1: i16, c2: i16) { | |
13 | for _ in 0..bsize { | |
14 | for x in 0..bsize { | |
15 | dst[didx + x] = clip8((-((src[sidx + x - 1] as i16) + (src[sidx + x + 2] as i16)) + (src[sidx + x + 0] as i16) * c1 + (src[sidx + x + 1] as i16) * c2 + 8) >> 4); | |
16 | } | |
17 | sidx += sstride; | |
18 | didx += dstride; | |
19 | } | |
20 | } | |
21 | ||
22 | fn rv3_filter_v(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, bsize: usize, c1: i16, c2: i16) { | |
23 | for _ in 0..bsize { | |
24 | for x in 0..bsize { | |
25 | dst[didx + x] = clip8((-((src[sidx + x - 1 * sstride] as i16) + (src[sidx + x + 2 * sstride] as i16)) + (src[sidx + x + 0 * sstride] as i16) * c1 + (src[sidx + x + 1 * sstride] as i16) * c2 + 8) >> 4); | |
26 | } | |
27 | sidx += sstride; | |
28 | didx += dstride; | |
29 | } | |
30 | } | |
31 | ||
// Helper for the two-dimensional luma interpolation kernels.
//
// `$s` is the source slice, `$o` the index of the current pixel and `$ss` the source stride.
// All arithmetic is done in `i32`; the two "whole kernel" arms round with `+ 128`, shift right
// by 8 and return the (not yet clipped) result as `i16`.
macro_rules! mc_matrix {
    // One row of the (6, 9, 1) kernel, scaled by the row weight `$c1`.
    ($s: ident, $o: expr, $c1: expr) => (
        ($c1 * 6) * ($s[$o] as i32) + ($c1 * 9) * ($s[$o + 1] as i32) + ($c1) * ($s[$o + 2] as i32)
    );
    // One row of the (-1, $d1, $d2, -1) kernel, scaled by the row weight `$c1`.
    ($s: ident, $o: expr, $c1: expr, $d1: expr, $d2: expr) => (
        -($c1) * ($s[$o - 1] as i32) + ($c1 * $d1) * ($s[$o] as i32) + ($c1 * $d2) * ($s[$o + 1] as i32) + -($c1) * ($s[$o + 2] as i32)
    );
    // Full 4x4 kernel: rows weighted (-1, $c1, $c2, -1), columns weighted (-1, $d1, $d2, -1).
    ($s: ident, $o: expr, $ss: expr, $c1: expr, $c2: expr, $d1: expr, $d2: expr) => (
        ((mc_matrix!($s, $o - $ss,     -1,  $d1, $d2) +
          mc_matrix!($s, $o,           $c1, $d1, $d2) +
          mc_matrix!($s, $o + $ss,     $c2, $d1, $d2) +
          mc_matrix!($s, $o + 2 * $ss, -1,  $d1, $d2) + 128) >> 8) as i16
    );
    // Full 3x3 kernel with rows and columns both weighted (6, 9, 1).
    (m22; $s: ident, $o: expr, $ss: expr) => (
        ((mc_matrix!($s, $o + 0 * $ss, 6) +
          mc_matrix!($s, $o + 1 * $ss, 9) +
          mc_matrix!($s, $o + 2 * $ss, 1) + 128) >> 8) as i16
    );
}
51 | ||
// Generates one luma motion-compensation function named `$name` for a `$size` x `$size` block.
//
// Every generated function has the signature
// `fn(dst, didx, dstride, src, sidx, sstride)` where `didx`/`sidx` are the offsets of the
// top-left pixel and `dstride`/`sstride` the row pitches of the two buffers.
//
// Arms:
// * `copy` - plain block copy;
// * `hor` / `ver` - one-dimensional filtering via `rv3_filter_h` / `rv3_filter_v` with the
//   central tap weights `$c1`, `$c2`;
// * `m11`, `m12`, `m21`, `m22` - two-dimensional filtering via `mc_matrix!`.
macro_rules! mc_func {
    (copy; $name: ident, $size: expr) => (
        fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            for _ in 0..$size {
                dst[didx..][..$size].copy_from_slice(&src[sidx..][..$size]);
                didx += dstride;
                sidx += sstride;
            }
        }
    );
    (hor; $name: ident, $c1: expr, $c2: expr, $size: expr) => (
        fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            rv3_filter_h(dst, didx, dstride, src, sidx, sstride, $size, $c1, $c2);
        }
    );
    (ver; $name: ident, $c1: expr, $c2: expr, $size: expr) => (
        fn $name (dst: &mut [u8], didx: usize, dstride: usize, src: &[u8], sidx: usize, sstride: usize) {
            rv3_filter_v(dst, didx, dstride, src, sidx, sstride, $size, $c1, $c2);
        }
    );
    (m11; $name: ident, $size: expr) => (
        fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            for _ in 0..$size {
                for x in 0..$size {
                    dst[didx + x] = clip8(mc_matrix!(src, sidx + x, sstride, 12, 6, 12, 6));
                }
                didx += dstride;
                sidx += sstride;
            }
        }
    );
    (m12; $name: ident, $size: expr) => (
        fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            for _ in 0..$size {
                for x in 0..$size {
                    dst[didx + x] = clip8(mc_matrix!(src, sidx + x, sstride, 6, 12, 12, 6));
                }
                didx += dstride;
                sidx += sstride;
            }
        }
    );
    (m21; $name: ident, $size: expr) => (
        fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            for _ in 0..$size {
                for x in 0..$size {
                    dst[didx + x] = clip8(mc_matrix!(src, sidx + x, sstride, 12, 6, 6, 12));
                }
                didx += dstride;
                sidx += sstride;
            }
        }
    );
    (m22; $name: ident, $size: expr) => (
        fn $name (dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize) {
            for _ in 0..$size {
                for x in 0..$size {
                    dst[didx + x] = clip8(mc_matrix!(m22; src, sidx + x, sstride));
                }
                didx += dstride;
                sidx += sstride;
            }
        }
    );
}
119 | mc_func!(copy; copy_16, 16); | |
120 | mc_func!(copy; copy_8, 8); | |
121 | mc_func!(hor; luma_mc_10_16, 12, 6, 16); | |
122 | mc_func!(hor; luma_mc_20_16, 6, 12, 16); | |
123 | mc_func!(hor; luma_mc_10_8, 12, 6, 8); | |
124 | mc_func!(hor; luma_mc_20_8, 6, 12, 8); | |
125 | mc_func!(ver; luma_mc_01_16, 12, 6, 16); | |
126 | mc_func!(ver; luma_mc_02_16, 6, 12, 16); | |
127 | mc_func!(ver; luma_mc_01_8, 12, 6, 8); | |
128 | mc_func!(ver; luma_mc_02_8, 6, 12, 8); | |
129 | mc_func!(m11; luma_mc_11_16, 16); | |
130 | mc_func!(m11; luma_mc_11_8, 8); | |
131 | mc_func!(m21; luma_mc_21_16, 16); | |
132 | mc_func!(m21; luma_mc_21_8, 8); | |
133 | mc_func!(m12; luma_mc_12_16, 16); | |
134 | mc_func!(m12; luma_mc_12_8, 8); | |
135 | mc_func!(m22; luma_mc_22_16, 16); | |
136 | mc_func!(m22; luma_mc_22_8, 8); | |
137 | ||
/// Bilinear weight (in eighths) of the sample at the integer position, indexed by the
/// fractional offset.
const RV30_CHROMA_FRAC1: [u16; 3] = [ 8, 5, 3 ];
/// Bilinear weight (in eighths) of the following sample, indexed by the fractional offset.
const RV30_CHROMA_FRAC2: [u16; 3] = [ 0, 3, 5 ];

/// Chroma motion compensation of a `size` x `size` block using bilinear interpolation.
///
/// * `dst`, `didx`, `dstride` - destination buffer, offset of its top-left pixel and row pitch.
/// * `src`, `sidx`, `sstride` - source buffer, offset of its top-left pixel and row pitch.
/// * `x`, `y` - fractional offsets used as indices into the weight tables (must be `0..=2`).
///
/// Unless both offsets are zero, the source must provide one extra column to the right and
/// one extra row below the block (they are read even when their weight is zero).
fn rv30_chroma_mc(dst: &mut [u8], mut didx: usize, dstride: usize, src: &[u8], mut sidx: usize, sstride: usize, size: usize, x: usize, y: usize) {
    if (x == 0) && (y == 0) {
        // Integer position: plain copy, no neighbours required.
        for _ in 0..size {
            dst[didx..][..size].copy_from_slice(&src[sidx..][..size]);
            didx += dstride;
            sidx += sstride;
        }
        return;
    }
    // Weights of the four neighbouring samples; they always sum to 64.
    let a = RV30_CHROMA_FRAC1[x] * RV30_CHROMA_FRAC1[y];
    let b = RV30_CHROMA_FRAC2[x] * RV30_CHROMA_FRAC1[y];
    let c = RV30_CHROMA_FRAC1[x] * RV30_CHROMA_FRAC2[y];
    let d = RV30_CHROMA_FRAC2[x] * RV30_CHROMA_FRAC2[y];
    for _ in 0..size {
        for col in 0..size {
            let pos = sidx + col;
            let sum = a * u16::from(src[pos])
                    + b * u16::from(src[pos + 1])
                    + c * u16::from(src[pos + sstride])
                    + d * u16::from(src[pos + 1 + sstride]) + 32;
            dst[didx + col] = (sum >> 6) as u8;
        }
        didx += dstride;
        sidx += sstride;
    }
}
164 | ||
/// DSP routines for RealVideo 3 decoding.
pub struct RV30DSP {
    /// Luma motion-compensation functions: the first index selects the block size
    /// (0 for 16x16, 1 for 8x8), the second index is `cx + cy * 3` where `cx`/`cy` are the
    /// horizontal/vertical sub-pixel offsets in thirds of a pixel.
    luma_mc: [[fn (&mut [u8], usize, usize, &[u8], usize, usize); 9]; 2],
}
168 | ||
169 | impl RV30DSP { | |
170 | pub fn new() -> Self { | |
171 | RV30DSP { | |
172 | luma_mc: [ | |
173 | [ copy_16, luma_mc_10_16, luma_mc_20_16, | |
174 | luma_mc_01_16, luma_mc_11_16, luma_mc_21_16, | |
175 | luma_mc_02_16, luma_mc_12_16, luma_mc_22_16 ], | |
176 | [ copy_8, luma_mc_10_8, luma_mc_20_8, | |
177 | luma_mc_01_8, luma_mc_11_8, luma_mc_21_8, | |
178 | luma_mc_02_8, luma_mc_12_8, luma_mc_22_8 ] ], | |
179 | } | |
180 | } | |
181 | } | |
182 | ||
// Reads element `$o` of the byte slice `$src` widened to `i16` for signed arithmetic.
macro_rules! el {
    ($src: ident, $o: expr) => (i16::from($src[$o]));
}
186 | ||
/// Clips `a` to the symmetric range `-lim..=lim`.
///
/// The lower bound is tested first, so for a non-negative `lim` this is an ordinary clamp.
fn clip_symm(a: i16, lim: i16) -> i16 {
    match a {
        v if v < -lim => -lim,
        v if v > lim => lim,
        v => v,
    }
}
196 | ||
/// Loop filter limit indexed by the macroblock `q` value (0..=31).
///
/// The looked-up value bounds the correction applied by `rv30_loop_filter4`; a limit of zero
/// means the edge is not filtered at all.
const RV30_LOOP_FILTER_STRENGTH: [i16; 32] = [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5
];
201 | ||
// Evaluates to `true` when bit number `$x` (counting from the least significant bit) of the
// integer `$pat` is set.
macro_rules! test_bit {
    ($pat: expr, $x: expr) => ( (($pat >> $x) & 1) != 0 )
}
205 | ||
206 | fn rv30_loop_filter4(pix: &mut [u8], mut off: usize, step: usize, stride: usize, lim: i16) { | |
207 | for _ in 0..4 { | |
208 | let a = el!(pix, off - 2*step); | |
209 | let b = el!(pix, off - step); | |
210 | let c = el!(pix, off); | |
211 | let d = el!(pix, off + step); | |
212 | let diff0 = ((a - d) - (b - c) * 4) >> 3; | |
213 | let diff = clip_symm(diff0, lim); | |
214 | pix[off - step] = clip8(b + diff); | |
215 | pix[off ] = clip8(c - diff); | |
216 | off += stride; | |
217 | } | |
218 | } | |
219 | ||
/// Splits a motion vector component given in thirds of a pixel into its integer part
/// (rounded towards negative infinity) and its fractional part in `0..=2`.
///
/// For example `-1` becomes `(-1, 2)`, i.e. minus one pixel plus two thirds.
fn rv30_div_mv(mv: i16) -> (i16, usize) {
    let quot = mv / 3;
    let rem = mv % 3;
    if rem >= 0 {
        (quot, rem as usize)
    } else {
        // `/` and `%` truncate towards zero; shift to floor semantics for negative values.
        (quot - 1, (rem + 3) as usize)
    }
}
229 | ||
/// Checks whether a `size` x `size` block at `(x + dx, y + dy)`, extended by the given margins,
/// lies completely inside a `w` x `h` picture.
///
/// `e0`/`e1` are the extra pixels needed to the left/right of the block and `e2`/`e3` the
/// extra pixels needed above/below it.
fn check_pos(x: usize, y: usize, size: usize, w: usize, h: usize, dx: i16, dy: i16, e0: isize, e1: isize, e2: isize, e3: isize) -> bool {
    let xn = (x as isize) + (dx as isize);
    let yn = (y as isize) + (dy as isize);
    let span = size as isize;

    if xn - e0 < 0 {
        return false;
    }
    if xn + span + e1 > (w as isize) {
        return false;
    }
    if yn - e2 < 0 {
        return false;
    }
    yn + span + e3 <= (h as isize)
}
236 | ||
/// Extra pixels needed before the block (left or top), indexed by the fractional MV part:
/// none for an integer position, one when the four-tap filter is applied.
const RV30_EDGE1: [isize; 3] = [ 0, 1, 1 ];
/// Extra pixels needed after the block (right or bottom), indexed by the fractional MV part:
/// none for an integer position, two when the four-tap filter is applied.
const RV30_EDGE2: [isize; 3] = [ 0, 2, 2 ];
239 | ||
240 | impl RV34DSP for RV30DSP { | |
241 | fn loop_filter(&self, frame: &mut NAVideoBuffer<u8>, _ftype: FrameType, mbinfo: &[RV34MBInfo], mb_w: usize, row: usize) { | |
242 | let mut offs: [usize; 3] = [0; 3]; | |
243 | let mut stride: [usize; 3] = [0; 3]; | |
244 | ||
245 | for comp in 0..3 { | |
246 | stride[comp] = frame.get_stride(comp); | |
247 | let start = if comp == 0 { row * 16 } else { row * 8 }; | |
248 | offs[comp] = frame.get_offset(comp) + start * stride[comp]; | |
249 | } | |
250 | ||
1a967e6b | 251 | let data = frame.get_data_mut().unwrap(); |
47527732 KS |
252 | let dst: &mut [u8] = data.as_mut_slice(); |
253 | ||
254 | // vertical filter | |
255 | let mut left_cbp = 0; | |
256 | let mut left_lim = 0; | |
257 | let mut left_dbk = 0; | |
258 | let mut mb_pos: usize = row * mb_w; | |
259 | for mb_x in 0..mb_w { | |
260 | let cur_lim = RV30_LOOP_FILTER_STRENGTH[mbinfo[mb_pos].q as usize]; | |
261 | let cur_dbk = mbinfo[mb_pos].deblock; | |
262 | let cur_cbp = mbinfo[mb_pos].cbp_c; | |
263 | let xstart = if mb_x == 0 { 1 } else { 0 }; | |
264 | for y in 0..4 { | |
265 | let yoff = offs[0] + mb_x * 16 + y * 4 * stride[0]; | |
266 | for x in xstart..4 { | |
267 | let cs = x + y*4; | |
268 | let loc_lim; | |
269 | ||
270 | if test_bit!(cur_dbk, cs) { | |
271 | loc_lim = cur_lim; | |
272 | } else if (x == 0) && test_bit!(left_dbk, cs + 3) { | |
273 | loc_lim = left_lim; | |
274 | } else if (x != 0) && test_bit!(cur_dbk, cs - 1) { | |
275 | loc_lim = cur_lim; | |
276 | } else { | |
277 | loc_lim = 0; | |
278 | } | |
279 | if loc_lim != 0 { | |
280 | rv30_loop_filter4(dst, yoff + x * 4, 1, stride[0], loc_lim); | |
281 | } | |
282 | } | |
283 | } | |
284 | ||
285 | for comp in 1..3 { | |
286 | for y in 0..2 { | |
287 | let coff = offs[comp] + mb_x * 8 + y * 4 * stride[comp]; | |
288 | for x in xstart..2 { | |
289 | let cs = x + y * 2 + (comp - 1) * 4; | |
290 | let loc_lim; | |
291 | ||
292 | if test_bit!(cur_cbp, cs) { | |
293 | loc_lim = cur_lim; | |
294 | } else if (x == 0) && test_bit!(left_cbp, cs + 1) { | |
295 | loc_lim = left_lim; | |
296 | } else if (x != 0) && test_bit!(cur_cbp, cs - 1) { | |
297 | loc_lim = cur_lim; | |
298 | } else { | |
299 | loc_lim = 0; | |
300 | } | |
301 | if loc_lim != 0 { | |
302 | rv30_loop_filter4(dst, coff + x * 4, 1, stride[comp], loc_lim); | |
303 | } | |
304 | } | |
305 | } | |
306 | } | |
307 | ||
308 | left_lim = cur_lim; | |
309 | left_dbk = cur_dbk; | |
310 | left_cbp = cur_cbp; | |
311 | mb_pos += 1; | |
312 | } | |
313 | ||
314 | // horizontal filter | |
315 | let mut mb_pos: usize = row * mb_w; | |
316 | for mb_x in 0..mb_w { | |
317 | let cur_lim = RV30_LOOP_FILTER_STRENGTH[mbinfo[mb_pos].q as usize]; | |
318 | let cur_dbk = mbinfo[mb_pos].deblock; | |
319 | let cur_cbp = mbinfo[mb_pos].cbp_c; | |
320 | let ystart = if row == 0 { 1 } else { 0 }; | |
321 | let top_lim; | |
322 | let top_dbk; | |
323 | let top_cbp; | |
324 | if row > 0 { | |
325 | top_lim = RV30_LOOP_FILTER_STRENGTH[mbinfo[mb_pos - mb_w].q as usize]; | |
326 | top_dbk = mbinfo[mb_pos - mb_w].deblock; | |
327 | top_cbp = mbinfo[mb_pos - mb_w].cbp_c; | |
328 | } else { | |
329 | top_lim = 0; | |
330 | top_dbk = 0; | |
331 | top_cbp = 0; | |
332 | } | |
333 | for y in ystart..4 { | |
334 | let yoff = offs[0] + mb_x * 16 + y * 4 * stride[0]; | |
335 | for x in 0..4 { | |
336 | let cs = x + y*4; | |
337 | let loc_lim; | |
338 | ||
339 | if test_bit!(cur_dbk, cs) { | |
340 | loc_lim = cur_lim; | |
341 | } else if (y == 0) && test_bit!(top_dbk, cs + 12) { | |
342 | loc_lim = top_lim; | |
343 | } else if (y != 0) && test_bit!(cur_dbk, cs - 4) { | |
344 | loc_lim = cur_lim; | |
345 | } else { | |
346 | loc_lim = 0; | |
347 | } | |
348 | if loc_lim != 0 { | |
349 | rv30_loop_filter4(dst, yoff + x * 4, stride[0], 1, loc_lim); | |
350 | } | |
351 | } | |
352 | } | |
353 | ||
354 | for comp in 1..3 { | |
355 | for y in ystart..2 { | |
356 | let coff = offs[comp] + mb_x * 8 + y * 4 * stride[comp]; | |
357 | for x in 0..2 { | |
358 | let cs = x + y * 2 + (comp - 1) * 4; | |
359 | let loc_lim; | |
360 | ||
361 | if test_bit!(cur_cbp, cs) { | |
362 | loc_lim = cur_lim; | |
363 | } else if (y == 0) && test_bit!(top_cbp, cs + 2) { | |
364 | loc_lim = top_lim; | |
365 | } else if (y != 0) && test_bit!(cur_cbp, cs - 2) { | |
366 | loc_lim = cur_lim; | |
367 | } else { | |
368 | loc_lim = 0; | |
369 | } | |
370 | if loc_lim != 0 { | |
371 | rv30_loop_filter4(dst, coff + x * 4, stride[comp], 1, loc_lim); | |
372 | } | |
373 | } | |
374 | } | |
375 | } | |
376 | ||
377 | mb_pos += 1; | |
378 | } | |
379 | } | |
380 | fn do_luma_mc(&self, frame: &mut NAVideoBuffer<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, mv: MV, use16: bool, avg: bool) { | |
381 | let size: usize = if use16 { 16 } else { 8 }; | |
382 | let dstride = frame.get_stride(0); | |
383 | let doffset = frame.get_offset(0) + (if !avg { x + y * dstride } else { 0 }); | |
1a967e6b | 384 | let data = frame.get_data_mut().unwrap(); |
47527732 KS |
385 | let dst: &mut [u8] = data.as_mut_slice(); |
386 | ||
387 | let (w_, h_) = prev_frame.get_dimensions(0); | |
388 | let w = (w_ + 15) & !15; | |
389 | let h = (h_ + 15) & !15; | |
390 | ||
391 | let (dx, cx) = rv30_div_mv(mv.x); | |
392 | let (dy, cy) = rv30_div_mv(mv.y); | |
393 | let mode = cx + cy * 3; | |
394 | ||
395 | if check_pos(x, y, size, w, h, dx, dy, RV30_EDGE1[cx], RV30_EDGE2[cx], RV30_EDGE1[cy], RV30_EDGE2[cy]) { | |
396 | let sstride = prev_frame.get_stride(0); | |
397 | let mut soffset = prev_frame.get_offset(0) + x + y * sstride; | |
398 | let data = prev_frame.get_data(); | |
399 | let src: &[u8] = data.as_slice(); | |
400 | soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize; | |
401 | self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, src, soffset, sstride); | |
402 | } else { | |
403 | let mut ebuf: [u8; 32*20] = [0; 32*20]; | |
404 | edge_emu(prev_frame, (x as isize) + (dx as isize) - 1, (y as isize) + (dy as isize) - 1, 16+3, 16+3, &mut ebuf, 32, 0); | |
405 | self.luma_mc[if use16 { 0 } else { 1 }][mode](dst, doffset, dstride, &ebuf, 32 + 1, 32); | |
406 | } | |
407 | } | |
408 | fn do_chroma_mc(&self, frame: &mut NAVideoBuffer<u8>, prev_frame: &NAVideoBuffer<u8>, x: usize, y: usize, comp: usize, mv: MV, use8: bool, avg: bool) { | |
409 | let size: usize = if use8 { 8 } else { 4 }; | |
410 | let dstride = frame.get_stride(comp); | |
411 | let doffset = frame.get_offset(comp) + (if !avg { x + y * dstride } else { 0 }); | |
1a967e6b | 412 | let data = frame.get_data_mut().unwrap(); |
47527732 KS |
413 | let dst: &mut [u8] = data.as_mut_slice(); |
414 | ||
415 | let (w_, h_) = prev_frame.get_dimensions(comp); | |
416 | let w = (w_ + 7) & !7; | |
417 | let h = (h_ + 7) & !7; | |
418 | ||
419 | let (dx, cx) = rv30_div_mv(mv.x / 2); | |
420 | let (dy, cy) = rv30_div_mv(mv.y / 2); | |
421 | ||
422 | if check_pos(x, y, size, w, h, dx, dy, 0, 1, 0, 1) { | |
423 | let sstride = prev_frame.get_stride(comp); | |
424 | let mut soffset = prev_frame.get_offset(comp) + x + y * sstride; | |
425 | let data = prev_frame.get_data(); | |
426 | let src: &[u8] = data.as_slice(); | |
427 | soffset = ((soffset as isize) + (dx as isize) + (dy as isize) * (sstride as isize)) as usize; | |
428 | rv30_chroma_mc(dst, doffset, dstride, src, soffset, sstride, size, cx, cy); | |
429 | } else { | |
430 | let mut ebuf: [u8; 16*10] = [0; 16*10]; | |
431 | edge_emu(prev_frame, (x as isize) + (dx as isize), (y as isize) + (dy as isize), 8+1, 8+1, &mut ebuf, 16, comp); | |
432 | rv30_chroma_mc(dst, doffset, dstride, &ebuf, 0, 16, size, cx, cy); | |
433 | } | |
434 | } | |
435 | } |