Fawkes API  Fawkes Development Version
yuvrgb.cpp
1 
2 /****************************************************************************
3  * yuvrgb.h - YUV to RGB conversion - specific methods, macros and constants
4  *
5  * Created: Sat Aug 12 15:02:41 2006
6  * based on colorspaces.h from Tue Feb 23 13:49:38 2005
7  * Copyright 2005-2006 Tim Niemueller [www.niemueller.de]
8  *
9  ****************************************************************************/
10 
11 /* This program is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 2 of the License, or
14  * (at your option) any later version. A runtime exception applies to
15  * this software (see LICENSE.GPL_WRE file mentioned below for details).
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  * GNU Library General Public License for more details.
21  *
22  * Read the full text in the LICENSE.GPL_WRE file in the doc directory.
23  */
24 
25 #include <core/macros.h>
26 #include <fvutils/color/yuvrgb.h>
27 #include <fvutils/cpu/mmx.h>
28 
29 namespace firevision {
30 
31 /** YUV to RGB Conversion
32  * B = 1.164(Y - 16) + 2.018(U - 128)
33  * G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
34  * R = 1.164(Y - 16) + 1.596(V - 128)
35  *
36  * Values have to be clamped to keep them in the [0-255] range.
37  * Rumour has it that the valid range is actually a subset of [0-255] (fourcc.org mentions an RGB range
38  * of [16-235] mentioned) but clamping the values into [0-255] seems to produce acceptable results.
39  * @param YUV unsigned char array that contains the pixels, 4 pixels in 6 byte macro pixel, line after
40  * line
41  * @param RGB where the RGB output will be written to, will have pixel after pixel, 3 bytes per pixel
42  * (thus this is a 24bit RGB with one byte per color) line by line.
43  * @param width Width of the image contained in the YUV buffer
44  * @param height Height of the image contained in the YUV buffer
45  */
46 void
47 yuv411packed_to_rgb_plainc(const unsigned char *YUV,
48  unsigned char * RGB,
49  unsigned int width,
50  unsigned int height)
51 {
52  int y0, y1, y2, y3, u, v;
53  unsigned int i = 0;
54  while (i < (width * height) * 3 / 2) {
55  u = YUV[i++] - 128;
56  y0 = YUV[i++] - 16;
57  y1 = YUV[i++] - 16;
58  v = YUV[i++] - 128;
59  y2 = YUV[i++] - 16;
60  y3 = YUV[i++] - 16;
61 
62  // Set red, green and blue bytes for pixel 0
63  *RGB++ = clip((76284 * y0 + 104595 * v) >> 16);
64  *RGB++ = clip((76284 * y0 - 25625 * u - 53281 * v) >> 16);
65  *RGB++ = clip((76284 * y0 + 132252 * u) >> 16);
66 
67  // Set red, green and blue bytes for pixel 1
68  *RGB++ = clip((76284 * y1 + 104595 * v) >> 16);
69  *RGB++ = clip((76284 * y1 - 25625 * u - 53281 * v) >> 16);
70  *RGB++ = clip((76284 * y1 + 132252 * u) >> 16);
71 
72  // Set red, green and blue bytes for pixel 2
73  *RGB++ = clip((76284 * y2 + 104595 * v) >> 16);
74  *RGB++ = clip((76284 * y2 - 25625 * u - 53281 * v) >> 16);
75  *RGB++ = clip((76284 * y2 + 132252 * u) >> 16);
76 
77  // Set red, green and blue bytes for pixel 3
78  *RGB++ = clip((76284 * y3 + 104595 * v) >> 16);
79  *RGB++ = clip((76284 * y3 - 25625 * u - 53281 * v) >> 16);
80  *RGB++ = clip((76284 * y3 + 132252 * u) >> 16);
81  }
82 }
83 
84 /** YUV to RGB Conversion
85  * B = 1.164(Y - 16) + 2.018(U - 128)
86  * G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
87  * R = 1.164(Y - 16) + 1.596(V - 128)
88  *
89  * Values have to be clamped to keep them in the [0-255] range.
90  * Rumour has it that the valid range is actually a subset of [0-255] (fourcc.org mentions an RGB range
91  * of [16-235] mentioned) but clamping the values into [0-255] seems to produce acceptable results.
92  * @param YUV unsigned char array that contains the pixels, 4 pixels in 6 byte macro pixel, line after
93  * line
94  * @param RGB where the RGB output will be written to, will have pixel after pixel, 3 bytes per pixel
95  * (thus this is a 24bit RGB with one byte per color) line by line.
96  * @param width Width of the image contained in the YUV buffer
97  * @param height Height of the image contained in the YUV buffer
98  */
99 void
100 yuv422planar_to_rgb_plainc(const unsigned char *planar,
101  unsigned char * RGB,
102  unsigned int width,
103  unsigned int height)
104 {
105  short y1, y2, u, v;
106  const unsigned char *yp, *up, *vp;
107  unsigned int i;
108 
109  yp = planar;
110  up = planar + (width * height);
111  vp = up + (width * height / 2);
112 
113  for (i = 0; i < (width * height / 2); ++i) {
114  y1 = *yp++;
115  y2 = *yp++;
116  u = *up++;
117  v = *vp++;
118 
119  y1 -= 16;
120  y2 -= 16;
121  u -= 128;
122  v -= 128;
123 
124  // Set red, green and blue bytes for pixel 0
125  *RGB++ = clip((76284 * y1 + 104595 * v) >> 16);
126  *RGB++ = clip((76284 * y1 - 25625 * u - 53281 * v) >> 16);
127  *RGB++ = clip((76284 * y1 + 132252 * u) >> 16);
128 
129  // Set red, green and blue bytes for pixel 1
130  *RGB++ = clip((76284 * y2 + 104595 * v) >> 16);
131  *RGB++ = clip((76284 * y2 - 25625 * u - 53281 * v) >> 16);
132  *RGB++ = clip((76284 * y2 + 132252 * u) >> 16);
133  }
134 }
135 
136 /** YUV to RGB Conversion
137  * B = 1.164(Y - 16) + 2.018(U - 128)
138  * G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
139  * R = 1.164(Y - 16) + 1.596(V - 128)
140  *
141  * Values have to be clamped to keep them in the [0-255] range.
142  * Rumour has it that the valid range is actually a subset of [0-255] (fourcc.org mentions an RGB range
143  * of [16-235] mentioned) but clamping the values into [0-255] seems to produce acceptable results.
144  * @param YUV unsigned char array that contains the pixels, 4 pixels in 8 byte macro pixel, line after
145  * line
146  * @param RGB where the RGB output will be written to, will have pixel after pixel, 3 bytes per pixel
147  * (thus this is a 24bit RGB with one byte per color) line by line.
148  * @param width Width of the image contained in the YUV buffer
149  * @param height Height of the image contained in the YUV buffer
150  */
151 void
152 yuv422packed_to_rgb_plainc(const unsigned char *YUV,
153  unsigned char * RGB,
154  const unsigned int width,
155  const unsigned int height)
156 {
157  int y0, y1, u, v;
158  unsigned int i = 0;
159  for (unsigned int pixel = 0; pixel < (width * height); pixel += 2) {
160  u = YUV[i++] - 128;
161  y0 = YUV[i++] - 16;
162  v = YUV[i++] - 128;
163  y1 = YUV[i++] - 16;
164 
165  // Set red, green and blue bytes for pixel 0
166  *RGB++ = clip((76284 * y0 + 104595 * v) >> 16);
167  *RGB++ = clip((76284 * y0 - 25625 * u - 53281 * v) >> 16);
168  *RGB++ = clip((76284 * y0 + 132252 * u) >> 16);
169 
170  // Set red, green and blue bytes for pixel 1
171  *RGB++ = clip((76284 * y1 + 104595 * v) >> 16);
172  *RGB++ = clip((76284 * y1 - 25625 * u - 53281 * v) >> 16);
173  *RGB++ = clip((76284 * y1 + 132252 * u) >> 16);
174  }
175 }
176 
177 /** Convert YUV422 planar to BGR.
178  * Use formula in aforementioned function.
179  * @param YUV YUV422 planar buffer
180  * @param BGR BGR buffer
181  * @param width Width of the image contained in the YUV buffer
182  * @param height Height of the image contained in the YUV buffer
183  */
184 void
185 yuv422planar_to_bgr_plainc(const unsigned char *planar,
186  unsigned char * BGR,
187  unsigned int width,
188  unsigned int height)
189 {
190  short y1, y2, u, v;
191  const unsigned char *yp, *up, *vp;
192  unsigned int i;
193 
194  yp = planar;
195  up = planar + (width * height);
196  vp = up + (width * height / 2);
197 
198  for (i = 0; i < (width * height / 2); ++i) {
199  y1 = *yp++;
200  y2 = *yp++;
201  u = *up++;
202  v = *vp++;
203 
204  y1 -= 16;
205  y2 -= 16;
206  u -= 128;
207  v -= 128;
208 
209  // Set red, green and blue bytes for pixel 0
210  *BGR++ = clip((76284 * y1 + 132252 * u) >> 16);
211  *BGR++ = clip((76284 * y1 - 25625 * u - 53281 * v) >> 16);
212  *BGR++ = clip((76284 * y1 + 104595 * v) >> 16);
213 
214  // Set red, green and blue bytes for pixel 1
215  *BGR++ = clip((76284 * y2 + 132252 * u) >> 16);
216  *BGR++ = clip((76284 * y2 - 25625 * u - 53281 * v) >> 16);
217  *BGR++ = clip((76284 * y2 + 104595 * v) >> 16);
218  }
219 }
220 
221 void
222 yuv422planar_to_rgb_with_alpha_plainc(const unsigned char *planar,
223  unsigned char * RGB,
224  unsigned int width,
225  unsigned int height)
226 {
227  short y1, y2, u, v;
228  const unsigned char *yp, *up, *vp;
229  unsigned int i;
230 
231  yp = planar;
232  up = planar + (width * height);
233  vp = up + (width * height / 2);
234 
235  for (i = 0; i < (width * height / 2); ++i) {
236  y1 = *yp++;
237  y2 = *yp++;
238  u = *up++;
239  v = *vp++;
240 
241  y1 -= 16;
242  y2 -= 16;
243  u -= 128;
244  v -= 128;
245 
246  // Set red, green and blue bytes for pixel 0
247  *RGB++ = clip((76284 * y1 + 104595 * v) >> 16);
248  *RGB++ = clip((76284 * y1 - 25625 * u - 53281 * v) >> 16);
249  *RGB++ = clip((76284 * y1 + 132252 * u) >> 16);
250  *RGB++ = 255;
251 
252  // Set red, green and blue bytes for pixel 1
253  *RGB++ = clip((76284 * y2 + 104595 * v) >> 16);
254  *RGB++ = clip((76284 * y2 - 25625 * u - 53281 * v) >> 16);
255  *RGB++ = clip((76284 * y2 + 132252 * u) >> 16);
256  *RGB++ = 255;
257  }
258 }
259 
260 void
261 yuv422planar_to_bgr_with_alpha_plainc(const unsigned char *planar,
262  unsigned char * BGR,
263  unsigned int width,
264  unsigned int height)
265 {
266  short y1, y2, u, v;
267  const unsigned char *yp, *up, *vp;
268  unsigned int i;
269 
270  yp = planar;
271  up = planar + (width * height);
272  vp = up + (width * height / 2);
273 
274  for (i = 0; i < (width * height / 2); ++i) {
275  y1 = *yp++;
276  y2 = *yp++;
277  u = *up++;
278  v = *vp++;
279 
280  y1 -= 16;
281  y2 -= 16;
282  u -= 128;
283  v -= 128;
284 
285  // Set red, green and blue bytes for pixel 0
286  *BGR++ = clip((76284 * y1 + 132252 * u) >> 16);
287  *BGR++ = clip((76284 * y1 - 25625 * u - 53281 * v) >> 16);
288  *BGR++ = clip((76284 * y1 + 104595 * v) >> 16);
289  *BGR++ = 255;
290 
291  // Set red, green and blue bytes for pixel 1
292  *BGR++ = clip((76284 * y2 + 132252 * u) >> 16);
293  *BGR++ = clip((76284 * y2 - 25625 * u - 53281 * v) >> 16);
294  *BGR++ = clip((76284 * y2 + 104595 * v) >> 16);
295  *BGR++ = 255;
296  }
297 }
298 
299 void
300 yuv422packed_to_bgr_with_alpha_plainc(const unsigned char *YUV,
301  unsigned char * BGR,
302  unsigned int width,
303  unsigned int height)
304 {
305  int y0, y1, u, v;
306  unsigned int i = 0;
307  while (i < (width * height * 2)) {
308  u = YUV[i++] - 128;
309  y0 = YUV[i++] - 16;
310  v = YUV[i++] - 128;
311  y1 = YUV[i++] - 16;
312 
313  // Set red, green and blue bytes for pixel 0
314  *BGR++ = clip((76284 * y0 + 132252 * u) >> 16);
315  *BGR++ = clip((76284 * y0 - 25625 * u - 53281 * v) >> 16);
316  *BGR++ = clip((76284 * y0 + 104595 * v) >> 16);
317  *BGR++ = 255;
318 
319  // Set red, green and blue bytes for pixel 1
320  *BGR++ = clip((76284 * y1 + 132252 * u) >> 16);
321  *BGR++ = clip((76284 * y1 - 25625 * u - 53281 * v) >> 16);
322  *BGR++ = clip((76284 * y1 + 104595 * v) >> 16);
323  *BGR++ = 255;
324  }
325 }
326 
327 #if (defined __i386__ || defined __386__ || defined __X86__ || defined _M_IX86 || defined i386)
328 
329 # define CRV 104595
330 # define CBU 132251
331 # define CGU 25624
332 # define CGV 53280
333 # define YMUL 76283
334 # define OFF 32768
335 # define BITRES 16
336 
337 /* calculation float resolution in bits */
338 /* ie RES = 6 is 10.6 fixed point */
339 /* RES = 8 is 8.8 fixed point */
340 /* RES = 4 is 12.4 fixed point */
341 /* NB: going above 6 will lead to overflow... :( */
342 # define RES 6
343 
344 # define RZ(i) (i >> (BITRES - RES))
345 # define FOUR(i) \
346  { \
347  i, i, i, i \
348  }
349 
350 __aligned(8) const volatile unsigned short _const_crvcrv[4] = FOUR(RZ(CRV));
351 __aligned(8) const volatile unsigned short _const_cbucbu[4] = FOUR(RZ(CBU));
352 __aligned(8) const volatile unsigned short _const_cgucgu[4] = FOUR(RZ(CGU));
353 __aligned(8) const volatile unsigned short _const_cgvcgv[4] = FOUR(RZ(CGV));
354 __aligned(8) const volatile unsigned short _const_ymul[4] = FOUR(RZ(YMUL));
355 __aligned(8) const volatile unsigned short _const_128[4] = FOUR(128);
356 __aligned(8) const volatile unsigned short _const_32[4] = FOUR(RZ(OFF));
357 __aligned(8) const volatile unsigned short _const_16[4] = FOUR(16);
358 
359 # define CONST_CRVCRV *_const_crvcrv
360 # define CONST_CBUCBU *_const_cbucbu
361 # define CONST_CGUCGU *_const_cgucgu
362 # define CONST_CGVCGV *_const_cgvcgv
363 # define CONST_YMUL *_const_ymul
364 # define CONST_128 *_const_128
365 # define CONST_32 *_const_32
366 # define CONST_16 *_const_16
367 
368 void
369 yuv411planar_to_rgb_mmx(const unsigned char *yuv,
370  unsigned char * rgb,
371  unsigned int w,
372  unsigned int h)
373 {
374  unsigned int xx, yy;
375  const unsigned char *yp1, *up, *vp;
376  unsigned char * dp1;
377 
378  /* plane pointers */
379  yp1 = yuv;
380  up = yuv + (w * h);
381  vp = up + (w * (h / 4));
382  /* destination pointers */
383  dp1 = rgb;
384 
385  yp1 = yuv;
386  up = yuv + (w * h);
387  vp = up + ((w / 2) * (h / 2));
388  dp1 = rgb;
389  for (yy = 0; yy < h; yy++) {
390  for (xx = 0; xx < w; xx += 8) {
391  movq_m2r(*yp1, mm0);
392  movq_r2r(mm0, mm1);
393  psrlw_i2r(8, mm0);
394  psllw_i2r(8, mm1);
395  psrlw_i2r(8, mm1);
396 
397  pxor_r2r(mm7, mm7);
398  movd_m2r(*up, mm3);
399  movd_m2r(*vp, mm2);
400 
401  punpcklbw_r2r(mm7, mm2);
402  punpcklbw_r2r(mm7, mm3);
403 
404  movq_m2r(CONST_16, mm4);
405  psubsw_r2r(mm4, mm0);
406  psubsw_r2r(mm4, mm1);
407 
408  movq_m2r(CONST_128, mm5);
409  psubsw_r2r(mm5, mm2);
410  psubsw_r2r(mm5, mm3);
411 
412  movq_m2r(CONST_YMUL, mm4);
413  pmullw_r2r(mm4, mm0);
414  pmullw_r2r(mm4, mm1);
415 
416  movq_m2r(CONST_CRVCRV, mm7);
417  pmullw_r2r(mm3, mm7);
418 
419  movq_m2r(CONST_CBUCBU, mm6);
420  pmullw_r2r(mm2, mm6);
421 
422  movq_m2r(CONST_CGUCGU, mm5);
423  pmullw_r2r(mm2, mm5);
424 
425  movq_m2r(CONST_CGVCGV, mm4);
426  pmullw_r2r(mm3, mm4);
427 
428  movq_r2r(mm0, mm2);
429  paddsw_r2r(mm7, mm2);
430  paddsw_r2r(mm1, mm7);
431 
432  psraw_i2r(RES, mm2);
433  psraw_i2r(RES, mm7);
434  packuswb_r2r(mm7, mm2);
435 
436  pxor_r2r(mm7, mm7);
437  movq_r2r(mm2, mm3);
438  punpckhbw_r2r(mm7, mm2);
439  punpcklbw_r2r(mm3, mm7);
440  por_r2r(mm7, mm2);
441 
442  movq_r2r(mm0, mm3);
443  psubsw_r2r(mm5, mm3);
444  psubsw_r2r(mm4, mm3);
445  paddsw_m2r(CONST_32, mm3);
446 
447  movq_r2r(mm1, mm7);
448  psubsw_r2r(mm5, mm7);
449  psubsw_r2r(mm4, mm7);
450  paddsw_m2r(CONST_32, mm7);
451 
452  psraw_i2r(RES, mm3);
453  psraw_i2r(RES, mm7);
454  packuswb_r2r(mm7, mm3);
455 
456  pxor_r2r(mm7, mm7);
457  movq_r2r(mm3, mm4);
458  punpckhbw_r2r(mm7, mm3);
459  punpcklbw_r2r(mm4, mm7);
460  por_r2r(mm7, mm3);
461 
462  movq_m2r(CONST_32, mm4);
463  paddsw_r2r(mm6, mm0);
464  paddsw_r2r(mm6, mm1);
465  paddsw_r2r(mm4, mm0);
466  paddsw_r2r(mm4, mm1);
467  psraw_i2r(RES, mm0);
468  psraw_i2r(RES, mm1);
469  packuswb_r2r(mm1, mm0);
470 
471  pxor_r2r(mm7, mm7);
472  movq_r2r(mm0, mm5);
473  punpckhbw_r2r(mm7, mm0);
474  punpcklbw_r2r(mm5, mm7);
475  por_r2r(mm7, mm0);
476 
477  pxor_r2r(mm1, mm1);
478  movq_r2r(mm0, mm5);
479  movq_r2r(mm3, mm6);
480  movq_r2r(mm2, mm7);
481  punpckhbw_r2r(mm3, mm2);
482  punpcklbw_r2r(mm6, mm7);
483  punpckhbw_r2r(mm1, mm0);
484  punpcklbw_r2r(mm1, mm5);
485 
486  movq_r2r(mm7, mm1);
487  punpckhwd_r2r(mm5, mm7);
488  punpcklwd_r2r(mm5, mm1);
489 
490  movq_r2r(mm2, mm4);
491  punpckhwd_r2r(mm0, mm2);
492  punpcklwd_r2r(mm0, mm4);
493 
494  movntq_r2m(mm1, *(dp1));
495  movntq_r2m(mm7, *(dp1 + 8));
496  movntq_r2m(mm4, *(dp1 + 16));
497  movntq_r2m(mm2, *(dp1 + 24));
498 
499  yp1 += 8;
500  up += 4;
501  vp += 4;
502  dp1 += 8 * 4;
503  }
504  if (yy & 0x1) {
505  up -= w / 2;
506  vp -= w / 2;
507  }
508  }
509  emms();
510 }
511 #endif
512 
513 } // end namespace firevision