00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include <mrpt/base.h>
00030
00031
00032
00033
00034
00035 #if MRPT_HAS_SSE3
00036
00037 #include <mrpt/utils/CImage.h>
00038 #include <mrpt/utils/SSE_types.h>
00039 #include <mrpt/utils/SSE_macros.h>
00040 #include "CImage_SSEx.h"
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056 void image_SSSE3_scale_half_3c8u(const uint8_t* in, uint8_t* out, int w, int h)
00057 {
00058 EIGEN_ALIGN16 const unsigned long long mask0[2] = { 0x0D0C080706020100ull, 0x808080808080800Eull };
00059 EIGEN_ALIGN16 const unsigned long long mask1[2] = { 0x8080808080808080ull, 0x0E0A090804030280ull };
00060 EIGEN_ALIGN16 const unsigned long long mask2[2] = { 0x0C0B0A0605040080ull, 0x8080808080808080ull };
00061 EIGEN_ALIGN16 const unsigned long long mask3[2] = { 0x808080808080800Full, 0x8080808080808080ull };
00062
00063 const __m128i m0 = _mm_load_si128((const __m128i*)mask0);
00064 const __m128i m1 = _mm_load_si128((const __m128i*)mask1);
00065 const __m128i m2 = _mm_load_si128((const __m128i*)mask2);
00066 const __m128i m3 = _mm_load_si128((const __m128i*)mask3);
00067
00068 const int sw = w >> 4;
00069 const int sh = h >> 1;
00070
00071 int odd_row = 0;
00072
00073 for (int i=0; i<sh; i++)
00074 {
00075 for (int j=0; j<sw; j++)
00076 {
00077
00078 __m128i d0 = _mm_load_si128((const __m128i*)in); in += 16;
00079 __m128i d1 = _mm_load_si128((const __m128i*)in); in += 16;
00080
00081
00082 __m128i shuf0 = _mm_shuffle_epi8(d0,m0);
00083 __m128i shuf1 = _mm_shuffle_epi8(d1,m1);
00084
00085 __m128i res0 = _mm_or_si128(shuf0,shuf1);
00086
00087 if ((odd_row&0x1)!=0)
00088 _mm_storeu_si128((__m128i*)out,res0);
00089 else _mm_store_si128 ((__m128i*)out,res0);
00090 out += 16;
00091
00092
00093 __m128i d2 = _mm_load_si128((const __m128i*)in); in += 16;
00094
00095 _mm_storel_epi64(
00096 (__m128i*)out,
00097 _mm_or_si128(_mm_shuffle_epi8(d2,m2),_mm_shuffle_epi8(d1,m3))
00098 );
00099 odd_row++;
00100 out += 8;
00101 }
00102 in += 3*w;
00103 }
00104 }
00105
00106
00107
00108 template <bool IS_RGB>
00109 void private_image_SSSE3_rgb_or_bgr_to_gray_8u(const uint8_t* in, uint8_t* out, int w, int h)
00110 {
00111
00112 BUILD_128BIT_CONST(mask0, 80,00, 80,03, 80,06, 80,09, 80,0C, 80,0F, 80,80, 80,80)
00113 BUILD_128BIT_CONST(mask1, 80,80, 80,80, 80,80, 80,80, 80,80, 80,80, 80,02, 80,05)
00114
00115 BUILD_128BIT_CONST(mask2, 80,01, 80,04, 80,07, 80,0A, 80,0D, 80,80, 80,80, 80,80)
00116 BUILD_128BIT_CONST(mask3, 80,80, 80,80, 80,80, 80,80, 80,80, 80,00, 80,03, 80,06)
00117
00118 BUILD_128BIT_CONST(mask4, 80,02, 80,05, 80,08, 80,0B, 80,0E, 80,80, 80,80, 80,80)
00119 BUILD_128BIT_CONST(mask5, 80,80, 80,80, 80,80, 80,80, 80,80, 80,01, 80,04, 80,07)
00120
00121
00122 BUILD_128BIT_CONST(mask6, 80,08, 80,0B, 80,0E, 80,80, 80,80, 80,80, 80,80, 80,80)
00123 BUILD_128BIT_CONST(mask7, 80,80, 80,80, 80,80, 80,01, 80,04, 80,07, 80,0A, 80,0D)
00124
00125 BUILD_128BIT_CONST(mask8, 80,09, 80,0C, 80,0F, 80,80, 80,80, 80,80, 80,80, 80,80)
00126 BUILD_128BIT_CONST(mask9, 80,80, 80,80, 80,80, 80,02, 80,05, 80,08, 80,0B, 80,0E)
00127
00128 BUILD_128BIT_CONST(mask10,80,0A, 80,0D, 80,80, 80,80, 80,80, 80,80, 80,80, 80,80)
00129 BUILD_128BIT_CONST(mask11,80,80, 80,80, 80,00, 80,03, 80,06, 80,09, 80,0C, 80,0F)
00130
00131
00132 BUILD_128BIT_CONST(mask_to_low, 01,03,05,07,09,0B,0D,0F, 80,80,80,80,80,80,80,80)
00133
00134
00135
00136 BUILD_128BIT_CONST(val_red , 00,1D, 00,1D, 00,1D, 00,1D, 00,1D, 00,1D, 00,1D, 00,1D)
00137 BUILD_128BIT_CONST(val_green , 00,96, 00,96, 00,96, 00,96, 00,96, 00,96, 00,96, 00,96)
00138 BUILD_128BIT_CONST(val_blue , 00,4D, 00,4D, 00,4D, 00,4D, 00,4D, 00,4D, 00,4D, 00,4D)
00139
00140 const __m128i m0 = _mm_load_si128( IS_RGB ? (const __m128i*)mask4 : (const __m128i*)mask0);
00141 const __m128i m1 = _mm_load_si128( IS_RGB ? (const __m128i*)mask5 : (const __m128i*)mask1);
00142 const __m128i m2 = _mm_load_si128((const __m128i*)mask2);
00143 const __m128i m3 = _mm_load_si128((const __m128i*)mask3);
00144 const __m128i m4 = _mm_load_si128( IS_RGB ? (const __m128i*)mask0 : (const __m128i*)mask4);
00145 const __m128i m5 = _mm_load_si128( IS_RGB ? (const __m128i*)mask1 : (const __m128i*)mask5);
00146
00147 const __m128i m6 = _mm_load_si128( IS_RGB ? (const __m128i*)mask10 : (const __m128i*)mask6);
00148 const __m128i m7 = _mm_load_si128( IS_RGB ? (const __m128i*)mask11 : (const __m128i*)mask7);
00149 const __m128i m8 = _mm_load_si128((const __m128i*)mask8);
00150 const __m128i m9 = _mm_load_si128((const __m128i*)mask9);
00151 const __m128i m10= _mm_load_si128( IS_RGB ? (const __m128i*)mask6 : (const __m128i*)mask10);
00152 const __m128i m11= _mm_load_si128( IS_RGB ? (const __m128i*)mask7 : (const __m128i*)mask11);
00153
00154 const __m128i mask_low= _mm_load_si128((const __m128i*)mask_to_low);
00155
00156 const __m128i VAL_R = _mm_load_si128((const __m128i*)val_red);
00157 const __m128i VAL_G = _mm_load_si128((const __m128i*)val_green);
00158 const __m128i VAL_B = _mm_load_si128((const __m128i*)val_blue);
00159
00160 const int sw = w >> 4;
00161 const int sh = h ;
00162
00163 for (int i=0; i<sh; i++)
00164 {
00165 for (int j=0; j<sw; j++)
00166 {
00167
00168 const __m128i d0 = _mm_load_si128((const __m128i*)in); in += 16;
00169 const __m128i d1 = _mm_load_si128((const __m128i*)in); in += 16;
00170 const __m128i d2 = _mm_load_si128((const __m128i*)in); in += 16;
00171
00172
00173 {
00174 const __m128i BLUES_0_7 = _mm_or_si128(_mm_shuffle_epi8(d0,m0),_mm_shuffle_epi8(d1,m1));
00175 const __m128i GREENS_0_7 = _mm_or_si128(_mm_shuffle_epi8(d0,m2),_mm_shuffle_epi8(d1,m3));
00176 const __m128i REDS_0_7 = _mm_or_si128(_mm_shuffle_epi8(d0,m4),_mm_shuffle_epi8(d1,m5));
00177
00178
00179
00180
00181
00182
00183
00184 const __m128i GRAYS_0_7 =
00185 _mm_adds_epu16(
00186 _mm_mulhi_epu16(REDS_0_7, VAL_R),
00187 _mm_adds_epu16(
00188 _mm_mulhi_epu16(GREENS_0_7, VAL_G),
00189 _mm_mulhi_epu16(BLUES_0_7, VAL_B)
00190 ));
00191
00192 _mm_storel_epi64((__m128i*)out, _mm_shuffle_epi8(GRAYS_0_7,mask_low));
00193 out+=8;
00194 }
00195
00196
00197 {
00198 const __m128i BLUES_8_15 = _mm_or_si128(_mm_shuffle_epi8(d1,m6),_mm_shuffle_epi8(d2,m7));
00199 const __m128i GREENS_8_15 = _mm_or_si128(_mm_shuffle_epi8(d1,m8),_mm_shuffle_epi8(d2,m9));
00200 const __m128i REDS_8_15 = _mm_or_si128(_mm_shuffle_epi8(d1,m10),_mm_shuffle_epi8(d2,m11));
00201
00202 const __m128i GRAYS_8_15 =
00203 _mm_adds_epu16(
00204 _mm_mulhi_epu16(REDS_8_15, VAL_R),
00205 _mm_adds_epu16(
00206 _mm_mulhi_epu16(GREENS_8_15, VAL_G),
00207 _mm_mulhi_epu16(BLUES_8_15, VAL_B)
00208 ));
00209
00210 _mm_storel_epi64((__m128i*)out, _mm_shuffle_epi8(GRAYS_8_15,mask_low));
00211 out+=8;
00212 }
00213 }
00214 }
00215
00216 }
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227 void image_SSSE3_bgr_to_gray_8u(const uint8_t* in, uint8_t* out, int w, int h)
00228 {
00229 private_image_SSSE3_rgb_or_bgr_to_gray_8u<false>(in,out,w,h);
00230 }
00231
00232
00233
00234
00235
00236
00237
00238
00239
00240 void image_SSSE3_rgb_to_gray_8u(const uint8_t* in, uint8_t* out, int w, int h)
00241 {
00242 private_image_SSSE3_rgb_or_bgr_to_gray_8u<true>(in,out,w,h);
00243 }
00244
00245
00246
00247
00248 #endif // end of MRPT_HAS_SSE3