GNU Radio Manual and C++ API Reference  3.7.6.1
The Free & Open Software Radio Ecosystem
volk_8ic_x2_s32f_multiply_conjugate_32fc.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H
24 #define INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H
25 
26 #include <inttypes.h>
27 #include <stdio.h>
28 #include <volk/volk_complex.h>
29 
30 #ifdef LV_HAVE_SSE4_1
31 #include <smmintrin.h>
32 /*!
33  \brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector
34  \param cVector The complex vector where the results will be stored
35  \param aVector One of the complex vectors to be multiplied
36  \param bVector The complex vector which will be converted to complex conjugate and multiplied
37  \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
38 */
39 static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_a_sse4_1(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){
40  unsigned int number = 0;
41  const unsigned int quarterPoints = num_points / 4;
42 
43  __m128i x, y, realz, imagz;
44  __m128 ret;
45  lv_32fc_t* c = cVector;
46  const lv_8sc_t* a = aVector;
47  const lv_8sc_t* b = bVector;
48  __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1);
49 
50  __m128 invScalar = _mm_set_ps1(1.0/scalar);
51 
52  for(;number < quarterPoints; number++){
53  // Convert into 8 bit values into 16 bit values
54  x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
55  y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
56 
57  // Calculate the ar*cr - ai*(-ci) portions
58  realz = _mm_madd_epi16(x,y);
59 
60  // Calculate the complex conjugate of the cr + ci j values
61  y = _mm_sign_epi16(y, conjugateSign);
62 
63  // Shift the order of the cr and ci values
64  y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1));
65 
66  // Calculate the ar*(-ci) + cr*(ai)
67  imagz = _mm_madd_epi16(x,y);
68 
69  // Interleave real and imaginary and then convert to float values
70  ret = _mm_cvtepi32_ps(_mm_unpacklo_epi32(realz, imagz));
71 
72  // Normalize the floating point values
73  ret = _mm_mul_ps(ret, invScalar);
74 
75  // Store the floating point values
76  _mm_store_ps((float*)c, ret);
77  c += 2;
78 
79  // Interleave real and imaginary and then convert to float values
80  ret = _mm_cvtepi32_ps(_mm_unpackhi_epi32(realz, imagz));
81 
82  // Normalize the floating point values
83  ret = _mm_mul_ps(ret, invScalar);
84 
85  // Store the floating point values
86  _mm_store_ps((float*)c, ret);
87  c += 2;
88 
89  a += 4;
90  b += 4;
91  }
92 
93  number = quarterPoints * 4;
94  float* cFloatPtr = (float*)&cVector[number];
95  int8_t* a8Ptr = (int8_t*)&aVector[number];
96  int8_t* b8Ptr = (int8_t*)&bVector[number];
97  for(; number < num_points; number++){
98  float aReal = (float)*a8Ptr++;
99  float aImag = (float)*a8Ptr++;
100  lv_32fc_t aVal = lv_cmake(aReal, aImag );
101  float bReal = (float)*b8Ptr++;
102  float bImag = (float)*b8Ptr++;
103  lv_32fc_t bVal = lv_cmake( bReal, -bImag );
104  lv_32fc_t temp = aVal * bVal;
105 
106  *cFloatPtr++ = lv_creal(temp) / scalar;
107  *cFloatPtr++ = lv_cimag(temp) / scalar;
108  }
109 }
110 #endif /* LV_HAVE_SSE4_1 */
111 
112 #ifdef LV_HAVE_GENERIC
113 /*!
114  \brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector
115  \param cVector The complex vector where the results will be stored
116  \param aVector One of the complex vectors to be multiplied
117  \param bVector The complex vector which will be converted to complex conjugate and multiplied
118  \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
119 */
120 static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_generic(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points){
121  unsigned int number = 0;
122  float* cPtr = (float*)cVector;
123  const float invScalar = 1.0 / scalar;
124  int8_t* a8Ptr = (int8_t*)aVector;
125  int8_t* b8Ptr = (int8_t*)bVector;
126  for(number = 0; number < num_points; number++){
127  float aReal = (float)*a8Ptr++;
128  float aImag = (float)*a8Ptr++;
129  lv_32fc_t aVal = lv_cmake(aReal, aImag );
130  float bReal = (float)*b8Ptr++;
131  float bImag = (float)*b8Ptr++;
132  lv_32fc_t bVal = lv_cmake( bReal, -bImag );
133  lv_32fc_t temp = aVal * bVal;
134 
135  *cPtr++ = (lv_creal(temp) * invScalar);
136  *cPtr++ = (lv_cimag(temp) * invScalar);
137  }
138 }
139 #endif /* LV_HAVE_GENERIC */
140 
141 
142 
143 
144 #endif /* INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H */
#define lv_cmake(r, i)
Definition: volk_complex.h:59
signed char int8_t
Definition: stdint.h:75
float complex lv_32fc_t
Definition: volk_complex.h:56
#define lv_creal(x)
Definition: volk_complex.h:76
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:52
#define lv_cimag(x)
Definition: volk_complex.h:78