23 #ifndef INCLUDED_volk_32fc_conjugate_32fc_u_H
24 #define INCLUDED_volk_32fc_conjugate_32fc_u_H
32 #include <immintrin.h>
/*
 * AVX kernel using unaligned loads/stores: conjugates complex floats by
 * flipping the sign bit of every imaginary lane.
 *
 *   cVector    - destination buffer (non-const).
 *   aVector    - source buffer (read-only).
 *   num_points - number of complex values to process.
 *
 * NOTE(review): this listing omits lines of the function. The declarations of
 * `x`, `a` and `c`, any pointer advancement inside the loop, the tail-loop
 * body and the closing braces are not visible here. `a` and `c` are presumably
 * cursors over aVector and cVector -- confirm against the full source.
 */
39 static inline void volk_32fc_conjugate_32fc_u_avx(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
unsigned int num_points){
40 unsigned int number = 0;
/* A 256-bit register holds 8 floats, i.e. 4 complex values per iteration. */
41 const unsigned int quarterPoints = num_points / 4;
/* -0.f has only the sign bit set, so XOR with this mask negates the odd
 * (imaginary) lanes and leaves the even (real) lanes untouched. */
47 __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f);
49 for(;number < quarterPoints; number++){
/* Unaligned load of 8 floats from `a`. */
51 x = _mm256_loadu_ps((
float*)a);
/* Flip the sign of the imaginary lanes. */
53 x = _mm256_xor_ps(x, conjugator);
/* Unaligned store of the 8 result floats to `c`. */
55 _mm256_storeu_ps((
float*)c,x);
/* Resume at the first point not covered by the vector loop. */
61 number = quarterPoints * 4;
/* Tail loop over the remaining (num_points % 4) points; its body is not
 * visible in this listing -- presumably a scalar conjugate, TODO confirm. */
63 for(;number < num_points; number++) {
70 #include <pmmintrin.h>
/*
 * 128-bit SSE kernel using unaligned loads/stores: conjugates complex floats
 * by flipping the sign bit of every imaginary lane.
 *
 *   cVector    - destination buffer (non-const).
 *   aVector    - source buffer (read-only).
 *   num_points - number of complex values to process.
 *
 * NOTE(review): this listing omits lines of the function. The declarations of
 * `x`, `a` and `c`, any pointer advancement inside the loop, the body of the
 * odd-count branch and the closing braces are not visible here. `a` and `c`
 * are presumably cursors over aVector and cVector -- confirm against the full
 * source.
 */
77 static inline void volk_32fc_conjugate_32fc_u_sse3(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
unsigned int num_points){
78 unsigned int number = 0;
/* A 128-bit register holds 4 floats, i.e. 2 complex values per iteration. */
79 const unsigned int halfPoints = num_points / 2;
/* -0.f has only the sign bit set, so XOR with this mask negates the odd
 * (imaginary) lanes and leaves the even (real) lanes untouched. */
85 __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
87 for(;number < halfPoints; number++){
/* Unaligned load of 4 floats from `a`. */
89 x = _mm_loadu_ps((
float*)a);
/* Flip the sign of the imaginary lanes. */
91 x = _mm_xor_ps(x, conjugator);
/* Unaligned store of the 4 result floats to `c`. */
93 _mm_storeu_ps((
float*)c,x);
/* An odd num_points leaves one point unprocessed by the vector loop; the
 * handling code is not visible in this listing -- TODO confirm. */
99 if((num_points % 2) != 0) {
105 #ifdef LV_HAVE_GENERIC
/*
 * Portable scalar kernel: visits each of the num_points elements in order.
 *
 *   cVector    - destination buffer (non-const).
 *   aVector    - source buffer (read-only).
 *   num_points - number of complex values to process.
 *
 * NOTE(review): the loop body and closing braces are not visible in this
 * listing; presumably each element is conjugated (e.g. via lv_conj) and
 * written to cVector -- confirm against the full source.
 */
112 static inline void volk_32fc_conjugate_32fc_generic(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
unsigned int num_points){
/* `number` is reset to 0 again by the for-initialiser below. */
115 unsigned int number = 0;
117 for(number = 0; number < num_points; number++){
125 #ifndef INCLUDED_volk_32fc_conjugate_32fc_a_H
126 #define INCLUDED_volk_32fc_conjugate_32fc_a_H
134 #include <immintrin.h>
/*
 * AVX kernel using aligned loads/stores: conjugates complex floats by
 * flipping the sign bit of every imaginary lane.
 *
 *   cVector    - destination buffer (non-const).
 *   aVector    - source buffer (read-only).
 *   num_points - number of complex values to process.
 *
 * _mm256_load_ps / _mm256_store_ps require 32-byte-aligned addresses, so the
 * pointers accessed through `a` and `c` must satisfy that alignment.
 *
 * NOTE(review): this listing omits lines of the function. The declarations of
 * `x`, `a` and `c`, any pointer advancement inside the loop, the tail-loop
 * body and the closing braces are not visible here. `a` and `c` are presumably
 * cursors over aVector and cVector -- confirm against the full source.
 */
141 static inline void volk_32fc_conjugate_32fc_a_avx(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
unsigned int num_points){
142 unsigned int number = 0;
/* A 256-bit register holds 8 floats, i.e. 4 complex values per iteration. */
143 const unsigned int quarterPoints = num_points / 4;
/* -0.f has only the sign bit set, so XOR with this mask negates the odd
 * (imaginary) lanes and leaves the even (real) lanes untouched. */
149 __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f);
151 for(;number < quarterPoints; number++){
/* Aligned load of 8 floats from `a`. */
153 x = _mm256_load_ps((
float*)a);
/* Flip the sign of the imaginary lanes. */
155 x = _mm256_xor_ps(x, conjugator);
/* Aligned store of the 8 result floats to `c`. */
157 _mm256_store_ps((
float*)c,x);
/* Resume at the first point not covered by the vector loop. */
163 number = quarterPoints * 4;
/* Tail loop over the remaining (num_points % 4) points; its body is not
 * visible in this listing -- presumably a scalar conjugate, TODO confirm. */
165 for(;number < num_points; number++) {
172 #include <pmmintrin.h>
/*
 * 128-bit SSE kernel using aligned loads/stores: conjugates complex floats by
 * flipping the sign bit of every imaginary lane.
 *
 *   cVector    - destination buffer (non-const).
 *   aVector    - source buffer (read-only).
 *   num_points - number of complex values to process.
 *
 * _mm_load_ps / _mm_store_ps require 16-byte-aligned addresses, so the
 * pointers accessed through `a` and `c` must satisfy that alignment.
 *
 * NOTE(review): this listing omits lines of the function. The declarations of
 * `x`, `a` and `c`, any pointer advancement inside the loop, the body of the
 * odd-count branch and the closing braces are not visible here. `a` and `c`
 * are presumably cursors over aVector and cVector -- confirm against the full
 * source.
 */
179 static inline void volk_32fc_conjugate_32fc_a_sse3(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
unsigned int num_points){
180 unsigned int number = 0;
/* A 128-bit register holds 4 floats, i.e. 2 complex values per iteration. */
181 const unsigned int halfPoints = num_points / 2;
/* -0.f has only the sign bit set, so XOR with this mask negates the odd
 * (imaginary) lanes and leaves the even (real) lanes untouched. */
187 __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
189 for(;number < halfPoints; number++){
/* Aligned load of 4 floats from `a`. */
191 x = _mm_load_ps((
float*)a);
/* Flip the sign of the imaginary lanes. */
193 x = _mm_xor_ps(x, conjugator);
/* Aligned store of the 4 result floats to `c`. */
195 _mm_store_ps((
float*)c,x);
/* An odd num_points leaves one point unprocessed by the vector loop; the
 * handling code is not visible in this listing -- TODO confirm. */
201 if((num_points % 2) != 0) {
208 #include <arm_neon.h>
/*
 * ARM NEON kernel: conjugates complex floats four at a time by
 * de-interleaving real and imaginary parts and negating the imaginary vector.
 *
 *   cVector    - destination buffer (non-const).
 *   aVector    - source buffer (read-only).
 *   num_points - number of complex values to process.
 *
 * NOTE(review): this listing omits lines of the function. The declarations of
 * `number`, `x`, `a` and `c`, any pointer advancement inside the loop, the
 * tail-loop body and the closing braces are not visible here. `a` and `c` are
 * presumably cursors over aVector and cVector -- confirm against the full
 * source.
 */
215 static inline void volk_32fc_conjugate_32fc_a_neon(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
unsigned int num_points){
/* Each iteration handles 4 complex values (8 floats). */
217 const unsigned int quarterPoints = num_points / 4;
223 for(number=0; number < quarterPoints; number++){
/* Hint the cache about the data four elements ahead of `a`. */
224 __builtin_prefetch(a+4);
/* De-interleaving load: even floats (real parts) go to x.val[0], odd floats
 * (imaginary parts) go to x.val[1]. */
225 x = vld2q_f32((
float*)a);
/* Negate the four imaginary parts; the real parts are left unchanged. */
228 x.val[1] = vnegq_f32( x.val[1]);
/* Re-interleave real/imaginary parts and store 8 floats to `c`. */
230 vst2q_f32((
float*)c,x);
/* Tail loop over the remaining (num_points % 4) points; its body is not
 * visible in this listing -- presumably a scalar conjugate, TODO confirm. */
236 for(number=quarterPoints*4; number < num_points; number++){
242 #ifdef LV_HAVE_GENERIC
/*
 * Portable scalar kernel (the "_a" variant): visits each of the num_points
 * elements in order.
 *
 *   cVector    - destination buffer (non-const).
 *   aVector    - source buffer (read-only).
 *   num_points - number of complex values to process.
 *
 * NOTE(review): the loop body and closing braces are not visible in this
 * listing; presumably each element is conjugated (e.g. via lv_conj) and
 * written to cVector -- confirm against the full source.
 */
249 static inline void volk_32fc_conjugate_32fc_a_generic(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
unsigned int num_points){
/* `number` is reset to 0 again by the for-initialiser below. */
252 unsigned int number = 0;
254 for(number = 0; number < num_points; number++){
#define lv_conj(x)
Definition: volk_complex.h:80
float complex lv_32fc_t
Definition: volk_complex.h:56