23 #ifndef INCLUDED_volk_32i_x2_or_32i_a_H
24 #define INCLUDED_volk_32i_x2_or_32i_a_H
30 #include <xmmintrin.h>
/*!
 * \brief Bitwise-ORs two int32_t vectors element by element using SSE.
 *
 * Computes cVector[i] = aVector[i] | bVector[i] for every i in
 * [0, num_points).
 *
 * \param cVector    Output buffer; receives num_points results.
 * \param aVector    First input buffer.
 * \param bVector    Second input buffer.
 * \param num_points Number of elements to process in each buffer.
 *
 * \note The vector loop uses _mm_load_ps / _mm_store_ps, which are aligned
 *       accesses, so all three buffers must be 16-byte aligned whenever
 *       num_points >= 4.
 */
static inline void volk_32i_x2_or_32i_a_sse(int32_t* cVector,
                                            const int32_t* aVector,
                                            const int32_t* bVector,
                                            unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;
    unsigned int number;

    /* Float views exist only to feed the _ps intrinsics; OR works on the raw
     * bit pattern, so the integer values are never interpreted as floats. */
    float* cPtr = (float*)cVector;
    const float* aPtr = (const float*)aVector;
    const float* bPtr = (const float*)bVector;

    /* Main loop: four 32-bit elements per iteration. */
    for (number = 0; number < quarterPoints; number++) {
        const __m128 aVal = _mm_load_ps(aPtr);
        const __m128 bVal = _mm_load_ps(bPtr);

        _mm_store_ps(cPtr, _mm_or_ps(aVal, bVal));

        /* Step every cursor to the next group of four elements; without this
         * the loop would keep reprocessing the first group. */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Tail: the up-to-three elements that do not fill a whole SSE register. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        cVector[number] = aVector[number] | bVector[number];
    }
}
/*!
 * \brief Bitwise-ORs two int32_t vectors element by element using NEON.
 *
 * Computes cVector[i] = aVector[i] | bVector[i] for every i in
 * [0, num_points).
 *
 * \param cVector    Output buffer; receives num_points results.
 * \param aVector    First input buffer.
 * \param bVector    Second input buffer.
 * \param num_points Number of elements to process in each buffer.
 */
static inline void volk_32i_x2_or_32i_neon(int32_t* cVector,
                                           const int32_t* aVector,
                                           const int32_t* bVector,
                                           unsigned int num_points)
{
    const unsigned int quarter_points = num_points / 4;
    unsigned int number;

    /* Running cursors shared by the vector loop and the scalar tail. */
    int32_t* cPtr = cVector;
    const int32_t* aPtr = aVector;
    const int32_t* bPtr = bVector;

    /* Main loop: four 32-bit lanes per iteration. */
    for (number = 0; number < quarter_points; number++) {
        const int32x4_t a_val = vld1q_s32(aPtr);
        const int32x4_t b_val = vld1q_s32(bPtr);

        vst1q_s32(cPtr, vorrq_s32(a_val, b_val));

        /* Advance past the four elements just handled so the tail loop
         * below resumes at the first unprocessed element. */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Tail: the up-to-three elements that do not fill a whole vector. */
    for (number = quarter_points * 4; number < num_points; number++) {
        *cPtr++ = (*aPtr++) | (*bPtr++);
    }
}
102 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Bitwise-ORs two int32_t vectors element by element (portable C).
 *
 * Computes cVector[i] = aVector[i] | bVector[i] for every i in
 * [0, num_points).
 *
 * \param cVector    Output buffer; receives num_points results.
 * \param aVector    First input buffer.
 * \param bVector    Second input buffer.
 * \param num_points Number of elements to process in each buffer.
 */
static inline void volk_32i_x2_or_32i_generic(int32_t* cVector,
                                              const int32_t* aVector,
                                              const int32_t* bVector,
                                              unsigned int num_points)
{
    /* Running cursors over the output and the two inputs. */
    int32_t* cPtr = cVector;
    const int32_t* aPtr = aVector;
    const int32_t* bPtr = bVector;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        *cPtr++ = (*aPtr++) | (*bPtr++);
    }
}
/*!
 * \brief Externally defined OR kernel; only declared here.
 *
 * The definition is not part of this header. The parameter list matches the
 * other OR kernels in this file (output, two inputs, element count).
 */
extern void volk_32i_x2_or_32i_a_orc_impl(int32_t* cVector,
                                          const int32_t* aVector,
                                          const int32_t* bVector,
                                          unsigned int num_points);

/*!
 * \brief Thin wrapper that forwards to volk_32i_x2_or_32i_a_orc_impl.
 *
 * All four arguments are passed through unchanged; the wrapper performs no
 * work of its own.
 *
 * \param cVector    Output buffer, forwarded as-is.
 * \param aVector    First input buffer, forwarded as-is.
 * \param bVector    Second input buffer, forwarded as-is.
 * \param num_points Element count, forwarded as-is.
 */
static inline void volk_32i_x2_or_32i_u_orc(int32_t* cVector,
                                            const int32_t* aVector,
                                            const int32_t* bVector,
                                            unsigned int num_points)
{
    volk_32i_x2_or_32i_a_orc_impl(cVector, aVector, bVector, num_points);
}
/* Cross-reference note: int32_t is a typedef of signed int (see stdint.h, line 77). */