saxpy_SSE_FMA.c (304B)
#include <stddef.h>
#include <x86intrin.h>

/**
 * Single-precision a*x + y, computed in place: y[i] = a * x[i] + y[i].
 *
 * Every element is produced by a fused multiply-add (one rounding step),
 * both in the 4-wide vector loop and in the scalar tail, so the result for
 * a given element does not depend on where it falls relative to n % 4.
 *
 * @param x  Input vector; at least n readable floats. No alignment required.
 * @param y  Input/output vector; at least n floats, updated in place.
 *           No alignment required.
 * @param a  Scalar multiplier applied to every element of x.
 * @param n  Number of elements to process; any value, including 0 and
 *           values that are not a multiple of 4.
 *
 * The target attribute enables FMA code generation for this function only,
 * so the translation unit does not have to be built with -mfma. The CPU
 * must still support FMA at run time.
 */
__attribute__((target("fma")))
void saxpy_SSE_FMA(float *x, float *y, float a, size_t n)
{
    const __m128 a4 = _mm_set1_ps(a);
    /* Largest multiple of 4 that does not exceed n. */
    const size_t n_vec = n - (n % 4);
    size_t i = 0;

    /* Vector body: unaligned loads/stores so callers need not guarantee
     * 16-byte alignment of x and y. */
    for (; i < n_vec; i += 4) {
        const __m128 xv = _mm_loadu_ps(x + i);
        const __m128 yv = _mm_loadu_ps(y + i);
        _mm_storeu_ps(y + i, _mm_fmadd_ps(a4, xv, yv));
    }

    /* Scalar tail (0..3 elements): touch exactly one float per step so
     * nothing past index n - 1 is read or written. */
    for (; i < n; i++) {
        const __m128 xs = _mm_load_ss(x + i);
        const __m128 ys = _mm_load_ss(y + i);
        _mm_store_ss(y + i, _mm_fmadd_ss(a4, xs, ys));
    }
}