cs205-lecture-examples

Example code used during Harvard CS205 lectures
git clone https://git.0xfab.ch/cs205-lecture-examples.git
Log | Files | Refs | README | LICENSE

saxpy_SSE_FMA.c (304B)


      1 #include <x86intrin.h>
      2 
      3 void saxpy_SSE_FMA(float *x, float *y, float a, size_t n)
      4 {
      5     const __m128 a4 = _mm_set1_ps(a);
      6     // assumes n % 4 == 0
      7     for (size_t i = 0; i < n; i += 4) {
      8         _mm_store_ps(y + i,
      9                      _mm_fmadd_ps(a4, _mm_load_ps(x + i), _mm_load_ps(y + i)));
     10     }
     11 }