cs205-lecture-examples

Example code used during Harvard CS205 lectures
git clone https://git.0xfab.ch/cs205-lecture-examples.git
Log | Files | Refs | README | LICENSE

loop_unroll_prefetch.cpp (1136B)


      1 #include <chrono>
      2 #include <iostream>
      3 
      4 #define N (1 << 20)
      5 int main(void)
      6 {
      7     double *A = new double[N];
      8     double *B = new double[N];
      9     double *C = new double[N];
     10     for (int i = 0; i < N; ++i) {
     11         A[i] = 0;
     12         B[i] = i + 0;
     13         C[i] = i + 2;
     14     }
     15 
     16     typedef std::chrono::high_resolution_clock Clock;
     17     auto t1 = Clock::now();
     18     // manual 4-fold loop unroll
     19     for (int i = 0; i < N; i += 4) {
     20         __builtin_prefetch(&A[i + 4], 1, 1);
     21         __builtin_prefetch(&B[i + 4], 0, 1);
     22         __builtin_prefetch(&C[i + 4], 0, 1);
     23         A[i + 0] = A[i + 0] * B[i + 0] + C[i + 0];
     24         A[i + 1] = A[i + 1] * B[i + 1] + C[i + 1];
     25         A[i + 2] = A[i + 2] * B[i + 2] + C[i + 2];
     26         A[i + 3] = A[i + 3] * B[i + 3] + C[i + 3];
     27     }
     28     auto t2 = Clock::now();
     29     std::cout
     30         << "Time:   "
     31         << std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count()
     32         << " ns\n";
     33 
     34     volatile double sum = 0.0;
     35     for (int i = 0; i < N; ++i) {
     36         sum += A[i];
     37     }
     38     std::cout << "Result: " << sum << '\n';
     39 
     40     delete[] A;
     41     delete[] B;
     42     delete[] C;
     43     return 0;
     44 }