This double loop is 50 times slower in Chez Scheme than in C++ (compiled with --optimize-level 3
and -O3
, respectively)
(import
(rnrs)
(rnrs r5rs))
(let* ((n (* 1024 16))
(a (make-vector n))
(acc 0))
(do ((i 0 (+ i 1)))
((= i n) #f)
(vector-set! a i (cons (cos i) (sin i))))
(do ((i 0 (+ i 1)))
((= i n) #f)
(do ((j 0 (+ j 1)))
((= j n) #f)
(let ((ai (vector-ref a i))
(aj (vector-ref a j)))
(set! acc (+ acc (+ (* (car ai) (cdr aj))
(* (cdr ai) (car aj))))))))
(write acc)
(newline))
(exit)
vs
#include <iostream>
#include <cmath>
#include <vector>
#include <algorithm>
typedef std::pair<double, double> pr;
typedef std::vector<pr> vec;
double loop(const vec& a)
{
double acc = 0;
const int n = a.size();
for(int i = 0; i < n; ++i)
for(int j = 0; j < n; ++j)
{
const pr& ai = a[i];
const pr& aj = a[j];
acc += ai .first * aj.second +
ai.second * aj .first;
}
return acc;
}
int main()
{
const int n = 1024 * 16;
vec v(n);
for(int i = 0; i < n; ++i)
v[i] = pr(std::cos(i), std::sin(i));
std::cout << loop(v) << std::endl;
}
I realize that there is more memory indirection in Scheme than in C++ here, but still the performance difference is surprising...
Is there a simple way to speed up the Scheme version? (Without changing the memory layout to something totally unidiomatic)