#include <math.h>
#include <stdio.h>
#include <time.h>
#define ITERS 10000000
#define TESTWITH(x) { \
diff = 0.0; \
time1 = clock(); \
for (i = 0; i < ITERS; ++i) \
diff += (x) - M_PI; \
time2 = clock(); \
printf("%s\t=> %e, time => %f\n", #x, diff, diffclock(time2, time1)); \
}
static inline double
diffclock(clock_t time1, clock_t time0)
{
return (double) (time1 - time0) / CLOCKS_PER_SEC;
}
int
main()
{
int i;
clock_t time1, time2;
double diff;
/* Warmup. The atan2 case catches GCC's atan folding (which would
* optimise the ``4 * atan(1) - M_PI'' to a no-op), if -fno-builtin
* is not used. */
TESTWITH(4 * atan(1))
TESTWITH(4 * atan2(1, 1))
#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64__))
extern double fldpi();
TESTWITH(fldpi())
#endif
/* Actual tests start here. */
TESTWITH(atan2(0, -1))
TESTWITH(acos(-1))
TESTWITH(2 * asin(1))
TESTWITH(4 * atan2(1, 1))
TESTWITH(4 * atan(1))
return 0;
}
double
fldpi()
{
double pi;
asm("fldpi" : "=t" (pi));
return pi;
}
#!/bin/sh gcc -O3 -Wall -c -m32 -o fldpi-32.o fldpi.c gcc -O3 -Wall -c -m64 -o fldpi-64.o fldpi.c gcc -O3 -Wall -ffast-math -m32 -o pitimes1-32 pitimes.c fldpi-32.o gcc -O3 -Wall -m32 -o pitimes2-32 pitimes.c fldpi-32.o -lm gcc -O3 -Wall -fno-builtin -m32 -o pitimes3-32 pitimes.c fldpi-32.o -lm gcc -O3 -Wall -ffast-math -m64 -o pitimes1-64 pitimes.c fldpi-64.o -lm gcc -O3 -Wall -m64 -o pitimes2-64 pitimes.c fldpi-64.o -lm gcc -O3 -Wall -fno-builtin -m64 -o pitimes3-64 pitimes.c fldpi-64.o -lm
Refactorings
No refactoring yet !
evilteach.blogspot.com
August 31, 2008, August 31, 2008 21:53, permalink
The trig functions are limited in the accuracy they provide, that really puts an upper limit on the accuracy you can expect. I would advise going to the wikipedia page on pi. It provides an excellent overview of the topic, with some algorithms that should give better results.
Here is a small erlang program that demonstrates one of the techniques.
%
% A simple test program to calculate pi, using an easy, slow converging formula
%
-module(pi).
-export([main/0]).
% The formual we use
% pi^2 = 6*(1/(1^2) + 1/(2^2) + 1/(3^2) ...)
% When we reach the 0th term, we are done.
pi_engine(0, Sum) ->
io:format("~20w ~30.25f ~n", [0, Sum]),
io:format("The value of Pi is about ~30.25f ~n", [math:sqrt(Sum * 6)]);
% Calculate the Nth term
pi_engine(N, Sum) when N > 0 ->
%io:format("~20w ~30.25f ~n", [N, Sum]),
pi_engine(N - 1, Sum + (1 / (N * N)));
pi_engine(N, _Sum) when N < 0 ->
io:format("A negative number of terms does not make sense~w ~n", [N]).
% provide an interface into the engine, as we must pass the sum along in order to
% be able to accumulate the running total
pi(TermsToCalculate) ->
InitialValue = 0.0,
pi_engine(TermsToCalculate, InitialValue).
main() ->
io:format("main begins ~n", []),
% Decide how many terms should be used for this run
TermsToCalculate = 100000,
% Perform the calculation, and display the result
pi(TermsToCalculate),
io:format("main ends ~n", []).
Chris Jester-Young
August 31, 2008, August 31, 2008 22:34, permalink
@evilteach: Thanks for that. My question was going for the fastest double-precision method applicable, however, your approach of going for more precision is nice too.
I noticed that your final answer has a sqrt in it. Do you have an arbitrary-precision sqrt function? Does Erlang provide one, or would you have to do an iterative approximation by hand?
I've taken the liberty of taking your code and marking it as Erlang, so that the syntax highlighting works better for it. Hope you don't mind! :-)
%
% A simple test program to calculate pi, using an easy, slow converging formula
%
-module(pi).
-export([main/0]).
% The formual we use
% pi^2 = 6*(1/(1^2) + 1/(2^2) + 1/(3^2) ...)
% When we reach the 0th term, we are done.
pi_engine(0, Sum) ->
io:format("~20w ~30.25f ~n", [0, Sum]),
io:format("The value of Pi is about ~30.25f ~n", [math:sqrt(Sum * 6)]);
% Calculate the Nth term
pi_engine(N, Sum) when N > 0 ->
%io:format("~20w ~30.25f ~n", [N, Sum]),
pi_engine(N - 1, Sum + (1 / (N * N)));
pi_engine(N, _Sum) when N < 0 ->
io:format("A negative number of terms does not make sense~w ~n", [N]).
% provide an interface into the engine, as we must pass the sum along in order to
% be able to accumulate the running total
pi(TermsToCalculate) ->
InitialValue = 0.0,
pi_engine(TermsToCalculate, InitialValue).
main() ->
io:format("main begins ~n", []),
% Decide how many terms should be used for this run
TermsToCalculate = 100000,
% Perform the calculation, and display the result
pi(TermsToCalculate),
io:format("main ends ~n", []).
EvilTeach
September 1, 2008, September 01, 2008 12:45, permalink
Ya, off the top of my head, I don't know if sqrt would give good precision.
The purpose of the program was to demonstrate the technique.
However, if you are only need double precision, I kind of like this implementation.
double pi
(
void
)
{
return 3.1415926535897932;
}
spz
November 5, 2010, November 05, 2010 00:51, permalink
Problem is: Under the hood, math libraries approximate most functions with already existing tables x->result. Because of doubles have fixed precision even approximating between two table rows wouldn't be recognized in the end. I'd happy to find a fast unlimited approximation for tanges(x), so I could find pi by that:
#!/bin/python
"""
I'd use Newton's method
x1 = x0 - f(x0)/f'(x0)
on sin(x) and would start with x0=3.
And with f = sin, f/f' would be tan
"""
pi = 3;
while True:
pi = pi - tan(pi)
print pi;
# Sadly tan is one of the internally table-approximated functions.
Also any BigFloat implementation wouldn't deliver a tan(BigFloat).
tan(BigFloat) would take as long as 4/1 - 4/3 +… or even longer.
spz
November 5, 2010, November 05, 2010 03:02, permalink
<script type="text/javascript">i=function(j){alert(j);i(j-Math.tan(j));}</script>
i=function(j){alert(j);i(j-Math.tan(j));}
i(3);
Solutions welcome in any language. :-) I'm looking for the fastest way to obtain the value of pi, as a personal challenge. More specifically I'm using ways that don't involve using #defined constants like M_PI, or hardcoding the number in.
The program below tests the various ways I know of. The inline assembly version is, in theory, the fastest option, though clearly not portable; I've included it as a baseline to compare the other versions against. In my tests, with builtins, the 4 * atan(1) version is fastest on GCC 4.2, because it auto-folds the atan(1) into a constant. With -fno-builtin specified, the atan2(0, -1) version is fastest.
Apart from testing between various compiler flags (I've compared 32-bit against 64-bit too, because the optimisations are different), I've also tried switching the order of the tests around. The atan2(0, -1) version still comes out top every time, though.
I'm keen to hear what results you have, as well as improvements to the testing process. :-)
ETA: I noticed that the inline assembly was treated as a constant and hoisted out of the loop. Thus, I've included the fldpi in a separate file, so that it gets treated like a function call. As a bonus, now the frame setup times get counted too, just like all the other function calls.