Skip to content

Commit 6437728

Browse files
authored
Faster implementation of Cl3 (#20)
using a Pade approximant
1 parent 01e7602 commit 6437728

File tree

6 files changed

+324
-5
lines changed

6 files changed

+324
-5
lines changed

src/cpp/Cl3.cpp

+198-4
Original file line numberDiff line numberDiff line change
@@ -5,29 +5,223 @@
55
// ====================================================================
66

77
#include "Cl3.hpp"
8-
#include "Li3.hpp"
9-
#include <complex>
8+
#include <cmath>
109

1110
namespace polylogarithm {
1211

1312
/**
1413
* @brief Clausen function \f$\mathrm{Cl}_3(\theta) = \mathrm{Re}(\mathrm{Li}_3(e^{i\theta}))\f$
1514
* @param x real angle
1615
* @return \f$\mathrm{Cl}_3(\theta)\f$
16+
* @author Alexander Voigt
17+
* @note Implementation as economized Padé approximation.
1718
*/
1819
double Cl3(double x) noexcept
1920
{
20-
return std::real(Li3(std::polar(1.0, x)));
21+
const double PI = 3.14159265358979324;
22+
const double PI2 = 2*PI, PIH = PI/2, PI28 = PI*PI/8;
23+
const double zeta3 = 1.2020569031595943;
24+
25+
if (x < 0) {
26+
x = -x;
27+
}
28+
29+
if (x >= PI2) {
30+
x = std::fmod(x, PI2);
31+
}
32+
33+
if (x > PI) {
34+
const double p0 = 6.28125;
35+
const double p1 = 0.0019353071795864769253;
36+
x = (p0 - x) + p1;
37+
}
38+
39+
if (x == 0) {
40+
return zeta3;
41+
}
42+
43+
double h = 0;
44+
45+
if (x < PIH) {
46+
const double P[] = {
47+
-7.5430148591242361e-01, 1.6121940167854339e-02,
48+
-3.7484056212140535e-05, -2.5191292110169198e-07
49+
};
50+
const double Q[] = {
51+
1.0000000000000000e+00, -2.6015033560727570e-02,
52+
1.5460630299236049e-04, -1.0987530650923219e-07
53+
};
54+
const double y = x*x;
55+
const double z = y - PI28;
56+
const double z2 = z*z;
57+
const double p = P[0] + z * P[1] + z2 * (P[2] + z * P[3]);
58+
const double q = Q[0] + z * Q[1] + z2 * (Q[2] + z * Q[3]);
59+
h = zeta3 + y*(p/q + std::log(x)/2);
60+
} else {
61+
const double P[] = {
62+
-4.9017024647634973e-01, 4.1559155224660940e-01,
63+
-7.9425531417806701e-02, 5.9420152260602943e-03,
64+
-1.8302227163540190e-04, 1.8027408929418533e-06
65+
};
66+
const double Q[] = {
67+
1.0000000000000000e+00, -1.9495887541644712e-01,
68+
1.2059410236484074e-02, -2.5235889467301620e-04,
69+
1.0199322763377861e-06, 1.9612106499469264e-09
70+
};
71+
const double y = PI - x;
72+
const double z = y*y - PI28;
73+
const double z2 = z*z;
74+
const double z4 = z2*z2;
75+
const double p = P[0] + z * P[1] + z2 * (P[2] + z * P[3]) +
76+
z4 * (P[4] + z * P[5]);
77+
const double q = Q[0] + z * Q[1] + z2 * (Q[2] + z * Q[3]) +
78+
z4 * (Q[4] + z * Q[5]);
79+
h = p/q;
80+
}
81+
82+
return h;
2183
}
2284

2385
/**
2486
* @brief Clausen function \f$\mathrm{Cl}_3(\theta) = \mathrm{Re}(\mathrm{Li}_3(e^{i\theta}))\f$ with long double precision
2587
* @param x real angle
2688
* @return \f$\mathrm{Cl}_3(\theta)\f$
89+
* @author Alexander Voigt
90+
* @note Implementation as economized Padé approximation.
2791
*/
2892
long double Cl3(long double x) noexcept
2993
{
30-
return std::real(Li3(std::polar(1.0L, x)));
94+
const long double PI = 3.14159265358979323846264338327950288L;
95+
const long double PI2 = 2*PI, PIH = PI/2, PI28 = PI*PI/8;
96+
const long double zeta3 = 1.2020569031595942853997381615114499908L;
97+
98+
if (x < 0) {
99+
x = -x;
100+
}
101+
102+
if (x >= PI2) {
103+
x = std::fmod(x, PI2);
104+
}
105+
106+
if (x > PI) {
107+
const long double p0 = 6.28125L;
108+
const long double p1 = 0.0019353071795864769252867665590057683943L;
109+
x = (p0 - x) + p1;
110+
}
111+
112+
if (x == 0) {
113+
return zeta3;
114+
}
115+
116+
long double h = 0;
117+
118+
if (x < PIH) {
119+
const long double P[] = {
120+
-7.543014859124236086513359303676733979191e-01L,
121+
6.402301836868117230156416581268033099875e-02L,
122+
-2.127896098530218208963041584986434591351e-03L,
123+
3.463165731182357705183279540808782152120e-05L,
124+
-2.754862729116033380287404534774919686497e-07L,
125+
8.052538909862304289974104174276673516832e-10L,
126+
1.346114649676610056675054469405688579693e-12L,
127+
-9.624573206929882592200009480606020302094e-15L,
128+
8.275206821858140239162250779757575466957e-18L
129+
};
130+
const long double Q[] = {
131+
1.000000000000000000000000000000000000000e+00L,
132+
-8.951892304715004068142060061380434166813e-02L,
133+
3.220693717342225233144437822487371940443e-03L,
134+
-5.958192714621426181787114562889325229941e-05L,
135+
6.014846196895560469030445734629791881016e-07L,
136+
-3.236076181461994705051496031602571907271e-09L,
137+
8.340438662048507021623726184074556662658e-12L,
138+
-7.892956808623089379250167198187183753861e-15L,
139+
1.129224149808716947279552120998699589829e-18L
140+
};
141+
const long double y = x*x;
142+
const long double z = y - PI28;
143+
const long double z2 = z*z;
144+
const long double z4 = z2*z2;
145+
const long double z8 = z4*z4;
146+
// @todo(alex) add one more term:
147+
const long double p = P[0] + z * P[1] + z2 * (P[2] + z * P[3]) +
148+
z4 * (P[4] + z * P[5] + z2 * (P[6] + z * P[7])) + z8 * P[8];
149+
const long double q = Q[0] + z * Q[1] + z2 * (Q[2] + z * Q[3]) +
150+
z4 * (Q[4] + z * Q[5] + z2 * (Q[6] + z * Q[7])) + z8 * Q[8];
151+
h = zeta3 + y*(p/q + std::log(x)/2);
152+
} else {
153+
const long double P[] = {
154+
-4.901702464763497295023867883920487195585e-01L,
155+
1.383627100551763417738051599449773818178e+00L,
156+
-1.844002682148364621305380083013461847933e+00L,
157+
1.563880808732850065996446127297492925273e+00L,
158+
-9.527454451278452672142805742734474223237e-01L,
159+
4.444304509117015442253289733308961077770e-01L,
160+
-1.647875030685306314220378800538115071507e-01L,
161+
4.967825071601030726970818647842992410835e-02L,
162+
-1.233860796084952133261104412894436518532e-02L,
163+
2.541279038266908987516674931803683131910e-03L,
164+
-4.345955367166196341753615932772484893528e-04L,
165+
6.151865219319515057283373610462827653393e-05L,
166+
-7.156818018509654192384040775515653410043e-06L,
167+
6.767889054848634496309672687089961659404e-07L,
168+
-5.125309226510821380097226378219650957199e-08L,
169+
3.048859537856972635577813264215347682070e-09L,
170+
-1.389976271585941361238252210621219258086e-10L,
171+
4.704164267289991379740337499442804533393e-12L,
172+
-1.132342670092216676813702977101988168170e-13L,
173+
1.824345098049373687569378456747139377638e-15L,
174+
-1.791641454777807114948357142007867857930e-17L,
175+
9.108765355959209109895175829397816299966e-20L,
176+
-1.660401736402055166724718838052384514483e-22L
177+
};
178+
const long double Q[] = {
179+
1.000000000000000000000000000000000000000e+00L,
180+
-2.169855465456334109398757427681231252557e+00L,
181+
2.322591192513290976964726281913672818074e+00L,
182+
-1.625275527588871360012554245669893880287e+00L,
183+
8.307827568524387115453112563337808771609e-01L,
184+
-3.283520342971423622595752570043633453066e-01L,
185+
1.036112385177655663181226022787541960128e-01L,
186+
-2.658049675854302301943410969110308213449e-02L,
187+
5.594232916811019674312451829387056772971e-03L,
188+
-9.682117420514440669170644446720530951119e-04L,
189+
1.373775076479943342597652514135717056402e-04L,
190+
-1.585377827830451443222728940538258932617e-05L,
191+
1.469487119911154060364041910223012615302e-06L,
192+
-1.075235400618754592467376790070172663512e-07L,
193+
6.072406717259994215240055677751661462408e-09L,
194+
-2.571318839166908430254322849495115672150e-10L,
195+
7.860204435599274911316716981969539571217e-12L,
196+
-1.647194263529074575838983448164465153777e-13L,
197+
2.194513208755261829948257460166939183883e-15L,
198+
-1.645669178927884807010050119309005828655e-17L,
199+
5.496854459511181731052565995680338061501e-20L,
200+
-4.060391001681163801807157030538815464753e-23L,
201+
-1.500455700173452211389785169624351996026e-26L
202+
};
203+
const long double y = PI - x;
204+
const long double z = y*y - PI28;
205+
const long double z2 = z*z;
206+
const long double z4 = z2*z2;
207+
const long double z8 = z4*z4;
208+
const long double z16 = z8*z8;
209+
const long double p = P[0] + z*P[1] + z2*(P[2] + z*P[3]) +
210+
z4*(P[4] + z*P[5] + z2*(P[6] + z*P[7])) +
211+
z8*(P[8] + z*P[9] + z2*(P[10] + z*P[11]) +
212+
z4*(P[12] + z*P[13] + z2*(P[14] + z*P[15]))) +
213+
z16*(P[16] + z*P[17] + z2*(P[18] + z*P[19]) +
214+
z4*(P[20] + z*P[21] + z2*P[22]));
215+
const long double q = Q[0] + z*Q[1] + z2*(Q[2] + z*Q[3]) +
216+
z4*(Q[4] + z*Q[5] + z2*(Q[6] + z*Q[7])) +
217+
z8*(Q[8] + z*Q[9] + z2*(Q[10] + z*Q[11]) +
218+
z4*(Q[12] + z*Q[13] + z2*(Q[14] + z*Q[15]))) +
219+
z16*(Q[16] + z*Q[17] + z2*(Q[18] + z*Q[19]) +
220+
z4*(Q[20] + z*Q[21] + z2*Q[22]));
221+
h = p/q;
222+
}
223+
224+
return h;
31225
}
32226

33227
} // namespace polylogarithm

test/alt/alt.h

+1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ double clausen_2_bernoulli(double x);
4444
double clausen_2_koelbig(double x);
4545
long double clausen_2l_koelbig(long double x);
4646
double clausen_2_pade(double x);
47+
double clausen_3_pade(double x);
4748
double clausen_2_wu(double x);
4849
double clausen_3_wu(double x);
4950

test/alt/pade/pade.c

+71
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,74 @@ double clausen_2_pade(double x)
7676

7777
return sgn*h;
7878
}
79+
80+
/**
81+
* @brief Clausen function \f$\mathrm{Cl}_3(\theta) = \mathrm{Re}(\mathrm{Li}_3(e^{i\theta}))\f$
82+
* @param x real angle
83+
* @return \f$\mathrm{Cl}_3(\theta)\f$
84+
* @author Alexander Voigt
85+
* @note Implementation as economized Padé approximation in
86+
* combination with the series expansion from [Jiming Wu, Xiaoping
87+
* Zhang, Dongjie Liu: "An efficient calculation of the Clausen
88+
* functions Cl_n(0)(n >= 2)"].
89+
*/
90+
double clausen_3_pade(double x)
91+
{
92+
const double PI = 3.14159265358979324;
93+
const double PI2 = 2*PI, PIH = PI/2, PI28 = PI*PI/8;
94+
const double zeta3 = 1.2020569031595943;
95+
96+
if (x < 0) {
97+
x = -x;
98+
}
99+
100+
if (x >= PI2) {
101+
x = fmod(x, PI2);
102+
}
103+
104+
if (x > PI) {
105+
const double p0 = 6.28125;
106+
const double p1 = 0.0019353071795864769253;
107+
x = (p0 - x) + p1;
108+
}
109+
110+
if (x == 0) {
111+
return zeta3;
112+
}
113+
114+
const double y = x*x;
115+
double p = 0, q = 0;
116+
117+
if (x < PIH) {
118+
const double P[] = {
119+
-7.2832985481448279e-01, 4.4443503980076525e-02,
120+
-7.1973646626163953e-04, 2.8044709581614591e-06
121+
};
122+
const double Q[] = {
123+
1.0000000000000000e+00, -3.6618102564796636e-02,
124+
3.3125108587789328e-04, -4.1893453026087742e-07
125+
};
126+
const double z = y - PI28;
127+
const double z2 = z*z;
128+
p = P[0] + z * P[1] + z2 * (P[2] + z * P[3]);
129+
q = Q[0] + z * Q[1] + z2 * (Q[2] + z * Q[3]);
130+
} else {
131+
const double P[] = {
132+
-6.3603493635005218e-01, 6.5684157446730646e-02,
133+
-2.3565499130760498e-03, 3.5782068616362699e-05,
134+
-2.1603488890230991e-07, 3.5781888382472248e-10
135+
};
136+
const double Q[] = {
137+
1.0000000000000000e+00, -7.2267100371655266e-02,
138+
1.8193983947330242e-03, -1.8536201173545669e-05,
139+
6.4758810460566848e-08, -3.5368448623664113e-11
140+
};
141+
const double z = y - 5*PI28;
142+
const double z2 = z*z;
143+
const double z4 = z2*z2;
144+
p = P[0] + z * P[1] + z2 * (P[2] + z * P[3]) + z4 * (P[4] + z * P[5]);
145+
q = Q[0] + z * Q[1] + z2 * (Q[2] + z * Q[3]) + z4 * (Q[4] + z * Q[5]);
146+
}
147+
148+
return zeta3 + y*(p/q + log(2*sin(x/2))/2);
149+
}

test/bench_Cl.cpp

+20
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "Cl5.hpp"
99
#include "Cl6.hpp"
1010
#include "Li2.hpp"
11+
#include "Li3.hpp"
1112
#include <iostream>
1213
#include <iomanip>
1314

@@ -32,11 +33,21 @@ double Cl2_via_Li2(double x) noexcept
3233
return std::imag(polylogarithm::Li2(std::polar(1.0, x)));
3334
}
3435

36+
double Cl3_via_Li3(double x) noexcept
37+
{
38+
return std::real(polylogarithm::Li3(std::polar(1.0, x)));
39+
}
40+
3541
long double Cl2_via_Li2(long double x) noexcept
3642
{
3743
return std::imag(polylogarithm::Li2(std::polar(1.0L, x)));
3844
}
3945

46+
long double Cl3_via_Li3(long double x) noexcept
47+
{
48+
return std::real(polylogarithm::Li3(std::polar(1.0L, x)));
49+
}
50+
4051
} // anonymous namespace
4152

4253
template <typename T, typename Fn>
@@ -133,12 +144,21 @@ void bench(const T& values_d, const U& values_l)
133144
bench_fn([&](double x) { return polylogarithm::Cl3(x); }, values_d,
134145
"polylogarithm C++", "double");
135146

147+
bench_fn([&](double x) { return Cl3_via_Li3(x); }, values_d,
148+
"via Li3 C++", "double");
149+
150+
bench_fn([&](double x) { return clausen_3_pade(x); }, values_d,
151+
"Pade", "double");
152+
136153
bench_fn([&](double x) { return clausen_3_wu(x); }, values_d,
137154
"Wu", "double");
138155

139156
bench_fn([&](long double x) { return polylogarithm::Cl3(x); }, values_l,
140157
"polylogarithm C++", "long double");
141158

159+
bench_fn([&](long double x) { return Cl3_via_Li3(x); }, values_l,
160+
"via Li3 C++", "long double");
161+
142162
print_headline_2("Cl4");
143163

144164
bench_fn([&](double x) { return polylogarithm::Cl4(x); }, values_d,

test/math/clausenEconomizedPadeApprox.m

+13
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
Needs["FunctionApproximations`"];
77

88
Get["../test/math/clausenBernoulli.m"];
9+
Get["../test/math/clausenWu.m"];
910

1011
(* maximum number of terms in Cl2 series *)
1112
nMax = 100;
@@ -57,3 +58,15 @@
5758
itrans[x_] := (Pi - x)^2
5859

5960
CalcPade[N[fHi[trans[#]], 10*outPrec]&, itrans /@ {Pi/2, Pi}, 5];
61+
62+
(* Cl3[x] *)
63+
64+
(* interval = {0, Pi/2}; *)
65+
fLo[x_] := (Cl[3, x, 100] - Zeta[3])/x^2 - Log[x]/2
66+
67+
CalcPade[N[fLo[Sqrt[#]], 10*outPrec]&, {0, (Pi/2)^2}, 3];
68+
69+
(* interval = {Pi/2, Pi}; *)
70+
fHi[x_] := Cl[3, x, 100]
71+
72+
CalcPade[N[fHi[trans[#]], 10*outPrec]&, itrans /@ {Pi/2, Pi}, 5];

0 commit comments

Comments
 (0)