cbrtf.h source code [llvm_projects/libc/src/__support/math/cbrtf.h]

1	//===-- Implementation header for cbrtf -------------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_CBRTF_H
10	#define LLVM_LIBC_SRC___SUPPORT_MATH_CBRTF_H
11
12	#include "src/__support/FPUtil/FEnvImpl.h"
13	#include "src/__support/FPUtil/FPBits.h"
14	#include "src/__support/FPUtil/multiply_add.h"
15	#include "src/__support/macros/config.h"
16	#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
17
18	namespace LIBC_NAMESPACE_DECL {
19
20	namespace math {
21
22	LIBC_INLINE float cbrtf(float x) {
23	// Look up table for 2^(i/3) for i = 0, 1, 2.
24	constexpr double CBRT2[`3`] = {`1.0`, `0x1.428a2f98d728bp0`, `0x1.965fea53d6e3dp0`};
25
26	// Degree-7 polynomials approximation of ((1 + x)^(1/3) - 1)/x for 0 <= x <= 1
27	// generated by Sollya with:
28	// > for i from 0 to 15 do {
29	// P = fpminimax(((1 + x)^(1/3) - 1)/x, 6, [\|D...\|], [i/16, (i + 1)/16]);
30	// print("{", coeff(P, 0), ",", coeff(P, 1), ",", coeff(P, 2), ",",
31	// coeff(P, 3), ",", coeff(P, 4), ",", coeff(P, 5), ",",
32	// coeff(P, 6), "},");
33	// };
34	// Then (1 + x)^(1/3) ~ 1 + x P(x).*
35	constexpr double COEFFS[`16`][`7`] = {
36	{`0x1.55555555554ebp-2`, -`0x1.c71c71c678c0cp-4`, `0x1.f9add2776de81p-5`,
37	-`0x1.511e10aa964a7p-5`, `0x1.ee44165937fa2p-6`, -`0x1.7c5c9e059345dp-6`,
38	`0x1.047f75e0aff14p-6`},
39	{`0x1.5555554d1149ap-2`, -`0x1.c71c676fcb5bp-4`, `0x1.f9ab127dc57ebp-5`,
40	-`0x1.50ea8fd1d4c15p-5`, `0x1.e9d68f28ced43p-6`, -`0x1.60e0e1e661311p-6`,
41	`0x1.716eca1d6e3bcp-7`},
42	{`0x1.5555546377d45p-2`, -`0x1.c71bc1c6d49d2p-4`, `0x1.f9924cc0ed24dp-5`,
43	-`0x1.4fea3beb53b3bp-5`, `0x1.de028a9a07b1bp-6`, -`0x1.3b090d2233524p-6`,
44	`0x1.0aeca34893785p-7`},
45	{`0x1.55554dce9f649p-2`, -`0x1.c7188b34b98f8p-4`, `0x1.f93e1af34af49p-5`,
46	-`0x1.4d9a06be75c63p-5`, `0x1.cb943f4f68992p-6`, -`0x1.139a685a5e3c4p-6`,
47	`0x1.88410674c6a5dp-8`},
48	{`0x1.5555347d211c3p-2`, -`0x1.c70f2a4b1a5fap-4`, `0x1.f88420e8602c3p-5`,
49	-`0x1.49becfa4ed3ep-5`, `0x1.b475cd9013162p-6`, -`0x1.dcfee1dd2f8efp-7`,
50	`0x1.249bb51a1c498p-8`},
51	{`0x1.5554f01b33dbap-2`, -`0x1.c6facb929dbf1p-4`, `0x1.f73fb7861252ep-5`,
52	-`0x1.4459a4a0071fap-5`, `0x1.9a8df2b504fc2p-6`, -`0x1.9a7ce3006d06ep-7`,
53	`0x1.ba9230918fa2ep-9`},
54	{`0x1.55545c695db5fp-2`, -`0x1.c6d6089f20275p-4`, `0x1.f556e0ea80efp-5`,
55	-`0x1.3d91372d083f4p-5`, `0x1.7f66cff331f4p-6`, -`0x1.606a562491737p-7`,
56	`0x1.52e3e17c71069p-9`},
57	{`0x1.55534a879232ap-2`, -`0x1.c69b836998b84p-4`, `0x1.f2bb26dac0e4cp-5`,
58	-`0x1.359eed43716d7p-5`, `0x1.64218cd824fbcp-6`, -`0x1.2e703e2e091e8p-7`,
59	`0x1.0677d9af6aad4p-9`},
60	{`0x1.5551836bb5494p-2`, -`0x1.c64658c15353bp-4`, `0x1.ef68517451a6ep-5`,
61	-`0x1.2cc20a980dceep-5`, `0x1.49843e0fad93ap-6`, -`0x1.03c59ccb68e54p-7`,
62	`0x1.9ad325dc7adcbp-10`},
63	{`0x1.554ecacb0d035p-2`, -`0x1.c5d2664026ffcp-4`, `0x1.eb624796ba809p-5`,
64	-`0x1.233803d19a535p-5`, `0x1.300decb1c3c28p-6`, -`0x1.befe18031ec3dp-8`,
65	`0x1.449f5ee175c69p-10`},
66	{`0x1.554ae1f5ae815p-2`, -`0x1.c53c6b14ff6b2p-4`, `0x1.e6b2d5127bb5bp-5`,
67	-`0x1.19387336788a3p-5`, `0x1.180955a6ab255p-6`, -`0x1.81696703ba369p-8`,
68	`0x1.02cb36389bd79p-10`},
69	{`0x1.55458a59f356ep-2`, -`0x1.c4820dd631ae9p-4`, `0x1.e167af818bd15p-5`,
70	-`0x1.0ef35f6f72e52p-5`, `0x1.019c33b65e4ebp-6`, -`0x1.4d25bdd52d3a5p-8`,
71	`0x1.a008ae91f5936p-11`},
72	{`0x1.553e878eafee1p-2`, -`0x1.c3a1d0b2a3db2p-4`, `0x1.db90d8ed9f89bp-5`,
73	-`0x1.0490e20f1ae91p-5`, `0x1.d9a5d1fc42fe3p-7`, -`0x1.20bf8227c2abfp-8`,
74	`0x1.50f8174cdb6e9p-11`},
75	{`0x1.5535a0dedf1b1p-2`, -`0x1.c29afb8bd01a1p-4`, `0x1.d53f6371c1e27p-5`,
76	-`0x1.f463209b433e2p-6`, `0x1.b35222a17e44p-7`, -`0x1.f5efbf505e133p-9`,
77	`0x1.12e0e94e8586dp-11`},
78	{`0x1.552aa25e57bfdp-2`, -`0x1.c16d811e4acadp-4`, `0x1.ce8489b47aa51p-5`,
79	-`0x1.dfde7ff758ea8p-6`, `0x1.901f43aac38c8p-7`, -`0x1.b581d07df5ad5p-9`,
80	`0x1.c3726535f1fc6p-12`},
81	{`0x1.551d5d9b204d3p-2`, -`0x1.c019e328f8db1p-4`, `0x1.c7710f44fc3cep-5`,
82	-`0x1.cbbbe25ea8ba4p-6`, `0x1.6fe270088623dp-7`, -`0x1.7e6fc79733761p-9`,
83	`0x1.75077abf18d84p-12`},
84	};
85
86	using FloatBits = typename fputil::FPBits<float>;
87	using DoubleBits = typename fputil::FPBits<double>;
88
89	FloatBits x_bits(x);
90
91	uint32_t x_abs = x_bits.uintval() & `0x7fff'ffff`;
92	uint32_t sign_bit = (x_bits.uintval() >> `31`) << DoubleBits::EXP_LEN;
93
94	if (LIBC_UNLIKELY(x == `0.0f` \|\| x_abs >= `0x7f80'0000`)) {
95	// x is 0, Inf, or NaN.
96	// Make sure it works for FTZ/DAZ modes.
97	return x + x;
98	}
99
100	double xd = static_cast<double>(x);
101	DoubleBits xd_bits(xd);
102
103	// When using biased exponent of x in double precision,
104	// x_e = real_exponent_of_x + 1023
105	// Then:
106	// x_e / 3 = real_exponent_of_x / 3 + 1023/3
107	// = real_exponent_of_x / 3 + 341
108	// So to make it the correct biased exponent of x^(1/3), we add
109	// 1023 - 341 = 682
110	// to the quotient x_e / 3.
111	unsigned x_e = static_cast<unsigned>(xd_bits.get_biased_exponent());
112	unsigned out_e = (x_e / `3` + `682`) \| sign_bit;
113	unsigned shift_e = x_e % `3`;
114
115	// Set x_m = 2^(x_e % 3) (1.mantissa)*
116	uint64_t x_m = xd_bits.get_mantissa();
117	// Use the leading 4 bits for look up table
118	unsigned idx = static_cast<unsigned>(x_m >> (DoubleBits::FRACTION_LEN - `4`));
119
120	x_m \|= static_cast<uint64_t>(DoubleBits::EXP_BIAS)
121	<< DoubleBits::FRACTION_LEN;
122
123	double x_reduced = DoubleBits (x_m).get_val();
124	double dx = x_reduced - `1.0`;
125
126	double dx_sq = dx * dx;
127	double c0 = fputil::multiply_add(x: dx, y: COEFFS[idx][`0`], z: `1.0`);
128	double c1 = fputil::multiply_add(x: dx, y: COEFFS[idx][`2`], z: COEFFS[idx][`1`]);
129	double c2 = fputil::multiply_add(x: dx, y: COEFFS[idx][`4`], z: COEFFS[idx][`3`]);
130	double c3 = fputil::multiply_add(x: dx, y: COEFFS[idx][`6`], z: COEFFS[idx][`5`]);
131
132	double dx_4 = dx_sq * dx_sq;
133	double p0 = fputil::multiply_add(x: dx_sq, y: c1, z: c0);
134	double p1 = fputil::multiply_add(x: dx_sq, y: c3, z: c2);
135
136	double r = fputil::multiply_add(x: dx_4, y: p1, z: p0) * CBRT2[shift_e];
137
138	uint64_t r_m = DoubleBits (r).get_mantissa();
139	// Check if the output is exact. To be exact, the smallest 1-bit of the
140	// output has to be at least 2^-7 or higher. So we check the lowest 44 bits
141	// to see if they are within 2^(-52 + 3) errors from all zeros, then the
142	// result cube root is exact.
143	if (LIBC_UNLIKELY(((r_m + `8`) & `0xfffffffffff`) <= `16`)) {
144	if ((r_m & `0xfffffffffff`) <= `8`)
145	r_m &= `0xffff'ffff'ffff'ffe0`;
146	else
147	r_m = (r_m & `0xffff'ffff'ffff'ffe0`) + `0x20`;
148	fputil::clear_except_if_required(FE_INEXACT);
149	}
150	// Adjust exponent and sign.
151	uint64_t r_bits =
152	r_m \| (static_cast<uint64_t>(out_e) << DoubleBits::FRACTION_LEN);
153
154	return static_cast<float>(DoubleBits (r_bits).get_val());
155	}
156
157	} // namespace math
158
159	} // namespace LIBC_NAMESPACE_DECL
160
161	#endif // LLVM_LIBC_SRC___SUPPORT_MATH_CBRTF_H
162

Browse the source code of llvm_projects/libc/src/__support/math/cbrtf.h