Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ext/bcmath: Improving bcpow() performance #18099

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
2 changes: 0 additions & 2 deletions ext/bcmath/libbcmath/src/bcmath.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,6 @@ bc_num bc_multiply(bc_num n1, bc_num n2, size_t scale);
*(result) = mul_ex; \
} while (0)

bc_num bc_square(bc_num n1, size_t scale);

bool bc_divide(bc_num n1, bc_num n2, bc_num *quot, size_t scale);

bool bc_modulo(bc_num num1, bc_num num2, bc_num *resul, size_t scale);
Expand Down
2 changes: 2 additions & 0 deletions ext/bcmath/libbcmath/src/private.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ static const BC_VECTOR BC_POW_10_LUT[9] = {
bcmath_compare_result _bc_do_compare (bc_num n1, bc_num n2, size_t scale, bool use_sign);
bc_num _bc_do_add (bc_num n1, bc_num n2);
bc_num _bc_do_sub (bc_num n1, bc_num n2);
void bc_multiply_vector(
BC_VECTOR *n1_vector, size_t n1_arr_size, BC_VECTOR *n2_vector, size_t n2_arr_size, BC_VECTOR *prod_vector, size_t prod_arr_size);
void _bc_rm_leading_zeros (bc_num num);

#endif
201 changes: 170 additions & 31 deletions ext/bcmath/libbcmath/src/raise.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,24 +30,152 @@
*************************************************************************/

#include "bcmath.h"
#include "convert.h"
#include "private.h"
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

void bc_square_ex(bc_num n1, bc_num *result, size_t scale_min) {
bc_num square_ex = bc_square(n1, scale_min);
bc_free_num(result);
*(result) = square_ex;
static inline size_t bc_multiply_vector_ex(
BC_VECTOR **n1_vector, size_t n1_arr_size, BC_VECTOR *n2_vector, size_t n2_arr_size, BC_VECTOR **result_vector)
{
size_t result_arr_size = n1_arr_size + n2_arr_size;
bc_multiply_vector(*n1_vector, n1_arr_size, n2_vector, n2_arr_size, *result_vector, result_arr_size);

/* Eliminate extra zeros because they increase the number of calculations. */
while ((*result_vector)[result_arr_size - 1] == 0) {
result_arr_size--;
}

/* Swap n1_vector and result_vector. */
BC_VECTOR *tmp = *n1_vector;
*n1_vector = *result_vector;
*result_vector = tmp;

return result_arr_size;
}

static inline size_t bc_square_vector_ex(BC_VECTOR **base_vector, size_t base_arr_size, BC_VECTOR **result_vector)
{
return bc_multiply_vector_ex(base_vector, base_arr_size, *base_vector, base_arr_size, result_vector);
}

/* Use "exponentiation by squaring". This is the fast path when the results are small. */
static inline bc_num bc_fast_raise(
const char *base_end, long exponent, size_t base_len, size_t power_len, size_t power_scale, size_t power_full_len)
{
BC_VECTOR base_vector = 0;

/* Convert to BC_VECTOR[] */
bc_convert_to_vector(&base_vector, base_end, base_len);

while ((exponent & 1) == 0) {
base_vector *= base_vector;
exponent >>= 1;
}

/* copy base to power */
BC_VECTOR power_vector = base_vector;
exponent >>= 1;

while (exponent > 0) {
base_vector *= base_vector;
if ((exponent & 1) == 1) {
power_vector *= base_vector;
}
exponent >>= 1;
}

bc_num power = bc_new_num_nonzeroed(power_len, power_scale);
char *pptr = power->n_value;
char *pend = pptr + power_full_len - 1;

while (pend >= pptr) {
*pend-- = power_vector % BASE;
power_vector /= BASE;
}
return power;
}

/* Use "exponentiation by squaring". This is the standard path. */
static bc_num bc_standard_raise(
const char *base_ptr, const char *base_end, long exponent, size_t base_len, size_t power_scale)
{
/* Remove the leading zeros as they will be filled in later. */
while (*base_ptr++ == 0) {
base_len--;
}

size_t base_arr_size = BC_ARR_SIZE_FROM_LEN(base_len);
size_t max_power_arr_size = base_arr_size * exponent;

/* The allocated memory area is reused on a rotational basis, so the same size is required. */
BC_VECTOR *buf = safe_emalloc(max_power_arr_size * 3, sizeof(BC_VECTOR), 0);
BC_VECTOR *base_vector = buf;
BC_VECTOR *power_vector = base_vector + max_power_arr_size;
BC_VECTOR *tmp_result_vector = power_vector + max_power_arr_size;

/* Convert to BC_VECTOR[] */
bc_convert_to_vector(base_vector, base_end, base_len);

while ((exponent & 1) == 0) {
base_arr_size = bc_square_vector_ex(&base_vector, base_arr_size, &tmp_result_vector);
exponent >>= 1;
}

/* copy base to power */
size_t power_arr_size = base_arr_size;
for (size_t i = 0; i < base_arr_size; i++) {
power_vector[i] = base_vector[i];
}
exponent >>= 1;

while (exponent > 0) {
base_arr_size = bc_square_vector_ex(&base_vector, base_arr_size, &tmp_result_vector);
if ((exponent & 1) == 1) {
power_arr_size = bc_multiply_vector_ex(&power_vector, power_arr_size, base_vector, base_arr_size, &tmp_result_vector);
}
exponent >>= 1;
}

/* Convert to bc_num */
size_t power_leading_zeros = 0;
size_t power_len;
size_t power_full_len = power_arr_size * BC_VECTOR_SIZE;
if (power_full_len > power_scale) {
power_len = power_full_len - power_scale;
} else {
power_len = 1;
power_leading_zeros = power_scale - power_full_len + 1;
power_full_len = power_scale + 1;
}
bc_num power = bc_new_num_nonzeroed(power_len, power_scale);

char *pptr = power->n_value;
char *pend = pptr + power_full_len - 1;

/* Pad with leading zeros if necessary. */
while (power_leading_zeros > sizeof(uint32_t)) {
bc_write_bcd_representation(0, pptr);
pptr += sizeof(uint32_t);
power_leading_zeros -= sizeof(uint32_t);
}
for (size_t i = 0; i < power_leading_zeros; i++) {
*pptr++ = 0;
}

bc_convert_vector_to_char(power_vector, pptr, pend, power_arr_size);

efree(buf);

return power;
}

/* Raise "base" to the "exponent" power. The result is placed in RESULT.
Maximum exponent is LONG_MAX. If a "exponent" is not an integer,
only the integer part is used. */
bool bc_raise(bc_num base, long exponent, bc_num *result, size_t scale) {
bc_num temp, power;
size_t rscale;
size_t pwrscale;
size_t calcscale;
bool is_neg;

/* Special case if exponent is a zero. */
Expand All @@ -67,43 +195,54 @@ bool bc_raise(bc_num base, long exponent, bc_num *result, size_t scale) {
rscale = MIN (base->n_scale * exponent, MAX(scale, base->n_scale));
}

/* Set initial value of temp. */
power = bc_copy_num(base);
pwrscale = base->n_scale;
while ((exponent & 1) == 0) {
pwrscale = 2 * pwrscale;
bc_square_ex(power, &power, pwrscale);
exponent = exponent >> 1;
if (bc_is_zero(base)) {
bc_free_num(result);
*result = bc_copy_num(BCG(_zero_));
/* If the exponent is negative, it divides by 0, so it is false. */
return !is_neg;
}
temp = bc_copy_num(power);
calcscale = pwrscale;
exponent = exponent >> 1;

/* Do the calculation. */
while (exponent > 0) {
pwrscale = 2 * pwrscale;
bc_square_ex(power, &power, pwrscale);
if ((exponent & 1) == 1) {
calcscale = pwrscale + calcscale;
bc_multiply_ex(temp, power, &temp, calcscale);
}
exponent = exponent >> 1;
size_t base_len = base->n_len + base->n_scale;
size_t power_len = base->n_len * exponent;
size_t power_scale = base->n_scale * exponent;
size_t power_full_len = power_len + power_scale;

sign power_sign;
if (base->n_sign == MINUS && (exponent & 1) == 1) {
power_sign = MINUS;
} else {
power_sign = PLUS;
}

const char *base_end = base->n_value + base_len - 1;

bc_num power;
if (base_len <= BC_VECTOR_SIZE && power_full_len <= BC_VECTOR_SIZE * 2) {
power = bc_fast_raise(base_end, exponent, base_len, power_len, power_scale, power_full_len);
} else {
power = bc_standard_raise(base->n_value, base_end, exponent, base_len, power_scale);
}

_bc_rm_leading_zeros(power);
if (bc_is_zero(power)) {
power->n_sign = PLUS;
power->n_scale = 0;
} else {
power->n_sign = power_sign;
}

/* Assign the value. */
if (is_neg) {
if (bc_divide(BCG(_one_), temp, result, rscale) == false) {
bc_free_num (&temp);
if (bc_divide(BCG(_one_), power, result, rscale) == false) {
bc_free_num (&power);
return false;
}
bc_free_num (&temp);
bc_free_num (&power);
} else {
bc_free_num (result);
*result = temp;
*result = power;
(*result)->n_scale = MIN(scale, (*result)->n_scale);
}
bc_free_num (&power);
return true;
}

Expand Down
Loading
Loading