Skip to content

Commit 31c0af2

Browse files
committed
Fixed floating-point math speed degradation (Christian)
1 parent d3b4270 commit 31c0af2

File tree

11 files changed

+93
-65
lines changed

11 files changed

+93
-65
lines changed

Diff for: Zend/Makefile.am

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ libZend_la_SOURCES=\
1717
zend_objects_API.c zend_ts_hash.c zend_stream.c \
1818
zend_default_classes.c \
1919
zend_iterators.c zend_interfaces.c zend_exceptions.c \
20-
zend_strtod.c zend_closures.c
20+
zend_strtod.c zend_closures.c zend_float.c
2121

2222
libZend_la_LDFLAGS =
2323
libZend_la_LIBADD = @ZEND_EXTRA_LIBS@

Diff for: Zend/Zend.dsp

+4
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: Zend/zend.c

+1
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,7 @@ static void executor_globals_ctor(zend_executor_globals *executor_globals TSRMLS
549549
EG(current_execute_data) = NULL;
550550
EG(current_module) = NULL;
551551
EG(exit_status) = 0;
552+
EG(saved_fpu_cw) = NULL;
552553
EG(active) = 0;
553554
}
554555
/* }}} */

Diff for: Zend/zend_execute_API.c

+5
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ static int clean_non_persistent_class_full(zend_class_entry **ce TSRMLS_DC) /* {
123123

124124
void init_executor(TSRMLS_D) /* {{{ */
125125
{
126+
zend_init_fpu(TSRMLS_C);
127+
126128
INIT_ZVAL(EG(uninitialized_zval));
127129
/* trick to make uninitialized_zval never be modified, passed by ref, etc. */
128130
Z_ADDREF(EG(uninitialized_zval));
@@ -331,6 +333,9 @@ void shutdown_executor(TSRMLS_D) /* {{{ */
331333
FREE_HASHTABLE(EG(in_autoload));
332334
}
333335
} zend_end_try();
336+
337+
zend_shutdown_fpu(TSRMLS_C);
338+
334339
EG(active) = 0;
335340
}
336341
/* }}} */

Diff for: Zend/zend_float.h

+73-9
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,12 @@
2121
#ifndef ZEND_FLOAT_H
2222
#define ZEND_FLOAT_H
2323

24-
#define ZEND_FLOAT_DECLARE XPFPA_DECLARE
25-
#define ZEND_FLOAT_ENSURE() XPFPA_SWITCH_DOUBLE()
26-
#define ZEND_FLOAT_RESTORE() XPFPA_RESTORE()
27-
#define ZEND_FLOAT_RETURN(val) XPFPA_RETURN_DOUBLE(val)
24+
/*
25+
Define functions for FP initialization and de-initialization.
26+
*/
27+
extern ZEND_API void zend_init_fpu(TSRMLS_D);
28+
extern ZEND_API void zend_shutdown_fpu(TSRMLS_D);
29+
extern ZEND_API void zend_ensure_fpu_mode(TSRMLS_D);
2830

2931
/* Copy of the contents of xpfpa.h (which is under public domain)
3032
See http://wiki.php.net/rfc/rounding for details.
@@ -45,7 +47,7 @@
4547
For further details, please visit:
4648
http://www.christian-seiler.de/projekte/fpmath/
4749
48-
Version: 20081026 */
50+
Version: 20090317 */
4951

5052
/*
5153
Implementation notes:
@@ -69,11 +71,8 @@
6971
/* MSVC detection (MSVC people usually don't use autoconf) */
7072
#ifdef _MSC_VER
7173
# if _MSC_VER >= 1500
72-
/* Disable it, it slowdowns the floating operation more than
73-
anything else, by a factor 3 (using Bench.php (mandel and
74-
mandel2 for example)*/
7574
/* Visual C++ 2008 or higher, supports _controlfp_s */
76-
/*# define HAVE__CONTROLFP_S */
75+
# define HAVE__CONTROLFP_S
7776
# else
7877
/* Visual C++ (up to 2005), supports _controlfp */
7978
# define HAVE__CONTROLFP
@@ -87,6 +86,19 @@
8786
/* float.h defines _controlfp_s */
8887
# include <float.h>
8988

89+
# define XPFPA_HAVE_CW 1
90+
# define XPFPA_CW_DATATYPE \
91+
unsigned int
92+
93+
# define XPFPA_STORE_CW(vptr) do { \
94+
_controlfp_s((unsigned int *)(vptr), 0, 0); \
95+
} while (0)
96+
97+
# define XPFPA_RESTORE_CW(vptr) do { \
98+
unsigned int _xpfpa_fpu_cw; \
99+
_controlfp_s(&_xpfpa_fpu_cw, *((unsigned int *)(vptr)), _MCW_PC); \
100+
} while (0)
101+
90102
# define XPFPA_DECLARE \
91103
unsigned int _xpfpa_fpu_oldcw, _xpfpa_fpu_cw;
92104

@@ -141,6 +153,18 @@
141153
# define XPFPA_DECLARE \
142154
unsigned int _xpfpa_fpu_oldcw;
143155

156+
# define XPFPA_HAVE_CW 1
157+
# define XPFPA_CW_DATATYPE \
158+
unsigned int
159+
160+
# define XPFPA_STORE_CW(vptr) do { \
161+
*((unsigned int *)(vptr)) = _controlfp(0, 0); \
162+
} while (0)
163+
164+
# define XPFPA_RESTORE_CW(vptr) do { \
165+
_controlfp(*((unsigned int *)(vptr)), _MCW_PC); \
166+
} while (0)
167+
144168
# define XPFPA_SWITCH_DOUBLE() do { \
145169
_xpfpa_fpu_oldcw = _controlfp(0, 0); \
146170
_controlfp(_PC_53, _MCW_PC); \
@@ -188,6 +212,18 @@
188212
# define XPFPA_DECLARE \
189213
fpu_control_t _xpfpa_fpu_oldcw, _xpfpa_fpu_cw;
190214

215+
# define XPFPA_HAVE_CW 1
216+
# define XPFPA_CW_DATATYPE \
217+
fpu_control_t
218+
219+
# define XPFPA_STORE_CW(vptr) do { \
220+
_FPU_GETCW((*((fpu_control_t *)(vptr)))); \
221+
} while (0)
222+
223+
# define XPFPA_RESTORE_CW(vptr) do { \
224+
_FPU_SETCW((*((fpu_control_t *)(vptr)))); \
225+
} while (0)
226+
191227
# define XPFPA_SWITCH_DOUBLE() do { \
192228
_FPU_GETCW(_xpfpa_fpu_oldcw); \
193229
_xpfpa_fpu_cw = (_xpfpa_fpu_oldcw & ~_FPU_EXTENDED & ~_FPU_SINGLE) | _FPU_DOUBLE; \
@@ -235,6 +271,18 @@
235271
# define XPFPA_DECLARE \
236272
fp_prec_t _xpfpa_fpu_oldprec;
237273

274+
# define XPFPA_HAVE_CW 1
275+
# define XPFPA_CW_DATATYPE \
276+
fp_prec_t
277+
278+
# define XPFPA_STORE_CW(vptr) do { \
279+
*((fp_prec_t *)(vptr)) = fpgetprec(); \
280+
} while (0)
281+
282+
# define XPFPA_RESTORE_CW(vptr) do { \
283+
fpsetprec(*((fp_prec_t *)(vptr))); \
284+
} while (0)
285+
238286
# define XPFPA_SWITCH_DOUBLE() do { \
239287
_xpfpa_fpu_oldprec = fpgetprec(); \
240288
fpsetprec(FP_PD); \
@@ -298,6 +346,18 @@
298346
# define XPFPA_DECLARE \
299347
unsigned int _xpfpa_fpu_oldcw, _xpfpa_fpu_cw;
300348

349+
# define XPFPA_HAVE_CW 1
350+
# define XPFPA_CW_DATATYPE \
351+
unsigned int
352+
353+
# define XPFPA_STORE_CW(vptr) do { \
354+
__asm__ __volatile__ ("fnstcw %0" : "=m" (*((unsigned int *)(vptr)))); \
355+
} while (0)
356+
357+
# define XPFPA_RESTORE_CW(vptr) do { \
358+
__asm__ __volatile__ ("fldcw %0" : : "m" (*((unsigned int *)(vptr)))); \
359+
} while (0)
360+
301361
# define XPFPA_SWITCH_DOUBLE() do { \
302362
__asm__ __volatile__ ("fnstcw %0" : "=m" (*&_xpfpa_fpu_oldcw)); \
303363
_xpfpa_fpu_cw = (_xpfpa_fpu_oldcw & ~0x100) | 0x200; \
@@ -345,6 +405,10 @@
345405
generated code will behave as planned.
346406
*/
347407
# define XPFPA_DECLARE /* NOP */
408+
# define XPFPA_HAVE_CW 0
409+
# define XPFPA_CW_DATATYPE unsigned int
410+
# define XPFPA_STORE_CW(variable) /* NOP */
411+
# define XPFPA_RESTORE_CW(variable) /* NOP */
348412
# define XPFPA_SWITCH_DOUBLE() /* NOP */
349413
# define XPFPA_SWITCH_SINGLE() /* NOP */
350414
# define XPFPA_SWITCH_DOUBLE_EXTENDED() /* NOP */

Diff for: Zend/zend_globals.h

+2
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,8 @@ struct _zend_executor_globals {
254254

255255
zend_bool active;
256256

257+
void *saved_fpu_cw;
258+
257259
void *reserved[ZEND_MAX_RESERVED_RESOURCES];
258260
};
259261

Diff for: Zend/zend_operators.c

-39
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
#include "zend_multiply.h"
3131
#include "zend_strtod.h"
3232
#include "zend_exceptions.h"
33-
#include "zend_float.h"
3433

3534
#define LONG_SIGN_MASK (1L << (8*sizeof(long)-1))
3635

@@ -742,7 +741,6 @@ ZEND_API void multi_convert_to_string_ex(int argc, ...)
742741

743742
ZEND_API int add_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
744743
{
745-
ZEND_FLOAT_DECLARE
746744
zval op1_copy, op2_copy;
747745
int converted = 0;
748746

@@ -755,31 +753,23 @@ ZEND_API int add_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
755753
if ((Z_LVAL_P(op1) & LONG_SIGN_MASK) == (Z_LVAL_P(op2) & LONG_SIGN_MASK)
756754
&& (Z_LVAL_P(op1) & LONG_SIGN_MASK) != (lval & LONG_SIGN_MASK)) {
757755

758-
ZEND_FLOAT_ENSURE();
759756
ZVAL_DOUBLE(result, (double) Z_LVAL_P(op1) + (double) Z_LVAL_P(op2));
760-
ZEND_FLOAT_RESTORE();
761757
} else {
762758
ZVAL_LONG(result, lval);
763759
}
764760
return SUCCESS;
765761
}
766762

767763
case TYPE_PAIR(IS_LONG, IS_DOUBLE):
768-
ZEND_FLOAT_ENSURE();
769764
ZVAL_DOUBLE(result, ((double)Z_LVAL_P(op1)) + Z_DVAL_P(op2));
770-
ZEND_FLOAT_RESTORE();
771765
return SUCCESS;
772766

773767
case TYPE_PAIR(IS_DOUBLE, IS_LONG):
774-
ZEND_FLOAT_ENSURE();
775768
ZVAL_DOUBLE(result, Z_DVAL_P(op1) + ((double)Z_LVAL_P(op2)));
776-
ZEND_FLOAT_RESTORE();
777769
return SUCCESS;
778770

779771
case TYPE_PAIR(IS_DOUBLE, IS_DOUBLE):
780-
ZEND_FLOAT_ENSURE();
781772
ZVAL_DOUBLE(result, Z_DVAL_P(op1) + Z_DVAL_P(op2));
782-
ZEND_FLOAT_RESTORE();
783773
return SUCCESS;
784774

785775
case TYPE_PAIR(IS_ARRAY, IS_ARRAY): {
@@ -813,7 +803,6 @@ ZEND_API int add_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
813803

814804
ZEND_API int sub_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
815805
{
816-
ZEND_FLOAT_DECLARE
817806
zval op1_copy, op2_copy;
818807
int converted = 0;
819808

@@ -826,31 +815,23 @@ ZEND_API int sub_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
826815
if ((Z_LVAL_P(op1) & LONG_SIGN_MASK) != (Z_LVAL_P(op2) & LONG_SIGN_MASK)
827816
&& (Z_LVAL_P(op1) & LONG_SIGN_MASK) != (lval & LONG_SIGN_MASK)) {
828817

829-
ZEND_FLOAT_ENSURE();
830818
ZVAL_DOUBLE(result, (double) Z_LVAL_P(op1) - (double) Z_LVAL_P(op2));
831-
ZEND_FLOAT_RESTORE();
832819
} else {
833820
ZVAL_LONG(result, lval);
834821
}
835822
return SUCCESS;
836823

837824
}
838825
case TYPE_PAIR(IS_LONG, IS_DOUBLE):
839-
ZEND_FLOAT_ENSURE();
840826
ZVAL_DOUBLE(result, ((double)Z_LVAL_P(op1)) - Z_DVAL_P(op2));
841-
ZEND_FLOAT_RESTORE();
842827
return SUCCESS;
843828

844829
case TYPE_PAIR(IS_DOUBLE, IS_LONG):
845-
ZEND_FLOAT_ENSURE();
846830
ZVAL_DOUBLE(result, Z_DVAL_P(op1) - ((double)Z_LVAL_P(op2)));
847-
ZEND_FLOAT_RESTORE();
848831
return SUCCESS;
849832

850833
case TYPE_PAIR(IS_DOUBLE, IS_DOUBLE):
851-
ZEND_FLOAT_ENSURE();
852834
ZVAL_DOUBLE(result, Z_DVAL_P(op1) - Z_DVAL_P(op2));
853-
ZEND_FLOAT_RESTORE();
854835
return SUCCESS;
855836

856837
default:
@@ -869,7 +850,6 @@ ZEND_API int sub_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
869850

870851
ZEND_API int mul_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
871852
{
872-
ZEND_FLOAT_DECLARE
873853
zval op1_copy, op2_copy;
874854
int converted = 0;
875855

@@ -878,29 +858,21 @@ ZEND_API int mul_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
878858
case TYPE_PAIR(IS_LONG, IS_LONG): {
879859
long overflow;
880860

881-
ZEND_FLOAT_ENSURE();
882861
ZEND_SIGNED_MULTIPLY_LONG(Z_LVAL_P(op1),Z_LVAL_P(op2), Z_LVAL_P(result),Z_DVAL_P(result),overflow);
883-
ZEND_FLOAT_RESTORE();
884862
Z_TYPE_P(result) = overflow ? IS_DOUBLE : IS_LONG;
885863
return SUCCESS;
886864

887865
}
888866
case TYPE_PAIR(IS_LONG, IS_DOUBLE):
889-
ZEND_FLOAT_ENSURE();
890867
ZVAL_DOUBLE(result, ((double)Z_LVAL_P(op1)) * Z_DVAL_P(op2));
891-
ZEND_FLOAT_RESTORE();
892868
return SUCCESS;
893869

894870
case TYPE_PAIR(IS_DOUBLE, IS_LONG):
895-
ZEND_FLOAT_ENSURE();
896871
ZVAL_DOUBLE(result, Z_DVAL_P(op1) * ((double)Z_LVAL_P(op2)));
897-
ZEND_FLOAT_RESTORE();
898872
return SUCCESS;
899873

900874
case TYPE_PAIR(IS_DOUBLE, IS_DOUBLE):
901-
ZEND_FLOAT_ENSURE();
902875
ZVAL_DOUBLE(result, Z_DVAL_P(op1) * Z_DVAL_P(op2));
903-
ZEND_FLOAT_RESTORE();
904876
return SUCCESS;
905877

906878
default:
@@ -918,7 +890,6 @@ ZEND_API int mul_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
918890

919891
ZEND_API int div_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
920892
{
921-
ZEND_FLOAT_DECLARE
922893
zval op1_copy, op2_copy;
923894
int converted = 0;
924895

@@ -931,17 +902,13 @@ ZEND_API int div_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
931902
return FAILURE; /* division by zero */
932903
} else if (Z_LVAL_P(op2) == -1 && Z_LVAL_P(op1) == LONG_MIN) {
933904
/* Prevent overflow error/crash */
934-
ZEND_FLOAT_ENSURE();
935905
ZVAL_DOUBLE(result, (double) LONG_MIN / -1);
936-
ZEND_FLOAT_RESTORE();
937906
return SUCCESS;
938907
}
939908
if (Z_LVAL_P(op1) % Z_LVAL_P(op2) == 0) { /* integer */
940909
ZVAL_LONG(result, Z_LVAL_P(op1) / Z_LVAL_P(op2));
941910
} else {
942-
ZEND_FLOAT_ENSURE();
943911
ZVAL_DOUBLE(result, ((double) Z_LVAL_P(op1)) / Z_LVAL_P(op2));
944-
ZEND_FLOAT_RESTORE();
945912
}
946913
return SUCCESS;
947914

@@ -951,9 +918,7 @@ ZEND_API int div_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
951918
ZVAL_BOOL(result, 0);
952919
return FAILURE; /* division by zero */
953920
}
954-
ZEND_FLOAT_ENSURE();
955921
ZVAL_DOUBLE(result, Z_DVAL_P(op1) / (double)Z_LVAL_P(op2));
956-
ZEND_FLOAT_RESTORE();
957922
return SUCCESS;
958923

959924
case TYPE_PAIR(IS_LONG, IS_DOUBLE):
@@ -962,9 +927,7 @@ ZEND_API int div_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
962927
ZVAL_BOOL(result, 0);
963928
return FAILURE; /* division by zero */
964929
}
965-
ZEND_FLOAT_ENSURE();
966930
ZVAL_DOUBLE(result, (double)Z_LVAL_P(op1) / Z_DVAL_P(op2));
967-
ZEND_FLOAT_RESTORE();
968931
return SUCCESS;
969932

970933
case TYPE_PAIR(IS_DOUBLE, IS_DOUBLE):
@@ -973,9 +936,7 @@ ZEND_API int div_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
973936
ZVAL_BOOL(result, 0);
974937
return FAILURE; /* division by zero */
975938
}
976-
ZEND_FLOAT_ENSURE();
977939
ZVAL_DOUBLE(result, Z_DVAL_P(op1) / Z_DVAL_P(op2));
978-
ZEND_FLOAT_RESTORE();
979940
return SUCCESS;
980941

981942
default:

0 commit comments

Comments
 (0)