Skip to content

Commit b81a509

Browse files
committed
Make asm use more selectable
- Make the SIMD ASM code off by default. Use configure --enable-roll-asm to enable. - Allow MD5 ASM code to be requested even when OpenSSL is handling MD4 checksums. Use configure --enable-md5-asm to enable.
1 parent 26f4dbe commit b81a509

11 files changed

+283
-109
lines changed

Makefile.in

+8-7
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,9 @@ SHELL=/bin/sh
3030
.SUFFIXES:
3131
.SUFFIXES: .c .o
3232

33-
SIMD_x86_64=simd-checksum-x86_64.o simd-checksum-avx2.o
34-
ASM_x86_64=lib/md5-asm-x86_64.o
33+
ROLL_SIMD_x86_64=simd-checksum-x86_64.o
34+
ROLL_ASM_x86_64=simd-checksum-avx2.o
35+
MD5_ASM_x86_64=lib/md5-asm-x86_64.o
3536

3637
GENFILES=configure.sh aclocal.m4 config.h.in rsync.1 rsync.1.html \
3738
rsync-ssl.1 rsync-ssl.1.html rsyncd.conf.5 rsyncd.conf.5.html \
@@ -46,7 +47,7 @@ OBJS1=flist.o rsync.o generator.o receiver.o cleanup.o sender.o exclude.o \
4647
util1.o util2.o main.o checksum.o match.o syscall.o log.o backup.o delete.o
4748
OBJS2=options.o io.o compat.o hlink.o token.o uidlist.o socket.o hashtable.o \
4849
usage.o fileio.o batch.o clientname.o chmod.o acls.o xattrs.o
49-
OBJS3=progress.o pipe.o @ASM@ @SIMD@
50+
OBJS3=progress.o pipe.o @MD5_ASM@ @ROLL_SIMD@ @ROLL_ASM@
5051
DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o
5152
popt_OBJS=popt/findme.o popt/popt.o popt/poptconfig.o \
5253
popt/popthelp.o popt/poptparse.o
@@ -147,13 +148,13 @@ git-version.h: ALWAYS_RUN
147148
ALWAYS_RUN:
148149

149150
simd-checksum-x86_64.o: simd-checksum-x86_64.cpp
150-
@$(srcdir)/cmd-or-msg disable-simd $(CXX) -I. $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $(srcdir)/simd-checksum-x86_64.cpp
151+
@$(srcdir)/cmd-or-msg disable-roll-simd $(CXX) -I. $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $(srcdir)/simd-checksum-x86_64.cpp
151152

152153
simd-checksum-avx2.o: simd-checksum-avx2.S
153-
@$(srcdir)/cmd-or-msg disable-asm $(CC) $(CFLAGS) --include=$(srcdir)/rsync.h -DAVX2_ASM -I. @NOEXECSTACK@ -c -o $@ $(srcdir)/simd-checksum-avx2.S
154+
@$(srcdir)/cmd-or-msg disable-roll-asm $(CC) $(CFLAGS) -I. @NOEXECSTACK@ -c -o $@ $(srcdir)/simd-checksum-avx2.S
154155

155-
lib/md5-asm-x86_64.o: lib/md5-asm-x86_64.S config.h lib/md-defines.h
156-
@$(srcdir)/cmd-or-msg disable-asm $(CC) -I. @NOEXECSTACK@ -c -o $@ $(srcdir)/lib/md5-asm-x86_64.S
156+
lib/md5-asm-x86_64.o: lib/md5-asm-x86_64.S lib/md-defines.h
157+
@$(srcdir)/cmd-or-msg disable-md5-asm $(CC) -I. @NOEXECSTACK@ -c -o $@ $(srcdir)/lib/md5-asm-x86_64.S
157158

158159
tls$(EXEEXT): $(TLS_OBJ)
159160
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(TLS_OBJ) $(LIBS)

NEWS.md

+16-5
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,8 @@
136136
(keeping the behavior the same as before), so specifying `--info=nonreg0`
137137
can be used to turn the warnings off.
138138

139-
- More ASM optimizations from Shark64.
139+
- An optional asm optimization for the rolling checksum from Shark64. Enable
140+
it with `./configure --enable-roll-asm`.
140141

141142
- Using `--debug=FILTER` now outputs a caution message if a filter rule
142143
has trailing whitespace.
@@ -192,14 +193,24 @@
192193
using the output of `git describe` when building inside a non-shallow git
193194
checkout, though.)
194195

195-
- Improved the IPv6 determination in configure.
196+
- Renamed configure's `--enable-simd` option to `--enable-roll-simd` and added
197+
the option `--enable-roll-asm` to use the new asm version of the code. Both
198+
are x86_64/amd64 only.
199+
200+
- Renamed configure's `--enable-asm` option to `--enable-md5-asm` to avoid
201+
confusion with the asm option for the rolling checksum. It is also honored
202+
even when openssl crypto is in use. This allows: normal MD4 & MD5, normal
203+
MD4 + asm MD5, openssl MD4 & MD5, or openssl MD4 + asm MD5.
196204

197-
- Made SIMD & ASM configure default to "no" on non-Linux hosts due to various
198-
reports of problems on NetBSD & macOS hosts. These tests were also tweaked
199-
to allow enabling the feature on a host_cpu of amd64 (was only x86_64).
205+
- Made SIMD & asm configure checks default to "no" on non-Linux hosts due to
206+
various reports of problems on NetBSD & macOS hosts. These were also
207+
tweaked to allow enabling the feature on a host_cpu of amd64 (was only
208+
allowed on x86_64 before).
200209

201210
- Fixed configure to not fail at the SIMD check when cross-compiling.
202211

212+
- Improved the IPv6 determination in configure.
213+
203214
- Compile the C files with `-pedantic-errors` (when possible) so that we will
204215
get warned if a static initialization overflows in the future (among other
205216
things).

checksum.c

+17-17
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ int canonical_checksum(int csum_type)
179179
return 0;
180180
}
181181

182-
#ifndef HAVE_SIMD /* See simd-checksum-*.cpp. */
182+
#ifndef USE_ROLL_SIMD /* See simd-checksum-*.cpp. */
183183
/*
184184
a simple 32 bit checksum that can be updated from either end
185185
(inspired by Mark Adler's Adler-32 checksum)
@@ -222,23 +222,23 @@ void get_checksum2(char *buf, int32 len, char *sum)
222222
}
223223
#endif
224224
case CSUM_MD5: {
225-
MD5_CTX m5;
225+
md5_context m5;
226226
uchar seedbuf[4];
227-
MD5_Init(&m5);
227+
md5_begin(&m5);
228228
if (proper_seed_order) {
229229
if (checksum_seed) {
230230
SIVALu(seedbuf, 0, checksum_seed);
231-
MD5_Update(&m5, seedbuf, 4);
231+
md5_update(&m5, seedbuf, 4);
232232
}
233-
MD5_Update(&m5, (uchar *)buf, len);
233+
md5_update(&m5, (uchar *)buf, len);
234234
} else {
235-
MD5_Update(&m5, (uchar *)buf, len);
235+
md5_update(&m5, (uchar *)buf, len);
236236
if (checksum_seed) {
237237
SIVALu(seedbuf, 0, checksum_seed);
238-
MD5_Update(&m5, seedbuf, 4);
238+
md5_update(&m5, seedbuf, 4);
239239
}
240240
}
241-
MD5_Final((uchar *)sum, &m5);
241+
md5_result(&m5, (uchar *)sum);
242242
break;
243243
}
244244
case CSUM_MD4:
@@ -374,18 +374,18 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum)
374374
}
375375
#endif
376376
case CSUM_MD5: {
377-
MD5_CTX m5;
377+
md5_context m5;
378378

379-
MD5_Init(&m5);
379+
md5_begin(&m5);
380380

381381
for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
382-
MD5_Update(&m5, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
382+
md5_update(&m5, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
383383

384384
remainder = (int32)(len - i);
385385
if (remainder > 0)
386-
MD5_Update(&m5, (uchar *)map_ptr(buf, i, remainder), remainder);
386+
md5_update(&m5, (uchar *)map_ptr(buf, i, remainder), remainder);
387387

388-
MD5_Final((uchar *)sum, &m5);
388+
md5_result(&m5, (uchar *)sum);
389389
break;
390390
}
391391
case CSUM_MD4:
@@ -443,7 +443,7 @@ static union {
443443
#ifdef USE_OPENSSL
444444
MD4_CTX m4;
445445
#endif
446-
MD5_CTX m5;
446+
md5_context m5;
447447
} ctx;
448448
#ifdef SUPPORT_XXHASH
449449
static XXH64_state_t* xxh64_state;
@@ -482,7 +482,7 @@ void sum_init(int csum_type, int seed)
482482
break;
483483
#endif
484484
case CSUM_MD5:
485-
MD5_Init(&ctx.m5);
485+
md5_begin(&ctx.m5);
486486
break;
487487
case CSUM_MD4:
488488
#ifdef USE_OPENSSL
@@ -532,7 +532,7 @@ void sum_update(const char *p, int32 len)
532532
break;
533533
#endif
534534
case CSUM_MD5:
535-
MD5_Update(&ctx.m5, (uchar *)p, len);
535+
md5_update(&ctx.m5, (uchar *)p, len);
536536
break;
537537
case CSUM_MD4:
538538
#ifdef USE_OPENSSL
@@ -597,7 +597,7 @@ int sum_end(char *sum)
597597
}
598598
#endif
599599
case CSUM_MD5:
600-
MD5_Final((uchar *)sum, &ctx.m5);
600+
md5_result(&ctx.m5, (uchar *)sum);
601601
break;
602602
case CSUM_MD4:
603603
#ifdef USE_OPENSSL

configure.ac

+58-41
Original file line numberDiff line numberDiff line change
@@ -229,12 +229,12 @@ fi
229229
AC_DEFINE_UNQUOTED(NOBODY_USER, "$NOBODY_USER", [unprivileged user--e.g. nobody])
230230
AC_DEFINE_UNQUOTED(NOBODY_GROUP, "$NOBODY_GROUP", [unprivileged group for unprivileged user])
231231

232-
# SIMD optimizations
233-
SIMD=
232+
# rolling-checksum SIMD optimizations
233+
ROLL_SIMD=
234234

235-
AC_MSG_CHECKING([whether to enable SIMD optimizations])
236-
AC_ARG_ENABLE(simd,
237-
AS_HELP_STRING([--enable-simd],[enable/disable to control SIMD optimizations (requires c++)]))
235+
AC_MSG_CHECKING([whether to enable rolling-checksum SIMD optimizations])
236+
AC_ARG_ENABLE(roll-simd,
237+
AS_HELP_STRING([--enable-roll-simd],[enable/disable to control rolling-checksum SIMD optimizations (requires c++)]))
238238

239239
# Clang is crashing with -g -O2, so we'll get rid of -g for now.
240240
CXXFLAGS=`echo "$CXXFLAGS" | sed 's/-g //'`
@@ -263,14 +263,14 @@ __attribute__ ((target("ssse3"))) void more_testing(char* buf, int len)
263263
}
264264
]])
265265

266-
if test x"$enable_simd" = x""; then
266+
if test x"$enable_roll_simd" = x""; then
267267
case "$host_os" in
268268
*linux*) ;;
269-
*) enable_simd=no ;;
269+
*) enable_roll_simd=no ;;
270270
esac
271271
fi
272272

273-
if test x"$enable_simd" != x"no"; then
273+
if test x"$enable_roll_simd" != x"no"; then
274274
# For x86-64 SIMD, g++ >=5 or clang++ >=7 is required
275275
if test x"$host_cpu" = x"x86_64" || test x"$host_cpu" = x"amd64"; then
276276
AC_LANG(C++)
@@ -283,23 +283,23 @@ if test x"$enable_simd" != x"no"; then
283283
AC_LANG(C)
284284
if test x"$CXX_OK" = x"yes"; then
285285
# AC_MSG_RESULT() is called below.
286-
SIMD="$host_cpu"
287-
elif test x"$enable_simd" = x"yes"; then
286+
ROLL_SIMD="$host_cpu"
287+
elif test x"$enable_roll_simd" = x"yes"; then
288288
AC_MSG_RESULT(error)
289-
AC_MSG_ERROR(The SIMD compilation test failed.
290-
Omit --enable-simd to continue without it.)
289+
AC_MSG_ERROR(The rolling-checksum SIMD compilation test failed.
290+
Omit --enable-roll-simd to continue without it.)
291291
fi
292-
elif test x"$enable_simd" = x"yes"; then
292+
elif test x"$enable_roll_simd" = x"yes"; then
293293
AC_MSG_RESULT(unavailable)
294-
AC_MSG_ERROR(The SIMD optimizations are currently x86_64|amd64 only.
295-
Omit --enable-simd to continue without it.)
294+
AC_MSG_ERROR(The rolling-checksum SIMD optimizations are currently x86_64|amd64 only.
295+
Omit --enable-roll-simd to continue without it.)
296296
fi
297297
fi
298298

299-
if test x"$SIMD" != x""; then
300-
AC_MSG_RESULT([yes ($SIMD)])
301-
AC_DEFINE(HAVE_SIMD, 1, [Define to 1 to enable SIMD optimizations])
302-
SIMD='$(SIMD_'"$SIMD)"
299+
if test x"$ROLL_SIMD" != x""; then
300+
AC_MSG_RESULT([yes ($ROLL_SIMD)])
301+
AC_DEFINE(USE_ROLL_SIMD, 1, [Define to 1 to enable rolling-checksum SIMD optimizations])
302+
ROLL_SIMD='$(ROLL_SIMD_'"$ROLL_SIMD)"
303303
# We only use c++ for its target attribute dispatching, disable unneeded bulky features
304304
CXXFLAGS="$CXXFLAGS -fno-exceptions -fno-rtti"
305305
# Apple often has "g++" as a symlink for clang. Try to find out the truth.
@@ -311,7 +311,7 @@ else
311311
AC_MSG_RESULT(no)
312312
fi
313313

314-
AC_SUBST(SIMD)
314+
AC_SUBST(ROLL_SIMD)
315315

316316
AC_MSG_CHECKING([if assembler accepts noexecstack])
317317
OLD_CFLAGS="$CFLAGS"
@@ -433,45 +433,66 @@ if test x"$enable_openssl" != x"no"; then
433433
err_msg="$err_msg$nl- Failed to find openssl/md4.h and openssl/md5.h for openssl crypto lib support."
434434
no_lib="$no_lib openssl"
435435
fi
436-
if test x"$enable_asm" != x"yes"; then
437-
enable_asm=no
436+
if test x"$enable_md5_asm" != x"yes"; then
437+
enable_md5_asm=no
438438
fi
439439
else
440440
AC_MSG_RESULT(no)
441441
fi
442442

443-
ASM=
443+
MD5_ASM=
444444

445-
AC_MSG_CHECKING([whether to enable ASM optimizations])
446-
AC_ARG_ENABLE(asm,
447-
AS_HELP_STRING([--enable-asm],[enable/disable to control ASM optimizations]))
445+
AC_MSG_CHECKING([whether to enable MD5 ASM optimizations])
446+
AC_ARG_ENABLE(md5-asm,
447+
AS_HELP_STRING([--enable-md5-asm],[enable/disable to control MD5 ASM optimizations]))
448448

449-
if test x"$enable_asm" = x""; then
449+
if test x"$enable_md5_asm" = x""; then
450450
case "$host_os" in
451451
*linux*) ;;
452-
*) enable_asm=no ;;
452+
*) enable_md5_asm=no ;;
453453
esac
454454
fi
455455

456-
if test x"$enable_asm" != x"no"; then
456+
if test x"$enable_md5_asm" != x"no"; then
457457
if test x"$host_cpu" = x"x86_64" || test x"$host_cpu" = x"amd64"; then
458-
ASM="$host_cpu"
459-
elif test x"$enable_asm" = x"yes"; then
458+
MD5_ASM="$host_cpu"
459+
elif test x"$enable_md5_asm" = x"yes"; then
460460
AC_MSG_RESULT(unavailable)
461461
AC_MSG_ERROR(The ASM optimizations are currently x86_64|amd64 only.
462-
Omit --enable-asm to continue without it.)
462+
Omit --enable-md5-asm to continue without it.)
463463
fi
464464
fi
465465

466-
if test x"$ASM" != x""; then
467-
AC_MSG_RESULT([yes ($ASM)])
468-
AC_DEFINE(HAVE_ASM, 1, [Define to 1 to enable ASM optimizations])
469-
ASM='$(ASM_'"$ASM)"
466+
if test x"$MD5_ASM" != x""; then
467+
AC_MSG_RESULT([yes ($MD5_ASM)])
468+
AC_DEFINE(USE_MD5_ASM, 1, [Define to 1 to enable MD5 ASM optimizations])
469+
MD5_ASM='$(MD5_ASM_'"$MD5_ASM)"
470470
else
471471
AC_MSG_RESULT(no)
472472
fi
473473

474-
AC_SUBST(ASM)
474+
AC_SUBST(MD5_ASM)
475+
476+
ROLL_ASM=
477+
478+
AC_MSG_CHECKING([whether to enable rolling-checksum ASM optimizations])
479+
AC_ARG_ENABLE(roll-asm,
480+
AS_HELP_STRING([--enable-roll-asm],[enable/disable to control rolling-checksum ASM optimizations (requires --enable-roll-simd)]))
481+
482+
if test x"$ROLL_SIMD" = x""; then
483+
enable_roll_asm=no
484+
fi
485+
486+
if test x"$enable_roll_asm" = x"yes"; then
487+
ROLL_ASM="$host_cpu"
488+
AC_MSG_RESULT([yes ($ROLL_ASM)])
489+
AC_DEFINE(USE_ROLL_ASM, 1, [Define to 1 to enable rolling-checksum ASM optimizations (requires --enable-roll-simd)])
490+
ROLL_ASM='$(ROLL_ASM_'"$ROLL_ASM)"
491+
else
492+
AC_MSG_RESULT(no)
493+
fi
494+
495+
AC_SUBST(ROLL_ASM)
475496

476497
AC_MSG_CHECKING([whether to enable xxhash checksum support])
477498
AC_ARG_ENABLE([xxhash],
@@ -1421,10 +1442,6 @@ esac
14211442
AC_CONFIG_FILES([Makefile lib/dummy zlib/dummy popt/dummy shconfig])
14221443
AC_OUTPUT
14231444

1424-
if test "$enable_openssl" = yes && test "$enable_asm" = yes; then
1425-
echo "*** Ignoring --enable-asm option -- using openssl for MD5 checksums ***"
1426-
fi
1427-
14281445
AC_MSG_RESULT()
14291446
AC_MSG_RESULT([ rsync $PACKAGE_VERSION configuration successful])
14301447
AC_MSG_RESULT()

lib/md5-asm-x86_64.S

+2-2
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
#include "config.h"
2828
#include "md-defines.h"
2929

30-
#if !defined USE_OPENSSL && CSUM_CHUNK == 64
30+
#ifdef USE_MD5_ASM /* { */
3131

3232
#ifdef __APPLE__
3333
#define md5_process_asm _md5_process_asm
@@ -698,4 +698,4 @@ md5_process_asm:
698698
pop %rbp
699699
ret
700700

701-
#endif /* !USE_OPENSSL ... */
701+
#endif /* } USE_MD5_ASM */

0 commit comments

Comments
 (0)