@@ -43,6 +43,11 @@ template <typename Element> void move(char *dst, const char *src) {
43
43
template <typename Element> void move (char *dst, const char *src, size_t size) { // Runtime-size overload: 'size' is forwarded unchanged.
44
44
Element::move (dst, src, size); // Thin dispatcher: the work is done by Element's static move (dst, src, size).
45
45
}
46
+ // Runtime-size backward move from 'src' to 'dst'; forwards to Element::move_backward.
47
+ template <typename Element>
48
+ void move_backward (char *dst, const char *src, size_t size) {
49
+ Element::move_backward (dst, src, size);
50
+ }
46
51
47
52
// Fixed-size equality between 'lhs' and 'rhs'.
48
53
template <typename Element> bool equals (const char *lhs, const char *rhs) {
@@ -96,10 +101,8 @@ template <typename Element, size_t ElementCount> struct Repeated {
96
101
}
97
102
98
103
static void move (char *dst, const char *src) { // Fixed-size move (no size argument) from src to dst.
99
- const auto value = Element::load (src);
100
- Repeated<Element, ElementCount - 1 >::move (dst + Element::SIZE,
101
- src + Element::SIZE);
102
- Element::store (dst, value);
104
+ const auto value = load (src); // Read the whole value from src before anything is written to dst.
105
+ store (dst, value); // NOTE(review): load/store are unqualified, so presumably Repeated's own members (not visible in this hunk) - confirm.
103
106
}
104
107
105
108
static bool equals (const char *lhs, const char *rhs) {
@@ -341,6 +344,55 @@ template <typename T, typename TailT = T> struct Loop {
341
344
Tail<TailT>::copy (dst, src, size);
342
345
}
343
346
347
+ // Move forward suitable when dst < src. We load the tail bytes before
348
+ // handling the loop, and store them only after the loop has finished.
349
+ //
350
+ // e.g. Moving two bytes toward lower addresses (L = load, S = store)
351
+ // [ | | | | |]
352
+ // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
353
+ // [_________________________LLLLLLLL___]
354
+ // [___LLLLLLLL_________________________]
355
+ // [_SSSSSSSS___________________________]
356
+ // [___________LLLLLLLL_________________]
357
+ // [_________SSSSSSSS___________________]
358
+ // [___________________LLLLLLLL_________]
359
+ // [_________________SSSSSSSS___________]
360
+ // [_______________________SSSSSSSS_____]
361
+ static void move (char *dst, const char *src, size_t size) {
362
+ const size_t tail_offset = Tail<T>::offset (size); // NOTE(review): offset is taken from Tail<T> while the load/store below use TailT - confirm this is intended when TailT differs from T.
363
+ const auto tail_value = TailT::load (src + tail_offset); // Tail is read up front, before any store in the loop runs.
364
+ size_t offset = 0 ;
365
+ do {
366
+ T::move (dst + offset, src + offset); // Fixed-size move of one T-sized chunk, walking toward higher offsets.
367
+ offset += T::SIZE;
368
+ } while (offset < size - T::SIZE); // NOTE(review): size is a size_t, so this presumably assumes size >= T::SIZE - confirm against callers.
369
+ TailT::store (dst + tail_offset, tail_value); // Tail is written last, from the value saved before the loop.
370
+ }
371
+
372
+ // Move backward suitable when dst > src. We load the head bytes before
373
+ // handling the loop, and store them only after the loop has finished.
374
+ //
375
+ // e.g. Moving two bytes toward higher addresses (L = load, S = store)
376
+ // [ | | | | |]
377
+ // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
378
+ // [___LLLLLLLL_________________________]
379
+ // [_________________________LLLLLLLL___]
380
+ // [___________________________SSSSSSSS_]
381
+ // [_________________LLLLLLLL___________]
382
+ // [___________________SSSSSSSS_________]
383
+ // [_________LLLLLLLL___________________]
384
+ // [___________SSSSSSSS_________________]
385
+ // [_____SSSSSSSS_______________________]
386
+ static void move_backward (char *dst, const char *src, size_t size) {
387
+ const auto head_value = TailT::load (src); // Head is read up front, before any store in the loop runs.
388
+ ptrdiff_t offset = size - T::SIZE; // Signed, so the 'offset >= 0' test below can become false.
389
+ do {
390
+ T::move (dst + offset, src + offset); // Fixed-size move of one T-sized chunk, walking toward lower offsets.
391
+ offset -= T::SIZE;
392
+ } while (offset >= 0 );
393
+ TailT::store (dst, head_value); // Head is written last, from the value saved before the loop.
394
+ }
395
+
344
396
static bool equals (const char *lhs, const char *rhs, size_t size) {
345
397
size_t offset = 0 ;
346
398
do {
@@ -375,30 +427,38 @@ enum class Arg { _1, _2, Dst = _1, Src = _2, Lhs = _1, Rhs = _2 };
375
427
376
428
namespace internal {
377
429
378
- // Provides a specialized bump function that adjusts pointers and size so first
379
- // argument (resp. second argument) gets aligned to Alignment.
380
- // We make sure the compiler knows about the adjusted pointer alignment.
381
- template <Arg arg, size_t Alignment> struct AlignHelper {};
430
+ template <Arg arg> struct ArgSelector {}; // Primary template is empty; only the Arg::_1 / Arg::_2 specializations provide Select.
382
431
383
- template <size_t Alignment > struct AlignHelper <Arg::_1, Alignment > {
432
+ template <> struct ArgSelector <Arg::_1> {
384
433
template <typename T1, typename T2>
385
- static void bump (T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) {
386
- const intptr_t offset = offset_to_next_aligned<Alignment>(p1ref);
387
- p1ref += offset;
388
- p2ref += offset;
389
- size -= offset;
390
- p1ref = assume_aligned<Alignment>(p1ref);
434
+ static T1 *__restrict &Select (T1 *__restrict &p1ref, T2 *__restrict &p2ref) { // Returns a reference, so the caller can modify the selected pointer in place.
435
+ return p1ref; // Arg::_1 selects the first pointer.
436
+ }
437
+ };
438
+
439
+ template <> struct ArgSelector <Arg::_2> {
440
+ template <typename T1, typename T2>
441
+ static T2 *__restrict &Select (T1 *__restrict &p1ref, T2 *__restrict &p2ref) { // Returns a reference, so the caller can modify the selected pointer in place.
442
+ return p2ref; // Arg::_2 selects the second pointer.
391
443
}
392
444
};
393
445
394
- template <size_t Alignment> struct AlignHelper <Arg::_2, Alignment> {
446
+ // Provides a specialized bump function that adjusts pointers and size so the first
447
+ // argument (resp. second argument) gets aligned to Alignment.
448
+ // We make sure the compiler knows about the adjusted pointer alignment.
449
+ // The 'additional_bumps' parameter shifts the offset by that many multiples of
450
+ // Alignment, so earlier (negative) or later (positive) aligned pointers can be reached.
451
+ template <Arg arg, size_t Alignment> struct Align {
395
452
template <typename T1, typename T2>
396
- static void bump (T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) {
397
- const intptr_t offset = offset_to_next_aligned<Alignment>(p2ref);
453
+ static void bump (T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size,
454
+ int additional_bumps = 0 ) {
455
+ auto &aligned_ptr = ArgSelector<arg>::Select (p1ref, p2ref); // Reference to whichever of p1ref / p2ref 'arg' selects.
456
+ auto offset = offset_to_next_aligned<Alignment>(aligned_ptr);
457
+ offset += additional_bumps * Alignment; // NOTE(review): int * size_t is evaluated as unsigned; negative bumps presumably rely on wrap-around when added to offset - confirm.
398
458
p1ref += offset; // Both pointers move by the same offset...
399
459
p2ref += offset;
400
460
size -= offset; // ...and size changes by the opposite amount.
401
- p2ref = assume_aligned<Alignment>(p2ref );
461
+ aligned_ptr = assume_aligned<Alignment>(aligned_ptr ); // Writes through the reference, so the selected caller pointer carries the alignment hint.
402
462
}
403
463
};
404
464
@@ -423,29 +483,85 @@ template <typename AlignmentT, Arg AlignOn = Arg::_1> struct Align {
423
483
static void copy (char *__restrict dst, const char *__restrict src, // Runtime-size copy: one fixed-size AlignmentT step, then NextT on the bumped arguments.
424
484
size_t size) {
425
485
AlignmentT::copy (dst, src); // Fixed-size AlignmentT copy at the original (not yet bumped) pointers.
426
- internal::AlignHelper <AlignOn, ALIGNMENT>::bump (dst, src, size);
486
+ internal::Align <AlignOn, ALIGNMENT>::bump (dst, src, size); // Advances dst/src and reduces size (all taken by reference) so the AlignOn argument is aligned.
427
487
NextT::copy (dst, src, size); // Remainder is delegated to NextT with the adjusted arguments.
428
488
}
429
489
490
+ // Move forward suitable when dst < src. The alignment is performed with a
491
+ // HeadTail operation of size ∈ [Alignment, 2 x Alignment].
492
+ //
493
+ // e.g. Moving two bytes and making sure src is then aligned.
494
+ // [ | | | | ]
495
+ // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
496
+ // [____LLLLLLLL_____________________]
497
+ // [___________LLLLLLLL______________]
498
+ // [_SSSSSSSS________________________]
499
+ // [________SSSSSSSS_________________]
500
+ //
501
+ // e.g. Moving two bytes and making sure dst is then aligned.
502
+ // [ | | | | ]
503
+ // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
504
+ // [____LLLLLLLL_____________________]
505
+ // [______LLLLLLLL___________________]
506
+ // [_SSSSSSSS________________________]
507
+ // [___SSSSSSSS______________________]
508
+ static void move (char *dst, const char *src, size_t size) {
509
+ char *next_dst = dst; // Work on copies: the original dst/src/size are still needed for the HeadTail step.
510
+ const char *next_src = src;
511
+ size_t next_size = size;
512
+ internal::Align<AlignOn, ALIGNMENT>::bump (next_dst, next_src, next_size,
513
+ 1 ); // One additional bump: the offset to the next aligned address plus one more ALIGNMENT.
514
+ HeadTail<AlignmentT>::move (dst, src, size - next_size); // size - next_size is the number of bytes bump skipped over.
515
+ NextT::move (next_dst, next_src, next_size); // Remainder starts at the bumped pointers.
516
+ }
517
+
518
+ // Move backward suitable when dst > src. The alignment is performed with a
519
+ // HeadTail operation of size ∈ [Alignment, 2 x Alignment].
520
+ //
521
+ // e.g. Moving two bytes backward and making sure src is then aligned.
522
+ // [ | | | | ]
523
+ // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
524
+ // [ _________________LLLLLLLL_______]
525
+ // [ ___________________LLLLLLLL_____]
526
+ // [____________________SSSSSSSS_____]
527
+ // [______________________SSSSSSSS___]
528
+ //
529
+ // e.g. Moving two bytes backward and making sure dst is then aligned.
530
+ // [ | | | | ]
531
+ // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
532
+ // [ _______________LLLLLLLL_________]
533
+ // [ ___________________LLLLLLLL_____]
534
+ // [__________________SSSSSSSS_______]
535
+ // [______________________SSSSSSSS___]
536
+ static void move_backward (char *dst, const char *src, size_t size) {
537
+ char *headtail_dst = dst + size; // Start one past the end; the negative bump below moves these pointers back.
538
+ const char *headtail_src = src + size;
539
+ size_t headtail_size = 0 ; // bump does 'size -= offset'; NOTE(review): with a negative offset this presumably relies on unsigned wrap-around to end up as the HeadTail length - confirm.
540
+ internal::Align<AlignOn, ALIGNMENT>::bump (headtail_dst, headtail_src,
541
+ headtail_size, -2 ); // -2 bumps: the offset to the next aligned address minus two ALIGNMENTs.
542
+ HeadTail<AlignmentT>::move (headtail_dst, headtail_src, headtail_size); // Handle the trailing region first...
543
+ NextT::move_backward (dst, src, size - headtail_size); // ...then the leading size - headtail_size bytes via NextT's backward move.
544
+ }
545
+
430
546
static bool equals (const char *lhs, const char *rhs, size_t size) { // Runtime-size equality: one fixed-size AlignmentT check, then NextT on the bumped arguments.
431
547
if (!AlignmentT::equals (lhs, rhs)) // Fixed-size comparison at the original pointers; a mismatch settles the answer.
432
548
return false ;
433
- internal::AlignHelper <AlignOn, ALIGNMENT>::bump (lhs, rhs, size);
549
+ internal::Align <AlignOn, ALIGNMENT>::bump (lhs, rhs, size); // Advances lhs/rhs and reduces size (all taken by reference) so the AlignOn argument is aligned.
434
550
return NextT::equals (lhs, rhs, size); // Remainder is compared by NextT.
435
551
}
436
552
437
553
static int three_way_compare (const char *lhs, const char *rhs, // Runtime-size three-way compare: one fixed-size AlignmentT step, then NextT on the bumped arguments.
438
554
size_t size) {
439
555
if (!AlignmentT::equals (lhs, rhs)) // Only when the fixed-size chunk at the original pointers differs...
440
556
return AlignmentT::three_way_compare (lhs, rhs); // ...is its three-way result computed and returned.
441
- internal::AlignHelper <AlignOn, ALIGNMENT>::bump (lhs, rhs, size);
557
+ internal::Align <AlignOn, ALIGNMENT>::bump (lhs, rhs, size); // Advances lhs/rhs and reduces size (all taken by reference) so the AlignOn argument is aligned.
442
558
return NextT::three_way_compare (lhs, rhs, size); // Remainder is compared by NextT.
443
559
}
444
560
445
561
static void splat_set (char *dst, const unsigned char value, size_t size) { // Runtime-size set: one fixed-size AlignmentT step, then NextT on the bumped arguments.
446
562
AlignmentT::splat_set (dst, value); // Fixed-size set at the original (not yet bumped) dst.
447
563
char *dummy = nullptr ; // Placeholder for bump's second pointer argument; there is no source buffer here.
448
- internal::AlignHelper <Arg::_1, ALIGNMENT>::bump (dst, dummy, size);
564
+ internal::Align <Arg::_1, ALIGNMENT>::bump (dst, dummy, size); // Aligns on dst (Arg::_1). NOTE(review): bump also adds the offset to the null 'dummy' pointer - confirm that is acceptable.
449
565
NextT::splat_set (dst, value, size); // Remainder is delegated to NextT with the adjusted dst/size.
450
566
}
451
567
};
0 commit comments