@@ -382,6 +382,10 @@ def group_any_all(uint8_t[:] out,
382
382
if values[i] == flag_val:
383
383
out[lab] = flag_val
384
384
385
+ # ----------------------------------------------------------------------
386
+ # group_add, group_prod, group_var, group_mean, group_ohlc
387
+ # ----------------------------------------------------------------------
388
+
385
389
386
390
@ cython.wraparound (False )
387
391
@ cython.boundscheck (False )
@@ -396,9 +400,9 @@ def _group_add(floating[:, :] out,
396
400
cdef:
397
401
Py_ssize_t i, j, N, K, lab, ncounts = len (counts)
398
402
floating val, count
399
- ndarray[ floating, ndim = 2 ] sumx, nobs
403
+ floating[:, : ] sumx, nobs
400
404
401
- if not len (values) = = len (labels):
405
+ if len (values) ! = len (labels):
402
406
raise AssertionError (" len(index) != len(labels)" )
403
407
404
408
nobs = np.zeros_like(out)
@@ -407,7 +411,6 @@ def _group_add(floating[:, :] out,
407
411
N, K = (< object > values).shape
408
412
409
413
with nogil:
410
-
411
414
for i in range (N):
412
415
lab = labels[i]
413
416
if lab < 0 :
@@ -433,5 +436,213 @@ def _group_add(floating[:, :] out,
433
436
# Concrete specializations of the fused-type ``_group_add`` kernel, selected
# by C type name ('float' -> float32, 'double' -> float64).
group_add_float32 = _group_add['float']
group_add_float64 = _group_add['double']
435
438
439
+
440
@cython.wraparound(False)
@cython.boundscheck(False)
def _group_prod(floating[:, :] out,
                int64_t[:] counts,
                floating[:, :] values,
                const int64_t[:] labels,
                Py_ssize_t min_count=0):
    """
    Compute the per-group product of ``values``.

    Only aggregates on axis=0.

    Parameters
    ----------
    out : floating[:, :]
        Written in place. ``out[g, j]`` receives the product of the non-NaN
        entries of column ``j`` whose row label is ``g``, or NaN when fewer
        than ``min_count`` such entries were seen. With the default
        ``min_count=0`` a group without valid entries yields 1.0.
    counts : int64_t[:]
        Incremented in place once for every row with a non-negative label
        (rows holding NaN included). Its length determines how many rows of
        ``out`` are written.
    values : floating[:, :]
        Data to aggregate; rows are matched to ``labels`` by position.
    labels : const int64_t[:]
        Group index of each row of ``values``. Negative labels are skipped.
    min_count : Py_ssize_t, default 0
        Minimum number of non-NaN observations required for a non-NaN result.

    Raises
    ------
    AssertionError
        If ``len(values) != len(labels)``.
    """
    cdef:
        Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
        floating val
        floating[:, :] prodx, nobs

    if len(values) != len(labels):
        raise AssertionError("len(index) != len(labels)")

    # Per-(group, column) observation count and running product; starting the
    # product at 1 makes the first multiplication a plain assignment.
    nobs = np.zeros_like(out)
    prodx = np.ones_like(out)

    N, K = (<object>values).shape

    with nogil:
        for i in range(N):
            lab = labels[i]
            if lab < 0:
                continue

            counts[lab] += 1
            for j in range(K):
                val = values[i, j]

                # not nan
                if val == val:
                    nobs[lab, j] += 1
                    prodx[lab, j] *= val

        for i in range(ncounts):
            for j in range(K):
                if nobs[i, j] < min_count:
                    out[i, j] = NAN
                else:
                    out[i, j] = prodx[i, j]


# Concrete specializations of the fused-type kernel, selected by C type name.
group_prod_float32 = _group_prod['float']
group_prod_float64 = _group_prod['double']
488
+
489
+
490
@cython.wraparound(False)
@cython.boundscheck(False)
@cython.cdivision(True)
def _group_var(floating[:, :] out,
               int64_t[:] counts,
               floating[:, :] values,
               const int64_t[:] labels,
               Py_ssize_t min_count=-1):
    """
    Compute the per-group sample variance of ``values``.

    Only aggregates on axis=0. Each (group, column) cell is updated in a
    single pass by keeping a running mean and a running sum of squared
    deviations (Welford's method), then dividing by ``n - 1``.

    Parameters
    ----------
    out : floating[:, :]
        Written in place. It is zeroed first and used as the accumulator for
        the squared deviations; on return ``out[g, j]`` is the variance of
        the non-NaN entries of column ``j`` labelled ``g``, or NaN when fewer
        than two such entries were seen.
    counts : int64_t[:]
        Incremented in place once for every row with a non-negative label
        (rows holding NaN included). Its length determines how many rows of
        ``out`` are finalized.
    values : floating[:, :]
        Data to aggregate; rows are matched to ``labels`` by position.
    labels : const int64_t[:]
        Group index of each row of ``values``. Negative labels are skipped.
    min_count : Py_ssize_t, default -1
        Not supported by this kernel; must be left at -1.

    Raises
    ------
    AssertionError
        If ``min_count`` is not -1, or if ``len(values) != len(labels)``.
    """
    cdef:
        Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
        floating val, ct, oldmean
        floating[:, :] nobs, mean

    assert min_count == -1, "'min_count' only used in add and prod"

    if len(values) != len(labels):
        raise AssertionError("len(index) != len(labels)")

    nobs = np.zeros_like(out)
    mean = np.zeros_like(out)

    N, K = (<object>values).shape

    # ``out`` doubles as the sum-of-squared-deviations accumulator.
    out[:, :] = 0.0

    with nogil:
        for i in range(N):
            lab = labels[i]
            if lab < 0:
                continue

            counts[lab] += 1

            for j in range(K):
                val = values[i, j]

                # not nan
                if val == val:
                    nobs[lab, j] += 1
                    oldmean = mean[lab, j]
                    # Update the mean first: the deviation product below
                    # needs both the previous and the new mean.
                    mean[lab, j] += (val - oldmean) / nobs[lab, j]
                    out[lab, j] += (val - mean[lab, j]) * (val - oldmean)

        for i in range(ncounts):
            for j in range(K):
                ct = nobs[i, j]
                # Sample variance is undefined for fewer than 2 observations.
                if ct < 2:
                    out[i, j] = NAN
                else:
                    out[i, j] /= (ct - 1)


# Concrete specializations of the fused-type kernel, selected by C type name.
group_var_float32 = _group_var['float']
group_var_float64 = _group_var['double']
544
+
545
+
546
@cython.wraparound(False)
@cython.boundscheck(False)
def _group_mean(floating[:, :] out,
                int64_t[:] counts,
                floating[:, :] values,
                const int64_t[:] labels,
                Py_ssize_t min_count=-1):
    """
    Compute the per-group arithmetic mean of ``values``.

    Only aggregates on axis=0.

    Parameters
    ----------
    out : floating[:, :]
        Written in place. ``out[g, j]`` receives the mean of the non-NaN
        entries of column ``j`` whose row label is ``g``, or NaN when the
        group has no such entries.
    counts : int64_t[:]
        Incremented in place once for every row with a non-negative label
        (rows holding NaN included). Its length determines how many rows of
        ``out`` are written.
    values : floating[:, :]
        Data to aggregate; rows are matched to ``labels`` by position.
    labels : const int64_t[:]
        Group index of each row of ``values``. Negative labels are skipped.
    min_count : Py_ssize_t, default -1
        Not supported by this kernel; must be left at -1.

    Raises
    ------
    AssertionError
        If ``min_count`` is not -1, or if ``len(values) != len(labels)``.
    """
    cdef:
        Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
        floating val, count
        floating[:, :] sumx, nobs

    assert min_count == -1, "'min_count' only used in add and prod"

    if len(values) != len(labels):
        raise AssertionError("len(index) != len(labels)")

    # Per-(group, column) observation count and running sum.
    nobs = np.zeros_like(out)
    sumx = np.zeros_like(out)

    N, K = (<object>values).shape

    with nogil:
        for i in range(N):
            lab = labels[i]
            if lab < 0:
                continue

            counts[lab] += 1
            for j in range(K):
                val = values[i, j]
                # not nan
                if val == val:
                    nobs[lab, j] += 1
                    sumx[lab, j] += val

        for i in range(ncounts):
            for j in range(K):
                count = nobs[i, j]
                # No valid observations: the mean is undefined.
                if count == 0:
                    out[i, j] = NAN
                else:
                    out[i, j] = sumx[i, j] / count


# Concrete specializations of the fused-type kernel, selected by C type name.
group_mean_float32 = _group_mean['float']
group_mean_float64 = _group_mean['double']
593
+
594
+
595
@cython.wraparound(False)
@cython.boundscheck(False)
def _group_ohlc(floating[:, :] out,
                int64_t[:] counts,
                floating[:, :] values,
                const int64_t[:] labels,
                Py_ssize_t min_count=-1):
    """
    Compute per-group open/high/low/close of a single column of ``values``.

    Only aggregates on axis=0.

    Parameters
    ----------
    out : floating[:, :]
        Written in place; must have exactly 4 columns. For each group the
        columns hold, in order, the first non-NaN value seen, the maximum,
        the minimum and the last non-NaN value seen. Groups without any
        non-NaN value are left as NaN. Nothing is written when ``labels`` is
        empty.
    counts : int64_t[:]
        Incremented in place once for every row with a non-negative label
        (rows holding NaN included).
    values : floating[:, :]
        Data to aggregate; only column 0 is read and more than one column is
        rejected.
    labels : const int64_t[:]
        Group index of each row of ``values``. Negative labels are skipped.
    min_count : Py_ssize_t, default -1
        Not supported by this kernel; must be left at -1.

    Raises
    ------
    AssertionError
        If ``min_count`` is not -1.
    ValueError
        If ``out`` does not have 4 columns.
    NotImplementedError
        If ``values`` has more than one column.
    """
    cdef:
        Py_ssize_t i, N, K, lab
        floating val

    assert min_count == -1, "'min_count' only used in add and prod"

    if len(labels) == 0:
        return

    N, K = (<object>values).shape

    if out.shape[1] != 4:
        raise ValueError('Output array must have 4 columns')

    if K > 1:
        raise NotImplementedError("Argument 'values' must have only "
                                  "one dimension")
    # NaN in column 0 marks "no observation yet" for a group.
    out[:] = np.nan

    with nogil:
        for i in range(N):
            lab = labels[i]
            # Skip every negative label, as the sibling kernels do: bounds
            # checking and wraparound are disabled, so indexing with one
            # would touch memory outside ``counts``/``out``.
            if lab < 0:
                continue

            counts[lab] += 1
            val = values[i, 0]
            if val != val:
                continue

            if out[lab, 0] != out[lab, 0]:
                # First valid value for this group seeds all four columns.
                out[lab, 0] = out[lab, 1] = out[lab, 2] = out[lab, 3] = val
            else:
                out[lab, 1] = max(out[lab, 1], val)
                out[lab, 2] = min(out[lab, 2], val)
                out[lab, 3] = val


# Concrete specializations of the fused-type kernel, selected by C type name.
group_ohlc_float32 = _group_ohlc['float']
group_ohlc_float64 = _group_ohlc['double']
646
+
436
647
# generated from template
437
648
include " groupby_helper.pxi"
0 commit comments