-
-
Notifications
You must be signed in to change notification settings - Fork 150
/
Copy pathpairwise.py
1511 lines (1312 loc) · 62 KB
/
pairwise.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Author: Raphael Vallat <raphaelvallat9@gmail.com>
# Date: April 2018
import numpy as np
import pandas as pd
import pandas_flavor as pf
from itertools import combinations, product
from pingouin.config import options
from pingouin.parametric import anova
from pingouin.multicomp import multicomp
from pingouin.effsize import compute_effsize
from pingouin.utils import _check_dataframe, _flatten_list, _postprocess_dataframe
from scipy.stats import studentized_range
import warnings
__all__ = [
"pairwise_ttests",
"pairwise_tests",
"ptests",
"pairwise_tukey",
"pairwise_gameshowell",
"pairwise_corr",
]
@pf.register_dataframe_method
def pairwise_ttests(*args, **kwargs):
    """This function has been deprecated. Use :py:func:`pingouin.pairwise_tests` instead."""
    # stacklevel=2 makes the warning point at the *caller's* line rather than at
    # this wrapper, which is the standard practice for deprecation shims.
    warnings.warn(
        "pairwise_ttests is deprecated, use pairwise_tests instead.",
        UserWarning,
        stacklevel=2,
    )
    # Pure pass-through: signature, behavior and return value are identical to
    # pairwise_tests.
    return pairwise_tests(*args, **kwargs)
@pf.register_dataframe_method
def pairwise_tests(
    data=None,
    dv=None,
    between=None,
    within=None,
    subject=None,
    parametric=True,
    marginal=True,
    alpha=0.05,
    alternative="two-sided",
    padjust="none",
    effsize="hedges",
    correction="auto",
    nan_policy="listwise",
    return_desc=False,
    interaction=True,
    within_first=True,
):
    """Pairwise tests.

    Parameters
    ----------
    data : :py:class:`pandas.DataFrame`
        DataFrame. Note that this function can also directly be used as a
        Pandas method, in which case this argument is no longer needed.
    dv : string
        Name of column containing the dependent variable.
    between : string or list with 2 elements
        Name of column(s) containing the between-subject factor(s).
    within : string or list with 2 elements
        Name of column(s) containing the within-subject factor(s), i.e. the
        repeated measurements. Only a single within-subject factor is supported for mixed analysis
        (if ``between`` is also specified).
    subject : string
        Name of column containing the subject identifier. This is mandatory when ``within`` is
        specified.
    parametric : boolean
        If True (default), use the parametric :py:func:`ttest` function.
        If False, use :py:func:`pingouin.wilcoxon` or :py:func:`pingouin.mwu`
        for paired or unpaired samples, respectively.
    marginal : boolean
        If True (default), the between-subject pairwise T-test(s) will be calculated
        after averaging across all levels of the within-subject factor in mixed
        design. This is recommended to avoid violating the assumption of
        independence and conflating the degrees of freedom by the
        number of repeated measurements.

        .. versionadded:: 0.3.2
    alpha : float
        Significance level
    alternative : string
        Defines the alternative hypothesis, or tail of the test. Must be one of
        "two-sided" (default), "greater" or "less". Both "greater" and "less" return one-sided
        p-values. "greater" tests against the alternative hypothesis that the mean of ``x``
        is greater than the mean of ``y``.
    padjust : string
        Method used for testing and adjustment of pvalues.

        * ``'none'``: no correction
        * ``'bonf'``: one-step Bonferroni correction
        * ``'sidak'``: one-step Sidak correction
        * ``'holm'``: step-down method using Bonferroni adjustments
        * ``'fdr_bh'``: Benjamini/Hochberg FDR correction
        * ``'fdr_by'``: Benjamini/Yekutieli FDR correction
    effsize : string or None
        Effect size type. Available methods are:

        * ``'none'``: no effect size
        * ``'cohen'``: Unbiased Cohen d
        * ``'hedges'``: Hedges g
        * ``'r'``: Pearson correlation coefficient
        * ``'eta_square'``: Eta-square
        * ``'odds_ratio'``: Odds ratio
        * ``'AUC'``: Area Under the Curve
        * ``'CLES'``: Common Language Effect Size
    correction : string or boolean
        For independent two sample T-tests, specify whether or not to correct for
        unequal variances using Welch separate variances T-test. If `'auto'`,
        it will automatically uses Welch T-test when the sample sizes are
        unequal, as recommended by Zimmerman 2004.

        .. versionadded:: 0.3.2
    nan_policy : string
        Can be `'listwise'` for listwise deletion of missing values in repeated
        measures design (= complete-case analysis) or `'pairwise'` for the
        more liberal pairwise deletion (= available-case analysis). The former (default) is more
        appropriate for post-hoc analysis following an ANOVA, however it can drastically reduce
        the power of the test: any subject with one or more missing value(s) will be
        completely removed from the analysis.

        .. versionadded:: 0.2.9
    return_desc : boolean
        If True, append group means and std to the output dataframe
    interaction : boolean
        If there are multiple factors and ``interaction`` is True (default),
        Pingouin will also calculate T-tests for the interaction term (see Notes).

        .. versionadded:: 0.2.9
    within_first : boolean
        Determines the order of the interaction in mixed design. Pingouin will
        return within * between when this parameter is set to True (default),
        and between * within otherwise.

        .. versionadded:: 0.3.6

    Returns
    -------
    stats : :py:class:`pandas.DataFrame`

        * ``'Contrast'``: Contrast (= independent variable or interaction)
        * ``'A'``: Name of first measurement
        * ``'B'``: Name of second measurement
        * ``'Paired'``: indicates whether the two measurements are paired or
          independent
        * ``'Parametric'``: indicates if (non)-parametric tests were used
        * ``'T'``: T statistic (only if parametric=True)
        * ``'U_val'``: Mann-Whitney U stat (if parametric=False and unpaired
          data)
        * ``'W_val'``: Wilcoxon W stat (if parametric=False and paired data)
        * ``'dof'``: degrees of freedom (only if parametric=True)
        * ``'alternative'``: tail of the test
        * ``'p_unc'``: Uncorrected p-values
        * ``'p_corr'``: Corrected p-values
        * ``'p_adjust'``: p-values correction method
        * ``'BF10'``: Bayes Factor
        * ``'hedges'``: effect size (or any effect size defined in
          ``effsize``)

    See also
    --------
    ttest, mwu, wilcoxon, compute_effsize, multicomp

    Notes
    -----
    Data are expected to be in long-format. If your data is in wide-format,
    you can use the :py:func:`pandas.melt` function to convert from wide to
    long format.

    If ``between`` or ``within`` is a list (e.g. ['col1', 'col2']),
    the function returns 1) the pairwise T-tests between each values of the
    first column, 2) the pairwise T-tests between each values of the second
    column and 3) the interaction between col1 and col2. The interaction is
    dependent of the order of the list, so ['col1', 'col2'] will not yield the
    same results as ['col2', 'col1']. Furthermore, the interaction will only be
    calculated if ``interaction=True``.

    If ``between`` is a list with two elements, the output
    model is between1 + between2 + between1 * between2.

    Similarly, if ``within`` is a list with two elements, the output model is
    within1 + within2 + within1 * within2.

    If both ``between`` and ``within`` are specified, the output model is
    within + between + within * between (= mixed design), unless
    ``within_first=False`` in which case the model becomes between + within +
    between * within. Mixed analysis with two or more within-subject factors are not currently
    supported.

    Missing values in repeated measurements are automatically removed using a
    listwise (default) or pairwise deletion strategy. The former is more conservative, as any
    subject with one or more missing value(s) will be completely removed from the dataframe prior
    to calculating the T-tests. The ``nan_policy`` parameter can therefore have a huge impact
    on the results.

    Examples
    --------
    For more examples, please refer to the `Jupyter notebooks
    <https://github.com/raphaelvallat/pingouin/blob/master/notebooks/01_ANOVA.ipynb>`_

    1. One between-subject factor

    >>> import pandas as pd
    >>> import pingouin as pg
    >>> pd.set_option('display.expand_frame_repr', False)
    >>> pd.set_option('display.max_columns', 20)
    >>> df = pg.read_dataset('mixed_anova.csv')
    >>> pg.pairwise_tests(dv='Scores', between='Group', data=df).round(3)
      Contrast        A           B  Paired  Parametric     T    dof alternative  p_unc   BF10  hedges
    0    Group  Control  Meditation   False        True -2.29  178.0   two-sided  0.023  1.813   -0.34

    2. One within-subject factor

    >>> post_hocs = pg.pairwise_tests(dv='Scores', within='Time', subject='Subject', data=df)
    >>> post_hocs.round(3)
      Contrast        A        B  Paired  Parametric      T   dof alternative  p_unc   BF10  hedges
    0     Time   August  January    True        True -1.740  59.0   two-sided  0.087  0.582  -0.328
    1     Time   August     June    True        True -2.743  59.0   two-sided  0.008  4.232  -0.483
    2     Time  January     June    True        True -1.024  59.0   two-sided  0.310  0.232  -0.170

    3. Non-parametric pairwise paired test (wilcoxon)

    >>> pg.pairwise_tests(dv='Scores', within='Time', subject='Subject',
    ...                   data=df, parametric=False).round(3)
      Contrast        A        B  Paired  Parametric  W_val alternative  p_unc  hedges
    0     Time   August  January    True       False  716.0   two-sided  0.144  -0.328
    1     Time   August     June    True       False  564.0   two-sided  0.010  -0.483
    2     Time  January     June    True       False  887.0   two-sided  0.840  -0.170

    4. Mixed design (within and between) with bonferroni-corrected p-values

    >>> posthocs = pg.pairwise_tests(dv='Scores', within='Time', subject='Subject',
    ...                              between='Group', padjust='bonf', data=df)
    >>> posthocs.round(3)
           Contrast     Time        A           B  Paired  Parametric      T   dof alternative  p_unc  p_corr p_adjust   BF10  hedges
    0          Time        -   August     January    True        True -1.740  59.0   two-sided  0.087   0.261     bonf  0.582  -0.328
    1          Time        -   August        June    True        True -2.743  59.0   two-sided  0.008   0.024     bonf  4.232  -0.483
    2          Time        -  January        June    True        True -1.024  59.0   two-sided  0.310   0.931     bonf  0.232  -0.170
    3         Group        -  Control  Meditation   False        True -2.248  58.0   two-sided  0.028     NaN      NaN  2.096  -0.573
    4  Time * Group   August  Control  Meditation   False        True  0.316  58.0   two-sided  0.753   1.000     bonf  0.274   0.081
    5  Time * Group  January  Control  Meditation   False        True -1.434  58.0   two-sided  0.157   0.471     bonf  0.619  -0.365
    6  Time * Group     June  Control  Meditation   False        True -2.744  58.0   two-sided  0.008   0.024     bonf  5.593  -0.699

    5. Two between-subject factors. The order of the ``between`` factors matters!

    >>> pg.pairwise_tests(dv='Scores', between=['Group', 'Time'], data=df).round(3)
           Contrast       Group        A           B  Paired  Parametric      T    dof alternative  p_unc     BF10  hedges
    0         Group           -  Control  Meditation   False        True -2.290  178.0   two-sided  0.023    1.813  -0.340
    1          Time           -   August     January   False        True -1.806  118.0   two-sided  0.074    0.839  -0.328
    2          Time           -   August        June   False        True -2.660  118.0   two-sided  0.009    4.499  -0.483
    3          Time           -  January        June   False        True -0.934  118.0   two-sided  0.352    0.288  -0.170
    4  Group * Time     Control   August     January   False        True -0.383   58.0   two-sided  0.703    0.279  -0.098
    5  Group * Time     Control   August        June   False        True -0.292   58.0   two-sided  0.771    0.272  -0.074
    6  Group * Time     Control  January        June   False        True  0.045   58.0   two-sided  0.964    0.263   0.011
    7  Group * Time  Meditation   August     January   False        True -2.188   58.0   two-sided  0.033    1.884  -0.558
    8  Group * Time  Meditation   August        June   False        True -4.040   58.0   two-sided  0.000  148.302  -1.030
    9  Group * Time  Meditation  January        June   False        True -1.442   58.0   two-sided  0.155    0.625  -0.367

    6. Same but without the interaction, and using a directional test

    >>> df.pairwise_tests(dv='Scores', between=['Group', 'Time'], alternative="less",
    ...                   interaction=False).round(3)
      Contrast        A           B  Paired  Parametric      T    dof alternative  p_unc   BF10  hedges
    0    Group  Control  Meditation   False        True -2.290  178.0        less  0.012  3.626  -0.340
    1     Time   August     January   False        True -1.806  118.0        less  0.037  1.679  -0.328
    2     Time   August        June   False        True -2.660  118.0        less  0.004  8.998  -0.483
    3     Time  January        June   False        True -0.934  118.0        less  0.176  0.577  -0.170
    """
    # Local imports to avoid a circular dependency between pingouin submodules.
    from .parametric import ttest
    from .nonparametric import wilcoxon, mwu

    # Safety checks
    data = _check_dataframe(
        dv=dv, between=between, within=within, subject=subject, effects="all", data=data
    )
    assert alternative in [
        "two-sided",
        "greater",
        "less",
    ], "Alternative must be one of 'two-sided' (default), 'greater' or 'less'."
    assert isinstance(alpha, float), "alpha must be float."
    assert nan_policy in ["listwise", "pairwise"]

    # Check if we have multiple between or within factors. A single-element
    # list is unwrapped to its scalar so it follows the "simple" code path.
    multiple_between = False
    multiple_within = False
    contrast = None
    if isinstance(between, list):
        if len(between) > 1:
            multiple_between = True
            contrast = "multiple_between"
            assert all([b in data.keys() for b in between])
        else:
            between = between[0]
    if isinstance(within, list):
        if len(within) > 1:
            multiple_within = True
            contrast = "multiple_within"
            assert all([w in data.keys() for w in within])
        else:
            within = within[0]
    if all([multiple_within, multiple_between]):
        raise ValueError(
            "Multiple between and within factors are currently not supported. "
            "Please select only one."
        )
    # Check the other cases. Between and within column names can be str or int (not float).
    if isinstance(between, (str, int)) and within is None:
        contrast = "simple_between"
        assert between in data.keys()
    if isinstance(within, (str, int)) and between is None:
        contrast = "simple_within"
        assert within in data.keys()
    if isinstance(between, (str, int)) and isinstance(within, (str, int)):
        contrast = "within_between"
        assert all([between in data.keys(), within in data.keys()])

    # Create col_order — canonical column order of the output dataframe.
    # Columns that end up all-NaN are dropped at the very end.
    col_order = [
        "Contrast",
        "Time",
        "A",
        "B",
        "mean_A",
        "std_A",
        "mean_B",
        "std_B",
        "Paired",
        "Parametric",
        "T",
        "U_val",
        "W_val",
        "dof",
        "alternative",
        "p_unc",
        "p_corr",
        "p_adjust",
        "BF10",
        effsize,
    ]

    # If repeated measures, pivot and melt the table. This has several effects:
    # 1) Force missing values to be explicit (a NaN cell is created)
    # 2) Automatic collapsing to the mean if multiple within factors are present
    # 3) If using dropna, remove rows with missing values (listwise deletion).
    # The latter is the same behavior as JASP (= strict complete-case analysis).
    if within is not None:
        idx_piv = subject if between is None else [subject, between]
        data_piv = data.pivot_table(index=idx_piv, columns=within, values=dv, observed=True)
        if nan_policy == "listwise":
            # Remove rows (= subject) with missing values. For pairwise deletion, missing values
            # will be removed directly in the lower-level functions (e.g. pg.ttest)
            data_piv = data_piv.dropna()
        data = data_piv.melt(ignore_index=False, value_name=dv).reset_index()

    if contrast in ["simple_within", "simple_between"]:
        # OPTION A: SIMPLE MAIN EFFECTS, WITHIN OR BETWEEN
        paired = True if contrast == "simple_within" else False
        col = within if contrast == "simple_within" else between
        # Extract levels of the grouping variable, sorted in alphabetical order
        grp_col = data.groupby(col, sort=True, observed=True)[dv]
        labels = grp_col.groups.keys()
        # Number and labels of possible comparisons
        if len(labels) >= 2:
            combs = list(combinations(labels, 2))
            combs = np.array(combs)
            A = combs[:, 0]
            B = combs[:, 1]
        else:
            raise ValueError("Columns must have at least two unique values.")
        # Initialize dataframe
        stats = pd.DataFrame(dtype=np.float64, index=range(len(combs)), columns=col_order)
        # Force dtype conversion
        cols_str = ["Contrast", "Time", "A", "B", "alternative", "p_adjust", "BF10"]
        cols_bool = ["Parametric", "Paired"]
        stats[cols_str] = stats[cols_str].astype(object)
        stats[cols_bool] = stats[cols_bool].astype(bool)
        # Fill str columns
        stats.loc[:, "A"] = A
        stats.loc[:, "B"] = B
        stats.loc[:, "Contrast"] = col
        stats.loc[:, "alternative"] = alternative
        stats.loc[:, "Paired"] = paired

        # For max precision, make sure rounding is disabled
        old_options = options.copy()
        options["round"] = None

        # One T-test (or non-parametric equivalent) per pair of levels.
        for i in range(stats.shape[0]):
            col1, col2 = stats.at[i, "A"], stats.at[i, "B"]
            x = grp_col.get_group(col1).to_numpy(dtype=np.float64)
            y = grp_col.get_group(col2).to_numpy(dtype=np.float64)
            if parametric:
                stat_name = "T"
                df_ttest = ttest(
                    x, y, paired=paired, alternative=alternative, correction=correction
                )
                stats.at[i, "BF10"] = df_ttest.at["T_test", "BF10"]
                stats.at[i, "dof"] = df_ttest.at["T_test", "dof"]
            else:
                if paired:
                    stat_name = "W_val"
                    df_ttest = wilcoxon(x, y, alternative=alternative)
                else:
                    stat_name = "U_val"
                    df_ttest = mwu(x, y, alternative=alternative)
            options.update(old_options)  # restore options
            # Compute Hedges / Cohen
            ef = compute_effsize(x=x, y=y, eftype=effsize, paired=paired)
            if return_desc:
                stats.at[i, "mean_A"] = np.nanmean(x)
                stats.at[i, "mean_B"] = np.nanmean(y)
                stats.at[i, "std_A"] = np.nanstd(x, ddof=1)
                stats.at[i, "std_B"] = np.nanstd(y, ddof=1)
            stats.at[i, stat_name] = df_ttest[stat_name].iat[0]
            stats.at[i, "p_unc"] = df_ttest["p_val"].iat[0]
            stats.at[i, effsize] = ef

        # Multiple comparisons — only meaningful when there is more than one
        # p-value to correct.
        padjust = None if stats["p_unc"].size <= 1 else padjust
        if padjust is not None:
            if padjust.lower() != "none":
                _, stats["p_corr"] = multicomp(
                    stats["p_unc"].to_numpy(), alpha=alpha, method=padjust
                )
                stats["p_adjust"] = padjust
        else:
            stats["p_corr"] = None
            stats["p_adjust"] = None
    else:
        # Multiple factors. Each main effect is delegated to a recursive call
        # of pairwise_tests with a single factor; the interaction (if any) is
        # computed inline below.
        if contrast == "multiple_between":
            # B1: BETWEEN1 + BETWEEN2 + BETWEEN1 * BETWEEN2
            factors = between
            fbt = factors
            fwt = [None, None]
            paired = False  # the interaction is not paired
            agg = [False, False]
            # TODO: add a pool SD option, as in JASP and JAMOVI?
        elif contrast == "multiple_within":
            # B2: WITHIN1 + WITHIN2 + WITHIN1 * WITHIN2
            factors = within
            fbt = [None, None]
            fwt = factors
            paired = True
            agg = [True, True]  # Calculate marginal means for both factors
        else:
            # B3: WITHIN + BETWEEN + INTERACTION
            # Decide which order should be reported
            if within_first:
                # within + between + within * between
                factors = [within, between]
                fbt = [None, between]
                fwt = [within, None]
                paired = False  # only for interaction
                agg = [False, True]
            else:
                # between + within + between * within
                factors = [between, within]
                fbt = [between, None]
                fwt = [None, within]
                paired = True
                agg = [True, False]

        stats = pd.DataFrame()
        for i, f in enumerate(factors):
            # Introduced in Pingouin v0.3.2
            # Note that is only has an impact in the between test of mixed
            # designs. Indeed, a similar groupby is applied by default on
            # each within-subject factor of a two-way repeated measures design.
            if all([agg[i], marginal]):
                # Collapse to the subject-level marginal mean before the
                # (between) test, to avoid inflating the degrees of freedom.
                tmp = data.groupby([subject, f], as_index=False, observed=True, sort=True).mean(
                    numeric_only=True
                )
            else:
                tmp = data
            # Recursive call: one factor at a time (either between or within).
            pt = pairwise_tests(
                dv=dv,
                between=fbt[i],
                within=fwt[i],
                subject=subject,
                data=tmp,
                parametric=parametric,
                marginal=marginal,
                alpha=alpha,
                alternative=alternative,
                padjust=padjust,
                effsize=effsize,
                correction=correction,
                nan_policy=nan_policy,
                return_desc=return_desc,
            )
            stats = pd.concat([stats, pt], axis=0, ignore_index=True, sort=False)

        # Then compute the interaction between the factors
        if interaction:
            nrows = stats.shape[0]
            # BUGFIX 0.3.9: If subject is present, make sure that we respect
            # the order of subjects.
            if subject is not None:
                data = data.set_index(subject).sort_index()
            # Extract interaction levels, sorted in alphabetical order
            grp_fac1 = data.groupby(factors[0], observed=True, sort=True)[dv]
            grp_fac2 = data.groupby(factors[1], observed=True, sort=True)[dv]
            grp_both = data.groupby(factors, observed=True, sort=True)[dv]
            labels_fac1 = grp_fac1.groups.keys()
            labels_fac2 = grp_fac2.groups.keys()
            # comb_fac1 = list(combinations(labels_fac1, 2))
            comb_fac2 = list(combinations(labels_fac2, 2))
            # Pairwise comparisons: for each level of factor 1, compare every
            # pair of levels of factor 2.
            combs_list = list(product(labels_fac1, comb_fac2))
            ncombs = len(combs_list)
            # np.array(combs_list) does not work because of tuples
            # we therefore need to flatten the tupple
            combs = np.zeros(shape=(ncombs, 3), dtype=object)
            for i in range(ncombs):
                combs[i] = _flatten_list(combs_list[i], include_tuple=True)
            # Append empty rows
            idxiter = np.arange(nrows, nrows + ncombs)
            stats = stats.reindex(stats.index.union(idxiter))
            # Update other columns
            stats.loc[idxiter, "Contrast"] = factors[0] + " * " + factors[1]
            stats.loc[idxiter, "Time"] = combs[:, 0]
            stats.loc[idxiter, "Paired"] = paired
            stats.loc[idxiter, "alternative"] = alternative
            stats.loc[idxiter, "A"] = combs[:, 1]
            stats.loc[idxiter, "B"] = combs[:, 2]

            # For max precision, make sure rounding is disabled
            old_options = options.copy()
            options["round"] = None

            for i, comb in enumerate(combs):
                ic = nrows + i  # Take into account previous rows
                fac1, col1, col2 = comb
                x = grp_both.get_group((fac1, col1)).to_numpy(dtype=np.float64)
                y = grp_both.get_group((fac1, col2)).to_numpy(dtype=np.float64)
                ef = compute_effsize(x=x, y=y, eftype=effsize, paired=paired)
                if parametric:
                    stat_name = "T"
                    df_ttest = ttest(
                        x, y, paired=paired, alternative=alternative, correction=correction
                    )
                    stats.at[ic, "BF10"] = df_ttest.at["T_test", "BF10"]
                    stats.at[ic, "dof"] = df_ttest.at["T_test", "dof"]
                else:
                    if paired:
                        stat_name = "W_val"
                        df_ttest = wilcoxon(x, y, alternative=alternative)
                    else:
                        stat_name = "U_val"
                        df_ttest = mwu(x, y, alternative=alternative)
                options.update(old_options)  # restore options
                # Append to stats
                if return_desc:
                    stats.at[ic, "mean_A"] = np.nanmean(x)
                    stats.at[ic, "mean_B"] = np.nanmean(y)
                    stats.at[ic, "std_A"] = np.nanstd(x, ddof=1)
                    stats.at[ic, "std_B"] = np.nanstd(y, ddof=1)
                stats.at[ic, stat_name] = df_ttest[stat_name].iat[0]
                stats.at[ic, "p_unc"] = df_ttest["p_val"].iat[0]
                stats.at[ic, effsize] = ef

            # Multi-comparison columns — correction applied only to the
            # interaction rows (main effects were corrected recursively).
            if padjust is not None and padjust.lower() != "none":
                _, pcor = multicomp(
                    stats.loc[idxiter, "p_unc"].to_numpy(), alpha=alpha, method=padjust
                )
                stats.loc[idxiter, "p_corr"] = pcor
                stats.loc[idxiter, "p_adjust"] = padjust

    # ---------------------------------------------------------------------
    # Append parametric columns
    stats.loc[:, "Parametric"] = parametric

    # Reorder and drop empty columns
    stats = stats[np.array(col_order)[np.isin(col_order, stats.columns)]]
    stats = stats.dropna(how="all", axis=1)

    # Rename Time columns: in the multi-factor output, the "Time" column holds
    # the level of the first factor, so rename it accordingly.
    if contrast in ["multiple_within", "multiple_between", "within_between"] and interaction:
        stats["Time"] = stats["Time"].fillna("-")
        stats = stats.rename(columns={"Time": factors[0]})

    return _postprocess_dataframe(stats)
@pf.register_dataframe_method
def ptests(
    self,
    paired=False,
    decimals=3,
    padjust=None,
    stars=True,
    pval_stars={0.001: "***", 0.01: "**", 0.05: "*"},
    **kwargs,
):
    """
    Pairwise T-test between columns of a dataframe.

    T-values are reported on the lower triangle of the output pairwise matrix and p-values on the
    upper triangle. This method is a faster, but less exhaustive, matrix-version of the
    :py:func:`pingouin.pairwise_test` function. Missing values are automatically removed from each
    pairwise T-test.

    .. versionadded:: 0.5.3

    Parameters
    ----------
    self : :py:class:`pandas.DataFrame`
        Input dataframe.
    paired : boolean
        Specify whether the two observations are related (i.e. repeated measures) or independent.
    decimals : int
        Number of decimals to display in the output matrix.
    padjust : string or None
        P-values adjustment for multiple comparison

        * ``'none'``: no correction
        * ``'bonf'``: one-step Bonferroni correction
        * ``'sidak'``: one-step Sidak correction
        * ``'holm'``: step-down method using Bonferroni adjustments
        * ``'fdr_bh'``: Benjamini/Hochberg FDR correction
        * ``'fdr_by'``: Benjamini/Yekutieli FDR correction
    stars : boolean
        If True, only significant p-values are displayed as stars using the pre-defined thresholds
        of ``pval_stars``. If False, all the raw p-values are displayed.
    pval_stars : dict
        Significance thresholds. Default is 3 stars for p-values <0.001, 2 stars for
        p-values <0.01 and 1 star for p-values <0.05.
    **kwargs : optional
        Optional argument(s) passed to the lower-level scipy functions, i.e.
        :py:func:`scipy.stats.ttest_ind` for independent T-test and
        :py:func:`scipy.stats.ttest_rel` for paired T-test.

    Returns
    -------
    mat : :py:class:`pandas.DataFrame`
        Pairwise T-test matrix, of dtype str, with T-values on the lower triangle and p-values on
        the upper triangle.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> import pingouin as pg
    >>> # Load an example dataset of personality dimensions
    >>> df = pg.read_dataset('pairwise_corr').iloc[:30, 1:]
    >>> df.columns = ["N", "E", "O", 'A', "C"]
    >>> # Add some missing values
    >>> df.iloc[[2, 5, 20], 2] = np.nan
    >>> df.iloc[[1, 4, 10], 3] = np.nan
    >>> df.head().round(2)
          N     E     O     A     C
    0  2.48  4.21  3.94  3.96  3.46
    1  2.60  3.19  3.96   NaN  3.23
    2  2.81  2.90   NaN  2.75  3.50
    3  2.90  3.56  3.52  3.17  2.79
    4  3.02  3.33  4.02   NaN  2.85

    Independent pairwise T-tests

    >>> df.ptests()
            N       E       O      A  C
    N       -     ***     ***    ***  ***
    E  -8.397       -
    O  -8.332  -0.596       -         ***
    A  -8.804    0.12    0.72      -  ***
    C  -4.759   3.753   4.074  3.787  -

    Let's compare with SciPy

    >>> from scipy.stats import ttest_ind
    >>> np.round(ttest_ind(df["N"], df["E"]), 3)
    array([-8.397,  0.   ])

    Passing custom parameters to the lower-level :py:func:`scipy.stats.ttest_ind` function

    >>> df.ptests(alternative="greater", equal_var=True)
            N       E       O      A  C
    N       -
    E  -8.397       -                ***
    O  -8.332  -0.596       -        ***
    A  -8.804    0.12    0.72      -  ***
    C  -4.759   3.753   4.074  3.787  -

    Paired T-test, showing the actual p-values instead of stars

    >>> df.ptests(paired=True, stars=False, decimals=4)
            N        E        O       A       C
    N       -   0.0000   0.0000  0.0000  0.0002
    E -7.0773        -   0.8776  0.7522  0.0012
    O -8.0568  -0.1555        -  0.8137  0.0008
    A -8.3994   0.3191   0.2383       -  0.0009
    C -4.2511   3.5953   3.7849  3.7652       -

    Adjusting for multiple comparisons using the Holm-Bonferroni method

    >>> df.ptests(paired=True, stars=False, padjust="holm")
           N       E       O      A      C
    N      -   0.000   0.000  0.000  0.001
    E -7.077       -      1.     1.  0.005
    O -8.057  -0.155       -     1.  0.005
    A -8.399   0.319   0.238      -  0.005
    C -4.251   3.595   3.785  3.765      -
    """
    from itertools import combinations
    from numpy import triu_indices_from as tif
    from numpy import format_float_positional as ffp
    from scipy.stats import ttest_ind, ttest_rel

    assert isinstance(pval_stars, dict), "pval_stars must be a dictionary."
    assert isinstance(decimals, int), "decimals must be an int."

    # Pick the paired or independent SciPy test once, up front.
    if paired:
        func = ttest_rel
    else:
        func = ttest_ind

    # Get T-values and p-values
    # We cannot use pandas.DataFrame.corr here because it will incorrectly remove rows missing
    # values, even when using an independent T-test!
    cols = self.columns
    combs = list(combinations(cols, 2))
    mat = pd.DataFrame(columns=cols, index=cols, dtype=np.float64)
    mat_upper = mat.copy()

    for a, b in combs:
        # nan_policy="omit" removes missing values per-pair, mirroring the
        # behavior described in the docstring.
        t, p = func(self[a], self[b], **kwargs, nan_policy="omit")
        mat.loc[b, a] = np.round(t, decimals)
        # Do not round p-value here, or we'll lose precision for multicomp
        mat_upper.loc[a, b] = p

    if padjust is not None:
        pvals = mat_upper.to_numpy()[tif(mat, k=1)]
        # NOTE: this works because .to_numpy() on a homogeneous float64 frame
        # returns a writable *view*; the fancy-indexed assignment therefore
        # updates mat_upper in place.
        mat_upper.to_numpy()[tif(mat, k=1)] = multicomp(pvals, alpha=0.05, method=padjust)[1]

    # Convert T-values to str, and fill the diagonal with "-"
    mat = mat.astype(str)
    np.fill_diagonal(mat.to_numpy(), "-")

    def replace_pval(x):
        # Map a p-value to its star string; NaN (and non-significant values)
        # fall through to the empty string.
        for key, value in pval_stars.items():
            if x < key:
                return value
        return ""

    if stars:
        # Replace p-values by stars
        mat_upper = mat_upper.map(replace_pval)
    else:
        # Fixed-point formatting keeps exactly `decimals` digits without
        # scientific notation.
        mat_upper = mat_upper.map(lambda x: ffp(x, precision=decimals))

    # Replace upper triangle by p-values (same view trick as above).
    mat.to_numpy()[tif(mat, k=1)] = mat_upper.to_numpy()[tif(mat, k=1)]
    return mat
@pf.register_dataframe_method
def pairwise_tukey(data=None, dv=None, between=None, effsize="hedges"):
    """Pairwise Tukey-HSD post-hoc test.

    Parameters
    ----------
    data : :py:class:`pandas.DataFrame`
        DataFrame. Note that this function can also directly be used as a Pandas method, in which
        case this argument is no longer needed.
    dv : string
        Name of column containing the dependent variable.
    between: string
        Name of column containing the between factor.
    effsize : string or None
        Effect size type. Available methods are:

        * ``'none'``: no effect size
        * ``'cohen'``: Unbiased Cohen d
        * ``'hedges'``: Hedges g
        * ``'r'``: Pearson correlation coefficient
        * ``'eta_square'``: Eta-square
        * ``'odds_ratio'``: Odds ratio
        * ``'AUC'``: Area Under the Curve
        * ``'CLES'``: Common Language Effect Size

    Returns
    -------
    stats : :py:class:`pandas.DataFrame`

        * ``'A'``: Name of first measurement
        * ``'B'``: Name of second measurement
        * ``'mean_A'``: Mean of first measurement
        * ``'mean_B'``: Mean of second measurement
        * ``'diff'``: Mean difference (= mean(A) - mean(B))
        * ``'se'``: Standard error
        * ``'T'``: T-values
        * ``'p_tukey'``: Tukey-HSD corrected p-values
        * ``'hedges'``: Hedges effect size (or any effect size defined in
          ``effsize``)

    See also
    --------
    pairwise_tests, pairwise_gameshowell

    Notes
    -----
    Tukey HSD post-hoc [1]_ is best for balanced one-way ANOVA.

    It has been proven to be conservative for one-way ANOVA with unequal sample sizes. However, it
    is not robust if the groups have unequal variances, in which case the Games-Howell test is
    more adequate. Tukey HSD is not valid for repeated measures ANOVA. Only one-way ANOVA design
    are supported.

    The T-values are defined as:

    .. math::

        t = \\frac{\\overline{x}_i - \\overline{x}_j}
        {\\sqrt{2 \\cdot \\text{MS}_w / n}}

    where :math:`\\overline{x}_i` and :math:`\\overline{x}_j` are the means of the first and
    second group, respectively, :math:`\\text{MS}_w` the mean squares of the error (computed using
    ANOVA) and :math:`n` the sample size.

    If the sample sizes are unequal, the Tukey-Kramer procedure is automatically used:

    .. math::

        t = \\frac{\\overline{x}_i - \\overline{x}_j}{\\sqrt{\\frac{MS_w}{n_i}
        + \\frac{\\text{MS}_w}{n_j}}}

    where :math:`n_i` and :math:`n_j` are the sample sizes of the first and second group,
    respectively.

    The p-values are then approximated using the Studentized range distribution
    :math:`Q(\\sqrt2|t_i|, r, N - r)` where :math:`r` is the total number of groups and
    :math:`N` is the total sample size.

    References
    ----------
    .. [1] Tukey, John W. "Comparing individual means in the analysis of
           variance." Biometrics (1949): 99-114.

    .. [2] Gleason, John R. "An accurate, non-iterative approximation for
           studentized range quantiles." Computational statistics & data
           analysis 31.2 (1999): 147-158.

    Examples
    --------
    Pairwise Tukey post-hocs on the Penguins dataset.

    >>> import pingouin as pg
    >>> df = pg.read_dataset('penguins')
    >>> df.pairwise_tukey(dv='body_mass_g', between='species').round(3)
               A          B    mean_A   mean_B      diff      se       T  p_tukey  hedges
    0     Adelie  Chinstrap  3700.662  3733.088   -32.426  67.512  -0.480    0.881  -0.074
    1     Adelie     Gentoo  3700.662  5076.016 -1375.354  56.148 -24.495    0.000  -2.860
    2  Chinstrap     Gentoo  3733.088  5076.016 -1342.928  69.857 -19.224    0.000  -2.875
    """
    # First compute the ANOVA (provides MS_w and the degrees of freedom).
    # For max precision, make sure rounding is disabled. Restore the user's
    # original options even if the ANOVA raises, so a failure here does not
    # leave the global ``options`` dict permanently modified.
    old_options = options.copy()
    options["round"] = None
    try:
        aov = anova(dv=dv, data=data, between=between, detailed=True)
    finally:
        options.update(old_options)  # Restore original options
    df = aov.at[1, "DF"]  # Residual (within-group) degrees of freedom
    ng = aov.at[0, "DF"] + 1  # Number of groups
    grp = data.groupby(between, observed=True)[dv]  # default is sort=True
    # Careful: pd.unique does NOT sort whereas numpy does
    # The line below should be equal to labels = np.unique(data[between])
    # However, this does not work if between is a Categorical column, because
    # Pandas applies a custom, not alphabetical, sorting.
    # See https://github.com/raphaelvallat/pingouin/issues/111
    labels = np.array(list(grp.groups.keys()))
    n = grp.count().to_numpy()
    gmeans = grp.mean(numeric_only=True).to_numpy()
    # Per-group variance term MS_w / n_i; summing two of these gives the
    # Tukey-Kramer standard error, which reduces to 2*MS_w/n for balanced designs.
    gvar = aov.at[1, "MS"] / n
    # Pairwise combinations of group indices
    g1, g2 = np.array(list(combinations(np.arange(ng), 2))).T
    mn = gmeans[g1] - gmeans[g2]
    se = np.sqrt(gvar[g1] + gvar[g2])
    tval = mn / se
    # Critical values and p-values
    # crit = studentized_range.ppf(1 - alpha, ng, df) / np.sqrt(2)
    pval = studentized_range.sf(np.sqrt(2) * np.abs(tval), ng, df)
    pval = np.clip(pval, 0, 1)
    # Uncorrected p-values
    # from scipy.stats import t
    # punc = t.sf(np.abs(tval), n[g1].size + n[g2].size - 2) * 2
    # Effect size
    # Method 1: Approximation
    # d = tval * np.sqrt(1 / n[g1] + 1 / n[g2])
    # ef = convert_effsize(d, "cohen", effsize, n[g1], n[g2])
    # Method 2: Exact
    ef = []
    for idx_a, idx_b in zip(g1, g2):
        ef.append(
            compute_effsize(
                grp.get_group(labels[idx_a]),
                grp.get_group(labels[idx_b]),
                paired=False,
                eftype=effsize,
            )
        )
    # Create dataframe
    stats = pd.DataFrame(
        {
            "A": labels[g1],
            "B": labels[g2],
            "mean_A": gmeans[g1],
            "mean_B": gmeans[g2],
            "diff": mn,
            "se": se,
            "T": tval,
            "p_tukey": pval,
            effsize: ef,
        }
    )
    return _postprocess_dataframe(stats)
def pairwise_gameshowell(data=None, dv=None, between=None, effsize="hedges"):
"""Pairwise Games-Howell post-hoc test.
Parameters
----------
data : :py:class:`pandas.DataFrame`
DataFrame
dv : string
Name of column containing the dependent variable.
between: string
Name of column containing the between factor.
effsize : string or None
Effect size type. Available methods are:
* ``'none'``: no effect size
* ``'cohen'``: Unbiased Cohen d
* ``'hedges'``: Hedges g
* ``'r'``: Pearson correlation coefficient
* ``'eta_square'``: Eta-square
* ``'odds_ratio'``: Odds ratio
* ``'AUC'``: Area Under the Curve
* ``'CLES'``: Common Language Effect Size
Returns
-------
stats : :py:class:`pandas.DataFrame`
Stats summary:
* ``'A'``: Name of first measurement
* ``'B'``: Name of second measurement
* ``'mean_A'``: Mean of first measurement
* ``'mean_B'``: Mean of second measurement
* ``'diff'``: Mean difference (= mean(A) - mean(B))
* ``'se'``: Standard error
* ``'T'``: T-values
* ``'df'``: adjusted degrees of freedom
* ``'pval'``: Games-Howell corrected p-values
* ``'hedges'``: Hedges effect size (or any effect size defined in
``effsize``)
See also
--------
pairwise_tests, pairwise_tukey
Notes
-----
Games-Howell [1]_ is very similar to the Tukey HSD post-hoc test but is much more robust to
heterogeneity of variances. While the Tukey-HSD post-hoc is optimal after a classic one-way
ANOVA, the Games-Howell is optimal after a Welch ANOVA. Please note that Games-Howell
is not valid for repeated measures ANOVA. Only one-way ANOVA design are supported.
Compared to the Tukey-HSD test, the Games-Howell test uses different pooled variances for
each pair of variables instead of the same pooled variance.
The T-values are defined as:
.. math::
t = \\frac{\\overline{x}_i - \\overline{x}_j}
{\\sqrt{(\\frac{s_i^2}{n_i} + \\frac{s_j^2}{n_j})}}
and the corrected degrees of freedom are: