@@ -7,6 +7,17 @@ X, Explanatory: ``Categorical``
7
7
Y, Response: ``Categorical``
8
8
Type: ``Non-Parametric``
9
9
10
+ .. code:: python
11
+
12
+ print 'chi-square statistic, p-value, degrees of freedom, expected counts'
13
+ print ss.chi2_contingency(ct1)
14
+
15
+ chi-square statistic, p-value, degrees of freedom, expected counts
16
+ (1263.6306705804054, 2.554837585615145e-272, 4, array([[ 7.74251477e+03, 1.71950205e+03, 3.69930718e+02,
17
+ 4.25495413e+01, 2.50291420e+00],
18
+ [ 7.72448523e+03, 1.71549795e+03, 3.69069282e+02,
19
+ 4.24504587e+01, 2.49708580e+00]]))
20
+
10
21
11
22
Student's T-Test
12
23
----------------
17
28
-----
18
29
Type: ``Parametric``
19
30
20
- Analysis of Variance (ANOVA).
31
+ Analysis of Variance (ANOVA).
32
+
33
+
34
+ .. code:: python
35
+
36
+ # ### IMPORT MODULES ####
37
+ import numpy as np
38
+ import pandas as pd
39
+ import statsmodels.formula.api as smf
40
+ import statsmodels.stats.multicomp as multi
41
+
42
+
43
+
44
+ # ### FIT MODEL ####
45
+ # formula is response ~ explanatory (i.e. y ~ x); 'C(...)' marks a categorical variable
46
+ # One-way ANOVA: a single categorical factor (layers) with multiple levels
47
+ model = smf.ols(formula='diameter ~ C(layers)', data=df3)
48
+ results = model.fit()
49
+ >>> print results.summary()
50
+
51
+
52
+ OLS Regression Results
53
+ ==============================================================================
54
+ Dep. Variable: diameter R-squared: 0.219
54
+ Model: OLS Adj. R-squared: 0.219
55
+ Method: Least Squares F-statistic: 1383.
56
+ Date: Tue, 02 Aug 2016 Prob (F-statistic): 0.00
57
+ Time: 17:04:57 Log-Likelihood: -60976.
58
+ No. Observations: 19731 AIC: 1.220e+05
59
+ Df Residuals: 19726 BIC: 1.220e+05
61
+ Df Model: 4
62
+ Covariance Type: nonrobust
63
+ ==================================================================================
64
+ coef std err t P>|t| [95.0% Conf. Int.]
65
+ ----------------------------------------------------------------------------------
66
+ Intercept 6.7217 0.043 157.125 0.000 6.638 6.806
67
+ C(layers)[T.2] 3.3941 0.100 33.822 0.000 3.197 3.591
68
+ C(layers)[T.3] 12.2841 0.200 61.319 0.000 11.891 12.677
69
+ C(layers)[T.4] 18.3139 0.579 31.649 0.000 17.180 19.448
70
+ C(layers)[T.5] 21.8123 2.380 9.166 0.000 17.148 26.477
71
+ ==============================================================================
72
+ Omnibus: 14916.319 Durbin-Watson: 0.529
73
+ Prob(Omnibus): 0.000 Jarque-Bera (JB): 577157.627
74
+ Skew: 3.262 Prob(JB): 0.00
75
+ Kurtosis: 28.680 Cond. No. 64.0
76
+ ==============================================================================
77
+
78
+ Warnings:
79
+ [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
80
+
81
+
82
+
83
+
84
+ # ### POST-HOC TEST ####
85
+ mc = multi.MultiComparison(df3['diameter'], df3['layers'])
86
+ result1 = mc.tukeyhsd()
87
+ print result1
88
+
89
+
90
+ Multiple Comparison of Means - Tukey HSD,FWER=0.05
91
+ =============================================
92
+ group1 group2 meandiff lower upper reject
93
+ ---------------------------------------------
94
+ 1 2 3.3941 3.1204 3.6679 True
95
+ 1 3 12.2841 11.7376 12.8306 True
96
+ 1 4 18.3139 16.7353 19.8925 True
97
+ 1 5 21.8123 15.3204 28.3041 True
98
+ 2 3 8.89 8.3015 9.4785 True
99
+ 2 4 14.9198 13.3262 16.5134 True
100
+ 2 5 18.4181 11.9226 24.9137 True
101
+ 3 4 6.0298 4.3675 7.6921 True
102
+ 3 5 9.5281 3.0154 16.0409 True
103
+ 4 5 3.4984 -3.1806 10.1773 False
104
+ ---------------------------------------------
0 commit comments