30
30
# The source notes that ``frame`` prints as None for this dataset object.
print("[INFO]--df_calif_h.keys()--", df_calif_h.frame)  # None

# Feature matrix and target, taken straight from the loaded dataset object.
X_full = df_calif_h.data
y_full = df_calif_h.target

feature_names = df_calif_h.feature_names
print("[INFO]--df_calif---feature_names--", feature_names)
# - ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']

# Human-readable axis labels keyed by the dataset's feature names.
feature_mapping = dict(
    MedInc="Median income in block",
    HouseAge="Median house age in block",
    AveRooms="Average number of rooms",
    AveBedrms="Average number of bedrooms",
    Population="Block population",
    AveOccup="Average house occupancy",
    Latitude="House block latitude",
    Longitude="House block longitude",
)
46
48
47
49
# # Take only 2 features to make visualization easier
48
50
# # Feature MedInc has a long tail distribution.
49
51
# # Feature AveOccup has a few but very large outliers.
50
52
51
- # features = ["MedInc", "AveOccup"]
52
- # features_idx = [feature_names.index(feature) for feature in features]
53
- # X = X_full[:, features_idx]
54
- # distributions = [
55
- # ("Unscaled data", X),
56
- # ("Data after standard scaling", StandardScaler().fit_transform(X)),
57
- # ("Data after min-max scaling", MinMaxScaler().fit_transform(X)),
58
- # ("Data after max-abs scaling", MaxAbsScaler().fit_transform(X)),
59
- # (
60
- # "Data after robust scaling",
61
- # RobustScaler(quantile_range=(25, 75)).fit_transform(X),
62
- # ),
63
- # (
64
- # "Data after power transformation (Yeo-Johnson)",
65
- # PowerTransformer(method="yeo-johnson").fit_transform(X),
66
- # ),
67
- # (
68
- # "Data after power transformation (Box-Cox)",
69
- # PowerTransformer(method="box-cox").fit_transform(X),
70
- # ),
71
- # (
72
- # "Data after quantile transformation (uniform pdf)",
73
- # QuantileTransformer(
74
- # output_distribution="uniform", random_state=42
75
- # ).fit_transform(X),
76
- # ),
77
- # (
78
- # "Data after quantile transformation (gaussian pdf)",
79
- # QuantileTransformer(
80
- # output_distribution="normal", random_state=42
81
- # ).fit_transform(X),
82
- # ),
83
- # ("Data after sample-wise L2 normalizing", Normalizer().fit_transform(X)),
84
- # ]
85
-
86
- # # scale the output between 0 and 1 for the colorbar
87
- # y = minmax_scale(y_full)
53
# Keep only two columns of the full feature matrix so the data can be drawn
# as a 2-D scatter plot.
features = ["MedInc", "AveOccup"]
features_idx = [feature_names.index(name) for name in features]
print("--features_idx--", features_idx)
print("--features_idx----X_full.shape-", X_full.shape)
X = X_full[:, features_idx]

print("--features_idx---type(X)-", type(X))
print("--features_idx---X.shape-", X.shape)  # ---X.shape- (20640, 2)

# Each entry is (title, data): the raw two-column matrix first, followed by
# the output of fitting every transformer below on X and applying it to X.
distributions = [("Unscaled data", X)] + [
    (title, transformer.fit_transform(X))
    for title, transformer in (
        ("Data after standard scaling", StandardScaler()),
        ("Data after min-max scaling", MinMaxScaler()),
        ("Data after max-abs scaling", MaxAbsScaler()),
        ("Data after robust scaling", RobustScaler(quantile_range=(25, 75))),
        (
            "Data after power transformation (Yeo-Johnson)",
            PowerTransformer(method="yeo-johnson"),
        ),
        (
            "Data after power transformation (Box-Cox)",
            PowerTransformer(method="box-cox"),
        ),
        (
            "Data after quantile transformation (uniform pdf)",
            QuantileTransformer(output_distribution="uniform", random_state=42),
        ),
        (
            "Data after quantile transformation (gaussian pdf)",
            QuantileTransformer(output_distribution="normal", random_state=42),
        ),
        ("Data after sample-wise L2 normalizing", Normalizer()),
    )
]

# scale the output between 0 and 1 for the colorbar
y = minmax_scale(y_full)
88
97
89
98
# # plasma does not exist in matplotlib < 1.5
90
- # cmap = getattr(cm, "plasma_r", cm.hot_r)
91
-
92
-
93
- # def create_axes(title, figsize=(16, 6)):
94
- # fig = plt.figure(figsize=figsize)
95
- # fig.suptitle(title)
96
-
97
- # # define the axis for the first plot
98
- # left, width = 0.1, 0.22
99
- # bottom, height = 0.1, 0.7
100
- # bottom_h = height + 0.15
101
- # left_h = left + width + 0.02
102
-
103
- # rect_scatter = [left, bottom, width, height]
104
- # rect_histx = [left, bottom_h, width, 0.1]
105
- # rect_histy = [left_h, bottom, 0.05, height]
106
-
107
- # ax_scatter = plt.axes(rect_scatter)
108
- # ax_histx = plt.axes(rect_histx)
109
- # ax_histy = plt.axes(rect_histy)
110
-
111
- # # define the axis for the zoomed-in plot
112
- # left = width + left + 0.2
113
- # left_h = left + width + 0.02
114
-
115
- # rect_scatter = [left, bottom, width, height]
116
- # rect_histx = [left, bottom_h, width, 0.1]
117
- # rect_histy = [left_h, bottom, 0.05, height]
118
-
119
- # ax_scatter_zoom = plt.axes(rect_scatter)
120
- # ax_histx_zoom = plt.axes(rect_histx)
121
- # ax_histy_zoom = plt.axes(rect_histy)
122
-
123
- # # define the axis for the colorbar
124
- # left, width = width + left + 0.13, 0.01
125
-
126
- # rect_colorbar = [left, bottom, width, height]
127
- # ax_colorbar = plt.axes(rect_colorbar)
128
-
129
- # return (
130
- # (ax_scatter, ax_histy, ax_histx),
131
- # (ax_scatter_zoom, ax_histy_zoom, ax_histx_zoom),
132
- # ax_colorbar,
133
- # )
134
-
135
-
136
- # def plot_distribution(axes, X, y, hist_nbins=50, title="", x0_label="", x1_label=""):
137
- # ax, hist_X1, hist_X0 = axes
138
-
139
- # ax.set_title(title)
140
- # ax.set_xlabel(x0_label)
141
- # ax.set_ylabel(x1_label)
142
-
143
- # # The scatter plot
144
- # colors = cmap(y)
145
- # ax.scatter(X[:, 0], X[:, 1], alpha=0.5, marker="o", s=5, lw=0, c=colors)
146
-
147
- # # Removing the top and the right spine for aesthetics
148
- # # make nice axis layout
149
- # ax.spines["top"].set_visible(False)
150
- # ax.spines["right"].set_visible(False)
151
- # ax.get_xaxis().tick_bottom()
152
- # ax.get_yaxis().tick_left()
153
- # ax.spines["left"].set_position(("outward", 10))
154
- # ax.spines["bottom"].set_position(("outward", 10))
155
-
156
- # # Histogram for axis X1 (feature 5)
157
- # hist_X1.set_ylim(ax.get_ylim())
158
- # hist_X1.hist(
159
- # X[:, 1], bins=hist_nbins, orientation="horizontal", color="grey", ec="grey"
160
- # )
161
- # hist_X1.axis("off")
162
-
163
- # # Histogram for axis X0 (feature 0)
164
- # hist_X0.set_xlim(ax.get_xlim())
165
- # hist_X0.hist(
166
- # X[:, 0], bins=hist_nbins, orientation="vertical", color="grey", ec="grey"
167
- # )
168
- # hist_X0.axis("off")
99
# Use the reversed "plasma" colormap when the installed matplotlib provides
# it, otherwise fall back to reversed "hot".
cmap = cm.plasma_r if hasattr(cm, "plasma_r") else cm.hot_r
100
+
101
+
102
def create_axes(title, figsize=(16, 6)):
    """Create a figure with two scatter panels, their marginal histograms and a colorbar.

    The figure holds seven axes placed by explicit figure-fraction
    rectangles: a left panel (scatter + histogram above + histogram to the
    right), an identically sized panel shifted to the right for the
    zoomed-in view, and a thin colorbar axis at the far right.

    Parameters
    ----------
    title : str
        Text used as the figure's super-title.
    figsize : tuple of (float, float), default=(16, 6)
        Figure size forwarded to ``plt.figure``.

    Returns
    -------
    tuple
        ``((ax_scatter, ax_histy, ax_histx),
        (ax_scatter_zoom, ax_histy_zoom, ax_histx_zoom),
        ax_colorbar)``.
    """
    fig = plt.figure(figsize=figsize)
    fig.suptitle(title)

    # Geometry shared by both panels (fractions of the figure size).
    width, height = 0.22, 0.7
    bottom = 0.1
    bottom_h = height + 0.15

    def _add_panel(left):
        # Build one scatter axis plus its top and right histogram axes,
        # all anchored at the given left edge.
        left_h = left + width + 0.02
        ax_main = fig.add_axes([left, bottom, width, height])
        ax_top = fig.add_axes([left, bottom_h, width, 0.1])
        ax_right = fig.add_axes([left_h, bottom, 0.05, height])
        return ax_main, ax_right, ax_top

    # define the axis for the first plot
    left = 0.1
    ax_scatter, ax_histy, ax_histx = _add_panel(left)

    # define the axis for the zoomed-in plot
    # The rectangles must be rebuilt from the shifted left edge; reusing the
    # first panel's rectangles would stack the zoom axes on top of it.
    left = width + left + 0.2
    ax_scatter_zoom, ax_histy_zoom, ax_histx_zoom = _add_panel(left)

    # define the axis for the colorbar
    left, width = width + left + 0.13, 0.01
    ax_colorbar = fig.add_axes([left, bottom, width, height])

    return (
        (ax_scatter, ax_histy, ax_histx),
        (ax_scatter_zoom, ax_histy_zoom, ax_histx_zoom),
        ax_colorbar,
    )
147
+
148
+
149
def plot_distribution(axes, X, y, hist_nbins=50, title="", x0_label="", x1_label=""):
    """Draw a colored scatter plot of two columns with a histogram on each margin.

    Parameters
    ----------
    axes : sequence of three axes
        Unpacked as ``(scatter axis, histogram axis for column 1,
        histogram axis for column 0)``.
    X : array indexable as ``X[:, 0]`` and ``X[:, 1]``
        Column 0 goes on the x-axis, column 1 on the y-axis.
    y : array-like
        Values passed through the module-level ``cmap`` to color the points.
    hist_nbins : int, default=50
        Number of bins for both marginal histograms.
    title : str, default=""
        Title of the scatter axis.
    x0_label, x1_label : str, default=""
        Labels of the x- and y-axis of the scatter axis.
    """
    print("[INFO]---plot_distribution-axes---", axes)
    print("[INFO]---plot_distribution-y---", y)

    scatter_ax, side_hist, top_hist = axes

    scatter_ax.set_title(title)
    print("[INFO]---plot_distribution----title--", title)
    scatter_ax.set_xlabel(x0_label)
    scatter_ax.set_ylabel(x1_label)

    # Scatter plot; point colors come from mapping y through the colormap.
    point_colors = cmap(y)
    col0, col1 = X[:, 0], X[:, 1]
    scatter_ax.scatter(col0, col1, alpha=0.5, marker="o", s=5, lw=0, c=point_colors)

    # Hide the top/right spines and push the remaining ones outward.
    spines = scatter_ax.spines
    spines["top"].set_visible(False)
    spines["right"].set_visible(False)
    scatter_ax.get_xaxis().tick_bottom()
    scatter_ax.get_yaxis().tick_left()
    spines["left"].set_position(("outward", 10))
    spines["bottom"].set_position(("outward", 10))

    # Settings common to both marginal histograms.
    hist_style = dict(bins=hist_nbins, color="grey", ec="grey")

    # Right-hand histogram of column 1, aligned with the scatter's y-range.
    side_hist.set_ylim(scatter_ax.get_ylim())
    side_hist.hist(col1, orientation="horizontal", **hist_style)
    side_hist.axis("off")

    print("---plot_distribution---type(hist_X1---", type(side_hist))

    # Top histogram of column 0, aligned with the scatter's x-range.
    top_hist.set_xlim(scatter_ax.get_xlim())
    top_hist.hist(col0, orientation="vertical", **hist_style)
    top_hist.axis("off")
    print("---plot_distribution---type(hist_X0---", type(top_hist))
195
+
196
+
197
def make_plot(item_idx):
    """Plot one entry of ``distributions`` as a full view, a zoomed view and a colorbar.

    Parameters
    ----------
    item_idx : int
        Index into the module-level ``distributions`` list; the selected
        entry supplies the figure title and the two-column data to plot.
    """
    print("--make_plot-making---")
    title, X = distributions[item_idx]
    print("--make_plot-----title-", title)

    full_axes, zoom_axes, ax_colorbar = create_axes(title)

    # Both panels share the same axis labels.
    axis_labels = {
        "x0_label": feature_mapping[features[0]],
        "x1_label": feature_mapping[features[1]],
    }

    plot_distribution(
        full_axes, X, y, hist_nbins=200, title="Full data", **axis_labels
    )

    # zoom-in: keep rows lying strictly between the 0th and 99th percentile
    # of each column.
    zoom_in_percentile_range = (0, 99)
    low_x0, high_x0 = np.percentile(X[:, 0], zoom_in_percentile_range)
    low_x1, high_x1 = np.percentile(X[:, 1], zoom_in_percentile_range)

    above_lower = np.all(X > [low_x0, low_x1], axis=1)
    below_upper = np.all(X < [high_x0, high_x1], axis=1)
    non_outliers_mask = above_lower & below_upper
    print("--make_plot-----non_outliers_mask-", type(non_outliers_mask))
    print("--make_plot-----non_outliers_mask.shape---", non_outliers_mask.shape)

    plot_distribution(
        zoom_axes,
        X[non_outliers_mask],
        y[non_outliers_mask],
        hist_nbins=50,
        title="Zoom-in",
        **axis_labels,
    )

    # The colorbar is normalized to the range of the unscaled target.
    mpl.colorbar.ColorbarBase(
        ax_colorbar,
        cmap=cmap,
        norm=mpl.colors.Normalize(y_full.min(), y_full.max()),
        orientation="vertical",
        label="Color mapping for values of y",
    )
245
+
246
+
247
if __name__ == "__main__":
    # The argument indexes ``distributions``: 0 is the unscaled data,
    # 1 is "Data after standard scaling", 2 is "Data after min-max scaling".
    # Only index 1 is currently plotted; the other calls are kept disabled.
    #make_plot(0)
    #plt.show() #OK_1
    make_plot(1)
    plt.show() # OK_2
    # make_plot(2)
    # plt.show() # OK_2
0 commit comments