@@ -149,6 +149,11 @@ def test_multiindex(self):
149
149
150
150
151
151
class TestGetDummies (tm .TestCase ):
152
+
153
def setUp(self):
    """Create the fixture frame shared by the DataFrame dummy tests.

    The frame has two string columns (``A``, ``B``) and one integer
    column (``C``), three rows each.
    """
    fixture_data = {
        'A': ['a', 'b', 'a'],
        'B': ['b', 'b', 'c'],
        'C': [1, 2, 3],
    }
    self.df = DataFrame(fixture_data)
156
+
152
157
def test_basic (self ):
153
158
s_list = list ('abc' )
154
159
s_series = Series (s_list )
@@ -209,6 +214,114 @@ def test_unicode(self): # See GH 6885 - get_dummies chokes on unicode values
209
214
u ('letter_%s' ) % eacute : {0 : 0.0 , 1 : 1.0 , 2 : 1.0 }})
210
215
assert_frame_equal (res , exp )
211
216
217
def test_dataframe_dummies_all_obj(self):
    """Encode a frame holding only the string columns ``A`` and ``B``.

    The expected frame has one float indicator column per
    (column, value) pair, labelled ``<column>_<value>``.
    """
    strings_only = self.df[['A', 'B']]
    expected = DataFrame({
        'A_a': [1., 0, 1],
        'A_b': [0., 1, 0],
        'B_b': [1., 1, 0],
        'B_c': [0., 0, 1],
    })
    assert_frame_equal(get_dummies(strings_only), expected)
223
+
224
def test_dataframe_dummies_mix_default(self):
    """Call ``get_dummies`` with defaults on the mixed-dtype fixture.

    The expected frame keeps the integer column ``C`` first, followed by
    the indicator columns for ``A`` and then ``B``.
    """
    column_order = ['C', 'A_a', 'A_b', 'B_b', 'B_c']
    expected = DataFrame(
        {
            'C': [1, 2, 3],
            'A_a': [1., 0, 1],
            'A_b': [0., 1, 0],
            'B_b': [1., 1, 0],
            'B_c': [0., 0, 1],
        },
        columns=column_order,
    )
    assert_frame_equal(get_dummies(self.df), expected)
232
+
233
def test_dataframe_dummies_prefix_list(self):
    """Pass ``prefix`` as a list with one entry per encoded column.

    The expected labels use ``from_A`` for the values of ``A`` and
    ``from_B`` for the values of ``B``; ``C`` is carried over unchanged.
    """
    frame = DataFrame({
        'A': ['a', 'b', 'a'],
        'B': ['b', 'b', 'c'],
        'C': [1, 2, 3],
    })
    column_order = ['C', 'from_A_a', 'from_A_b', 'from_B_b', 'from_B_c']
    expected = DataFrame(
        {
            'C': [1, 2, 3],
            'from_A_a': [1., 0, 1],
            'from_A_b': [0., 1, 0],
            'from_B_b': [1., 1, 0],
            'from_B_c': [0., 0, 1],
        },
        columns=column_order,
    )
    result = get_dummies(frame, prefix=['from_A', 'from_B'])
    assert_frame_equal(result, expected)
244
+
245
def test_datafrmae_dummies_prefix_str(self):
    """Pass a single string as ``prefix`` (not a recommended usage).

    The same prefix is expected on every indicator column, so the
    expected frame carries the label ``bad_b`` twice.
    """
    # NOTE(review): the method name misspells "dataframe"; it is left
    # as-is so existing test selectors keep resolving.
    labels = ['C', 'bad_a', 'bad_b', 'bad_b', 'bad_c']
    rows = [
        [1, 1., 0., 1., 0.],
        [2, 0., 1., 1., 0.],
        [3, 1., 0., 0., 1.],
    ]
    expected = DataFrame(rows, columns=labels)
    assert_frame_equal(get_dummies(self.df, prefix='bad'), expected)
254
+
255
def test_dataframe_dummies_subset(self):
    """Encode only column ``A`` by passing ``columns=['A']``.

    ``B`` and ``C`` are expected to pass through untouched, followed by
    the indicator columns for ``A`` labelled with the ``from_A`` prefix.
    """
    df = self.df
    result = get_dummies(df, prefix=['from_A'], columns=['A'])
    expected = DataFrame({
        'from_A_a': [1., 0, 1],
        'from_A_b': [0., 1, 0],
        'B': ['b', 'b', 'c'],
        'C': [1, 2, 3],
    })
    # Pin the column order explicitly rather than relying on how the
    # DataFrame constructor happens to order dict keys; the sibling tests
    # do the same, with pass-through columns ahead of the dummies.
    expected = expected[['B', 'C', 'from_A_a', 'from_A_b']]
    assert_frame_equal(result, expected)
262
+
263
def test_dataframe_dummies_prefix_sep(self):
    """Check ``prefix_sep`` given as a string, a list and a dict."""
    frame = self.df

    # One separator string shared by every encoded column.
    shared_sep = DataFrame(
        {
            'C': [1, 2, 3],
            'A..a': [1., 0, 1],
            'A..b': [0., 1, 0],
            'B..b': [1., 1, 0],
            'B..c': [0., 0, 1],
        },
        columns=['C', 'A..a', 'A..b', 'B..b', 'B..c'],
    )
    assert_frame_equal(get_dummies(frame, prefix_sep='..'), shared_sep)

    # One separator per column: '..' is expected on A, '__' on B.
    per_column = shared_sep.rename(
        columns={'B..b': 'B__b', 'B..c': 'B__c'})
    assert_frame_equal(get_dummies(frame, prefix_sep=['..', '__']),
                       per_column)

    # The same separators keyed by column name give the same frame.
    assert_frame_equal(
        get_dummies(frame, prefix_sep={'A': '..', 'B': '__'}),
        per_column)
278
+
279
def test_dataframe_dummies_prefix_bad_length(self):
    """A one-element ``prefix`` list on the fixture must raise ValueError."""
    too_short = ['too few']
    with tm.assertRaises(ValueError):
        get_dummies(self.df, prefix=too_short)
282
+
283
def test_dataframe_dummies_prefix_sep_bad_length(self):
    """A one-element ``prefix_sep`` list on the fixture must raise ValueError."""
    too_short = ['bad']
    with tm.assertRaises(ValueError):
        get_dummies(self.df, prefix_sep=too_short)
286
+
287
def test_dataframe_dummies_prefix_dict(self):
    """Pass ``prefix`` as a dict mapping column name to prefix.

    The expected labels use ``from_A`` for the values of ``A`` and
    ``from_B`` for the values of ``B``; ``C`` is carried over unchanged.
    """
    prefixes = {'A': 'from_A', 'B': 'from_B'}
    df = DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'b', 'c'],
                    'C': [1, 2, 3]})
    result = get_dummies(df, prefix=prefixes)
    expected = DataFrame({'from_A_a': [1., 0, 1], 'from_A_b': [0., 1, 0],
                          'from_B_b': [1., 1, 0], 'from_B_c': [0., 0, 1],
                          'C': [1, 2, 3]})
    # Pin the column order explicitly instead of depending on how the
    # DataFrame constructor orders dict keys; this matches the order
    # asserted by the list-prefix test.
    expected = expected[['C', 'from_A_a', 'from_A_b', 'from_B_b',
                         'from_B_c']]
    assert_frame_equal(result, expected)
296
+
297
def test_dataframe_dummies_with_na(self):
    """Check ``dummy_na`` on a frame that contains an all-NaN row.

    With ``dummy_na=True`` the expected frame has an extra ``*_nan``
    indicator per encoded column; with ``dummy_na=False`` those columns
    are absent and the NaN row is all zeros in the indicators.
    """
    # Assign an all-NaN row at label 3 (this mutates the fixture frame).
    self.df.loc[3, :] = [np.nan, np.nan, np.nan]
    frame = self.df

    with_nan_columns = DataFrame(
        {
            'C': [1, 2, 3, np.nan],
            'A_a': [1., 0, 1, 0],
            'A_b': [0., 1, 0, 0],
            'A_nan': [0., 0, 0, 1],
            'B_b': [1., 1, 0, 0],
            'B_c': [0., 0, 1, 0],
            'B_nan': [0., 0, 0, 1],
        },
        columns=['C', 'A_a', 'A_b', 'A_nan', 'B_b', 'B_c', 'B_nan'],
    )
    assert_frame_equal(get_dummies(frame, dummy_na=True), with_nan_columns)

    # Dropping the *_nan columns gives the expectation for dummy_na=False.
    without_nan_columns = with_nan_columns[['C', 'A_a', 'A_b', 'B_b', 'B_c']]
    assert_frame_equal(get_dummies(frame, dummy_na=False),
                       without_nan_columns)
311
+
312
def test_dataframe_dummies_with_categorical(self):
    """Check that a Categorical column is encoded along with the strings.

    A ``cat`` column is added to the fixture; the expected frame has
    ``cat_x`` and ``cat_y`` indicators after those for ``A`` and ``B``.
    """
    # Adds a column to the fixture frame in place.
    self.df['cat'] = pd.Categorical(['x', 'y', 'y'])

    column_order = ['C', 'A_a', 'A_b', 'B_b', 'B_c', 'cat_x', 'cat_y']
    expected = DataFrame(
        {
            'C': [1, 2, 3],
            'A_a': [1., 0, 1],
            'A_b': [0., 1, 0],
            'B_b': [1., 1, 0],
            'B_c': [0., 0, 1],
            'cat_x': [1., 0, 0],
            'cat_y': [0., 1, 1],
        },
        columns=column_order,
    )
    assert_frame_equal(get_dummies(self.df), expected)
323
+
324
+
212
325
class TestConvertDummies (tm .TestCase ):
213
326
def test_convert_dummies (self ):
214
327
df = DataFrame ({'A' : ['foo' , 'bar' , 'foo' , 'bar' ,
@@ -218,8 +331,9 @@ def test_convert_dummies(self):
218
331
'C' : np .random .randn (8 ),
219
332
'D' : np .random .randn (8 )})
220
333
221
- result = convert_dummies (df , ['A' , 'B' ])
222
- result2 = convert_dummies (df , ['A' , 'B' ], prefix_sep = '.' )
334
+ with tm .assert_produces_warning (FutureWarning ):
335
+ result = convert_dummies (df , ['A' , 'B' ])
336
+ result2 = convert_dummies (df , ['A' , 'B' ], prefix_sep = '.' )
223
337
224
338
expected = DataFrame ({'A_foo' : [1 , 0 , 1 , 0 , 1 , 0 , 1 , 1 ],
225
339
'A_bar' : [0 , 1 , 0 , 1 , 0 , 1 , 0 , 0 ],
0 commit comments