@@ -2625,6 +2625,9 @@ def foo(x):
         ("int64_col", "boolean"),
         ("int64_col", pd.ArrowDtype(pa.decimal128(38, 9))),
         ("int64_col", pd.ArrowDtype(pa.decimal256(76, 38))),
+        ("int64_col", pd.ArrowDtype(pa.timestamp("us"))),
+        ("int64_col", pd.ArrowDtype(pa.timestamp("us", tz="UTC"))),
+        ("int64_col", "time64[us][pyarrow]"),
         ("bool_col", "Int64"),
         ("bool_col", "string[pyarrow]"),
         ("string_col", "binary[pyarrow]"),
@@ -2633,9 +2636,17 @@ def foo(x):
         # raises a deprecation warning to use tz_localize/tz_convert instead,
         # but BigQuery always stores values as UTC and doesn't have to deal
         # with timezone conversions, so we'll allow it.
+        ("timestamp_col", "date32[day][pyarrow]"),
+        ("timestamp_col", "time64[us][pyarrow]"),
         ("timestamp_col", pd.ArrowDtype(pa.timestamp("us"))),
+        ("datetime_col", "date32[day][pyarrow]"),
+        ("datetime_col", "string[pyarrow]"),
+        ("datetime_col", "time64[us][pyarrow]"),
         ("datetime_col", pd.ArrowDtype(pa.timestamp("us", tz="UTC"))),
         ("date_col", "string[pyarrow]"),
+        ("date_col", pd.ArrowDtype(pa.timestamp("us"))),
+        ("date_col", pd.ArrowDtype(pa.timestamp("us", tz="UTC"))),
+        ("time_col", "string[pyarrow]"),
         # TODO(bmil): fix Ibis bug: BigQuery backend rounds to nearest int
         # ("float64_col", "Int64"),
         # TODO(bmil): decide whether to fix Ibis bug: BigQuery backend
@@ -2653,6 +2664,24 @@ def test_astype(scalars_df_index, scalars_pandas_df_index, column, to_type):
     pd.testing.assert_series_equal(bf_result, pd_result)


+@pytest.mark.parametrize(
+    ("column", "to_type"),
+    [
+        ("timestamp_col", "int64[pyarrow]"),
+        ("datetime_col", "int64[pyarrow]"),
+        ("time_col", "int64[pyarrow]"),
+    ],
+)
+@skip_legacy_pandas
+def test_date_time_astype_int(
+    scalars_df_index, scalars_pandas_df_index, column, to_type
+):
+    bf_result = scalars_df_index[column].astype(to_type).to_pandas()
+    pd_result = scalars_pandas_df_index[column].astype(to_type)
+    pd.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
+    assert bf_result.dtype == "Int64"
+
+
 def test_string_astype_int():
     pd_series = pd.Series(["4", "-7", "0", " -03"])
     bf_series = series.Series(pd_series)
@@ -2676,6 +2705,75 @@ def test_string_astype_float():
     pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)


+def test_string_astype_date():
+    pd_series = pd.Series(["2014-08-15", "2215-08-15", "2016-02-29"]).astype(
+        pd.ArrowDtype(pa.string())
+    )
+
+    bf_series = series.Series(pd_series)
+
+    pd_result = pd_series.astype("date32[day][pyarrow]")
+    bf_result = bf_series.astype("date32[day][pyarrow]").to_pandas()
+
+    pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
+
+
+def test_string_astype_datetime():
+    pd_series = pd.Series(
+        ["2014-08-15 08:15:12", "2015-08-15 08:15:12.654754", "2016-02-29 00:00:00"]
+    ).astype(pd.ArrowDtype(pa.string()))
+
+    bf_series = series.Series(pd_series)
+
+    pd_result = pd_series.astype(pd.ArrowDtype(pa.timestamp("us")))
+    bf_result = bf_series.astype(pd.ArrowDtype(pa.timestamp("us"))).to_pandas()
+
+    pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
+
+
+def test_string_astype_timestamp():
+    pd_series = pd.Series(
+        [
+            "2014-08-15 08:15:12+00:00",
+            "2015-08-15 08:15:12.654754+05:00",
+            "2016-02-29 00:00:00+08:00",
+        ]
+    ).astype(pd.ArrowDtype(pa.string()))
+
+    bf_series = series.Series(pd_series)
+
+    pd_result = pd_series.astype(pd.ArrowDtype(pa.timestamp("us", tz="UTC")))
+    bf_result = bf_series.astype(
+        pd.ArrowDtype(pa.timestamp("us", tz="UTC"))
+    ).to_pandas()
+
+    pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
+
+
+def test_timestamp_astype_string():
+    bf_series = series.Series(
+        [
+            "2014-08-15 08:15:12+00:00",
+            "2015-08-15 08:15:12.654754+05:00",
+            "2016-02-29 00:00:00+08:00",
+        ]
+    ).astype(pd.ArrowDtype(pa.timestamp("us", tz="UTC")))
+
+    expected_result = pd.Series(
+        [
+            "2014-08-15 08:15:12+00",
+            "2015-08-15 03:15:12.654754+00",
+            "2016-02-28 16:00:00+00",
+        ]
+    )
+    bf_result = bf_series.astype(pa.string()).to_pandas()
+
+    pd.testing.assert_series_equal(
+        bf_result, expected_result, check_index_type=False, check_dtype=False
+    )
+    assert bf_result.dtype == "string[pyarrow]"
+
+
 @pytest.mark.parametrize(
     "index",
     [0, 5, -2],
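Note (not part of the diff above): a minimal standalone sketch of the behavior the new test_date_time_astype_int cases rely on, assuming a recent pandas/pyarrow where Arrow-backed temporal columns accept astype("int64[pyarrow]"). Casting an Arrow timestamp("us") value to int64 exposes the underlying microseconds since the Unix epoch, which is why the test compares against plain pandas with check_dtype=False and then asserts the BigQuery DataFrames dtype separately.

import pandas as pd
import pyarrow as pa

# Sketch only: plain pandas, not BigQuery DataFrames; the value is illustrative.
s = pd.Series(pd.to_datetime(["2014-08-15 08:15:12"])).astype(
    pd.ArrowDtype(pa.timestamp("us"))
)
print(s.astype("int64[pyarrow]")[0])  # 1408090512000000, microseconds since epoch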