Skip to content

Commit e25e69d

Browse files
danfrankjpandres
authored and committed
enable multivalues insert (pandas-dev#19664)
1 parent 0846f93 commit e25e69d

File tree

4 files changed

+59
-3
lines changed

4 files changed

+59
-3
lines changed

doc/source/io.rst

+6
Original file line numberDiff line numberDiff line change
@@ -4711,6 +4711,12 @@ writes ``data`` to the database in batches of 1000 rows at a time:
47114711
47124712
data.to_sql('data_chunked', engine, chunksize=1000)
47134713
4714+
.. note::
4715+
4716+
The function :func:`~pandas.DataFrame.to_sql` will perform a multivalue
4717+
insert if the engine dialect ``supports_multivalues_insert``. This will
4718+
greatly speed up the insert in some cases.
4719+
47144720
SQL data types
47154721
++++++++++++++
47164722

doc/source/whatsnew/v0.23.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,8 @@ Other Enhancements
341341
- :func:`DataFrame.replace` now supports the ``method`` parameter, which can be used to specify the replacement method when ``to_replace`` is a scalar, list or tuple and ``value`` is ``None`` (:issue:`19632`)
342342
- :meth:`Timestamp.month_name`, :meth:`DatetimeIndex.month_name`, and :meth:`Series.dt.month_name` are now available (:issue:`12805`)
343343
- :meth:`Timestamp.day_name` and :meth:`DatetimeIndex.day_name` are now available to return day names with a specified locale (:issue:`12806`)
344+
- :meth:`DataFrame.to_sql` now performs a multivalue insert if the underlying connection supports it rather than inserting row by row.
345+
``SQLAlchemy`` dialects supporting multivalue inserts include: ``mysql``, ``postgresql``, ``sqlite`` and any dialect with ``supports_multivalues_insert``. (:issue:`14315`, :issue:`8953`)
344346

345347
.. _whatsnew_0230.api_breaking:
346348

pandas/io/sql.py

+25-3
Original file line numberDiff line numberDiff line change
@@ -572,8 +572,29 @@ def create(self):
572572
else:
573573
self._execute_create()
574574

575-
def insert_statement(self):
576-
return self.table.insert()
575+
def insert_statement(self, data, conn):
576+
"""
577+
Generate tuple of SQLAlchemy insert statement and any arguments
578+
to be executed by connection (via `_execute_insert`).
579+
580+
Parameters
581+
----------
582+
conn : SQLAlchemy connectable(engine/connection)
583+
Connection to receive the data
584+
data : list of dict
585+
The data to be inserted
586+
587+
Returns
588+
-------
589+
SQLAlchemy statement
590+
insert statement
591+
*, optional
592+
Additional parameters to be passed when executing insert statement
593+
"""
594+
dialect = getattr(conn, 'dialect', None)
595+
if dialect and getattr(dialect, 'supports_multivalues_insert', False):
596+
return self.table.insert(data),
597+
return self.table.insert(), data
577598

578599
def insert_data(self):
579600
if self.index is not None:
@@ -612,8 +633,9 @@ def insert_data(self):
612633
return column_names, data_list
613634

614635
def _execute_insert(self, conn, keys, data_iter):
636+
"""Insert data into this table with database connection"""
615637
data = [{k: v for k, v in zip(keys, row)} for row in data_iter]
616-
conn.execute(self.insert_statement(), data)
638+
conn.execute(*self.insert_statement(data, conn))
617639

618640
def insert(self, chunksize=None):
619641
keys, data_list = self.insert_data()

pandas/tests/io/test_sql.py

+26
Original file line numberDiff line numberDiff line change
@@ -1665,6 +1665,29 @@ class Temporary(Base):
16651665

16661666
tm.assert_frame_equal(df, expected)
16671667

1668+
def test_insert_multivalues(self):
1669+
# issues addressed
1670+
# https://github.com/pandas-dev/pandas/issues/14315
1671+
# https://github.com/pandas-dev/pandas/issues/8953
1672+
1673+
db = sql.SQLDatabase(self.conn)
1674+
df = DataFrame({'A': [1, 0, 0], 'B': [1.1, 0.2, 4.3]})
1675+
table = sql.SQLTable("test_table", db, frame=df)
1676+
data = [
1677+
{'A': 1, 'B': 0.46},
1678+
{'A': 0, 'B': -2.06}
1679+
]
1680+
statement = table.insert_statement(data, conn=self.conn)[0]
1681+
1682+
if self.supports_multivalues_insert:
1683+
assert statement.parameters == data, (
1684+
'insert statement should be multivalues'
1685+
)
1686+
else:
1687+
assert statement.parameters is None, (
1688+
'insert statement should not be multivalues'
1689+
)
1690+
16681691

16691692
class _TestSQLAlchemyConn(_EngineToConnMixin, _TestSQLAlchemy):
16701693

@@ -1679,6 +1702,7 @@ class _TestSQLiteAlchemy(object):
16791702
16801703
"""
16811704
flavor = 'sqlite'
1705+
supports_multivalues_insert = True
16821706

16831707
@classmethod
16841708
def connect(cls):
@@ -1727,6 +1751,7 @@ class _TestMySQLAlchemy(object):
17271751
17281752
"""
17291753
flavor = 'mysql'
1754+
supports_multivalues_insert = True
17301755

17311756
@classmethod
17321757
def connect(cls):
@@ -1796,6 +1821,7 @@ class _TestPostgreSQLAlchemy(object):
17961821
17971822
"""
17981823
flavor = 'postgresql'
1824+
supports_multivalues_insert = True
17991825

18001826
@classmethod
18011827
def connect(cls):

0 commit comments

Comments
 (0)