BUG/MAINT: Change default of inplace to False in pd.eval (#16732)

gfyoung · jreback · commit 8d197ba63128 · 2017-07-06T18:46:00.000-04:00
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
@@ -45,6 +45,52 @@ Other Enhancements
 Backwards incompatible API changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+Improved error handling during item assignment in pd.eval
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. _whatsnew_0210.api_breaking.pandas_eval:
+
+:func:`eval` will now raise a ``ValueError`` when item assignment malfunctions, or
+inplace operations are specified, but there is no item assignment in the expression (:issue:`16732`)
+
+.. ipython:: python
+
+   arr = np.array([1, 2, 3])
+
+Previously, if you attempted the following expression, you would get a not very helpful error message:
+
+.. code-block:: ipython
+
+  In [3]: pd.eval("a = 1 + 2", target=arr, inplace=True)
+  ...
+  IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`)
+  and integer or boolean arrays are valid indices
+
+This is a very long way of saying numpy arrays don't support string-item indexing. With this
+change, the error message is now this:
+
+.. code-block:: python
+
+   In [3]: pd.eval("a = 1 + 2", target=arr, inplace=True)
+   ...
+   ValueError: Cannot assign expression output to target
+
+It also used to be possible to evaluate expressions inplace, even if there was no item assignment:
+
+.. code-block:: ipython
+
+  In [4]: pd.eval("1 + 2", target=arr, inplace=True)
+  Out[4]: 3
+
+However, this input does not make much sense because the output is not being assigned to
+the target. Now, a ``ValueError`` will be raised when such an input is passed in:
+
+.. code-block:: ipython
+
+   In [4]: pd.eval("1 + 2", target=arr, inplace=True)
+   ...
+   ValueError: Cannot operate inplace if there is no assignment
+
 - Support has been dropped for Python 3.4 (:issue:`15251`)
 - The Categorical constructor no longer accepts a scalar for the ``categories`` keyword. (:issue:`16022`)
 - Accessing a non-existent attribute on a closed :class:`HDFStore` will now
@@ -79,6 +125,7 @@ Removal of prior version deprecations/changes
 - The ``pd.options.display.mpl_style`` configuration has been dropped (:issue:`12190`)
 - ``Index`` has dropped the ``.sym_diff()`` method in favor of ``.symmetric_difference()`` (:issue:`12591`)
 - ``Categorical`` has dropped the ``.order()`` and ``.sort()`` methods in favor of ``.sort_values()`` (:issue:`12882`)
+- :func:`eval` and :method:`DataFrame.eval` have changed the default of ``inplace`` from ``None`` to ``False`` (:issue:`11149`)
 
 
 .. _whatsnew_0210.performance:
@@ -145,3 +192,4 @@ Categorical
 
 Other
 ^^^^^
+- Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`)
diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py
@@ -3,7 +3,6 @@
 """Top level ``eval`` module.
 """
 
-import warnings
 import tokenize
 from pandas.io.formats.printing import pprint_thing
 from pandas.core.computation import _NUMEXPR_INSTALLED
@@ -148,7 +147,7 @@ def _check_for_locals(expr, stack_level, parser):
 
 def eval(expr, parser='pandas', engine=None, truediv=True,
          local_dict=None, global_dict=None, resolvers=(), level=0,
-         target=None, inplace=None):
+         target=None, inplace=False):
     """Evaluate a Python expression as a string using various backends.
 
     The following arithmetic operations are supported: ``+``, ``-``, ``*``,
@@ -205,20 +204,40 @@ def eval(expr, parser='pandas', engine=None, truediv=True,
     level : int, optional
         The number of prior stack frames to traverse and add to the current
         scope. Most users will **not** need to change this parameter.
-    target : a target object for assignment, optional, default is None
-        essentially this is a passed in resolver
-    inplace : bool, default True
-        If expression mutates, whether to modify object inplace or return
-        copy with mutation.
-
-        WARNING: inplace=None currently falls back to to True, but
-        in a future version, will default to False.  Use inplace=True
-        explicitly rather than relying on the default.
+    target : object, optional, default None
+        This is the target object for assignment. It is used when there is
+        variable assignment in the expression. If so, then `target` must
+        support item assignment with string keys, and if a copy is being
+        returned, it must also support `.copy()`.
+    inplace : bool, default False
+        If `target` is provided, and the expression mutates `target`, whether
+        to modify `target` inplace. Otherwise, return a copy of `target` with
+        the mutation.
 
     Returns
     -------
     ndarray, numeric scalar, DataFrame, Series
 
+    Raises
+    ------
+    ValueError
+        There are many instances where such an error can be raised:
+
+        - `target=None`, but the expression is multiline.
+        - The expression is multiline, but not all them have item assignment.
+          An example of such an arrangement is this:
+
+          a = b + 1
+          a + 2
+
+          Here, there are expressions on different lines, making it multiline,
+          but the last line has no variable assigned to the output of `a + 2`.
+        - `inplace=True`, but the expression is missing item assignment.
+        - Item assignment is provided, but the `target` does not support
+          string item assignment.
+        - Item assignment is provided and `inplace=False`, but the `target`
+          does not support the `.copy()` method
+
     Notes
     -----
     The ``dtype`` of any objects involved in an arithmetic ``%`` operation are
@@ -232,8 +251,9 @@ def eval(expr, parser='pandas', engine=None, truediv=True,
     pandas.DataFrame.query
     pandas.DataFrame.eval
     """
-    inplace = validate_bool_kwarg(inplace, 'inplace')
-    first_expr = True
+
+    inplace = validate_bool_kwarg(inplace, "inplace")
+
     if isinstance(expr, string_types):
         _check_expression(expr)
         exprs = [e.strip() for e in expr.splitlines() if e.strip() != '']
@@ -245,7 +265,10 @@ def eval(expr, parser='pandas', engine=None, truediv=True,
         raise ValueError("multi-line expressions are only valid in the "
                          "context of data, use DataFrame.eval")
 
+    ret = None
     first_expr = True
+    target_modified = False
+
     for expr in exprs:
         expr = _convert_expression(expr)
         engine = _check_engine(engine)
@@ -266,28 +289,33 @@ def eval(expr, parser='pandas', engine=None, truediv=True,
         eng_inst = eng(parsed_expr)
         ret = eng_inst.evaluate()
 
-        if parsed_expr.assigner is None and multi_line:
-            raise ValueError("Multi-line expressions are only valid"
-                             " if all expressions contain an assignment")
+        if parsed_expr.assigner is None:
+            if multi_line:
+                raise ValueError("Multi-line expressions are only valid"
+                                 " if all expressions contain an assignment")
+            elif inplace:
+                raise ValueError("Cannot operate inplace "
+                                 "if there is no assignment")
 
         # assign if needed
         if env.target is not None and parsed_expr.assigner is not None:
-            if inplace is None:
-                warnings.warn(
-                    "eval expressions containing an assignment currently"
-                    "default to operating inplace.\nThis will change in "
-                    "a future version of pandas, use inplace=True to "
-                    "avoid this warning.",
-                    FutureWarning, stacklevel=3)
-                inplace = True
+            target_modified = True
 
             # if returning a copy, copy only on the first assignment
             if not inplace and first_expr:
-                target = env.target.copy()
+                try:
+                    target = env.target.copy()
+                except AttributeError:
+                    raise ValueError("Cannot return a copy of the target")
             else:
                 target = env.target
 
-            target[parsed_expr.assigner] = ret
+            # TypeError is most commonly raised (e.g. int, list), but you
+            # get IndexError if you try to do this assignment on np.ndarray.
+            try:
+                target[parsed_expr.assigner] = ret
+            except (TypeError, IndexError):
+                raise ValueError("Cannot assign expression output to target")
 
             if not resolvers:
                 resolvers = ({parsed_expr.assigner: ret},)
@@ -304,7 +332,6 @@ def eval(expr, parser='pandas', engine=None, truediv=True,
             ret = None
             first_expr = False
 
-    if not inplace and inplace is not None:
-        return target
-
-    return ret
+    # We want to exclude `inplace=None` as being False.
+    if inplace is False:
+        return target if target_modified else ret
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2224,21 +2224,18 @@ def query(self, expr, inplace=False, **kwargs):
         else:
             return new_data
 
-    def eval(self, expr, inplace=None, **kwargs):
+    def eval(self, expr, inplace=False, **kwargs):
         """Evaluate an expression in the context of the calling DataFrame
         instance.
 
         Parameters
         ----------
         expr : string
             The expression string to evaluate.
-        inplace : bool
-            If the expression contains an assignment, whether to return a new
-            DataFrame or mutate the existing.
-
-            WARNING: inplace=None currently falls back to to True, but
-            in a future version, will default to False.  Use inplace=True
-            explicitly rather than relying on the default.
+        inplace : bool, default False
+            If the expression contains an assignment, whether to perform the
+            operation inplace and mutate the existing DataFrame. Otherwise,
+            a new DataFrame is returned.
 
             .. versionadded:: 0.18.0
 
diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
@@ -1311,14 +1311,6 @@ def assignment_not_inplace(self):
         expected['c'] = expected['a'] + expected['b']
         tm.assert_frame_equal(df, expected)
 
-        # Default for inplace will change
-        with tm.assert_produces_warnings(FutureWarning):
-            df.eval('c = a + b')
-
-        # but don't warn without assignment
-        with tm.assert_produces_warnings(None):
-            df.eval('a + b')
-
     def test_multi_line_expression(self):
         # GH 11149
         df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
@@ -1388,14 +1380,52 @@ def test_assignment_in_query(self):
             df.query('a = 1')
         assert_frame_equal(df, df_orig)
 
-    def query_inplace(self):
-        # GH 11149
+    def test_query_inplace(self):
+        # see gh-11149
         df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
         expected = df.copy()
         expected = expected[expected['a'] == 2]
         df.query('a == 2', inplace=True)
         assert_frame_equal(expected, df)
 
+        df = {}
+        expected = {"a": 3}
+
+        self.eval("a = 1 + 2", target=df, inplace=True)
+        tm.assert_dict_equal(df, expected)
+
+    @pytest.mark.parametrize("invalid_target", [1, "cat", [1, 2],
+                                                np.array([]), (1, 3)])
+    def test_cannot_item_assign(self, invalid_target):
+        msg = "Cannot assign expression output to target"
+        expression = "a = 1 + 2"
+
+        with tm.assert_raises_regex(ValueError, msg):
+            self.eval(expression, target=invalid_target, inplace=True)
+
+        if hasattr(invalid_target, "copy"):
+            with tm.assert_raises_regex(ValueError, msg):
+                self.eval(expression, target=invalid_target, inplace=False)
+
+    @pytest.mark.parametrize("invalid_target", [1, "cat", (1, 3)])
+    def test_cannot_copy_item(self, invalid_target):
+        msg = "Cannot return a copy of the target"
+        expression = "a = 1 + 2"
+
+        with tm.assert_raises_regex(ValueError, msg):
+            self.eval(expression, target=invalid_target, inplace=False)
+
+    @pytest.mark.parametrize("target", [1, "cat", [1, 2],
+                                        np.array([]), (1, 3), {1: 2}])
+    def test_inplace_no_assignment(self, target):
+        expression = "1 + 2"
+
+        assert self.eval(expression, target=target, inplace=False) == 3
+
+        msg = "Cannot operate inplace if there is no assignment"
+        with tm.assert_raises_regex(ValueError, msg):
+            self.eval(expression, target=target, inplace=True)
+
     def test_basic_period_index_boolean_expression(self):
         df = mkdf(2, 2, data_gen_f=f, c_idx_type='p', r_idx_type='i')