Merge pull request #691 from projectmesa/merge-test

Corvince · web-flow · commit 0d4d5e540491 · 2019-05-21T13:26:39.000+02:00
Resources #651 in docs branch
diff --git a/mesa/batchrunner.py b/mesa/batchrunner.py
@@ -10,6 +10,9 @@
 from itertools import product, count
 import pandas as pd
 from tqdm import tqdm
+
+import random
+
 try:
     from pathos.multiprocessing import ProcessPool
 except ImportError:
@@ -18,9 +21,9 @@
     pathos_support = True
 
 
-class VariableParameterError(TypeError):
-    MESSAGE = ('variable_parameters must map a name to a sequence of values. '
-               'These parameters were given with non-sequence values: {}')
+class ParameterError(TypeError):
+    MESSAGE = ('parameters must map a name to a value. '
+               'These names did not match paramerets: {}')
 
     def __init__(self, bad_names):
         self.bad_names = bad_names
@@ -29,7 +32,15 @@ def __str__(self):
         return self.MESSAGE.format(self.bad_names)
 
 
-class BatchRunner:
+class VariableParameterError(ParameterError):
+    MESSAGE = ('variable_parameters must map a name to a sequence of values. '
+               'These parameters were given with non-sequence values: {}')
+
+    def __init__(self, bad_names):
+        super().__init__(bad_names)
+
+
+class FixedBatchRunner:
     """ This class is instantiated with a model class, and model parameters
     associated with one or more values. It is also instantiated with model and
     agent-level reporters, dictionaries mapping a variable name to a function
@@ -39,9 +50,8 @@ class BatchRunner:
     Note that by default, the reporters only collect data at the *end* of the
     run. To get step by step data, simply have a reporter store the model's
     entire DataCollector object.
-
     """
-    def __init__(self, model_cls, variable_parameters=None,
+    def __init__(self, model_cls, parameters_list=None,
                  fixed_parameters=None, iterations=1, max_steps=1000,
                  model_reporters=None, agent_reporters=None,
                  display_progress=True):
@@ -50,20 +60,20 @@ def __init__(self, model_cls, variable_parameters=None,
 
         Args:
             model_cls: The class of model to batch-run.
-            variable_parameters: Dictionary of parameters to lists of values.
-                The model will be run with every combo of these paramters.
-                For example, given variable_parameters of
-                    {"param_1": range(5),
-                     "param_2": [1, 5, 10]}
-                models will be run with {param_1=1, param_2=1},
-                    {param_1=2, param_2=1}, ..., {param_1=4, param_2=10}.
+            parameters_list: A list of dictionaries of parameter sets.
+                The model will be run with dictionary of paramters.
+                For example, given parameters_list of
+                    [{"homophily": 3, "density": 0.8, "minority_pc": 0.2},
+                    {"homophily": 2, "density": 0.9, "minority_pc": 0.1},
+                    {"homophily": 4, "density": 0.6, "minority_pc": 0.5}]
+                3 models will be run, one for each provided set of parameters.
             fixed_parameters: Dictionary of parameters that stay same through
                 all batch runs. For example, given fixed_parameters of
                     {"constant_parameter": 3},
                 every instantiated model will be passed constant_parameter=3
                 as a kwarg.
-            iterations: The total number of times to run the model for each
-                combination of parameters.
+            iterations: The total number of times to run the model for each set
+                of parameters.
             max_steps: Upper limit of steps above which each run will be halted
                 if it hasn't halted on its own.
             model_reporters: The dictionary of variables to collect on each run
@@ -77,9 +87,9 @@ def __init__(self, model_cls, variable_parameters=None,
 
         """
         self.model_cls = model_cls
-        if variable_parameters is None:
-            variable_parameters = {}
-        self.variable_parameters = self._process_parameters(variable_parameters)
+        if parameters_list is None:
+            parameters_list = []
+        self.parameters_list = list(parameters_list)
         self.fixed_parameters = fixed_parameters or {}
         self._include_fixed = len(self.fixed_parameters.keys()) > 0
         self.iterations = iterations
@@ -96,16 +106,6 @@ def __init__(self, model_cls, variable_parameters=None,
 
         self.display_progress = display_progress
 
-    def _process_parameters(self, params):
-        params = copy.deepcopy(params)
-        bad_names = []
-        for name, values in params.items():
-            if (isinstance(values, str) or not hasattr(values, "__iter__")):
-                bad_names.append(name)
-        if bad_names:
-            raise VariableParameterError(bad_names)
-        return params
-
     def _make_model_args(self):
         """Prepare all combinations of parameter values for `run_all`
 
@@ -117,21 +117,20 @@ def _make_model_args(self):
         all_kwargs = []
         all_param_values = []
 
-        if len(self.variable_parameters) > 0:
-            param_names, param_ranges = zip(*self.variable_parameters.items())
-            for param_range in param_ranges:
-                total_iterations *= len(param_range)
-
-            for param_values in product(*param_ranges):
-                kwargs = dict(zip(param_names, param_values))
+        count = len(self.parameters_list)
+        if count:
+            for params in self.parameters_list:
+                kwargs = params.copy()
                 kwargs.update(self.fixed_parameters)
                 all_kwargs.append(kwargs)
-                all_param_values.append(param_values)
-        else:
-            kwargs = self.fixed_parameters
-            param_values = None
-            all_kwargs = [kwargs]
-            all_param_values = [None]
+                all_param_values.append(params.values())
+        elif len(self.fixed_parameters):
+            count = 1
+            kwargs = self.fixed_parameters.copy()
+            all_kwargs.append(kwargs)
+            all_param_values.append(kwargs.values())
+
+        total_iterations *= count
 
         return (total_iterations, all_kwargs, all_param_values)
 
@@ -154,7 +153,7 @@ def run_iteration(self, kwargs, param_values, run_count):
 
         # Collect and store results:
         if param_values is not None:
-            model_key = param_values + (run_count,)
+            model_key = tuple(param_values) + (run_count,)
         else:
             model_key = (run_count,)
 
@@ -215,7 +214,10 @@ def _prepare_report_table(self, vars_dict, extra_cols=None):
         column as a key.
         """
         extra_cols = ['Run'] + (extra_cols or [])
-        index_cols = list(self.variable_parameters.keys()) + extra_cols
+        index_cols = set()
+        for params in self.parameters_list:
+            index_cols |= params.keys()
+        index_cols = list(index_cols) + extra_cols
 
         records = []
         for param_key, values in vars_dict.items():
@@ -237,6 +239,98 @@ def _prepare_report_table(self, vars_dict, extra_cols=None):
         return ordered
 
 
+# This is kind of a useless class, but it does carry the 'source' parameters with it
+class ParameterProduct:
+    def __init__(self, variable_parameters):
+        self.param_names, self.param_lists = \
+            zip(*(copy.deepcopy(variable_parameters)).items())
+        self._product = product(*self.param_lists)
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        return dict(zip(self.param_names, next(self._product)))
+
+
+# Roughly inspired by sklearn.model_selection.ParameterSampler.  Does not handle
+# distributions, only lists.
+class ParameterSampler:
+    def __init__(self, parameter_lists, n, random_state=None):
+        self.param_names, self.param_lists = \
+            zip(*(copy.deepcopy(parameter_lists)).items())
+        self.n = n
+        if random_state is None:
+            self.random_state = random.Random()
+        elif isinstance(random_state, int):
+            self.random_state = random.Random(random_state)
+        else:
+            self.random_state = random_state
+        self.count = 0
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        self.count += 1
+        if self.count <= self.n:
+            return dict(zip(self.param_names, [self.random_state.choice(l) for l in self.param_lists]))
+        raise StopIteration()
+
+
+class BatchRunner(FixedBatchRunner):
+    """ This class is instantiated with a model class, and model parameters
+    associated with one or more values. It is also instantiated with model and
+    agent-level reporters, dictionaries mapping a variable name to a function
+    which collects some data from the model or its agents at the end of the run
+    and stores it.
+
+    Note that by default, the reporters only collect data at the *end* of the
+    run. To get step by step data, simply have a reporter store the model's
+    entire DataCollector object.
+
+    """
+    def __init__(self, model_cls, variable_parameters=None,
+                 fixed_parameters=None, iterations=1, max_steps=1000,
+                 model_reporters=None, agent_reporters=None,
+                 display_progress=True):
+        """ Create a new BatchRunner for a given model with the given
+        parameters.
+
+        Args:
+            model_cls: The class of model to batch-run.
+            variable_parameters: Dictionary of parameters to lists of values.
+                The model will be run with every combo of these paramters.
+                For example, given variable_parameters of
+                    {"param_1": range(5),
+                     "param_2": [1, 5, 10]}
+                models will be run with {param_1=1, param_2=1},
+                    {param_1=2, param_2=1}, ..., {param_1=4, param_2=10}.
+            fixed_parameters: Dictionary of parameters that stay same through
+                all batch runs. For example, given fixed_parameters of
+                    {"constant_parameter": 3},
+                every instantiated model will be passed constant_parameter=3
+                as a kwarg.
+            iterations: The total number of times to run the model for each
+                combination of parameters.
+            max_steps: Upper limit of steps above which each run will be halted
+                if it hasn't halted on its own.
+            model_reporters: The dictionary of variables to collect on each run
+                at the end, with variable names mapped to a function to collect
+                them. For example:
+                    {"agent_count": lambda m: m.schedule.get_agent_count()}
+            agent_reporters: Like model_reporters, but each variable is now
+                collected at the level of each agent present in the model at
+                the end of the run.
+            display_progress: Display progresss bar with time estimation?
+
+        """
+        super().__init__(model_cls, ParameterProduct(variable_parameters),
+                     fixed_parameters, iterations, max_steps,
+                     model_reporters, agent_reporters,
+                     display_progress)
+
+
 class MPSupport(Exception):
     def __str__(self):
         return ("BatchRunnerMP depends on pathos, which is either not "
diff --git a/tests/test_batchrunner.py b/tests/test_batchrunner.py
@@ -7,7 +7,7 @@
 
 from mesa import Agent, Model
 from mesa.time import BaseScheduler
-from mesa.batchrunner import BatchRunner
+from mesa.batchrunner import BatchRunner, ParameterProduct, ParameterSampler
 
 
 NUM_AGENTS = 7
@@ -163,5 +163,38 @@ def test_model_with_variable_and_fixed_kwargs(self):
                 self.fixed_params['fixed_name'])
 
 
+class TestParameters(unittest.TestCase):
+    def test_product(self):
+        params = ParameterProduct({
+            "var_alpha": ['a', 'b', 'c'],
+            "var_num": [10, 20]
+            })
+
+        lp = list(params)
+        self.assertCountEqual(lp, [{'var_alpha': 'a', 'var_num': 10},
+                            {'var_alpha': 'a', 'var_num': 20},
+                            {'var_alpha': 'b', 'var_num': 10},
+                            {'var_alpha': 'b', 'var_num': 20},
+                            {'var_alpha': 'c', 'var_num': 10},
+                            {'var_alpha': 'c', 'var_num': 20}])
+
+    def test_sampler(self):
+        params1 = ParameterSampler({
+            "var_alpha": ['a', 'b', 'c', 'd', 'e'],
+            "var_num": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]},
+            n=10,
+            random_state=1)
+        params2 = ParameterSampler({
+            "var_alpha": ['a', 'b', 'c', 'd', 'e'],
+            "var_num": range(16)},
+            n=10,
+            random_state=1
+            )
+
+        lp = list(params1)
+        self.assertEqual(10, len(lp))
+        self.assertEqual(lp, list(params2))
+
+
 if __name__ == '__main__':
     unittest.main()