Commit b9050ebf authored by Chanelle Lee
Browse files

add data column flag for easy searching

parent 1c1634f0
from .simulation import Simulation
from multiprocessing import Pool
import pandas as pd
import contextlib
import numpy as np
import os
def unwrap_self_runSingleSim(arg, **kwarg):
    """
    Module-level adapter that lets Pool.map drive Trial.runSingleSim.

    ``arg`` is unpacked positionally into Trial.runSingleSim (the caller in
    this file passes ``(trial, simId)`` pairs); keyword arguments are
    forwarded unchanged.
    """
    runner = Trial.runSingleSim
    return runner(*arg, **kwarg)
class Trial(object):
    """
    Class for running simulations. Serves as a container for simulation params.
    Uses multiprocessing to run a number of simulations simultaneously.
    Simulation data is saved as pandas dataframes in hdf datastores
    """

    def __init__(self, numSims, parameters, optionWeights, filePath, path):
        """
        Initialising method for Trial class

        Parameters
        ----------
        numSims : int
            Number of simulations that will be run
        parameters : dict
            Simulation parameters; forwarded as keyword arguments to
            Simulation. Must contain the keys numOptions, totalIts, popSize,
            distrust, poolSize, evidenceRate and w.
        optionWeights : list
            Weights to use when picking options; one weight per option
        filePath : str
            Identifier of this trial. Used as the HDF key for the belief and
            simulation results and stored in the 'key' column of the config.
        path : str
            Existing directory in which the .h5 files are written

        Raises
        ------
        TypeError
            If parameters is not a dict
        ValueError
            If a required key is missing, optionWeights does not have exactly
            numOptions entries, or path is not an existing directory

        Attributes
        ----------
        beliefResults, simResults
            Empty tuples until runSimulations replaces them with dataframes
        """
        if not isinstance(parameters, dict):
            raise TypeError("Passed value must be a dictionary")
        requiredKeys = ('numOptions', 'totalIts', 'popSize', 'distrust',
                        'poolSize', 'evidenceRate', 'w')
        if not all(key in parameters for key in requiredKeys):
            raise ValueError("Parameters dict must contain the following:"
                             "numOptions, totalIts, popSize, distrust, "
                             "poolSize, evidenceRate, w")
        if np.array(optionWeights).shape != (parameters["numOptions"], ):
            raise ValueError("Passed value is not correct length - must have "
                             "weight for every option.")
        if not os.path.isdir(path):
            raise ValueError("Path is invalid")
        self._numSims = numSims
        self._parameters = parameters
        self._numOptions = parameters['numOptions']
        self._optionWeights = optionWeights
        self._filePath = filePath
        self._path = path
        self.beliefResults = ()
        self.simResults = ()

    def _storePath(self, fileName):
        """
        Return the location of fileName inside the output directory.

        os.path.join is used so the result is correct whether or not the
        directory was passed with a trailing separator.
        """
        return os.path.join(self._path, fileName)

    def runSingleSim(self, i):
        """
        Initialises the ith simulation with the parameters set when the Trial
        instance was initialised, runs it and returns the results.

        Parameters
        ----------
        i : int
            Simulation identity; must be in [0, numSims)

        Returns
        -------
        result : dict
            sim : int
                identifier of the simulation
            beliefs
                the simulation's populationBeliefs after the run
            neededIterations
                value returned by Simulation.run()

        Raises
        ------
        ValueError
            If i is outside [0, numSims)
        """
        if not (0 <= i < self._numSims):
            raise ValueError("Sim id out of range")
        s = Simulation(optionWeights=self._optionWeights, **self._parameters)
        neededIterations = s.run()
        # Only the data needed later is returned; the simulation object
        # itself is released when this method returns.
        return {'sim': i,
                'beliefs': s.populationBeliefs,
                'neededIterations': neededIterations}

    def retBeliefResultsDF(self, beliefs):
        """
        Stores the beliefs from all the simulation populations in a
        dataframe with information about the simulation it is from and which
        iteration, agent and option
        e.g. if in simulation 0, iteration 0 and n=5 then all agents would have
        belief 0.2 in all options
        simID | iteration | agent | option | belief
        0     | 0         | 0     | 0      | 0.2
        ...
        0     | 0         | 0     | 4      | 0.2
        ...
        0     | 0         | 3     | 2      | 0.2

        Parameters
        ----------
        beliefs : sequence
            One entry per simulation, in simID order. Flattened together they
            must hold numSims * totalIts * popSize * numOptions values,
            ordered by simulation, iteration, agent, then option.

        Returns
        -------
        pandas.DataFrame
            Rows ordered by simID, iteration, agent, option with a default
            0..N-1 index.
        """
        sims = np.arange(0, self._numSims)
        iterations = np.arange(0, self._parameters['totalIts'])
        agents = np.arange(0, self._parameters['popSize'])
        options = np.arange(0, self._numOptions)
        # 'ij' indexing keeps the axes in the order given, so a C-order ravel
        # already yields rows sorted by (simID, iteration, agent, option) and
        # no sort of the full frame is needed afterwards.
        grids = np.meshgrid(sims, iterations, agents, options, indexing='ij')
        data = np.column_stack([grid.ravel() for grid in grids])
        df = pd.DataFrame(data=data,
                          columns=[str('simID'),
                                   str('iteration'),
                                   str('agent'),
                                   str('option')])
        df['belief'] = np.asarray(beliefs).ravel()
        return df

    def retSimResultsDF(self, simResults):
        """
        Stores the number of needed iterations before a correct consensus was
        found or a timeout for each simulation in the trial in a dataframe
        e.g.
        simID | iterations
        0     | 4
        1     | 10

        Parameters
        ----------
        simResults : iterable of dict
            Result dicts as returned by runSingleSim; only the 'sim' and
            'neededIterations' entries are read.
        """
        rows = [[d['sim'], d['neededIterations']] for d in simResults]
        return pd.DataFrame(rows, columns=['simID', 'iterations'])

    def runSimulations(self):
        """
        Uses the multiprocessing Pool.map function to simultaneously run
        simulations, then stores the results in self.beliefResults and
        self.simResults.

        Note
        ----
        Pool.map can only take global functions and so uses unwrapper function
        for Trial.runSingleSim.

        Raises
        ------
        AssertionError
            When the number of results is not equal to the number of
            simulations and so not all simulations have been run.
        """
        with contextlib.closing(Pool()) as pool:
            results = pool.map(unwrap_self_runSingleSim,
                               zip([self] * self._numSims,
                                   range(0, self._numSims)))
        # Raised explicitly (not via ``assert``) so the check still runs
        # when Python is started with -O.
        if len(results) != self._numSims:
            raise AssertionError("Not all simulations run!")
        self.beliefResults = self.retBeliefResultsDF([d["beliefs"]
                                                      for d in results])
        self.simResults = self.retSimResultsDF(results)

    def retConfigDF(self):
        """
        Returns a one-row dataframe holding the trial parameters plus a 'key'
        column containing filePath left-justified to 50 characters.
        """
        configDF = pd.DataFrame(data=self._parameters, index=[0])
        configDF['key'] = "{:<50}".format(self._filePath)
        return configDF

    def saveConfigDF(self):
        """
        Appends the config dataframe to Config.h5 under the key 'configs'.
        """
        configDF = self.retConfigDF()
        configDF.to_hdf(self._storePath('Config.h5'), key='configs',
                        append=True, mode='a')

    def saveBeliefResults(self):
        """
        Writes self.beliefResults to Beliefs.h5 in table format under the
        key filePath.
        """
        self.beliefResults.to_hdf(self._storePath('Beliefs.h5'),
                                  key=self._filePath, mode='a', format='table',
                                  complevel=9)

    def saveSimResults(self):
        """
        Appends self.simResults to AllTrialResults.h5 under the key filePath.
        """
        self.simResults.to_hdf(self._storePath('AllTrialResults.h5'),
                               append=True, key=self._filePath, mode='a',
                               complevel=9)

    def saveTrial(self):
        """
        Saves the config, the belief results and the simulation results.
        """
        self.saveConfigDF()
        self.saveBeliefResults()
        self.saveSimResults()
from .simulation import Simulation
from multiprocessing import Pool
import pandas as pd
import contextlib
import numpy as np
import os
def unwrap_self_runSingleSim(arg, **kwarg):
    """
    Top-level forwarding function so Pool.map can call Trial.runSingleSim.

    The items of ``arg`` become the positional arguments of
    Trial.runSingleSim (this file's caller supplies ``(trial, simId)``
    pairs) and any keyword arguments are passed straight through.
    """
    target = Trial.runSingleSim
    return target(*arg, **kwarg)
class Trial(object):
    """
    Class for running simulations. Serves as a container for simulation params.
    Uses multiprocessing to run a number of simulations simultaneously.
    Simulation data is saved as pandas dataframes in hdf datastores
    """

    def __init__(self, numSims, parameters, optionWeights, filePath, path):
        """
        Initialising method for Trial class

        Parameters
        ----------
        numSims : int
            Number of simulations that will be run
        parameters : dict
            Simulation parameters; forwarded as keyword arguments to
            Simulation. Must contain the keys numOptions, totalIts, popSize,
            distrust, poolSize, evidenceRate and w.
        optionWeights : list
            Weights to use when picking options; one weight per option
        filePath : str
            Identifier of this trial. Used as the HDF key for the belief and
            simulation results and stored in the 'key' column of the config.
        path : str
            Existing directory in which the .h5 files are written

        Raises
        ------
        TypeError
            If parameters is not a dict
        ValueError
            If a required key is missing, optionWeights does not have exactly
            numOptions entries, or path is not an existing directory

        Attributes
        ----------
        beliefResults, simResults
            Empty tuples until runSimulations replaces them with dataframes
        """
        if not isinstance(parameters, dict):
            raise TypeError("Passed value must be a dictionary")
        requiredKeys = ('numOptions', 'totalIts', 'popSize', 'distrust',
                        'poolSize', 'evidenceRate', 'w')
        if not all(key in parameters for key in requiredKeys):
            raise ValueError("Parameters dict must contain the following:"
                             "numOptions, totalIts, popSize, distrust, "
                             "poolSize, evidenceRate, w")
        if np.array(optionWeights).shape != (parameters["numOptions"], ):
            raise ValueError("Passed value is not correct length - must have "
                             "weight for every option.")
        if not os.path.isdir(path):
            raise ValueError("Path is invalid")
        self._numSims = numSims
        self._parameters = parameters
        self._numOptions = parameters['numOptions']
        self._optionWeights = optionWeights
        self._filePath = filePath
        self._path = path
        self.beliefResults = ()
        self.simResults = ()

    def _storePath(self, fileName):
        """
        Return the location of fileName inside the output directory.

        os.path.join is used so the result is correct whether or not the
        directory was passed with a trailing separator.
        """
        return os.path.join(self._path, fileName)

    def runSingleSim(self, i):
        """
        Initialises the ith simulation with the parameters set when the Trial
        instance was initialised, runs it and returns the results.

        Parameters
        ----------
        i : int
            Simulation identity; must be in [0, numSims)

        Returns
        -------
        result : dict
            sim : int
                identifier of the simulation
            beliefs
                the simulation's populationBeliefs after the run
            neededIterations
                value returned by Simulation.run()

        Raises
        ------
        ValueError
            If i is outside [0, numSims)
        """
        if not (0 <= i < self._numSims):
            raise ValueError("Sim id out of range")
        s = Simulation(optionWeights=self._optionWeights, **self._parameters)
        neededIterations = s.run()
        # Only the data needed later is returned; the simulation object
        # itself is released when this method returns.
        return {'sim': i,
                'beliefs': s.populationBeliefs,
                'neededIterations': neededIterations}

    def retBeliefResultsDF(self, beliefs):
        """
        Stores the beliefs from all the simulation populations in a
        dataframe with information about the simulation it is from and which
        iteration, agent and option
        e.g. if in simulation 0, iteration 0 and n=5 then all agents would have
        belief 0.2 in all options
        simID | iteration | agent | option | belief
        0     | 0         | 0     | 0      | 0.2
        ...
        0     | 0         | 0     | 4      | 0.2
        ...
        0     | 0         | 3     | 2      | 0.2

        Parameters
        ----------
        beliefs : sequence
            One entry per simulation, in simID order. Flattened together they
            must hold numSims * totalIts * popSize * numOptions values,
            ordered by simulation, iteration, agent, then option.

        Returns
        -------
        pandas.DataFrame
            Rows ordered by simID, iteration, agent, option with a default
            0..N-1 index.
        """
        sims = np.arange(0, self._numSims)
        iterations = np.arange(0, self._parameters['totalIts'])
        agents = np.arange(0, self._parameters['popSize'])
        options = np.arange(0, self._numOptions)
        # 'ij' indexing keeps the axes in the order given, so a C-order ravel
        # already yields rows sorted by (simID, iteration, agent, option) and
        # no sort of the full frame is needed afterwards.
        grids = np.meshgrid(sims, iterations, agents, options, indexing='ij')
        data = np.column_stack([grid.ravel() for grid in grids])
        df = pd.DataFrame(data=data,
                          columns=[str('simID'),
                                   str('iteration'),
                                   str('agent'),
                                   str('option')])
        df['belief'] = np.asarray(beliefs).ravel()
        return df

    def retSimResultsDF(self, simResults):
        """
        Stores the number of needed iterations before a correct consensus was
        found or a timeout for each simulation in the trial in a dataframe
        e.g.
        simID | iterations
        0     | 4
        1     | 10

        Parameters
        ----------
        simResults : iterable of dict
            Result dicts as returned by runSingleSim; only the 'sim' and
            'neededIterations' entries are read.
        """
        rows = [[d['sim'], d['neededIterations']] for d in simResults]
        return pd.DataFrame(rows, columns=['simID', 'iterations'])

    def runSimulations(self):
        """
        Uses the multiprocessing Pool.map function to simultaneously run
        simulations, then stores the results in self.beliefResults and
        self.simResults.

        Note
        ----
        Pool.map can only take global functions and so uses unwrapper function
        for Trial.runSingleSim.

        Raises
        ------
        AssertionError
            When the number of results is not equal to the number of
            simulations and so not all simulations have been run.
        """
        with contextlib.closing(Pool()) as pool:
            results = pool.map(unwrap_self_runSingleSim,
                               zip([self] * self._numSims,
                                   range(0, self._numSims)))
        # Raised explicitly (not via ``assert``) so the check still runs
        # when Python is started with -O.
        if len(results) != self._numSims:
            raise AssertionError("Not all simulations run!")
        self.beliefResults = self.retBeliefResultsDF([d["beliefs"]
                                                      for d in results])
        self.simResults = self.retSimResultsDF(results)

    def retConfigDF(self):
        """
        Returns a one-row dataframe holding the trial parameters plus a 'key'
        column containing filePath left-justified to 50 characters.
        """
        configDF = pd.DataFrame(data=self._parameters, index=[0])
        configDF['key'] = "{:<50}".format(self._filePath)
        return configDF

    def saveConfigDF(self):
        """
        Appends the config dataframe to Config.h5 under the key 'configs'.
        """
        configDF = self.retConfigDF()
        configDF.to_hdf(self._storePath('Config.h5'), key='configs',
                        append=True, mode='a')

    def saveBeliefResults(self):
        """
        Writes self.beliefResults to Beliefs.h5 in table format under the
        key filePath.

        data_columns=True is passed so that all columns are stored as
        data columns of the table (see the pandas to_hdf documentation).
        """
        self.beliefResults.to_hdf(self._storePath('Beliefs.h5'),
                                  key=self._filePath, mode='a', format='table',
                                  complevel=9, data_columns=True)

    def saveSimResults(self):
        """
        Appends self.simResults to AllTrialResults.h5 under the key filePath.
        """
        self.simResults.to_hdf(self._storePath('AllTrialResults.h5'),
                               append=True, key=self._filePath, mode='a',
                               complevel=9)

    def saveTrial(self):
        """
        Saves the config, the belief results and the simulation results.
        """
        self.saveConfigDF()
        self.saveBeliefResults()
        self.saveSimResults()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment