Commit e92c5ccd authored by David Trudgian
Browse files

Fixing issues with run, add re validation

parent c5997795
......@@ -15,22 +15,23 @@ param_runner -h | --help | --version
Options:
--verbose Show debug messages
--dry-run Create job scripts, but don't submit to the cluster
"""
from docopt import docopt
import logging
import colorlog
import os
import sys
import subprocess
import colorlog
from docopt import docopt
from runner import __version__
from runner import param
from runner import executors
from runner import param
def main():
arguments = docopt(__doc__, version='param_runner %s' % __version__)
handler = colorlog.StreamHandler()
......@@ -45,10 +46,9 @@ def main():
else:
logger.setLevel(logging.INFO)
if arguments['<paramfile>']:
param_file =arguments['<paramfile>']
param_file = arguments['<paramfile>']
print "param_runner - version %s" % __version__
print "-------------------------------"
......@@ -59,23 +59,63 @@ def main():
try:
p = param.ParamFile(param_file)
p.load()
p = param.ParamFile(param_file)
p.load()
if arguments['run']:
runner = executors.LocalExecutor(p.commands, os.path.dirname(
(os.path.abspath(param_file))), p)
runner.run()
if arguments['srun']:
runner = executors.SrunExecutor(p.commands, os.path.dirname(
(os.path.abspath(param_file))), p)
runner.run()
if arguments['submit']:
if arguments['run']:
runner = executors.LocalExecutor(p.commands, os.path.dirname((os.path.abspath(param_file))), p)
runner.run()
# We submit using sbatch
if arguments['srun']:
runner = executors.SrunExecutor(p.commands, os.path.dirname(
(os.path.abspath(param_file))), p)
runner.run()
python_exe = sys.executable
this_script = os.path.abspath(__file__)
logger.debug("Python is %s" % python_exe)
logger.debug("Script location is %s" % this_script)
nodes = str(p.vals['nodes'])
partition = str(p.vals['partition'])
time_limit = str(p.vals['time_limit'])
sbatch_cmd = [ 'sbatch', '-N', nodes, '-p', partition, '-t', time_limit, ]
batch_script = "#!/bin/bash\n"
batch_script += python_exe
batch_script += ' -u '
batch_script += this_script
batch_script += ' srun '
batch_script += os.path.abspath(param_file)
batch_script += ' 2>&1\n'
logger.debug(sbatch_cmd)
sbatch_proc = subprocess.Popen( sbatch_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stdout, stderr = sbatch_proc.communicate(batch_script)
if stdout:
logger.info(stdout)
if stderr:
logger.error(stderr)
logger.info("Done.")
except Exception as e:
raise
logger.error("[%s] %s" % ( e.__class__.__name__, e ))
logger.error("[%s] %s" % (e.__class__.__name__, e))
print "\n"
if __name__ == '__main__':
main()
......@@ -5,6 +5,7 @@ import os
import datetime
import logging
import re
import sys
logger = logging.getLogger(__name__)
......@@ -146,6 +147,8 @@ class SrunExecutor(BaseExecutor):
p.close()
p.join()
logger.info(" - All tasks have completed")
def run_cmd(self, cmd, stderr_file, stdout_file):
......@@ -156,3 +159,9 @@ class SrunExecutor(BaseExecutor):
stdout=stdout_file, env=os.environ)
return ret
import anyconfig
import os
import logging
import re
from copy import deepcopy
from collections import OrderedDict
from yamllint.config import YamlLintConfig, YamlLintConfigError
......@@ -119,10 +120,24 @@ class ParamFile(object):
self.validate()
metadata = anyconfig.load(self.param_file)
self.vals = metadata
if self.vals['summary']:
self.__check_summary_regexes()
self.__compute_param_ranges()
self.commands = self.__compute_commands()
def __check_summary_regexes(self):
    """Compile the regex of every summary entry, failing fast on bad patterns.

    Iterates over ``self.vals['summary']``. For each entry the pattern in
    ``summary['regex']`` is compiled, the compiled object is stored on the
    entry as its ``re`` attribute, and an info message is logged.

    Raises:
        re.error: if a pattern does not compile. The message names the
            offending summary ``id`` and includes the reason reported by
            the ``re`` module.
    """
    for summary in self.vals['summary']:
        try:
            # Only the compile step can raise re.error, so keep the try
            # body down to that single call.
            compiled = re.compile(summary['regex'])
        except re.error as err:
            # Keep the underlying reason: without it the user learns that
            # the pattern is bad, but not what is wrong with it.
            raise re.error(
                "Regular expression for summary %s is invalid: %s"
                % (summary['id'], err))

        # NOTE(review): attribute assignment needs the summary mapping to
        # accept attributes (a plain dict does not) - confirm the mapping
        # type produced when the parameter file is loaded.
        summary.re = compiled
        logging.info('- Summary %s using regex %s',
                     summary['id'], summary['regex'])
def __compute_param_ranges(self):
"""Compute the parameter ranges as lists, from their definitions"""
......
# The command to run, including any arguments that will not be explored by
# the runner.
command: train_ann --input train.set --crossk 10
command: echo train_ann --input train.set --crossk 10
# The standard output from a command will be collected into a file named:
# out_<param1 val>_<param2 val>_<param3_val>.....out
......@@ -22,7 +22,6 @@ summary:
- id: False_pos_Fraction
regex: 'FPF: ([-+]?[0-9]*\.?[0-9])'
# Cluster partition to use
partition: 256GB
......@@ -33,7 +32,7 @@ nodes: 4
cpus_per_task: 4
# Time limit
time_limit: 3d:00:00:00
time_limit: 3-00:00:00
# The list of parameters to explore
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment