Commit 7fd49fbc authored by David Trudgian's avatar David Trudgian
Browse files

Regex summary capture

parent e92c5ccd
......@@ -47,7 +47,12 @@ class BaseExecutor:
for arg in self.commands[0]:
trace_file.write("%s\t" % arg)
trace_file.write("Return Code\tDuration\tStart\tEnd\n")
trace_file.write("Return Code\tDuration\tStart\tEnd")
for summary in self.params.vals['summary']:
trace_file.write("\t%s" % summary['id'])
trace_file.write("\n")
def write_trace(self, msg):
......@@ -62,7 +67,6 @@ class BaseExecutor:
def wrap_cmd(self, cmd_idx):
logger.debug(" - Task %d is running" % cmd_idx)
cmd = self.commands[cmd_idx]
......@@ -85,16 +89,31 @@ class BaseExecutor:
for arg in cmd.values():
summary_line += "%s\t" % str(arg['value'])
summary_line += "%d\t%s\t%s\t%s\n" % (
summary_line += "%d\t%s\t%s\t%s" % (
ret,
duration,
start,
end
)
for summary in self.params.vals['summary']:
summary_line += '\t'
summary_line += self.__pygrep_first(summary['regex'], stdout_path)
summary_line += '\n'
return summary_line
def __pygrep_first(self, regex, file):
    """Return capture group 1 of the first match of ``regex`` in ``file``.

    The file is scanned one line at a time, so a pattern is never matched
    across a line break, and scanning stops at the first matching line.

    Args:
        regex: Pattern (string or compiled) containing a capture group.
        file: Path of the text file to search.

    Returns:
        The text captured by group 1 of the first match. An empty string is
        returned when no line matches, or when group 1 did not take part in
        the match, so the result can always be concatenated onto a string.
    """
    # The context manager closes the handle even on the early return below.
    with open(file) as handle:
        for line in handle:
            match = re.search(regex, line)
            if match:
                # group(1) is None for an optional group that did not match.
                return match.group(1) or ''
    return ''
@abstractmethod
def run(self):
    """Run the executor's commands; concrete executors override this."""
    return None
......@@ -110,7 +129,7 @@ class LocalExecutor(BaseExecutor):
def run(self):
for cmd_idx, cmd in enumerate(self.commands):
wrap_cmd(cmd_idx,cmd)
self.write_trace(self.wrap_cmd(cmd_idx))
def run_cmd(self, cmd, stderr_file, stdout_file):
......
......@@ -131,7 +131,11 @@ class ParamFile(object):
for summary in self.vals['summary']:
try:
summary.re = re.compile(summary['regex'])
compiled_re = re.compile(summary['regex'])
if compiled_re.groups != 1:
raise(TypeError("Regular expression for summary %s must contain exactly 1 matching group." % summary['id']))
logging.info( '- Summary %s using regex %s' % (summary['id'], summary['regex']))
except re.error:
......
# The command to run, including any arguments that will not be explored by
# the runner.
command: echo train_ann --input train.set --crossk 10
command: echo train_ann --input train.set --crossk 10 TPF=0.01 FPF=0.13
# The standard output from a command will be collected into a file named:
# out_<param1 val>_<param2 val>_<param3_val>.....out
......@@ -9,18 +9,14 @@ command: echo train_ann --input train.set --crossk 10
# The standard error from a command will be collected into a file named:
# out_<param1 val>_<param2 val>_<param3_val>.....err
# If summary is specified, we will create a summary.txt file listing, in tabular
# format, the value of each parameter and values extracted from the standard
# output of the task.
# To extract part of the standard output, specify a regular expression here.
# Each regular expression must contain exactly one capture group in parentheses;
# the first match of that group is collected as a column in the summary file.
summary:
- id: True_Pos_Fraction
regex: 'TPF: ([-+]?[0-9]*\.?[0-9])'
regex: 'TPF=([-+]?[0-9]*\.?[0-9]*)'
- id: False_pos_Fraction
regex: 'FPF: ([-+]?[0-9]*\.?[0-9])'
regex: 'FPF=([-+]?[0-9]*\.?[0-9]*)'
# Cluster partition to use
partition: 256GB
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment