"""
Script for the local benchmarking of the o2 analysis tasks,
running them with multiple processing jobs (NCORES)
and multiple readers (NREADERS) over input files (INPUT_FILE).
Tasks to be benchmarked are in the BENCHMARK_TASKS dict.

Usage: python3 py-analysis-benchmark.py

Output: CSV file (OUTPUT_CSV) with benchmarking results:
'tname', 'ncores', 'nreaders', 'time_mean' (s), 'time_std' (s),
'input_size' (MB), 'input_length', 'timestamp', 'cpu_load', 'ncpu', 'machine'
"""

# NOTE(review): `os`, `statistics as stat` and `string.Template` are used by
# the code below, but their import lines were outside the reviewed excerpt --
# confirm they match the file's existing imports.
import csv
from datetime import datetime
import itertools
import os
import statistics as stat
from string import Template
import subprocess
import timeit


def get_cl_output(cmd) -> str:
    """Run `cmd` through the shell and return its stdout as text.

    Trailing/leading newlines are stripped. An empty string is returned
    when the command exits with a non-zero status.
    """
    try:
        output_ = subprocess.check_output(cmd, shell=True).decode('utf-8')
    except subprocess.CalledProcessError:
        output_ = ''
    return output_.strip('\n')


def get_cpu_load():
    """Return the last field printed by `uptime` (the 15-minute load average).

    The value is returned as a string exactly as `uptime` prints it; an
    empty string is returned when `uptime` fails or prints nothing.
    """
    fields = get_cl_output('uptime').split()
    return fields[-1] if fields else ''


def get_timestamp():
    """Return the current local time formatted as 'MM/DD/YYYY HH:MM'."""
    return datetime.now().strftime("%m/%d/%Y %H:%M")


def get_time_std(t_res):
    """Return the sample standard deviation of the timings in `t_res`.

    Returns -1 when the deviation is undefined (fewer than two timings),
    so that a single-trial run still produces a CSV row.
    """
    try:
        std_ = stat.stdev(t_res)
    except stat.StatisticsError:
        std_ = -1
    return std_


#benchmarking setup
INPUT_FILE = '@filelist.txt'
OUTPUT_CSV = 'benchmark_data.csv'
NCORES = [1, 2, 4]
NREADERS = [1, 2, 4]
NTRIALS = 2
LARGE_SHM_SEGMENT_SIZE = False
CPU_SELECTION = False

# Core lists handed to `taskset -c` when CPU_SELECTION is enabled,
# keyed by the number of processing jobs.
TASKSET_CORES = {
    2: '5,15',
    4: '1,3,11,13',
}

CSV_HEADER = ('tname', 'ncores', 'nreaders', 'time_mean', 'time_std',
              'input_size', 'input_length', 'timestamp', 'cpu_load', 'ncpu', 'machine')

#tasks to be benchmarked
# NOTE(review): 'track_extension' below uses an underscore while the
# commented-out COMPOSITE_TASK uses 'track-extension' -- confirm which
# device name is correct.
BENCHMARK_TASKS = {
    'o2-analysistutorial-void': '-b --pipeline void:${n}',
    'o2-analysistutorial-histograms': '-b --pipeline eta-and-phi-histograms:${n},pt-histogram:${n},etaphi-histogram:${n}',
    'o2-analysis-trackselection': '-b --pipeline track-selection:${n},track_extension:${n}',
    'o2-analysis-correlations': '-b --pipeline correlation-task:${n}',
    #'o2-analysis-vertexing-hf': '-b --pipeline vertexerhf-candidatebuildingDzero:${n},vertexerhf-decayvertexbuilder2prong:${n}'
}

SHA256SUM_TASK = Template('cat ${file_list} | xargs -P ${n} -n1 -I{} sha256sum {}')
#COMPOSITE_TASK = Template('o2-analysis-trackselection -b --pipeline track-selection:${n},track-extension:${n} --aod-file ${file_list} --readers ${n} | o2-analysistutorial-histogram-track-selection -b --pipeline histogram-track-selection:${n} --select=0')

for k in BENCHMARK_TASKS:
    BENCHMARK_TASKS[k] = Template(BENCHMARK_TASKS[k])


def get_input_stats(list_path):
    """Return `(total_size_mb, n_files)` for the files listed in `list_path`.

    `list_path` is a text file with one input file path per line. Blank
    lines are skipped so that a trailing empty line neither breaks
    `os.stat` nor inflates the file count.
    """
    with open(list_path, 'r') as f:
        fnames = [line.strip() for line in f]
    fnames = [name for name in fnames if name]
    total_bytes = sum(os.stat(name).st_size for name in fnames)
    return round(total_bytes / 1024 / 1024), len(fnames)


def build_task_command(tname, targ, ncores, nreaders):
    """Build the argv list used to run one benchmark task.

    tname    -- executable name (a key of BENCHMARK_TASKS)
    targ     -- string.Template with the task arguments; `${n}` is
                replaced by `ncores`
    ncores   -- number of processing jobs per pipeline device
    nreaders -- value passed to `--readers`
    """
    cmd_list = [tname] + targ.substitute(n=str(ncores)).split(' ')

    # NOTE(review): the `if CPU_SELECTION:` / `if ncores == 2:` guards were
    # not visible in the reviewed excerpt and are reconstructed from the
    # surrounding `elif ncores == 4:` branch -- confirm against the file.
    if CPU_SELECTION and ncores in TASKSET_CORES:
        cmd_list = ['taskset', '-c', TASKSET_CORES[ncores]] + cmd_list

    if LARGE_SHM_SEGMENT_SIZE:
        cmd_list += ['--shm-segment-size', str(16000000000)]

    cmd_list += ['--aod-file', INPUT_FILE]
    cmd_list += ['--readers', str(nreaders)]
    return cmd_list


def run_trials(func, arg):
    """Call `func(arg)` NTRIALS times and return the list of wall times (s)."""
    # A callable is used instead of a source string so the timed statement
    # does not depend on `arg` being a module-level global.
    return timeit.Timer(lambda: func(arg)).repeat(NTRIALS, 1)


def main():
    """Run every benchmark configuration and write the results to OUTPUT_CSV.

    Raises ValueError when the O2_ROOT environment variable is not set.
    """
    if not os.environ.get('O2_ROOT'):
        raise ValueError('O2_ROOT not found')

    machine = get_cl_output('hostname')
    ncpu = os.cpu_count()
    input_size, input_length = get_input_stats(INPUT_FILE[1:])

    def result_row(tname, ncores, nreaders, t_res):
        # Timestamp and CPU load are sampled when the row is written,
        # i.e. right after the trials for this configuration finished.
        return (tname, ncores, nreaders, stat.mean(t_res), get_time_std(t_res),
                input_size, input_length, get_timestamp(), get_cpu_load(), ncpu, machine)

    # newline='' is required by the csv module to avoid spurious blank rows.
    with open(OUTPUT_CSV, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(CSV_HEADER)

        # Reference measurement: checksum the input files with `ncores`
        # parallel sha256sum processes (nreaders is not applicable: -1).
        for ncores in NCORES:
            cmd_sha256sum = SHA256SUM_TASK.substitute(file_list=INPUT_FILE[1:], n=str(ncores))
            t_res = run_trials(os.system, cmd_sha256sum)
            writer.writerow(result_row('sha256sum', ncores, -1, t_res))

        for ncores, nreaders in itertools.product(NCORES, NREADERS):
            for tname, targ in BENCHMARK_TASKS.items():
                cmd_list = build_task_command(tname, targ, ncores, nreaders)
                t_res = run_trials(subprocess.run, cmd_list)
                # tname[3:] drops the leading 'o2-' from the executable name.
                writer.writerow(result_row(tname[3:], ncores, nreaders, t_res))


if __name__ == "__main__":
    main()

#alinsure
#numa0 0-11,24-35