Commit a3e28c7

Throughput Benchmark w/ Perf (#145)

* update max freq on insertion, change debug invariant
* perf benchmark script

Co-authored-by: Juncheng Yang <1a1a11a@users.noreply.github.com>

1 parent e6f816e commit a3e28c7

scripts/benchmark_throughput.py

232 additions, 0 deletions
@@ -0,0 +1,232 @@
import subprocess
import logging
import argparse
from typing import Dict
from utils.setup_utils import setup, CACHESIM_PATH
import re
import csv
import os
import multiprocessing
import pandas as pd

logger = logging.getLogger("cache_sim_monitor")
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")


def generate_trace(args):
    """Call data_gen.py with specific parameters (for multiprocessing)."""
    m, n, a, output_dir = args
    trace_filename = f"{output_dir}/zipf_{a}_{m}_{n}.oracleGeneral"

    if os.path.exists(trace_filename):
        logger.info(f"Trace {trace_filename} already exists. Skipping.")
        return trace_filename

    cmd = [
        "python3", "data_gen.py",
        "-m", str(m),
        "-n", str(n),
        "--alpha", str(a),
        "--bin-output", trace_filename,
    ]
    logger.info(f"Generating trace: {trace_filename}, Command: {' '.join(cmd)}")
    process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    if process.returncode != 0:
        logger.warning(f"Failed to generate trace {trace_filename}")
        logger.warning(process.stderr.decode("utf-8"))
        return None
    return trace_filename
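
# Illustrative example (not part of the original script): an args tuple of
# (100, 10000, 0.1, "../data/synthetic_traces") shells out to
#   python3 data_gen.py -m 100 -n 10000 --alpha 0.1 \
#       --bin-output ../data/synthetic_traces/zipf_0.1_100_10000.oracleGeneral
# and returns that trace path, or None if generation failed.
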
def generate_synthetic_traces(num_objects, num_requests, alpha):
    num_objects = [int(x) for x in num_objects.split(",")]
    num_requests = [int(x) for x in num_requests.split(",")]
    alpha = [float(x) for x in alpha.split(",")]

    output_dir = "../data/synthetic_traces"
    os.makedirs(output_dir, exist_ok=True)

    args_list = [(m, n, a, output_dir) for m in num_objects for n in num_requests for a in alpha]

    logger.info(f"Generating {len(args_list)} traces with {min(len(args_list), os.cpu_count())} processes")
    with multiprocessing.Pool(processes=min(len(args_list), os.cpu_count())) as pool:
        traces = pool.map(generate_trace, args_list)

    traces = [t for t in traces if t is not None]

    logger.info(f"Generated {len(traces)} traces.")
    return traces
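
# Illustrative example: with num_objects="100,1000", num_requests="10000", and
# alpha="0.1,0.2", the cross product yields 4 (m, n, a) combinations, generated
# in parallel across up to min(4, os.cpu_count()) worker processes.
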
def parse_perf_stat(perf_stat_output: str) -> Dict[str, float]:
    metrics_regex = {
        "cpu_utilization": r"([\d\.]+)\s+CPUs utilized",
        "task_clock_msec": r"([\d\.]+)\s+msec task-clock",
        "throughput": r"throughput\s+([\d\.]+)\s+MQPS",
        "context_switches": r"([\d\.]+)\s+context-switches",
        "cpu_migrations": r"([\d\.]+)\s+cpu-migrations",
        "cpu_cycles": r"([\d\.]+)\s+cycles",
        "instructions": r"([\d\.]+)\s+instructions",
        "ipc": r"([\d\.]+)\s+insn per cycle",
        "elapsed_time_sec": r"([\d\.]+)\s+seconds time elapsed",
        "user_time_sec": r"([\d\.]+)\s+seconds user",
        "sys_time_sec": r"([\d\.]+)\s+seconds sys",
    }

    perf_data = {}

    for key, regex in metrics_regex.items():
        match = re.search(regex, perf_stat_output)
        if match:
            try:
                # Every pattern above has exactly one capture group.
                perf_data[key] = float(match.group(1))
            except ValueError:
                logger.warning(f"Failed to convert {key} to float")

    return perf_data
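
# Illustrative example: given `perf stat` output containing lines such as
#     "          1.002 CPUs utilized"
#     "          3.141 seconds time elapsed"
# parse_perf_stat() returns {"cpu_utilization": 1.002, "elapsed_time_sec": 3.141}.
# The patterns expect plain decimal numbers; a metric absent from the output is
# simply skipped.
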
def run_cachesim(trace: str, algo: str, cache_size: str, ignore_obj_size: bool,
                 num_thread: int, trace_format: str, trace_format_params: str) -> Dict[str, float]:
    logger.info(f"Running perf with trace={trace}, algo={algo}, size={cache_size}")

    run_args = [
        "sudo", "perf", "stat", "-d",
        CACHESIM_PATH,
        trace,
        trace_format,
        algo,
        cache_size,
        "--ignore-obj-size", "1" if ignore_obj_size else "0",
        "--num-thread", str(num_thread),
    ]

    if trace_format_params:
        run_args.append("--trace-type-params")
        run_args.append(trace_format_params)

    p = subprocess.run(run_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

    stdout_str = p.stdout.decode("utf-8")
    perf_json = parse_perf_stat(stdout_str)

    if p.returncode != 0 or "Segmentation fault" in stdout_str:
        logger.warning("cachesim may have crashed with a segfault")
        perf_json = {}

    return perf_json
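
# Illustrative example (values are hypothetical): a call such as
#   run_cachesim("zipf.oracleGeneral", "fifo", "0.1", False, -1, "oracleGeneral", "")
# assembles roughly
#   sudo perf stat -d <CACHESIM_PATH> zipf.oracleGeneral oracleGeneral fifo 0.1 \
#       --ignore-obj-size 0 --num-thread -1
# stderr is merged into stdout so perf's report and any crash output can be
# scanned in one pass.
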
def generate_summary(results):
    summary_file = "result/throughput_log.csv"
    os.makedirs("result", exist_ok=True)

    df = pd.DataFrame(results)
    # algo and cache_size should be the 1st and 2nd columns
    column_order = ['algo', 'cache_size'] + [a for a in df.columns if a not in ['algo', 'cache_size']]
    df = df.reindex(columns=column_order)
    df.to_csv(summary_file, index=False)
    logger.info(f"Summary saved to {summary_file}")

    logger.info("Averaging across all traces")
    df = df.drop(columns=['trace'])
    avg_df = df.groupby(['algo', 'cache_size']).mean().reset_index()
    avg_df.to_csv("result/throughput_avg.csv", index=False)

    logger.info("Average summary saved to result/throughput_avg.csv")
def main():
    default_args = {
        "algos": "fifo,lfu,lhd,GLCache",
        "sizes": "0.1",
        "num_objects": "100,1000",
        "num_requests": "10000",
        "alpha": "0.1,0.2",
    }
    parser = argparse.ArgumentParser(
        description="Run cachesim with CPU monitoring"
    )
    parser.add_argument("--tracepath", type=str, required=False, help="Trace file path")
    parser.add_argument(
        "--num-objects", type=str, default=default_args["num_objects"],
        help="Number of objects"
    )
    parser.add_argument(
        "--num-requests", type=str, default=default_args["num_requests"],
        help="Number of requests"
    )
    parser.add_argument(
        "--alpha", type=str, default=default_args["alpha"],
        help="Zipf parameter"
    )
    parser.add_argument(
        "--algos", type=str,
        default=default_args["algos"],
        help="The algorithms to run, comma-separated"
    )
    parser.add_argument(
        "--sizes", type=str,
        default=default_args["sizes"],
        help="The cache sizes to run, comma-separated"
    )
    parser.add_argument("--trace-format", type=str, default="oracleGeneral")
    parser.add_argument(
        "--trace-format-params", type=str,
        default="", help="Used by CSV traces"
    )
    parser.add_argument(
        "--ignore-obj-size", action="store_true",
        default=False, help="Ignore object size"
    )
    parser.add_argument(
        "--num-thread", type=int, default=-1,
        help="Number of threads to use"
    )
    parser.add_argument("--name", type=str, default="")
    parser.add_argument(
        "--verbose", action="store_true", default=False,
        help="Enable verbose logging"
    )

    args = parser.parse_args()

    if args.verbose:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    # Parse arguments
    traces = args.tracepath.split(",") if args.tracepath else generate_synthetic_traces(
        num_objects=args.num_objects,
        num_requests=args.num_requests,
        alpha=args.alpha
    )
    algos = args.algos.split(",")
    cache_sizes = args.sizes.split(",")

    results = []
    # Run perf on cachesim over each combination of trace, algo, cache_size
    for trace in traces:
        for algo in algos:
            for cache_size in cache_sizes:
                result_json = run_cachesim(
                    trace=trace,
                    algo=algo,
                    cache_size=cache_size,
                    ignore_obj_size=args.ignore_obj_size,
                    num_thread=args.num_thread,
                    trace_format=args.trace_format,
                    trace_format_params=args.trace_format_params
                )
                result_json['algo'] = algo
                result_json['cache_size'] = cache_size
                result_json['trace'] = trace
                results.append(result_json)

    generate_summary(results)


if __name__ == "__main__":
    main()
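
An illustrative invocation (assumes perf and sudo access, a built cachesim binary at CACHESIM_PATH, and data_gen.py reachable from the working directory; the flag values here are examples, not requirements):

    python3 scripts/benchmark_throughput.py --algos fifo,lfu --sizes 0.1,0.2 --num-objects 1000 --num-requests 100000 --alpha 1.0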
