quantlibbenchmark.cpp source code [quantlib/test-suite/quantlibbenchmark.cpp]

/* -*- mode: c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
3	/*
4	Copyright (C) 2006, 2008, 2010, 2018, 2023 Klaus Spanderen
5
6	This file is part of QuantLib, a free-software/open-source library
7	for financial quantitative analysts and developers - http://quantlib.org/
8
9	QuantLib is free software: you can redistribute it and/or modify it
10	under the terms of the QuantLib license. You should have received a
11	copy of the license along with this program; if not, please email
12	<quantlib-dev@lists.sf.net>. The license is also available online at
13	<http://quantlib.org/license.shtml>.
14
15	This program is distributed in the hope that it will be useful, but WITHOUT
16	ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17	FOR A PARTICULAR PURPOSE. See the license for more details.
18	*/
19
20
21	/*
22	QuantLib Benchmark Suite
23
Measures the performance of a preselected set of numerically intensive
test cases. The overall QuantLib Benchmark Index is given by the average
performance in mflops. This benchmark supports multiprocessing, e.g.
27
28	Single process benchmark:
29	./quantlib-benchmark
30
31	Benchmark with 16 processes:
32	./quantlib-benchmark --mp=16
33
34	Benchmark with one process per core
35	./quantlib-benchmark --mp
36
37	The number of floating point operations of a given test case was measured
38	using the perfex library, http://user.it.uu.se/~mikpe/linux/perfctr
39	and PAPI, http://icl.cs.utk.edu/papi
40
41	Example results: 1. i7 7820X@3.6GHz :24192.2 mflops
42	2. i7 4702HQ@2.2GHz : 6524.9 mflops
43	3. i7 870@2.93GHz : 4759.2 mflops
44	4. Core2 Q9300@2.5Ghz : 2272.6 mflops
45	5. Core2 Q6600@2.4Ghz : 1984.0 mflops
46	6. i3 540@3.1Ghz : 1755.3 mflops
47	7. Raspberry Pi4@1.5GHz : 1704.2 mflops
48	8. Core2 Dual@2.0Ghz : 835.9 mflops
49	9. Athlon 64 X2 4400+ : 824.2 mflops
50	10. Cortex-A57@2.0GHz : 821.7 mflops
51	11. Core2 Dual@2.0Ghz : 754.1 mflops
52	12. Pentium4 Dual@2.8Ghz : 423.8 mflops
53	13. Raspberry Pi3@1.2GHz : 309.2 mflops
54	14. Pentium4@3.0Ghz : 266.3 mflops
55	15. PentiumIII@1.1Ghz : 146.2 mflops
56	16. Alpha 2xEV68@833Mhz : 184.6 mflops
57	17. Wii PowerPC 750@729MHz : 46.1 mflops
58	18. Raspberry Pi ARM@700Mhz: 28.3 mflops
59	19. MIPS R5000@150MHz : 12.6 mflops
60	20. RISC-V on FPGA@25Mhz : 2.4 mflops
61	21. Strong ARM@206Mhz : 1.4 mflops
62	22. SPARC v7@25MHz : 0.78mflops
63
64	Remarks: OS: Linux, static libs
65	1. g++-6.3.0 -O3 -ffast-math -march=core-avx2
66	Remark: 16 processes
67	2. g++-4.8.1 -O3 -ffast-math -march=core-avx2
68	Remark: eight processes
69	3. gcc-4.6.3, -O3 -ffast-math -mfpmath=sse,387 -march=corei7
70	Remark: eight processes
71	4. icc-11.0, -gcc-version=420 -fast -fp-model fast=2 -ipo-jobs2
72	Remark: four processes
73	5. icc-11.0, -gcc-version=420 -fast -fp-model fast=2 -ipo-jobs2
74	Remark: four processes
75	6. gcc-4.4.5, -O3 -ffast-math -mfpmath=sse,387 -msse4.2 -march=core2
76	Remark: four processes
77	7. gcc-8.3.0, -O3 -ffast-math -mcpu=cortx-a8 -mfpu=neon-fp-armv8
78	Remark: four processes
79	8. icc-11.0, -gcc-version=420 -fast -fp-model fast=2 -ipo-jobs2
80	Remark: two processes
81	9. icc-11.0, -gcc-version=420 -xSSSE3 -O3 -ipo -no-prec-div -static
82	-fp-model fast=2 -ipo-jobs2, Remark: two processes
83	10. clang++-6.0.1 -O2, Remark: four processes
84	11. gcc-4.2.1, -O3 -ffast-math -mfpmath=sse,387 -msse3 -funroll-all-loops
85	Remark: two processes
86	12. gcc-4.0.1, -O3 -march=pentium4 -ffast-math
87	-mfpmath=sse,387 -msse2 -funroll-all-loops, Remark: two processes
88	13. gcc-4.9.2 -O2, Remark: four processes
89	14. gcc-4.0.1, -O3 -march=pentium4 -ffast-math
90	-mfpmath=sse,387 -msse2 -funroll-all-loops
91	15. gcc-4.1.1, -O3 -march=pentium3 -ffast-math
92	-mfpmath=sse,387 -msse -funroll-all-loops
93	16. gcc-3.3.5, -O3 -mcpu=e67 -funroll-all-loops, Remark: two processes
94	17. gcc-4.9.2, -O2 -g on a Nintendo Wii
95	18. gcc-4.6.3, -O3
96	19. gcc-4-7-4, -O2 on a SGI Indy
97	20. gcc-9.2, -O2 on RISC-V softcore on an Artix7 100T FPGA
98	21. gcc-3.4.3, -O2 -g on a Zaurus PDA
99	22. gcc-7.5.0, -O2 on a Sun SPARCstation IPC, FPU: Weitek 3170
100
101	This benchmark is derived from quantlibtestsuite.cpp. Please see the
102	copyrights therein.
103	*/
104
105	#include <ql/types.hpp>
106	#include <ql/version.hpp>
107
108	#ifdef QL_ENABLE_PARALLEL_UNIT_TEST_RUNNER
109	#include <boost/process.hpp>
110	#include <boost/interprocess/ipc/message_queue.hpp>
111	#endif
112
113	#define BOOST_TEST_NO_MAIN 1
114	#include <boost/test/included/unit_test.hpp>
115
116	#include <boost/algorithm/string.hpp>
117	#include <boost/numeric/conversion/cast.hpp>
118
119	#include <iomanip>
120	#include <iostream>
121	#include <vector>
122	#include <string>
123	#include <utility>
124	#include <chrono>
125	#include <thread>
126
/* PAPI code
#include <stdio.h>
#include <papi.h>
*/
131
/* Use BOOST_MSVC instead of _MSC_VER since some other vendors (Metrowerks,
   for example) also #define _MSC_VER
*/
135	#if !defined(BOOST_ALL_NO_LIB) && defined(BOOST_MSVC)
136	# include <ql/auto_link.hpp>
137	#endif
138
139	#include "utilities.hpp"
140	#include "americanoption.hpp"
141	#include "asianoptions.hpp"
142	#include "barrieroption.hpp"
143	#include "basketoption.hpp"
144	#include "batesmodel.hpp"
145	#include "convertiblebonds.hpp"
146	#include "digitaloption.hpp"
147	#include "dividendoption.hpp"
148	#include "europeanoption.hpp"
149	#include "fdheston.hpp"
150	#include "hestonmodel.hpp"
151	#include "interpolations.hpp"
152	#include "jumpdiffusion.hpp"
153	#include "marketmodel_smm.hpp"
154	#include "marketmodel_cms.hpp"
155	#include "lowdiscrepancysequences.hpp"
156	#include "quantooption.hpp"
157	#include "riskstats.hpp"
158	#include "shortratemodels.hpp"
159
160
161	namespace {
162
/*! One benchmark case: a display name, the function that runs the test and
    the total number of mega floating point operations (not per second)
    attributed to one run of that function.
*/
class Benchmark {
  public:
    //! Signature of a runnable test case: no arguments, no result.
    typedef void (*fct_ptr)();

    /*! \param name   display name used when reporting results
        \param f      function executing the test case
        \param mflop  total mega floating point operations of one run
    */
    Benchmark(std::string name, fct_ptr f, double mflop)
    : f_(f), name_(std::move(name)), mflop_(mflop) {}

    //! Function that executes the test case.
    fct_ptr getTestCase() const {
        return f_;
    }
    //! Total mega floating point operations of one run.
    double getMflop() const {
        return mflop_;
    }
    //! Display name of the benchmark (returned as a copy).
    std::string getName() const {
        return name_;
    }
    //! Exchanges all three members with those of \p other.
    void swap(Benchmark& other) noexcept {
        using std::swap; // ADL-friendly swap
        swap(f_, other.f_);
        swap(name_, other.name_);
        swap(mflop_, other.mflop_);
    }

  private:
    fct_ptr f_;
    std::string name_;
    double mflop_; // total number of mega floating
                   // point operations (not per sec!)
};
189
190	std::vector<Benchmark> bm = {
191	Benchmark ("AmericanOption::FdAmericanGreeks", &AmericanOptionTest::testFdAmericanGreeks, `518.31`),
192	Benchmark ("AsianOption::MCArithmeticAveragePrice", &AsianOptionTest::testMCDiscreteArithmeticAveragePrice, `5186.13`),
193	Benchmark ("BarrierOption::BabsiriValues", &BarrierOptionTest::testBabsiriValues, `880.8`),
194	Benchmark ("BasketOption::EuroTwoValues", &BasketOptionTest::testEuroTwoValues, `340.04`),
195	Benchmark ("BasketOption::TavellaValues", &BasketOptionTest::testTavellaValues, `933.80`),
196	Benchmark ("BasketOption::OddSamples", &BasketOptionTest::testOddSamples, `642.46`),
197	Benchmark ("BatesModel::DAXCalibration", &BatesModelTest::testDAXCalibration, `1993.35`),
198	Benchmark ("ConvertibleBondTest::testBond", &ConvertibleBondTest::testBond, `159.85`),
199	Benchmark ("DigitalOption::MCCashAtHit", &DigitalOptionTest::testMCCashAtHit, `995.87`),
200	Benchmark ("DividendOption::FdEuropeanGreeks", &DividendOptionTest::testFdEuropeanGreeks, `949.52`),
201	Benchmark ("DividendOption::FdAmericanGreeks", &DividendOptionTest::testFdAmericanGreeks, `1113.74`),
202	Benchmark ("EuropeanOption::FdMcEngines", &EuropeanOptionTest::testMcEngines, `1988.63`),
203	Benchmark ("EuropeanOption::ImpliedVol", &EuropeanOptionTest::testImpliedVol, `131.51`),
204	Benchmark ("EuropeanOption::FdEngines", &EuropeanOptionTest::testFdEngines, `148.43`),
205	Benchmark ("FdHestonTest::testFdmHestonAmerican", &FdHestonTest::testFdmHestonAmerican, `234.21`),
206	Benchmark ("HestonModel::DAXCalibration", &HestonModelTest::testDAXCalibration, `555.19`),
207	Benchmark ("InterpolationTest::testSabrInterpolation", &InterpolationTest::testSabrInterpolation, `2266.06`),
208	Benchmark ("JumpDiffusion::Greeks", &JumpDiffusionTest::testGreeks, `433.77`),
209	Benchmark ("MarketModelCmsTest::testCmSwapsSwaptions", &MarketModelCmsTest::testMultiStepCmSwapsAndSwaptions, `11497.73`),
210	Benchmark ("MarketModelSmmTest::testMultiSmmSwaptions", &MarketModelSmmTest::testMultiStepCoterminalSwapsAndSwaptions, `11244.95`),
211	Benchmark ("QuantoOption::ForwardGreeks", &QuantoOptionTest::testForwardGreeks, `90.98`),
212	Benchmark ("RandomNumber::MersenneTwisterDescrepancy", &LowDiscrepancyTest::testMersenneTwisterDiscrepancy, `951.98`),
213	Benchmark ("RiskStatistics::Results", &RiskStatisticsTest::testResults, `300.28`),
214	Benchmark ("ShortRateModel::Swaps", &ShortRateModelTest::testSwaps, `454.73`)
215	};
216
/* PAPI code
float real_time, proc_time, mflops;
long_long lflop, flop=0;
*/
221
222	class TimedBenchmark {
223	public:
224	typedef void (*fct_ptr)();
225	explicit TimedBenchmark(fct_ptr f) : f_(f) {}
226
227	void startMeasurement() const {
228	/ PAPI code*
229	lflop = flop;
230	PAPI_flops(&real_time, &proc_time, &flop, &mflops);
231	*/
232	}
233
234	void stopMeasurement() const {
235	/ PAPI code*
236	PAPI_flops(&real_time, &proc_time, &flop, &mflops);
237	printf("Real_time: %f Proc_time: %f Total mflop: %f\n",
238	real_time, proc_time, (flop-lflop)/1e6);
239	*/
240	}
241
242	double operator()() const {
243	startMeasurement();
244	auto startTime = std::chrono::steady_clock::now();
245	BOOST_CHECK(true); // to prevent no-assertion warning
246	f_();
247	auto stopTime = std::chrono::steady_clock::now();
248	stopMeasurement();
249	return std::chrono::duration_cast<std::chrono::microseconds>(
250	d: stopTime - startTime).count() * `1e-6`;
251	}
252	private:
253	fct_ptr f_;
254	};
255
256	void printResults(
257	unsigned nProc,
258	std::vector<std::pair<Benchmark, double> >& runTimes) {
259
260	const std::string header = "Benchmark Suite QuantLib " QL_VERSION;
261
262	std::cout << std::endl << std::string (`58`,`'-'`) << std::endl;
263	std::cout << header << std::endl;
264	std::cout << std::string (`58`,`'-'`) << std::endl << std::endl;
265
266	std::sort(first: runTimes.begin(), last: runTimes.end(),
267	comp: [](const auto& a, const auto& b) {
268	return a.first.getName() < b.first.getName();
269	}
270	);
271
272	std::vector<std::tuple<Benchmark, int, double> > aggTimes;
273	for (const auto& iter: runTimes) {
274	if (aggTimes.empty()
275	\|\| std::get<`0`>(t&: aggTimes.back()).getName()
276	!= iter.first.getName()) {
277	aggTimes.emplace_back(args: iter.first, args: `1`, args: iter.second);
278	}
279	else {
280	++std::get<`1`>(t&: aggTimes.back());
281	std::get<`2`>(t&: aggTimes.back()) += iter.second;
282	}
283	}
284
285	double sum=`0`;
286	for (const auto& iterT: aggTimes) {
287	const double mflopsPerSec
288	= std::get<`0`>(t: iterT).getMflop() / std::get<`2`>(t: iterT)
289	* nProc * std::get<`1`>(t: iterT);
290
291	std::cout << std::get<`0`>(t: iterT).getName()
292	<< std::string (`42`-std::get<`0`>(t: iterT).getName().length(),`' '`)
293	<< ":" << std::fixed << std::setw(`8`) << std::setprecision(`1`)
294	<< mflopsPerSec
295	<< " mflops" << std::endl;
296
297	sum+=mflopsPerSec;
298	}
299	std::cout << std::string (`58`,`'-'`) << std::endl
300	<< "QuantLib Benchmark Index :"
301	<< std::fixed << std::setw(`8`) << std::setprecision(`1`)
302	<< sum/aggTimes.size()
303	<< " mflops" << std::endl;
304	}
#ifdef QL_ENABLE_PARALLEL_UNIT_TEST_RUNNER
/*! Starts the executable \p exe with the command line arguments \p args via
    boost::process::system and hands back whatever that call returns.
    NOTE(review): boost::process::system is expected to block until the
    child terminates and to yield its exit code - confirm against the
    Boost.Process documentation.
*/
int worker(const char* exe, const std::vector<std::string>& args) {
    const int result = boost::process::system(exe, boost::process::args = args);
    return result;
}
#endif
310	}
311
312	int main(int argc, char* argv[] ) {
313	const std::string clientModeStr = "--client_mode=true";
314	bool clientMode = false;
315
316	unsigned nProc = `1`;
317	std::vector<std::pair<Benchmark, double> > runTimes;
318
319	for (int i=`1`; i<argc; ++i) {
320	std::string arg = argv[i];
321	std::vector<std::string> tok;
322	boost::split(Result&: tok, Input&: arg, Pred: boost::is_any_of(Set: "="));
323
324	if (tok [`0`] == "--mp") {
325	nProc = (tok.size() == `2`)
326	? boost::numeric_cast<unsigned>(arg: std::stoul(str: tok [`1`]))
327	: std::thread::hardware_concurrency();
328	}
329	else if (arg == "--help" \|\| arg == "-?") {
330	std::cout
331	<< "'quantlib-benchmark' is QuantLib " QL_VERSION " CPU performance benchmark"
332	<< std::endl << std::endl
333	<< "Usage: ./quantlib-benchmark [OPTION]..."
334	<< std::endl << std::endl
335	<< "with the following options:"
336	<< std::endl
337	#ifdef QL_ENABLE_PARALLEL_UNIT_TEST_RUNNER
338	<< "--mp[=PROCESSES] \t parallel execution with PROCESSES processes"
339	<< std::endl
340	#endif
341	<< "-?, --help \t\t display this help and exit"
342	<< std::endl;
343	return `0`;
344	}
345	else if (arg == clientModeStr) {
346	clientMode = true;
347	}
348	else {
349	std::cout << "quantlib-benchmark: unrecognized option '" << arg << "'."
350	<< std::endl
351	<< "Try 'quantlib-benchmark --help' for more information."
352	<< std::endl;
353	return `0`;
354	}
355	}
356
357	if (nProc == `1` && !clientMode) {
358	std::for_each(first: bm.begin(), last: bm.end(),
359	f: [&runTimes](const Benchmark& iter) {
360	runTimes.emplace_back(
361	args: iter, args: TimedBenchmark (iter.getTestCase())());
362	});
363	printResults(nProc, runTimes);
364	}
365	else {
366	#ifdef QL_ENABLE_PARALLEL_UNIT_TEST_RUNNER
367	using namespace boost::interprocess;
368
369	typedef std::pair<unsigned, double> result_type;
370
371	message_queue::size_type recvd_size;
372	unsigned priority, terminateId=-`1`;
373
374	const char* const testUnitIdQueueName = "test_unit_queue";
375	const char* const testResultQueueName = "test_result_queue";
376
377	if (!clientMode) {
378	message_queue::remove(testUnitIdQueueName);
379	message_queue::remove(testResultQueueName);
380	struct queue_remove {
381	explicit queue_remove(const char* name) : name_(name) { }
382	~queue_remove() { message_queue::remove(name_); }
383
384	private:
385	const char* const name_;
386	} remover1(testUnitIdQueueName),remover2(testResultQueueName);
387
388	message_queue mq(
389	open_or_create, testUnitIdQueueName,
390	nProcbm.size(), sizeof(unsigned*)
391	);
392	message_queue rq(
393	open_or_create, testResultQueueName, `16`, sizeof(result_type));
394
395	const std::vector<std::string> workerArgs(`1`, clientModeStr);
396	std::vector<std::thread> threadGroup;
397	for (unsigned i = `0`; i < nProc; ++i) {
398	threadGroup.emplace_back([&]() { worker(argv[`0`], workerArgs); });
399	}
400
401	for (unsigned i=`0`; i < nProc; ++i)
402	for (unsigned j=`0`; j < bm.size(); ++j)
403	mq.send(&j, sizeof(unsigned), `0`);
404
405	result_type r;
406	for (unsigned i = `0`; i < nProc*bm.size(); ++i) {
407	rq.receive(&r, sizeof(result_type), recvd_size, priority);
408	runTimes.push_back(std::make_pair(bm[r.first], r.second));
409	}
410	for (unsigned i=`0`; i < nProc; ++i) {
411	mq.send(&terminateId, sizeof(unsigned), `0`);
412	}
413	for (auto& thread: threadGroup) {
414	thread.join();
415	}
416	printResults(nProc, runTimes);
417	}
418	else {
419	message_queue mq(open_only, testUnitIdQueueName);
420	message_queue rq(open_only, testResultQueueName);
421
422	unsigned id=`0`;
423	mq.receive(&id, sizeof(unsigned), recvd_size, priority);
424
425	while (id != terminateId) {
426	result_type a(id, TimedBenchmark(bm[id].getTestCase())());
427	rq.send(&a, sizeof(result_type), `0`);
428
429	mq.receive(&id, sizeof(unsigned), recvd_size, priority);
430	}
431	}
432	#else
433	std::cout << "Please compile QuantLib with option 'QL_ENABLE_PARALLEL_UNIT_TEST_RUNNER'"
434	" to run the benchmarks in parallel" << std::endl;
435	#endif
436	}
437
438	return `0`;
439	}
440

source code of quantlib/test-suite/quantlibbenchmark.cpp