1/* -*- mode: c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
3/*
4 Copyright (C) 2006, 2008, 2010, 2018, 2023 Klaus Spanderen
5
6 This file is part of QuantLib, a free-software/open-source library
7 for financial quantitative analysts and developers - http://quantlib.org/
8
9 QuantLib is free software: you can redistribute it and/or modify it
10 under the terms of the QuantLib license. You should have received a
11 copy of the license along with this program; if not, please email
12 <quantlib-dev@lists.sf.net>. The license is also available online at
13 <http://quantlib.org/license.shtml>.
14
15 This program is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the license for more details.
18*/
19
20
21/*
22 QuantLib Benchmark Suite
23
24 Measures the performance of a preselected set of numerically intensive
25 test cases. The overall QuantLib Benchmark Index is given by the average
 performance in mflops. This benchmark supports multiprocessing, e.g.
27
28 Single process benchmark:
29 ./quantlib-benchmark
30
31 Benchmark with 16 processes:
32 ./quantlib-benchmark --mp=16
33
34 Benchmark with one process per core
35 ./quantlib-benchmark --mp
36
37 The number of floating point operations of a given test case was measured
38 using the perfex library, http://user.it.uu.se/~mikpe/linux/perfctr
39 and PAPI, http://icl.cs.utk.edu/papi
40
41 Example results: 1. i7 7820X@3.6GHz :24192.2 mflops
42 2. i7 4702HQ@2.2GHz : 6524.9 mflops
43 3. i7 870@2.93GHz : 4759.2 mflops
44 4. Core2 Q9300@2.5Ghz : 2272.6 mflops
45 5. Core2 Q6600@2.4Ghz : 1984.0 mflops
46 6. i3 540@3.1Ghz : 1755.3 mflops
47 7. Raspberry Pi4@1.5GHz : 1704.2 mflops
48 8. Core2 Dual@2.0Ghz : 835.9 mflops
49 9. Athlon 64 X2 4400+ : 824.2 mflops
50 10. Cortex-A57@2.0GHz : 821.7 mflops
51 11. Core2 Dual@2.0Ghz : 754.1 mflops
52 12. Pentium4 Dual@2.8Ghz : 423.8 mflops
53 13. Raspberry Pi3@1.2GHz : 309.2 mflops
54 14. Pentium4@3.0Ghz : 266.3 mflops
55 15. PentiumIII@1.1Ghz : 146.2 mflops
56 16. Alpha 2xEV68@833Mhz : 184.6 mflops
57 17. Wii PowerPC 750@729MHz : 46.1 mflops
58 18. Raspberry Pi ARM@700Mhz: 28.3 mflops
59 19. MIPS R5000@150MHz : 12.6 mflops
60 20. RISC-V on FPGA@25Mhz : 2.4 mflops
61 21. Strong ARM@206Mhz : 1.4 mflops
62 22. SPARC v7@25MHz : 0.78mflops
63
64 Remarks: OS: Linux, static libs
65 1. g++-6.3.0 -O3 -ffast-math -march=core-avx2
66 Remark: 16 processes
67 2. g++-4.8.1 -O3 -ffast-math -march=core-avx2
68 Remark: eight processes
69 3. gcc-4.6.3, -O3 -ffast-math -mfpmath=sse,387 -march=corei7
70 Remark: eight processes
71 4. icc-11.0, -gcc-version=420 -fast -fp-model fast=2 -ipo-jobs2
72 Remark: four processes
73 5. icc-11.0, -gcc-version=420 -fast -fp-model fast=2 -ipo-jobs2
74 Remark: four processes
75 6. gcc-4.4.5, -O3 -ffast-math -mfpmath=sse,387 -msse4.2 -march=core2
76 Remark: four processes
77 7. gcc-8.3.0, -O3 -ffast-math -mcpu=cortx-a8 -mfpu=neon-fp-armv8
78 Remark: four processes
79 8. icc-11.0, -gcc-version=420 -fast -fp-model fast=2 -ipo-jobs2
80 Remark: two processes
81 9. icc-11.0, -gcc-version=420 -xSSSE3 -O3 -ipo -no-prec-div -static
82 -fp-model fast=2 -ipo-jobs2, Remark: two processes
83 10. clang++-6.0.1 -O2, Remark: four processes
84 11. gcc-4.2.1, -O3 -ffast-math -mfpmath=sse,387 -msse3 -funroll-all-loops
85 Remark: two processes
86 12. gcc-4.0.1, -O3 -march=pentium4 -ffast-math
87 -mfpmath=sse,387 -msse2 -funroll-all-loops, Remark: two processes
88 13. gcc-4.9.2 -O2, Remark: four processes
89 14. gcc-4.0.1, -O3 -march=pentium4 -ffast-math
90 -mfpmath=sse,387 -msse2 -funroll-all-loops
91 15. gcc-4.1.1, -O3 -march=pentium3 -ffast-math
92 -mfpmath=sse,387 -msse -funroll-all-loops
93 16. gcc-3.3.5, -O3 -mcpu=e67 -funroll-all-loops, Remark: two processes
94 17. gcc-4.9.2, -O2 -g on a Nintendo Wii
95 18. gcc-4.6.3, -O3
96 19. gcc-4-7-4, -O2 on a SGI Indy
97 20. gcc-9.2, -O2 on RISC-V softcore on an Artix7 100T FPGA
98 21. gcc-3.4.3, -O2 -g on a Zaurus PDA
99 22. gcc-7.5.0, -O2 on a Sun SPARCstation IPC, FPU: Weitek 3170
100
101 This benchmark is derived from quantlibtestsuite.cpp. Please see the
102 copyrights therein.
103*/
104
105#include <ql/types.hpp>
106#include <ql/version.hpp>
107
108#ifdef QL_ENABLE_PARALLEL_UNIT_TEST_RUNNER
109#include <boost/process.hpp>
110#include <boost/interprocess/ipc/message_queue.hpp>
111#endif
112
113#define BOOST_TEST_NO_MAIN 1
114#include <boost/test/included/unit_test.hpp>
115
116#include <boost/algorithm/string.hpp>
117#include <boost/numeric/conversion/cast.hpp>
118
#include <algorithm>
#include <chrono>
#include <exception>
#include <iomanip>
#include <iostream>
#include <string>
#include <thread>
#include <tuple>
#include <utility>
#include <vector>
126
127/* PAPI code
#include <stdio.h>
129#include <papi.h>
130*/
131
132/* Use BOOST_MSVC instead of _MSC_VER since some other vendors (Metrowerks,
133 for example) also #define _MSC_VER
134*/
135#if !defined(BOOST_ALL_NO_LIB) && defined(BOOST_MSVC)
136# include <ql/auto_link.hpp>
137#endif
138
139#include "utilities.hpp"
140#include "americanoption.hpp"
141#include "asianoptions.hpp"
142#include "barrieroption.hpp"
143#include "basketoption.hpp"
144#include "batesmodel.hpp"
145#include "convertiblebonds.hpp"
146#include "digitaloption.hpp"
147#include "dividendoption.hpp"
148#include "europeanoption.hpp"
149#include "fdheston.hpp"
150#include "hestonmodel.hpp"
151#include "interpolations.hpp"
152#include "jumpdiffusion.hpp"
153#include "marketmodel_smm.hpp"
154#include "marketmodel_cms.hpp"
155#include "lowdiscrepancysequences.hpp"
156#include "quantooption.hpp"
157#include "riskstats.hpp"
158#include "shortratemodels.hpp"
159
160
161namespace {
162
/*! Descriptor of a single benchmark case: the function to execute,
    the name shown in the report and the total number of mega floating
    point operations the function performs.
*/
class Benchmark {
  public:
    typedef void (*fct_ptr)();

    Benchmark(std::string name, fct_ptr f, double mflop)
    : f_(f), name_(std::move(name)), mflop_(mflop) {}

    //! function that executes the test case
    fct_ptr getTestCase() const {
        return f_;
    }
    //! total mega floating point operations of the test case (not per sec!)
    double getMflop() const {
        return mflop_;
    }
    //! name of the test case
    std::string getName() const {
        return name_;
    }
    //! exchanges all members of this descriptor with those of other
    void swap(Benchmark& other) noexcept {
        std::swap(f_, other.f_);
        std::swap(name_, other.name_);
        std::swap(mflop_, other.mflop_);
    }
  private:
    fct_ptr f_;
    std::string name_;
    double mflop_; // total number of mega floating
                   // point operations (not per sec!)
};
189
190 std::vector<Benchmark> bm = {
191 Benchmark("AmericanOption::FdAmericanGreeks", &AmericanOptionTest::testFdAmericanGreeks, 518.31),
192 Benchmark("AsianOption::MCArithmeticAveragePrice", &AsianOptionTest::testMCDiscreteArithmeticAveragePrice, 5186.13),
193 Benchmark("BarrierOption::BabsiriValues", &BarrierOptionTest::testBabsiriValues, 880.8),
194 Benchmark("BasketOption::EuroTwoValues", &BasketOptionTest::testEuroTwoValues, 340.04),
195 Benchmark("BasketOption::TavellaValues", &BasketOptionTest::testTavellaValues, 933.80),
196 Benchmark("BasketOption::OddSamples", &BasketOptionTest::testOddSamples, 642.46),
197 Benchmark("BatesModel::DAXCalibration", &BatesModelTest::testDAXCalibration, 1993.35),
198 Benchmark("ConvertibleBondTest::testBond", &ConvertibleBondTest::testBond, 159.85),
199 Benchmark("DigitalOption::MCCashAtHit", &DigitalOptionTest::testMCCashAtHit, 995.87),
200 Benchmark("DividendOption::FdEuropeanGreeks", &DividendOptionTest::testFdEuropeanGreeks, 949.52),
201 Benchmark("DividendOption::FdAmericanGreeks", &DividendOptionTest::testFdAmericanGreeks, 1113.74),
202 Benchmark("EuropeanOption::FdMcEngines", &EuropeanOptionTest::testMcEngines, 1988.63),
203 Benchmark("EuropeanOption::ImpliedVol", &EuropeanOptionTest::testImpliedVol, 131.51),
204 Benchmark("EuropeanOption::FdEngines", &EuropeanOptionTest::testFdEngines, 148.43),
205 Benchmark("FdHestonTest::testFdmHestonAmerican", &FdHestonTest::testFdmHestonAmerican, 234.21),
206 Benchmark("HestonModel::DAXCalibration", &HestonModelTest::testDAXCalibration, 555.19),
207 Benchmark("InterpolationTest::testSabrInterpolation", &InterpolationTest::testSabrInterpolation, 2266.06),
208 Benchmark("JumpDiffusion::Greeks", &JumpDiffusionTest::testGreeks, 433.77),
209 Benchmark("MarketModelCmsTest::testCmSwapsSwaptions", &MarketModelCmsTest::testMultiStepCmSwapsAndSwaptions, 11497.73),
210 Benchmark("MarketModelSmmTest::testMultiSmmSwaptions", &MarketModelSmmTest::testMultiStepCoterminalSwapsAndSwaptions, 11244.95),
211 Benchmark("QuantoOption::ForwardGreeks", &QuantoOptionTest::testForwardGreeks, 90.98),
212 Benchmark("RandomNumber::MersenneTwisterDescrepancy", &LowDiscrepancyTest::testMersenneTwisterDiscrepancy, 951.98),
213 Benchmark("RiskStatistics::Results", &RiskStatisticsTest::testResults, 300.28),
214 Benchmark("ShortRateModel::Swaps", &ShortRateModelTest::testSwaps, 454.73)
215 };
216
217 /* PAPI code
218 float real_time, proc_time, mflops;
219 long_long lflop, flop=0;
220 */
221
222 class TimedBenchmark {
223 public:
224 typedef void (*fct_ptr)();
225 explicit TimedBenchmark(fct_ptr f) : f_(f) {}
226
227 void startMeasurement() const {
228 /* PAPI code
229 lflop = flop;
230 PAPI_flops(&real_time, &proc_time, &flop, &mflops);
231 */
232 }
233
234 void stopMeasurement() const {
235 /* PAPI code
236 PAPI_flops(&real_time, &proc_time, &flop, &mflops);
237 printf("Real_time: %f Proc_time: %f Total mflop: %f\n",
238 real_time, proc_time, (flop-lflop)/1e6);
239 */
240 }
241
242 double operator()() const {
243 startMeasurement();
244 auto startTime = std::chrono::steady_clock::now();
245 BOOST_CHECK(true); // to prevent no-assertion warning
246 f_();
247 auto stopTime = std::chrono::steady_clock::now();
248 stopMeasurement();
249 return std::chrono::duration_cast<std::chrono::microseconds>(
250 d: stopTime - startTime).count() * 1e-6;
251 }
252 private:
253 fct_ptr f_;
254 };
255
256 void printResults(
257 unsigned nProc,
258 std::vector<std::pair<Benchmark, double> >& runTimes) {
259
260 const std::string header = "Benchmark Suite QuantLib " QL_VERSION;
261
262 std::cout << std::endl << std::string(58,'-') << std::endl;
263 std::cout << header << std::endl;
264 std::cout << std::string(58,'-') << std::endl << std::endl;
265
266 std::sort(first: runTimes.begin(), last: runTimes.end(),
267 comp: [](const auto& a, const auto& b) {
268 return a.first.getName() < b.first.getName();
269 }
270 );
271
272 std::vector<std::tuple<Benchmark, int, double> > aggTimes;
273 for (const auto& iter: runTimes) {
274 if (aggTimes.empty()
275 || std::get<0>(t&: aggTimes.back()).getName()
276 != iter.first.getName()) {
277 aggTimes.emplace_back(args: iter.first, args: 1, args: iter.second);
278 }
279 else {
280 ++std::get<1>(t&: aggTimes.back());
281 std::get<2>(t&: aggTimes.back()) += iter.second;
282 }
283 }
284
285 double sum=0;
286 for (const auto& iterT: aggTimes) {
287 const double mflopsPerSec
288 = std::get<0>(t: iterT).getMflop() / std::get<2>(t: iterT)
289 * nProc * std::get<1>(t: iterT);
290
291 std::cout << std::get<0>(t: iterT).getName()
292 << std::string(42-std::get<0>(t: iterT).getName().length(),' ')
293 << ":" << std::fixed << std::setw(8) << std::setprecision(1)
294 << mflopsPerSec
295 << " mflops" << std::endl;
296
297 sum+=mflopsPerSec;
298 }
299 std::cout << std::string(58,'-') << std::endl
300 << "QuantLib Benchmark Index :"
301 << std::fixed << std::setw(8) << std::setprecision(1)
302 << sum/aggTimes.size()
303 << " mflops" << std::endl;
304 }
#ifdef QL_ENABLE_PARALLEL_UNIT_TEST_RUNNER
//! Runs exe with the given arguments through boost::process::system
//! and passes on the value that call returns.
int worker(const char* exe, const std::vector<std::string>& args) {
    namespace bp = boost::process;
    const int result = bp::system(exe, bp::args = args);
    return result;
}
#endif
310}
311
312int main(int argc, char* argv[] ) {
313 const std::string clientModeStr = "--client_mode=true";
314 bool clientMode = false;
315
316 unsigned nProc = 1;
317 std::vector<std::pair<Benchmark, double> > runTimes;
318
319 for (int i=1; i<argc; ++i) {
320 std::string arg = argv[i];
321 std::vector<std::string> tok;
322 boost::split(Result&: tok, Input&: arg, Pred: boost::is_any_of(Set: "="));
323
324 if (tok[0] == "--mp") {
325 nProc = (tok.size() == 2)
326 ? boost::numeric_cast<unsigned>(arg: std::stoul(str: tok[1]))
327 : std::thread::hardware_concurrency();
328 }
329 else if (arg == "--help" || arg == "-?") {
330 std::cout
331 << "'quantlib-benchmark' is QuantLib " QL_VERSION " CPU performance benchmark"
332 << std::endl << std::endl
333 << "Usage: ./quantlib-benchmark [OPTION]..."
334 << std::endl << std::endl
335 << "with the following options:"
336 << std::endl
337#ifdef QL_ENABLE_PARALLEL_UNIT_TEST_RUNNER
338 << "--mp[=PROCESSES] \t parallel execution with PROCESSES processes"
339 << std::endl
340#endif
341 << "-?, --help \t\t display this help and exit"
342 << std::endl;
343 return 0;
344 }
345 else if (arg == clientModeStr) {
346 clientMode = true;
347 }
348 else {
349 std::cout << "quantlib-benchmark: unrecognized option '" << arg << "'."
350 << std::endl
351 << "Try 'quantlib-benchmark --help' for more information."
352 << std::endl;
353 return 0;
354 }
355 }
356
357 if (nProc == 1 && !clientMode) {
358 std::for_each(first: bm.begin(), last: bm.end(),
359 f: [&runTimes](const Benchmark& iter) {
360 runTimes.emplace_back(
361 args: iter, args: TimedBenchmark(iter.getTestCase())());
362 });
363 printResults(nProc, runTimes);
364 }
365 else {
366#ifdef QL_ENABLE_PARALLEL_UNIT_TEST_RUNNER
367 using namespace boost::interprocess;
368
369 typedef std::pair<unsigned, double> result_type;
370
371 message_queue::size_type recvd_size;
372 unsigned priority, terminateId=-1;
373
374 const char* const testUnitIdQueueName = "test_unit_queue";
375 const char* const testResultQueueName = "test_result_queue";
376
377 if (!clientMode) {
378 message_queue::remove(testUnitIdQueueName);
379 message_queue::remove(testResultQueueName);
380 struct queue_remove {
381 explicit queue_remove(const char* name) : name_(name) { }
382 ~queue_remove() { message_queue::remove(name_); }
383
384 private:
385 const char* const name_;
386 } remover1(testUnitIdQueueName),remover2(testResultQueueName);
387
388 message_queue mq(
389 open_or_create, testUnitIdQueueName,
390 nProc*bm.size(), sizeof(unsigned)
391 );
392 message_queue rq(
393 open_or_create, testResultQueueName, 16, sizeof(result_type));
394
395 const std::vector<std::string> workerArgs(1, clientModeStr);
396 std::vector<std::thread> threadGroup;
397 for (unsigned i = 0; i < nProc; ++i) {
398 threadGroup.emplace_back([&]() { worker(argv[0], workerArgs); });
399 }
400
401 for (unsigned i=0; i < nProc; ++i)
402 for (unsigned j=0; j < bm.size(); ++j)
403 mq.send(&j, sizeof(unsigned), 0);
404
405 result_type r;
406 for (unsigned i = 0; i < nProc*bm.size(); ++i) {
407 rq.receive(&r, sizeof(result_type), recvd_size, priority);
408 runTimes.push_back(std::make_pair(bm[r.first], r.second));
409 }
410 for (unsigned i=0; i < nProc; ++i) {
411 mq.send(&terminateId, sizeof(unsigned), 0);
412 }
413 for (auto& thread: threadGroup) {
414 thread.join();
415 }
416 printResults(nProc, runTimes);
417 }
418 else {
419 message_queue mq(open_only, testUnitIdQueueName);
420 message_queue rq(open_only, testResultQueueName);
421
422 unsigned id=0;
423 mq.receive(&id, sizeof(unsigned), recvd_size, priority);
424
425 while (id != terminateId) {
426 result_type a(id, TimedBenchmark(bm[id].getTestCase())());
427 rq.send(&a, sizeof(result_type), 0);
428
429 mq.receive(&id, sizeof(unsigned), recvd_size, priority);
430 }
431 }
432#else
433 std::cout << "Please compile QuantLib with option 'QL_ENABLE_PARALLEL_UNIT_TEST_RUNNER'"
434 " to run the benchmarks in parallel" << std::endl;
435#endif
436 }
437
438 return 0;
439}
440

source code of quantlib/test-suite/quantlibbenchmark.cpp

Morty Proxy This is a proxified and sanitized view of the page, visit original site.