JayBeams  0.1
Another project to have fun coding.
bm_reduce_argmax_real.cpp
Go to the documentation of this file.
5 #include <jb/complex_traits.hpp>
6 
7 #include <boost/compute/algorithm/max_element.hpp>
8 #include <boost/compute/container/vector.hpp>
9 #include <boost/compute/context.hpp>
10 #include <boost/compute/types/complex.hpp>
11 #include <iostream>
12 #include <stdexcept>
13 #include <string>
14 
15 /// Functions and types to benchmark the argmax reduction based on
16 /// Boost.Compute
17 namespace {
19 
20 /// Return a table with all the test cases.
22 } // anonymous namespace
23 
24 int main(int argc, char* argv[]) {
25  auto testcases = create_testcases();
26  return jb::testing::microbenchmark_group_main<config>(argc, argv, testcases);
27 }
28 
29 namespace {
/// Number of elements the fixture allocates when the caller does not provide
/// an explicit size.
constexpr int default_size() {
  return 1 << 15; // 32768 elements
}
33 
34 template <typename T>
35 std::size_t gpu_argmax(
36  boost::compute::vector<T> const& dev,
37  boost::compute::command_queue& queue) {
38  typedef T value_type;
39  BOOST_COMPUTE_FUNCTION(
40  bool, less_real, (value_type const& a, value_type const& b),
41  { return a < b; });
42 
43  return std::distance(
44  dev.begin(),
45  boost::compute::max_element(dev.begin(), dev.end(), less_real, queue));
46 }
47 
48 template <typename T>
49 std::size_t gpu_argmax(
50  boost::compute::vector<std::complex<T>> const& dev,
51  boost::compute::command_queue& queue) {
52  typedef std::complex<T> value_type;
53  BOOST_COMPUTE_FUNCTION(
54  bool, less_real, (value_type const& a, value_type const& b),
55  { return a.x < b.x; });
56 
57  return std::distance(
58  dev.begin(),
59  boost::compute::max_element(dev.begin(), dev.end(), less_real, queue));
60 }
61 
/**
 * Locate the element with the largest real part in a host vector.
 *
 * @tparam T the type of the elements; anything accepted by std::real().
 * @param host the vector to search.
 * @returns the index of the first element whose real part is the largest,
 *   or 0 when @a host is empty.
 */
template <typename T>
std::size_t cpu_argmax(std::vector<T> const& host) {
  auto const first = host.begin();
  auto const largest =
      std::max_element(first, host.end(), [](T const& lhs, T const& rhs) {
        return std::real(lhs) < std::real(rhs);
      });
  return std::distance(first, largest);
}
72 
73 /**
74  * The benchmark fixture.
75  *
76  * @tparam value_type the values stored in the vector.
77  * @tparam use_gpu if true, the computation is executed using
78  * Boost.Compute, and presumably OpenCL in the GPU.
79  */
80 template <typename value_type, bool use_gpu>
81 class fixture {
82 public:
83  /// Constructor
84  fixture(boost::compute::context& context, boost::compute::command_queue& q)
85  : fixture(default_size(), context, q) {
86  }
87 
88  /// Constructor with a size
89  fixture(
90  int size, boost::compute::context& context,
91  boost::compute::command_queue& q)
92  : dev(size, context)
93  , host(size)
94  , queue(q)
95  , unused(0) {
96  int counter = 0;
97  for (auto& v : host) {
98  v = value_type(++counter);
99  }
100  boost::compute::copy(host.begin(), host.end(), dev.begin(), queue);
101  queue.finish();
102  }
103 
104  int run() {
105  if (use_gpu) {
106  unused += gpu_argmax(dev, queue);
107  } else {
108  unused += cpu_argmax(host);
109  }
110  return static_cast<int>(host.size());
111  }
112 
113  /// Disable aggressive optimizations
114  std::size_t dummy() const {
115  return unused;
116  }
117 
118 private:
119  boost::compute::vector<value_type> dev;
120  std::vector<value_type> host;
121  boost::compute::command_queue queue;
122  std::size_t unused;
123 };
124 
125 /**
126  * Create one of the test-cases for the microbenchmark.
 *
 * The returned callable takes the benchmark configuration and, when invoked:
 *   - selects a device with jb::opencl::device_selector(cfg.opencl()),
 *   - creates a Boost.Compute context and a command queue on that device,
 *   - constructs a `benchmark` object from cfg.microbenchmark(),
 *   - runs it, passing the context and the queue, and
 *   - hands the result of the run to typical_output().
 *
 * NOTE(review): the declaration of `benchmark` is not visible in this listing;
 * presumably it is an alias that binds @a value_type and @a use_gpu to the
 * fixture -- confirm against the original file.
 *
 * @tparam value_type the type of the values in the benchmarked vector.
 * @tparam use_gpu selects the GPU or the CPU variant of the benchmark.
 * @returns a callable taking a `config const&` that runs the benchmark.
127  */
128 template <typename value_type, bool use_gpu>
129 std::function<void(config const&)> benchmark_test_case() {
130  return [](config const& cfg) {
131  boost::compute::device device = jb::opencl::device_selector(cfg.opencl());
132  boost::compute::context context(device);
133  boost::compute::command_queue queue(context, device);
134 
136  benchmark bm(cfg.microbenchmark());
137 
138  auto r = bm.run(context, queue);
139  bm.typical_output(r);
140  };
141 }
142 
143 /// A table with all the microbenchmark cases
146  {"gpu:complex:float", benchmark_test_case<std::complex<float>, true>()},
147  {"gpu:complex:double", benchmark_test_case<std::complex<double>, true>()},
148  {"cpu:complex:float", benchmark_test_case<std::complex<float>, false>()},
149  {"cpu:complex:double",
150  benchmark_test_case<std::complex<double>, false>()},
151  {"gpu:float", benchmark_test_case<float, true>()},
152  {"gpu:double", benchmark_test_case<double, true>()},
153  {"cpu:float", benchmark_test_case<float, false>()},
154  {"cpu:double", benchmark_test_case<double, false>()},
155  };
156 }
157 } // anonymous namespace
boost::compute::device device_selector(config const &cfg)
Select an OpenCL device matching the current configuration.
The configuration shared by all OpenCL microbenchmarks.
results run(Args &&... args)
Run the microbenchmark.
std::map< std::string, std::function< void(config const &cfg)> > microbenchmark_group
Define a representation for a group of microbenchmarks.
int main(int argc, char *argv[])
Run a micro-benchmark on a given class.