7 #include <boost/compute/algorithm/max_element.hpp> 8 #include <boost/compute/container/vector.hpp> 9 #include <boost/compute/context.hpp> 10 #include <boost/compute/types/complex.hpp> 24 int main(
int argc,
char* argv[]) {
25 auto testcases = create_testcases();
26 return jb::testing::microbenchmark_group_main<config>(argc, argv, testcases);
30 constexpr
int default_size() {
35 std::size_t gpu_argmax(
36 boost::compute::vector<T>
const& dev,
37 boost::compute::command_queue& queue) {
39 BOOST_COMPUTE_FUNCTION(
40 bool, less_real, (value_type
const& a, value_type
const& b),
45 boost::compute::max_element(dev.begin(), dev.end(), less_real, queue));
49 std::size_t gpu_argmax(
50 boost::compute::vector<std::complex<T>>
const& dev,
51 boost::compute::command_queue& queue) {
52 typedef std::complex<T> value_type;
53 BOOST_COMPUTE_FUNCTION(
54 bool, less_real, (value_type
const& a, value_type
const& b),
55 {
return a.x < b.x; });
59 boost::compute::max_element(dev.begin(), dev.end(), less_real, queue));
63 std::size_t cpu_argmax(std::vector<T>
const& host) {
65 auto less_real = [](value_type
const& a, value_type
const& b) {
66 return std::real(a) < std::real(b);
70 host.begin(), std::max_element(host.begin(), host.end(), less_real));
80 template <
typename value_type,
bool use_gpu>
84 fixture(boost::compute::context& context, boost::compute::command_queue& q)
85 : fixture(default_size(), context, q) {
90 int size, boost::compute::context& context,
91 boost::compute::command_queue& q)
97 for (
auto& v : host) {
98 v = value_type(++counter);
100 boost::compute::copy(host.begin(), host.end(), dev.begin(), queue);
106 unused += gpu_argmax(dev, queue);
108 unused += cpu_argmax(host);
110 return static_cast<int>(host.size());
114 std::size_t dummy()
const {
119 boost::compute::vector<value_type> dev;
120 std::vector<value_type> host;
121 boost::compute::command_queue queue;
128 template <
typename value_type,
bool use_gpu>
129 std::function<void(config const&)> benchmark_test_case() {
130 return [](config
const& cfg) {
132 boost::compute::context context(device);
133 boost::compute::command_queue queue(context, device);
136 benchmark bm(cfg.microbenchmark());
138 auto r = bm.
run(context, queue);
139 bm.typical_output(r);
146 {
"gpu:complex:float", benchmark_test_case<std::complex<float>,
true>()},
147 {
"gpu:complex:double", benchmark_test_case<std::complex<double>,
true>()},
148 {
"cpu:complex:float", benchmark_test_case<std::complex<float>,
false>()},
149 {
"cpu:complex:double",
150 benchmark_test_case<std::complex<double>,
false>()},
151 {
"gpu:float", benchmark_test_case<float, true>()},
152 {
"gpu:double", benchmark_test_case<double, true>()},
153 {
"cpu:float", benchmark_test_case<float, false>()},
154 {
"cpu:double", benchmark_test_case<double, false>()},
boost::compute::device device_selector(config const &cfg)
Select an OpenCL device matching the current configuration.
The configuration shared by all OpenCL microbenchmarks.
results run(Args &&... args)
Run the microbenchmark.
std::map< std::string, std::function< void(config const &cfg)> > microbenchmark_group
Define a representation for a group of microbenchmarks.
int main(int argc, char *argv[])
Run a micro-benchmark on a given class.