#include <boost/compute/algorithm/reduce.hpp>
#include <boost/compute/command_queue.hpp>
#include <boost/compute/container/vector.hpp>

#include <algorithm>
#include <iostream>
#include <random>
#include <sstream>
#include <string>
#include <vector>

#ifndef JB_OPENCL_bm_generic_reduce_minimum_size
#define JB_OPENCL_bm_generic_reduce_minimum_size 16
#endif // JB_OPENCL_bm_generic_reduce_minimum_size

// config and create_testcases() are declared earlier in the file; their
// definitions follow main().
int main(int argc, char* argv[]) {
  auto testcases = create_testcases();
  return jb::testing::microbenchmark_group_main<config>(argc, argv, testcases);
}
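// jb::testing::microbenchmark_group<config> is a std::map from test case name
// to std::function<void(config const&)>, so microbenchmark_group_main()
// dispatches by looking up the configured test case and invoking the stored
// callable.  A minimal sketch of that dispatch (an illustration, not the
// jb::testing implementation; "selected_name" is a hypothetical variable):
//
//   auto testcases = create_testcases();
//   auto it = testcases.find(selected_name);
//   if (it != testcases.end()) {
//     it->second(cfg);
//   }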
std::string randomize_size_help() {
  std::ostringstream os;
  os << "If true, the size is randomized in each iteration."
     << " This is useful when trying to build regression models,"
     << " but not when trying to fine tune algorithms."
     << " The size is drawn uniformly between the minimum size"
     << " and the configured size of the test.";
  return os.str();
}
config::config()
    : randomize_size(
          desc("randomize-size").help(randomize_size_help()), this, true)
    , copy_data(
          desc("copy-data")
              .help(
                  "If set, the test copies fresh data to the OpenCL device"
                  " on each iteration.  Effectively that tests copy + reduce"
                  " performance.  Disabling this flag tests reduction assuming"
                  " the data is already on the device."),
          this, true)
    // additional attributes (the microbenchmark and OpenCL sub-configurations)
    // are initialized here as well; their descriptors are not shown in this
    // listing
    {
}
/// Map C++ types to the matching OpenCL C macro prefix ("FLT_" or "DBL_").
template <typename T>
struct opencl_type_traits {};

template <>
struct opencl_type_traits<double> {
  static char const* macro_prefix() { return "DBL_"; }
};

template <>
struct opencl_type_traits<float> {
  static char const* macro_prefix() { return "FLT_"; }
};
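// The reduce_min<> reducer below supplies the OpenCL C fragments that
// jb::opencl::generic_reduce assembles into a reduction kernel.  For
// illustration, with T == float and parameters named "lhs", "val" and "acc",
// the generated statements are:
//
//   *lhs = FLT_MAX;           // initialize_body("lhs")
//   *lhs = *val;              // transform_body("lhs", "val", ...)
//   *acc = min(*acc, *val);   // combine_body("acc", "val")
//
// i.e. the accumulator starts at the largest representable value and partial
// results are folded with min().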
/**
 * Compute the minimum of a range of values using jb::opencl::generic_reduce.
 *
 * (The base class template arguments and the constructor forwarding below are
 * assumed; they were not part of this listing.)
 */
template <typename T>
class reduce_min : public jb::opencl::generic_reduce<reduce_min<T>, T, T> {
public:
  reduce_min(std::size_t size, boost::compute::command_queue const& queue)
      : jb::opencl::generic_reduce<reduce_min<T>, T, T>(size, queue) {
  }

  /// OpenCL C to initialize the accumulator to the highest representable value.
  static std::string initialize_body(char const* lhs) {
    return std::string("*") + lhs + " = " +
           opencl_type_traits<T>::macro_prefix() + "MAX;";
  }

  /// OpenCL C to load one input element into the accumulator.
  static std::string transform_body(
      char const* lhs, char const* value, char const*) {
    return std::string("*") + lhs + " = *" + value + ";";
  }

  /// OpenCL C to combine two partial results using min().
  static std::string combine_body(char const* accumulated, char const* value) {
    return std::string("*") + accumulated + " = min(*" + accumulated + ", *" +
           value + ");";
  }
};
/// Common infrastructure shared by all the fixtures in this benchmark.
template <typename T>
class base_fixture {
public:
  base_fixture(
      config const& cfg, boost::compute::context& context,
      boost::compute::command_queue& q)
      : base_fixture(1024, cfg, context, q) {
  }

  base_fixture(
      int size, config const& cfg, boost::compute::context& context,
      boost::compute::command_queue& q)
      : cfg_(cfg)
      , host_(size)
      , device_(size, context)
      , queue_(q)
      , generator_(jb::testing::initialize_mersenne_twister<std::mt19937_64>(
            /* seed arguments not shown in this listing */))
      , iteration_size_(size)
      , avoid_optimization_(0) {
    int counter = 0;
    for (auto& i : host_) {
      i = size + 1 - ++counter;
    }
    boost::compute::copy(host_.begin(), host_.end(), device_.begin(), queue_);
  }

  void iteration_setup() {
    if (cfg_.randomize_size()) {
      iteration_size_ = std::uniform_int_distribution<>(
          JB_OPENCL_bm_generic_reduce_minimum_size,
          host_.size() - 1)(generator_);
    }
  }

  T avoid_optimization() const {
    return avoid_optimization_;
  }

protected:
  config cfg_;
  std::vector<T> host_;
  boost::compute::vector<T> device_;
  boost::compute::command_queue queue_;
  std::mt19937_64 generator_;
  int iteration_size_;
  T avoid_optimization_;
};
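// Note on the fixture data: the constructor fills host_ with
// {size, size - 1, ..., 2, 1} (for size == 4 that is {4, 3, 2, 1}), so the
// expected minimum is always 1.  Each run() accumulates its result into
// avoid_optimization_, exposed through avoid_optimization(), so neither the
// compiler nor the OpenCL runtime can treat the reduction as dead code.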
/// Benchmark boost::compute::reduce() with a blocking copy of the data.
template <typename T>
class boost_fixture : public base_fixture<T> {
public:
  boost_fixture(
      config const& cfg, boost::compute::context& context,
      boost::compute::command_queue& q)
      : boost_fixture(1024, cfg, context, q) {
  }

  boost_fixture(
      int size, config const& cfg, boost::compute::context& context,
      boost::compute::command_queue& q)
      : base_fixture<T>(size, cfg, context, q) {
  }

  int run() {
    if (this->cfg_.copy_data()) {
      (void)boost::compute::copy(
          this->host_.begin(), this->host_.begin() + this->iteration_size_,
          this->device_.begin(), this->queue_);
    }
    T result;
    boost::compute::reduce(
        this->device_.begin(), this->device_.begin() + this->iteration_size_,
        &result, boost::compute::min<T>(), this->queue_);
    this->queue_.finish();
    this->avoid_optimization_ += result;
    return this->iteration_size_;
  }
};
/// Benchmark boost::compute::reduce() with an asynchronous copy of the data.
template <typename T>
class boost_async_fixture : public base_fixture<T> {
public:
  boost_async_fixture(
      config const& cfg, boost::compute::context& context,
      boost::compute::command_queue& q)
      : boost_async_fixture(1024, cfg, context, q) {
  }

  boost_async_fixture(
      int size, config const& cfg, boost::compute::context& context,
      boost::compute::command_queue& q)
      : base_fixture<T>(size, cfg, context, q) {
  }

  int run() {
    if (this->cfg_.copy_data()) {
      auto end = boost::compute::copy_async(
          this->host_.begin(), this->host_.begin() + this->iteration_size_,
          this->device_.begin(), this->queue_);
      // do not block on the copy; the barrier orders it before the reduction
      this->queue_.enqueue_barrier();
    }
    T result;
    boost::compute::reduce(
        this->device_.begin(), this->device_.begin() + this->iteration_size_,
        &result, boost::compute::min<T>(), this->queue_);
    this->queue_.finish();
    this->avoid_optimization_ += result;
    return this->iteration_size_;
  }
};
/// Benchmark the generic reducer with an asynchronous copy of the data.
template <typename T>
class generic_reduce_fixture : public base_fixture<T> {
public:
  generic_reduce_fixture(
      config const& cfg, boost::compute::context& context,
      boost::compute::command_queue& q)
      : generic_reduce_fixture(1024, cfg, context, q) {
  }

  generic_reduce_fixture(
      int size, config const& cfg, boost::compute::context& context,
      boost::compute::command_queue& q)
      : base_fixture<T>(size, cfg, context, q)
      , reducer_(size, q) {
  }

  int run() {
    boost::compute::wait_list wl;
    if (this->cfg_.copy_data()) {
      auto end = boost::compute::copy_async(
          this->host_.begin(), this->host_.begin() + this->iteration_size_,
          this->device_.begin(), this->queue_);
      wl = boost::compute::wait_list(end.get_event());
    }
    auto result = reducer_.execute(
        this->device_.begin(), this->device_.begin() + this->iteration_size_,
        wl);
    this->avoid_optimization_ += *result.get();
    return this->iteration_size_;
  }

private:
  reduce_min<T> reducer_;
};
/// A CPU-only baseline using std::min_element().
template <typename T>
class std_fixture : public base_fixture<T> {
public:
  std_fixture(
      config const& cfg, boost::compute::context& context,
      boost::compute::command_queue& q)
      : std_fixture(1024, cfg, context, q) {
  }

  std_fixture(
      int size, config const& cfg, boost::compute::context& context,
      boost::compute::command_queue& q)
      : base_fixture<T>(size, cfg, context, q) {
  }

  int run() {
    auto iterator = std::min_element(
        this->host_.begin(), this->host_.begin() + this->iteration_size_);
    this->avoid_optimization_ += *iterator;
    return this->iteration_size_;
  }
};
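// The test_case<>() helper below wraps a fixture type into a type-erased
// callable; each entry in the test case table is one instantiation of it,
// e.g. (illustrative only):
//
//   auto run_std_float = test_case<std_fixture<float>>();
//   run_std_float(cfg);  // creates the device, context and queue, then runs
//                        // the std:float benchmark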
/// Wrap a fixture type into a type-erased test case.
template <typename fixture_type>
std::function<void(config const&)> test_case() {
  return [](config const& cfg) {
    // select the OpenCL device for this configuration (the opencl() accessor
    // on the benchmark configuration is assumed here)
    auto device = jb::opencl::device_selector(cfg.opencl());
    boost::compute::context context(device);
    boost::compute::command_queue queue(context, device);
    std::cerr << "device=" << device.name() << std::endl;
    typedef jb::testing::microbenchmark<fixture_type> benchmark;
    benchmark bm(cfg.microbenchmark());
    auto r = bm.run(cfg, context, queue);
    bm.typical_output(r);
  };
}

/// Build the table of test cases for this benchmark.
jb::testing::microbenchmark_group<config> create_testcases() {
  return jb::testing::microbenchmark_group<config>{
      {"boost:float", test_case<boost_fixture<float>>()},
      {"boost:double", test_case<boost_fixture<double>>()},
      {"boost_async:float", test_case<boost_async_fixture<float>>()},
      {"boost_async:double", test_case<boost_async_fixture<double>>()},
      {"generic_reduce:float", test_case<generic_reduce_fixture<float>>()},
      {"generic_reduce:double", test_case<generic_reduce_fixture<double>>()},
      {"std:float", test_case<std_fixture<float>>()},
      {"std:double", test_case<std_fixture<double>>()},
  };
}