MapReduce in C++11 with Intel TBB in less than 50 lines

.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;

up vote
0
down vote

favorite

Map and reduce sections are quite trivial, however I relied on a non-standard tbb::concurrent_map in the shuffle. This parallelizes in a single process and automatically maximizes the number of threads (unless a tbb::task_scheduler_init is in order). Multiple process/machine parallelism can only be achieved through dividing the data and feeding it separately. It is naive, but I found it to be useful in the last few days.

#ifndef MAP_REDUCE_HPP_
#define MAP_REDUCE_HPP_

#include <cstddef>
#include <functional>
#include <utility>
#include <vector>

#include <tbb/concurrent_hash_map.h>
#include <tbb/parallel_for.h>

template<typename key_type, typename value_type, typename reduction_type>
std::vector<std::pair<key_type, reduction_type>> map_reduce(
 const std::vector<value_type>& data ,
 const std::function<key_type(const value_type&)>& map_function , 
 const std::function<reduction_type(const key_type&, const std::vector<const value_type*>&)>& reduce_function)

 // Map.
 std::vector<key_type> keys(data.size());
 tbb::parallel_for(std::size_t(0), keys.size(), std::size_t(1), [&] (std::size_t index)
 
 keys[index] = map_function(data[index]);
 );

 // Shuffle.
 using group_map_type = tbb::concurrent_hash_map<key_type, std::vector<const value_type*>>;
 using group_map_accessor_type = typename group_map_type::accessor;
 group_map_type group_map;
 tbb::parallel_for(std::size_t(0), keys.size(), std::size_t(1), [&] (std::size_t index)
 
 group_map_accessor_type accessor;
 group_map.count(keys[index]) ? group_map.find(accessor, keys[index]) : group_map.insert(accessor, keys[index]);
 accessor->second.push_back(&data[index]);
 );
 std::vector<std::pair<key_type, std::vector<const value_type*>>> groups(group_map.begin(), group_map.end());

 // Reduce.
 std::vector<std::pair<key_type, reduction_type>> reductions(groups.size());
 tbb::parallel_for(std::size_t(0), reductions.size(), std::size_t(1), [&] (std::size_t index)
 
 reductions[index] = std::pair<key_type, reduction_type>groups[index].first, reduce_function(groups[index].first, groups[index].second);
 );

 return reductions;


#endif

edited Jun 10 at 12:25

asked Jun 10 at 3:25

demiralp

436

Maybe I'm dense, but it doesn't seem like your title is very readable - you don't need to include the language in it, just the purpose of the code. Additionally explaining what it does in your first paragraph is great as would be an example of input - output.
â€“Â Raystafarian
Jun 11 at 4:16

add a commentÂ |Â

up vote
0
down vote

favorite

#ifndef MAP_REDUCE_HPP_
#define MAP_REDUCE_HPP_

#include <cstddef>
#include <functional>
#include <utility>
#include <vector>

#include <tbb/concurrent_hash_map.h>
#include <tbb/parallel_for.h>

template<typename key_type, typename value_type, typename reduction_type>
std::vector<std::pair<key_type, reduction_type>> map_reduce(
 const std::vector<value_type>& data ,
 const std::function<key_type(const value_type&)>& map_function , 
 const std::function<reduction_type(const key_type&, const std::vector<const value_type*>&)>& reduce_function)

 // Map.
 std::vector<key_type> keys(data.size());
 tbb::parallel_for(std::size_t(0), keys.size(), std::size_t(1), [&] (std::size_t index)
 
 keys[index] = map_function(data[index]);
 );

 // Shuffle.
 using group_map_type = tbb::concurrent_hash_map<key_type, std::vector<const value_type*>>;
 using group_map_accessor_type = typename group_map_type::accessor;
 group_map_type group_map;
 tbb::parallel_for(std::size_t(0), keys.size(), std::size_t(1), [&] (std::size_t index)
 
 group_map_accessor_type accessor;
 group_map.count(keys[index]) ? group_map.find(accessor, keys[index]) : group_map.insert(accessor, keys[index]);
 accessor->second.push_back(&data[index]);
 );
 std::vector<std::pair<key_type, std::vector<const value_type*>>> groups(group_map.begin(), group_map.end());

 // Reduce.
 std::vector<std::pair<key_type, reduction_type>> reductions(groups.size());
 tbb::parallel_for(std::size_t(0), reductions.size(), std::size_t(1), [&] (std::size_t index)
 
 reductions[index] = std::pair<key_type, reduction_type>groups[index].first, reduce_function(groups[index].first, groups[index].second);
 );

 return reductions;


#endif

edited Jun 10 at 12:25

asked Jun 10 at 3:25

demiralp

436

Maybe I'm dense, but it doesn't seem like your title is very readable - you don't need to include the language in it, just the purpose of the code. Additionally explaining what it does in your first paragraph is great as would be an example of input - output.
â€“Â Raystafarian
Jun 11 at 4:16

add a commentÂ |Â

up vote
0
down vote

favorite

#ifndef MAP_REDUCE_HPP_
#define MAP_REDUCE_HPP_

#include <cstddef>
#include <functional>
#include <utility>
#include <vector>

#include <tbb/concurrent_hash_map.h>
#include <tbb/parallel_for.h>

template<typename key_type, typename value_type, typename reduction_type>
std::vector<std::pair<key_type, reduction_type>> map_reduce(
 const std::vector<value_type>& data ,
 const std::function<key_type(const value_type&)>& map_function , 
 const std::function<reduction_type(const key_type&, const std::vector<const value_type*>&)>& reduce_function)

 // Map.
 std::vector<key_type> keys(data.size());
 tbb::parallel_for(std::size_t(0), keys.size(), std::size_t(1), [&] (std::size_t index)
 
 keys[index] = map_function(data[index]);
 );

 // Shuffle.
 using group_map_type = tbb::concurrent_hash_map<key_type, std::vector<const value_type*>>;
 using group_map_accessor_type = typename group_map_type::accessor;
 group_map_type group_map;
 tbb::parallel_for(std::size_t(0), keys.size(), std::size_t(1), [&] (std::size_t index)
 
 group_map_accessor_type accessor;
 group_map.count(keys[index]) ? group_map.find(accessor, keys[index]) : group_map.insert(accessor, keys[index]);
 accessor->second.push_back(&data[index]);
 );
 std::vector<std::pair<key_type, std::vector<const value_type*>>> groups(group_map.begin(), group_map.end());

 // Reduce.
 std::vector<std::pair<key_type, reduction_type>> reductions(groups.size());
 tbb::parallel_for(std::size_t(0), reductions.size(), std::size_t(1), [&] (std::size_t index)
 
 reductions[index] = std::pair<key_type, reduction_type>groups[index].first, reduce_function(groups[index].first, groups[index].second);
 );

 return reductions;


#endif

edited Jun 10 at 12:25

asked Jun 10 at 3:25

demiralp

436

#ifndef MAP_REDUCE_HPP_
#define MAP_REDUCE_HPP_

#include <cstddef>
#include <functional>
#include <utility>
#include <vector>

#include <tbb/concurrent_hash_map.h>
#include <tbb/parallel_for.h>

template<typename key_type, typename value_type, typename reduction_type>
std::vector<std::pair<key_type, reduction_type>> map_reduce(
 const std::vector<value_type>& data ,
 const std::function<key_type(const value_type&)>& map_function , 
 const std::function<reduction_type(const key_type&, const std::vector<const value_type*>&)>& reduce_function)

 // Map.
 std::vector<key_type> keys(data.size());
 tbb::parallel_for(std::size_t(0), keys.size(), std::size_t(1), [&] (std::size_t index)
 
 keys[index] = map_function(data[index]);
 );

 // Shuffle.
 using group_map_type = tbb::concurrent_hash_map<key_type, std::vector<const value_type*>>;
 using group_map_accessor_type = typename group_map_type::accessor;
 group_map_type group_map;
 tbb::parallel_for(std::size_t(0), keys.size(), std::size_t(1), [&] (std::size_t index)
 
 group_map_accessor_type accessor;
 group_map.count(keys[index]) ? group_map.find(accessor, keys[index]) : group_map.insert(accessor, keys[index]);
 accessor->second.push_back(&data[index]);
 );
 std::vector<std::pair<key_type, std::vector<const value_type*>>> groups(group_map.begin(), group_map.end());

 // Reduce.
 std::vector<std::pair<key_type, reduction_type>> reductions(groups.size());
 tbb::parallel_for(std::size_t(0), reductions.size(), std::size_t(1), [&] (std::size_t index)
 
 reductions[index] = std::pair<key_type, reduction_type>groups[index].first, reduce_function(groups[index].first, groups[index].second);
 );

 return reductions;


#endif

edited Jun 10 at 12:25

asked Jun 10 at 3:25

demiralp

436

edited Jun 10 at 12:25

asked Jun 10 at 3:25

demiralp

436

asked Jun 10 at 3:25

demiralp

436

asked Jun 10 at 3:25

demiralp

436

Maybe I'm dense, but it doesn't seem like your title is very readable - you don't need to include the language in it, just the purpose of the code. Additionally explaining what it does in your first paragraph is great as would be an example of input - output.
â€“Â Raystafarian
Jun 11 at 4:16

add a commentÂ |Â

Maybe I'm dense, but it doesn't seem like your title is very readable - you don't need to include the language in it, just the purpose of the code. Additionally explaining what it does in your first paragraph is great as would be an example of input - output.
â€“Â Raystafarian
Jun 11 at 4:16

Maybe I'm dense, but it doesn't seem like your title is very readable - you don't need to include the language in it, just the purpose of the code. Additionally explaining what it does in your first paragraph is great as would be an example of input - output.
â€“Â Raystafarian
Jun 11 at 4:16

add a commentÂ |Â

active

oldest

votes

Your Answer

StackExchange.ifUsing("editor", function ()
return StackExchange.using("mathjaxEditing", function ()
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix)
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
);
);
, "mathjax-editing");

StackExchange.ifUsing("editor", function ()
StackExchange.using("externalEditor", function ()
StackExchange.using("snippets", function ()
StackExchange.snippets.init();
);
);
, "code-snippets");

StackExchange.ready(function()
var channelOptions =
tags: "".split(" "),
id: "196"
;
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function()
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled)
StackExchange.using("snippets", function()
createEditor();
);

else
createEditor();

);

function createEditor()
StackExchange.prepareEditor(
heartbeatType: 'answer',
convertImagesToLinks: false,
noModals: false,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
);

);

draft saved

draft discarded

StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f196206%2fmapreduce-in-c11-with-intel-tbb-in-less-than-50-lines%23new-answer', 'question_page');

);

Post as a guest

Name

active

oldest

votes

draft saved

draft discarded

draft saved

draft discarded

Post as a guest

Name

搜尋此網誌

trjhtr