962{
964 using threadLocalDense_t = mtbb::enumerable_thread_specific<ElementArray>;
965 using threadLocalStats_t = mtbb::enumerable_thread_specific<NCF4Stats>;
966
967
968 threadLocalDense_t threadLocalDense([&]() {
970 });
972
973 threadLocalStats_t threadLocalStats([&]() {
975 });
976
977
978
979
980
981
982 mtbb::queuing_mutex lock;
984 [&](const mtbb::blocked_range<int>& r)
985 {
986 threadLocalDense_t::reference my_dense = threadLocalDense.local();
987 threadLocalStats_t::reference my_threadStats = threadLocalStats.local();
988 for (auto i = r.begin(); i != r.end(); ++i) {
990 mRows[i].columnIndices[0],
991 -1,
992 my_threadStats,
993 my_dense,
994 lock);
995 my_threadStats.numRows++;
996 }
997 });
998
999 int numThreads = 0;
1000 for (auto tlStats : threadLocalStats)
1001 {
1002 ++numThreads;
1004 {
1005 std::cout << "numCancellations for this thread: " << tlStats.numCancellations << std::endl;
1006 std::cout << "numRows for this thread: " << tlStats.numRows << std::endl;
1007 }
1008 ncF4Stats.numCancellations += tlStats.numCancellations;
1009 }
1010
1011
1014 mRows[i].columnIndices[0],
1015 -1,
1016 ncF4Stats,
1017 denseVector);
1018
1019
1020
1023 mRows[i-1].columnIndices[1],
1024 mRows[i-1].columnIndices[0],
1025 ncF4Stats,
1026 denseVector);
1027
1028 for (auto tlDense : threadLocalDense)
1030
1033 {
1034 std::cout << "Number of cancellations: " << ncF4Stats.numCancellations << std::endl;
1035 std::cout << "Number of threads used: " << numThreads << std::endl;
1036 }
1037}
void reduceF4Row(int index, int first, int firstcol, NCF4Stats &ncF4Stats, ElementArray &dense)
void parallelReduceF4Row(int index, int first, int firstcol, NCF4Stats &ncF4Stats, ElementArray &dense, mtbb::queuing_mutex &lock)
MonomialHash mColumnMonomials
const VectorArithmetic * mVectorArithmetic
Per-thread counters tracking how much work the F4 reduction did.