Macaulay2 Engine
Loading...
Searching...
No Matches

◆ parallelReduceF4Matrix()

void NCF4::parallelReduceF4Matrix ( )
private

Definition at line 961 of file NCF4.cpp.

962{
963 NCF4Stats ncF4Stats;
964 using threadLocalDense_t = mtbb::enumerable_thread_specific<ElementArray>;
965 using threadLocalStats_t = mtbb::enumerable_thread_specific<NCF4Stats>;
966
967 // create a dense array for each thread
968 threadLocalDense_t threadLocalDense([&]() {
969 return mVectorArithmetic->allocateElementArray(mColumnMonomials.size());
970 });
971 auto denseVector = mVectorArithmetic->allocateElementArray(mColumnMonomials.size());
972
973 threadLocalStats_t threadLocalStats([&]() {
974 return NCF4Stats();
975 });
976
977 // access the number of allowable threads this way.
978 //std::cout << "M2 Number of Threads: " << getAllowableThreads() << std::endl;
979
980 // reduce each overlap row by mRows.
981
982 mtbb::queuing_mutex lock;
983 mtbb::parallel_for(mtbb::blocked_range<int>{mFirstOverlap,(int)mRows.size()},
984 [&](const mtbb::blocked_range<int>& r)
985 {
986 threadLocalDense_t::reference my_dense = threadLocalDense.local();
987 threadLocalStats_t::reference my_threadStats = threadLocalStats.local();
988 for (auto i = r.begin(); i != r.end(); ++i) {
990 mRows[i].columnIndices[0],
991 -1,
992 my_threadStats,
993 my_dense,
994 lock);
995 my_threadStats.numRows++;
996 }
997 });
998
999 int numThreads = 0;
1000 for (auto tlStats : threadLocalStats)
1001 {
1002 ++numThreads;
1003 if (M2_gbTrace >= 2)
1004 {
1005 std::cout << "numCancellations for this thread: " << tlStats.numCancellations << std::endl;
1006 std::cout << "numRows for this thread: " << tlStats.numRows << std::endl;
1007 }
1008 ncF4Stats.numCancellations += tlStats.numCancellations;
1009 }
1010
1011 // sequentially perform one more pass to reduce the spair rows down
1012 for (auto i = mFirstOverlap; i < mRows.size(); ++i)
1013 reduceF4Row(i,
1014 mRows[i].columnIndices[0],
1015 -1,
1016 ncF4Stats,
1017 denseVector);
1018
1019 // interreduce the matrix with respect to these overlaps.
1020 // This needs to be sequential as well
1021 for (auto i = mRows.size(); i > mFirstOverlap; --i)
1022 reduceF4Row(i-1,
1023 mRows[i-1].columnIndices[1],
1024 mRows[i-1].columnIndices[0],
1025 ncF4Stats,
1026 denseVector);
1027
1028 for (auto tlDense : threadLocalDense)
1029 mVectorArithmetic->deallocateElementArray(tlDense);
1030
1031 mVectorArithmetic->deallocateElementArray(denseVector);
1032 if (M2_gbTrace >= 2)
1033 {
1034 std::cout << "Number of cancellations: " << ncF4Stats.numCancellations << std::endl;
1035 std::cout << "Number of threads used: " << numThreads << std::endl;
1036 }
1037}
void reduceF4Row(int index, int first, int firstcol, NCF4Stats &ncF4Stats, ElementArray &dense)
Definition NCF4.hpp:357
void parallelReduceF4Row(int index, int first, int firstcol, NCF4Stats &ncF4Stats, ElementArray &dense, mtbb::queuing_mutex &lock)
Definition NCF4.hpp:373
MonomialHash mColumnMonomials
Definition NCF4.hpp:235
int mFirstOverlap
Definition NCF4.hpp:247
RowsVector mRows
Definition NCF4.hpp:243
const VectorArithmetic * mVectorArithmetic
Definition NCF4.hpp:250
int M2_gbTrace
Definition m2-types.cpp:52
NCF4::NCF4Stats: per-thread counters tracking how much work the F4 reduction did.
Definition NCF4.hpp:209

References M2_gbTrace, mColumnMonomials, mFirstOverlap, mRows, mVectorArithmetic, NCF4::NCF4Stats::numCancellations, parallelReduceF4Row(), and reduceF4Row().

Referenced by process().