◆ parallelReduceF4Matrix()

void NCF4::parallelReduceF4Matrix ( )
private
Definition at line 961 of file NCF4.cpp.
{
  // Reduces the rows of the F4 matrix with index in [mFirstOverlap, mRows.size())
  // in three passes:
  //   1. a parallel pass that calls parallelReduceF4Row on every such row,
  //   2. a sequential pass that calls reduceF4Row on every such row,
  //   3. a sequential pass, in descending row order, that interreduces
  //      these rows against each other.
  // Every pass works in a dense scratch array with mColumnMonomials.size()
  // entries: one per worker thread for pass 1, and a single shared one for
  // passes 2 and 3.  All scratch arrays are released before returning.
  // When M2_gbTrace >= 2, per-thread and total statistics go to std::cout.
  using threadLocalDense_t = mtbb::enumerable_thread_specific<ElementArray>;
  using threadLocalStats_t = mtbb::enumerable_thread_specific<NCF4Stats>;

  NCF4Stats ncF4Stats;

  // One signed row count used by all three passes, so that loop indices and
  // bounds share a type (the parallel range below is a blocked_range<int>).
  // NOTE(review): assumes reduceF4Row/parallelReduceF4Row never change
  // mRows.size() -- confirm.
  const int numRows = static_cast<int>(mRows.size());

  // create a dense array for each thread
  threadLocalDense_t threadLocalDense([&]() {
    return mVectorArithmetic->allocateElementArray(mColumnMonomials.size());
  });
  // scratch array for the two sequential passes
  auto denseVector = mVectorArithmetic->allocateElementArray(mColumnMonomials.size());

  threadLocalStats_t threadLocalStats([&]() {
    return NCF4Stats();
  });

  // access the number of allowable threads this way.
  //std::cout << "M2 Number of Threads: " << getAllowableThreads() << std::endl;

  // reduce each overlap row by mRows.
  // The mutex is handed to every parallelReduceF4Row call; how it is used is
  // decided there.
  mtbb::queuing_mutex lock;
  mtbb::parallel_for(mtbb::blocked_range<int>{mFirstOverlap, numRows},
                     [&](const mtbb::blocked_range<int>& r)
                     {
                       // per-thread scratch array and statistics for this chunk
                       threadLocalDense_t::reference my_dense = threadLocalDense.local();
                       threadLocalStats_t::reference my_threadStats = threadLocalStats.local();
                       for (auto i = r.begin(); i != r.end(); ++i) {
                         parallelReduceF4Row(i,
                                             mRows[i].columnIndices[0],
                                             -1,
                                             my_threadStats,
                                             my_dense,
                                             lock);
                         ++my_threadStats.numRows;
                       }
                     });

  // Fold the per-thread statistics into the overall total.  Iterate by
  // reference: there is no need to copy each NCF4Stats.
  int numThreads = 0;
  for (const auto& tlStats : threadLocalStats)
  {
    ++numThreads;
    if (M2_gbTrace >= 2)
      {
        std::cout << "numCancellations for this thread: " << tlStats.numCancellations << std::endl;
        std::cout << "numRows for this thread: " << tlStats.numRows << std::endl;
      }
    ncF4Stats.numCancellations += tlStats.numCancellations;
  }

  // sequentially perform one more pass to reduce the spair rows down
  for (int i = mFirstOverlap; i < numRows; ++i)
    reduceF4Row(i,
                mRows[i].columnIndices[0],
                -1,
                ncF4Stats,
                denseVector);

  // interreduce the matrix with respect to these overlaps.
  // This needs to be sequential as well.  Rows are visited from the last one
  // down to mFirstOverlap.
  for (int i = numRows - 1; i >= mFirstOverlap; --i)
    reduceF4Row(i,
                mRows[i].columnIndices[1],
                mRows[i].columnIndices[0],
                ncF4Stats,
                denseVector);

  // Release the per-thread scratch arrays.  Iterate by reference so the
  // stored elements themselves (not temporaries copied from them) are the
  // ones handed to the deallocator.
  for (auto& tlDense : threadLocalDense)
    mVectorArithmetic->deallocateElementArray(tlDense);

  mVectorArithmetic->deallocateElementArray(denseVector);

  if (M2_gbTrace >= 2)
    {
      std::cout << "Number of cancellations: " << ncF4Stats.numCancellations << std::endl;
      std::cout << "Number of threads used: " << numThreads << std::endl;
    }
}
References M2_gbTrace, mColumnMonomials, mFirstOverlap, mRows, mVectorArithmetic, NCF4::NCF4Stats::numCancellations, parallelReduceF4Row(), and reduceF4Row().
Referenced by process().