Generated/html/sorting_8h_source.html

 #pragma once

 /*+
 ________________________________________________________________________

  (C) dGB Beheer B.V.; (LICENSE) http://opendtect.org/OpendTect_license.txt
  Author:        A.H. Bril
  Date:          19-4-2000
  Contents:      Array sorting
 ________________________________________________________________________

 -*/

 #include "algomod.h"
 #include "gendefs.h"
 #include "ptrman.h"
 #include "thread.h"
 #include "paralleltask.h"


 /*! Body shared by sort_array() and sort_coupled(): an in-place shell sort
     with gap sequence sz/2, sz/4, ..., 1 that puts arr[0..sz-1] in ascending
     order using operator>.

     extra_var     optional declaration placed at the top of the body
     extra_action  optional statement(s) run after every exchange of
                   arr[j-d] and arr[j]; it may refer to j and d
     sztype        type used for the gap and the indices

     The inner loop indexes from the upper element (j) and tests j>=d, so no
     index ever goes below zero. That keeps the loop valid when sztype is an
     unsigned type, where a 'j>=0' test would always be true.
     No '//' comments inside the macro: line splicing would swallow the
     following line. */
 #define mDoSort(extra_var,extra_action,sztype) \
 { \
     T tmp; extra_var; \
     for ( sztype d=sz/2; d>0; d=d/2 ) \
         for ( sztype i=d; i<sz; i++ ) \
             for ( sztype j=i; j>=d && arr[j-d]>arr[j]; j-=d ) \
             { \
                 tmp = arr[j-d]; arr[j-d] = arr[j]; arr[j] = tmp; \
                 extra_action; \
             } \
 }

 /*! Sorts arr[0..sz-1] in ascending order, in place.
     T must be default-constructible, assignable and comparable with '>'.
     I may be any integer type, signed or unsigned. */
 template <class T,class I>
 inline void sort_array( T* arr, I sz )
 mDoSort(,,I)


 /*! Sorts arr[0..sz-1] in ascending order and applies exactly the same
     exchanges to idxs[0..sz-1], so that idxs[k] keeps travelling with the
     value that originally sat next to it. */
 template <class T, class IT,class I>
 inline void sort_coupled( T* arr, IT* idxs, I sz )
 mDoSort(IT itmp,itmp = idxs[j-d]; idxs[j-d] = idxs[j]; idxs[j] = itmp,I)

 #undef mDoSort
 /*! Shell-sort body for sort_idxabl() and sort_idxabl_coupled(). Unlike the
     pointer-based variant it only needs operator[] and operator> on 'arr' and
     exchanges elements through Swap(), which is not defined in this header.

     extra_var     optional declaration placed at the top of the body
     extra_action  optional statement(s) run after every Swap(); it may refer
                   to 'lo' (lower index) and 'gap' (distance to its partner)
     sztype        type used for the gap and the indices */
 #define mDoSort(extra_var,extra_action,sztype) \
 { \
     extra_var; \
     for ( sztype gap=sz/2; gap>0; gap/=2 ) \
     { \
         for ( sztype top=gap; top<sz; top++ ) \
         { \
             sztype lo = top-gap; \
             while ( lo>=0 && arr[lo]>arr[lo+gap] ) \
             { \
                 Swap( arr[lo], arr[lo+gap] ); \
                 extra_action; \
                 lo -= gap; \
             } \
         } \
     } \
 }

 /*! Sorts the first sz elements of any indexable object in ascending order. */
 template <class T>
 inline void sort_idxabl( T& arr, int sz )
 mDoSort(,,int)


 /*! As sort_idxabl(), but every exchange is mirrored in idxs[0..sz-1]. */
 template <class T, class IT>
 inline void sort_idxabl_coupled( T& arr, IT* idxs, int sz )
 mDoSort(IT held,held = idxs[lo]; idxs[lo] = idxs[lo+gap]; idxs[lo+gap] = held,int)
 #undef mDoSort


 template <class T,class I>
 inline bool duplicate_sort( T* arr, I sz, int maxnrvals )
 {
     TypeSet<T> vals;
     TypeSet<int> count;
     for ( I idx=0; idx<sz; ++idx )
     {
         const int vidx = vals.indexOf( arr[idx] );
         if ( vidx<0 )
         {
             if ( vals.size()>maxnrvals )
             {
                 return false;
             }

             count += 1;
             vals += arr[idx];
         }
         else
             count[vidx] += 1;
     }

     const int vsize = mCast(int,vals.size());
     TypeSet<int> idxs;
     for ( int idx=0; idx<vsize; idx++ )
         idxs += idx;
     sort_coupled( vals.arr(), idxs.arr(), vsize );

     I index = -1;
     for ( int idx=0; idx<vsize; ++idx )
     {
         for ( int idy=count[idxs[idx]]-1; idy>=0; --idy )
             arr[++index] = vals[idx];
     }

     return true;
 }


 /*!\brief Sorts an array of values as a ParallelTask.

   Each thread first sorts its own range of the array (doWork), after which the
   sorted ranges are combined with mergeLists(), alternating between the
   caller's array and a scratch buffer of the same size. The generated
   documentation for this class describes the code as still experimental.
 */
 template <class T>
 mClass(Algo) ParallelSorter : public ParallelTask
 {
 public:
                                 // Sort vals[0..sz-1]; allocates a scratch
                                 // buffer of sz elements.
                                 ParallelSorter(T* vals, int sz);
                                 // As above; idxs is handed to the coupled
                                 // sort routines in doWork().
                                 ParallelSorter(T* vals, int* idxs, int sz);
 protected:
     // One iteration per value to be sorted.
     od_int64                    nrIterations() const { return nrvals_; }

     // presumably the smallest range worth giving to a thread - confirm
     // against ParallelTask
     int                         minThreadSize() const { return 10000; }
     // Fails when the scratch buffer could not be allocated; computes totalnr_.
     bool                        doPrepare(int);
     // Copies the result back into vals_ when it ended up in the scratch buffer.
     bool                        doFinish(bool);
     // Sorts the range [start,stop] and then takes part in the merge rounds.
     bool                        doWork(od_int64,od_int64,int);
     // Merges two or three adjacent sorted ranges of vals into res.
     static bool                 mergeLists(const T* vals, T* res,
                                            int start0,int start1,int start2,
                                            int stop, int& totalsz );
     // NOTE(review): returns totalnr_, the total set in doPrepare(), not a
     // running progress counter - confirm this is intended.
     od_int64                    nrDone() const { return totalnr_; }

     T*                          vals_;          // caller's array (not owned here)
     ArrPtrMan<T>                tmpbuffer_;     // scratch buffer, nrvals_ elements

     int*                        idxs_;          // optional coupled indices, may be 0
     T*                          curvals_;       // buffer the current merge round reads
     T*                          buf_;           // buffer the current merge round writes

     const int                   nrvals_;        // number of values to sort
     int                         totalnr_;       // (1+nrmerges)*nrvals_, see doPrepare()

     Threads::ConditionVar       condvar_;       // not referenced by the code in this header
     TypeSet<int>                starts_;        // range starts consumed by the current round
     TypeSet<int>                newstarts_;     // range starts collected for the next round

     Threads::Barrier            barrier_;       // rendezvous between rounds; its mutex
                                                 // also guards starts_/newstarts_
 };


 // Helper constants for the partition-based sorts below.

 // Ranges with q-p <= NSMALL are no longer partitioned but finished with
 // insertionSort().
 #define NSMALL 7
 // FM, FA and FC are the modulus, multiplier and increment of the recurrence
 // seed = (seed*FA + FC) % FM used by getPartSortSeed().
 #define FM 7875
 #define FA 211
 #define FC 1663
 // Number of entries in the explicit range stacks of quickSort().
 #define NSTACK 50

 // Seed shared by all partSort() calls; only declared here.
 mExtern(Algo) Threads::Atomic<int> partsortglobalseed;

 /*! Advances the shared seed by one step of seed = (seed*FA + FC) % FM and
     returns the new value as a float. partSort() uses it to choose a pivot. */
 inline float getPartSortSeed()
 {
     const int nextseed = (partsortglobalseed * FA + FC) % FM;

     // The read above and the write below are separate operations, so threads
     // running concurrently can overwrite each other's update. That is
     // accepted: the value only has to look random.
     partsortglobalseed = nextseed;

     return static_cast<float>( nextseed );
 }


 /*! Partitions arr[istart..istop] around a pseudo-randomly chosen pivot value.

     \param arr     array to partition in place
     \param istart  first index of the range
     \param istop   last index of the range (inclusive)
     \param jstart  receives the last index of the lower part
     \param jstop   receives the first index of the upper part

     On return, callers continue with [istart,*jstart] and [*jstop,istop];
     see sortFor() and quickSort(). */
 template <class T,class I> inline
 void partSort( T* arr, I istart, I istop,
                       I* jstart, I* jstop )
 {
     I ipivot, ileft, iright;
     T pivotval, tmp;

     const float localseed = getPartSortSeed();

     // Convert to the index type I, not to int: for an index type wider than
     // int the value may not fit in an int, and such a floating-point to
     // integer conversion is undefined.
     ipivot = static_cast<I>( istart + (istop-istart) * (float)localseed / (float)FM + .5 );
     // Rounding may push the pivot just outside the range.
     if ( ipivot < istart ) ipivot = istart;
     if ( ipivot > istop ) ipivot = istop;
     pivotval = arr[ipivot];

     for ( ileft=istart, iright=istop; ; )
     {
         // Skip what is already on the correct side, then exchange the pair
         // that is not.
         while ( arr[ileft] <=pivotval && ileft<istop )   ileft++;
         while ( arr[iright]>=pivotval && iright>istart ) iright--;
         if ( ileft < iright )
         {
             tmp = arr[ileft];
             arr[ileft++] = arr[iright];
             arr[iright--] = tmp;
         }
         else break;
     }

     // Move the pivot element itself next to the boundary if it was left on
     // the wrong side of it.
     if ( ileft < ipivot )
     {
         tmp = arr[ileft];
         arr[ileft++] = arr[ipivot];
         arr[ipivot] = tmp;
     }
     else if ( ipivot < iright )
     {
         tmp = arr[iright];
         arr[iright--] = arr[ipivot];
         arr[ipivot] = tmp;
     }

     *jstart = iright;
     *jstop = ileft;
 }


 /*! Sorts arr[istart..istop] (both inclusive) in ascending order by straight
     insertion. Does nothing when the range holds fewer than two elements. */
 template <class T, class I> inline
 void insertionSort( T* arr, I istart, I istop )
 {
     for ( I cur=istart+1; cur<=istop; cur++ )
     {
         // Lift the element out, shift larger predecessors one place up and
         // drop it into the gap that opens.
         T pending = arr[cur];
         I slot = cur;
         while ( slot>istart && arr[slot-1]>pending )
         {
             arr[slot] = arr[slot-1];
             slot--;
         }

         arr[slot] = pending;
     }
 }


 /*! Partially sorts arr[0..sz-1], only as far as needed for the element at
     index itarget: the range containing itarget is partitioned repeatedly and
     the part that cannot contain it is dropped. Stops as soon as itarget lies
     strictly between the two parts returned by partSort(), or finishes the
     remaining small range with insertionSort(). */
 template <class T,class I> inline
 void sortFor( T* arr, I sz, I itarget )
 {
     I lo = 0, hi = sz-1;
     I lowend, highbegin;

     while( hi - lo > NSMALL )
     {
         partSort( arr, lo, hi, &lowend, &highbegin );

         if ( itarget > lowend && itarget < highbegin )
             return;

         if ( itarget <= lowend )
             hi = lowend;
         else
             lo = highbegin;
     }

     insertionSort( arr, lo, hi );
 }


 template <class T,class I> inline
 void quickSort( T* arr, I sz )
 {
     I pstack[NSTACK], qstack[NSTACK], j, k, p, q, top=0;

     pstack[top] = 0;
     qstack[top++] = sz - 1;

     while( top )
     {
         p = pstack[--top];
         q = qstack[top];

         while( q - p > NSMALL )
         {
             partSort( arr, p, q, &j, &k );

             if ( j-p < q-k )
             {
                 pstack[top] = k;
                 qstack[top++] = q;
                 q = j;
             }
             else
             {
                 pstack[top] = p;
                 qstack[top++] = j;
                 p = k;
             }
         }
         insertionSort( arr, p, q );
     }
 }


 /*! Coupled variant of partSort(): partitions arr[istart..istop] around a
     pseudo-randomly chosen pivot value and applies every exchange to iarr as
     well. *jstart receives the last index of the lower part, *jstop the first
     index of the upper part. */
 template <class T, class IT> inline
 void partSort( T* arr, IT* iarr, int istart, int istop, int* jstart, int* jstop)
 {
     const float seed = getPartSortSeed();

     int pivotidx = (int)(istart + (istop-istart) * (float)seed / (float)FM);
     if ( pivotidx < istart ) pivotidx = istart;
     if ( pivotidx > istop ) pivotidx = istop;
     T pivotval = arr[pivotidx];

     int lo = istart;
     int hi = istop;
     while ( true )
     {
         // Skip what already sits on the correct side.
         while ( arr[lo] <=pivotval && lo<istop )   lo++;
         while ( arr[hi]>=pivotval && hi>istart ) hi--;
         if ( lo >= hi )
             break;

         // Exchange the misplaced pair in both arrays and step inwards.
         IT heldidx = iarr[lo];
         T heldval = arr[lo];
         iarr[lo] = iarr[hi];
         arr[lo] = arr[hi];
         iarr[hi] = heldidx;
         arr[hi] = heldval;
         lo++;
         hi--;
     }

     // Bring the pivot element next to the boundary if it ended up on the
     // wrong side of it.
     if ( lo < pivotidx )
     {
         IT heldidx = iarr[lo];
         T heldval = arr[lo];
         iarr[lo] = iarr[pivotidx];
         arr[lo] = arr[pivotidx];
         iarr[pivotidx] = heldidx;
         arr[pivotidx] = heldval;
         lo++;
     }
     else if ( pivotidx < hi )
     {
         IT heldidx = iarr[hi];
         T heldval = arr[hi];
         iarr[hi] = iarr[pivotidx];
         arr[hi] = arr[pivotidx];
         iarr[pivotidx] = heldidx;
         arr[pivotidx] = heldval;
         hi--;
     }

     *jstart = hi;
     *jstop = lo;
 }


 /*! Coupled straight insertion: sorts arr[istart..istop] (both inclusive) in
     ascending order and moves the entries of iarr along with their values.
     Elements that compare equal keep their relative order. */
 template <class T, class IT> inline
 void insertionSort( T* arr, IT* iarr, int istart, int istop )
 {
     for ( int cur=istart+1; cur<=istop; cur++ )
     {
         // Lift the pair out, shift larger predecessors one place up and drop
         // the pair into the gap that opens.
         IT pendingidx = iarr[cur];
         T pendingval = arr[cur];
         int slot = cur;
         while ( slot>istart && arr[slot-1]>pendingval )
         {
             arr[slot] = arr[slot-1];
             iarr[slot] = iarr[slot-1];
             slot--;
         }

         arr[slot] = pendingval;
         iarr[slot] = pendingidx;
     }
 }

 /*! Coupled variant of sortFor(): partially sorts arr[0..sz-1], with iarr
     following every move, only as far as needed for the element at index
     itarget. Stops as soon as itarget lies strictly between the two parts
     returned by partSort(), or finishes the remaining small range with
     insertionSort(). */
 template <class T, class IT>
 void sortFor( T* arr, IT* iarr, int sz, int itarget )
 {
     int lo = 0, hi = sz-1;
     int lowend, highbegin;

     while( hi - lo > NSMALL )
     {
         partSort( arr, iarr, lo, hi, &lowend, &highbegin );

         if ( itarget > lowend && itarget < highbegin )
             return;

         if ( itarget <= lowend )
             hi = lowend;
         else
             lo = highbegin;
     }

     insertionSort( arr, iarr, lo, hi );
 }


 template <class T, class IT> inline
 void quickSort( T* arr, IT* iarr, int sz )
 {
     int pstack[NSTACK], qstack[NSTACK], j, k, p, q, top=0;

     pstack[top] = 0;
     qstack[top++] = sz - 1;

     while( top )
     {
         p = pstack[--top];
         q = qstack[top];

         while( q - p > NSMALL )
         {
             partSort( arr, iarr, p, q, &j, &k );

             if ( j-p < q-k )
             {
                 pstack[top] = k;
                 qstack[top++] = q;
                 q = j;
             }
             else
             {
                 pstack[top] = p;
                 qstack[top++] = j;
                 p = k;
             }
         }

         insertionSort( arr, iarr, p, q );
     }
 }

 // The tuning constants are only meant for the sort routines above; undefine
 // them so the short names do not leak into files that include this header.
 #undef NSMALL
 #undef FM
 #undef FA
 #undef FC
 #undef NSTACK


 //ParallelSort implementation
 /*! Prepares sorting of vals[0..sz-1] without coupled indices.
     Tries to allocate the scratch buffer of sz elements; doPrepare() fails
     later on when tmpbuffer_ is still null. */
 template <class T> inline
 ParallelSorter<T>::ParallelSorter(T* vals, int sz)
     : vals_( vals )
     , tmpbuffer_( 0 )
     , idxs_( 0 )
     // curvals_ is compared with vals_ in doWork() and doFinish(), so it must
     // not be left indeterminate. Null makes the first merge round read from
     // vals_ and write to the scratch buffer.
     , curvals_( 0 )
     , buf_( 0 )
     , nrvals_( sz )
     , totalnr_( 0 )
     , barrier_( -1, false )
 {
     // Initializers are listed in member declaration order, which is the
     // order in which they are executed anyway.
     mTryAlloc( tmpbuffer_, T[sz] );
 }


 /*! Prepares sorting of vals[0..sz-1] together with the coupled array idxs.
     Tries to allocate the scratch buffer of sz elements; doPrepare() fails
     later on when tmpbuffer_ is still null. */
 template <class T> inline
 ParallelSorter<T>::ParallelSorter(T* vals, int* idxs, int sz)
     : vals_( vals )
     , tmpbuffer_( 0 )
     , idxs_( idxs )
     // curvals_ is compared with vals_ in doWork() and doFinish(), so it must
     // not be left indeterminate. Null makes the first merge round read from
     // vals_ and write to the scratch buffer.
     , curvals_( 0 )
     , buf_( 0 )
     , nrvals_( sz )
     , totalnr_( 0 )
     , barrier_( -1, false )
 {
     // Initializers are listed in member declaration order, which is the
     // order in which they are executed anyway.
     mTryAlloc( tmpbuffer_, T[sz] );
 }


 /*! Resets the per-run state before the threads start.
     \param nrthreads number of threads that will call doWork()
     \returns false when the scratch buffer could not be allocated. */
 template <class T> inline
 bool ParallelSorter<T>::doPrepare( int nrthreads )
 {
     // Start every run with no buffer roles assigned. doWork() decides the
     // roles by comparing curvals_ with vals_; a value left over from an
     // earlier run (or never set at all) would make the first merge round
     // read from the scratch buffer instead of from vals_.
     curvals_ = 0;
     buf_ = 0;

     if ( !tmpbuffer_ )
         return false;

     barrier_.setNrThreads( nrthreads );

     starts_.erase();
     newstarts_.erase();

     // nrmerges = floor(log2(nrthreads)): the number of times nrthreads can
     // be halved before reaching zero, minus one.
     int nrmerges = -1;
     while ( nrthreads )
     {
         nrmerges++;
         nrthreads>>=1;
     }

     // One pass for the initial per-thread sort plus one per merge round.
     totalnr_ = (1+nrmerges)*nrvals_;
     return true;
 }


 /*! Called after the threads are done. On success, copies the sorted values
     back into the caller's array when the last round left them elsewhere.
     \returns the success flag it was given. */
 template <class T> inline
 bool ParallelSorter<T>::doFinish( bool success )
 {
     if ( success && curvals_!=vals_ )
         OD::memCopy( vals_, curvals_, nrvals_*sizeof(T) );

     return success;
 }


 /*! Work function of one thread: sorts the range [start,stop] of vals_ and
     then takes part in the merge rounds until this thread is no longer needed.

     \param start  first index of this thread's range
     \param stop   last index of this thread's range (inclusive)
     \param thread index of the calling thread
     \returns false when the task should stop or a merge fails. */
 template <class T> inline
 bool ParallelSorter<T>::doWork( od_int64 start, od_int64 stop, int thread )
 {
     // Phase 1: sort this thread's own range. Short ranges use the shell sort
     // (sort_array/sort_coupled), longer ones quickSort.
     const int threadsize = stop-start+1;
     if ( threadsize<100 )
     {
         if ( idxs_ )
             sort_coupled( vals_+start, idxs_+start, threadsize );
         else
             sort_array( vals_+start, threadsize );
     }
     else
     {
         if ( idxs_ )
             quickSort( vals_+start, idxs_+start, threadsize );
         else
             quickSort( vals_+start, threadsize );
     }

     if ( !shouldContinue() )
         return false;

     addToNrDone( threadsize );

     // Register the start of the sorted range for the first merge round.
     // NOTE(review): starts are appended in the order threads arrive here,
     // which need not be ascending - confirm the merge code can rely on it.
     barrier_.mutex().lock();
     newstarts_ += start;
     barrier_.mutex().unLock();

     // Phase 2: merge rounds.
     while ( true )
     {
         // presumably waitForAll(false) returns true for exactly one thread
         // and leaves the barrier mutex locked for every thread (it is
         // unlocked explicitly below) - confirm against thread.h
         if ( barrier_.waitForAll(false) )
         {
             // Swap the roles of the two buffers: the one written in the
             // previous round becomes the input of this round.
             if ( curvals_==vals_ )
             {
                 curvals_ = tmpbuffer_;
                 buf_ = vals_;
             }
             else
             {
                 buf_ = tmpbuffer_;
                 curvals_ = vals_;
             }

             // Every merging thread consumes at least two ranges, so only
             // half as many threads are needed in this round.
             // NOTE(review): newstarts_ is not cleared after being copied, so
             // it keeps the entries of earlier rounds - confirm.
             starts_ = newstarts_;
             barrier_.setNrThreads( starts_.size()/2 );
             barrier_.releaseAllNoLock();
         }

         if ( thread>=barrier_.nrThreads() )
         {
             barrier_.mutex().unLock();
             //I'm not needed any longer
             break;
         }

         // Claim two ranges; also take a third one when exactly one would
         // otherwise be left over. curstart2==-1 means 'no third range'.
         const int curstart0 = starts_[0]; starts_.removeSingle( 0 );
         const int curstart1 = starts_[0]; starts_.removeSingle( 0 );
         int curstart2;
         if ( starts_.size()==1 )
         {
             curstart2 = starts_[0];
             starts_.removeSingle( 0 );
         }
         else
             curstart2 = -1;

         // The claimed ranges end just before the next unclaimed range, or at
         // the end of the array.
         const int curstop = (starts_.size() ? starts_[0] : nrvals_)-1;
         newstarts_ += curstart0;
         barrier_.mutex().unLock();

         // NOTE(review): only the values are merged; idxs_ is not passed on,
         // so coupled indices are not kept in step here.
         int cursize;
         if ( !mergeLists( curvals_, buf_,
                     curstart0, curstart1, curstart2, curstop, cursize) )
             return false;

         if ( !shouldContinue() )
             return false;

         addToNrDone( cursize );
     }

     return true;
 }


 /*! Merges two or three adjacent sorted sub-lists of valptr into result.

     \param valptr   input array
     \param result   output; written sequentially starting at result[0]
     \param start0   first index of list 0, which ends at start1-1
     \param start1   first index of list 1
     \param start2   first index of list 2, or -1 when there is no third list
     \param stop     last index (inclusive) of the last list
     \param totalsz  receives the summed size of the lists
     \returns always true.

     A list whose computed size is zero or negative is treated as empty and
     contributes nothing, instead of being read from. */
 template <class T> inline
 bool ParallelSorter<T>::mergeLists( const T* valptr, T* result,
                                     int start0, int start1, int start2,
                                     int stop, int& totalsz )
 {
     const int sz0 = start1-start0;
     const int sz1 = start2==-1 ? stop-start1+1 : start2-start1;
     const int sz2 = start2==-1 ? 0 : stop-start2+1;
     totalsz = sz0+sz1+sz2;

     // A null cursor means 'list exhausted'. Lists without elements start out
     // exhausted; otherwise their first element would be read and copied, and
     // the cursor would never meet its stop pointer.
     const T* ptr0 = sz0>0 ? valptr + start0 : 0;
     const T* stopptr0 = ptr0 ? ptr0+sz0 : 0;
     const T* ptr1 = sz1>0 ? valptr + start1 : 0;
     const T* stopptr1 = ptr1 ? ptr1+sz1 : 0;
     const T* ptr2 = (start2!=-1 && sz2>0) ? valptr + start2 : 0;
     const T* stopptr2 = ptr2 ? ptr2+sz2 : 0;

     while ( true )
     {
         // Take from list 0 only when its head is strictly smaller than the
         // heads of the other live lists; otherwise try list 1, then list 2.
         if ( ptr0 && (!ptr1 || *ptr0<*ptr1) && (!ptr2 || *ptr0<*ptr2 ) )
         {
             (*result++) = (*ptr0++);
             if ( ptr0==stopptr0 )
                 ptr0 = 0;
         }
         else if ( ptr1 && ( !ptr2 || *ptr1<*ptr2 ) )
         {
             (*result++) = (*ptr1++);
             if ( ptr1==stopptr1 )
                 ptr1 = 0;
         }
         else if ( ptr2 )
         {
             (*result++) = (*ptr2++);
             if ( ptr2==stopptr2 )
                 ptr2 = 0;
         }
         else
             break;
     }

     return true;
 }
Threads::ConditionVar
Is an object that facilitates many threads to wait for something to happen.
Definition: thread.h:108

paralleltask.h

insertionSort
void insertionSort(T *arr, I istart, I istop)
Definition: sorting.h:221

partSort
void partSort(T *arr, I istart, I istop, I *jstart, I *jstop)
Definition: sorting.h:176

sort_idxabl
void sort_idxabl(T &arr, int sz)
Definition: sorting.h:58

Threads::Barrier::mutex
Mutex & mutex()
Definition: thread.h:266

Threads::Barrier::nrThreads
int nrThreads() const
Definition: thread.h:249

ParallelSorter::nrvals_
const int nrvals_
Definition: sorting.h:142

mExtern
#define mExtern(module)
Definition: commondefs.h:163

NSMALL
#define NSMALL
Definition: sorting.h:153

Threads::Barrier::setNrThreads
void setNrThreads(int)

sortFor
void sortFor(T *arr, I sz, I itarget)
Definition: sorting.h:236

sort_array
void sort_array(T *arr, I sz)
Definition: sorting.h:35

FC
#define FC
Definition: sorting.h:156

ParallelSorter::starts_
TypeSet< int > starts_
Definition: sorting.h:146

Threads::Mutex::unLock
void unLock()

mCast
#define mCast(tp, v)
Definition: commondefs.h:120

od_int64
#define od_int64
Definition: plftypes.h:34

ParallelSorter
Sorting in parallel. Code is still experimental.
Definition: sorting.h:118

ParallelSorter::totalnr_
int totalnr_
Definition: sorting.h:143

ptrman.h

mDoSort
#define mDoSort(extra_var, extra_action, sztype)
Definition: sorting.h:44

Task::shouldContinue
virtual bool shouldContinue()

getPartSortSeed
float getPartSortSeed()
Definition: sorting.h:161

Threads::Mutex::lock
void lock()

ParallelSorter::nrDone
od_int64 nrDone() const
May be -1, i.e. class does not report nrdone.
Definition: sorting.h:133

ParallelTask
Generalization of a task that can be run in parallel.
Definition: paralleltask.h:64

TypeSetBase< T, int >::arr
virtual T * arr()
3rd party access
Definition: typeset.h:86

quickSort
void quickSort(T *arr, I sz)
Definition: sorting.h:257

thread.h

Threads
interface to threads that should be portable.
Definition: atomic.h:24

Threads::Barrier::releaseAllNoLock
void releaseAllNoLock()

duplicate_sort
bool duplicate_sort(T *arr, I sz, int maxnrvals)
Definition: sorting.h:70

gendefs.h

TypeSet
Set of (small) copyable elements.
Definition: commontypes.h:26

ParallelSorter::minThreadSize
int minThreadSize() const
Definition: sorting.h:126

ParallelSorter::tmpbuffer_
ArrPtrMan< T > tmpbuffer_
Definition: sorting.h:136

ParallelSorter::newstarts_
TypeSet< int > newstarts_
Definition: sorting.h:147

mTryAlloc
#define mTryAlloc(var, stmt)
Catches bad_alloc and sets ptr to null as normal.
Definition: commondefs.h:244

FA
#define FA
Definition: sorting.h:155

ParallelSorter::doPrepare
bool doPrepare(int)
Definition: sorting.h:460

sort_idxabl_coupled
void sort_idxabl_coupled(T &arr, IT *idxs, int sz)
Definition: sorting.h:63

ParallelSorter::ParallelSorter
ParallelSorter(T *vals, int sz)
Definition: sorting.h:434

ParallelSorter::doFinish
bool doFinish(bool)
Definition: sorting.h:483

NSTACK
#define NSTACK
Definition: sorting.h:157

Threads::Barrier
Waits for a number of threads to reach a certain point (i.e. the call to Barrier::waitForAll). Once everyone has arrived, everyone is released.
Definition: thread.h:244

ParallelSorter::mergeLists
static bool mergeLists(const T *vals, T *res, int start0, int start1, int start2, int stop, int &totalsz)
Definition: sorting.h:581

ParallelSorter::doWork
bool doWork(od_int64, od_int64, int)
Definition: sorting.h:496

FM
#define FM
Definition: sorting.h:154

TypeSetBase< T, int >::size
size_type size() const

ArrPtrMan
Definition: ptrman.h:154

ParallelSorter::nrIterations
od_int64 nrIterations() const
Definition: sorting.h:124

TypeSetBase::erase
virtual void erase()
Definition: typeset.h:360

ParallelSorter::buf_
T * buf_
Definition: sorting.h:140

ParallelSorter::barrier_
Threads::Barrier barrier_
Definition: sorting.h:149

ParallelSorter::condvar_
Threads::ConditionVar condvar_
Definition: sorting.h:145

ParallelSorter::idxs_
int * idxs_
Definition: sorting.h:138

ParallelSorter::vals_
T * vals_
Definition: sorting.h:135

Threads::Barrier::waitForAll
bool waitForAll(bool unlock=true)

TypeSetBase::removeSingle
virtual void removeSingle(size_type, bool preserver_order=true)
Definition: typeset.h:507

mClass
#define mClass(module)
Definition: commondefs.h:161

TypeSetBase< T, int >::indexOf
virtual size_type indexOf(T, bool forward=true, size_type start=-1) const

ParallelTask::addToNrDone
void addToNrDone(int64_t increment)

ParallelSorter::curvals_
T * curvals_
Definition: sorting.h:139

sort_coupled
void sort_coupled(T *arr, IT *idxs, I sz)
Definition: sorting.h:40