OpendTect-6_4  6.4
sorting.h
Go to the documentation of this file.
1 #ifndef sorting_h
2 #define sorting_h
3 
4 /*+
5 ________________________________________________________________________
6 
7  (C) dGB Beheer B.V.; (LICENSE) http://opendtect.org/OpendTect_license.txt
8  Author: A.H. Bril
9  Date: 19-4-2000
10  Contents: Array sorting
11  RCS: $Id$
12 ________________________________________________________________________
13 
14 -*/
15 
16 #include "algomod.h"
17 #include "gendefs.h"
18 #include "ptrman.h"
19 #include "thread.h"
20 #include "paralleltask.h"
21 
22 
23 #define mDoSort(extra_var,extra_action,sztype) \
24 { \
25  T tmp; extra_var; \
26  for ( sztype d=sz/2; d>0; d=d/2 ) \
27  for ( sztype i=d; i<sz; i++ ) \
28  for ( sztype j=i-d; j>=0 && arr[j]>arr[j+d]; j-=d ) \
29  { \
30  tmp = arr[j]; arr[j] = arr[j+d]; arr[j+d] = tmp; \
31  extra_action; \
32  } \
33 }
34 
36 template <class T,class I>
37 inline void sort_array( T* arr, I sz )
38 mDoSort(,,I)
39 
40 
41 template <class T, class IT,class I>
42 inline void sort_coupled( T* arr, IT* idxs, I sz )
43 mDoSort(IT itmp,itmp = idxs[j]; idxs[j] = idxs[j+d]; idxs[j+d] = itmp,I)
44 
45 #undef mDoSort
46 #define mDoSort(extra_var,extra_action,sztype) \
47 { \
48  extra_var; \
49  for ( sztype d=sz/2; d>0; d=d/2 ) \
50  for ( sztype i=d; i<sz; i++ ) \
51  for ( sztype j=i-d; j>=0 && arr[j]>arr[j+d]; j-=d ) \
52  { \
53  Swap( arr[j], arr[j+d] ); \
54  extra_action; \
55  } \
56 }
57 
59 template <class T>
60 inline void sort_idxabl( T& arr, int sz )
61 mDoSort(,,int)
62 
63 
64 template <class T, class IT>
65 inline void sort_idxabl_coupled( T& arr, IT* idxs, int sz )
66 mDoSort(IT itmp,itmp = idxs[j]; idxs[j] = idxs[j+d]; idxs[j+d] = itmp,int)
67 #undef mDoSort
68 
69 
71 template <class T,class I>
72 inline bool duplicate_sort( T* arr, I sz, int maxnrvals )
73 {
74  TypeSet<T> vals;
75  TypeSet<int> count;
76  for ( I idx=0; idx<sz; ++idx )
77  {
78  const int vidx = vals.indexOf( arr[idx] );
79  if ( vidx<0 )
80  {
81  if ( vals.size()>maxnrvals )
82  {
83  return false;
84  }
85 
86  count += 1;
87  vals += arr[idx];
88  }
89  else
90  count[vidx] += 1;
91  }
92 
93  const int vsize = mCast(int,vals.size());
94  TypeSet<int> idxs;
95  for ( int idx=0; idx<vsize; idx++ )
96  idxs += idx;
97  sort_coupled( vals.arr(), idxs.arr(), vsize );
98 
99  I index = -1;
100  for ( int idx=0; idx<vsize; ++idx )
101  {
102  for ( int idy=count[idxs[idx]]-1; idy>=0; --idy )
103  arr[++index] = vals[idx];
104  }
105 
106  return true;
107 }
108 
109 
119 template <class T>
121 {
122 public:
123  ParallelSorter(T* vals, int sz);
124  ParallelSorter(T* vals, int* idxs, int sz);
125 protected:
126  od_int64 nrIterations() const { return nrvals_; }
127 
128  int minThreadSize() const { return 10000; }
129  bool doPrepare(int);
130  bool doFinish(bool);
131  bool doWork(od_int64,od_int64,int);
132  static bool mergeLists(const T* vals, T* res,
133  int start0,int start1,int start2,
134  int stop, int& totalsz );
135  od_int64 nrDone() const { return totalnr_; }
136 
137  T* vals_;
139 
140  int* idxs_;
142  T* buf_;
143 
144  const int nrvals_;
145  int totalnr_;
146 
150 
152 };
153 
154 
155 #define NSMALL 7
156 #define FM 7875
157 #define FA 211
158 #define FC 1663
159 #define NSTACK 50
160 
161 mExtern(Algo) Threads::Atomic<int> partsortglobalseed;
162 
163 inline float getPartSortSeed()
164 {
165  const int localseed = (partsortglobalseed * FA + FC) % FM;
166 
167  //This is not really atomic, so a MT environment may alter the
168  //global seed, but who cares as it is a seed, and should be
169  //a random number
170 
171  partsortglobalseed = localseed;
172 
173  return (float) localseed;
174 }
175 
176 
177 template <class T,class I> inline
178 void partSort( T* arr, I istart, I istop,
179  I* jstart, I* jstop )
180 {
181  I ipivot, ileft, iright;
182  T pivotval, tmp;
183 
184  const float localseed = getPartSortSeed();
185 
186  ipivot = (int)(istart + (istop-istart) * (float)localseed / (float)FM + .5);
187  if ( ipivot < istart ) ipivot = istart;
188  if ( ipivot > istop ) ipivot = istop;
189  pivotval = arr[ipivot];
190 
191  for ( ileft=istart, iright=istop; ; )
192  {
193  while ( arr[ileft] <=pivotval && ileft<istop ) ileft++;
194  while ( arr[iright]>=pivotval && iright>istart ) iright--;
195  if ( ileft < iright )
196  {
197  tmp = arr[ileft];
198  arr[ileft++] = arr[iright];
199  arr[iright--] = tmp;
200  }
201  else break;
202  }
203 
204  if ( ileft < ipivot )
205  {
206  tmp = arr[ileft];
207  arr[ileft++] = arr[ipivot];
208  arr[ipivot] = tmp;
209  }
210  else if ( ipivot < iright )
211  {
212  tmp = arr[iright];
213  arr[iright--] = arr[ipivot];
214  arr[ipivot] = tmp;
215  }
216 
217  *jstart = iright;
218  *jstop = ileft;
219 }
220 
221 
222 template <class T, class I> inline
223 void insertionSort( T* arr, I istart, I istop )
224 {
225  I i, j;
226  T arr_i;
227 
228  for ( i=istart+1; i<=istop; i++ )
229  {
230  for ( arr_i=arr[i],j=i; j>istart && arr[j-1]>arr_i; j-- )
231  arr[j] = arr[j-1];
232  arr[j] = arr_i;
233  }
234 }
235 
236 
237 template <class T,class I> inline
238 void sortFor( T* arr, I sz, I itarget )
242 {
243  I j, k, p = 0, q = sz-1;
244 
245  while( q - p > NSMALL )
246  {
247  partSort( arr, p, q, &j, &k );
248 
249  if ( itarget <= j ) q = j;
250  else if ( itarget >= k ) p = k;
251  else return;
252  }
253 
254  insertionSort( arr, p, q );
255 }
256 
257 
258 template <class T,class I> inline
259 void quickSort( T* arr, I sz )
261 {
262  I pstack[NSTACK], qstack[NSTACK], j, k, p, q, top=0;
263 
264  pstack[top] = 0;
265  qstack[top++] = sz - 1;
266 
267  while( top )
268  {
269  p = pstack[--top];
270  q = qstack[top];
271 
272  while( q - p > NSMALL )
273  {
274  partSort( arr, p, q, &j, &k );
275 
276  if ( j-p < q-k )
277  {
278  pstack[top] = k;
279  qstack[top++] = q;
280  q = j;
281  }
282  else
283  {
284  pstack[top] = p;
285  qstack[top++] = j;
286  p = k;
287  }
288  }
289  insertionSort( arr, p, q );
290  }
291 }
292 
293 
294 template <class T, class IT> inline
295 void partSort( T* arr, IT* iarr, int istart, int istop, int* jstart, int* jstop)
296 {
297  int ipivot, ileft, iright;
298  T pivotval, tmp;
299  IT itmp;
300 
301  const float localseed = getPartSortSeed();
302 
303  ipivot = (int)(istart + (istop-istart) * (float)localseed / (float)FM);
304  if ( ipivot < istart ) ipivot = istart;
305  if ( ipivot > istop ) ipivot = istop;
306  pivotval = arr[ipivot];
307 
308  for ( ileft=istart, iright=istop; ; )
309  {
310  while ( arr[ileft] <=pivotval && ileft<istop ) ileft++;
311  while ( arr[iright]>=pivotval && iright>istart ) iright--;
312  if ( ileft < iright )
313  {
314  itmp = iarr[ileft];
315  tmp = arr[ileft];
316 
317  iarr[ileft] = iarr[iright];
318  arr[ileft++] = arr[iright];
319 
320  iarr[iright] = itmp;
321  arr[iright--] = tmp;
322  }
323  else break;
324  }
325 
326  if ( ileft < ipivot )
327  {
328  itmp = iarr[ileft];
329  tmp = arr[ileft];
330 
331  iarr[ileft] = iarr[ipivot];
332  arr[ileft++] = arr[ipivot];
333 
334  iarr[ipivot] = itmp;
335  arr[ipivot] = tmp;
336  }
337  else if ( ipivot < iright )
338  {
339  itmp = iarr[iright];
340  tmp = arr[iright];
341 
342  iarr[iright] = iarr[ipivot];
343  arr[iright--] = arr[ipivot];
344 
345  iarr[ipivot] = itmp;
346  arr[ipivot] = tmp;
347  }
348 
349  *jstart = iright;
350  *jstop = ileft;
351 }
352 
353 
354 template <class T, class IT> inline
355 void insertionSort( T* arr, IT* iarr, int istart, int istop )
356 {
357  int i, j;
358  T arr_i;
359  IT iarr_i;
360 
361  for ( i=istart+1; i<=istop; i++ )
362  {
363  for ( iarr_i=iarr[i],arr_i=arr[i],j=i; j>istart && arr[j-1]>arr_i; j-- )
364  {
365  arr[j] = arr[j-1];
366  iarr[j] = iarr[j-1];
367  }
368 
369  arr[j] = arr_i;
370  iarr[j] = iarr_i;
371  }
372 }
373 
374 template <class T, class IT>
375 void sortFor( T* arr, IT* iarr, int sz, int itarget )
376 {
377  int j, k, p = 0, q = sz-1;
378 
379  while( q - p > NSMALL )
380  {
381  partSort( arr, iarr, p, q, &j, &k );
382 
383  if ( itarget <= j ) q = j;
384  else if ( itarget >= k ) p = k;
385  else return;
386  }
387 
388  insertionSort( arr, iarr, p, q );
389 }
390 
391 
392 template <class T, class IT> inline
393 void quickSort( T* arr, IT* iarr, int sz )
394 {
395  int pstack[NSTACK], qstack[NSTACK], j, k, p, q, top=0;
396 
397  pstack[top] = 0;
398  qstack[top++] = sz - 1;
399 
400  while( top )
401  {
402  p = pstack[--top];
403  q = qstack[top];
404 
405  while( q - p > NSMALL )
406  {
407  partSort( arr, iarr, p, q, &j, &k );
408 
409  if ( j-p < q-k )
410  {
411  pstack[top] = k;
412  qstack[top++] = q;
413  q = j;
414  }
415  else
416  {
417  pstack[top] = p;
418  qstack[top++] = j;
419  p = k;
420  }
421  }
422 
423  insertionSort( arr, iarr, p, q );
424  }
425 }
426 
427 #undef NSMALL
428 #undef FM
429 #undef FA
430 #undef FC
431 #undef NSTACK
432 
433 
434 //ParallelSort implementation
435 template <class T> inline
437  : vals_( vals )
438  , nrvals_( sz )
439  , tmpbuffer_( 0 )
440  , barrier_( -1, false )
441  , totalnr_(0)
442  , idxs_( 0 )
443 {
444  mTryAlloc( tmpbuffer_, T[sz] );
445 }
446 
447 
448 template <class T> inline
449 ParallelSorter<T>::ParallelSorter(T* vals, int* idxs, int sz)
450  : vals_( vals )
451  , nrvals_( sz )
452  , tmpbuffer_( 0 )
453  , totalnr_(0)
454  , barrier_( -1, false )
455  , idxs_( idxs )
456 {
457  mTryAlloc( tmpbuffer_, T[sz] );
458 }
459 
460 
461 template <class T> inline
462 bool ParallelSorter<T>::doPrepare( int nrthreads )
463 {
464  if ( !tmpbuffer_ )
465  return false;
466 
467  barrier_.setNrThreads( nrthreads );
468 
469  starts_.erase();
470  newstarts_.erase();
471 
472  int nrmerges = -1;
473  while ( nrthreads )
474  {
475  nrmerges++;
476  nrthreads>>=1;
477  }
478 
479  totalnr_ = (1+nrmerges)*nrvals_;
480  return true;
481 }
482 
483 
484 template <class T> inline
485 bool ParallelSorter<T>::doFinish( bool success )
486 {
487  if ( !success )
488  return false;
489 
490  if ( curvals_!=vals_ )
491  OD::memCopy( vals_, curvals_, nrvals_*sizeof(T) );
492 
493  return true;
494 }
495 
496 
497 template <class T> inline
498 bool ParallelSorter<T>::doWork( od_int64 start, od_int64 stop, int thread )
499 {
500  const int threadsize = stop-start+1;
501  if ( threadsize<100 )
502  {
503  if ( idxs_ )
504  sort_coupled( vals_+start, idxs_+start, threadsize );
505  else
506  sort_array( vals_+start, threadsize );
507  }
508  else
509  {
510  if ( idxs_ )
511  quickSort( vals_+start, idxs_+start, threadsize );
512  else
513  quickSort( vals_+start, threadsize );
514  }
515 
516  if ( !shouldContinue() )
517  return false;
518 
519  addToNrDone( threadsize );
520 
521  barrier_.mutex().lock();
522  newstarts_ += start;
523  barrier_.mutex().unLock();
524 
525  while ( true )
526  {
527  if ( barrier_.waitForAll(false) )
528  {
529  if ( curvals_==vals_ )
530  {
532  buf_ = vals_;
533  }
534  else
535  {
536  buf_ = tmpbuffer_;
537  curvals_ = vals_;
538  }
539 
543  }
544 
545  if ( thread>=barrier_.nrThreads() )
546  {
547  barrier_.mutex().unLock();
548  //I'm not needed any longer
549  break;
550  }
551 
552  const int curstart0 = starts_[0]; starts_.removeSingle( 0 );
553  const int curstart1 = starts_[0]; starts_.removeSingle( 0 );
554  int curstart2;
555  if ( starts_.size()==1 )
556  {
557  curstart2 = starts_[0];
558  starts_.removeSingle( 0 );
559  }
560  else
561  curstart2 = -1;
562 
563  const int curstop = (starts_.size() ? starts_[0] : nrvals_)-1;
564  newstarts_ += curstart0;
565  barrier_.mutex().unLock();
566 
567  int cursize;
568  if ( !mergeLists( curvals_, buf_,
569  curstart0, curstart1, curstart2, curstop, cursize) )
570  return false;
571 
572  if ( !shouldContinue() )
573  return false;
574 
575  addToNrDone( cursize );
576  }
577 
578  return true;
579 }
580 
581 
582 template <class T> inline
583 bool ParallelSorter<T>::mergeLists( const T* valptr, T* result,
584  int start0, int start1, int start2,
585  int stop, int& totalsz )
586 {
587  const int sz0 = start1-start0;
588  const int sz1 = start2==-1 ? stop-start1+1 : start2-start1;
589  const int sz2 = start2==-1 ? 0 : stop-start2+1;
590  totalsz = sz0+sz1+sz2;
591 
592  const T* ptr0 = valptr + start0;
593  const T* stopptr0 = ptr0+sz0;
594  const T* ptr1 = valptr + start1;
595  const T* stopptr1 = ptr1+sz1;
596  const T* ptr2 = start2==-1 ? 0 : valptr + start2;
597  const T* stopptr2 = ptr2+sz2;
598 
599  while ( true )
600  {
601  if ( ptr0 && (!ptr1 || *ptr0<*ptr1) && (!ptr2 || *ptr0<*ptr2 ) )
602  {
603  (*result++) = (*ptr0++);
604  if ( ptr0==stopptr0 )
605  ptr0 = 0;
606  }
607  else if ( ptr1 && ( !ptr2 || *ptr1<*ptr2 ) )
608  {
609  (*result++) = (*ptr1++);
610  if ( ptr1==stopptr1 )
611  ptr1 = 0;
612  }
613  else if ( ptr2 )
614  {
615  (*result++) = (*ptr2++);
616  if ( ptr2==stopptr2 )
617  ptr2 = 0;
618  }
619  else
620  break;
621  }
622 
623  return true;
624 }
625 
626 
627 #endif
Is an object that faciliates many threads to wait for something to happen.
Definition: thread.h:108
void insertionSort(T *arr, I istart, I istop)
Definition: sorting.h:223
void partSort(T *arr, I istart, I istop, I *jstart, I *jstop)
Definition: sorting.h:178
void sort_idxabl(T &arr, int sz)
Definition: sorting.h:60
Mutex & mutex()
Definition: thread.h:262
int nrThreads() const
Definition: thread.h:245
const int nrvals_
Definition: sorting.h:144
#define mExtern(module)
Definition: commondefs.h:166
#define NSMALL
Definition: sorting.h:155
void setNrThreads(int)
void sortFor(T *arr, I sz, I itarget)
Definition: sorting.h:238
void sort_array(T *arr, I sz)
Definition: sorting.h:37
#define FC
Definition: sorting.h:158
TypeSet< int > starts_
Definition: sorting.h:148
#define mCast(tp, v)
Definition: commondefs.h:124
#define od_int64
Definition: plftypes.h:36
Sorting in parallel. Code is still experimental.
Definition: sorting.h:120
int totalnr_
Definition: sorting.h:145
#define mDoSort(extra_var, extra_action, sztype)
Definition: sorting.h:46
virtual bool shouldContinue()
mExtern(Algo) Threads float getPartSortSeed()
Definition: sorting.h:163
od_int64 nrDone() const
May be -1, i.e. class does not report nrdone.
Definition: sorting.h:135
Generalization of a task that can be run in parallel.
Definition: paralleltask.h:66
virtual T * arr()
3rd party access
Definition: typeset.h:92
void quickSort(T *arr, I sz)
Definition: sorting.h:259
interface to threads that should be portable.
Definition: atomic.h:28
void releaseAllNoLock()
bool duplicate_sort(T *arr, I sz, int maxnrvals)
Definition: sorting.h:72
Set of (small) copyable elements.
Definition: commontypes.h:30
int minThreadSize() const
Definition: sorting.h:128
ArrPtrMan< T > tmpbuffer_
Definition: sorting.h:138
TypeSet< int > newstarts_
Definition: sorting.h:149
#define mTryAlloc(var, stmt)
Catches bad_alloc and sets ptr to null as normal.
Definition: commondefs.h:241
#define FA
Definition: sorting.h:157
bool doPrepare(int)
Definition: sorting.h:462
void sort_idxabl_coupled(T &arr, IT *idxs, int sz)
Definition: sorting.h:65
ParallelSorter(T *vals, int sz)
Definition: sorting.h:436
bool doFinish(bool)
Definition: sorting.h:485
#define NSTACK
Definition: sorting.h:159
Waits for a number of threads to reach a certain point (i.e. the call to Barrier::waitForAll). Once everyone has arrived, everyone is released.
Definition: thread.h:240
static bool mergeLists(const T *vals, T *res, int start0, int start1, int start2, int stop, int &totalsz)
Definition: sorting.h:583
bool doWork(od_int64, od_int64, int)
Definition: sorting.h:498
#define FM
Definition: sorting.h:156
size_type size() const
Definition: ptrman.h:119
od_int64 nrIterations() const
Definition: sorting.h:126
virtual void erase()
Definition: typeset.h:339
T * buf_
Definition: sorting.h:142
Threads::Barrier barrier_
Definition: sorting.h:151
Threads::ConditionVar condvar_
Definition: sorting.h:147
int * idxs_
Definition: sorting.h:140
T * vals_
Definition: sorting.h:137
bool waitForAll(bool unlock=true)
virtual void removeSingle(size_type, bool preserver_order=true)
Definition: typeset.h:500
#define mClass(module)
Definition: commondefs.h:164
virtual size_type indexOf(T, bool forward=true, size_type start=-1) const
void addToNrDone(int64_t increment)
T * curvals_
Definition: sorting.h:141
void sort_coupled(T *arr, IT *idxs, I sz)
Definition: sorting.h:42

Generated at for the OpendTect seismic interpretation project. Copyright (C): dGB Beheer B. V. 2019