OpendTect  6.6
sorting.h
Go to the documentation of this file.
1 #pragma once
2 
3 /*+
4 ________________________________________________________________________
5 
6  (C) dGB Beheer B.V.; (LICENSE) http://opendtect.org/OpendTect_license.txt
7  Author: A.H. Bril
8  Date: 19-4-2000
9  Contents: Array sorting
10  RCS: $Id$
11 ________________________________________________________________________
12 
13 -*/
14 
15 #include "algomod.h"
16 #include "gendefs.h"
17 #include "ptrman.h"
18 #include "thread.h"
19 #include "paralleltask.h"
20 
21 
22 #define mDoSort(extra_var,extra_action,sztype) \
23 { \
24  T tmp; extra_var; \
25  for ( sztype d=sz/2; d>0; d=d/2 ) \
26  for ( sztype i=d; i<sz; i++ ) \
27  for ( sztype j=i-d; j>=0 && arr[j]>arr[j+d]; j-=d ) \
28  { \
29  tmp = arr[j]; arr[j] = arr[j+d]; arr[j+d] = tmp; \
30  extra_action; \
31  } \
32 }
33 
35 template <class T,class I>
36 inline void sort_array( T* arr, I sz )
37 mDoSort(,,I)
38 
39 
40 template <class T, class IT,class I>
41 inline void sort_coupled( T* arr, IT* idxs, I sz )
42 mDoSort(IT itmp,itmp = idxs[j]; idxs[j] = idxs[j+d]; idxs[j+d] = itmp,I)
43 
44 #undef mDoSort
45 #define mDoSort(extra_var,extra_action,sztype) \
46 { \
47  extra_var; \
48  for ( sztype d=sz/2; d>0; d=d/2 ) \
49  for ( sztype i=d; i<sz; i++ ) \
50  for ( sztype j=i-d; j>=0 && arr[j]>arr[j+d]; j-=d ) \
51  { \
52  Swap( arr[j], arr[j+d] ); \
53  extra_action; \
54  } \
55 }
56 
58 template <class T>
59 inline void sort_idxabl( T& arr, int sz )
60 mDoSort(,,int)
61 
62 
63 template <class T, class IT>
64 inline void sort_idxabl_coupled( T& arr, IT* idxs, int sz )
65 mDoSort(IT itmp,itmp = idxs[j]; idxs[j] = idxs[j+d]; idxs[j+d] = itmp,int)
66 #undef mDoSort
67 
68 
70 template <class T,class I>
71 inline bool duplicate_sort( T* arr, I sz, int maxnrvals )
72 {
73  TypeSet<T> vals;
74  TypeSet<int> count;
75  for ( I idx=0; idx<sz; ++idx )
76  {
77  const int vidx = vals.indexOf( arr[idx] );
78  if ( vidx<0 )
79  {
80  if ( vals.size()>maxnrvals )
81  {
82  return false;
83  }
84 
85  count += 1;
86  vals += arr[idx];
87  }
88  else
89  count[vidx] += 1;
90  }
91 
92  const int vsize = mCast(int,vals.size());
93  TypeSet<int> idxs;
94  for ( int idx=0; idx<vsize; idx++ )
95  idxs += idx;
96  sort_coupled( vals.arr(), idxs.arr(), vsize );
97 
98  I index = -1;
99  for ( int idx=0; idx<vsize; ++idx )
100  {
101  for ( int idy=count[idxs[idx]]-1; idy>=0; --idy )
102  arr[++index] = vals[idx];
103  }
104 
105  return true;
106 }
107 
108 
118 template <class T>
121 public:
122 
125 
126  ParallelSorter(T* vals,size_type);
128 protected:
129  od_int64 nrIterations() const { return nrvals_; }
130 
131  int minThreadSize() const { return 10000; }
132  bool doPrepare(int);
133  bool doFinish(bool);
134  bool doWork(od_int64,od_int64,int);
135  static bool mergeLists(const T* vals, T* res,
136  idx_type start0,idx_type start1,
137  idx_type start2,idx_type stop,
138  size_type& totalsz );
139  od_int64 nrDone() const { return totalnr_; }
140 
141  T* vals_;
143 
146  T* buf_;
147 
150 
154 
156 };
157 
158 
159 #define NSMALL 7
160 #define FM 7875
161 #define FA 211
162 #define FC 1663
163 #define NSTACK 50
164 
165 mExtern(Algo) Threads::Atomic<int> partsortglobalseed;
166 
167 inline float getPartSortSeed()
168 {
169  const int localseed = (partsortglobalseed * FA + FC) % FM;
170 
171  //This is not really atomic, so a MT environment may alter the
172  //global seed, but who cares as it is a seed, and should be
173  //a random number
174 
175  partsortglobalseed = localseed;
176 
177  return (float) localseed;
178 }
179 
180 
181 template <class T,class I> inline
182 void partSort( T* arr, I istart, I istop,
183  I* jstart, I* jstop )
184 {
185  I ipivot, ileft, iright;
186  T pivotval, tmp;
187 
188  const float localseed = getPartSortSeed();
189 
190  ipivot = istart + (istop-istart) * (float)localseed / (float)FM + .5;
191  if ( ipivot < istart ) ipivot = istart;
192  if ( ipivot > istop ) ipivot = istop;
193  pivotval = arr[ipivot];
194 
195  for ( ileft=istart, iright=istop; ; )
196  {
197  while ( arr[ileft] <=pivotval && ileft<istop ) ileft++;
198  while ( arr[iright]>=pivotval && iright>istart ) iright--;
199  if ( ileft < iright )
200  {
201  tmp = arr[ileft];
202  arr[ileft++] = arr[iright];
203  arr[iright--] = tmp;
204  }
205  else break;
206  }
207 
208  if ( ileft < ipivot )
209  {
210  tmp = arr[ileft];
211  arr[ileft++] = arr[ipivot];
212  arr[ipivot] = tmp;
213  }
214  else if ( ipivot < iright )
215  {
216  tmp = arr[iright];
217  arr[iright--] = arr[ipivot];
218  arr[ipivot] = tmp;
219  }
220 
221  *jstart = iright;
222  *jstop = ileft;
223 }
224 
225 
226 template <class T, class I> inline
227 void insertionSort( T* arr, I istart, I istop )
228 {
229  I i, j;
230  T arr_i;
231 
232  for ( i=istart+1; i<=istop; i++ )
233  {
234  for ( arr_i=arr[i],j=i; j>istart && arr[j-1]>arr_i; j-- )
235  arr[j] = arr[j-1];
236  arr[j] = arr_i;
237  }
238 }
239 
240 
241 template <class T,class I> inline
242 void sortFor( T* arr, I sz, I itarget )
246 {
247  I j, k, p = 0, q = sz-1;
248 
249  while( q - p > NSMALL )
250  {
251  partSort( arr, p, q, &j, &k );
252 
253  if ( itarget <= j ) q = j;
254  else if ( itarget >= k ) p = k;
255  else return;
256  }
257 
258  insertionSort( arr, p, q );
259 }
260 
261 
262 template <class T,class I> inline
263 void quickSort( T* arr, I sz )
265 {
266  I pstack[NSTACK], qstack[NSTACK], j, k, p, q, top=0;
267 
268  pstack[top] = 0;
269  qstack[top++] = sz - 1;
270 
271  while( top )
272  {
273  p = pstack[--top];
274  q = qstack[top];
275 
276  while( q - p > NSMALL )
277  {
278  partSort( arr, p, q, &j, &k );
279 
280  if ( j-p < q-k )
281  {
282  pstack[top] = k;
283  qstack[top++] = q;
284  q = j;
285  }
286  else
287  {
288  pstack[top] = p;
289  qstack[top++] = j;
290  p = k;
291  }
292  }
293  insertionSort( arr, p, q );
294  }
295 }
296 
297 
298 template <class T, class IT> inline
299 void partSort( T* arr, IT* iarr, od_int64 istart, od_int64 istop,
300  od_int64* jstart, od_int64* jstop)
301 {
302  od_int64 ipivot, ileft, iright;
303  T pivotval, tmp;
304  IT itmp;
305 
306  const float localseed = getPartSortSeed();
307 
308  ipivot = istart + (istop-istart) * (float)localseed / (float)FM;
309  if ( ipivot < istart ) ipivot = istart;
310  if ( ipivot > istop ) ipivot = istop;
311  pivotval = arr[ipivot];
312 
313  for ( ileft=istart, iright=istop; ; )
314  {
315  while ( arr[ileft] <=pivotval && ileft<istop ) ileft++;
316  while ( arr[iright]>=pivotval && iright>istart ) iright--;
317  if ( ileft < iright )
318  {
319  itmp = iarr[ileft];
320  tmp = arr[ileft];
321 
322  iarr[ileft] = iarr[iright];
323  arr[ileft++] = arr[iright];
324 
325  iarr[iright] = itmp;
326  arr[iright--] = tmp;
327  }
328  else break;
329  }
330 
331  if ( ileft < ipivot )
332  {
333  itmp = iarr[ileft];
334  tmp = arr[ileft];
335 
336  iarr[ileft] = iarr[ipivot];
337  arr[ileft++] = arr[ipivot];
338 
339  iarr[ipivot] = itmp;
340  arr[ipivot] = tmp;
341  }
342  else if ( ipivot < iright )
343  {
344  itmp = iarr[iright];
345  tmp = arr[iright];
346 
347  iarr[iright] = iarr[ipivot];
348  arr[iright--] = arr[ipivot];
349 
350  iarr[ipivot] = itmp;
351  arr[ipivot] = tmp;
352  }
353 
354  *jstart = iright;
355  *jstop = ileft;
356 }
357 
358 
359 template <class T, class IT> inline
360 void insertionSort( T* arr, IT* iarr, od_int64 istart, od_int64 istop )
361 {
362  od_int64 i, j;
363  T arr_i;
364  IT iarr_i;
365 
366  for ( i=istart+1; i<=istop; i++ )
367  {
368  for ( iarr_i=iarr[i],arr_i=arr[i],j=i; j>istart && arr[j-1]>arr_i; j-- )
369  {
370  arr[j] = arr[j-1];
371  iarr[j] = iarr[j-1];
372  }
373 
374  arr[j] = arr_i;
375  iarr[j] = iarr_i;
376  }
377 }
378 
379 template <class T, class IT>
380 void sortFor( T* arr, IT* iarr, od_int64 sz, od_int64 itarget )
381 {
382  od_int64 j, k, p = 0, q = sz-1;
383 
384  while( q - p > NSMALL )
385  {
386  partSort( arr, iarr, p, q, &j, &k );
387 
388  if ( itarget <= j ) q = j;
389  else if ( itarget >= k ) p = k;
390  else return;
391  }
392 
393  insertionSort( arr, iarr, p, q );
394 }
395 
396 
397 template <class T, class IT> inline
398 void quickSort( T* arr, IT* iarr, od_int64 sz )
399 {
400  od_int64 pstack[NSTACK], qstack[NSTACK], j, k, p, q, top=0;
401 
402  pstack[top] = 0;
403  qstack[top++] = sz - 1;
404 
405  while( top )
406  {
407  p = pstack[--top];
408  q = qstack[top];
409 
410  while( q - p > NSMALL )
411  {
412  partSort( arr, iarr, p, q, &j, &k );
413 
414  if ( j-p < q-k )
415  {
416  pstack[top] = k;
417  qstack[top++] = q;
418  q = j;
419  }
420  else
421  {
422  pstack[top] = p;
423  qstack[top++] = j;
424  p = k;
425  }
426  }
427 
428  insertionSort( arr, iarr, p, q );
429  }
430 }
431 
432 #undef NSMALL
433 #undef FM
434 #undef FA
435 #undef FC
436 #undef NSTACK
437 
438 
439 //ParallelSort implementation
440 template <class T> inline
442  : vals_( vals )
443  , nrvals_( sz )
444  , tmpbuffer_( 0 )
445  , barrier_( -1, false )
446  , totalnr_(0)
447  , idxs_( 0 )
448 {
449  mTryAlloc( tmpbuffer_, T[sz] );
450 }
451 
452 
453 template <class T> inline
455  : vals_( vals )
456  , nrvals_( sz )
457  , tmpbuffer_( 0 )
458  , totalnr_(0)
459  , barrier_( -1, false )
460  , idxs_( idxs )
461 {
462  mTryAlloc( tmpbuffer_, T[sz] );
463 }
464 
465 
466 template <class T> inline
467 bool ParallelSorter<T>::doPrepare( int nrthreads )
468 {
469  if ( !tmpbuffer_ )
470  return false;
471 
472  barrier_.setNrThreads( nrthreads );
473 
474  starts_.erase();
475  newstarts_.erase();
476 
477  od_int64 nrmerges = -1;
478  while ( nrthreads )
479  {
480  nrmerges++;
481  nrthreads>>=1;
482  }
483 
484  totalnr_ = (1+nrmerges)*nrvals_;
485  return true;
486 }
487 
488 
489 template <class T> inline
490 bool ParallelSorter<T>::doFinish( bool success )
491 {
492  if ( !success )
493  return false;
494 
495  if ( curvals_!=vals_ )
496  OD::memCopy( vals_, curvals_, nrvals_*sizeof(T) );
497 
498  return true;
499 }
500 
501 
502 template <class T> inline
503 bool ParallelSorter<T>::doWork( od_int64 start, od_int64 stop, int thread )
504 {
505  const od_int64 threadsize = stop-start+1;
506  if ( threadsize<100 )
507  {
508  if ( idxs_ )
509  sort_coupled( vals_+start, idxs_+start, threadsize );
510  else
511  sort_array( vals_+start, threadsize );
512  }
513  else
514  {
515  if ( idxs_ )
516  quickSort( vals_+start, idxs_+start, threadsize );
517  else
518  quickSort( vals_+start, threadsize );
519  }
520 
521  if ( !shouldContinue() )
522  return false;
523 
524  addToNrDone( threadsize );
525 
526  barrier_.mutex().lock();
527  newstarts_ += start;
528  barrier_.mutex().unLock();
529 
530  while ( true )
531  {
532  if ( barrier_.waitForAll(false) )
533  {
534  if ( curvals_==vals_ )
535  {
536  curvals_ = tmpbuffer_;
537  buf_ = vals_;
538  }
539  else
540  {
541  buf_ = tmpbuffer_;
542  curvals_ = vals_;
543  }
544 
545  starts_ = newstarts_;
546  barrier_.setNrThreads( starts_.size()/2 );
547  barrier_.releaseAllNoLock();
548  }
549 
550  if ( thread>=barrier_.nrThreads() )
551  {
552  barrier_.mutex().unLock();
553  //I'm not needed any longer
554  break;
555  }
556 
557  const idx_type curstart0 = starts_[0]; starts_.removeSingle( 0 );
558  const idx_type curstart1 = starts_[0]; starts_.removeSingle( 0 );
559  idx_type curstart2;
560  if ( starts_.size()==1 )
561  {
562  curstart2 = starts_[0];
563  starts_.removeSingle( 0 );
564  }
565  else
566  curstart2 = -1;
567 
568  const idx_type curstop = (starts_.size() ? starts_[0] : nrvals_)-1;
569  newstarts_ += curstart0;
570  barrier_.mutex().unLock();
571 
572  size_type cursize;
573  if ( !mergeLists( curvals_, buf_,
574  curstart0, curstart1, curstart2, curstop, cursize) )
575  return false;
576 
577  if ( !shouldContinue() )
578  return false;
579 
580  addToNrDone( cursize );
581  }
582 
583  return true;
584 }
585 
586 
587 template <class T> inline
588 bool ParallelSorter<T>::mergeLists( const T* valptr, T* result,
589  idx_type start0, idx_type start1,
590  idx_type start2, idx_type stop,
591  size_type& totalsz )
592 {
593  const size_type sz0 = start1-start0;
594  const size_type sz1 = start2==-1 ? stop-start1+1 : start2-start1;
595  const size_type sz2 = start2==-1 ? 0 : stop-start2+1;
596  totalsz = sz0+sz1+sz2;
597 
598  const T* ptr0 = valptr + start0;
599  const T* stopptr0 = ptr0+sz0;
600  const T* ptr1 = valptr + start1;
601  const T* stopptr1 = ptr1+sz1;
602  const T* ptr2 = start2==-1 ? 0 : valptr + start2;
603  const T* stopptr2 = ptr2+sz2;
604 
605  while ( true )
606  {
607  if ( ptr0 && (!ptr1 || *ptr0<*ptr1) && (!ptr2 || *ptr0<*ptr2 ) )
608  {
609  (*result++) = (*ptr0++);
610  if ( ptr0==stopptr0 )
611  ptr0 = 0;
612  }
613  else if ( ptr1 && ( !ptr2 || *ptr1<*ptr2 ) )
614  {
615  (*result++) = (*ptr1++);
616  if ( ptr1==stopptr1 )
617  ptr1 = 0;
618  }
619  else if ( ptr2 )
620  {
621  (*result++) = (*ptr2++);
622  if ( ptr2==stopptr2 )
623  ptr2 = 0;
624  }
625  else
626  break;
627  }
628 
629  return true;
630 }
631 
632 
ParallelSorter::minThreadSize
int minThreadSize() const
Definition: sorting.h:131
ParallelSorter
Sorting in parallel. Code is still experimental.
Definition: sorting.h:120
sortFor
void sortFor(T *arr, I sz, I itarget)
Definition: sorting.h:242
sort_coupled
void sort_coupled(T *arr, IT *idxs, I sz)
Definition: sorting.h:41
FA
#define FA
Definition: sorting.h:161
od_int64
#define od_int64
Definition: plftypes.h:35
ParallelSorter::mergeLists
static bool mergeLists(const T *vals, T *res, idx_type start0, idx_type start1, idx_type start2, idx_type stop, size_type &totalsz)
Definition: sorting.h:588
getPartSortSeed
mExtern(Algo) Threads float getPartSortSeed()
Definition: sorting.h:167
ParallelSorter::nrvals_
const size_type nrvals_
Definition: sorting.h:148
ParallelSorter::tmpbuffer_
ArrPtrMan< T > tmpbuffer_
Definition: sorting.h:142
ParallelSorter::totalnr_
size_type totalnr_
Definition: sorting.h:149
ParallelSorter::newstarts_
TypeSet< idx_type > newstarts_
Definition: sorting.h:153
FM
#define FM
Definition: sorting.h:160
NSMALL
#define NSMALL
Definition: sorting.h:159
ParallelSorter::idx_type
od_int64 idx_type
Definition: sorting.h:123
ParallelSorter::doPrepare
bool doPrepare(int)
Definition: sorting.h:467
mExtern
#define mExtern(module)
Definition: commondefs.h:183
ParallelSorter::ParallelSorter
ParallelSorter(T *vals, size_type)
Definition: sorting.h:441
ptrman.h
ParallelSorter::size_type
idx_type size_type
Definition: sorting.h:124
quickSort
void quickSort(T *arr, I sz)
Definition: sorting.h:263
mClass
#define mClass(module)
Definition: commondefs.h:181
mDoSort
#define mDoSort(extra_var, extra_action, sztype)
Definition: sorting.h:45
mTryAlloc
#define mTryAlloc(var, stmt)
Catches bad_alloc and sets ptr to null as normal.
Definition: commondefs.h:246
Threads::ConditionVar
Is an object that faciliates many threads to wait for something to happen.
Definition: thread.h:110
gendefs.h
ArrPtrMan
Definition: ptrman.h:160
duplicate_sort
bool duplicate_sort(T *arr, I sz, int maxnrvals)
Definition: sorting.h:71
FC
#define FC
Definition: sorting.h:162
ParallelSorter::buf_
T * buf_
Definition: sorting.h:146
ParallelSorter::condvar_
Threads::ConditionVar condvar_
Definition: sorting.h:151
ParallelSorter::vals_
T * vals_
Definition: sorting.h:141
ParallelSorter::nrDone
od_int64 nrDone() const
May be -1, i.e. class does not report nrdone.
Definition: sorting.h:139
partSort
void partSort(T *arr, I istart, I istop, I *jstart, I *jstop)
Definition: sorting.h:182
insertionSort
void insertionSort(T *arr, I istart, I istop)
Definition: sorting.h:227
ParallelTask
Generalization of a task that can be run in parallel.
Definition: paralleltask.h:66
mCast
#define mCast(tp, v)
Definition: commondefs.h:137
Threads::Barrier
Waits for a number of threads to reach a certain point (i.e. the call to Barrier::waitForAll)....
Definition: thread.h:239
ParallelSorter::nrIterations
od_int64 nrIterations() const
Definition: sorting.h:129
NSTACK
#define NSTACK
Definition: sorting.h:163
ParallelSorter::doFinish
bool doFinish(bool)
Definition: sorting.h:490
ParallelSorter::idxs_
idx_type * idxs_
Definition: sorting.h:144
ParallelSorter::curvals_
T * curvals_
Definition: sorting.h:145
ParallelSorter::mODTextTranslationClass
mODTextTranslationClass(ParallelSorter)
thread.h
sort_array
void sort_array(T *arr, I sz)
Definition: sorting.h:36
paralleltask.h
Threads::Atomic< int >
sort_idxabl
void sort_idxabl(T &arr, int sz)
Definition: sorting.h:59
ParallelSorter::doWork
bool doWork(od_int64, od_int64, int)
Definition: sorting.h:503
ParallelSorter::barrier_
Threads::Barrier barrier_
Definition: sorting.h:155
ParallelSorter::starts_
TypeSet< idx_type > starts_
Definition: sorting.h:152
TypeSet
Sets of (small) copyable elements.
Definition: commontypes.h:29
sort_idxabl_coupled
void sort_idxabl_coupled(T &arr, IT *idxs, int sz)
Definition: sorting.h:64

Generated at for the OpendTect seismic interpretation project. Copyright (C): dGB Beheer B.V. 1995-2021