OpendTect  6.3
datadistributionextracter.h
Go to the documentation of this file.
1 #pragma once
2 
3 /*+
4 ________________________________________________________________________
5 
6  (C) dGB Beheer B.V.; (LICENSE) http://opendtect.org/OpendTect_license.txt
7  Author: Bert
8  Date: Mar 2017
9 ________________________________________________________________________
10 
11 
12 -*/
13 
14 #include "datadistribution.h"
15 #include "arraynd.h"
16 #include "valseries.h"
17 #include "paralleltask.h"
18 
19 
26 template <class vT>
28 {
29 public:
30 
35 
36  DataDistributionExtracter( const ArrNDType& arr )
37  : arrnd_(&arr)
38  , vs_(0)
39  , totalsz_(arr.info().getTotalSz())
40  , arr_(arr.getData()) { init(); }
41  DataDistributionExtracter( const vT* arr, od_int64 sz )
42  : arr_(arr)
43  , vs_(0)
44  , totalsz_(sz)
45  , arrnd_(0) { init(); }
47  od_int64 sz )
48  : vs_(&vs)
49  , arr_(vs.arr())
50  , totalsz_(sz)
51  , arrnd_(0) { init(); }
53  : DataDistributionExtracter(ts.arr(),ts.size())
54  {}
56 
57  RangeType getDataRange() const;
58  int getDefNrBins() const;
59  static SamplingData<vT> getSamplingFor(RangeType,int nrbins);
60 
61  void setNrBins( int nr ) { nrbins_ = nr; }
62  void setBounds( RangeType intv ) { bounds_ = intv; }
63 
64  virtual od_int64 nrIterations() const { return totalsz_; }
65 
66  DistribRef getDistribution();
67  void reset() { init(); }
68 
69 
70 protected:
71 
73  const vT* arr_;
76  DistribRef distrib_;
77  int nrbins_;
78  RangeType bounds_;
79 
80  virtual bool doPrepare(int);
81  virtual bool doWork(od_int64,od_int64,int);
82 
83  void init();
84  void determineBounds();
85  static void includeInRange(RangeType&,vT);
86  void putInBin(vT,TypeSet<vT>&,
87  const SamplingData<vT>&,const int);
88 
89 };
90 
91 
102 template <class vT>
104 {
105 public:
106 
111 
113  TaskRunner* tskr=0 )
114  : extracter_(arr) { init(tskr); }
116  TaskRunner* tskr=0 )
117  : extracter_(a,sz) { init(tskr); }
119  od_int64 sz, TaskRunner* tskr=0 )
120  : extracter_(v,sz) { init(tskr); }
122  TaskRunner* tskr=0 )
123  : extracter_(ts) { init(tskr); }
125 
126  DistribRef getDistribution() { return distrib_; }
127 
128 protected:
129 
131  DistribRef distrib_;
132 
133  void init(TaskRunner*);
134  bool deSpike();
135 
136 };
137 
138 
139 
140 template <class vT> inline
142 {
143  distrib_ = 0;
144  nrbins_ = mUdf(int);
145  bounds_ = RangeType( mUdf(vT), mUdf(vT) );
146 }
147 
148 
149 template <class vT> inline typename DataDistributionExtracter<vT>::DistribRef
151 {
152  if ( !distrib_ )
153  execute();
154  return distrib_;
155 }
156 
157 
158 template <class vT> inline
160 {
161  if ( mIsUdf(val) )
162  return;
163  if ( mIsUdf(rg.start) || val < rg.start )
164  rg.start = val;
165  if ( mIsUdf(rg.stop) || val > rg.stop )
166  rg.stop = val;
167 }
168 
169 
170 template <class vT> inline
172 {
173  RangeType ret( mUdf(vT), mUdf(vT) );
174  if ( arr_ )
175  {
176  const vT* stopptr = arr_ + totalsz_;
177  for ( const vT* cur = arr_; cur != stopptr; cur++ )
178  includeInRange( ret, *cur );
179  }
180  else if ( vs_ )
181  {
182  for ( int idx=0; idx<totalsz_; idx++ )
183  includeInRange( ret, (*vs_)[idx] );
184  }
185  else
186  {
187  ArrayNDIter iter( arrnd_->info() );
188  while ( iter.next() )
189  includeInRange( ret, arrnd_->getND( iter.getPos() ) );
190  }
191  return ret;
192 }
193 
194 
195 template <class vT> inline
197 {
198  int ret = (int)(totalsz_ / 132);
199  if ( ret < 8 )
200  ret = 8;
201  if ( ret > 256 )
202  ret = 256;
203  return ret;
204 }
205 
206 
207 template <class vT> inline SamplingData<vT>
209 {
210  SamplingData<vT> sd;
211  sd.step = (rg.stop - rg.start) / nrbins;
212  sd.start = rg.start + sd.step * vT(0.5);
213  return sd;
214 }
215 
216 
217 template <class vT> inline
219 {
220  if ( totalsz_ < 1 )
221  return false;
222  if ( !arrnd_ && !arr_ && !vs_ )
223  { pErrMsg("Duh"); return false; }
224 
225  if ( mIsUdf(nrbins_) )
226  nrbins_ = getDefNrBins();
227 
228  if ( mIsUdf(bounds_.start) || mIsUdf(bounds_.stop) )
229  determineBounds();
230 
231  if ( bounds_.start == bounds_.stop )
232  bounds_.stop = bounds_.start + (vT)1;
233  else if ( bounds_.start > bounds_.stop )
234  std::swap( bounds_.start, bounds_.stop );
235 
236  SamplingData<vT> sd = getSamplingFor( bounds_, nrbins_ );
237  distrib_ = new DistribType( sd, nrbins_ );
238 
239  return true;
240 }
241 
242 
243 template <class vT> inline
245 {
246  RangeType rg = getDataRange();
247 
248  if ( mIsUdf(bounds_.start) )
249  bounds_.start = rg.start;
250  if ( mIsUdf(bounds_.stop) )
251  bounds_.stop = rg.stop;
252 
253  if ( mIsUdf(bounds_.start) || mIsUdf(bounds_.stop) )
254  { bounds_.start = vT(0); bounds_.stop = vT(1); }
255 }
256 
257 
258 template <class vT> inline
260  const SamplingData<vT>& sd, const int nrbins )
261 {
262  if ( !mIsUdf(val) )
263  {
264  const int ibin
265  = DataDistribution<vT>::getBinNrFor( val, sd, nrbins );
266  subdistrib[ibin]++;
267  }
268 }
269 
270 
271 template <class vT> inline
273 {
274  TypeSet<vT> subdistrib( distrib_->size(), 0 );
275  const SamplingData<vT> sd = distrib_->sampling();
276  const int nrbins = distrib_->size();
277  if ( arr_ )
278  {
279  for ( od_int64 idx=start; idx<=stop; idx++ )
280  putInBin( arr_[idx], subdistrib, sd, nrbins );
281  }
282  else if ( vs_ )
283  {
284  for ( od_int64 idx=start; idx<=stop; idx++ )
285  putInBin( (*vs_)[idx], subdistrib, sd, nrbins );
286  }
287  else
288  {
289  ArrayNDIter iter( arrnd_->info() );
290  iter.setGlobalPos( start-1 );
291  while ( iter.next() )
292  putInBin( arrnd_->getND(iter.getPos()), subdistrib, sd, nrbins );
293  }
294 
295  distrib_->add( subdistrib.arr() );
296  return true;
297 }
298 
299 
300 template <class vT> inline
302 {
303  const int targetnrbins = extracter_.getDefNrBins();
304  extracter_.setNrBins( 32 * targetnrbins );
305  extracter_.setBounds( extracter_.getDataRange() );
306  if ( !TaskRunner::execute(tskr,extracter_) )
307  { distrib_ = new DistribType; return; }
308 
309  RefMan<DistribType> finedistr = extracter_.getDistribution();
310  const vT sumvals = finedistr->sumOfValues();
311  const vT cutoffrelpos = vT(0.0025);
312  const vT locutoff = cutoffrelpos * sumvals;
313  const vT hicutoff = (1-cutoffrelpos) * sumvals;
314  const RangeType posrg( finedistr->positionForCumulative( locutoff ),
315  finedistr->positionForCumulative( hicutoff ) );
316 
317  const SamplingData<vT> targetsd
318  = extracter_.getSamplingFor( posrg, targetnrbins );
319  distrib_ = new DistribType( targetsd, targetnrbins );
320  const int finesz = finedistr->size();
321  const SamplingData<vT> finesd = finedistr->sampling();
322  typename DistribType::SetType distrarr( targetnrbins, vT(0) );
323  for ( int idx=0; idx<finesz; idx++ )
324  {
325  const int binnr = distrib_->getBinNr( finesd.atIndex(idx) );
326  distrarr[binnr] += finedistr->get( idx );
327  }
328  distrib_->set( distrarr.arr() );
329 }
#define mIsUdf(val)
Use mIsUdf to check for undefinedness of simple types.
Definition: undefval.h:285
ArrayND< vT > ArrNDType
Definition: datadistributionextracter.h:31
T step
Definition: samplingdata.h:48
Does some work to limit the range of distribution extraction.
Definition: datadistributionextracter.h:103
virtual ~DataDistributionExtracter()
Definition: datadistributionextracter.h:55
DataDistribution< vT > DistribType
Definition: datadistributionextracter.h:108
DistribRef distrib_
Definition: datadistributionextracter.h:76
RangeLimitedDataDistributionExtracter(const ArrNDType &arr, TaskRunner *tskr=0)
Definition: datadistributionextracter.h:112
DistribRef distrib_
Definition: datadistributionextracter.h:131
static void includeInRange(RangeType &, vT)
Definition: datadistributionextracter.h:159
#define od_int64
Definition: plftypes.h:34
bool init()
DataDistributionExtracter(const ArrNDType &arr)
Definition: datadistributionextracter.h:36
RefMan< DistribType > DistribRef
Definition: datadistributionextracter.h:110
RangeType bounds_
Definition: datadistributionextracter.h:78
RangeLimitedDataDistributionExtracter(const vT *a, od_int64 sz, TaskRunner *tskr=0)
Definition: datadistributionextracter.h:115
static bool execute(TaskRunner *tskr, Task &)
Taskrunner may be zero.
void setBounds(RangeType intv)
Definition: datadistributionextracter.h:62
DataDistributionExtracter(const TypeSet< vT > &ts)
Definition: datadistributionextracter.h:52
void putInBin(vT, TypeSet< vT > &, const SamplingData< vT > &, const int)
Definition: datadistributionextracter.h:259
RangeLimitedDataDistributionExtracter(const TypeSet< vT > &ts, TaskRunner *tskr=0)
Definition: datadistributionextracter.h:121
void setNrBins(int nr)
Definition: datadistributionextracter.h:61
DistribRef getDistribution()
Definition: datadistributionextracter.h:150
void reset()
Definition: datadistributionextracter.h:67
Generalization of a task that can be run in parallel.
Definition: paralleltask.h:64
ArrayND< vT > ArrNDType
Definition: datadistributionextracter.h:107
RefMan< DistribType > DistribRef
Definition: datadistributionextracter.h:34
virtual T * arr()
3rd party access
Definition: typeset.h:86
void determineBounds()
Definition: datadistributionextracter.h:244
T atIndex(IT) const
Definition: samplingdata.h:156
virtual od_int64 nrIterations() const
Definition: datadistributionextracter.h:64
Set of (small) copyable elements.
Definition: commontypes.h:26
Extracts a data distribution from input data: TypeSet or simply ptr + size ArrayND (will try to work ...
Definition: datadistributionextracter.h:27
int getDefNrBins() const
Definition: datadistributionextracter.h:196
RangeLimitedDataDistributionExtracter(const ValueSeries< vT > &v, od_int64 sz, TaskRunner *tskr=0)
Definition: datadistributionextracter.h:118
Class that can execute a task.
Definition: task.h:193
const ArrayND< vT > * arrnd_
Definition: datadistributionextracter.h:72
#define mUdf(type)
Use this macro to get the undefined for simple types.
Definition: undefval.h:270
virtual ~RangeLimitedDataDistributionExtracter()
Definition: datadistributionextracter.h:124
T start
Definition: samplingdata.h:47
DataDistributionExtracter< vT > extracter_
Definition: datadistributionextracter.h:130
bool setGlobalPos(int64_t)
Definition: arraynd.h:260
DataDistribution< VT > DistribType
Definition: datadistributiontools.h:27
DistribRef getDistribution()
Definition: datadistributionextracter.h:126
const od_int64 totalsz_
Definition: datadistributionextracter.h:75
static IdxType getBinNrFor(PosType, const SamplingType &, size_type nrbins)
Definition: datadistribution.h:206
Iterates through all samples in an ArrayND.
Definition: arraynd.h:175
Interval< vT > RangeType
Definition: datadistributionextracter.h:109
const vT * arr_
Definition: datadistributionextracter.h:73
T stop
Definition: ranges.h:91
const ValueSeries< vT > * vs_
Definition: datadistributionextracter.h:74
virtual bool doWork(od_int64, od_int64, int)
Definition: datadistributionextracter.h:272
RangeType getDataRange() const
Definition: datadistributionextracter.h:171
static SamplingData< vT > getSamplingFor(RangeType, int nrbins)
Definition: datadistributionextracter.h:208
T start
Definition: ranges.h:90
Sharable data distribution. Sampling defaults to 0 step 1.
Definition: datadistribution.h:42
Holds the fundamental sampling info: start and interval.
Definition: samplingdata.h:20
void init()
Definition: datadistributionextracter.h:141
T * set(T *p, bool doerase=true)
Returns old pointer if not erased.
Definition: ptrman.h:264
#define mClass(module)
Definition: commondefs.h:161
virtual bool doPrepare(int)
Definition: datadistributionextracter.h:218
#define pErrMsg(msg)
Usual access point for programmer error messages.
Definition: errmsg.h:34
DataDistributionExtracter(const vT *arr, od_int64 sz)
Definition: datadistributionextracter.h:41
void init(TaskRunner *)
Definition: datadistributionextracter.h:301
int nrbins_
Definition: datadistributionextracter.h:77
DataDistribution< vT > DistribType
Definition: datadistributionextracter.h:32
DataDistributionExtracter(const ValueSeries< vT > &vs, od_int64 sz)
Definition: datadistributionextracter.h:46
Interval< vT > RangeType
Definition: datadistributionextracter.h:33

Generated for the OpendTect seismic interpretation project. Copyright (C): dGB Beheer B.V. 2017