OpendTect  6.6
datadistributionextracter.h
Go to the documentation of this file.
1 #pragma once
2 
3 /*+
4 ________________________________________________________________________
5 
6  (C) dGB Beheer B.V.; (LICENSE) http://opendtect.org/OpendTect_license.txt
7  Author: Bert
8  Date: Mar 2017
9 ________________________________________________________________________
10 
11 
12 -*/
13 
14 #include "datadistribution.h"
15 #include "arraynd.h"
16 #include "valseries.h"
17 #include "paralleltask.h"
18 
19 
26 template <class vT>
28 {
29 public:
30 
35 
37  : arrnd_(&arr)
38  , vs_(0)
39  , totalsz_(arr.totalSize())
40  , arr_(arr.getData()) { init(); }
41  DataDistributionExtracter( const vT* arr, od_int64 sz )
42  : arr_(arr)
43  , vs_(0)
44  , totalsz_(sz)
45  , arrnd_(0) { init(); }
47  od_int64 sz )
48  : vs_(&vs)
49  , arr_(vs.arr())
50  , totalsz_(sz)
51  , arrnd_(0) { init(); }
53  : DataDistributionExtracter(ts.arr(),ts.size())
54  {}
56 
57  RangeType getDataRange() const;
58  int getDefNrBins() const;
59  static SamplingData<vT> getSamplingFor(RangeType,int nrbins);
60 
61  void setNrBins( int nr ) { nrbins_ = nr; }
62  void setBounds( RangeType intv ) { bounds_ = intv; }
63 
64  virtual od_int64 nrIterations() const { return totalsz_; }
65 
66  DistribRef getDistribution();
67  void reset() { init(); }
68 
69 
70 protected:
71 
73  const vT* arr_;
77  int nrbins_;
79 
80  virtual bool doPrepare(int);
81  virtual bool doWork(od_int64,od_int64,int);
82 
83  void init();
84  void determineBounds();
85  static void includeInRange(RangeType&,vT);
86  void putInBin(vT,TypeSet<vT>&,
87  const SamplingData<vT>&,const int);
88 
89 };
90 
91 
102 template <class vT>
104 {
105 public:
106 
111 
113  TaskRunner* taskr=nullptr )
114  : extracter_(arr) { init(taskr); }
116  TaskRunner* taskr=nullptr )
117  : extracter_(a,sz) { init(taskr); }
119  od_int64 sz, TaskRunner* taskr=nullptr )
120  : extracter_(v,sz) { init(taskr); }
122  TaskRunner* taskr=nullptr )
123  : extracter_(ts) { init(taskr); }
125 
126  DistribRef getDistribution() { return distrib_; }
127 
128 protected:
129 
132 
133  void init(TaskRunner*);
134  bool deSpike();
135 
136 };
137 
138 
139 
140 template <class vT> inline
142 {
143  distrib_ = 0;
144  nrbins_ = mUdf(int);
145  bounds_ = RangeType( mUdf(vT), mUdf(vT) );
146 }
147 
148 
149 template <class vT> inline typename DataDistributionExtracter<vT>::DistribRef
151 {
152  if ( !distrib_ )
153  execute();
154  return distrib_;
155 }
156 
157 
158 template <class vT> inline
160 {
161  if ( mIsUdf(val) )
162  return;
163  if ( mIsUdf(rg.start) || val < rg.start )
164  rg.start = val;
165  if ( mIsUdf(rg.stop) || val > rg.stop )
166  rg.stop = val;
167 }
168 
169 
170 template <class vT> inline
172 {
173  RangeType ret( mUdf(vT), mUdf(vT) );
174  if ( arr_ )
175  {
176  const vT* stopptr = arr_ + totalsz_;
177  for ( const vT* cur = arr_; cur != stopptr; cur++ )
178  includeInRange( ret, *cur );
179  }
180  else if ( vs_ )
181  {
182  for ( int idx=0; idx<totalsz_; idx++ )
183  includeInRange( ret, (*vs_)[idx] );
184  }
185  else
186  {
187  ArrayNDIter iter( arrnd_->info() );
188  while ( iter.next() )
189  includeInRange( ret, arrnd_->getND( iter.getPos() ) );
190  }
191  return ret;
192 }
193 
194 
195 template <class vT> inline
197 {
198  int ret = (int)(totalsz_ / 132);
199  if ( ret < 8 )
200  ret = 8;
201  if ( ret > 256 )
202  ret = 256;
203  return ret;
204 }
205 
206 
207 template <class vT> inline SamplingData<vT>
209 {
210  SamplingData<vT> sd;
211  sd.step = (rg.stop - rg.start) / nrbins;
212  sd.start = rg.start + sd.step * vT(0.5);
213  return sd;
214 }
215 
216 
217 template <class vT> inline
219 {
220  if ( totalsz_ < 1 )
221  return false;
222  if ( !arrnd_ && !arr_ && !vs_ )
223  { pErrMsg("Duh"); return false; }
224 
225  if ( mIsUdf(nrbins_) )
226  nrbins_ = getDefNrBins();
227 
228  if ( mIsUdf(bounds_.start) || mIsUdf(bounds_.stop) )
229  determineBounds();
230 
231  if ( bounds_.start == bounds_.stop )
232  bounds_.stop = bounds_.start + (vT)1;
233  else if ( bounds_.start > bounds_.stop )
234  std::swap( bounds_.start, bounds_.stop );
235 
236  SamplingData<vT> sd = getSamplingFor( bounds_, nrbins_ );
237  distrib_ = new DistribType( sd, nrbins_ );
238 
239  return true;
240 }
241 
242 
243 template <class vT> inline
245 {
246  RangeType rg = getDataRange();
247 
248  if ( mIsUdf(bounds_.start) )
249  bounds_.start = rg.start;
250  if ( mIsUdf(bounds_.stop) )
251  bounds_.stop = rg.stop;
252 
253  if ( mIsUdf(bounds_.start) || mIsUdf(bounds_.stop) )
254  { bounds_.start = vT(0); bounds_.stop = vT(1); }
255 }
256 
257 
258 template <class vT> inline
260  const SamplingData<vT>& sd, const int nrbins )
261 {
262  if ( !mIsUdf(val) )
263  {
264  const int ibin
265  = DataDistribution<vT>::getBinNrFor( val, sd, nrbins );
266  subdistrib[ibin]++;
267  }
268 }
269 
270 
271 template <class vT> inline
273 {
274  TypeSet<vT> subdistrib( distrib_->size(), 0 );
275  const SamplingData<vT> sd = distrib_->sampling();
276  const int nrbins = distrib_->size();
277  if ( arr_ )
278  {
279  for ( od_int64 idx=start; idx<=stop; idx++ )
280  putInBin( arr_[idx], subdistrib, sd, nrbins );
281  }
282  else if ( vs_ )
283  {
284  for ( od_int64 idx=start; idx<=stop; idx++ )
285  putInBin( (*vs_)[idx], subdistrib, sd, nrbins );
286  }
287  else
288  {
289  ArrayNDIter iter( arrnd_->info() );
290  iter.setGlobalPos( start-1 );
291  while ( iter.next() )
292  putInBin( arrnd_->getND(iter.getPos()), subdistrib, sd, nrbins );
293  }
294 
295  distrib_->add( subdistrib.arr() );
296  return true;
297 }
298 
299 
300 template <class vT> inline
302 {
303  const int targetnrbins = extracter_.getDefNrBins();
304  extracter_.setNrBins( 32 * targetnrbins );
305  extracter_.setBounds( extracter_.getDataRange() );
306  const bool res = TaskRunner::execute( taskr, extracter_ );
307  if ( !res )
308  {
309  distrib_ = new DistribType;
310  return;
311  }
312 
313  RefMan<DistribType> finedistr = extracter_.getDistribution();
314  const vT sumvals = finedistr->sumOfValues();
315  const vT cutoffrelpos = vT(0.0025);
316  const vT locutoff = cutoffrelpos * sumvals;
317  const vT hicutoff = (1-cutoffrelpos) * sumvals;
318  const RangeType posrg( finedistr->positionForCumulative( locutoff ),
319  finedistr->positionForCumulative( hicutoff ) );
320 
321  const SamplingData<vT> targetsd
322  = extracter_.getSamplingFor( posrg, targetnrbins );
323  distrib_ = new DistribType( targetsd, targetnrbins );
324  const int finesz = finedistr->size();
325  const SamplingData<vT> finesd = finedistr->sampling();
326  TypeSet<vT> distrarr( targetnrbins, vT(0) );
327  for ( int idx=0; idx<finesz; idx++ )
328  {
329  const int binnr = distrib_->getBinNr( finesd.atIndex(idx) );
330  distrarr[binnr] += finedistr->get( idx );
331  }
332  distrib_->set( distrarr.arr() );
333 }
ValueSeries< vT >
DataDistributionExtracter::DataDistributionExtracter
DataDistributionExtracter(const vT *arr, od_int64 sz)
Definition: datadistributionextracter.h:41
DataDistribution
Sharable data distribution. Sampling defaults to 0 step 1.
Definition: datadistribution.h:43
TaskRunner::execute
static bool execute(TaskRunner *tr, Task &)
Taskrunner may be zero.
RangeLimitedDataDistributionExtracter::DistribRef
RefMan< DistribType > DistribRef
Definition: datadistributionextracter.h:110
Strat::init
void init()
RangeLimitedDataDistributionExtracter::RangeLimitedDataDistributionExtracter
RangeLimitedDataDistributionExtracter(const ArrNDType &arr, TaskRunner *taskr=nullptr)
Definition: datadistributionextracter.h:112
DataDistributionExtracter::determineBounds
void determineBounds()
Definition: datadistributionextracter.h:244
RangeLimitedDataDistributionExtracter::DistribType
DataDistribution< vT > DistribType
Definition: datadistributionextracter.h:108
DataDistributionExtracter::ArrNDType
ArrayND< vT > ArrNDType
Definition: datadistributionextracter.h:31
RangeLimitedDataDistributionExtracter::RangeLimitedDataDistributionExtracter
RangeLimitedDataDistributionExtracter(const TypeSet< vT > &ts, TaskRunner *taskr=nullptr)
Definition: datadistributionextracter.h:121
valseries.h
mIsUdf
#define mIsUdf(val)
Use mIsUdf to check for undefinedness of simple types.
Definition: undefval.h:289
od_int64
#define od_int64
Definition: plftypes.h:35
RangeLimitedDataDistributionExtracter::deSpike
bool deSpike()
DataDistributionExtracter::doWork
virtual bool doWork(od_int64, od_int64, int)
Definition: datadistributionextracter.h:272
DataDistributionExtracter::getDefNrBins
int getDefNrBins() const
Definition: datadistributionextracter.h:196
RangeLimitedDataDistributionExtracter::~RangeLimitedDataDistributionExtracter
virtual ~RangeLimitedDataDistributionExtracter()
Definition: datadistributionextracter.h:124
RangeLimitedDataDistributionExtracter
Does some work to limit the range of distribution extraction.
Definition: datadistributionextracter.h:104
DataDistributionExtracter::nrIterations
virtual od_int64 nrIterations() const
Definition: datadistributionextracter.h:64
DataDistributionExtracter::DataDistributionExtracter
DataDistributionExtracter(const ArrNDType &arr)
Definition: datadistributionextracter.h:36
DataDistributionExtracter::setBounds
void setBounds(RangeType intv)
Definition: datadistributionextracter.h:62
DataDistributionExtracter::putInBin
void putInBin(vT, TypeSet< vT > &, const SamplingData< vT > &, const int)
Definition: datadistributionextracter.h:259
DataDistributionExtracter::DataDistributionExtracter
DataDistributionExtracter(const TypeSet< vT > &ts)
Definition: datadistributionextracter.h:52
DataDistributionIter::DistribType
DataDistribution< VT > DistribType
Definition: datadistributiontools.h:25
arraynd.h
DataDistributionExtracter::reset
void reset()
Definition: datadistributionextracter.h:67
RangeLimitedDataDistributionExtracter::distrib_
DistribRef distrib_
Definition: datadistributionextracter.h:131
RangeLimitedDataDistributionExtracter::ArrNDType
ArrayND< vT > ArrNDType
Definition: datadistributionextracter.h:107
SamplingData::atIndex
T atIndex(IT) const
Definition: samplingdata.h:164
DataDistributionExtracter::arrnd_
const ArrayND< vT > * arrnd_
Definition: datadistributionextracter.h:72
DataDistributionExtracter::vs_
const ValueSeries< vT > * vs_
Definition: datadistributionextracter.h:74
datadistribution.h
ArrayNDIter::setGlobalPos
bool setGlobalPos(int64_t)
Definition: arraynd.h:293
DataDistributionExtracter::getDistribution
DistribRef getDistribution()
Definition: datadistributionextracter.h:150
PtrManBase::set
T * set(T *p, bool doerase=true)
Returns old pointer if not erased.
Definition: ptrman.h:269
DataDistribution::getBinNrFor
static int getBinNrFor(VT, const SamplingData< VT > &, int nrbins)
Definition: datadistribution.h:201
DataDistributionExtracter
Extracts a data distribution from input data: TypeSet or simply ptr + size ArrayND (will try to work ...
Definition: datadistributionextracter.h:28
mClass
#define mClass(module)
Definition: commondefs.h:181
RangeLimitedDataDistributionExtracter::RangeLimitedDataDistributionExtracter
RangeLimitedDataDistributionExtracter(const ValueSeries< vT > &v, od_int64 sz, TaskRunner *taskr=nullptr)
Definition: datadistributionextracter.h:118
ArrayNDIter::getPos
const int * getPos() const
Definition: arraynd.h:220
SamplingData
Holds the fundamental sampling info: start and interval.
Definition: samplingdata.h:22
RangeLimitedDataDistributionExtracter::getDistribution
DistribRef getDistribution()
Definition: datadistributionextracter.h:126
ArrayNDIter
Iterates through all samples in an ArrayND.
Definition: arraynd.h:209
RangeLimitedDataDistributionExtracter::RangeLimitedDataDistributionExtracter
RangeLimitedDataDistributionExtracter(const vT *a, od_int64 sz, TaskRunner *taskr=nullptr)
Definition: datadistributionextracter.h:115
TaskRunner
Class that can execute a task.
Definition: task.h:170
ArrayNDIter::next
bool next()
DataDistributionExtracter::bounds_
RangeType bounds_
Definition: datadistributionextracter.h:78
SamplingData::start
T start
Definition: samplingdata.h:49
DataDistributionExtracter::doPrepare
virtual bool doPrepare(int)
Definition: datadistributionextracter.h:218
DataDistributionExtracter::setNrBins
void setNrBins(int nr)
Definition: datadistributionextracter.h:61
DataDistributionExtracter::~DataDistributionExtracter
virtual ~DataDistributionExtracter()
Definition: datadistributionextracter.h:55
pErrMsg
#define pErrMsg(msg)
Usual access point for programmer error messages.
Definition: errmsg.h:37
RangeLimitedDataDistributionExtracter::RangeType
Interval< vT > RangeType
Definition: datadistributionextracter.h:109
DataDistributionExtracter::DistribType
DataDistribution< vT > DistribType
Definition: datadistributionextracter.h:32
DataDistributionExtracter::distrib_
DistribRef distrib_
Definition: datadistributionextracter.h:76
DataDistributionExtracter::getDataRange
RangeType getDataRange() const
Definition: datadistributionextracter.h:171
ParallelTask
Generalization of a task that can be run in parallel.
Definition: paralleltask.h:66
DataDistributionExtracter::includeInRange
static void includeInRange(RangeType &, vT)
Definition: datadistributionextracter.h:159
DataDistributionExtracter::RangeType
Interval< vT > RangeType
Definition: datadistributionextracter.h:33
DataDistributionExtracter::totalsz_
const od_int64 totalsz_
Definition: datadistributionextracter.h:75
swap
void swap(BufferStringSet &o1, BufferStringSet &o2)
Definition: bufstringset.h:154
ArrayND< vT >
mUdf
#define mUdf(type)
Use this macro to get the undefined for simple types.
Definition: undefval.h:274
DataDistributionExtracter::arr_
const vT * arr_
Definition: datadistributionextracter.h:73
DataDistributionExtracter::DistribRef
RefMan< DistribType > DistribRef
Definition: datadistributionextracter.h:34
DataDistributionExtracter::getSamplingFor
static SamplingData< vT > getSamplingFor(RangeType, int nrbins)
Definition: datadistributionextracter.h:208
Interval< vT >
DataDistributionExtracter::DataDistributionExtracter
DataDistributionExtracter(const ValueSeries< vT > &vs, od_int64 sz)
Definition: datadistributionextracter.h:46
SamplingData::step
T step
Definition: samplingdata.h:50
RangeLimitedDataDistributionExtracter::extracter_
DataDistributionExtracter< vT > extracter_
Definition: datadistributionextracter.h:130
paralleltask.h
RangeLimitedDataDistributionExtracter::init
void init(TaskRunner *)
Definition: datadistributionextracter.h:301
DataDistributionExtracter::nrbins_
int nrbins_
Definition: datadistributionextracter.h:77
DataDistributionExtracter::init
void init()
Definition: datadistributionextracter.h:141
TypeSet
Sets of (small) copyable elements.
Definition: commontypes.h:29
RefMan< DistribType >

Generated for the OpendTect seismic interpretation project. Copyright (C): dGB Beheer B.V. 1995-2021