43 #ifndef ION_ANALYTICS_DISCREPANCY_H_
44 #define ION_ANALYTICS_DISCREPANCY_H_
73 time_begin_ = time_begin;
74 normalized_begin_ = 0.5 /
static_cast<double>(num_samples);
75 const double normalized_end = (
static_cast<double>(num_samples) - 0.5) /
76 static_cast<double>(num_samples);
77 scale_ = (normalized_end - normalized_begin_) / (time_end - time_begin_);
78 inv_scale_ = 1.0 / scale_;
83 return normalized_begin_ + scale_ * (time_sample - time_begin_);
88 return time_begin_ + inv_scale_ * (normalized_sample - normalized_begin_);
98 double normalized_begin_;
107 template <
class InputIterator>
110 size_t num_samples = last - first;
111 std::vector<double> result(num_samples);
112 std::copy(first, last, result.begin());
113 std::sort(result.begin(), result.end());
119 template <
class ContainerType>
135 : discrepancy(discrepancy),
138 num_samples(num_samples) {}
161 template <
class InputIterator>
164 const size_t num_samples = last - first;
165 if (num_samples == 0)
return largest_interval_discrepancy;
167 const double inv_sample_count = 1.0 /
static_cast<double>(num_samples);
168 std::vector<double> locations;
170 std::vector<size_t> count_less;
173 std::vector<size_t> count_less_equal;
177 locations.push_back(0.0);
178 count_less.push_back(0);
179 count_less_equal.push_back(0);
182 for (
auto iter = first; iter != last; ++iter) {
183 locations.push_back(*iter);
184 count_less.push_back(i);
185 count_less_equal.push_back(i + 1);
188 if (*(last - 1) < 1.0) {
189 locations.push_back(1.0);
190 count_less.push_back(num_samples);
191 count_less_equal.push_back(num_samples);
200 double interval_discrepancy = 0.0;
203 size_t interval_begin = 0;
204 size_t interval_end = 0;
205 for (
size_t i = 1; i < locations.size(); ++i) {
207 const double length = locations[i] - locations[i - 1];
210 const size_t count_open_increment = count_less[i] - count_less[i - 1];
212 const double extended_interval_discrepancy =
213 interval_discrepancy +
214 (length -
static_cast<double>(count_open_increment) * inv_sample_count);
218 const size_t new_count_open = count_less[i] - count_less_equal[i - 1];
220 const double new_interval_discrepancy =
221 length -
static_cast<double>(new_count_open) * inv_sample_count;
224 if (extended_interval_discrepancy >= new_interval_discrepancy) {
226 interval_discrepancy = extended_interval_discrepancy;
230 interval_discrepancy = new_interval_discrepancy;
231 interval_begin = i - 1;
236 if (interval_discrepancy > largest_interval_discrepancy.
discrepancy) {
238 interval_discrepancy, locations[interval_begin],
239 locations[interval_end],
240 count_less[interval_end] - count_less_equal[interval_begin]);
244 return largest_interval_discrepancy;
248 template <
class ContainerType>
250 return Discrepancy(samples.begin(), samples.end());
265 template <
class RandomIt>
268 const size_t num_samples = last - first;
273 *std::max_element(first, last), num_samples);
274 std::vector<double> normalized_timestamps =
277 Discrepancy(normalized_timestamps.begin(), normalized_timestamps.end());
282 largest_interval_discrepancy.
begin =
284 largest_interval_discrepancy.
end =
287 return largest_interval_discrepancy;
291 template <
class ContainerType>
293 const ContainerType& timestamps) {
300 #endif // ION_ANALYTICS_DISCREPANCY_H_
Helper class for transforming samples between the time domain and the (unitless) normalized domain used for computing discrepancy.
SampleMapping(double time_begin, double time_end, size_t num_samples)
#define CHECK_GT(val1, val2)
IntervalDiscrepancy()
Default constructor, initializing all values to zero.
IntervalDiscrepancy AbsoluteTimestampDiscrepancy(RandomIt first, RandomIt last)
A discrepancy-based metric for measuring the irregularity of timestamps.
#define CHECK_LT(val1, val2)
double discrepancy
The discrepancy of the samples in the interval.
double DurationFromLength(double length) const
Maps a duration back from normalized (unitless) domain to time domain.
std::vector< double > NormalizeSamples(InputIterator first, InputIterator last, const SampleMapping &sample_mapping)
Sorts a sequence of numbers and normalizes it to the range [0, 1] using the given sample_mapping.
Copyright 2016 Google Inc.
double NormalizedFromTime(double time_sample) const
Maps a sample from time domain to normalized (unitless) domain.
size_t num_samples
The number of samples in the interval from begin to end.
IntervalDiscrepancy Discrepancy(InputIterator first, InputIterator last)
Computes the discrepancy of a sequence of numbers in the range [0,1].
double end
The end of the interval where the value was measured.
IntervalDiscrepancy(double discrepancy, double begin, double end, size_t num_samples)
Constructor.
Result of a discrepancy computation, including the value measured and the bounds of the interval where the value was measured.
double TimeFromNormalized(double normalized_sample) const
Maps a sample back from normalized (unitless) domain to time domain.
double begin
The beginning of the interval where the value was measured.