JayBeams  0.1
Another project to have fun coding.
offline_feed_statistics.hpp
Go to the documentation of this file.
1 #ifndef jb_offline_feed_stats_hpp
2 #define jb_offline_feed_stats_hpp
3 
4 #include <jb/config_object.hpp>
6 #include <jb/histogram.hpp>
8 
9 #include <iosfwd>
10 
11 namespace jb {
12 
13 /**
14  * Keep statistics about a feed and its offline processor.
15  *
16  * In JayBeams we will find ourselves processing all kinds of offline
17  * feeds, that is, files with the contents of a real-time feed. We
18  * want to measure things like the processing time for the data, what
19  * is the distribution of message rates like, what is the distribution
20  * of message inter-arrival times like.
21  *
22  * This class encapsulates a lot of the logic for those computations.
23  * The class can be thoroughly configured at run-time, for ease of
24  * experimentation, but the default values should be reasonable.
25  *
26  * The source of timestamps can be an actual clock, the recorded
27  * timestamps in a feed file, or some virtual clock used in
28  * simulations. The only requirement is for the timestamps to be
29  * compatible with std::chrono::duration<>. It might seem odd to use
30  * std::chrono::duration<> instead of std::chrono::time_point<>, this
31  * is because most feeds are timestamped relative to a well-known time point:
32  * midnight at the beginning of the day, or 00:00:00am.
33  *
34  * Likewise, the processing latency measurements can be generated from
35  * an actual clock or some virtual measurement (say counting
36  * interrupts, or CPU cycles vs. elapsed time). The only requirement
37  * is for the measurements to be compatible with
38  * std::chrono::duration<>.
39  */
41 public:
42  class config;
43 
44  /// Constructor
45  explicit offline_feed_statistics(config const& cfg);
46 
47  /**
48  * Record a sample, that is, process a message received at the given
49  * timestamp.
50  *
51  * @tparam event_timestamp_t the type used to record the event
52  * timestamps.
53  * @tparam duration_t the type used to record the processing latency
54  * for the event.
55  *
56  * @param ts the event timestamp, please see the class documentation
57  * for timestamps vs. time points.
58  * @param processing_latency the time it took to process the event.
59  */
60  template <typename event_timestamp_t, typename duration_t>
61  void sample(event_timestamp_t ts, duration_t processing_latency) {
62  using std::chrono::duration_cast;
63  using std::chrono::nanoseconds;
64  record_sample( // forward to the non-template helper; both arguments are normalized to nanoseconds
65  duration_cast<nanoseconds>(ts),
66  duration_cast<nanoseconds>(processing_latency));
67  }
68 
69  /**
70  * Print a CSV header.
71  *
72  * Assuming there are many offline_feed_statistics<> objects (say
73  * one for each symbol, or one for each minute interval) this
74  * function can be used to print the CSV header for all of them.
75  *
76  * The fields include:
77  * - name: the name of the offline_feed_statistics<> object.
78  * - nsamples: the number of samples received.
79  * - minRatePerSec: the minimum messages/second rate
80  * - p25RatePerSec: the 25th percentile for the messages/second rate
81  * - p50RatePerSec: the 50th percentile for the messages/second rate
82  * - p75RatePerSec: the 75th percentile for the messages/second rate
83  * - p90RatePerSec: the 90th percentile for the messages/second rate
84  * - p99RatePerSec: the 99th percentile for the messages/second rate
85  * - p999RatePerSec: the 99.9th percentile for the messages/second rate
86  * - p9999RatePerSec: the 99.99th percentile for the messages/second rate
87  * - maxRatePerSec: the maximum for the messages/second rate
88  * - minRatePerMSec, p25RatePerMSec, ..., maxRatePerMSec: the
89  * statistics for the messages/millisecond rate.
90  * - minRatePerUSec, p25RatePerUSec, ..., maxRatePerUSec: the
91  * statistics for the messages/microsecond rate.
92  * - minProcessingLatency, ..., maxProcessingLatency: the statistics
93  * for processing latency of the events, in nanoseconds.
94  * - minArrival: the minimum of the timestamp difference between two
95  * consecutive messages, in nanoseconds.
96  * - p0001Arrival: the 0.01th percentile of the timestamp difference
97  * between two consecutive messages, in nanoseconds.
98  * - p001Arrival: the 0.1th percentile of the timestamp difference
99  * between two consecutive messages, in nanoseconds.
100  * - p01Arrival: the 1st percentile of the timestamp difference
101  * between two consecutive messages, in nanoseconds.
102  * - p10Arrival: the 10th percentile of the timestamp difference
103  * between two consecutive messages, in nanoseconds.
104  * - p25Arrival, p50Arrival, p75Arrival, maxArrival: more statistics
105  * about the arrival time.
106  *
107  * @param os the output stream
108  */
109  static void print_csv_header(std::ostream& os);
110 
111  /**
112  * Print all the measurements in CSV format.
113  */
114  void print_csv(std::string const& name, std::ostream& os) const;
115 
116  /**
117  * Final progress report at the end of the input.
118  */
119  void log_final_progress() const;
120 
121 private:
122  /**
123  * Report progress up to a certain point in the input
124  *
125  * @param ts the (logical) timestamp of the current event in the input
126  */
127  void log_progress(std::chrono::nanoseconds ts) const;
128 
129  /**
130  * Refactor non-template portions of sample()
131  *
132  * @param ts the logical timestamp of the event in the input stream
133  * @param processing_latency the processing latency for the event
134  */
135  void record_sample(
136  std::chrono::nanoseconds ts, std::chrono::nanoseconds processing_latency);
137 
138 private:
147 
151 
152  std::chrono::seconds reporting_interval_;
153  std::chrono::nanoseconds last_ts_;
154  std::chrono::nanoseconds last_report_ts_;
155 };
156 
157 /**
158  * Configure an offline_feed_statistics object
159  */
161 public:
162  config();
164 
165  /// Validate the configuration
166  void validate() const override;
167 
174 };
175 
176 } // namespace jb
177 
178 #endif // jb_offline_feed_stats_hpp
jb::config_attribute< config, int > reporting_interval_seconds
void record_sample(std::chrono::nanoseconds ts, std::chrono::nanoseconds processing_latency)
Refactor non-template portions of sample()
void log_progress(std::chrono::nanoseconds ts) const
Report progress up to a certain point in the input.
Base class for all configuration objects.
event_rate_histogram< std::chrono::nanoseconds, std::int64_t > rate_histogram
A histogram class with controllable binning and range strategy.
Definition: histogram.hpp:46
histogram< integer_range_binning< std::uint64_t > > processing_latency_histogram_t
Keep statistics about a feed and its offline processor.
std::chrono::nanoseconds last_report_ts_
histogram< integer_range_binning< std::int64_t > > interarrival_histogram_t
jb::config_attribute< config, int > max_messages_per_microsecond
void sample(event_timestamp_t ts, duration_t processing_latency)
Record a sample, that is process a message received at the given timestamp.
jb::config_attribute< config, int > max_processing_latency_nanoseconds
jb::config_attribute< config, std::int64_t > max_interarrival_time_nanoseconds
offline_feed_statistics(config const &cfg)
Constructor.
jb::config_attribute< config, int > max_messages_per_millisecond
jb::config_attribute< config, int > max_messages_per_second
void print_csv(std::string const &name, std::ostream &os) const
Print all the measurements in CSV format.
#define config_object_constructors(NAME)
Helper class to easily define configuration attributes.
void log_final_progress() const
Final progress report at the end of the input.
interarrival_histogram_t interarrival_
processing_latency_histogram_t processing_latency_
static void print_csv_header(std::ostream &os)
Print a CSV header.
Configure an offline_feed_statistics object.
The top-level namespace for the JayBeams library.
Definition: as_hhmmss.hpp:7