001/*
002 * ModeShape (http://www.modeshape.org)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *       http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.modeshape.common.statistic;
017
018import java.util.Collections;
019import java.util.Comparator;
020import java.util.LinkedList;
021import java.util.List;
022import java.util.concurrent.locks.Lock;
023import org.modeshape.common.annotation.ThreadSafe;
024import org.modeshape.common.math.MathOperations;
025import org.modeshape.common.text.Inflector;
026import org.modeshape.common.util.StringUtil;
027
028/**
029 * Encapsulation of the statistics for a series of values to which new values are frequently added. The statistics include the
030 * {@link #getMinimum() minimum}, {@link #getMaximum() maximum}, {@link #getTotal() total (aggregate sum)},
031 * {@link #getMean() mean (average)}, {@link #getMedian() median}, {@link #getStandardDeviation() standard deviation} and the
032 * {@link #getHistogram() histogram} of the values.
033 * <p>
034 * This class uses an efficient running calculation of the mean and standard deviation that is not as susceptible to roundoff
035 * errors as other traditional algorithms. The recursive algorithm is as follows, where M is the median value, sigma is the
036 * standard deviation, and S is a variable used in the calculation of sigma:
037 * 
038 * <pre>
039 *   M(1) = x(1)
040 *   S(1) = 0
041 *   M(k) = M(k-1) + ( x(k) - M(k-1) ) / k
042 *   S(k) = S(k-1) + ( x(k) - M(k-1) ) * (x(k) - M(k))
043 * </pre>
044 * 
045 * Then, the standard deviation for n values in x is
046 * 
047 * <pre>
048 * sigma = sqrt(S(n) / n)
049 * </pre>
050 * 
051 * </p>
052 * Unlike the other quantities, the median value (the value at which half of the values are greater and half the values are lower)
053 * cannot be calculated incrementally. Therefore, this class does record the values so that the median can be properly calculated.
054 * This fact should be kept in mind when performing statistics on large numbers of values.
055 * </p>
056 * <p>
057 * This class is threadsafe.
058 * </p>
059 * @param <T> the number type for these statistics
060 */
061@ThreadSafe
062public class DetailedStatistics<T extends Number> extends SimpleStatistics<T> {
063
064    private T median;
065    private Double medianValue;
066    private double s = 0.0d; // used in the calculation of standard deviation (sigma)
067    private double sigma = 0.0d;
068    private final List<T> values = new LinkedList<T>();
069    private final List<T> unmodifiableValues = Collections.unmodifiableList(this.values);
070    private Histogram<T> histogram;
071
072    public DetailedStatistics( MathOperations<T> operations ) {
073        super(operations);
074        this.medianValue = 0.0d;
075        this.median = this.math.createZeroValue();
076    }
077
078    /**
079     * Get the values that have been recorded in these statistics. The contents of this list may change if new values are
080     * {@link #add(Number) added} in another thread.
081     * @return the unmodifiable collection of values, in insertion order
082     */
083    public List<T> getValues() {
084        return this.unmodifiableValues;
085    }
086
087    @Override
088    protected void doAddValue( T value ) {
089        if (value == null) {
090            return;
091        }
092        double previousMean = this.getMeanValue();
093        super.doAddValue(value);
094        this.values.add(value);
095        this.medianValue = null;
096
097        // Calculate the mean and standard deviation ...
098        int count = getCount();
099        if (count == 1) {
100            this.s = 0.0d;
101            this.sigma = 0.0d;
102        } else {
103            double dValue = value.doubleValue();
104            double dCount = count;
105            // M(k) = M(k-1) + ( x(k) - M(k-1) ) / k
106            double meanValue = previousMean + ((dValue - previousMean) / dCount);
107            // S(k) = S(k-1) + ( x(k) - M(k-1) ) * ( x(k) - M(k) )
108            this.s = this.s + (dValue - previousMean) * (dValue - meanValue);
109            // sigma = sqrt( S(n) / (n-1) )
110            this.sigma = Math.sqrt(this.s / dCount);
111        }
112    }
113
114    /**
115     * Return the approximate mean (average) value represented as an instance of the operand type. Note that this may truncate if
116     * the operand type is not able to have the required precision. For the accurate mean, see {@link #getMedianValue() }.
117     * @return the mean (average), or 0.0 if the {@link #getCount() count} is 0
118     */
119    public T getMedian() {
120        getMedianValue();
121        return this.median;
122    }
123
124    /**
125     * Return the median value.
126     * @return the median value, or 0.0 if the {@link #getCount() count} is 0
127     * @see #getMedian()
128     */
129    public double getMedianValue() {
130        Lock lock = this.getLock().writeLock();
131        try {
132            lock.lock();
133            int count = this.values.size();
134            if (count == 0) {
135                return 0.0d;
136            }
137            if (this.medianValue == null) {
138                // Sort the values in numerical order..
139                Comparator<T> comparator = this.math.getComparator();
140                Collections.sort(this.values, comparator);
141                this.medianValue = 0.0d;
142                // If there is only one value, then the median is that value ...
143                if (count == 1) {
144                    this.medianValue = this.values.get(0).doubleValue();
145                }
146                // If there is an odd number of values, find value that is in the middle ..
147                else if (count % 2 != 0) {
148                    this.medianValue = this.values.get(((count + 1) / 2) - 1).doubleValue();
149                }
150                // Otherwise, there is an even number of values, so find the average of the middle two values ...
151                else {
152                    int upperMiddleValueIndex = count / 2;
153                    int lowerMiddleValueIndex = upperMiddleValueIndex - 1;
154                    double lowerValue = this.values.get(lowerMiddleValueIndex).doubleValue();
155                    double upperValue = this.values.get(upperMiddleValueIndex).doubleValue();
156                    this.medianValue = (lowerValue + upperValue) / 2.0d;
157                }
158                this.median = this.math.create(this.medianValue);
159                this.histogram = null;
160            }
161        } finally {
162            lock.unlock();
163        }
164        return this.medianValue;
165    }
166
167    /**
168     * Return the standard deviation. The standard deviation is a measure of the variation in a series of values. Values with a
169     * lower standard deviation has less variance in the values than a series of values with a higher standard deviation.
170     * @return the standard deviation, or 0.0 if the {@link #getCount() count} is 0 or if all of the values are the same.
171     */
172    public double getStandardDeviation() {
173        Lock lock = this.getLock().readLock();
174        lock.lock();
175        try {
176            return this.sigma;
177        } finally {
178            lock.unlock();
179        }
180    }
181
182    /**
183     * Return the histogram of the {@link #getValues() values}. This method returns a histogram where all of the buckets are
184     * distributed normally and all have the same width. In this case, the 'numSigmas' should be set to 0. For other variations,
185     * see {@link #getHistogram(int)}.
186     * @return the histogram
187     * @see #getHistogram(int)
188     */
189    public Histogram<T> getHistogram() {
190        return getHistogram(0);
191    }
192
193    /**
194     * Return the histogram of the {@link #getValues() values}. This method is capable of creating two kinds of histograms. The
195     * first kind is a histogram where all of the buckets are distributed normally and all have the same width. In this case, the
196     * 'numSigmas' should be set to 0. See {@link #getHistogram()}.
197     * <p>
198     * The second kind of histogram is more useful when most of the data that is clustered near one value. This histogram is
199     * focused around the values that are up to 'numSigmas' above and below the {@link #getMedian() median}, and all values
200     * outside of this range are placed in the first and last bucket.
201     * </p>
202     * @param numSigmas the number of standard deviations from the {@link #getMedian() median}, or 0 if the buckets of the
203     * histogram should be evenly distributed
204     * @return the histogram
205     * @see #getHistogram()
206     */
207    public Histogram<T> getHistogram( int numSigmas ) {
208        Lock lock = this.getLock().writeLock();
209        lock.lock();
210        try {
211            Histogram<T> hist = new Histogram<T>(this.math, this.values);
212            if (numSigmas > 0) {
213                // The 'getMediaValue()' method will reset the current histogram, so don't set it...
214                hist.setStrategy(this.getMedianValue(), this.getStandardDeviation(), numSigmas);
215            }
216            this.histogram = hist;
217            return this.histogram;
218        } finally {
219            lock.unlock();
220        }
221    }
222
223    @Override
224    protected void doReset() {
225        super.doReset();
226        this.medianValue = 0.0d;
227        this.median = this.math.createZeroValue();
228        this.s = 0.0d;
229        this.sigma = 0.0d;
230        this.values.clear();
231    }
232
233    @Override
234    public String toString() {
235        int count = this.getCount();
236        String samples = Inflector.getInstance().pluralize("sample", count);
237        return StringUtil.createString("{0} {1}: min={2}; avg={3}; median={4}; stddev={5}; max={6}", count, samples, this.getMinimum(), this.getMean(), this.getMedian(), this.getStandardDeviation(),
238                                       this.getMaximum());
239    }
240
241}