001/* 002 * ModeShape (http://www.modeshape.org) 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.modeshape.common.statistic; 017 018import java.util.Collections; 019import java.util.Comparator; 020import java.util.LinkedList; 021import java.util.List; 022import java.util.concurrent.locks.Lock; 023import org.modeshape.common.annotation.ThreadSafe; 024import org.modeshape.common.math.MathOperations; 025import org.modeshape.common.text.Inflector; 026import org.modeshape.common.util.StringUtil; 027 028/** 029 * Encapsulation of the statistics for a series of values to which new values are frequently added. The statistics include the 030 * {@link #getMinimum() minimum}, {@link #getMaximum() maximum}, {@link #getTotal() total (aggregate sum)}, 031 * {@link #getMean() mean (average)}, {@link #getMedian() median}, {@link #getStandardDeviation() standard deviation} and the 032 * {@link #getHistogram() histogram} of the values. 033 * <p> 034 * This class uses an efficient running calculation of the mean and standard deviation that is not as susceptible to roundoff 035 * errors as other traditional algorithms. The recursive algorithm is as follows, where M is the median value, sigma is the 036 * standard deviation, and S is a variable used in the calculation of sigma: 037 * 038 * <pre> 039 * M(1) = x(1) 040 * S(1) = 0 041 * M(k) = M(k-1) + ( x(k) - M(k-1) ) / k 042 * S(k) = S(k-1) + ( x(k) - M(k-1) ) * (x(k) - M(k)) 043 * </pre> 044 * 045 * Then, the standard deviation for n values in x is 046 * 047 * <pre> 048 * sigma = sqrt(S(n) / n) 049 * </pre> 050 * 051 * </p> 052 * Unlike the other quantities, the median value (the value at which half of the values are greater and half the values are lower) 053 * cannot be calculated incrementally. Therefore, this class does record the values so that the median can be properly calculated. 054 * This fact should be kept in mind when performing statistics on large numbers of values. 055 * </p> 056 * <p> 057 * This class is threadsafe. 058 * </p> 059 * @param <T> the number type for these statistics 060 */ 061@ThreadSafe 062public class DetailedStatistics<T extends Number> extends SimpleStatistics<T> { 063 064 private T median; 065 private Double medianValue; 066 private double s = 0.0d; // used in the calculation of standard deviation (sigma) 067 private double sigma = 0.0d; 068 private final List<T> values = new LinkedList<T>(); 069 private final List<T> unmodifiableValues = Collections.unmodifiableList(this.values); 070 private Histogram<T> histogram; 071 072 public DetailedStatistics( MathOperations<T> operations ) { 073 super(operations); 074 this.medianValue = 0.0d; 075 this.median = this.math.createZeroValue(); 076 } 077 078 /** 079 * Get the values that have been recorded in these statistics. The contents of this list may change if new values are 080 * {@link #add(Number) added} in another thread. 081 * @return the unmodifiable collection of values, in insertion order 082 */ 083 public List<T> getValues() { 084 return this.unmodifiableValues; 085 } 086 087 @Override 088 protected void doAddValue( T value ) { 089 if (value == null) { 090 return; 091 } 092 double previousMean = this.getMeanValue(); 093 super.doAddValue(value); 094 this.values.add(value); 095 this.medianValue = null; 096 097 // Calculate the mean and standard deviation ... 098 int count = getCount(); 099 if (count == 1) { 100 this.s = 0.0d; 101 this.sigma = 0.0d; 102 } else { 103 double dValue = value.doubleValue(); 104 double dCount = count; 105 // M(k) = M(k-1) + ( x(k) - M(k-1) ) / k 106 double meanValue = previousMean + ((dValue - previousMean) / dCount); 107 // S(k) = S(k-1) + ( x(k) - M(k-1) ) * ( x(k) - M(k) ) 108 this.s = this.s + (dValue - previousMean) * (dValue - meanValue); 109 // sigma = sqrt( S(n) / (n-1) ) 110 this.sigma = Math.sqrt(this.s / dCount); 111 } 112 } 113 114 /** 115 * Return the approximate mean (average) value represented as an instance of the operand type. Note that this may truncate if 116 * the operand type is not able to have the required precision. For the accurate mean, see {@link #getMedianValue() }. 117 * @return the mean (average), or 0.0 if the {@link #getCount() count} is 0 118 */ 119 public T getMedian() { 120 getMedianValue(); 121 return this.median; 122 } 123 124 /** 125 * Return the median value. 126 * @return the median value, or 0.0 if the {@link #getCount() count} is 0 127 * @see #getMedian() 128 */ 129 public double getMedianValue() { 130 Lock lock = this.getLock().writeLock(); 131 try { 132 lock.lock(); 133 int count = this.values.size(); 134 if (count == 0) { 135 return 0.0d; 136 } 137 if (this.medianValue == null) { 138 // Sort the values in numerical order.. 139 Comparator<T> comparator = this.math.getComparator(); 140 Collections.sort(this.values, comparator); 141 this.medianValue = 0.0d; 142 // If there is only one value, then the median is that value ... 143 if (count == 1) { 144 this.medianValue = this.values.get(0).doubleValue(); 145 } 146 // If there is an odd number of values, find value that is in the middle .. 147 else if (count % 2 != 0) { 148 this.medianValue = this.values.get(((count + 1) / 2) - 1).doubleValue(); 149 } 150 // Otherwise, there is an even number of values, so find the average of the middle two values ... 151 else { 152 int upperMiddleValueIndex = count / 2; 153 int lowerMiddleValueIndex = upperMiddleValueIndex - 1; 154 double lowerValue = this.values.get(lowerMiddleValueIndex).doubleValue(); 155 double upperValue = this.values.get(upperMiddleValueIndex).doubleValue(); 156 this.medianValue = (lowerValue + upperValue) / 2.0d; 157 } 158 this.median = this.math.create(this.medianValue); 159 this.histogram = null; 160 } 161 } finally { 162 lock.unlock(); 163 } 164 return this.medianValue; 165 } 166 167 /** 168 * Return the standard deviation. The standard deviation is a measure of the variation in a series of values. Values with a 169 * lower standard deviation has less variance in the values than a series of values with a higher standard deviation. 170 * @return the standard deviation, or 0.0 if the {@link #getCount() count} is 0 or if all of the values are the same. 171 */ 172 public double getStandardDeviation() { 173 Lock lock = this.getLock().readLock(); 174 lock.lock(); 175 try { 176 return this.sigma; 177 } finally { 178 lock.unlock(); 179 } 180 } 181 182 /** 183 * Return the histogram of the {@link #getValues() values}. This method returns a histogram where all of the buckets are 184 * distributed normally and all have the same width. In this case, the 'numSigmas' should be set to 0. For other variations, 185 * see {@link #getHistogram(int)}. 186 * @return the histogram 187 * @see #getHistogram(int) 188 */ 189 public Histogram<T> getHistogram() { 190 return getHistogram(0); 191 } 192 193 /** 194 * Return the histogram of the {@link #getValues() values}. This method is capable of creating two kinds of histograms. The 195 * first kind is a histogram where all of the buckets are distributed normally and all have the same width. In this case, the 196 * 'numSigmas' should be set to 0. See {@link #getHistogram()}. 197 * <p> 198 * The second kind of histogram is more useful when most of the data that is clustered near one value. This histogram is 199 * focused around the values that are up to 'numSigmas' above and below the {@link #getMedian() median}, and all values 200 * outside of this range are placed in the first and last bucket. 201 * </p> 202 * @param numSigmas the number of standard deviations from the {@link #getMedian() median}, or 0 if the buckets of the 203 * histogram should be evenly distributed 204 * @return the histogram 205 * @see #getHistogram() 206 */ 207 public Histogram<T> getHistogram( int numSigmas ) { 208 Lock lock = this.getLock().writeLock(); 209 lock.lock(); 210 try { 211 Histogram<T> hist = new Histogram<T>(this.math, this.values); 212 if (numSigmas > 0) { 213 // The 'getMediaValue()' method will reset the current histogram, so don't set it... 214 hist.setStrategy(this.getMedianValue(), this.getStandardDeviation(), numSigmas); 215 } 216 this.histogram = hist; 217 return this.histogram; 218 } finally { 219 lock.unlock(); 220 } 221 } 222 223 @Override 224 protected void doReset() { 225 super.doReset(); 226 this.medianValue = 0.0d; 227 this.median = this.math.createZeroValue(); 228 this.s = 0.0d; 229 this.sigma = 0.0d; 230 this.values.clear(); 231 } 232 233 @Override 234 public String toString() { 235 int count = this.getCount(); 236 String samples = Inflector.getInstance().pluralize("sample", count); 237 return StringUtil.createString("{0} {1}: min={2}; avg={3}; median={4}; stddev={5}; max={6}", count, samples, this.getMinimum(), this.getMean(), this.getMedian(), this.getStandardDeviation(), 238 this.getMaximum()); 239 } 240 241}