001/*
002 * ModeShape (http://www.modeshape.org)
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *       http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.modeshape.common.statistic;
017
018import java.math.BigDecimal;
019import java.text.DecimalFormat;
020import java.util.ArrayList;
021import java.util.Arrays;
022import java.util.Collections;
023import java.util.Iterator;
024import java.util.LinkedList;
025import java.util.List;
026import org.modeshape.common.annotation.NotThreadSafe;
027import org.modeshape.common.math.MathOperations;
028import org.modeshape.common.util.HashCode;
029import org.modeshape.common.util.StringUtil;
030
031/**
032 * A representation of a histogram of values.
033 * 
034 * @param <T> the type of value
035 */
036@NotThreadSafe
037public class Histogram<T extends Number> {
038
    /** The number of buckets used until {@link #setBucketCount(int)} is called with a different value. */
    public static final int DEFAULT_BUCKET_COUNT = 10;
    /** The number of significant figures used until {@link #setSignificantFigures(int)} is called with a different value. */
    public static final int DEFAULT_SIGNIFICANT_FIGURES = 4;

    /** The operations used to compare, subtract, round and convert values of type {@code T}. */
    protected final MathOperations<T> math;
    /** This histogram's own copy of the supplied values, sorted with the comparator obtained from {@link #math}. */
    protected final List<T> values;
    // The number of buckets requested for the histogram ...
    private int bucketCount = DEFAULT_BUCKET_COUNT;
    // The number of significant figures passed to the boundary calculation ...
    private int significantFigures = DEFAULT_SIGNIFICANT_FIGURES;
    // Tested by compute() as its "already computed" marker and reset to null by every setter.
    // NOTE(review): nothing in the code visible here ever assigns a non-null value, so compute() always recomputes.
    private BigDecimal bucketWidth;
    // The buckets produced by the most recent call to compute() ...
    private LinkedList<Bucket> buckets;
    // Strategy that reports the smallest and largest of the actual values ...
    private BucketingStrategy actualValueStrategy = new DefaultBucketingStrategy();
    // The strategy currently in effect; this initializer reads 'actualValueStrategy', so it must be declared after it ...
    private BucketingStrategy bucketingStrategy = actualValueStrategy;
050
051    public Histogram( MathOperations<T> operations,
052                      List<T> values ) {
053        this.math = operations;
054        this.values = new LinkedList<T>(values);
055        this.buckets = new LinkedList<Bucket>();
056        this.bucketWidth = null;
057        // Sort the data using natural order ...
058        Collections.sort(this.values, this.math.getComparator());
059    }
060
061    @SafeVarargs
062    public Histogram( MathOperations<T> operations, T... values ) {
063        this(operations, Arrays.asList(values));
064    }
065
066    public BucketingStrategy getStrategy() {
067        return this.bucketingStrategy;
068    }
069
070    public MathOperations<T> getMathOperations() {
071        return this.math;
072    }
073
074    /**
075     * Set the histogram to use the standard deviation to determine the bucket sizes.
076     * 
077     * @param median
078     * @param standardDeviation
079     * @param sigma
080     */
081    public void setStrategy( double median,
082                             double standardDeviation,
083                             int sigma ) {
084        this.bucketingStrategy = new StandardDeviationBucketingStrategy(median, standardDeviation, sigma);
085        this.bucketWidth = null;
086    }
087
088    /**
089     * Set the histogram to use the supplied minimum and maximum values to determine the bucket size.
090     * 
091     * @param minimum
092     * @param maximum
093     */
094    public void setStrategy( T minimum,
095                             T maximum ) {
096        this.bucketingStrategy = new ExplicitBucketingStrategy(minimum, maximum);
097        this.bucketWidth = null;
098    }
099
100    /**
101     * Set the histogram to use the actual minimum and maximum values to determine the bucket sizes.
102     */
103    public void setStrategyToDefault() {
104        this.bucketingStrategy = this.actualValueStrategy;
105        this.bucketWidth = null;
106    }
107
108    public int getSignificantFigures() {
109        return significantFigures;
110    }
111
112    /**
113     * Set the number of significant figures used in the calculation of the bucket widths.
114     * 
115     * @param significantFigures the number of significant figures for the bucket widths
116     * @return this histogram, useful for method-chaining
117     * @see #DEFAULT_SIGNIFICANT_FIGURES
118     */
119    public Histogram<T> setSignificantFigures( int significantFigures ) {
120        if (significantFigures != this.significantFigures) {
121            this.significantFigures = significantFigures;
122            this.bucketWidth = null;
123            this.buckets.clear();
124        }
125        return this;
126    }
127
128    /**
129     * Return the number of buckets in this histogram.
130     * 
131     * @return the number of buckets.
132     */
133    public int getBucketCount() {
134        return bucketCount;
135    }
136
137    /**
138     * Set the number of buckets that this histogram will use.
139     * 
140     * @param count the number of buckets
141     * @return this histogram, useful for method-chaining
142     * @see #DEFAULT_BUCKET_COUNT
143     */
144    public Histogram<T> setBucketCount( int count ) {
145        if (count != this.bucketCount) {
146            this.bucketCount = count;
147            this.bucketWidth = null;
148            this.buckets.clear();
149        }
150        return this;
151    }
152
153    /**
154     * Get the buckets in this histogram. If the histogram has not yet been computed, this method will cause it to be generated.
155     * The resulting list should not be modified.
156     * 
157     * @return the histogram buckets.
158     */
159    public List<Bucket> getBuckets() {
160        compute();
161        return this.buckets;
162    }
163
164    protected void compute() {
165        // Only compute if there is not already a histogram ...
166        if (this.bucketWidth != null) return;
167
168        // Find the lower and upper bounds of the histogram using the strategy ...
169        T lowerBound = this.bucketingStrategy.getLowerBound();
170        T upperBound = this.bucketingStrategy.getUpperBound();
171
172        // Find the actual minimum and maximum values ...
173        T actualMinimum = this.actualValueStrategy.getLowerBound();
174        T actualMaximum = this.actualValueStrategy.getUpperBound();
175
176        // Create the buckets ...
177        List<T> boundaries = getBucketBoundaries(this.math,
178                                                 lowerBound,
179                                                 upperBound,
180                                                 actualMinimum,
181                                                 actualMaximum,
182                                                 this.bucketCount,
183                                                 this.significantFigures);
184        this.buckets.clear();
185        int numBuckets = boundaries.isEmpty() ? 0 : boundaries.size() - 1;
186        for (int i = 0; i != numBuckets; ++i) {
187            this.buckets.add(new Bucket(boundaries.get(i), boundaries.get(i + 1)));
188        }
189
190        // Create the histogram by adding values to each range ...
191        Iterator<Bucket> intervalIterator = this.buckets.iterator();
192        Bucket currentInterval = null;
193        for (T value : this.values) {
194            while (currentInterval == null || currentInterval.checkValue(value, !intervalIterator.hasNext()) > 0) {
195                if (!intervalIterator.hasNext()) break;
196                currentInterval = intervalIterator.next();
197            }
198            if (currentInterval != null) currentInterval.addValue(value);
199        }
200    }
201
202    /**
203     * Return the total number of values that have gone into this histogram.
204     * 
205     * @return the total number of values
206     * @see Bucket#getPercentageOfValues()
207     */
208    public long getTotalNumberOfValues() {
209        return this.values.size();
210    }
211
212    protected float getMaximumPercentage() {
213        float maxPercentage = 0.0f;
214        for (Bucket bucket : this.buckets) {
215            maxPercentage = Math.max(maxPercentage, bucket.getPercentageOfValues());
216        }
217        return maxPercentage;
218    }
219
220    protected long getMaximumCount() {
221        long maxCount = 0l;
222        for (Bucket bucket : this.buckets) {
223            maxCount = Math.max(maxCount, bucket.getNumberOfValues());
224        }
225        return maxCount;
226    }
227
228    /**
229     * Generate a textual (horizontal) bar graph of this histogram.
230     * 
231     * @param maxBarLength the maximum bar length, or 0 if the bar length is to represent actual counts
232     * @return the strings that make up the histogram
233     */
234    public List<String> getTextGraph( int maxBarLength ) {
235        compute();
236        if (maxBarLength < 1) maxBarLength = (int)this.getMaximumCount();
237        final float barLengthForHundredPercent = this.buckets.isEmpty() ? maxBarLength : 100.0f * maxBarLength
238                                                                                         / getMaximumPercentage();
239        final String fullLengthBar = StringUtil.createString('*', (int)barLengthForHundredPercent);
240        List<String> result = new LinkedList<String>();
241        // First calculate the labels and the max length ...
242        int maxLowerBoundLength = 0;
243        int maxUpperBoundLength = 0;
244        for (Bucket bucket : this.buckets) {
245            maxLowerBoundLength = Math.max(bucket.getLowerBound().toString().length(), maxLowerBoundLength);
246            maxUpperBoundLength = Math.max(bucket.getUpperBound().toString().length(), maxUpperBoundLength);
247        }
248
249        // Create the header ...
250        int rangeWidth = 1 + maxLowerBoundLength + 3 + maxUpperBoundLength + 1;
251        int barWidth = maxBarLength + 20;
252        result.add(StringUtil.justifyLeft("Ranges", rangeWidth, ' ') + " Distribution");
253        result.add(StringUtil.createString('-', rangeWidth) + ' ' + StringUtil.createString('-', barWidth));
254        for (Bucket bucket : this.buckets) {
255            float percent = bucket.getPercentageOfValues();
256            long number = bucket.getNumberOfValues();
257            StringBuilder sb = new StringBuilder();
258            sb.append("[");
259            sb.append(StringUtil.justifyLeft(bucket.getLowerBound().toString(), maxLowerBoundLength, ' '));
260            sb.append(" - ");
261            sb.append(StringUtil.justifyLeft(bucket.getUpperBound().toString(), maxUpperBoundLength, ' '));
262            sb.append("] ");
263            int barLength = Math.max((int)(barLengthForHundredPercent * percent / 100.0f), 0);
264            if (barLength == 0 && number != 0) barLength = 1; // make sure there is a bar for all non-zero buckets
265            sb.append(fullLengthBar.substring(0, barLength));
266            if (number != 0) {
267                sb.append(" ");
268                sb.append(number);
269                sb.append(" (");
270                sb.append(new DecimalFormat("###.#").format(percent));
271                sb.append("%)");
272            }
273            result.add(sb.toString());
274        }
275        return result;
276    }
277
278    protected static <T> List<T> getBucketBoundaries( MathOperations<T> math,
279                                                      T lowerBound,
280                                                      T upperBound,
281                                                      T actualMinimum,
282                                                      T actualMaximum,
283                                                      int bucketCount,
284                                                      int bucketWidthSigFigs ) {
285        lowerBound = math.compare(lowerBound, actualMinimum) < 0 ? actualMinimum : lowerBound;
286        upperBound = math.compare(actualMaximum, upperBound) < 0 ? actualMaximum : upperBound;
287        if (math.compare(lowerBound, upperBound) == 0) {
288            List<T> boundaries = new ArrayList<T>();
289            boundaries.add(lowerBound);
290            boundaries.add(upperBound);
291            return boundaries;
292        }
293        final boolean extraLowerBucketNeeded = math.compare(lowerBound, actualMinimum) > 0;
294        final boolean extraUpperBucketNeeded = math.compare(actualMaximum, upperBound) > 0;
295        if (extraLowerBucketNeeded) --bucketCount;
296        if (extraUpperBucketNeeded) --bucketCount;
297
298        // Compute the delta between the lower and upper bound ...
299        T totalWidth = math.subtract(upperBound, lowerBound);
300        int totalWidthScale = math.getExponentInScientificNotation(totalWidth);
301
302        // Modify the lower bound by rounding down to the next lower meaningful value,
303        // using the scale of the totalWidth to determine how to round down.
304        T roundedLowerBound = math.roundDown(lowerBound, -totalWidthScale);
305        T roundedUpperBound = math.roundUp(upperBound, -totalWidthScale);
306
307        // Create the ranges ...
308        double finalLowerBound = math.doubleValue(roundedLowerBound);
309        double finalUpperBound = math.doubleValue(roundedUpperBound);
310        double finalBucketCount = bucketCount;
311        double bucketWidth = (finalUpperBound - finalLowerBound) / finalBucketCount;
312
313        // DoubleOperations doubleOps = new DoubleOperations();
314        // bucketWidth = doubleOps.keepSignificantFigures(bucketWidth,bucketWidthSigFigs);
315
316        List<T> boundaries = new ArrayList<T>();
317        if (bucketWidth > 0.0d) {
318            if (extraLowerBucketNeeded) boundaries.add(actualMinimum);
319            double nextBoundary = finalLowerBound;
320            for (int i = 0; i != bucketCount; ++i) {
321                boundaries.add(math.create(nextBoundary));
322                nextBoundary = nextBoundary + bucketWidth;
323                // nextBoundary = doubleOps.roundUp(nextBoundary + bucketWidth, bucketWidthSigFigs );
324            }
325            boundaries.add(roundedUpperBound);
326            if (extraUpperBucketNeeded) boundaries.add(actualMaximum);
327        }
328        return boundaries;
329    }
330
331    /**
332     * Represents a bucket in a histogram.
333     */
334    public class Bucket implements Comparable<Bucket> {
335
336        private final T lowerBound;
337        private final T upperBound;
338        private final T width;
339        private long numValues;
340
341        protected Bucket( T lowerBound,
342                          T upperBound ) {
343            this.lowerBound = lowerBound;
344            this.upperBound = upperBound;
345            this.width = Histogram.this.math.subtract(upperBound, lowerBound);
346        }
347
348        /**
349         * Get the lower bound of this bucket.
350         * 
351         * @return the lower bound
352         */
353        public T getLowerBound() {
354            return lowerBound;
355        }
356
357        /**
358         * Get the upper bound of this bucket.
359         * 
360         * @return the upper bound
361         */
362        public T getUpperBound() {
363            return upperBound;
364        }
365
366        /**
367         * Get the width of this bucket.
368         * 
369         * @return the width
370         */
371        public T getWidth() {
372            return this.width;
373        }
374
375        /**
376         * Return the percentage of values in the histogram that appear in this bucket.
377         * 
378         * @return the percentage of all values in the histogram that appear in this bucket.
379         */
380        public float getPercentageOfValues() {
381            float total = Histogram.this.getTotalNumberOfValues();
382            if (total == 0.0f) return 0.0f;
383            float numValuesFloat = this.numValues;
384            return 100.0f * numValuesFloat / total;
385        }
386
387        /**
388         * Add a value to this bucket
389         * 
390         * @param value
391         */
392        protected void addValue( T value ) {
393            ++this.numValues;
394        }
395
396        /**
397         * Get the number of values in this bucket.
398         * 
399         * @return the number of values
400         */
401        public long getNumberOfValues() {
402            return this.numValues;
403        }
404
405        /**
406         * Check whether the value fits in this bucket.
407         * 
408         * @param value the value to check
409         * @param isLast
410         * @return 0 if the value fits in this bucket, -1 if the value fits in a prior bucket, or 1 if the value fits in a later
411         *         bucket
412         */
413        public int checkValue( T value,
414                               boolean isLast ) {
415            if (Histogram.this.math.compare(this.lowerBound, value) > 0) return -1;
416            if (isLast) {
417                if (Histogram.this.math.compare(value, this.upperBound) > 0) return 1;
418            } else {
419                if (Histogram.this.math.compare(value, this.upperBound) >= 0) return 1;
420            }
421            return 0;
422        }
423
424        @Override
425        public int compareTo( Bucket that ) {
426            // This is lower if 'that' has a lowerBound that is greater than 'this' lower bound ...
427            if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) < 0) return -1;
428            if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) > 0) return 1;
429            // The lower bounds are the same, so 'this' is lower if 'that' has an upperBound that is greater than 'this' lower
430            // bound ...
431            if (Histogram.this.math.compare(this.upperBound, that.upperBound) < 0) return -1;
432            if (Histogram.this.math.compare(this.upperBound, that.upperBound) > 0) return 1;
433            return 0;
434        }
435
436        protected Class<T> getNumberClass() {
437            return Histogram.this.math.getOperandClass();
438        }
439
440        @Override
441        public int hashCode() {
442            // Equals asserts that two buckets are equal when all values are equal ...
443            return HashCode.compute(lowerBound, upperBound, width);
444        }
445
446        @SuppressWarnings( "unchecked" )
447        @Override
448        public boolean equals( Object obj ) {
449            if (obj instanceof Histogram.Bucket) {
450                Bucket that = (Bucket)obj;
451                if (this.getNumberClass().isAssignableFrom(that.getNumberClass())) {
452                    if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) != 0) return false;
453                    if (Histogram.this.math.compare(this.upperBound, that.upperBound) != 0) return false;
454                    if (Histogram.this.math.compare(this.width, that.width) != 0) return false;
455                    return true;
456                }
457            }
458            return false;
459        }
460
461        @Override
462        public String toString() {
463            return "[" + this.lowerBound + "," + this.upperBound + ")";
464        }
465
466    }
467
468    public abstract class BucketingStrategy {
469
470        public List<T> getValues() {
471            return Histogram.this.values;
472        }
473
474        public abstract T getLowerBound();
475
476        public abstract T getUpperBound();
477    }
478
479    public class DefaultBucketingStrategy extends BucketingStrategy {
480
481        @Override
482        public T getLowerBound() {
483            if (getValues().isEmpty()) return Histogram.this.math.createZeroValue();
484            return getValues().get(0);
485        }
486
487        @Override
488        public T getUpperBound() {
489            if (getValues().isEmpty()) return Histogram.this.math.createZeroValue();
490            return getValues().get(getValues().size() - 1);
491        }
492    }
493
494    public class ExplicitBucketingStrategy extends BucketingStrategy {
495
496        private final T lowerBound;
497        private final T upperBound;
498
499        protected ExplicitBucketingStrategy( T lowerBound,
500                                             T upperBound ) {
501            this.lowerBound = lowerBound;
502            this.upperBound = upperBound;
503        }
504
505        @Override
506        public T getLowerBound() {
507            return this.lowerBound;
508        }
509
510        @Override
511        public T getUpperBound() {
512            return this.upperBound;
513        }
514    }
515
516    public class StandardDeviationBucketingStrategy extends BucketingStrategy {
517
518        private final double median;
519        private final double standardDeviation;
520        private final int numberOfDeviationsAboveAndBelow;
521
522        protected StandardDeviationBucketingStrategy( double median,
523                                                      double standardDeviation,
524                                                      int numDeviationsAboveAndBelow ) {
525            this.median = median;
526            this.standardDeviation = Math.abs(standardDeviation);
527            this.numberOfDeviationsAboveAndBelow = Math.abs(numDeviationsAboveAndBelow);
528        }
529
530        @Override
531        public T getLowerBound() {
532            double lower = this.median - (standardDeviation * numberOfDeviationsAboveAndBelow);
533            return Histogram.this.math.create(lower);
534        }
535
536        @Override
537        public T getUpperBound() {
538            double upper = this.median + (standardDeviation * numberOfDeviationsAboveAndBelow);
539            return Histogram.this.math.create(upper);
540        }
541    }
542
543}