/*
 * ModeShape (http://www.modeshape.org)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.modeshape.common.statistic;

import java.math.BigDecimal;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.modeshape.common.annotation.NotThreadSafe;
import org.modeshape.common.math.MathOperations;
import org.modeshape.common.util.HashCode;
import org.modeshape.common.util.StringUtil;

/**
 * A representation of a histogram of values. The values supplied at construction are copied and sorted, and are then
 * distributed into a configurable number of buckets whose overall range is chosen by a {@link BucketingStrategy}.
 *
 * @param <T> the type of value
 */
@NotThreadSafe
public class Histogram<T extends Number> {

    public static final int DEFAULT_BUCKET_COUNT = 10;
    public static final int DEFAULT_SIGNIFICANT_FIGURES = 4;

    protected final MathOperations<T> math;
    protected final List<T> values;
    private int bucketCount = DEFAULT_BUCKET_COUNT;
    private int significantFigures = DEFAULT_SIGNIFICANT_FIGURES;
    // NOTE(review): this field is only ever assigned null in this class, so the "already computed" guard in compute()
    // never short-circuits and the buckets are rebuilt on every call. Confirm whether caching was intended.
    private BigDecimal bucketWidth;
    private final LinkedList<Bucket> buckets;
    private final BucketingStrategy actualValueStrategy = new DefaultBucketingStrategy();
    private BucketingStrategy bucketingStrategy = actualValueStrategy;

    /**
     * Create a histogram over a copy of the supplied values. The copy is sorted using the comparator of the supplied math
     * operations; the caller's list is not modified.
     *
     * @param operations the math operations used to compare, subtract, round and create values
     * @param values the values to be placed into the histogram
     */
    public Histogram( MathOperations<T> operations,
                      List<T> values ) {
        this.math = operations;
        this.values = new LinkedList<>(values);
        this.buckets = new LinkedList<>();
        this.bucketWidth = null;
        // Sort the data using natural order ...
        Collections.sort(this.values, this.math.getComparator());
    }

    /**
     * Create a histogram over the supplied values.
     *
     * @param operations the math operations used to compare, subtract, round and create values
     * @param values the values to be placed into the histogram
     */
    @SafeVarargs
    public Histogram( MathOperations<T> operations,
                      T... values ) {
        this(operations, Arrays.asList(values));
    }

    /**
     * Get the strategy currently used to determine the lower and upper bounds of the histogram.
     *
     * @return the current bucketing strategy
     */
    public BucketingStrategy getStrategy() {
        return this.bucketingStrategy;
    }

    /**
     * Get the math operations supplied when this histogram was created.
     *
     * @return the math operations
     */
    public MathOperations<T> getMathOperations() {
        return this.math;
    }

    /**
     * Set the histogram to use the standard deviation to determine the bucket sizes. The histogram range becomes
     * <code>median +/- (standardDeviation * sigma)</code>.
     *
     * @param median the value around which the range is centered
     * @param standardDeviation the standard deviation; its absolute value is used
     * @param sigma the number of standard deviations above and below the median; its absolute value is used
     */
    public void setStrategy( double median,
                             double standardDeviation,
                             int sigma ) {
        this.bucketingStrategy = new StandardDeviationBucketingStrategy(median, standardDeviation, sigma);
        this.bucketWidth = null;
    }

    /**
     * Set the histogram to use the supplied minimum and maximum values to determine the bucket size.
     *
     * @param minimum the lower bound of the histogram range
     * @param maximum the upper bound of the histogram range
     */
    public void setStrategy( T minimum,
                             T maximum ) {
        this.bucketingStrategy = new ExplicitBucketingStrategy(minimum, maximum);
        this.bucketWidth = null;
    }

    /**
     * Set the histogram to use the actual minimum and maximum values to determine the bucket sizes.
     */
    public void setStrategyToDefault() {
        this.bucketingStrategy = this.actualValueStrategy;
        this.bucketWidth = null;
    }

    /**
     * Get the number of significant figures configured for the bucket widths.
     *
     * @return the number of significant figures
     */
    public int getSignificantFigures() {
        return significantFigures;
    }

    /**
     * Set the number of significant figures used in the calculation of the bucket widths.
     *
     * @param significantFigures the number of significant figures for the bucket widths
     * @return this histogram, useful for method-chaining
     * @see #DEFAULT_SIGNIFICANT_FIGURES
     */
    public Histogram<T> setSignificantFigures( int significantFigures ) {
        if (significantFigures != this.significantFigures) {
            this.significantFigures = significantFigures;
            this.bucketWidth = null;
            this.buckets.clear();
        }
        return this;
    }

    /**
     * Return the number of buckets in this histogram.
     *
     * @return the number of buckets.
     */
    public int getBucketCount() {
        return bucketCount;
    }

    /**
     * Set the number of buckets that this histogram will use.
     *
     * @param count the number of buckets
     * @return this histogram, useful for method-chaining
     * @see #DEFAULT_BUCKET_COUNT
     */
    public Histogram<T> setBucketCount( int count ) {
        if (count != this.bucketCount) {
            this.bucketCount = count;
            this.bucketWidth = null;
            this.buckets.clear();
        }
        return this;
    }

    /**
     * Get the buckets in this histogram. If the histogram has not yet been computed, this method will cause it to be generated.
     * The resulting list should not be modified.
     *
     * @return the histogram buckets.
     */
    public List<Bucket> getBuckets() {
        compute();
        return this.buckets;
    }

    /**
     * Build the buckets from the current strategy and distribute the (sorted) values into them.
     */
    protected void compute() {
        // Only compute if there is not already a histogram ...
        if (this.bucketWidth != null) return;

        // Find the lower and upper bounds of the histogram using the strategy ...
        T lowerBound = this.bucketingStrategy.getLowerBound();
        T upperBound = this.bucketingStrategy.getUpperBound();

        // Find the actual minimum and maximum values ...
        T actualMinimum = this.actualValueStrategy.getLowerBound();
        T actualMaximum = this.actualValueStrategy.getUpperBound();

        // Create the buckets ...
        List<T> boundaries = getBucketBoundaries(this.math,
                                                 lowerBound,
                                                 upperBound,
                                                 actualMinimum,
                                                 actualMaximum,
                                                 this.bucketCount,
                                                 this.significantFigures);
        this.buckets.clear();
        int numBuckets = boundaries.isEmpty() ? 0 : boundaries.size() - 1;
        for (int i = 0; i != numBuckets; ++i) {
            this.buckets.add(new Bucket(boundaries.get(i), boundaries.get(i + 1)));
        }

        // Create the histogram by adding values to each range. Because the values are sorted, a single forward pass over
        // the buckets suffices: advance to the next bucket only while the value belongs in a later one ...
        Iterator<Bucket> intervalIterator = this.buckets.iterator();
        Bucket currentInterval = null;
        for (T value : this.values) {
            while (currentInterval == null || currentInterval.checkValue(value, !intervalIterator.hasNext()) > 0) {
                if (!intervalIterator.hasNext()) break;
                currentInterval = intervalIterator.next();
            }
            if (currentInterval != null) currentInterval.addValue(value);
        }
    }

    /**
     * Return the total number of values that have gone into this histogram.
     *
     * @return the total number of values
     * @see Bucket#getPercentageOfValues()
     */
    public long getTotalNumberOfValues() {
        return this.values.size();
    }

    /**
     * Get the largest percentage held by any of the current buckets. This method does not trigger computation of the
     * histogram.
     *
     * @return the maximum percentage, or 0 if there are no buckets or no bucket contains a value
     */
    protected float getMaximumPercentage() {
        float maxPercentage = 0.0f;
        for (Bucket bucket : this.buckets) {
            maxPercentage = Math.max(maxPercentage, bucket.getPercentageOfValues());
        }
        return maxPercentage;
    }

    /**
     * Get the largest number of values held by any of the current buckets. This method does not trigger computation of the
     * histogram.
     *
     * @return the maximum count, or 0 if there are no buckets or no bucket contains a value
     */
    protected long getMaximumCount() {
        long maxCount = 0L;
        for (Bucket bucket : this.buckets) {
            maxCount = Math.max(maxCount, bucket.getNumberOfValues());
        }
        return maxCount;
    }

    /**
     * Generate a textual (horizontal) bar graph of this histogram.
     *
     * @param maxBarLength the maximum bar length, or 0 if the bar length is to represent actual counts
     * @return the strings that make up the histogram
     */
    public List<String> getTextGraph( int maxBarLength ) {
        compute();
        if (maxBarLength < 1) maxBarLength = (int)this.getMaximumCount();
        // The bars are scaled so that the fullest bucket gets 'maxBarLength' characters. When no bucket holds any value
        // (e.g., the histogram has no values) the maximum percentage is zero; dividing by it would produce an infinite
        // length that narrows to Integer.MAX_VALUE, so fall back to the unscaled length in that case.
        final float maxPercentage = getMaximumPercentage();
        final boolean canScale = !this.buckets.isEmpty() && maxPercentage > 0.0f;
        final float barLengthForHundredPercent = canScale ? 100.0f * maxBarLength / maxPercentage : maxBarLength;
        final String fullLengthBar = StringUtil.createString('*', (int)barLengthForHundredPercent);
        List<String> result = new LinkedList<>();
        // First calculate the labels and the max length ...
        int maxLowerBoundLength = 0;
        int maxUpperBoundLength = 0;
        for (Bucket bucket : this.buckets) {
            maxLowerBoundLength = Math.max(bucket.getLowerBound().toString().length(), maxLowerBoundLength);
            maxUpperBoundLength = Math.max(bucket.getUpperBound().toString().length(), maxUpperBoundLength);
        }

        // Create the header ...
        int rangeWidth = 1 + maxLowerBoundLength + 3 + maxUpperBoundLength + 1;
        int barWidth = maxBarLength + 20;
        result.add(StringUtil.justifyLeft("Ranges", rangeWidth, ' ') + " Distribution");
        result.add(StringUtil.createString('-', rangeWidth) + ' ' + StringUtil.createString('-', barWidth));

        // The formatter is the same for every row, so create it once ...
        final DecimalFormat percentFormat = new DecimalFormat("###.#");
        for (Bucket bucket : this.buckets) {
            float percent = bucket.getPercentageOfValues();
            long number = bucket.getNumberOfValues();
            StringBuilder sb = new StringBuilder();
            sb.append("[");
            sb.append(StringUtil.justifyLeft(bucket.getLowerBound().toString(), maxLowerBoundLength, ' '));
            sb.append(" - ");
            sb.append(StringUtil.justifyLeft(bucket.getUpperBound().toString(), maxUpperBoundLength, ' '));
            sb.append("] ");
            int barLength = Math.max((int)(barLengthForHundredPercent * percent / 100.0f), 0);
            if (barLength == 0 && number != 0) barLength = 1; // make sure there is a bar for all non-zero buckets
            // Never ask for more characters than the full-length bar actually has ...
            barLength = Math.min(barLength, fullLengthBar.length());
            sb.append(fullLengthBar.substring(0, barLength));
            if (number != 0) {
                sb.append(" ");
                sb.append(number);
                sb.append(" (");
                sb.append(percentFormat.format(percent));
                sb.append("%)");
            }
            result.add(sb.toString());
        }
        return result;
    }

    /**
     * Compute the boundaries of the buckets. The requested bounds are first clamped so that they do not extend beyond the
     * actual minimum and maximum. If the clamped bounds are equal, a list containing that single value twice is returned.
     * Otherwise, when the clamped bounds do not reach the actual minimum and/or maximum, an extra bucket is reserved on that
     * side (reducing the number of evenly-sized buckets accordingly), the bounds are rounded outward based upon the scale of
     * the total width, and the range between the rounded bounds is divided evenly.
     *
     * @param <T> the type of value
     * @param math the math operations used to compare, subtract, round and create values
     * @param lowerBound the requested lower bound of the histogram range
     * @param upperBound the requested upper bound of the histogram range
     * @param actualMinimum the smallest actual value
     * @param actualMaximum the largest actual value
     * @param bucketCount the desired total number of buckets, including any extra lower or upper bucket
     * @param bucketWidthSigFigs the number of significant figures for the bucket width; currently unused, as no
     *        significant-figure rounding is applied to the bucket width
     * @return the bucket boundaries, where each adjacent pair delimits one bucket; empty if the computed bucket width is not
     *         positive
     */
    protected static <T> List<T> getBucketBoundaries( MathOperations<T> math,
                                                      T lowerBound,
                                                      T upperBound,
                                                      T actualMinimum,
                                                      T actualMaximum,
                                                      int bucketCount,
                                                      int bucketWidthSigFigs ) {
        // Clamp the requested bounds to the actual range of the values ...
        lowerBound = math.compare(lowerBound, actualMinimum) < 0 ? actualMinimum : lowerBound;
        upperBound = math.compare(actualMaximum, upperBound) < 0 ? actualMaximum : upperBound;
        if (math.compare(lowerBound, upperBound) == 0) {
            List<T> boundaries = new ArrayList<>();
            boundaries.add(lowerBound);
            boundaries.add(upperBound);
            return boundaries;
        }
        final boolean extraLowerBucketNeeded = math.compare(lowerBound, actualMinimum) > 0;
        final boolean extraUpperBucketNeeded = math.compare(actualMaximum, upperBound) > 0;
        if (extraLowerBucketNeeded) --bucketCount;
        if (extraUpperBucketNeeded) --bucketCount;

        // Compute the delta between the lower and upper bound ...
        T totalWidth = math.subtract(upperBound, lowerBound);
        int totalWidthScale = math.getExponentInScientificNotation(totalWidth);

        // Modify the lower bound by rounding down to the next lower meaningful value,
        // using the scale of the totalWidth to determine how to round down.
        T roundedLowerBound = math.roundDown(lowerBound, -totalWidthScale);
        T roundedUpperBound = math.roundUp(upperBound, -totalWidthScale);

        // Create the ranges ...
        double finalLowerBound = math.doubleValue(roundedLowerBound);
        double finalUpperBound = math.doubleValue(roundedUpperBound);
        double finalBucketCount = bucketCount;
        double bucketWidth = (finalUpperBound - finalLowerBound) / finalBucketCount;

        List<T> boundaries = new ArrayList<>();
        // A non-positive width (e.g., from a negative bucket count) yields no boundaries at all ...
        if (bucketWidth > 0.0d) {
            if (extraLowerBucketNeeded) boundaries.add(actualMinimum);
            double nextBoundary = finalLowerBound;
            for (int i = 0; i != bucketCount; ++i) {
                boundaries.add(math.create(nextBoundary));
                nextBoundary = nextBoundary + bucketWidth;
            }
            // Use the rounded upper bound itself rather than the accumulated double for the last evenly-sized bucket ...
            boundaries.add(roundedUpperBound);
            if (extraUpperBucketNeeded) boundaries.add(actualMaximum);
        }
        return boundaries;
    }

    /**
     * Represents a bucket in a histogram.
     */
    public class Bucket implements Comparable<Bucket> {

        private final T lowerBound;
        private final T upperBound;
        private final T width;
        private long numValues;

        /**
         * Create a bucket with the supplied bounds. The width is computed as the upper bound minus the lower bound.
         *
         * @param lowerBound the lower bound of the bucket
         * @param upperBound the upper bound of the bucket
         */
        protected Bucket( T lowerBound,
                          T upperBound ) {
            this.lowerBound = lowerBound;
            this.upperBound = upperBound;
            this.width = Histogram.this.math.subtract(upperBound, lowerBound);
        }

        /**
         * Get the lower bound of this bucket.
         *
         * @return the lower bound
         */
        public T getLowerBound() {
            return lowerBound;
        }

        /**
         * Get the upper bound of this bucket.
         *
         * @return the upper bound
         */
        public T getUpperBound() {
            return upperBound;
        }

        /**
         * Get the width of this bucket.
         *
         * @return the width
         */
        public T getWidth() {
            return this.width;
        }

        /**
         * Return the percentage of values in the histogram that appear in this bucket.
         *
         * @return the percentage of all values in the histogram that appear in this bucket, or 0 if the histogram has no
         *         values
         */
        public float getPercentageOfValues() {
            float total = Histogram.this.getTotalNumberOfValues();
            if (total == 0.0f) return 0.0f;
            float numValuesFloat = this.numValues;
            return 100.0f * numValuesFloat / total;
        }

        /**
         * Add a value to this bucket. Only the count of values is recorded; the value itself is not retained.
         *
         * @param value the value being added
         */
        protected void addValue( T value ) {
            ++this.numValues;
        }

        /**
         * Get the number of values in this bucket.
         *
         * @return the number of values
         */
        public long getNumberOfValues() {
            return this.numValues;
        }

        /**
         * Check whether the value fits in this bucket.
         *
         * @param value the value to check
         * @param isLast true if this is the last bucket, in which case the upper bound is inclusive; otherwise the upper
         *        bound is exclusive
         * @return 0 if the value fits in this bucket, -1 if the value fits in a prior bucket, or 1 if the value fits in a later
         *         bucket
         */
        public int checkValue( T value,
                               boolean isLast ) {
            if (Histogram.this.math.compare(this.lowerBound, value) > 0) return -1;
            if (isLast) {
                if (Histogram.this.math.compare(value, this.upperBound) > 0) return 1;
            } else {
                if (Histogram.this.math.compare(value, this.upperBound) >= 0) return 1;
            }
            return 0;
        }

        @Override
        public int compareTo( Bucket that ) {
            // This is lower if 'that' has a lowerBound that is greater than 'this' lower bound ...
            if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) < 0) return -1;
            if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) > 0) return 1;
            // The lower bounds are the same, so 'this' is lower if 'that' has an upperBound that is greater than 'this' upper
            // bound ...
            if (Histogram.this.math.compare(this.upperBound, that.upperBound) < 0) return -1;
            if (Histogram.this.math.compare(this.upperBound, that.upperBound) > 0) return 1;
            return 0;
        }

        /**
         * Get the class of the values, as reported by the histogram's math operations.
         *
         * @return the operand class
         */
        protected Class<T> getNumberClass() {
            return Histogram.this.math.getOperandClass();
        }

        @Override
        public int hashCode() {
            // Equals asserts that two buckets are equal when all values are equal ...
            return HashCode.compute(lowerBound, upperBound, width);
        }

        @SuppressWarnings( "unchecked" )
        @Override
        public boolean equals( Object obj ) {
            if (obj instanceof Histogram.Bucket) {
                Bucket that = (Bucket)obj;
                // Only compare the bounds when the other bucket's number class is compatible with this one ...
                if (this.getNumberClass().isAssignableFrom(that.getNumberClass())) {
                    if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) != 0) return false;
                    if (Histogram.this.math.compare(this.upperBound, that.upperBound) != 0) return false;
                    if (Histogram.this.math.compare(this.width, that.width) != 0) return false;
                    return true;
                }
            }
            return false;
        }

        @Override
        public String toString() {
            return "[" + this.lowerBound + "," + this.upperBound + ")";
        }

    }

    /**
     * A strategy that determines the lower and upper bounds of the range covered by the histogram.
     */
    public abstract class BucketingStrategy {

        /**
         * Get the histogram's sorted values. This is the histogram's internal list, not a copy.
         *
         * @return the values
         */
        public List<T> getValues() {
            return Histogram.this.values;
        }

        /**
         * Get the lower bound of the histogram range.
         *
         * @return the lower bound
         */
        public abstract T getLowerBound();

        /**
         * Get the upper bound of the histogram range.
         *
         * @return the upper bound
         */
        public abstract T getUpperBound();
    }

    /**
     * A strategy that uses the first and last of the sorted values as the bounds, or zero for both bounds when there are no
     * values.
     */
    public class DefaultBucketingStrategy extends BucketingStrategy {

        @Override
        public T getLowerBound() {
            if (getValues().isEmpty()) return Histogram.this.math.createZeroValue();
            return getValues().get(0);
        }

        @Override
        public T getUpperBound() {
            if (getValues().isEmpty()) return Histogram.this.math.createZeroValue();
            return getValues().get(getValues().size() - 1);
        }
    }

    /**
     * A strategy that uses explicitly supplied lower and upper bounds.
     */
    public class ExplicitBucketingStrategy extends BucketingStrategy {

        private final T lowerBound;
        private final T upperBound;

        /**
         * @param lowerBound the lower bound to report
         * @param upperBound the upper bound to report
         */
        protected ExplicitBucketingStrategy( T lowerBound,
                                             T upperBound ) {
            this.lowerBound = lowerBound;
            this.upperBound = upperBound;
        }

        @Override
        public T getLowerBound() {
            return this.lowerBound;
        }

        @Override
        public T getUpperBound() {
            return this.upperBound;
        }
    }

    /**
     * A strategy whose bounds are a given number of standard deviations below and above a median.
     */
    public class StandardDeviationBucketingStrategy extends BucketingStrategy {

        private final double median;
        private final double standardDeviation;
        private final int numberOfDeviationsAboveAndBelow;

        /**
         * @param median the value around which the range is centered
         * @param standardDeviation the standard deviation; its absolute value is used
         * @param numDeviationsAboveAndBelow the number of standard deviations on each side of the median; its absolute value
         *        is used
         */
        protected StandardDeviationBucketingStrategy( double median,
                                                      double standardDeviation,
                                                      int numDeviationsAboveAndBelow ) {
            this.median = median;
            this.standardDeviation = Math.abs(standardDeviation);
            this.numberOfDeviationsAboveAndBelow = Math.abs(numDeviationsAboveAndBelow);
        }

        @Override
        public T getLowerBound() {
            double lower = this.median - (standardDeviation * numberOfDeviationsAboveAndBelow);
            return Histogram.this.math.create(lower);
        }

        @Override
        public T getUpperBound() {
            double upper = this.median + (standardDeviation * numberOfDeviationsAboveAndBelow);
            return Histogram.this.math.create(upper);
        }
    }

}