object EstimationUtils
- Alphabetic
- By Inheritance
- EstimationUtils
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Type Members
-
case class
OverlappedRange(lo: Double, hi: Double, leftNdv: Double, rightNdv: Double, leftNumRows: Double, rightNumRows: Double) extends Product with Serializable
A join between two equi-height histograms may produce multiple overlapped ranges.
A join between two equi-height histograms may produce multiple overlapped ranges. Each overlapped range is produced by a part of one bin in the left histogram and a part of one bin in the right histogram.
- lo
lower bound of this overlapped range.
- hi
upper bound of this overlapped range.
- leftNdv
ndv in the left part.
- rightNdv
ndv in the right part.
- leftNumRows
number of rows in the left part.
- rightNumRows
number of rows in the right part.
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
- def ceil(bigDecimal: BigDecimal): BigInt
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
columnStatsExist(statsAndAttr: (Statistics, Attribute)*): Boolean
Check if each attribute has a column stat in the corresponding statistics.
-
def
columnStatsWithCountsExist(statsAndAttr: (Statistics, Attribute)*): Boolean
Check if each attribute has a column stat containing distinct and null counts in the corresponding statistics.
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
- def fromDouble(double: Double, dataType: DataType): Any
-
def
getAliasStats(expressions: Seq[Expression], attributeStats: AttributeMap[ColumnStat]): Seq[(Attribute, ColumnStat)]
Returns the stats for aliases of the child's attributes.
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
getOutputMap(inputMap: AttributeMap[ColumnStat], output: Seq[Attribute]): AttributeMap[ColumnStat]
Get column stats for output attributes.
- def getOutputSize(attributes: Seq[Attribute], outputRowCount: BigInt, attrStats: AttributeMap[ColumnStat] = AttributeMap(Nil)): BigInt
-
def
getOverlappedRanges(leftHistogram: Histogram, rightHistogram: Histogram, lowerBound: Double, upperBound: Double): Seq[OverlappedRange]
Returns overlapped ranges between two histograms, in the given value range [lowerBound, upperBound].
- def getSizePerRow(attributes: Seq[Attribute], attrStats: AttributeMap[ColumnStat] = AttributeMap(Nil)): BigInt
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
def
nullColumnStat(dataType: DataType, rowCount: BigInt): ColumnStat
Statistics for a Column containing only NULLs.
-
def
numBinsHoldingRange(upperBound: Double, upperBoundInclusive: Boolean, lowerBound: Double, lowerBoundInclusive: Boolean, bins: Array[HistogramBin]): Double
Returns the number of histogram bins holding values within the given range [lowerBound, upperBound].
Returns the number of histogram bins holding values within the given range [lowerBound, upperBound].
Note that the returned value is of type Double, because the range boundaries usually occupy only a portion of a bin. An extreme case is [value, value], which is generated by the equality predicate
col = value; we get higher accuracy by allowing a fractional number of histogram bins to be returned.
- upperBound
the highest value of the given range
- upperBoundInclusive
whether the upperBound is included in the range
- lowerBound
the lowest value of the given range
- lowerBoundInclusive
whether the lowerBound is included in the range
- bins
an array of bins for a given numeric equi-height histogram
-
def
rowCountsExist(plans: LogicalPlan*): Boolean
Check if each plan has rowCount in its statistics.
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
toDouble(value: Any, dataType: DataType): Double
For simplicity we use Double to unify operations for data types whose min/max values can be represented as numbers.
For simplicity we use Double to unify operations for data types whose min/max values can be represented as numbers, e.g. Boolean can be represented as 0 (false) or 1 (true). This method and fromDouble together form the contract of conversion.
-
def
toString(): String
- Definition Classes
- AnyRef → Any
-
def
trimBin(bin: HistogramBin, height: Double, lowerBound: Double, upperBound: Double): (HistogramBin, Double)
Given an original bin and a value range [lowerBound, upperBound], returns the trimmed part of the bin in that range and its number of rows.
Given an original bin and a value range [lowerBound, upperBound], returns the trimmed part of the bin in that range and its number of rows.
- bin
the input histogram bin.
- height
the number of rows of the given histogram bin inside an equi-height histogram.
- lowerBound
lower bound of the given range.
- upperBound
upper bound of the given range.
- returns
trimmed part of the given bin and its number of rows.
-
def
updateNdv(oldNumRows: BigInt, newNumRows: BigInt, oldNdv: BigInt): BigInt
Updates (scales down) the number of distinct values if the number of rows decreases after some operation (such as filter, join).
Updates (scales down) the number of distinct values if the number of rows decreases after some operation (such as filter, join). Otherwise keep it unchanged.
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()