final class UTF8String extends Comparable[UTF8String] with Externalizable with KryoSerializable with Cloneable
A UTF-8 String for internal Spark use.
A String encoded in UTF-8 as an Array[Byte], which can be used for comparison and search; see http://en.wikipedia.org/wiki/UTF-8 for details.
Note: This is not designed for general use cases and should not be used outside SQL.
- Alphabetic
- By Inheritance
- UTF8String
- Cloneable
- KryoSerializable
- Externalizable
- Serializable
- Comparable
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
clone(): UTF8String
- Definition Classes
- UTF8String → AnyRef
- def compare(other: UTF8String): Int
-
def
compareTo(other: UTF8String): Int
- Definition Classes
- UTF8String → Comparable
-
def
contains(substring: UTF8String): Boolean
Returns whether this contains substring or not.
- def copy(): UTF8String
- def endsWith(suffix: UTF8String): Boolean
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(other: Any): Boolean
- Definition Classes
- UTF8String → AnyRef → Any
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
- def findInSet(match: UTF8String): Int
- def getBaseObject(): AnyRef
- def getBaseOffset(): Long
-
def
getByteBuffer(): ByteBuffer
Returns a ByteBuffer wrapping the base object if it is a byte array or a copy of the data if the base object is not a byte array.
Returns a ByteBuffer wrapping the base object if it is a byte array or a copy of the data if the base object is not a byte array. Unlike getBytes this will not create a copy of the array if this is a slice.
-
def
getBytes(): Array[Byte]
Returns the underlying bytes; this will be a copy if they are part of another array.
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
getPrefix(): Long
Returns a 64-bit integer that can be used as the prefix used in sorting.
-
def
hashCode(): Int
- Definition Classes
- UTF8String → AnyRef → Any
-
def
indexOf(v: UTF8String, start: Int): Int
Returns the position of the first occurrence of substr in current string from the specified position (0-based index).
Returns the position of the first occurrence of substr in current string from the specified position (0-based index).
- v
the string to be searched
- start
the start position of the current string for searching
- returns
the position of the first occurrence of substr, if not found, -1 returned.
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
def
levenshteinDistance(other: UTF8String): Int
Levenshtein distance is a metric for measuring the distance of two strings.
Levenshtein distance is a metric for measuring the distance of two strings. The distance is defined by the minimum number of single-character edits (i.e. insertions, deletions or substitutions) that are required to change one of the strings into the other.
-
def
lpad(len: Int, pad: UTF8String): UTF8String
Returns str, left-padded with pad to a length of len.
Returns str, left-padded with pad to a length of len. For example: ('hi', 5, '??') => '???hi' ('hi', 1, '??') => 'h'
- def matchAt(s: UTF8String, pos: Int): Boolean
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
def
numBytes(): Int
Returns the number of bytes
-
def
numChars(): Int
Returns the number of code points in it.
-
def
read(kryo: Kryo, in: Input): Unit
- Definition Classes
- UTF8String → KryoSerializable
-
def
readExternal(in: ObjectInput): Unit
- Definition Classes
- UTF8String → Externalizable
- def repeat(times: Int): UTF8String
- def replace(search: UTF8String, replace: UTF8String): UTF8String
- def reverse(): UTF8String
-
def
rpad(len: Int, pad: UTF8String): UTF8String
Returns str, right-padded with pad to a length of len. For example: ('hi', 5, '??') => 'hi???' ('hi', 1, '??') => 'h'
-
def
soundex(): UTF8String
Encodes a string into a Soundex value.
Encodes a string into a Soundex value. Soundex is an encoding used to relate similar names, but can also be used as a general purpose scheme to find words with similar phonemes. https://en.wikipedia.org/wiki/Soundex
- def split(pattern: UTF8String, limit: Int): Array[UTF8String]
- def startsWith(prefix: UTF8String): Boolean
-
def
subStringIndex(delim: UTF8String, count: Int): UTF8String
Returns the substring from string str before count occurrences of the delimiter delim.
Returns the substring from string str before count occurrences of the delimiter delim. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. subStringIndex performs a case-sensitive match when searching for delim.
-
def
substring(start: Int, until: Int): UTF8String
Returns a substring of this.
Returns a substring of this.
- start
the position of first code point
- until
the position after last code point, exclusive.
- def substringSQL(pos: Int, length: Int): UTF8String
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
- def toByte(intWrapper: IntWrapper): Boolean
- def toByteExact(): Byte
-
def
toInt(intWrapper: IntWrapper): Boolean
Parses this UTF8String(trimmed if needed) to int.
Parses this UTF8String(trimmed if needed) to int.
Note that in this method we accumulate the result in negative format and convert it to positive format at the end if this string does not start with '-'. This is because the min value has a larger magnitude than the max value, e.g. Integer.MAX_VALUE is '2147483647' and Integer.MIN_VALUE is '-2147483648'.
This code is mostly copied from LazyInt.parseInt in Hive.
Note that this method is almost the same as toLong, but we leave it duplicated for performance reasons, like Hive does.
- intWrapper
If a valid int was parsed from this UTF8String, then its value would be set in intWrapper
- returns
true if the parsing was successful else false
-
def
toIntExact(): Int
Parses UTF8String(trimmed if needed) to int.
Parses UTF8String(trimmed if needed) to int. This method is used when ANSI is enabled.
- returns
If string contains valid numeric value then it returns the int value otherwise a NumberFormatException is thrown.
-
def
toLong(toLongResult: LongWrapper): Boolean
Parses this UTF8String(trimmed if needed) to long.
Parses this UTF8String(trimmed if needed) to long.
Note that in this method we accumulate the result in negative format and convert it to positive format at the end if this string does not start with '-'. This is because the min value has a larger magnitude than the max value, e.g. Long.MAX_VALUE is '9223372036854775807' and Long.MIN_VALUE is '-9223372036854775808'.
This code is mostly copied from LazyLong.parseLong in Hive.
- toLongResult
If a valid long was parsed from this UTF8String, then its value would be set in toLongResult
- returns
true if the parsing was successful else false
-
def
toLongExact(): Long
Parses UTF8String(trimmed if needed) to long.
Parses UTF8String(trimmed if needed) to long. This method is used when ANSI is enabled.
- returns
If string contains valid numeric value then it returns the long value otherwise a NumberFormatException is thrown.
-
def
toLowerCase(): UTF8String
Returns the lower case of this string
- def toShort(intWrapper: IntWrapper): Boolean
- def toShortExact(): Short
-
def
toString(): String
- Definition Classes
- UTF8String → AnyRef → Any
-
def
toTitleCase(): UTF8String
Returns the title case of this string, which could be used as a title.
-
def
toUpperCase(): UTF8String
Returns the upper case of this string
- def translate(dict: Map[Character, Character]): UTF8String
-
def
trim(trimString: UTF8String): UTF8String
Trims instances of the given trim string from both ends of this string.
Trims instances of the given trim string from both ends of this string.
- trimString
the trim character string
- returns
this string with no occurrences of the trim string at the start or end, or null if trimString is null
-
def
trim(): UTF8String
Trims space characters (ASCII 32) from both ends of this string.
Trims space characters (ASCII 32) from both ends of this string.
- returns
this string with no spaces at the start or end
-
def
trimAll(): UTF8String
Trims whitespaces (<= ASCII 32) from both ends of this string.
Trims whitespaces (<= ASCII 32) from both ends of this string.
Note that this method is the same as Java's String#trim, and different from UTF8String#trim() which removes only spaces (= ASCII 32) from both ends.
- returns
A UTF8String whose value is this UTF8String, with any leading and trailing white space removed, or this UTF8String if it has no leading or trailing whitespace.
-
def
trimLeft(trimString: UTF8String): UTF8String
Trims instances of the given trim string from the start of this string.
Trims instances of the given trim string from the start of this string.
- trimString
the trim character string
- returns
this string with no occurrences of the trim string at the start, or null if trimString is null
-
def
trimLeft(): UTF8String
Trims space characters (ASCII 32) from the start of this string.
Trims space characters (ASCII 32) from the start of this string.
- returns
this string with no spaces at the start
-
def
trimRight(trimString: UTF8String): UTF8String
Trims instances of the given trim string from the end of this string.
Trims instances of the given trim string from the end of this string.
- trimString
the trim character string
- returns
this string with no occurrences of the trim string at the end, or null if trimString is null
-
def
trimRight(): UTF8String
Trims space characters (ASCII 32) from the end of this string.
Trims space characters (ASCII 32) from the end of this string.
- returns
this string with no spaces at the end
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
write(kryo: Kryo, out: Output): Unit
- Definition Classes
- UTF8String → KryoSerializable
-
def
writeExternal(out: ObjectOutput): Unit
- Definition Classes
- UTF8String → Externalizable
- def writeTo(out: OutputStream): Unit
- def writeTo(buffer: ByteBuffer): Unit
-
def
writeToMemory(target: Any, targetOffset: Long): Unit
Writes the content of this string into a memory address, identified by an object and an offset.
Writes the content of this string into a memory address, identified by an object and an offset. The target memory address must already have been allocated, and have enough space to hold all the bytes in this string.