public class arrow_dataset extends arrow_dataset
arrow_dataset.CustomOpenWithCompression, arrow_dataset.ExpressionVisitor
| Constructor and Description |
|---|
arrow_dataset() |
| Modifier and Type | Method and Description |
|---|---|
static Expression |
and_(Expression arg0) |
static Expression |
and_(Expression lhs,
Expression rhs) |
static ListArrayResult |
ApplyGroupings(ListArray groupings,
Array array)
Produce a ListArray whose slots are selections of
array which correspond to
the provided groupings. |
static RecordBatchVectorResult |
ApplyGroupings(ListArray groupings,
RecordBatch batch)
Produce selections of a RecordBatch which correspond to the provided groupings.
|
static Expression |
call(BytePointer function,
Expression arguments) |
static Expression |
call(BytePointer function,
Expression arguments,
FunctionOptions options) |
static Expression |
call(String function,
Expression arguments) |
static Expression |
call(String function,
Expression arguments,
FunctionOptions options) |
static ExpressionResult |
Canonicalize(Expression arg0) |
static ExpressionResult |
Canonicalize(Expression arg0,
ExecContext arg1)
Expression passes: functions for modification of Expressions.
These transform bound expressions.
|
static Status |
CheckProjectable(Schema from,
Schema to)
Logic for automatically determining the structure of multi-file
dataset with possible partitioning according to available
partitioning
|
static ExpressionResult |
Deserialize(ArrowBuffer arg0) |
static void |
DieWithMessage(BytePointer msg) |
static void |
DieWithMessage(String msg)
Readahead iterator that iterates on the underlying iterator in a
separate thread, getting up to N values in advance.
|
static Expression |
equal(Expression lhs,
Expression rhs) |
static boolean |
equals(Expression l,
Expression r)
Execute an expression that returns a Result, extracting its value
into the variable defined by
lhs (or returning a Status on error). |
static DatumResult |
ExecuteScalarExpression(Expression arg0,
Datum input) |
static DatumResult |
ExecuteScalarExpression(Expression arg0,
Datum input,
ExecContext arg2)
Execute a scalar expression against the provided state and input Datum.
|
static FieldRefDatumMapResult |
ExtractKnownFieldValues(Expression guaranteed_true_predicate)
Assemble a mapping from field references to known values.
|
static Expression |
field_ref(FieldRef ref) |
static FieldRef |
FieldsInExpression(Expression arg0)
Assemble a list of all fields referenced by an Expression at any depth.
|
static ExpressionResult |
FoldConstants(Expression arg0)
Simplify Expressions based on literal arguments (for example, add(null, x) will always
be null so replace the call with a null literal).
|
static Expression |
greater_equal(Expression lhs,
Expression rhs) |
static Expression |
greater(Expression lhs,
Expression rhs) |
static void |
InvalidValueOrDie(Status st) |
static long |
kDefaultBatchSize() |
static Expression |
less_equal(Expression lhs,
Expression rhs) |
static Expression |
less(Expression lhs,
Expression rhs) |
static Expression |
literal(Datum lit) |
static StructArrayResult |
MakeGroupings(StructArray by)
Assemble lists of indices of identical rows.
|
static Expression |
not_(Expression operand) |
static Expression |
not_equal(Expression lhs,
Expression rhs) |
static boolean |
notEquals(Expression l,
Expression r) |
static Expression |
or_(Expression arg0) |
static Expression |
or_(Expression lhs,
Expression rhs) |
static Expression |
project(Expression values,
StringVector names) |
static ExpressionResult |
ReplaceFieldsWithKnownValues(FieldRefDatumMap known_values,
Expression arg1)
Simplify Expressions by replacing with known values of the fields which it references.
|
static ScanTaskIteratorResult |
ScanTaskIteratorFromRecordBatch(RecordBatchVector batches,
ScanOptions options,
ScanContext arg2) |
static BufferResult |
Serialize(Expression arg0) |
static ExpressionResult |
SimplifyWithGuarantee(Expression arg0,
Expression guaranteed_true_predicate)
Simplify an expression by replacing subexpressions based on a guarantee:
a boolean expression which is guaranteed to evaluate to
true. |
static BytePointer |
StripPrefixAndFilename(BytePointer path,
BytePointer prefix) |
static StringVector |
StripPrefixAndFilename(FileInfo files,
BytePointer prefix) |
static StringVector |
StripPrefixAndFilename(FileInfo files,
String prefix)
Vector version of StripPrefixAndFilename.
|
static String |
StripPrefixAndFilename(String path,
String prefix)
Remove a prefix and the filename of a path.
|
static StringVector |
StripPrefixAndFilename(StringVector paths,
BytePointer prefix) |
static StringVector |
StripPrefixAndFilename(StringVector paths,
String prefix)
Vector version of StripPrefixAndFilename.
|
map@Namespace(value="arrow::internal") public static void DieWithMessage(@StdString String msg)
@Namespace(value="arrow::internal") public static void DieWithMessage(@StdString BytePointer msg)
@Namespace(value="arrow::internal") public static void InvalidValueOrDie(@Const @ByRef Status st)
@Namespace(value="arrow::dataset") @Cast(value="bool") @Name(value="operator ==") public static boolean equals(@Const @ByRef Expression l, @Const @ByRef Expression r)
Execute an expression that returns a Result, extracting its value into the
variable defined by lhs (or returning a Status on error).
Example: Assigning to a new value:
ARROW_ASSIGN_OR_RAISE(auto value, MaybeGetValue(arg));
Example: Assigning to an existing value:
ValueType value;
ARROW_ASSIGN_OR_RAISE(value, MaybeGetValue(arg));
WARNING: ARROW_ASSIGN_OR_RAISE expands into multiple statements;
it cannot be used in a single statement (e.g. as the body of an if
statement without {})!
WARNING: ARROW_ASSIGN_OR_RAISE std::moves its right operand. If you have
an lvalue Result which you *don't* want to move out of, cast appropriately.
@Namespace(value="arrow::dataset") @Cast(value="bool") @Name(value="operator !=") public static boolean notEquals(@Const @ByRef Expression l, @Const @ByRef Expression r)
@Namespace(value="arrow::dataset") @ByVal public static Expression literal(@ByVal Datum lit)
@Namespace(value="arrow::dataset") @ByVal public static Expression field_ref(@ByVal FieldRef ref)
@Namespace(value="arrow::dataset") @ByVal public static Expression call(@StdString String function, @StdVector Expression arguments, @SharedPtr FunctionOptions options)
@Namespace(value="arrow::dataset") @ByVal public static Expression call(@StdString String function, @StdVector Expression arguments)
@Namespace(value="arrow::dataset") @ByVal public static Expression call(@StdString BytePointer function, @StdVector Expression arguments, @SharedPtr FunctionOptions options)
@Namespace(value="arrow::dataset") @ByVal public static Expression call(@StdString BytePointer function, @StdVector Expression arguments)
@Namespace(value="arrow::dataset") @StdVector public static FieldRef FieldsInExpression(@Const @ByRef Expression arg0)
@Namespace(value="arrow::dataset") @ByVal public static FieldRefDatumMapResult ExtractKnownFieldValues(@Const @ByRef Expression guaranteed_true_predicate)
@Namespace(value="arrow::dataset") @ByVal public static ExpressionResult Canonicalize(@ByVal Expression arg0, ExecContext arg1)
Weak canonicalization which establishes guarantees for subsequent passes. Even equivalent Expressions may result in different canonicalized expressions. TODO this could be a strong canonicalization
@Namespace(value="arrow::dataset") @ByVal public static ExpressionResult Canonicalize(@ByVal Expression arg0)
@Namespace(value="arrow::dataset") @ByVal public static ExpressionResult FoldConstants(@ByVal Expression arg0)
@Namespace(value="arrow::dataset") @ByVal public static ExpressionResult ReplaceFieldsWithKnownValues(@Const @ByRef FieldRefDatumMap known_values, @ByVal Expression arg1)
@Namespace(value="arrow::dataset") @ByVal public static ExpressionResult SimplifyWithGuarantee(@ByVal Expression arg0, @Const @ByRef Expression guaranteed_true_predicate)
Simplify an expression by replacing subexpressions based on a guarantee:
a boolean expression which is guaranteed to evaluate to true. For example, this is
used to remove redundant function calls from a filter expression or to replace a
reference to a constant-value field with a literal.
@Namespace(value="arrow::dataset") @ByVal public static DatumResult ExecuteScalarExpression(@Const @ByRef Expression arg0, @Const @ByRef Datum input, ExecContext arg2)
@Namespace(value="arrow::dataset") @ByVal public static DatumResult ExecuteScalarExpression(@Const @ByRef Expression arg0, @Const @ByRef Datum input)
@Namespace(value="arrow::dataset") @ByVal public static BufferResult Serialize(@Const @ByRef Expression arg0)
@Namespace(value="arrow::dataset") @ByVal public static ExpressionResult Deserialize(@SharedPtr ArrowBuffer arg0)
@Namespace(value="arrow::dataset") @ByVal public static Expression project(@StdVector Expression values, @ByVal StringVector names)
@Namespace(value="arrow::dataset") @ByVal public static Expression equal(@ByVal Expression lhs, @ByVal Expression rhs)
@Namespace(value="arrow::dataset") @ByVal public static Expression not_equal(@ByVal Expression lhs, @ByVal Expression rhs)
@Namespace(value="arrow::dataset") @ByVal public static Expression less(@ByVal Expression lhs, @ByVal Expression rhs)
@Namespace(value="arrow::dataset") @ByVal public static Expression less_equal(@ByVal Expression lhs, @ByVal Expression rhs)
@Namespace(value="arrow::dataset") @ByVal public static Expression greater(@ByVal Expression lhs, @ByVal Expression rhs)
@Namespace(value="arrow::dataset") @ByVal public static Expression greater_equal(@ByVal Expression lhs, @ByVal Expression rhs)
@Namespace(value="arrow::dataset") @ByVal public static Expression and_(@ByVal Expression lhs, @ByVal Expression rhs)
@Namespace(value="arrow::dataset") @ByVal public static Expression and_(@StdVector Expression arg0)
@Namespace(value="arrow::dataset") @ByVal public static Expression or_(@ByVal Expression lhs, @ByVal Expression rhs)
@Namespace(value="arrow::dataset") @ByVal public static Expression or_(@StdVector Expression arg0)
@Namespace(value="arrow::dataset") @ByVal public static Expression not_(@ByVal Expression operand)
@Namespace(value="arrow::dataset") @StdString public static String StripPrefixAndFilename(@StdString String path, @StdString String prefix)
Remove a prefix and the filename of a path, e.g.
StripPrefixAndFilename("/data/year=2019/c.txt", "/data") -> "year=2019"
@Namespace(value="arrow::dataset") @StdString public static BytePointer StripPrefixAndFilename(@StdString BytePointer path, @StdString BytePointer prefix)
@Namespace(value="arrow::dataset") @ByVal public static StringVector StripPrefixAndFilename(@Const @ByRef StringVector paths, @StdString String prefix)
@Namespace(value="arrow::dataset") @ByVal public static StringVector StripPrefixAndFilename(@Const @ByRef StringVector paths, @StdString BytePointer prefix)
@Namespace(value="arrow::dataset") @ByVal public static StringVector StripPrefixAndFilename(@StdVector FileInfo files, @StdString String prefix)
@Namespace(value="arrow::dataset") @ByVal public static StringVector StripPrefixAndFilename(@StdVector FileInfo files, @StdString BytePointer prefix)
@Namespace(value="arrow::dataset") @ByVal public static StructArrayResult MakeGroupings(@Const @ByRef StructArray by)
by - [in] A StructArray whose columns will be used as grouping criteria.
Top level nulls are invalid, as are empty criteria (no grouping
columns).
Returns: an array of type struct<values: by.type, groupings: list<int64>>,
which is a mapping from unique rows (field "values") to lists of
indices into by where that row appears (field "groupings").
For example,
MakeGroupings([
{"a": "ex", "b": 0},
{"a": "ex", "b": 0},
{"a": "why", "b": 0},
{"a": "why", "b": 0},
{"a": "ex", "b": 0},
{"a": "why", "b": 1}
]) == [
{"values": {"a": "ex", "b": 0}, "groupings": [0, 1, 4]},
{"values": {"a": "why", "b": 0}, "groupings": [2, 3]},
{"values": {"a": "why", "b": 1}, "groupings": [5]}
]
@Namespace(value="arrow::dataset") @ByVal public static ListArrayResult ApplyGroupings(@Const @ByRef ListArray groupings, @Const @ByRef Array array)
Produce a ListArray whose slots are selections of array which correspond to
the provided groupings.
For example,
ApplyGroupings([[0, 1, 4], [2, 3], [5]], [
{"a": "ex", "b": 0, "passenger": 0},
{"a": "ex", "b": 0, "passenger": 1},
{"a": "why", "b": 0, "passenger": 2},
{"a": "why", "b": 0, "passenger": 3},
{"a": "ex", "b": 0, "passenger": 4},
{"a": "why", "b": 1, "passenger": 5}
]) == [
[
{"a": "ex", "b": 0, "passenger": 0},
{"a": "ex", "b": 0, "passenger": 1},
{"a": "ex", "b": 0, "passenger": 4},
],
[
{"a": "why", "b": 0, "passenger": 2},
{"a": "why", "b": 0, "passenger": 3},
],
[
{"a": "why", "b": 1, "passenger": 5}
]
]
@Namespace(value="arrow::dataset") @ByVal public static RecordBatchVectorResult ApplyGroupings(@Const @ByRef ListArray groupings, @SharedPtr @Cast(value={"","std::shared_ptr<arrow::RecordBatch>"}) RecordBatch batch)
@Namespace(value="arrow::dataset") @ByVal public static Status CheckProjectable(@Const @ByRef Schema from, @Const @ByRef Schema to)
@Namespace(value="arrow::dataset") @MemberGetter @Cast(value="const int64_t") public static long kDefaultBatchSize()
@Namespace(value="arrow::dataset") @ByVal public static ScanTaskIteratorResult ScanTaskIteratorFromRecordBatch(@ByVal RecordBatchVector batches, @SharedPtr ScanOptions options, @SharedPtr ScanContext arg2)
Copyright © 2021. All rights reserved.