public interface DataFrame extends Dataset<Tuple>, java.lang.Iterable<BaseVector>
Modifier and Type | Method and Description |
---|---|
default BaseVector |
apply(java.lang.Enum<?> e)
Selects column using an enum value.
|
default BaseVector |
apply(java.lang.String colName)
Selects column based on the column name and returns it as a Column.
|
default BooleanVector |
booleanVector(java.lang.Enum<?> e)
Selects column using an enum value.
|
BooleanVector |
booleanVector(int i)
Selects column based on the column index.
|
default BooleanVector |
booleanVector(java.lang.String colName)
Selects column based on the column name.
|
default ByteVector |
byteVector(java.lang.Enum<?> e)
Selects column using an enum value.
|
ByteVector |
byteVector(int i)
Selects column based on the column index.
|
default ByteVector |
byteVector(java.lang.String colName)
Selects column based on the column name.
|
default CharVector |
charVector(java.lang.Enum<?> e)
Selects column using an enum value.
|
CharVector |
charVector(int i)
Selects column based on the column index.
|
default CharVector |
charVector(java.lang.String colName)
Selects column based on the column name.
|
static java.util.stream.Collector<Tuple,java.util.List<Tuple>,DataFrame> |
collect()
Returns a stream collector that accumulates tuples into a DataFrame.
|
static <T> java.util.stream.Collector<T,java.util.List<T>,DataFrame> |
collect(java.lang.Class<T> clazz)
Returns a stream collector that accumulates objects into a DataFrame.
|
default BaseVector |
column(java.lang.Enum<?> e)
Selects column using an enum value.
|
BaseVector |
column(int i)
Selects column based on the column index.
|
default BaseVector |
column(java.lang.String colName)
Selects column based on the column name.
|
int |
columnIndex(java.lang.String name)
Returns the index of a given column name.
|
default DoubleVector |
doubleVector(java.lang.Enum<?> e)
Selects column using an enum value.
|
DoubleVector |
doubleVector(int i)
Selects column based on the column index.
|
default DoubleVector |
doubleVector(java.lang.String colName)
Selects column based on the column name.
|
DataFrame |
drop(int... cols)
Returns a new DataFrame without given column indices.
|
default DataFrame |
drop(java.lang.String... cols)
Returns a new DataFrame without given column names.
|
default DataFrame |
factorize(java.lang.String... cols)
Returns a new DataFrame with given columns converted to nominal.
|
default FloatVector |
floatVector(java.lang.Enum<?> e)
Selects column using an enum value.
|
FloatVector |
floatVector(int i)
Selects column based on the column index.
|
default FloatVector |
floatVector(java.lang.String colName)
Selects column based on the column name.
|
default java.lang.Object |
get(int i,
int j)
Returns the cell at (i, j).
|
default java.lang.Object |
get(int i,
java.lang.String field)
Returns the cell at row i in the column named by field.
|
default <T> T[] |
getArray(int i,
int j)
Returns the value at position (i, j) of array type.
|
default <T> T[] |
getArray(int i,
java.lang.String field)
Returns the field value of array type.
|
default boolean |
getBoolean(int i,
int j)
Returns the value at position (i, j) as a primitive boolean.
|
default boolean |
getBoolean(int i,
java.lang.String field)
Returns the field value as a primitive boolean.
|
default byte |
getByte(int i,
int j)
Returns the value at position (i, j) as a primitive byte.
|
default byte |
getByte(int i,
java.lang.String field)
Returns the field value as a primitive byte.
|
default char |
getChar(int i,
int j)
Returns the value at position (i, j) as a primitive char.
|
default char |
getChar(int i,
java.lang.String field)
Returns the field value as a primitive char.
|
default java.time.LocalDate |
getDate(int i,
int j)
Returns the value at position (i, j) of date type as java.time.LocalDate.
|
default java.time.LocalDate |
getDate(int i,
java.lang.String field)
Returns the field value of date type as java.time.LocalDate.
|
default java.time.LocalDateTime |
getDateTime(int i,
int j)
Returns the value at position (i, j) as java.time.LocalDateTime.
|
default java.time.LocalDateTime |
getDateTime(int i,
java.lang.String field)
Returns the field value as java.time.LocalDateTime.
|
default java.math.BigDecimal |
getDecimal(int i,
int j)
Returns the value at position (i, j) of decimal type as java.math.BigDecimal.
|
default java.math.BigDecimal |
getDecimal(int i,
java.lang.String field)
Returns the field value of decimal type as java.math.BigDecimal.
|
default double |
getDouble(int i,
int j)
Returns the value at position (i, j) as a primitive double.
|
default double |
getDouble(int i,
java.lang.String field)
Returns the field value as a primitive double.
|
default float |
getFloat(int i,
int j)
Returns the value at position (i, j) as a primitive float.
|
default float |
getFloat(int i,
java.lang.String field)
Returns the field value as a primitive float.
|
default int |
getInt(int i,
int j)
Returns the value at position (i, j) as a primitive int.
|
default int |
getInt(int i,
java.lang.String field)
Returns the field value as a primitive int.
|
default long |
getLong(int i,
int j)
Returns the value at position (i, j) as a primitive long.
|
default long |
getLong(int i,
java.lang.String field)
Returns the field value as a primitive long.
|
default java.lang.String |
getScale(int i,
int j)
Returns the value at position (i, j) of NominalScale or OrdinalScale.
|
default java.lang.String |
getScale(int i,
java.lang.String field)
Returns the field value of NominalScale or OrdinalScale.
|
default short |
getShort(int i,
int j)
Returns the value at position (i, j) as a primitive short.
|
default short |
getShort(int i,
java.lang.String field)
Returns the field value as a primitive short.
|
default java.lang.String |
getString(int i,
int j)
Returns the value at position (i, j) as a String object.
|
default java.lang.String |
getString(int i,
java.lang.String field)
Returns the field value as a String object.
|
default Tuple |
getStruct(int i,
int j)
Returns the value at position (i, j) of struct type.
|
default Tuple |
getStruct(int i,
java.lang.String field)
Returns the field value of struct type.
|
default java.time.LocalTime |
getTime(int i,
int j)
Returns the value at position (i, j) of time type as java.time.LocalTime.
|
default java.time.LocalTime |
getTime(int i,
java.lang.String field)
Returns the field value of time type as java.time.LocalTime.
|
default IntVector |
intVector(java.lang.Enum<?> e)
Selects column using an enum value.
|
IntVector |
intVector(int i)
Selects column based on the column index.
|
default IntVector |
intVector(java.lang.String colName)
Selects column based on the column name.
|
default boolean |
isNullAt(int i,
int j)
Checks whether the value at position (i, j) is null.
|
default boolean |
isNullAt(int i,
java.lang.String field)
Checks whether the field value is null.
|
default LongVector |
longVector(java.lang.Enum<?> e)
Selects column using an enum value.
|
LongVector |
longVector(int i)
Selects column based on the column index.
|
default LongVector |
longVector(java.lang.String colName)
Selects column based on the column name.
|
static java.util.stream.Collector<Tuple,java.util.List<Tuple>,Matrix> |
matrix()
Returns a stream collector that accumulates tuples into a Matrix.
|
default Measure[] |
measures()
Returns the column measures.
|
DataFrame |
merge(BaseVector... vectors)
Merges the given column vectors with this data frame horizontally by columns.
|
DataFrame |
merge(DataFrame... dataframes)
Merges data frames horizontally by columns.
|
default java.lang.String[] |
names()
Returns the column names.
|
int |
ncols()
Returns the number of columns.
|
default int |
nrows()
Returns the number of rows.
|
static DataFrame |
of(BaseVector... vectors)
Creates a DataFrame from a set of vectors.
|
default DataFrame |
of(boolean... index)
Returns a new data frame with boolean indexing.
|
static <T> DataFrame |
of(java.util.Collection<java.util.Map<java.lang.String,T>> data,
StructType schema)
Creates a DataFrame from a set of Maps.
|
static DataFrame |
of(double[][] data,
java.lang.String... names)
Creates a DataFrame from a 2-dimensional array.
|
static DataFrame |
of(float[][] data,
java.lang.String... names)
Creates a DataFrame from a 2-dimensional array.
|
default DataFrame |
of(int... index)
Returns a new data frame with row indexing.
|
static DataFrame |
of(int[][] data,
java.lang.String... names)
Creates a DataFrame from a 2-dimensional array.
|
static DataFrame |
of(java.util.List<? extends Tuple> data)
Creates a DataFrame from a set of tuples.
|
static DataFrame |
of(java.util.List<? extends Tuple> data,
StructType schema)
Creates a DataFrame from a set of tuples.
|
static <T> DataFrame |
of(java.util.List<T> data,
java.lang.Class<T> clazz)
Creates a DataFrame from a collection.
|
static DataFrame |
of(java.sql.ResultSet rs)
Creates a DataFrame from a JDBC ResultSet.
|
static DataFrame |
of(java.util.stream.Stream<? extends Tuple> data)
Creates a DataFrame from a stream of tuples.
|
static DataFrame |
of(java.util.stream.Stream<? extends Tuple> data,
StructType schema)
Creates a DataFrame from a stream of tuples.
|
default DataFrame |
omitNullRows()
Returns a new data frame without rows that have null/missing values.
|
StructType |
schema()
Returns the schema of DataFrame.
|
DataFrame |
select(int... cols)
Selects a new DataFrame with given column indices.
|
default DataFrame |
select(java.lang.String... cols)
Selects a new DataFrame with given column names.
|
default ShortVector |
shortVector(java.lang.Enum<?> e)
Selects column using an enum value.
|
ShortVector |
shortVector(int i)
Selects column based on the column index.
|
default ShortVector |
shortVector(java.lang.String colName)
Selects column based on the column name.
|
default DataFrame |
slice(int from,
int to)
Copies the specified range into a new data frame.
|
default StringVector |
stringVector(java.lang.Enum<?> e)
Selects column using an enum value.
|
StringVector |
stringVector(int i)
Selects column based on the column index.
|
default StringVector |
stringVector(java.lang.String colName)
Selects column based on the column name.
|
default DataFrame |
structure()
Returns the structure of data frame.
|
default DataFrame |
summary()
Returns the statistic summary of numeric columns.
|
default double[][] |
toArray()
Return an array obtained by converting all the variables
in a data frame to numeric mode and then binding them together
as the columns of a matrix.
|
default double[][] |
toArray(boolean bias,
CategoricalEncoder encoder)
Return an array obtained by converting all the variables
in a data frame to numeric mode and then binding them together
as the columns of a matrix.
|
default Matrix |
toMatrix()
Return a matrix obtained by converting all the variables
in a data frame to numeric mode and then binding them together
as the columns of a matrix.
|
default Matrix |
toMatrix(boolean bias,
CategoricalEncoder encoder,
java.lang.String rowNames)
Return a matrix obtained by converting all the variables
in a data frame to numeric mode and then binding them together
as the columns of a matrix.
|
default java.lang.String |
toString(int numRows)
Returns the string representation of top rows.
|
default java.lang.String |
toString(int numRows,
boolean truncate)
Returns the string representation of top rows.
|
default java.lang.String |
toString(int i,
int j)
Returns the string representation of the value at position (i, j).
|
default java.lang.String |
toString(int i,
java.lang.String field)
Returns the string representation of the field value.
|
default java.lang.String[][] |
toStrings(int numRows)
Returns the string representation of top rows.
|
default java.lang.String[][] |
toStrings(int numRows,
boolean truncate)
Returns the string representation of top rows.
|
default DataType[] |
types()
Returns the column types.
|
DataFrame |
union(DataFrame... dataframes)
Merges data frames vertically by rows.
|
default <T> Vector<T> |
vector(java.lang.Enum<?> e)
Selects column using an enum value.
|
<T> Vector<T> |
vector(int i)
Selects column based on the column index.
|
default <T> Vector<T> |
vector(java.lang.String colName)
Selects column based on the column name.
|
StructType schema()
default java.lang.String[] names()
default DataType[] types()
default Measure[] measures()
default int nrows()
int ncols()
default DataFrame structure()
default DataFrame omitNullRows()
default java.lang.Object get(int i, int j)
default java.lang.Object get(int i, java.lang.String field)
default DataFrame of(int... index)
default DataFrame of(boolean... index)
default DataFrame slice(int from, int to)
from - the initial index of the range to be copied, inclusive.
to - the final index of the range to be copied, exclusive.
default boolean isNullAt(int i, int j)
default boolean isNullAt(int i, java.lang.String field)
default boolean getBoolean(int i, int j)
java.lang.ClassCastException - when data type does not match.
java.lang.NullPointerException - when value is null.
default boolean getBoolean(int i, java.lang.String field)
java.lang.ClassCastException - when data type does not match.
java.lang.NullPointerException - when value is null.
default char getChar(int i, int j)
java.lang.ClassCastException - when data type does not match.
java.lang.NullPointerException - when value is null.
default char getChar(int i, java.lang.String field)
java.lang.ClassCastException - when data type does not match.
java.lang.NullPointerException - when value is null.
default byte getByte(int i, int j)
java.lang.ClassCastException - when data type does not match.
java.lang.NullPointerException - when value is null.
default byte getByte(int i, java.lang.String field)
java.lang.ClassCastException - when data type does not match.
java.lang.NullPointerException - when value is null.
default short getShort(int i, int j)
java.lang.ClassCastException - when data type does not match.
java.lang.NullPointerException - when value is null.
default short getShort(int i, java.lang.String field)
java.lang.ClassCastException - when data type does not match.
java.lang.NullPointerException - when value is null.
default int getInt(int i, int j)
java.lang.ClassCastException - when data type does not match.
java.lang.NullPointerException - when value is null.
default int getInt(int i, java.lang.String field)
java.lang.ClassCastException - when data type does not match.
java.lang.NullPointerException - when value is null.
default long getLong(int i, int j)
java.lang.ClassCastException - when data type does not match.
java.lang.NullPointerException - when value is null.
default long getLong(int i, java.lang.String field)
java.lang.ClassCastException - when data type does not match.
java.lang.NullPointerException - when value is null.
default float getFloat(int i, int j)
java.lang.ClassCastException - when data type does not match.
java.lang.NullPointerException - when value is null.
default float getFloat(int i, java.lang.String field)
java.lang.ClassCastException - when data type does not match.
java.lang.NullPointerException - when value is null.
default double getDouble(int i, int j)
java.lang.ClassCastException - when data type does not match.
java.lang.NullPointerException - when value is null.
default double getDouble(int i, java.lang.String field)
java.lang.ClassCastException - when data type does not match.
java.lang.NullPointerException - when value is null.
default java.lang.String getString(int i, int j)
java.lang.ClassCastException - when data type does not match.
default java.lang.String getString(int i, java.lang.String field)
java.lang.ClassCastException - when data type does not match.
default java.lang.String toString(int i, int j)
default java.lang.String toString(int i, java.lang.String field)
default java.math.BigDecimal getDecimal(int i, int j)
java.lang.ClassCastException - when data type does not match.
default java.math.BigDecimal getDecimal(int i, java.lang.String field)
java.lang.ClassCastException - when data type does not match.
default java.time.LocalDate getDate(int i, int j)
java.lang.ClassCastException - when data type does not match.
default java.time.LocalDate getDate(int i, java.lang.String field)
java.lang.ClassCastException - when data type does not match.
default java.time.LocalTime getTime(int i, int j)
java.lang.ClassCastException - when data type does not match.
default java.time.LocalTime getTime(int i, java.lang.String field)
java.lang.ClassCastException - when data type does not match.
default java.time.LocalDateTime getDateTime(int i, int j)
java.lang.ClassCastException - when data type does not match.
default java.time.LocalDateTime getDateTime(int i, java.lang.String field)
java.lang.ClassCastException - when data type does not match.
default java.lang.String getScale(int i, int j)
java.lang.ClassCastException - when the data is not nominal or ordinal.
default java.lang.String getScale(int i, java.lang.String field)
java.lang.ClassCastException - when the data is not nominal or ordinal.
default <T> T[] getArray(int i, int j)
java.lang.ClassCastException - when data type does not match.
default <T> T[] getArray(int i, java.lang.String field)
java.lang.ClassCastException - when data type does not match.
default Tuple getStruct(int i, int j)
java.lang.ClassCastException - when data type does not match.
default Tuple getStruct(int i, java.lang.String field)
java.lang.ClassCastException - when data type does not match.
int columnIndex(java.lang.String name)
java.lang.IllegalArgumentException - when a field `name` does not exist.
default BaseVector apply(java.lang.String colName)
default BaseVector apply(java.lang.Enum<?> e)
BaseVector column(int i)
default BaseVector column(java.lang.String colName)
default BaseVector column(java.lang.Enum<?> e)
<T> Vector<T> vector(int i)
default <T> Vector<T> vector(java.lang.String colName)
default <T> Vector<T> vector(java.lang.Enum<?> e)
BooleanVector booleanVector(int i)
default BooleanVector booleanVector(java.lang.String colName)
default BooleanVector booleanVector(java.lang.Enum<?> e)
CharVector charVector(int i)
default CharVector charVector(java.lang.String colName)
default CharVector charVector(java.lang.Enum<?> e)
ByteVector byteVector(int i)
default ByteVector byteVector(java.lang.String colName)
default ByteVector byteVector(java.lang.Enum<?> e)
ShortVector shortVector(int i)
default ShortVector shortVector(java.lang.String colName)
default ShortVector shortVector(java.lang.Enum<?> e)
IntVector intVector(int i)
default IntVector intVector(java.lang.String colName)
default IntVector intVector(java.lang.Enum<?> e)
LongVector longVector(int i)
default LongVector longVector(java.lang.String colName)
default LongVector longVector(java.lang.Enum<?> e)
FloatVector floatVector(int i)
default FloatVector floatVector(java.lang.String colName)
default FloatVector floatVector(java.lang.Enum<?> e)
DoubleVector doubleVector(int i)
default DoubleVector doubleVector(java.lang.String colName)
default DoubleVector doubleVector(java.lang.Enum<?> e)
StringVector stringVector(int i)
default StringVector stringVector(java.lang.String colName)
default StringVector stringVector(java.lang.Enum<?> e)
DataFrame select(int... cols)
default DataFrame select(java.lang.String... cols)
DataFrame drop(int... cols)
DataFrame merge(DataFrame... dataframes)
DataFrame merge(BaseVector... vectors)
DataFrame union(DataFrame... dataframes)
default DataFrame drop(java.lang.String... cols)
default DataFrame factorize(java.lang.String... cols)
cols - column names. If empty, all object columns
in the data frame will be converted.
default double[][] toArray()
default double[][] toArray(boolean bias, CategoricalEncoder encoder)
bias - if true, add the first column of all 1's.
encoder - the categorical variable encoder.
default Matrix toMatrix()
default Matrix toMatrix(boolean bias, CategoricalEncoder encoder, java.lang.String rowNames)
bias - if true, add the first column of all 1's.
encoder - the categorical variable encoder.
rowNames - the column to be used as row names.
default DataFrame summary()
default java.lang.String toString(int numRows)
default java.lang.String toString(int numRows, boolean truncate)
numRows - Number of rows to show.
truncate - Whether to truncate long strings and align cells right.
default java.lang.String[][] toStrings(int numRows)
numRows - Number of rows to show.
default java.lang.String[][] toStrings(int numRows, boolean truncate)
numRows - Number of rows to show.
truncate - Whether to truncate long strings.
static DataFrame of(BaseVector... vectors)
vectors - The column vectors.
static DataFrame of(double[][] data, java.lang.String... names)
data - The data array.
names - the names of the columns.
static DataFrame of(float[][] data, java.lang.String... names)
data - The data array.
names - the names of the columns.
static DataFrame of(int[][] data, java.lang.String... names)
data - The data array.
names - the names of the columns.
static <T> DataFrame of(java.util.List<T> data, java.lang.Class<T> clazz)
T - The type of elements.
data - The data collection.
clazz - The class type of elements.
static DataFrame of(java.util.stream.Stream<? extends Tuple> data)
data - The data stream.
static DataFrame of(java.util.stream.Stream<? extends Tuple> data, StructType schema)
data - The data stream.
static DataFrame of(java.util.List<? extends Tuple> data)
data - The data collection.
static DataFrame of(java.util.List<? extends Tuple> data, StructType schema)
data - The data collection.
static <T> DataFrame of(java.util.Collection<java.util.Map<java.lang.String,T>> data, StructType schema)
data - The data collection.
static DataFrame of(java.sql.ResultSet rs) throws java.sql.SQLException
rs - The JDBC result set.
java.sql.SQLException
static <T> java.util.stream.Collector<T,java.util.List<T>,DataFrame> collect(java.lang.Class<T> clazz)
T - the type of input elements to the reduction operation.
clazz - The class type of elements.
static java.util.stream.Collector<Tuple,java.util.List<Tuple>,DataFrame> collect()