类 DataFrame

  • 所有已实现的接口:
    Iterable

    public class DataFrame
    extends Object
    implements Iterable
    作者:
    Yaqiang Wang
    • 构造器详细资料

      • DataFrame

        public DataFrame()
        Constructor
      • DataFrame

        public DataFrame​(ColumnIndex columns)
        Constructor
        参数:
        columns - Columns
      • DataFrame

        public DataFrame​(Index index)
        Constructor
        参数:
        index - Index
      • DataFrame

        public DataFrame​(List index)
        Constructor
        参数:
        index - Index
      • DataFrame

        public DataFrame​(Array data,
                         Index index,
                         ColumnIndex columns)
        Constructor
        参数:
        data - Data array
        columns - Columns
        index - Index
      • DataFrame

        public DataFrame​(Array data,
                         Index index,
                         List<String> columns)
        Constructor
        参数:
        data - Data array
        columns - Columns
        index - Index
      • DataFrame

        public DataFrame​(Index index,
                         ColumnIndex columns,
                         Object data)
        Constructor
        参数:
        index - Index
        columns - Columns
        data - Data
      • DataFrame

        public DataFrame​(Index index,
                         List<String> columns,
                         Object data)
        Constructor
        参数:
        index - Index
        columns - Columns
        data - Data
      • DataFrame

        public DataFrame​(Array data,
                         List index,
                         List<String> columns)
        Constructor
        参数:
        data - Data array
        columns - Columns
        index - Index
      • DataFrame

        public DataFrame​(List<Array> data,
                         Index index,
                         ColumnIndex columns)
        Constructor
        参数:
        data - Data array list
        columns - Columns
        index - Index
      • DataFrame

        public DataFrame​(List<Array> data,
                         Index index,
                         List<String> columns)
        Constructor
        参数:
        data - Data array list
        columns - Columns
        index - Index
      • DataFrame

        public DataFrame​(List<Array> data,
                         List index,
                         List columns)
        Constructor
        参数:
        data - Data array list
        columns - Columns
        index - Index
    • 方法详细资料

      • getData

        public Object getData()
        Get data array
        返回:
        Data array
      • setData

        public void setData​(Array value)
        Set data array
        参数:
        value - Data array
      • setData

        public void setData​(List<Array> value)
        Set data array list
        参数:
        value - Data array list
      • getIndex

        public Index getIndex()
        Get index
        返回:
        Index
      • setIndex

        public void setIndex​(Index value)
        Set index
        参数:
        value - Index
      • setIndex

        public void setIndex​(List value)
        Set index
        参数:
        value - Index value
      • getColumns

        public ColumnIndex getColumns()
        Get columns
        返回:
        Columns
      • getColumnNames

        public List<String> getColumnNames()
        Get column names
        返回:
        Column names
      • getColumnDataTypes

        public List<DataType> getColumnDataTypes()
        Get column data types
        返回:
        Column data types
      • setColumns

        public void setColumns​(ColumnIndex value)
        Set columns
        参数:
        value - Columns
      • setColumns

        public void setColumns​(List<String> colNames)
        Set column names
        参数:
        colNames - Column names
      • isArray2D

        public boolean isArray2D()
        Check whether the data is a 2D array
        返回:
        Boolean
      • updateColumnFormats

        public void updateColumnFormats()
        Update column formats
      • size

        public int size()
        Get the number of columns
        返回:
        The number of columns
      • length

        public int length()
        Get the number of rows
        返回:
        The number of rows
      • isEmpty

        public boolean isEmpty()
        Return true if the data frame contains no data.
         
         > DataFrame<Object> df = new DataFrame<>();
         > df.isEmpty();
         true 
        返回:
        true if the data frame contains no data, false otherwise
      • col

        public List col​(Integer column)
        Return a data frame column as a list.
         
         > DataFrame<Object> df = new DataFrame<>(
         >         Collections.emptyList(),
         >         Arrays.asList("name", "value"),
         >         Arrays.asList(
         >             Arrays.<Object>asList("alpha", "bravo", "charlie"),
         >             Arrays.<Object>asList(1, 2, 3)
         >         )
         >     );
         > df.col(1);
         [1, 2, 3] 
        参数:
        column - the column index
        返回:
        the list of values
      • row

        public List row​(Integer row)
        Return a data frame row as a list.
         
         > DataFrame<Object> df = new DataFrame<>(
         >         Collections.emptyList(),
         >         Collections.emptyList(),
         >         Arrays.asList(
         >             Arrays.<Object>asList("alpha", "bravo", "charlie"),
         >             Arrays.<Object>asList(1, 2, 3)
         >         )
         >     );
         > df.row(1);
         [bravo, 2] 
        参数:
        row - the row index
        返回:
        the list of values
      • rowSeries

        public Series rowSeries​(int row)
        Get row series
        参数:
        row - Row index
        返回:
        Series
      • getShape

        public int[] getShape()
        Get shape
        返回:
        Shape
      • getValue

        public Object getValue​(Object row,
                               Column col)
        Get value
        参数:
        row - Row object
        col - Column object
        返回:
        Value
      • getValue

        public Object getValue​(int row,
                               int col)
        Get value
        参数:
        row - Row index
        col - Column index
        返回:
        Value
      • getValue

        public Object getValue​(int row,
                               String colName)
        Get value
        参数:
        row - Row index
        colName - Column name
        返回:
        Value
      • setValue

        public void setValue​(Object row,
                             Column col,
                             Object v)
        Set value
        参数:
        row - Row
        col - Column
        v - Value
      • setValue

        public void setValue​(int row,
                             int col,
                             Object v)
        Set value
        参数:
        row - Row index
        col - Column index
        v - Value
      • setValue

        public void setValue​(int row,
                             String colName,
                             Object v)
        Set value
        参数:
        row - Row index
        colName - Column name
        v - Value
      • setValue

        public void setValue​(int row,
                             Column column,
                             Object v)
        Set value
        参数:
        row - Row index
        column - Column
        v - Value
      • addColumn

        public void addColumn​(Column column)
        Add column data
        参数:
        column - Column
      • setColumnName

        public void setColumnName​(int idx,
                                  String colName)
        Set column name
        参数:
        idx - Column index
        colName - Column name
      • append

        public DataFrame append​(DataFrame df)
        Append another data frame
        参数:
        df - Another data frame
        返回:
        Appended data frame
      • dataToList

        public void dataToList()
        Convert the 2D data array to a list of arrays.
      • append

        public void append​(Object name,
                           List row)
        Append row data
        参数:
        name - Index element
        row - Row data list
      • append

        public void append​(Object name,
                           Array row)
        Append row data
        参数:
        name - Index element
        row - Row data array
      • append

        public void append​(List row)
        Append row data
        参数:
        row - Row data list
      • setRow

        public void setRow​(Object key,
                           List row)
        Set row data
        参数:
        key - Index key
        row - Row data
      • setRow

        public void setRow​(Object key,
                           Array row)
        Set row data
        参数:
        key - Index key
        row - Row data
      • drop

        public DataFrame drop​(List<String> colNames)
        Create a new data frame by leaving out the specified columns.
        参数:
        colNames - Column names
        返回:
        a shallow copy of the data frame with the columns removed
      • drop

        public DataFrame drop​(Column... cols)
        Create a new data frame by leaving out the specified columns.
        参数:
        cols - the columns to be removed
        返回:
        a shallow copy of the data frame with the columns removed
      • drop

        public DataFrame drop​(Integer... cols)
        Create a new data frame by leaving out the specified columns.
        参数:
        cols - the indices of the columns to be removed
        返回:
        a shallow copy of the data frame with the columns removed
      • retain

        public DataFrame retain​(Object... cols)
        Create a new data frame containing only the specified columns.
         
         > DataFrame<Object> df = new DataFrame<>("name", "value", "category");
         > df.retain("name", "category").columns();
         [name, category] 
        参数:
        cols - the columns to include in the new data frame
        返回:
        a new data frame containing only the specified columns
      • retain

        public DataFrame retain​(Integer... cols)
        Create a new data frame containing only the specified columns.
         
         > DataFrame<Object> df = new DataFrame<>("name", "value", "category");
         > df.retain(0, 2).columns();
         [name, category] 
        参数:
        cols - the columns to include in the new data frame
        返回:
        a new data frame containing only the specified columns
      • numeric

        public DataFrame numeric()
        Return a data frame containing only columns with numeric data.
         
         > DataFrame<Object> df = new DataFrame<>("name", "value");
         > df.append(Arrays.asList("one", 1));
         > df.append(Arrays.asList("two", 2));
         > df.numeric().columns();
         [value] 
        返回:
        a data frame containing only the numeric columns
      • select

        public Object select​(int row,
                             List<Integer> colRange)
        Select by row index and column range
        参数:
        row - Row index
        colRange - Column range
        返回:
        Selected data frame or series
      • select

        public Object select​(List<Integer> rowRange,
                             List<Integer> colRange)
        Select by row and column ranges
        参数:
        rowRange - Row range
        colRange - Column range
        返回:
        Selected data frame or series
      • transpose

        public DataFrame transpose()
        Transpose
        返回:
        Transposed data frame
      • head

        public String head​(int n)
        Convert to string - head
        参数:
        n - Head row number
        返回:
        The string
      • tail

        public String tail​(int n)
        Convert to string - tail
        参数:
        n - Tail row number
        返回:
        The string
      • readTable

        public static DataFrame readTable​(String fileName,
                                          String delimiter,
                                          int skipRows,
                                          String formatSpec,
                                          String encoding,
                                          int indexCol,
                                          String indexFormat,
                                          List<String> names,
                                          Integer header,
                                          int skipFooter)
                                   throws FileNotFoundException,
                                          IOException,
                                          Exception
        Read data frame from ASCII file
        参数:
        fileName - File name
        delimiter - Delimiter
        skipRows - Number of lines to skip at beginning of the file
        formatSpec - Format specifiers string
        encoding - File encoding
        indexCol - Column to be used as index
        indexFormat - Index format
        names - Column names
        header - Row number to use as the column names
        skipFooter - Number of lines at bottom of file to skip
        返回:
        DataFrame object
        抛出:
        FileNotFoundException
        IOException
        Exception
      • readTable

        public static DataFrame readTable​(String fileName,
                                          String delimiter,
                                          int skipRows,
                                          String formatSpec,
                                          String encoding,
                                          int indexCol,
                                          String indexFormat,
                                          List<String> names,
                                          Integer header,
                                          int skipFooter,
                                          List<Object> usecolsin)
                                   throws FileNotFoundException,
                                          IOException,
                                          Exception
        Read data frame from ASCII file
        参数:
        fileName - File name
        delimiter - Delimiter
        skipRows - Number of lines to skip at beginning of the file
        formatSpec - Format specifiers string
        encoding - File encoding
        indexCol - Column to be used as index
        indexFormat - Index format
        names - Column names
        header - Row number to use as the column names
        skipFooter - Number of lines at bottom of file to skip
        usecolsin - Filter columns by column names or indices
        返回:
        DataFrame object
        抛出:
        FileNotFoundException
        IOException
        Exception
      • saveCSV

        public void saveCSV​(String fileName,
                            String delimiter,
                            String formatSpec,
                            String dateFormat,
                            String floatFormat,
                            boolean index)
                     throws IOException
        Save as CSV file
        参数:
        fileName - File name
        delimiter - Delimiter
        formatSpec - Format specifiers string
        dateFormat - Date format string
        floatFormat - Float format string
        index - Whether to write the index
        抛出:
        IOException
      • describe

        public <V> DataFrame describe()
      • sortByIndex

        public DataFrame sortByIndex​(boolean ascending)
      • groupBy

        public DataFrameGroupBy groupBy​(KeyFunction function)
        Group the data frame rows using the specified key function.
        参数:
        function - the function to reduce rows to grouping keys
        返回:
        the grouping
      • groupBy

        public DataFrameGroupBy groupBy​(Integer... columns)
        Group the data frame rows using columns
        参数:
        columns - The columns
        返回:
        The grouping
      • groupBy

        public DataFrameGroupBy groupBy​(Object... columns)
        Group the data frame rows using columns
        参数:
        columns - The columns
        返回:
        The grouping
      • groupBy

        public DataFrameGroupBy groupBy​(List<Object> columns)
        Group the data frame rows using columns
        参数:
        columns - The columns
        返回:
        The grouping
      • groupByIndex

        public DataFrameGroupBy groupByIndex​(WindowFunction function)
        Group the data frame rows by index using the specified window function.
        参数:
        function - the function to reduce rows to grouping keys
        返回:
        the grouping
      • groupByIndex

        public DataFrameGroupBy groupByIndex​(String pStr)
        Group the data frame rows by index using the specified period string.
        参数:
        pStr - Period string
        返回:
        the grouping
      • apply

        public <V> DataFrame apply​(Function<?,​?> function)
        Apply a function
        类型参数:
        V -
        参数:
        function - The function
        返回:
        Result data frame
      • count

        public DataFrame count()
        Compute the count of the numeric columns.
        返回:
        the new data frame
      • sum

        public DataFrame sum()
        Compute the sum of the numeric columns.
        返回:
        the new data frame
      • mean

        public DataFrame mean()
        Compute the mean of the numeric columns.
        返回:
        the new data frame
      • min

        public DataFrame min()
        Compute the minimum of the numeric columns.
        返回:
        the new data frame
      • max

        public DataFrame max()
        Compute the maximum of the numeric columns.
        返回:
        the new data frame
      • median

        public DataFrame median()
        Compute the median of the numeric columns.
        返回:
        the new data frame
      • stdDev

        public DataFrame stdDev()
        Compute the standard deviation of the numeric columns.
        返回:
        the new data frame