public class TrainingDataset extends TrainingDatasetBase
coalesce, dataFormat, description, eventEndTime, eventStartTime, extraFilter, features, featureStore, id, label, location, LOGGER, name, seed, splits, statisticsConfig, storageConnector, trainingDatasetType, trainSplit, type, vectorServer, version
Constructor and Description |
---|
TrainingDataset(@NonNull String name,
Integer version,
String description,
DataFormat dataFormat,
Boolean coalesce,
StorageConnector storageConnector,
String location,
List<Split> splits,
String trainSplit,
Long seed,
FeatureStore featureStore,
StatisticsConfig statisticsConfig,
List<String> label,
String eventStartTime,
String eventEndTime,
TrainingDatasetType trainingDatasetType,
Float validationSize,
Float testSize,
String trainStart,
String trainEnd,
String validationStart,
String validationEnd,
String testStart,
String testEnd,
Integer timeSplitSize,
FilterLogic extraFilterLogic,
Filter extraFilter) |
Modifier and Type | Method and Description |
---|---|
void |
addTag(String name,
Object value)
Add name/value tag to the training dataset.
|
Statistics |
computeStatistics()
Recompute the statistics for the entire training dataset and save them to the feature store.
|
void |
delete()
Delete training dataset and all associated metadata.
|
void |
deleteTag(String name)
Delete a tag of the training dataset.
|
String |
getQuery(Storage storage,
boolean withLabel) |
List<Object> |
getServingVector(Map<String,Object> entry)
Retrieve feature vector from online feature store.
|
Statistics |
getStatistics()
Get the last statistics commit for the training dataset.
|
Statistics |
getStatistics(String commitTime)
Get the statistics of a specific commit time for the training dataset.
|
Object |
getTag(String name)
Get a single tag value of the training dataset.
|
Map<String,Object> |
getTags()
Get all tags of the training dataset.
|
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read()
Read the content of the training dataset.
|
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(Map<String,String> readOptions)
Read the content of the training dataset.
|
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(String split)
Read a single split from the training dataset.
|
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(String split,
Map<String,String> readOptions)
Read a single split from the training dataset.
|
void |
save(Query query) |
void |
save(Query query,
Map<String,String> writeOptions)
Create the training dataset based on the content of the feature store query.
|
void |
show(int numRows)
Show numRows from the training dataset (across all splits).
|
void |
updateStatisticsConfig()
Update the statistics configuration of the training dataset.
|
getLabel, getQuery, getQuery, getQuery, getServingKeys, getServingVector, getServingVectors, getServingVectors, getTrainingDatasetType, initPreparedStatement, initPreparedStatement, initPreparedStatement, setLabel, setTimeSeriesSplits, setValTestSplit
public TrainingDataset(@NonNull String name, Integer version, String description, DataFormat dataFormat, Boolean coalesce, StorageConnector storageConnector, String location, List<Split> splits, String trainSplit, Long seed, FeatureStore featureStore, StatisticsConfig statisticsConfig, List<String> label, String eventStartTime, String eventEndTime, TrainingDatasetType trainingDatasetType, Float validationSize, Float testSize, String trainStart, String trainEnd, String validationStart, String validationEnd, String testStart, String testEnd, Integer timeSplitSize, FilterLogic extraFilterLogic, Filter extraFilter) throws FeatureStoreException, ParseException
FeatureStoreException
ParseException
public void save(Query query) throws FeatureStoreException, IOException
FeatureStoreException
IOException
public void save(Query query, Map<String,String> writeOptions) throws FeatureStoreException, IOException
query
- the query to save as training dataset
writeOptions
- options to pass to the Spark write operation
FeatureStoreException
- FeatureStoreException
IOException
- IOException
public org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read() throws FeatureStoreException, IOException
read
in class TrainingDatasetBase
FeatureStoreException
- if the training dataset has splits and the split was not specified
IOException
- IOException
public org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(Map<String,String> readOptions) throws FeatureStoreException, IOException
readOptions
- options to pass to the Spark read operation
FeatureStoreException
- if the training dataset has splits and the split was not specified
IOException
- IOException
public org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(String split) throws FeatureStoreException, IOException
split
- the split name
FeatureStoreException
- if the training dataset has splits and the split was not specified
IOException
- IOException
public org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(String split, Map<String,String> readOptions) throws FeatureStoreException, IOException
read
in class TrainingDatasetBase
split
- the split name
readOptions
- options to pass to the Spark read operation
FeatureStoreException
- if the training dataset has splits and the split was not specified
IOException
- IOException
public void show(int numRows) throws FeatureStoreException, IOException
show
in class TrainingDatasetBase
numRows
- number of rows to display
FeatureStoreException
- FeatureStoreException
IOException
- IOException
public Statistics computeStatistics() throws FeatureStoreException, IOException
computeStatistics
in class TrainingDatasetBase
FeatureStoreException
- FeatureStoreException
IOException
- IOException
public void updateStatisticsConfig() throws FeatureStoreException, IOException
updateStatisticsConfig
in class TrainingDatasetBase
FeatureStoreException
- FeatureStoreException
IOException
- IOException
public Statistics getStatistics() throws FeatureStoreException, IOException
getStatistics
in class TrainingDatasetBase
FeatureStoreException
- FeatureStoreException
IOException
- IOException
public Statistics getStatistics(String commitTime) throws FeatureStoreException, IOException
getStatistics
in class TrainingDatasetBase
commitTime
- commit time in the format "YYYYMMDDhhmmss"
FeatureStoreException
- FeatureStoreException
IOException
- IOException
public void addTag(String name, Object value) throws FeatureStoreException, IOException
addTag
in class TrainingDatasetBase
name
- name of the tag
value
- value of the tag. The value of a tag can be any valid JSON: primitives, arrays or JSON objects
FeatureStoreException
- FeatureStoreException
IOException
- IOException
public Map<String,Object> getTags() throws FeatureStoreException, IOException
getTags
in class TrainingDatasetBase
FeatureStoreException
- FeatureStoreException
IOException
- IOException
public Object getTag(String name) throws FeatureStoreException, IOException
getTag
in class TrainingDatasetBase
name
- name of the tag
FeatureStoreException
- FeatureStoreException
IOException
- IOException
public void deleteTag(String name) throws FeatureStoreException, IOException
deleteTag
in class TrainingDatasetBase
name
- name of the tag to be deleted
FeatureStoreException
- FeatureStoreException
IOException
- IOException
public String getQuery(Storage storage, boolean withLabel) throws FeatureStoreException, IOException
getQuery
in class TrainingDatasetBase
FeatureStoreException
IOException
public List<Object> getServingVector(Map<String,Object> entry) throws SQLException, FeatureStoreException, IOException, ClassNotFoundException
getServingVector
in class TrainingDatasetBase
entry
- Map object with keys as primary key names of the training dataset feature groups and values as
corresponding ids to retrieve feature vector from online feature store.
SQLException
- SQLException
IOException
- IOException
FeatureStoreException
- FeatureStoreException
ClassNotFoundException
- ClassNotFoundException
public void delete() throws FeatureStoreException, IOException
delete
in class TrainingDatasetBase
FeatureStoreException
- FeatureStoreException
IOException
- IOException
Copyright © 2023. All rights reserved.