public class TrainingDataset extends TrainingDatasetBase
coalesce, dataFormat, description, eventEndTime, eventStartTime, extraFilter, features, featureStore, id, label, location, LOGGER, name, seed, splits, statisticsConfig, storageConnector, trainingDatasetType, trainSplit, type, vectorServer, version
| Constructor and Description |
|---|
TrainingDataset(@NonNull String name,
Integer version,
String description,
DataFormat dataFormat,
Boolean coalesce,
StorageConnector storageConnector,
String location,
List<Split> splits,
String trainSplit,
Long seed,
FeatureStore featureStore,
StatisticsConfig statisticsConfig,
List<String> label,
String eventStartTime,
String eventEndTime,
TrainingDatasetType trainingDatasetType,
Float validationSize,
Float testSize,
String trainStart,
String trainEnd,
String validationStart,
String validationEnd,
String testStart,
String testEnd,
Integer timeSplitSize,
FilterLogic extraFilterLogic,
Filter extraFilter) |
| Modifier and Type | Method and Description |
|---|---|
void |
addTag(String name,
Object value)
Add name/value tag to the training dataset.
|
Statistics |
computeStatistics()
Recompute the statistics for the entire training dataset and save them to the feature store.
|
void |
delete()
Delete training dataset and all associated metadata.
|
void |
deleteTag(String name)
Delete a tag of the training dataset.
|
String |
getQuery(Storage storage,
boolean withLabel) |
List<Object> |
getServingVector(Map<String,Object> entry)
Retrieve feature vector from online feature store.
|
Statistics |
getStatistics()
Get the last statistics commit for the training dataset.
|
Statistics |
getStatistics(String commitTime)
Get the statistics of a specific commit time for the training dataset.
|
Object |
getTag(String name)
Get a single tag value of the training dataset.
|
Map<String,Object> |
getTags()
Get all tags of the training dataset.
|
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read()
Read the content of the training dataset.
|
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(Map<String,String> readOptions)
Read the content of the training dataset.
|
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(String split)
Read a single split from the training dataset.
|
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(String split,
Map<String,String> readOptions)
Read a single split from the training dataset.
|
void |
save(Query query) |
void |
save(Query query,
Map<String,String> writeOptions)
Create the training dataset based on the content of the feature store query.
|
void |
show(int numRows)
Show numRows from the training dataset (across all splits).
|
void |
updateStatisticsConfig()
Update the statistics configuration of the training dataset.
|
getLabel, getQuery, getQuery, getQuery, getServingKeys, getServingVector, getServingVectors, getServingVectors, getTrainingDatasetType, initPreparedStatement, initPreparedStatement, initPreparedStatement, setLabel, setTimeSeriesSplits, setValTestSplit
public TrainingDataset(@NonNull String name,
Integer version,
String description,
DataFormat dataFormat,
Boolean coalesce,
StorageConnector storageConnector,
String location,
List<Split> splits,
String trainSplit,
Long seed,
FeatureStore featureStore,
StatisticsConfig statisticsConfig,
List<String> label,
String eventStartTime,
String eventEndTime,
TrainingDatasetType trainingDatasetType,
Float validationSize,
Float testSize,
String trainStart,
String trainEnd,
String validationStart,
String validationEnd,
String testStart,
String testEnd,
Integer timeSplitSize,
FilterLogic extraFilterLogic,
Filter extraFilter)
throws FeatureStoreException,
ParseException
FeatureStoreExceptionParseExceptionpublic void save(Query query) throws FeatureStoreException, IOException
FeatureStoreExceptionIOExceptionpublic void save(Query query, Map<String,String> writeOptions) throws FeatureStoreException, IOException
query - the query to save as training dataset
writeOptions - options to pass to the Spark write operation
FeatureStoreException - FeatureStoreException
IOException - IOException
public org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read()
throws FeatureStoreException,
IOException
read in class TrainingDatasetBaseFeatureStoreException - if the training dataset has splits and the split was not specifiedIOException - IOExceptionpublic org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(Map<String,String> readOptions) throws FeatureStoreException, IOException
readOptions - options to pass to the Spark read operationFeatureStoreException - if the training dataset has splits and the split was not specifiedIOException - IOExceptionpublic org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(String split) throws FeatureStoreException, IOException
split - the split nameFeatureStoreException - if the training dataset has splits and the split was not specifiedIOException - IOExceptionpublic org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(String split, Map<String,String> readOptions) throws FeatureStoreException, IOException
read in class TrainingDatasetBasesplit - the split namereadOptions - options to pass to the Spark read operationFeatureStoreException - if the training dataset has splits and the split was not specifiedIOException - IOExceptionpublic void show(int numRows)
throws FeatureStoreException,
IOException
show in class TrainingDatasetBasenumRows - number of rows to displayFeatureStoreException - FeatureStoreExceptionIOException - IOExceptionpublic Statistics computeStatistics() throws FeatureStoreException, IOException
computeStatistics in class TrainingDatasetBaseFeatureStoreException - FeatureStoreExceptionIOException - IOExceptionpublic void updateStatisticsConfig()
throws FeatureStoreException,
IOException
updateStatisticsConfig in class TrainingDatasetBaseFeatureStoreException - FeatureStoreExceptionIOException - IOExceptionpublic Statistics getStatistics() throws FeatureStoreException, IOException
getStatistics in class TrainingDatasetBaseFeatureStoreException - FeatureStoreExceptionIOException - IOExceptionpublic Statistics getStatistics(String commitTime) throws FeatureStoreException, IOException
getStatistics in class TrainingDatasetBasecommitTime - commit time in the format "YYYYMMDDhhmmss"FeatureStoreException - FeatureStoreExceptionIOException - IOExceptionpublic void addTag(String name, Object value) throws FeatureStoreException, IOException
addTag in class TrainingDatasetBasename - name of the tagvalue - value of the tag. The value of a tag can be any valid json - primitives, arrays or json objectsFeatureStoreException - FeatureStoreExceptionIOException - IOExceptionpublic Map<String,Object> getTags() throws FeatureStoreException, IOException
getTags in class TrainingDatasetBaseFeatureStoreException - FeatureStoreExceptionIOException - IOExceptionpublic Object getTag(String name) throws FeatureStoreException, IOException
getTag in class TrainingDatasetBasename - name of the tagFeatureStoreException - FeatureStoreExceptionIOException - IOExceptionpublic void deleteTag(String name) throws FeatureStoreException, IOException
deleteTag in class TrainingDatasetBasename - name of the tag to be deletedFeatureStoreException - FeatureStoreExceptionIOException - IOExceptionpublic String getQuery(Storage storage, boolean withLabel) throws FeatureStoreException, IOException
getQuery in class TrainingDatasetBaseFeatureStoreExceptionIOExceptionpublic List<Object> getServingVector(Map<String,Object> entry) throws SQLException, FeatureStoreException, IOException, ClassNotFoundException
getServingVector in class TrainingDatasetBase
entry - Map object with keys as primary key names of the training dataset feature groups and values as corresponding ids to retrieve feature vector from online feature store.
SQLException - SQLException
IOException - IOException
FeatureStoreException - FeatureStoreException
ClassNotFoundException - ClassNotFoundException
public void delete()
throws FeatureStoreException,
IOException
delete in class TrainingDatasetBaseFeatureStoreException - FeatureStoreExceptionIOException - IOExceptionCopyright © 2023. All rights reserved.