public abstract class TrainingDatasetBase extends Object
| Modifier and Type | Field and Description |
|---|---|
protected Boolean |
coalesce |
protected DataFormat |
dataFormat |
protected String |
description |
protected Date |
eventEndTime |
protected Date |
eventStartTime |
protected FilterLogic |
extraFilter |
protected List<TrainingDatasetFeature> |
features |
protected FeatureStoreBase |
featureStore |
protected Integer |
id |
protected List<String> |
label |
protected String |
location |
protected static org.slf4j.Logger |
LOGGER |
protected String |
name |
protected Long |
seed |
protected List<Split> |
splits |
protected StatisticsConfig |
statisticsConfig |
protected StorageConnector |
storageConnector |
protected TrainingDatasetType |
trainingDatasetType |
protected String |
trainSplit |
protected String |
type |
protected VectorServer |
vectorServer |
protected Integer |
version |
| Constructor and Description |
|---|
TrainingDatasetBase() |
| Modifier and Type | Method and Description |
|---|---|
abstract void |
addTag(String name,
Object value)
Add name/value tag to the training dataset.
|
abstract Statistics |
computeStatistics()
Recompute the statistics for the entire training dataset and save them to the feature store.
|
abstract void |
delete()
Delete training dataset and all associated metadata.
|
abstract void |
deleteTag(String name)
Delete a tag of the training dataset.
|
List<String> |
getLabel() |
String |
getQuery() |
String |
getQuery(boolean withLabel) |
String |
getQuery(Storage storage) |
abstract String |
getQuery(Storage storage,
boolean withLabel) |
HashSet<String> |
getServingKeys()
Set of primary key names that are used as keys in the input map for the `getServingVector` method.
|
List<Object> |
getServingVector(Map<String,Object> entry)
Retrieve feature vector from online feature store.
|
List<Object> |
getServingVector(Map<String,Object> entry,
boolean external)
Retrieve feature vector from online feature store.
|
List<List<Object>> |
getServingVectors(Map<String,List<Object>> entry) |
List<List<Object>> |
getServingVectors(Map<String,List<Object>> entry,
boolean external) |
abstract Statistics |
getStatistics()
Get the last statistics commit for the training dataset.
|
abstract Statistics |
getStatistics(String commitTime)
Get the statistics of a specific commit time for the training dataset.
|
abstract Object |
getTag(String name)
Get a single tag value of the training dataset.
|
abstract Map<String,Object> |
getTags()
Get all tags of the training dataset.
|
TrainingDatasetType |
getTrainingDatasetType(StorageConnector storageConnector) |
void |
initPreparedStatement()
Initialise and cache parametrised prepared statement to retrieve feature vector from online feature store.
|
void |
initPreparedStatement(boolean external)
Initialise and cache parametrised prepared statement to retrieve feature vector from online feature store.
|
void |
initPreparedStatement(boolean external,
boolean batch)
Initialise and cache parametrised prepared statement to retrieve batch feature vectors from online feature store.
|
abstract <T> T |
read()
Read the content of the training dataset.
|
abstract Object |
read(String split,
Map<String,String> readOptions)
Read a single split from the training dataset.
|
void |
setLabel(List<String> label) |
void |
setTimeSeriesSplits(Integer timeSplitSize,
String trainStart,
String trainEnd,
String valStart,
String valEnd,
String testStart,
String testEnd) |
void |
setValTestSplit(Float valSize,
Float testSize) |
abstract void |
show(int numRows)
Show numRows from the training dataset (across all splits).
|
abstract void |
updateStatisticsConfig()
Update the statistics configuration of the training dataset.
|
protected Integer id
protected String name
protected Integer version
protected String description
protected Boolean coalesce
protected TrainingDatasetType trainingDatasetType
protected List<TrainingDatasetFeature> features
protected FeatureStoreBase featureStore
protected String location
protected Long seed
protected String trainSplit
protected Date eventStartTime
protected Date eventEndTime
protected FilterLogic extraFilter
protected DataFormat dataFormat
protected StorageConnector storageConnector
protected StatisticsConfig statisticsConfig
protected String type
protected static final org.slf4j.Logger LOGGER
protected VectorServer vectorServer
public void setTimeSeriesSplits(Integer timeSplitSize, String trainStart, String trainEnd, String valStart, String valEnd, String testStart, String testEnd) throws FeatureStoreException, ParseException
Throws: FeatureStoreException, ParseException
public String getQuery() throws FeatureStoreException, IOException
Throws: FeatureStoreException, IOException
public String getQuery(boolean withLabel) throws FeatureStoreException, IOException
Throws: FeatureStoreException, IOException
public String getQuery(Storage storage) throws FeatureStoreException, IOException
Throws: FeatureStoreException, IOException
public abstract String getQuery(Storage storage, boolean withLabel) throws FeatureStoreException, IOException
Throws: FeatureStoreException, IOException
public void initPreparedStatement()
throws SQLException,
IOException,
FeatureStoreException,
ClassNotFoundException
public void initPreparedStatement(boolean external)
throws SQLException,
IOException,
FeatureStoreException,
ClassNotFoundException
public void initPreparedStatement(boolean external,
boolean batch)
throws SQLException,
IOException,
FeatureStoreException,
ClassNotFoundException
public List<Object> getServingVector(Map<String,Object> entry) throws SQLException, FeatureStoreException, IOException, ClassNotFoundException
Parameters:
entry - Map object with keys as primary key names of the training dataset feature groups and values as
corresponding ids to retrieve the feature vector from the online feature store.
Throws: FeatureStoreException, IOException, SQLException, ClassNotFoundException
public List<Object> getServingVector(Map<String,Object> entry, boolean external) throws SQLException, FeatureStoreException, IOException, ClassNotFoundException
Parameters:
entry - Map object with keys as primary key names of the training dataset feature groups and values as
corresponding ids to retrieve the feature vector from the online feature store.
external - If true, the connection to the online feature store will be established using the hostname
provided in the hsfs.connection() setup.
Throws: FeatureStoreException, IOException, SQLException, ClassNotFoundException
public List<List<Object>> getServingVectors(Map<String,List<Object>> entry) throws SQLException, FeatureStoreException, IOException, ClassNotFoundException
public List<List<Object>> getServingVectors(Map<String,List<Object>> entry, boolean external) throws SQLException, FeatureStoreException, IOException, ClassNotFoundException
public HashSet<String> getServingKeys() throws SQLException, IOException, FeatureStoreException, ClassNotFoundException
Throws: SQLException, IOException, FeatureStoreException, ClassNotFoundException
public TrainingDatasetType getTrainingDatasetType(StorageConnector storageConnector)
public abstract <T> T read()
throws FeatureStoreException,
IOException
Throws:
FeatureStoreException - if the training dataset has splits and the split was not specified
IOException
public abstract Object read(String split, Map<String,String> readOptions) throws FeatureStoreException, IOException
Parameters:
split - the split name
readOptions - options to pass to the Spark read operation
Throws:
FeatureStoreException - if the training dataset has splits and the split was not specified
IOException
public abstract void show(int numRows)
throws FeatureStoreException,
IOException
Parameters:
numRows - number of rows to show from the training dataset (across all splits)
Throws: FeatureStoreException, IOException
public abstract Statistics computeStatistics() throws FeatureStoreException, IOException
Throws: FeatureStoreException, IOException
public abstract void updateStatisticsConfig()
throws FeatureStoreException,
IOException
Throws: FeatureStoreException, IOException
public abstract Statistics getStatistics() throws FeatureStoreException, IOException
Throws: FeatureStoreException, IOException
public abstract Statistics getStatistics(String commitTime) throws FeatureStoreException, IOException
Parameters:
commitTime - commit time in the format "YYYYMMDDhhmmss"
Throws: FeatureStoreException, IOException
public abstract void addTag(String name, Object value) throws FeatureStoreException, IOException
Parameters:
name - name of the tag
value - value of the tag. The value of a tag can be any valid JSON: primitives, arrays or JSON objects
Throws: FeatureStoreException, IOException
public abstract Map<String,Object> getTags() throws FeatureStoreException, IOException
Throws: FeatureStoreException, IOException
public abstract Object getTag(String name) throws FeatureStoreException, IOException
Parameters:
name - name of the tag
Throws: FeatureStoreException, IOException
public abstract void deleteTag(String name) throws FeatureStoreException, IOException
Parameters:
name - name of the tag to be deleted
Throws: FeatureStoreException, IOException
public abstract void delete()
throws FeatureStoreException,
IOException
Throws: FeatureStoreException, IOException
Copyright © 2023. All rights reserved.