public abstract class TrainingDatasetBase extends Object
Modifier and Type | Field and Description |
---|---|
protected Boolean |
coalesce |
protected DataFormat |
dataFormat |
protected String |
description |
protected Date |
eventEndTime |
protected Date |
eventStartTime |
protected FilterLogic |
extraFilter |
protected List<TrainingDatasetFeature> |
features |
protected FeatureStoreBase |
featureStore |
protected Integer |
id |
protected List<String> |
label |
protected String |
location |
protected static org.slf4j.Logger |
LOGGER |
protected String |
name |
protected Long |
seed |
protected List<Split> |
splits |
protected StatisticsConfig |
statisticsConfig |
protected StorageConnector |
storageConnector |
protected TrainingDatasetType |
trainingDatasetType |
protected String |
trainSplit |
protected String |
type |
protected VectorServer |
vectorServer |
protected Integer |
version |
Constructor and Description |
---|
TrainingDatasetBase() |
Modifier and Type | Method and Description |
---|---|
abstract void |
addTag(String name,
Object value)
Add name/value tag to the training dataset.
|
abstract Statistics |
computeStatistics()
Recompute the statistics for the entire training dataset and save them to the feature store.
|
abstract void |
delete()
Delete training dataset and all associated metadata.
|
abstract void |
deleteTag(String name)
Delete a tag of the training dataset.
|
List<String> |
getLabel() |
String |
getQuery() |
String |
getQuery(boolean withLabel) |
String |
getQuery(Storage storage) |
abstract String |
getQuery(Storage storage,
boolean withLabel) |
HashSet<String> |
getServingKeys()
Set of primary key names that are used as keys in the input dict object for the `get_serving_vector` method.
|
List<Object> |
getServingVector(Map<String,Object> entry)
Retrieve feature vector from online feature store.
|
List<Object> |
getServingVector(Map<String,Object> entry,
boolean external)
Retrieve feature vector from online feature store.
|
List<List<Object>> |
getServingVectors(Map<String,List<Object>> entry) |
List<List<Object>> |
getServingVectors(Map<String,List<Object>> entry,
boolean external) |
abstract Statistics |
getStatistics()
Get the last statistics commit for the training dataset.
|
abstract Statistics |
getStatistics(String commitTime)
Get the statistics of a specific commit time for the training dataset.
|
abstract Object |
getTag(String name)
Get a single tag value of the training dataset.
|
abstract Map<String,Object> |
getTags()
Get all tags of the training dataset.
|
TrainingDatasetType |
getTrainingDatasetType(StorageConnector storageConnector) |
void |
initPreparedStatement()
Initialise and cache parametrised prepared statement to retrieve feature vector from online feature store.
|
void |
initPreparedStatement(boolean external)
Initialise and cache parametrised prepared statement to retrieve feature vector from online feature store.
|
void |
initPreparedStatement(boolean external,
boolean batch)
Initialise and cache parametrised prepared statement to retrieve batch feature vectors from online feature store.
|
abstract <T> T |
read()
Read the content of the training dataset.
|
abstract Object |
read(String split,
Map<String,String> readOptions)
Read a single split from the training dataset.
|
void |
setLabel(List<String> label) |
void |
setTimeSeriesSplits(Integer timeSplitSize,
String trainStart,
String trainEnd,
String valStart,
String valEnd,
String testStart,
String testEnd) |
void |
setValTestSplit(Float valSize,
Float testSize) |
abstract void |
show(int numRows)
Show numRows from the training dataset (across all splits).
|
abstract void |
updateStatisticsConfig()
Update the statistics configuration of the training dataset.
|
protected Integer id
protected String name
protected Integer version
protected String description
protected Boolean coalesce
protected TrainingDatasetType trainingDatasetType
protected List<TrainingDatasetFeature> features
protected FeatureStoreBase featureStore
protected String location
protected Long seed
protected String trainSplit
protected Date eventStartTime
protected Date eventEndTime
protected FilterLogic extraFilter
protected DataFormat dataFormat
protected StorageConnector storageConnector
protected StatisticsConfig statisticsConfig
protected String type
protected static final org.slf4j.Logger LOGGER
protected VectorServer vectorServer
public void setTimeSeriesSplits(Integer timeSplitSize, String trainStart, String trainEnd, String valStart, String valEnd, String testStart, String testEnd) throws FeatureStoreException, ParseException
FeatureStoreException
ParseException
public String getQuery() throws FeatureStoreException, IOException
FeatureStoreException
IOException
public String getQuery(boolean withLabel) throws FeatureStoreException, IOException
FeatureStoreException
IOException
public String getQuery(Storage storage) throws FeatureStoreException, IOException
FeatureStoreException
IOException
public abstract String getQuery(Storage storage, boolean withLabel) throws FeatureStoreException, IOException
FeatureStoreException
IOException
public void initPreparedStatement() throws SQLException, IOException, FeatureStoreException, ClassNotFoundException
public void initPreparedStatement(boolean external) throws SQLException, IOException, FeatureStoreException, ClassNotFoundException
public void initPreparedStatement(boolean external, boolean batch) throws SQLException, IOException, FeatureStoreException, ClassNotFoundException
public List<Object> getServingVector(Map<String,Object> entry) throws SQLException, FeatureStoreException, IOException, ClassNotFoundException
entry
- Map object with keys as primary key names of the training dataset feature groups and values as
corresponding ids to retrieve feature vector from online feature store.
FeatureStoreException
IOException
SQLException
ClassNotFoundException
public List<Object> getServingVector(Map<String,Object> entry, boolean external) throws SQLException, FeatureStoreException, IOException, ClassNotFoundException
entry
- Map object with keys as primary key names of the training dataset feature groups and values as
corresponding ids to retrieve feature vector from online feature store.
external
- If true, the connection to the online feature store will be established using the hostname
provided in the hsfs.connection() setup.
FeatureStoreException
IOException
SQLException
ClassNotFoundException
public List<List<Object>> getServingVectors(Map<String,List<Object>> entry) throws SQLException, FeatureStoreException, IOException, ClassNotFoundException
public List<List<Object>> getServingVectors(Map<String,List<Object>> entry, boolean external) throws SQLException, FeatureStoreException, IOException, ClassNotFoundException
public HashSet<String> getServingKeys() throws SQLException, IOException, FeatureStoreException, ClassNotFoundException
SQLException
IOException
FeatureStoreException
ClassNotFoundException
public TrainingDatasetType getTrainingDatasetType(StorageConnector storageConnector)
public abstract <T> T read() throws FeatureStoreException, IOException
FeatureStoreException
- if the training dataset has splits and the split was not specified
IOException
public abstract Object read(String split, Map<String,String> readOptions) throws FeatureStoreException, IOException
split
- the split name
readOptions
- options to pass to the Spark read operation
FeatureStoreException
- if the training dataset has splits and the split was not specified
IOException
public abstract void show(int numRows) throws FeatureStoreException, IOException
numRows
- number of rows to show from the training dataset
FeatureStoreException
IOException
public abstract Statistics computeStatistics() throws FeatureStoreException, IOException
FeatureStoreException
IOException
public abstract void updateStatisticsConfig() throws FeatureStoreException, IOException
FeatureStoreException
IOException
public abstract Statistics getStatistics() throws FeatureStoreException, IOException
FeatureStoreException
IOException
public abstract Statistics getStatistics(String commitTime) throws FeatureStoreException, IOException
commitTime
- commit time in the format "YYYYMMDDhhmmss"
FeatureStoreException
IOException
public abstract void addTag(String name, Object value) throws FeatureStoreException, IOException
name
- name of the tag
value
- value of the tag. The value of a tag can be any valid JSON: primitives, arrays or JSON objects
FeatureStoreException
IOException
public abstract Map<String,Object> getTags() throws FeatureStoreException, IOException
FeatureStoreException
IOException
public abstract Object getTag(String name) throws FeatureStoreException, IOException
name
- name of the tag
FeatureStoreException
IOException
public abstract void deleteTag(String name) throws FeatureStoreException, IOException
name
- name of the tag to be deleted
FeatureStoreException
IOException
public abstract void delete() throws FeatureStoreException, IOException
FeatureStoreException
IOException
Copyright © 2023. All rights reserved.