public class FeatureGroupEngine extends FeatureGroupEngineBase
Fields inherited from class FeatureGroupEngineBase: featureGroupApi, LOGGER, tagsApi, utils
Constructor and Description |
---|
FeatureGroupEngine() |
Modifier and Type | Method and Description |
---|---|
<T extends FeatureGroupBase> |
appendFeatures(FeatureGroupBase featureGroup,
List<Feature> features,
Class<T> fgClass) |
FeatureGroupCommit |
commitDelete(FeatureGroupBase featureGroupBase,
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> genericDataset,
Map<String,String> writeOptions) |
Map<Long,Map<String,String>> |
commitDetails(FeatureGroupBase featureGroupBase,
Integer limit) |
Map<Long,Map<String,String>> |
commitDetailsByWallclockTime(FeatureGroupBase featureGroup,
String wallclockTime,
Integer limit) |
ExternalFeatureGroup |
getExternalFeatureGroup(FeatureStore featureStore,
String fgName,
Integer fgVersion) |
List<ExternalFeatureGroup> |
getExternalFeatureGroups(FeatureStore featureStore,
String fgName) |
FeatureGroup |
getFeatureGroup(FeatureStore featureStore,
String fgName,
Integer fgVersion) |
List<FeatureGroup> |
getFeatureGroups(FeatureStore featureStore,
String fgName) |
FeatureGroup |
getOrCreateFeatureGroup(FeatureStore featureStore,
String name,
Integer version,
String description,
List<String> primaryKeys,
List<String> partitionKeys,
String hudiPrecombineKey,
boolean onlineEnabled,
TimeTravelFormat timeTravelFormat,
StatisticsConfig statisticsConfig,
String eventTime) |
StreamFeatureGroup |
getOrCreateStreamFeatureGroup(FeatureStore featureStore,
String name,
Integer version,
String description,
List<String> primaryKeys,
List<String> partitionKeys,
String hudiPrecombineKey,
boolean onlineEnabled,
StatisticsConfig statisticsConfig,
String eventTime) |
StreamFeatureGroup |
getStreamFeatureGroup(FeatureStore featureStore,
String fgName,
Integer fgVersion) |
List<StreamFeatureGroup> |
getStreamFeatureGroups(FeatureStore featureStore,
String fgName) |
void |
insert(ExternalFeatureGroup externalFeatureGroup,
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Map<String,String> writeOptions) |
void |
insert(FeatureGroup featureGroup,
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Storage storage,
HudiOperationType operation,
org.apache.spark.sql.SaveMode saveMode,
List<String> partitionKeys,
String hudiPrecombineKey,
Map<String,String> writeOptions) |
void |
insert(StreamFeatureGroup streamFeatureGroup,
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
org.apache.spark.sql.SaveMode saveMode,
List<String> partitionKeys,
String hudiPrecombineKey,
Map<String,String> writeOptions,
JobConfiguration jobConfiguration) |
org.apache.spark.sql.streaming.StreamingQuery |
insertStream(FeatureGroup featureGroup,
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName,
String outputMode,
boolean awaitTermination,
Long timeout,
String checkpointLocation,
List<String> partitionKeys,
String hudiPrecombineKey,
Map<String,String> writeOptions)
Deprecated.
|
org.apache.spark.sql.streaming.StreamingQuery |
insertStream(StreamFeatureGroup streamFeatureGroup,
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName,
String outputMode,
boolean awaitTermination,
Long timeout,
String checkpointLocation,
List<String> partitionKeys,
String hudiPrecombineKey,
Map<String,String> writeOptions,
JobConfiguration jobConfiguration) |
FeatureGroup |
save(FeatureGroup featureGroup,
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataset,
List<String> partitionKeys,
String hudiPrecombineKey,
Map<String,String> writeOptions)
Create the metadata and write the data to the online/offline feature store.
|
StreamFeatureGroup |
save(StreamFeatureGroup featureGroup,
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataset,
List<String> partitionKeys,
String hudiPrecombineKey,
Map<String,String> writeOptions,
JobConfiguration sparkJobConfiguration)
Create the metadata and write the data to the online/offline feature store.
|
void |
saveDataframe(FeatureGroup featureGroup,
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataset,
Storage storage,
HudiOperationType operation,
Map<String,String> offlineWriteOptions,
Map<String,String> onlineWriteOptions,
Integer validationId) |
ExternalFeatureGroup |
saveExternalFeatureGroup(ExternalFeatureGroup externalFeatureGroup) |
FeatureGroup |
saveFeatureGroupMetaData(FeatureGroup featureGroup,
List<String> partitionKeys,
String hudiPrecombineKey,
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
boolean saveEmpty) |
StreamFeatureGroup |
saveFeatureGroupMetaData(StreamFeatureGroup featureGroup,
List<String> partitionKeys,
String hudiPrecombineKey,
Map<String,String> writeOptions,
JobConfiguration sparkJobConfiguration,
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData) |
Methods inherited from class FeatureGroupEngineBase: addTag, delete, deleteTag, getTag, getTags, saveExtennalFeatureGroupMetaData, updateDescription, updateFeatures, updateStatisticsConfig
public FeatureGroup save(FeatureGroup featureGroup, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataset, List<String> partitionKeys, String hudiPrecombineKey, Map<String,String> writeOptions) throws FeatureStoreException, IOException, ParseException
featureGroup
- Feature Group metadata object.
dataset
- Spark DataFrame or RDD.
partitionKeys
- A list of feature names to be used as partition key when writing the feature data to the
offline storage, defaults to empty list.
hudiPrecombineKey
- A feature name to be used as a precombine key for the `TimeTravelFormat.HUDI` feature
group. If feature group has `TimeTravelFormat.HUDI` and hudi precombine key was not
specified then the first primary key of the feature group will be used as hudi precombine
key.
writeOptions
- Additional write options as key-value pairs, defaults to empty Map.
FeatureStoreException
- In case Client is not connected to Hopsworks, unable to identify date format and/or
no commit information was found for the feature group.
IOException
- Generic IO exception.
ParseException
- In case it's unable to parse HUDI and/or statistics commit date string to date type.
public StreamFeatureGroup save(StreamFeatureGroup featureGroup, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataset, List<String> partitionKeys, String hudiPrecombineKey, Map<String,String> writeOptions, JobConfiguration sparkJobConfiguration) throws FeatureStoreException, IOException, ParseException
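featureGroup
- Stream Feature Group metadata object.
dataset
- Spark DataFrame or RDD.
partitionKeys
- A list of feature names to be used as partition key when writing the feature data to the
offline storage, defaults to empty list.
writeOptions
- Additional write options as key-value pairs, defaults to empty Map.
sparkJobConfiguration
- A `JobConfiguration` object.
FeatureStoreException
IOException
ParseException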
public void insert(FeatureGroup featureGroup, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, Storage storage, HudiOperationType operation, org.apache.spark.sql.SaveMode saveMode, List<String> partitionKeys, String hudiPrecombineKey, Map<String,String> writeOptions) throws FeatureStoreException, IOException, ParseException
public void insert(StreamFeatureGroup streamFeatureGroup, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, org.apache.spark.sql.SaveMode saveMode, List<String> partitionKeys, String hudiPrecombineKey, Map<String,String> writeOptions, JobConfiguration jobConfiguration) throws FeatureStoreException, IOException, ParseException
public void insert(ExternalFeatureGroup externalFeatureGroup, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, Map<String,String> writeOptions) throws FeatureStoreException, IOException
FeatureStoreException
IOException
@Deprecated public org.apache.spark.sql.streaming.StreamingQuery insertStream(FeatureGroup featureGroup, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName, String outputMode, boolean awaitTermination, Long timeout, String checkpointLocation, List<String> partitionKeys, String hudiPrecombineKey, Map<String,String> writeOptions) throws FeatureStoreException, IOException, org.apache.spark.sql.streaming.StreamingQueryException, TimeoutException, ParseException
FeatureStoreException
IOException
org.apache.spark.sql.streaming.StreamingQueryException
TimeoutException
ParseException
public org.apache.spark.sql.streaming.StreamingQuery insertStream(StreamFeatureGroup streamFeatureGroup, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName, String outputMode, boolean awaitTermination, Long timeout, String checkpointLocation, List<String> partitionKeys, String hudiPrecombineKey, Map<String,String> writeOptions, JobConfiguration jobConfiguration)
public void saveDataframe(FeatureGroup featureGroup, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> dataset, Storage storage, HudiOperationType operation, Map<String,String> offlineWriteOptions, Map<String,String> onlineWriteOptions, Integer validationId) throws IOException, FeatureStoreException, ParseException
public FeatureGroup saveFeatureGroupMetaData(FeatureGroup featureGroup, List<String> partitionKeys, String hudiPrecombineKey, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, boolean saveEmpty) throws FeatureStoreException, IOException, ParseException
public StreamFeatureGroup saveFeatureGroupMetaData(StreamFeatureGroup featureGroup, List<String> partitionKeys, String hudiPrecombineKey, Map<String,String> writeOptions, JobConfiguration sparkJobConfiguration, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData) throws FeatureStoreException, IOException
FeatureStoreException
IOException
public FeatureGroup getOrCreateFeatureGroup(FeatureStore featureStore, String name, Integer version, String description, List<String> primaryKeys, List<String> partitionKeys, String hudiPrecombineKey, boolean onlineEnabled, TimeTravelFormat timeTravelFormat, StatisticsConfig statisticsConfig, String eventTime) throws IOException, FeatureStoreException
IOException
FeatureStoreException
public FeatureGroup getFeatureGroup(FeatureStore featureStore, String fgName, Integer fgVersion) throws IOException, FeatureStoreException
IOException
FeatureStoreException
public List<FeatureGroup> getFeatureGroups(FeatureStore featureStore, String fgName) throws FeatureStoreException, IOException
FeatureStoreException
IOException
public StreamFeatureGroup getOrCreateStreamFeatureGroup(FeatureStore featureStore, String name, Integer version, String description, List<String> primaryKeys, List<String> partitionKeys, String hudiPrecombineKey, boolean onlineEnabled, StatisticsConfig statisticsConfig, String eventTime) throws IOException, FeatureStoreException
IOException
FeatureStoreException
public StreamFeatureGroup getStreamFeatureGroup(FeatureStore featureStore, String fgName, Integer fgVersion) throws IOException, FeatureStoreException
IOException
FeatureStoreException
public List<StreamFeatureGroup> getStreamFeatureGroups(FeatureStore featureStore, String fgName) throws FeatureStoreException, IOException
FeatureStoreException
IOException
public <T extends FeatureGroupBase> void appendFeatures(FeatureGroupBase featureGroup, List<Feature> features, Class<T> fgClass) throws FeatureStoreException, IOException, ParseException
public Map<Long,Map<String,String>> commitDetails(FeatureGroupBase featureGroupBase, Integer limit) throws IOException, FeatureStoreException, ParseException
public Map<Long,Map<String,String>> commitDetailsByWallclockTime(FeatureGroupBase featureGroup, String wallclockTime, Integer limit) throws IOException, FeatureStoreException, ParseException
public FeatureGroupCommit commitDelete(FeatureGroupBase featureGroupBase, org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> genericDataset, Map<String,String> writeOptions) throws IOException, FeatureStoreException, ParseException
public ExternalFeatureGroup saveExternalFeatureGroup(ExternalFeatureGroup externalFeatureGroup) throws FeatureStoreException, IOException
FeatureStoreException
IOException
public List<ExternalFeatureGroup> getExternalFeatureGroups(FeatureStore featureStore, String fgName) throws FeatureStoreException, IOException
FeatureStoreException
IOException
public ExternalFeatureGroup getExternalFeatureGroup(FeatureStore featureStore, String fgName, Integer fgVersion) throws IOException, FeatureStoreException
IOException
FeatureStoreException
Copyright © 2023. All rights reserved.