public class ExternalFeatureGroup extends FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
created, creator, deltaStreamerJobConf, deprecated, description, eventTime, expectationsNames, featureGroupEngineBase, features, featureStore, hudiPrecombineKey, id, location, LOGGER, name, notificationTopicName, onlineConfig, onlineEnabled, onlineIngestionApi, onlineTopicName, partitionKeys, path, primaryKeys, statisticColumns, statisticsConfig, storageConnector, subject, timeTravelFormat, topicName, type, utils, version
Constructor and Description |
---|
ExternalFeatureGroup() |
ExternalFeatureGroup(FeatureStore featureStore,
int id) |
ExternalFeatureGroup(FeatureStore featureStore,
@NonNull String name,
Integer version,
String query,
ExternalDataFormat dataFormat,
String path,
Map<String,String> options,
@NonNull StorageConnector storageConnector,
String description,
List<String> primaryKeys,
List<Feature> features,
StatisticsConfig statisticsConfig,
String eventTime,
boolean onlineEnabled,
String onlineTopicName,
String topicName,
String notificationTopicName,
OnlineConfig onlineConfig) |
Modifier and Type | Method and Description |
---|---|
void |
appendFeatures(Feature features)
Append a single feature to the schema of the feature group.
|
void |
appendFeatures(List<Feature> features)
Append features to the schema of the feature group.
|
QueryBase |
asOf(String wallclockTime) |
QueryBase |
asOf(String wallclockTime,
String excludeUntil) |
void |
commitDeleteRecord(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData) |
void |
commitDeleteRecord(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Map<String,String> writeOptions) |
Map<Long,Map<String,String>> |
commitDetails() |
Map<Long,Map<String,String>> |
commitDetails(Integer limit) |
Map<Long,Map<String,String>> |
commitDetails(String wallclockTime) |
Map<Long,Map<String,String>> |
commitDetails(String wallclockTime,
Integer limit) |
Statistics |
computeStatistics()
Recompute the statistics for the feature group and save them to the feature store.
|
Statistics |
computeStatistics(String wallclockTime) |
Statistics |
getStatistics()
Get the last statistics commit for the feature group.
|
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData)
Incrementally insert data to the online storage of an external feature group.
|
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
boolean overwrite) |
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
boolean overwrite,
Map<String,String> writeOptions) |
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
boolean overwrite,
Map<String,String> writeOptions,
JobConfiguration jobConfiguration) |
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
HudiOperationType operation) |
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
JobConfiguration jobConfiguration) |
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Map<String,String> writeOptions)
Incrementally insert data to the online storage of an external feature group.
|
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Storage storage) |
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Storage storage,
boolean overwrite) |
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Storage storage,
boolean overwrite,
HudiOperationType operation,
Map<String,String> writeOptions) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Map<String,String> writeOptions) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName,
Map<String,String> writeOptions) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName,
String outputMode) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName,
String outputMode,
boolean awaitTermination,
Long timeout) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName,
String outputMode,
boolean awaitTermination,
Long timeout,
String checkpointLocation) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName,
String outputMode,
boolean awaitTermination,
Long timeout,
String checkpointLocation,
Map<String,String> writeOptions) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName,
String outputMode,
boolean awaitTermination,
Long timeout,
String checkpointLocation,
Map<String,String> writeOptions,
JobConfiguration jobConfiguration) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName,
String outputMode,
boolean awaitTermination,
String checkpointLocation) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName,
String outputMode,
String checkpointLocation) |
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read() |
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(boolean online) |
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(boolean online,
Map<String,String> readOptions) |
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(Map<String,String> readOptions) |
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(String wallclockTime) |
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(String wallclockTime,
Map<String,String> readOptions) |
void |
save() |
Query |
select(List<String> features) |
Query |
selectAll() |
Query |
selectExcept(List<String> features) |
Query |
selectExceptFeatures(List<Feature> features) |
Query |
selectFeatures(List<Feature> features) |
void |
show(int numRows) |
void |
show(int numRows,
boolean online) |
void |
updateFeatures(Feature feature)
Update the metadata of a single feature.
|
void |
updateFeatures(List<Feature> features)
Update the metadata of multiple features.
|
addTag, checkDeprecated, delete, deleteTag, getAvroSchema, getComplexFeatures, getDeserializedAvroSchema, getDeserializedEncodedAvroSchema, getEncodedAvroSchema, getFeature, getFeatureAvroSchema, getLatestOnlineIngestion, getOnlineIngestion, getPrimaryKeys, getSubject, getTag, getTags, setDeprecated, unloadSubject, updateDeprecated, updateDeprecated, updateDescription, updateFeatureDescription, updateNotificationTopicName, updateStatisticsConfig
public ExternalFeatureGroup(FeatureStore featureStore, @NonNull String name, Integer version, String query, ExternalDataFormat dataFormat, String path, Map<String,String> options, @NonNull StorageConnector storageConnector, String description, List<String> primaryKeys, List<Feature> features, StatisticsConfig statisticsConfig, String eventTime, boolean onlineEnabled, String onlineTopicName, String topicName, String notificationTopicName, OnlineConfig onlineConfig)
public ExternalFeatureGroup()
public ExternalFeatureGroup(FeatureStore featureStore, int id)
public void save() throws FeatureStoreException, IOException
FeatureStoreException
IOException
public org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read() throws FeatureStoreException, IOException
read
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
FeatureStoreException
IOException
public org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(boolean online) throws FeatureStoreException, IOException
read
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
FeatureStoreException
IOException
public org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(Map<String,String> readOptions) throws FeatureStoreException, IOException
read
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
FeatureStoreException
IOException
public org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(boolean online, Map<String,String> readOptions) throws FeatureStoreException, IOException
read
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
FeatureStoreException
IOException
public org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(String wallclockTime) throws FeatureStoreException, IOException, ParseException
read
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
FeatureStoreException
IOException
ParseException
public org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(String wallclockTime, Map<String,String> readOptions) throws FeatureStoreException, IOException, ParseException
read
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
FeatureStoreException
IOException
ParseException
public QueryBase asOf(String wallclockTime) throws FeatureStoreException, ParseException
asOf
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
FeatureStoreException
ParseException
public QueryBase asOf(String wallclockTime, String excludeUntil) throws FeatureStoreException, ParseException
asOf
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
FeatureStoreException
ParseException
public void show(int numRows) throws FeatureStoreException, IOException
show
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
FeatureStoreException
IOException
public void show(int numRows, boolean online) throws FeatureStoreException, IOException
show
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
FeatureStoreException
IOException
public void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, Storage storage) throws IOException, FeatureStoreException, ParseException
insert
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
IOException
FeatureStoreException
ParseException
public void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, boolean overwrite) throws IOException, FeatureStoreException, ParseException
insert
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
IOException
FeatureStoreException
ParseException
public void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, Storage storage, boolean overwrite) throws IOException, FeatureStoreException, ParseException
insert
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
IOException
FeatureStoreException
ParseException
public void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, boolean overwrite, Map<String,String> writeOptions) throws FeatureStoreException, IOException, ParseException
insert
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
FeatureStoreException
IOException
ParseException
public void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, HudiOperationType operation) throws FeatureStoreException, IOException, ParseException
insert
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
FeatureStoreException
IOException
ParseException
public void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, Storage storage, boolean overwrite, HudiOperationType operation, Map<String,String> writeOptions) throws FeatureStoreException, IOException, ParseException
insert
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
FeatureStoreException
IOException
ParseException
public void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, JobConfiguration jobConfiguration) throws FeatureStoreException, IOException, ParseException
insert
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
FeatureStoreException
IOException
ParseException
public void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, boolean overwrite, Map<String,String> writeOptions, JobConfiguration jobConfiguration) throws FeatureStoreException, IOException, ParseException
insert
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
FeatureStoreException
IOException
ParseException
public void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData) throws FeatureStoreException, IOException
// get feature store handle
FeatureStore fs = HopsworksConnection.builder().build().getFeatureStore();
// get feature group handle
ExternalFeatureGroup fg = fs.getExternalFeatureGroup("electricity_prices", 1);
// insert data
fg.insert(featureData);
insert
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
featureData
- Spark DataFrame, RDD. Features to be saved.
IOException
- Generic IO exception.
FeatureStoreException
- If client is not connected to Hopsworks; cannot run read query on storage and/or
can't reconcile schema.
public void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, Map<String,String> writeOptions) throws FeatureStoreException, IOException, ParseException
// get feature store handle
FeatureStore fs = HopsworksConnection.builder().build().getFeatureStore();
// get feature group handle
ExternalFeatureGroup fg = fs.getExternalFeatureGroup("electricity_prices", 1);
// Define additional write options (for example for Spark)
Map<String, String> writeOptions = new HashMap<String, String>();
// insert data
fg.insert(featureData, writeOptions);
insert
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
featureData
- Spark DataFrame, RDD. Features to be saved.
writeOptions
- Additional write options as key-value pairs.
IOException
- Generic IO exception.
FeatureStoreException
- If client is not connected to Hopsworks; cannot run read query on storage and/or
can't reconcile schema.
ParseException
public void commitDeleteRecord(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData) throws FeatureStoreException, IOException, ParseException
commitDeleteRecord
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
FeatureStoreException
IOException
ParseException
public void commitDeleteRecord(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, Map<String,String> writeOptions) throws FeatureStoreException, IOException, ParseException
commitDeleteRecord
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
FeatureStoreException
IOException
ParseException
public Map<Long,Map<String,String>> commitDetails() throws IOException, FeatureStoreException, ParseException
commitDetails
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
IOException
FeatureStoreException
ParseException
public Map<Long,Map<String,String>> commitDetails(Integer limit) throws IOException, FeatureStoreException, ParseException
commitDetails
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
IOException
FeatureStoreException
ParseException
public Map<Long,Map<String,String>> commitDetails(String wallclockTime) throws IOException, FeatureStoreException, ParseException
commitDetails
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
IOException
FeatureStoreException
ParseException
public Map<Long,Map<String,String>> commitDetails(String wallclockTime, Integer limit) throws IOException, FeatureStoreException, ParseException
commitDetails
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
IOException
FeatureStoreException
ParseException
public Query selectFeatures(List<Feature> features)
selectFeatures
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
public Query select(List<String> features)
select
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
public Query selectAll()
selectAll
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
public Query selectExceptFeatures(List<Feature> features)
selectExceptFeatures
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
public Query selectExcept(List<String> features)
selectExcept
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
public Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData) throws Exception
insertStream
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
Exception
public Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName) throws Exception
insertStream
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
Exception
public Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, Map<String,String> writeOptions) throws Exception
insertStream
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
Exception
public Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName, Map<String,String> writeOptions) throws Exception
insertStream
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
Exception
public Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName, String outputMode) throws Exception
insertStream
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
Exception
public Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName, String outputMode, String checkpointLocation) throws Exception
insertStream
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
Exception
public Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName, String outputMode, boolean awaitTermination, Long timeout) throws Exception
insertStream
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
Exception
public Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName, String outputMode, boolean awaitTermination, Long timeout, String checkpointLocation) throws Exception
insertStream
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
Exception
public Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName, String outputMode, boolean awaitTermination, Long timeout, String checkpointLocation, Map<String,String> writeOptions) throws Exception
insertStream
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
Exception
public Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName, String outputMode, boolean awaitTermination, String checkpointLocation) throws Exception
insertStream
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
Exception
public Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName, String outputMode, boolean awaitTermination, Long timeout, String checkpointLocation, Map<String,String> writeOptions, JobConfiguration jobConfiguration) throws Exception
insertStream
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
Exception
public void updateFeatures(List<Feature> features) throws FeatureStoreException, IOException, ParseException
FeatureGroupBase
updateFeatures
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
features
- List of Feature metadata objects
FeatureStoreException
- FeatureStoreException
IOException
- IOException
ParseException
- ParseException
public void updateFeatures(Feature feature) throws FeatureStoreException, IOException, ParseException
FeatureGroupBase
updateFeatures
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
feature
- Feature metadata object
FeatureStoreException
- FeatureStoreException
IOException
- IOException
ParseException
- ParseException
public void appendFeatures(List<Feature> features) throws FeatureStoreException, IOException, ParseException
FeatureGroupBase
appendFeatures
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
features
- List of Feature metadata objects
FeatureStoreException
- FeatureStoreException
IOException
- IOException
ParseException
- ParseException
public void appendFeatures(Feature features) throws FeatureStoreException, IOException, ParseException
FeatureGroupBase
appendFeatures
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
features
- Feature metadata object
FeatureStoreException
- FeatureStoreException
IOException
- IOException
ParseException
- ParseException
public Statistics computeStatistics() throws FeatureStoreException, IOException
FeatureGroupBase
computeStatistics
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
FeatureStoreException
- FeatureStoreException
IOException
- IOException
public Statistics computeStatistics(String wallclockTime) throws FeatureStoreException, IOException, ParseException
computeStatistics
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
FeatureStoreException
IOException
ParseException
public Statistics getStatistics() throws FeatureStoreException, IOException
FeatureGroupBase
getStatistics
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
FeatureStoreException
- FeatureStoreException
IOException
- IOException
Copyright © 2025. All rights reserved.