public class ExternalFeatureGroup extends FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
created, creator, deltaStreamerJobConf, deprecated, description, eventTime, expectationsNames, featureGroupEngineBase, features, featureStore, hudiPrecombineKey, id, location, LOGGER, name, notificationTopicName, onlineEnabled, onlineTopicName, partitionKeys, path, primaryKeys, statisticColumns, statisticsConfig, storageConnector, subject, timeTravelFormat, topicName, type, utils, version
Constructor and Description |
---|
ExternalFeatureGroup() |
ExternalFeatureGroup(FeatureStore featureStore,
int id) |
ExternalFeatureGroup(FeatureStore featureStore,
@NonNull String name,
Integer version,
String query,
ExternalDataFormat dataFormat,
String path,
Map<String,String> options,
@NonNull StorageConnector storageConnector,
String description,
List<String> primaryKeys,
List<Feature> features,
StatisticsConfig statisticsConfig,
String eventTime,
boolean onlineEnabled,
String onlineTopicName,
String topicName,
String notificationTopicName) |
Modifier and Type | Method and Description |
---|---|
void |
appendFeatures(Feature features) |
void |
appendFeatures(List<Feature> features) |
Statistics |
computeStatistics() |
Statistics |
getStatistics() |
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData)
Incrementally insert data to the online storage of an external feature group.
|
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Map<String,String> writeOptions)
Incrementally insert data to the online storage of an external feature group.
|
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Storage storage) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Map<String,String> writeOptions) |
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read() |
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(boolean online) |
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(boolean online,
Map<String,String> readOptions) |
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(Map<String,String> readOptions) |
void |
save() |
Query |
select(List<String> features) |
Query |
selectAll() |
Query |
selectExcept(List<String> features) |
Query |
selectExceptFeatures(List<Feature> features) |
Query |
selectFeatures(List<Feature> features) |
void |
show(int numRows) |
void |
show(int numRows,
boolean online) |
void |
updateFeatures(Feature feature) |
void |
updateFeatures(List<Feature> features) |
addTag, checkDeprecated, delete, deleteTag, getAvroSchema, getComplexFeatures, getDeserializedAvroSchema, getDeserializedEncodedAvroSchema, getEncodedAvroSchema, getFeature, getFeatureAvroSchema, getPrimaryKeys, getSubject, getTag, getTags, setDeprecated, unloadSubject, updateDeprecated, updateDeprecated, updateDescription, updateFeatureDescription, updateNotificationTopicName, updateStatisticsConfig
public ExternalFeatureGroup(FeatureStore featureStore, @NonNull String name, Integer version, String query, ExternalDataFormat dataFormat, String path, Map<String,String> options, @NonNull StorageConnector storageConnector, String description, List<String> primaryKeys, List<Feature> features, StatisticsConfig statisticsConfig, String eventTime, boolean onlineEnabled, String onlineTopicName, String topicName, String notificationTopicName)
public ExternalFeatureGroup()
public ExternalFeatureGroup(FeatureStore featureStore, int id)
public void save() throws FeatureStoreException, IOException
FeatureStoreException
IOException
public org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read() throws FeatureStoreException, IOException
FeatureStoreException
IOException
public org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(boolean online) throws FeatureStoreException, IOException
FeatureStoreException
IOException
public org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(Map<String,String> readOptions) throws FeatureStoreException, IOException
FeatureStoreException
IOException
public org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(boolean online, Map<String,String> readOptions) throws FeatureStoreException, IOException
FeatureStoreException
IOException
public void show(int numRows) throws FeatureStoreException, IOException
FeatureStoreException
IOException
public void show(int numRows, boolean online) throws FeatureStoreException, IOException
FeatureStoreException
IOException
public void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, Storage storage) throws IOException, FeatureStoreException, ParseException
public void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData) throws FeatureStoreException, IOException
// get feature store handle
FeatureStore fs = HopsworksConnection.builder().build().getFeatureStore();
// get feature group handle
ExternalFeatureGroup fg = fs.getExternalFeatureGroup("electricity_prices", 1);
// insert data
fg.insert(featureData);
featureData
- Spark DataFrame, RDD. Features to be saved.
IOException
- Generic IO exception.
FeatureStoreException
- If the client is not connected to Hopsworks, cannot run the read query on storage, and/or
cannot reconcile the schema.
public void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, Map<String,String> writeOptions) throws FeatureStoreException, IOException, ParseException
// get feature store handle
FeatureStore fs = HopsworksConnection.builder().build().getFeatureStore();
// get feature group handle
ExternalFeatureGroup fg = fs.getExternalFeatureGroup("electricity_prices", 1);
// Define additional write options (for example for Spark)
Map<String, String> writeOptions = new HashMap<String, String>();
// insert data
fg.insert(featureData, writeOptions);
featureData
- Spark DataFrame, RDD. Features to be saved.
writeOptions
- Additional write options as key-value pairs.
IOException
- Generic IO exception.
FeatureStoreException
- If the client is not connected to Hopsworks, cannot run the read query on storage, and/or
cannot reconcile the schema.
ParseException
public Query selectAll()
public Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData) throws Exception
insertStream
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
Exception
public Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, Map<String,String> writeOptions) throws Exception
insertStream
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
Exception
public void updateFeatures(List<Feature> features) throws FeatureStoreException, IOException, ParseException
public void updateFeatures(Feature feature) throws FeatureStoreException, IOException, ParseException
public void appendFeatures(List<Feature> features) throws FeatureStoreException, IOException, ParseException
public void appendFeatures(Feature features) throws FeatureStoreException, IOException, ParseException
public Statistics computeStatistics() throws FeatureStoreException, IOException
FeatureStoreException
IOException
public Statistics getStatistics() throws FeatureStoreException, IOException
FeatureStoreException
IOException
Copyright © 2025. All rights reserved.