public class ExternalFeatureGroup extends FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
created, creator, dataSource, deltaStreamerJobConf, deprecated, description, eventTime, expectationsNames, featureGroupEngineBase, features, featureStore, featurestoreId, hudiPrecombineKey, id, location, LOGGER, name, notificationTopicName, onlineConfig, onlineEnabled, onlineIngestionApi, onlineTopicName, partitionKeys, primaryKeys, statisticColumns, statisticsConfig, storageConnector, subject, timeTravelFormat, topicName, type, utils, version
Constructor and Description |
---|
ExternalFeatureGroup() |
ExternalFeatureGroup(FeatureStore featureStore,
int id) |
ExternalFeatureGroup(FeatureStore featureStore,
@NonNull String name,
Integer version,
String query,
ExternalDataFormat dataFormat,
String path,
Map<String,String> options,
@NonNull StorageConnector storageConnector,
String description,
List<String> primaryKeys,
List<Feature> features,
StatisticsConfig statisticsConfig,
String eventTime,
boolean onlineEnabled,
String onlineTopicName,
String topicName,
String notificationTopicName,
OnlineConfig onlineConfig) |
Modifier and Type | Method and Description |
---|---|
void |
appendFeatures(Feature features) |
void |
appendFeatures(List<Feature> features) |
Statistics |
computeStatistics() |
Statistics |
getStatistics() |
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData)
Incrementally insert data to the online storage of an external feature group.
|
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Map<String,String> writeOptions)
Incrementally insert data to the online storage of an external feature group.
|
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Storage storage) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Map<String,String> writeOptions) |
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read() |
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(boolean online) |
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(boolean online,
Map<String,String> readOptions) |
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(Map<String,String> readOptions) |
void |
save() |
Query |
select(List<String> features) |
Query |
selectAll() |
Query |
selectExcept(List<String> features) |
Query |
selectExceptFeatures(List<Feature> features) |
Query |
selectFeatures(List<Feature> features) |
void |
show(int numRows) |
void |
show(int numRows,
boolean online) |
void |
updateFeatures(Feature feature) |
void |
updateFeatures(List<Feature> features) |
addTag, checkDeprecated, delete, deleteTag, getAvroSchema, getComplexFeatures, getDeserializedAvroSchema, getDeserializedEncodedAvroSchema, getEncodedAvroSchema, getFeature, getFeatureAvroSchema, getLatestOnlineIngestion, getOnlineIngestion, getPrimaryKeys, getSubject, getTag, getTags, setDeprecated, unloadSubject, updateDeprecated, updateDeprecated, updateDescription, updateFeatureDescription, updateNotificationTopicName, updateStatisticsConfig
public ExternalFeatureGroup(FeatureStore featureStore, @NonNull @NonNull String name, Integer version, String query, ExternalDataFormat dataFormat, String path, Map<String,String> options, @NonNull @NonNull StorageConnector storageConnector, String description, List<String> primaryKeys, List<Feature> features, StatisticsConfig statisticsConfig, String eventTime, boolean onlineEnabled, String onlineTopicName, String topicName, String notificationTopicName, OnlineConfig onlineConfig)
public ExternalFeatureGroup()
public ExternalFeatureGroup(FeatureStore featureStore, int id)
public void save() throws FeatureStoreException, IOException
FeatureStoreException
IOException
public org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read() throws FeatureStoreException, IOException
FeatureStoreException
IOException
public org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(boolean online) throws FeatureStoreException, IOException
FeatureStoreException
IOException
public org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(Map<String,String> readOptions) throws FeatureStoreException, IOException
FeatureStoreException
IOException
public org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(boolean online, Map<String,String> readOptions) throws FeatureStoreException, IOException
FeatureStoreException
IOException
public void show(int numRows) throws FeatureStoreException, IOException
FeatureStoreException
IOException
public void show(int numRows, boolean online) throws FeatureStoreException, IOException
FeatureStoreException
IOException
public void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, Storage storage) throws IOException, FeatureStoreException, ParseException
public void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData) throws FeatureStoreException, IOException
// get feature store handle
FeatureStore fs = HopsworksConnection.builder().build().getFeatureStore();
// get feature group handle
ExternalFeatureGroup fg = fs.getExternalFeatureGroup("electricity_prices", 1);
// insert data
fg.insert(featureData, writeOptions);
featureData
- Spark DataFrame, RDD. Features to be saved.IOException
- Generic IO exception.FeatureStoreException
- If client is not connected to Hopsworks; cannot run read query on storage and/or
can't reconcile schema.public void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, Map<String,String> writeOptions) throws FeatureStoreException, IOException, ParseException
// get feature store handle
FeatureStore fs = HopsworksConnection.builder().build().getFeatureStore();
// get feature group handle
ExternalFeatureGroup fg = fs.getExternalFeatureGroup("electricity_prices", 1);
// Define additional write options (for example for Spark)
Map<String, String> writeOptions = = new HashMap<String, String>();
// insert data
fg.insert(featureData, writeOptions);
featureData
- Spark DataFrame, RDD. Features to be saved.writeOptions
- Additional write options as key-value pairs.IOException
- Generic IO exception.FeatureStoreException
- If client is not connected to Hopsworks; cannot run read query on storage and/or
can't reconcile schema.ParseException
public Query selectAll()
public Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData) throws Exception
insertStream
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
Exception
public Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, Map<String,String> writeOptions) throws Exception
insertStream
in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
Exception
public void updateFeatures(List<Feature> features) throws FeatureStoreException, IOException, ParseException
public void updateFeatures(Feature feature) throws FeatureStoreException, IOException, ParseException
public void appendFeatures(List<Feature> features) throws FeatureStoreException, IOException, ParseException
public void appendFeatures(Feature features) throws FeatureStoreException, IOException, ParseException
public Statistics computeStatistics() throws FeatureStoreException, IOException
FeatureStoreException
IOException
public Statistics getStatistics() throws FeatureStoreException, IOException
FeatureStoreException
IOException
Copyright © 2025. All rights reserved.