public class ExternalFeatureGroup extends FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>
created, creator, deltaStreamerJobConf, description, eventTime, expectationsNames, featureGroupEngineBase, features, featureStore, hudiPrecombineKey, id, location, LOGGER, name, onlineEnabled, onlineTopicName, partitionKeys, primaryKeys, statisticColumns, statisticsConfig, subject, timeTravelFormat, type, utils, version| Constructor and Description |
|---|
ExternalFeatureGroup() |
ExternalFeatureGroup(FeatureStore featureStore,
int id) |
ExternalFeatureGroup(FeatureStore featureStore,
@NonNull String name,
Integer version,
String query,
ExternalDataFormat dataFormat,
String path,
Map<String,String> options,
@NonNull StorageConnector storageConnector,
String description,
List<String> primaryKeys,
List<Feature> features,
StatisticsConfig statisticsConfig,
String eventTime,
boolean onlineEnabled,
String onlineTopicName) |
| Modifier and Type | Method and Description |
|---|---|
void |
appendFeatures(Feature features)
Append a single feature to the schema of the feature group.
|
void |
appendFeatures(List<Feature> features)
Append features to the schema of the feature group.
|
QueryBase |
asOf(String wallclockTime) |
QueryBase |
asOf(String wallclockTime,
String excludeUntil) |
void |
commitDeleteRecord(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData) |
void |
commitDeleteRecord(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Map<String,String> writeOptions) |
Map<Long,Map<String,String>> |
commitDetails() |
Map<Long,Map<String,String>> |
commitDetails(Integer limit) |
Map<Long,Map<String,String>> |
commitDetails(String wallclockTime) |
Map<Long,Map<String,String>> |
commitDetails(String wallclockTime,
Integer limit) |
Statistics |
computeStatistics()
Recompute the statistics for the feature group and save them to the feature store.
|
Statistics |
computeStatistics(String wallclockTime) |
Statistics |
getStatistics()
Get the last statistics commit for the feature group.
|
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData)
Incrementally insert data to the online storage of an external feature group.
|
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
boolean overwrite) |
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
boolean overwrite,
Map<String,String> writeOptions) |
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
boolean overwrite,
Map<String,String> writeOptions,
JobConfiguration jobConfiguration) |
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
HudiOperationType operation) |
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
JobConfiguration jobConfiguration) |
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Map<String,String> writeOptions)
Incrementally insert data to the online storage of an external feature group.
|
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Storage storage) |
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Storage storage,
boolean overwrite) |
void |
insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Storage storage,
boolean overwrite,
HudiOperationType operation,
Map<String,String> writeOptions) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Map<String,String> writeOptions) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName,
Map<String,String> writeOptions) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName,
String outputMode) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName,
String outputMode,
boolean awaitTermination,
Long timeout) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName,
String outputMode,
boolean awaitTermination,
Long timeout,
String checkpointLocation) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName,
String outputMode,
boolean awaitTermination,
Long timeout,
String checkpointLocation,
Map<String,String> writeOptions) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName,
String outputMode,
boolean awaitTermination,
Long timeout,
String checkpointLocation,
Map<String,String> writeOptions,
JobConfiguration jobConfiguration) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName,
String outputMode,
boolean awaitTermination,
String checkpointLocation) |
Object |
insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
String queryName,
String outputMode,
String checkpointLocation) |
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read() |
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(boolean online) |
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(boolean online,
Map<String,String> readOptions) |
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(Map<String,String> readOptions) |
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(String wallclockTime) |
org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
read(String wallclockTime,
Map<String,String> readOptions) |
void |
save() |
Query |
select(List<String> features) |
Query |
selectAll() |
Query |
selectExcept(List<String> features) |
Query |
selectExceptFeatures(List<Feature> features) |
Query |
selectFeatures(List<Feature> features) |
void |
show(int numRows) |
void |
show(int numRows,
boolean online) |
void |
updateFeatures(Feature feature)
Update the metadata of a single feature.
|
void |
updateFeatures(List<Feature> features)
Update the metadata of multiple features.
|
addTag, delete, deleteTag, getAvroSchema, getComplexFeatures, getDeserializedAvroSchema, getDeserializedEncodedAvroSchema, getEncodedAvroSchema, getFeature, getFeatureAvroSchema, getPrimaryKeys, getSubject, getTag, getTags, unloadSubject, updateDescription, updateFeatureDescription, updateStatisticsConfigpublic ExternalFeatureGroup(FeatureStore featureStore, @NonNull String name, Integer version, String query, ExternalDataFormat dataFormat, String path, Map<String,String> options, @NonNull StorageConnector storageConnector, String description, List<String> primaryKeys, List<Feature> features, StatisticsConfig statisticsConfig, String eventTime, boolean onlineEnabled, String onlineTopicName)
public ExternalFeatureGroup()
public ExternalFeatureGroup(FeatureStore featureStore, int id)
public void save()
throws FeatureStoreException,
IOException
FeatureStoreExceptionIOExceptionpublic org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read()
throws FeatureStoreException,
IOException
read in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>FeatureStoreExceptionIOExceptionpublic org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(boolean online)
throws FeatureStoreException,
IOException
read in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>FeatureStoreExceptionIOExceptionpublic org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(Map<String,String> readOptions) throws FeatureStoreException, IOException
read in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>FeatureStoreExceptionIOExceptionpublic org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(boolean online,
Map<String,String> readOptions)
throws FeatureStoreException,
IOException
read in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>FeatureStoreExceptionIOExceptionpublic org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(String wallclockTime) throws FeatureStoreException, IOException, ParseException
read in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>FeatureStoreExceptionIOExceptionParseExceptionpublic org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> read(String wallclockTime, Map<String,String> readOptions) throws FeatureStoreException, IOException, ParseException
read in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>FeatureStoreExceptionIOExceptionParseExceptionpublic QueryBase asOf(String wallclockTime) throws FeatureStoreException, ParseException
asOf in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>FeatureStoreExceptionParseExceptionpublic QueryBase asOf(String wallclockTime, String excludeUntil) throws FeatureStoreException, ParseException
asOf in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>FeatureStoreExceptionParseExceptionpublic void show(int numRows)
throws FeatureStoreException,
IOException
show in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>FeatureStoreExceptionIOExceptionpublic void show(int numRows,
boolean online)
throws FeatureStoreException,
IOException
show in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>FeatureStoreExceptionIOExceptionpublic void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Storage storage)
throws IOException,
FeatureStoreException,
ParseException
insert in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>IOExceptionFeatureStoreExceptionParseExceptionpublic void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
boolean overwrite)
throws IOException,
FeatureStoreException,
ParseException
insert in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>IOExceptionFeatureStoreExceptionParseExceptionpublic void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Storage storage,
boolean overwrite)
throws IOException,
FeatureStoreException,
ParseException
insert in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>IOExceptionFeatureStoreExceptionParseExceptionpublic void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
boolean overwrite,
Map<String,String> writeOptions)
throws FeatureStoreException,
IOException,
ParseException
insert in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>FeatureStoreExceptionIOExceptionParseExceptionpublic void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
HudiOperationType operation)
throws FeatureStoreException,
IOException,
ParseException
insert in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>FeatureStoreExceptionIOExceptionParseExceptionpublic void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Storage storage,
boolean overwrite,
HudiOperationType operation,
Map<String,String> writeOptions)
throws FeatureStoreException,
IOException,
ParseException
insert in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>FeatureStoreExceptionIOExceptionParseExceptionpublic void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
JobConfiguration jobConfiguration)
throws FeatureStoreException,
IOException,
ParseException
insert in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>FeatureStoreExceptionIOExceptionParseExceptionpublic void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
boolean overwrite,
Map<String,String> writeOptions,
JobConfiguration jobConfiguration)
throws FeatureStoreException,
IOException,
ParseException
insert in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>FeatureStoreExceptionIOExceptionParseExceptionpublic void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData)
throws FeatureStoreException,
IOException
// get feature store handle
FeatureStore fs = HopsworksConnection.builder().build().getFeatureStore();
// get feature group handle
ExternalFeatureGroup fg = fs.getExternalFeatureGroup("electricity_prices", 1);
// insert data
fg.insert(featureData);
insert in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>featureData - Spark DataFrame, RDD. Features to be saved.IOException - Generic IO exception.FeatureStoreException - If client is not connected to Hopsworks; cannot run read query on storage and/or
can't reconcile schema.public void insert(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Map<String,String> writeOptions)
throws FeatureStoreException,
IOException,
ParseException
// get feature store handle
FeatureStore fs = HopsworksConnection.builder().build().getFeatureStore();
// get feature group handle
ExternalFeatureGroup fg = fs.getExternalFeatureGroup("electricity_prices", 1);
// Define additional write options (for example for Spark)
Map<String, String> writeOptions = new HashMap<String, String>();
// insert data
fg.insert(featureData, writeOptions);
insert in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>featureData - Spark DataFrame, RDD. Features to be saved.writeOptions - Additional write options as key-value pairs.IOException - Generic IO exception.FeatureStoreException - If client is not connected to Hopsworks; cannot run read query on storage and/or
can't reconcile schema.ParseExceptionpublic void commitDeleteRecord(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData)
throws FeatureStoreException,
IOException,
ParseException
commitDeleteRecord in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>FeatureStoreExceptionIOExceptionParseExceptionpublic void commitDeleteRecord(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData,
Map<String,String> writeOptions)
throws FeatureStoreException,
IOException,
ParseException
commitDeleteRecord in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>FeatureStoreExceptionIOExceptionParseExceptionpublic Map<Long,Map<String,String>> commitDetails() throws IOException, FeatureStoreException, ParseException
commitDetails in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>IOExceptionFeatureStoreExceptionParseExceptionpublic Map<Long,Map<String,String>> commitDetails(Integer limit) throws IOException, FeatureStoreException, ParseException
commitDetails in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>IOExceptionFeatureStoreExceptionParseExceptionpublic Map<Long,Map<String,String>> commitDetails(String wallclockTime) throws IOException, FeatureStoreException, ParseException
commitDetails in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>IOExceptionFeatureStoreExceptionParseExceptionpublic Map<Long,Map<String,String>> commitDetails(String wallclockTime, Integer limit) throws IOException, FeatureStoreException, ParseException
commitDetails in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>IOExceptionFeatureStoreExceptionParseExceptionpublic Query selectFeatures(List<Feature> features)
selectFeatures in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>public Query select(List<String> features)
select in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>public Query selectAll()
selectAll in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>public Query selectExceptFeatures(List<Feature> features)
selectExceptFeatures in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>public Query selectExcept(List<String> features)
selectExcept in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>public Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData) throws Exception
insertStream in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>Exceptionpublic Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName) throws Exception
insertStream in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>Exceptionpublic Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, Map<String,String> writeOptions) throws Exception
insertStream in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>Exceptionpublic Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName, Map<String,String> writeOptions) throws Exception
insertStream in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>Exceptionpublic Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName, String outputMode) throws Exception
insertStream in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>Exceptionpublic Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName, String outputMode, String checkpointLocation) throws Exception
insertStream in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>Exceptionpublic Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName, String outputMode, boolean awaitTermination, Long timeout) throws Exception
insertStream in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>Exceptionpublic Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName, String outputMode, boolean awaitTermination, Long timeout, String checkpointLocation) throws Exception
insertStream in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>Exceptionpublic Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName, String outputMode, boolean awaitTermination, Long timeout, String checkpointLocation, Map<String,String> writeOptions) throws Exception
insertStream in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>Exceptionpublic Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName, String outputMode, boolean awaitTermination, String checkpointLocation) throws Exception
insertStream in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>Exceptionpublic Object insertStream(org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> featureData, String queryName, String outputMode, boolean awaitTermination, Long timeout, String checkpointLocation, Map<String,String> writeOptions, JobConfiguration jobConfiguration) throws Exception
insertStream in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>Exceptionpublic void updateFeatures(List<Feature> features) throws FeatureStoreException, IOException, ParseException
FeatureGroupBaseupdateFeatures in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>features - List of Feature metadata objectsFeatureStoreException - FeatureStoreExceptionIOException - IOExceptionParseException - ParseExceptionpublic void updateFeatures(Feature feature) throws FeatureStoreException, IOException, ParseException
FeatureGroupBaseupdateFeatures in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>feature - Feature metadata objectFeatureStoreException - FeatureStoreExceptionIOException - IOExceptionParseException - ParseExceptionpublic void appendFeatures(List<Feature> features) throws FeatureStoreException, IOException, ParseException
FeatureGroupBaseappendFeatures in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>features - list of Feature metadata objectsFeatureStoreException - FeatureStoreExceptionIOException - IOExceptionParseException - ParseExceptionpublic void appendFeatures(Feature features) throws FeatureStoreException, IOException, ParseException
FeatureGroupBaseappendFeatures in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>features - Feature metadata objectFeatureStoreException - FeatureStoreExceptionIOException - IOExceptionParseException - ParseExceptionpublic Statistics computeStatistics() throws FeatureStoreException, IOException
FeatureGroupBasecomputeStatistics in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>FeatureStoreException - FeatureStoreExceptionIOException - IOExceptionpublic Statistics computeStatistics(String wallclockTime) throws FeatureStoreException, IOException, ParseException
computeStatistics in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>FeatureStoreExceptionIOExceptionParseExceptionpublic Statistics getStatistics() throws FeatureStoreException, IOException
FeatureGroupBasegetStatistics in class FeatureGroupBase<org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>>FeatureStoreException - FeatureStoreExceptionIOException - IOExceptionCopyright © 2023. All rights reserved.