The latest release of the Automated Predictive Library (APL) introduces the capability to build several time series models at once from a segmented measure like Sales by Store for example or Profit by Product. No need any more to define a loop in your SQL code or Python code. Just tell APL what column represents the segment in your dataset. You can also specify how many HANA tasks to run in parallel for a faster execution.
This new capability requires HANA ML 2.13 and APL 2209.
Let’s see how it works in Python and then in SQL.
From a Jupyter notebook, we first define the HANA dataframe for the input series.
from hana_ml import dataframe as hd
conn = hd.ConnectionContext(userkey='MLMDA_KEY')
series_in = conn.table('TS_PRODUCT_SALES', schema='USER_APL')
series_in.head(5).collect()
How many segments do we have?
series_in.distinct(cols='Product').collect()
We run a fit_predict by product with 4 parallel tasks…
col_segment= 'Product'
from hana_ml.algorithms.apl.time_series import AutoTimeSeries
apl_model = AutoTimeSeries(time_column_name= 'Month', target= 'Quantity', horizon=6,
segment_column_name= col_segment, max_tasks= 4)
series_out = apl_model.fit_predict(data = series_in)
df_out = series_out.collect()
dict = {'ACTUAL': 'Actual',
'PREDICTED': 'Forecast',
'LOWER_INT_95PCT': 'Lower Limit',
'UPPER_INT_95PCT': 'Upper Limit' }
df_out.rename(columns=dict, inplace=True)
and check the status of each task.
my_filter = "\"KEY\" in ('AplTaskStatus')"
df = apl_model.get_summary().filter(my_filter).collect()
df.columns = [col_segment, 'Property', 'Task Status']
df.drop('Property', axis=1, inplace=True)
df.style.hide_index()
What went wrong with Earrings?
my_filter = "LEVEL < 3 and OID = 'Earrings'"
df = apl_model.get_fit_operation_log().filter(my_filter).collect()
df.columns = [col_segment, 'Time', 'Level', 'Origin', 'Log Text']
df.drop('Time', axis=1, inplace=True)
df.style.hide_index()
What are the model components for the other three segments?
df = apl_model.get_model_components()
df.style.hide_index()
We look at their forecasting accuracy…
my_metric = 'RootMeanSquareError' # Choose MAPE or SMAPE or MeanAbsoluteError or RootMeanSquareError
df = apl_model.get_horizon_wide_metric(metric_name=my_metric)
df.columns = [col_segment, my_metric]
df.style.format({my_metric:'{:,.4f}'}).hide_index()
and plot the predicted values for Tie.
seg_value = "Tie"
df_one_seg = df_out.query('Product == @seg_value')
import hvplot.pandas
df_one_seg.hvplot.line(
'Month' , ['Actual','Forecast'],
value_label='Quantity', ylim=(0, 1000),
title = 'Monthly Quantity Sold for ' + seg_value,
fontsize={'title': 10, 'labels': 10},
legend = 'bottom', height = 350, width = 900
)
Our readers working with the SQL interface rather than the Python API will be interested in the following sample code.
--- Input Series
drop view "TS_SORTED";
create view "TS_SORTED" as select * from APL_SAMPLES.PRODUCTS_SALES order by 1,2;
--- Output Series
drop table FORECAST_OUT;
create table FORECAST_OUT (
"Product" nvarchar(25),
"Month" DAYDATE,
"Quantity" integer,
"kts_1" DOUBLE
);
--- Persisted Tables for Debrief
drop table "INDICATORS";
create table "INDICATORS" like "SAP_PA_APL"."sap.pa.apl.base::BASE.T.INDICATORS";
drop table "DEBRIEF_METRIC";
create table "DEBRIEF_METRIC" like "SAP_PA_APL"."sap.pa.apl.base::BASE.T.DEBRIEF_METRIC_OID";
drop table "DEBRIEF_PROPERTY";
create table "DEBRIEF_PROPERTY" like "SAP_PA_APL"."sap.pa.apl.base::BASE.T.DEBRIEF_PROPERTY_OID";
--- Procedure
DO BEGIN
declare header "SAP_PA_APL"."sap.pa.apl.base::BASE.T.FUNCTION_HEADER";
declare config "SAP_PA_APL"."sap.pa.apl.base::BASE.T.OPERATION_CONFIG_EXTENDED";
declare var_desc "SAP_PA_APL"."sap.pa.apl.base::BASE.T.VARIABLE_DESC_OID";
declare var_role "SAP_PA_APL"."sap.pa.apl.base::BASE.T.VARIABLE_ROLES_WITH_COMPOSITES_OID";
declare out_log "SAP_PA_APL"."sap.pa.apl.base::BASE.T.OPERATION_LOG";
declare out_sum "SAP_PA_APL"."sap.pa.apl.base::BASE.T.SUMMARY";
declare out_indic "SAP_PA_APL"."sap.pa.apl.base::BASE.T.INDICATORS";
declare out_debrief_metric "SAP_PA_APL"."sap.pa.apl.base::BASE.T.DEBRIEF_METRIC_OID";
declare out_debrief_property "SAP_PA_APL"."sap.pa.apl.base::BASE.T.DEBRIEF_PROPERTY_OID";
:header.insert(('Oid', 'All Products'));
:header.insert(('LogLevel', '2'));
:header.insert(('MaxTasks', '4')); -- PARALLEL TASKS
:config.insert(('APL/SegmentColumnName', 'Product',null)); -- THE SEGMENT
:config.insert(('APL/Horizon', '6',null));
:config.insert(('APL/TimePointColumnName', 'Month',null));
:config.insert(('APL/LastTrainingTimePoint', '2021-12-01 00:00:00',null));
:config.insert(('APL/DecomposeInfluencers','true',null));
:var_role.insert(('Month', 'input',null,null,null));
:var_role.insert(('Quantity', 'target',null,null,null));
"SAP_PA_APL"."sap.pa.apl.base::FORECAST_AND_DEBRIEF"(
:header, :config, :var_desc, :var_role, 'USER_APL','TS_SORTED', 'USER_APL','FORECAST_OUT',out_log,out_sum,out_indic,out_debrief_metric,out_debrief_property);
select * from :out_log;
select * from :out_sum where key in ('AplTaskStatus','AplTotalElapsedTime') order by 1,2;
insert into "USER_APL"."INDICATORS" select * from :out_indic;
insert into "USER_APL"."DEBRIEF_METRIC" select * from :out_debrief_metric;
insert into "USER_APL"."DEBRIEF_PROPERTY" select * from :out_debrief_property;
END;
No comments:
Post a Comment