SAP HANA Tutorial, Material and Certification Guide

Saturday, 14 May 2022

APL Time Series Forecast using a Segmented Measure

The latest release of the Automated Predictive Library (APL) introduces the capability to build several time series models at once from a segmented measure like Sales by Store for example or Profit by Product. No need any more to define a loop in your SQL code or Python code. Just tell APL what column represents the segment in your dataset. You can also specify how many HANA tasks to run in parallel for a faster execution.

This new capability requires HANA ML 2.13 and APL 2209.

Let’s see how it works in Python and then in SQL.

From a Jupyter notebook, we first define the HANA dataframe for the input series.

from hana_ml import dataframe as hd

conn = hd.ConnectionContext(userkey='MLMDA_KEY')

series_in = conn.table('TS_PRODUCT_SALES', schema='USER_APL')

series_in.head(5).collect()

SAP HANA Exam Prep, SAP HANA Career, SAP HANA Jobs, SAP HANA Learning, SAP HANA Skills, SAP HANA Prep, SAP HANA Preparation

How many segments do we have?

series_in.distinct(cols='Product').collect()

We run a fit_predict by product with 4 parallel tasks…

col_segment= 'Product'

from hana_ml.algorithms.apl.time_series import AutoTimeSeries

apl_model = AutoTimeSeries(time_column_name= 'Month', target= 'Quantity', horizon=6,

segment_column_name= col_segment, max_tasks= 4)

series_out = apl_model.fit_predict(data = series_in)

df_out = series_out.collect()

dict = {'ACTUAL': 'Actual',

'PREDICTED': 'Forecast',

'LOWER_INT_95PCT': 'Lower Limit',

'UPPER_INT_95PCT': 'Upper Limit' }

df_out.rename(columns=dict, inplace=True)

and check the status of each task.

my_filter = "\"KEY\" in ('AplTaskStatus')"

df = apl_model.get_summary().filter(my_filter).collect()

df.columns = [col_segment, 'Property', 'Task Status']

df.drop('Property', axis=1, inplace=True)

df.style.hide_index()

What went wrong with Earrings?

my_filter = "LEVEL < 3 and OID = 'Earrings'"

df = apl_model.get_fit_operation_log().filter(my_filter).collect()

df.columns = [col_segment, 'Time', 'Level', 'Origin', 'Log Text']

df.drop('Time', axis=1, inplace=True)

df.style.hide_index()

What are the model components for the other three segments?

df = apl_model.get_model_components()

df.style.hide_index()

We look at their forecasting accuracy…

my_metric = 'RootMeanSquareError' # Choose MAPE or SMAPE or MeanAbsoluteError or RootMeanSquareError

df = apl_model.get_horizon_wide_metric(metric_name=my_metric)

df.columns = [col_segment, my_metric]

df.style.format({my_metric:'{:,.4f}'}).hide_index()

and plot the predicted values for Tie.

seg_value = "Tie"

df_one_seg = df_out.query('Product == @seg_value')

import hvplot.pandas

df_one_seg.hvplot.line(

'Month' , ['Actual','Forecast'],

value_label='Quantity', ylim=(0, 1000),

title = 'Monthly Quantity Sold for ' + seg_value,

fontsize={'title': 10, 'labels': 10},

legend = 'bottom', height = 350, width = 900

)

Our readers working with the SQL interface rather than the Python API will be interested in the following sample code.

--- Input Series

drop view "TS_SORTED";

create view "TS_SORTED" as select * from APL_SAMPLES.PRODUCTS_SALES order by 1,2;

--- Output Series

drop table FORECAST_OUT;

create table FORECAST_OUT (

"Product" nvarchar(25),

"Month" DAYDATE,

"Quantity" integer,

"kts_1" DOUBLE

);

--- Persisted Tables for Debrief

drop table "INDICATORS";

create table "INDICATORS" like "SAP_PA_APL"."sap.pa.apl.base::BASE.T.INDICATORS";

drop table "DEBRIEF_METRIC";

create table "DEBRIEF_METRIC" like "SAP_PA_APL"."sap.pa.apl.base::BASE.T.DEBRIEF_METRIC_OID";

drop table "DEBRIEF_PROPERTY";

create table "DEBRIEF_PROPERTY" like "SAP_PA_APL"."sap.pa.apl.base::BASE.T.DEBRIEF_PROPERTY_OID";

--- Procedure

DO BEGIN

declare header "SAP_PA_APL"."sap.pa.apl.base::BASE.T.FUNCTION_HEADER";

declare config "SAP_PA_APL"."sap.pa.apl.base::BASE.T.OPERATION_CONFIG_EXTENDED";

declare var_desc "SAP_PA_APL"."sap.pa.apl.base::BASE.T.VARIABLE_DESC_OID";

declare var_role "SAP_PA_APL"."sap.pa.apl.base::BASE.T.VARIABLE_ROLES_WITH_COMPOSITES_OID";

declare out_log "SAP_PA_APL"."sap.pa.apl.base::BASE.T.OPERATION_LOG";

declare out_sum "SAP_PA_APL"."sap.pa.apl.base::BASE.T.SUMMARY";

declare out_indic "SAP_PA_APL"."sap.pa.apl.base::BASE.T.INDICATORS";

declare out_debrief_metric "SAP_PA_APL"."sap.pa.apl.base::BASE.T.DEBRIEF_METRIC_OID";

declare out_debrief_property "SAP_PA_APL"."sap.pa.apl.base::BASE.T.DEBRIEF_PROPERTY_OID";

:header.insert(('Oid', 'All Products'));

:header.insert(('LogLevel', '2'));

:header.insert(('MaxTasks', '4')); -- PARALLEL TASKS

:config.insert(('APL/SegmentColumnName', 'Product',null)); -- THE SEGMENT

:config.insert(('APL/Horizon', '6',null));

:config.insert(('APL/TimePointColumnName', 'Month',null));

:config.insert(('APL/LastTrainingTimePoint', '2021-12-01 00:00:00',null));

:config.insert(('APL/DecomposeInfluencers','true',null));

:var_role.insert(('Month', 'input',null,null,null));

:var_role.insert(('Quantity', 'target',null,null,null));

"SAP_PA_APL"."sap.pa.apl.base::FORECAST_AND_DEBRIEF"(

:header, :config, :var_desc, :var_role, 'USER_APL','TS_SORTED', 'USER_APL','FORECAST_OUT',out_log,out_sum,out_indic,out_debrief_metric,out_debrief_property);

select * from :out_log;

select * from :out_sum where key in ('AplTaskStatus','AplTotalElapsedTime') order by 1,2;

insert into "USER_APL"."INDICATORS" select * from :out_indic;

insert into "USER_APL"."DEBRIEF_METRIC" select * from :out_debrief_metric;

insert into "USER_APL"."DEBRIEF_PROPERTY" select * from :out_debrief_property;

END;

SAP HANA Central

Pages

Saturday, 14 May 2022

APL Time Series Forecast using a Segmented Measure

No comments:

Post a Comment