Saturday, 14 May 2022

APL Time Series Forecast using a Segmented Measure

The latest release of the Automated Predictive Library (APL) introduces the capability to build several time series models at once from a segmented measure like Sales by Store for example or Profit by Product. No need any more to define a loop in your SQL code or Python code. Just tell APL what column represents the segment in your dataset. You can also specify how many HANA tasks to run in parallel for a faster execution.

This new capability requires HANA ML 2.13 and APL 2209.

Let’s see how it works in Python and then in SQL.

From a Jupyter notebook, we first define the HANA dataframe for the input series.

from hana_ml import dataframe as hd

conn = hd.ConnectionContext(userkey='MLMDA_KEY')

series_in = conn.table('TS_PRODUCT_SALES', schema='USER_APL')

series_in.head(5).collect()

SAP HANA Exam Prep, SAP HANA Career, SAP HANA Jobs, SAP HANA Learning, SAP HANA Skills, SAP HANA Prep, SAP HANA Preparation

How many segments do we have?

series_in.distinct(cols='Product').collect()

SAP HANA Exam Prep, SAP HANA Career, SAP HANA Jobs, SAP HANA Learning, SAP HANA Skills, SAP HANA Prep, SAP HANA Preparation

We run a fit_predict by product with 4 parallel tasks…

col_segment= 'Product'
from hana_ml.algorithms.apl.time_series import AutoTimeSeries
apl_model = AutoTimeSeries(time_column_name= 'Month', target= 'Quantity', horizon=6,
                           segment_column_name= col_segment, max_tasks= 4)
series_out = apl_model.fit_predict(data = series_in)
df_out = series_out.collect()
dict = {'ACTUAL': 'Actual', 
        'PREDICTED': 'Forecast', 
        'LOWER_INT_95PCT': 'Lower Limit', 
        'UPPER_INT_95PCT': 'Upper Limit' }
df_out.rename(columns=dict, inplace=True)

and check the status of each task.

my_filter = "\"KEY\" in ('AplTaskStatus')"
df = apl_model.get_summary().filter(my_filter).collect()
df.columns = [col_segment, 'Property', 'Task Status']
df.drop('Property', axis=1, inplace=True)
df.style.hide_index()

SAP HANA Exam Prep, SAP HANA Career, SAP HANA Jobs, SAP HANA Learning, SAP HANA Skills, SAP HANA Prep, SAP HANA Preparation

What went wrong with Earrings?

my_filter = "LEVEL < 3 and OID = 'Earrings'"
df = apl_model.get_fit_operation_log().filter(my_filter).collect()
df.columns = [col_segment, 'Time', 'Level', 'Origin', 'Log Text']
df.drop('Time', axis=1, inplace=True)
df.style.hide_index()

SAP HANA Exam Prep, SAP HANA Career, SAP HANA Jobs, SAP HANA Learning, SAP HANA Skills, SAP HANA Prep, SAP HANA Preparation

What are the model components for the other three segments?

df = apl_model.get_model_components()
df.style.hide_index()

SAP HANA Exam Prep, SAP HANA Career, SAP HANA Jobs, SAP HANA Learning, SAP HANA Skills, SAP HANA Prep, SAP HANA Preparation

We look at their forecasting accuracy…

my_metric = 'RootMeanSquareError'  # Choose MAPE or SMAPE or MeanAbsoluteError or RootMeanSquareError
df = apl_model.get_horizon_wide_metric(metric_name=my_metric)
df.columns = [col_segment, my_metric]
df.style.format({my_metric:'{:,.4f}'}).hide_index()

SAP HANA Exam Prep, SAP HANA Career, SAP HANA Jobs, SAP HANA Learning, SAP HANA Skills, SAP HANA Prep, SAP HANA Preparation

and plot the predicted values for Tie.

seg_value = "Tie"
df_one_seg = df_out.query('Product == @seg_value')

import hvplot.pandas
df_one_seg.hvplot.line(
 'Month' , ['Actual','Forecast'], 
 value_label='Quantity', ylim=(0, 1000),
 title = 'Monthly Quantity Sold for ' + seg_value,
 fontsize={'title': 10, 'labels': 10},
 legend = 'bottom', height = 350, width = 900
)

SAP HANA Exam Prep, SAP HANA Career, SAP HANA Jobs, SAP HANA Learning, SAP HANA Skills, SAP HANA Prep, SAP HANA Preparation

Our readers working with the SQL interface rather than the Python API will be interested in the following sample code.

--- Input Series
drop view "TS_SORTED";
create view "TS_SORTED" as select * from APL_SAMPLES.PRODUCTS_SALES order by 1,2;

--- Output Series
drop table FORECAST_OUT;
create table FORECAST_OUT (
  "Product"  nvarchar(25),
  "Month" DAYDATE,
  "Quantity" integer,
  "kts_1" DOUBLE
);

--- Persisted Tables for Debrief
drop table "INDICATORS";
create table "INDICATORS" like "SAP_PA_APL"."sap.pa.apl.base::BASE.T.INDICATORS";

drop table "DEBRIEF_METRIC";
create table "DEBRIEF_METRIC" like "SAP_PA_APL"."sap.pa.apl.base::BASE.T.DEBRIEF_METRIC_OID";

drop table "DEBRIEF_PROPERTY";
create table "DEBRIEF_PROPERTY" like "SAP_PA_APL"."sap.pa.apl.base::BASE.T.DEBRIEF_PROPERTY_OID";

--- Procedure
DO BEGIN
    declare header "SAP_PA_APL"."sap.pa.apl.base::BASE.T.FUNCTION_HEADER";
    declare config "SAP_PA_APL"."sap.pa.apl.base::BASE.T.OPERATION_CONFIG_EXTENDED";   
    declare var_desc "SAP_PA_APL"."sap.pa.apl.base::BASE.T.VARIABLE_DESC_OID";      
    declare var_role "SAP_PA_APL"."sap.pa.apl.base::BASE.T.VARIABLE_ROLES_WITH_COMPOSITES_OID";      
    declare out_log   "SAP_PA_APL"."sap.pa.apl.base::BASE.T.OPERATION_LOG";      
    declare out_sum   "SAP_PA_APL"."sap.pa.apl.base::BASE.T.SUMMARY";      
    declare out_indic "SAP_PA_APL"."sap.pa.apl.base::BASE.T.INDICATORS";      
    declare out_debrief_metric "SAP_PA_APL"."sap.pa.apl.base::BASE.T.DEBRIEF_METRIC_OID";      
    declare out_debrief_property "SAP_PA_APL"."sap.pa.apl.base::BASE.T.DEBRIEF_PROPERTY_OID";
   
    :header.insert(('Oid', 'All Products'));
    :header.insert(('LogLevel', '2'));
    :header.insert(('MaxTasks', '4'));  --  PARALLEL TASKS

    :config.insert(('APL/SegmentColumnName', 'Product',null));   --  THE SEGMENT
    :config.insert(('APL/Horizon', '6',null));
    :config.insert(('APL/TimePointColumnName', 'Month',null));
    :config.insert(('APL/LastTrainingTimePoint', '2021-12-01 00:00:00',null));
:config.insert(('APL/DecomposeInfluencers','true',null));

    :var_role.insert(('Month', 'input',null,null,null));
    :var_role.insert(('Quantity', 'target',null,null,null));

    "SAP_PA_APL"."sap.pa.apl.base::FORECAST_AND_DEBRIEF"(
:header, :config, :var_desc, :var_role, 'USER_APL','TS_SORTED', 'USER_APL','FORECAST_OUT',out_log,out_sum,out_indic,out_debrief_metric,out_debrief_property);

select * from :out_log;
select * from :out_sum where key in ('AplTaskStatus','AplTotalElapsedTime') order by 1,2;
    insert into  "USER_APL"."INDICATORS"        select * from :out_indic;
    insert into  "USER_APL"."DEBRIEF_METRIC"    select * from :out_debrief_metric;
    insert into  "USER_APL"."DEBRIEF_PROPERTY"  select * from :out_debrief_property; 
END;

No comments:

Post a Comment