Data Preparation
Preparing your experimental data for NannyML
Campaign Name
Date
Spend [USD]
# of Impressions
Reach
# of Website Clicks
# of Searches
# of View Content
# of Add to Cart
# of Purchase
import pandas as pd
# Load both arms of the experiment; the CSV files are read with ';' as the separator.
control = pd.read_csv("control_group.csv", sep=";")
test = pd.read_csv("test_group.csv", sep=";")

# Let's measure the probability of buying after our content has been viewed.
selected_cols = ['Campaign Name', '# of View Content', '# of Purchase']
experiment = pd.concat([control[selected_cols], test[selected_cols]], ignore_index=True)

# Let's remove missing values. Both count columns feed the fail_count
# computation below, so a row is dropped when either of them is missing
# (otherwise a missing view count would produce a NaN fail_count).
experiment = experiment.dropna(subset=['# of View Content', '# of Purchase'])

# Preprocess to make data comply with NannyML requirements
experiment['variable'] = 'Purchases from Views'
experiment = experiment.rename(columns={"# of Purchase": "success_count"})
# A "failure" is a content view that did not end in a purchase.
experiment['fail_count'] = experiment['# of View Content'] - experiment['success_count']
experiment = experiment.drop(columns='# of View Content')

# Campaign name values must be control and treatment.
# Only the 'Campaign Name' column is touched so other columns cannot be
# altered by accident.
experiment['Campaign Name'] = experiment['Campaign Name'].replace({
    'Control Campaign': 'control',
    'Test Campaign': 'treatment',
})

# Shuffling and splitting for demonstration purposes only, final results are the same
experiment = experiment.sample(frac=1, random_state=13).reset_index(drop=True)
# Positional split: the first 10 rows go to one file, the remainder to the other.
experiment.iloc[:10].to_parquet('ab_test1.pq', index=False)
experiment.iloc[10:].to_parquet('ab_test2.pq', index=False)
experiment.head(3)
Campaign Name
variable
success_count
fail_count