sample-ecommerce-analytics
Pipeline: sample-ecommerce-analytics
| ## Load Dataset
## Recall the source dataset by name; the steps below trim it and derive per-category counts
@dm:recall
name = "sample-ecommerce-data"
## Exclude columns that are not relevant for our analytics.
## The value is a '|'-separated list; entries such as customer_.*_name look like regex patterns (confirm against the bot docs)
--> @dm:selectcolumns
exclude = "_id|_index|_type|currency|customer_.*_name|customer_id|customer_phone|user"
## Save into a temporary dataset; the Top-5 Products and Customer Gender Mix blocks recall it by this name
--> @dm:save
name="temp-sample-ecommerce-data"
## First, let us find the list of categories: keep only the category and order_id columns
--> *dm:filter
* get category, order_id
## Strip leading '[' and trailing ']' characters from the category value
--> @dm:eval
category = "category.lstrip('[').rstrip(']')"
## Presumably expands a multi-valued category into one row per category -- confirm @dm:explode semantics
--> @dm:explode
column = "category"
## Count rows per category; order_id is the only other column, so it presumably ends up holding the count -- confirm
--> @dm:groupby
columns ="category" & agg='count'
## Save the per-category counts; the Top-5 Categories block recalls them by this name
--> @dm:save
name = "temp-categories"
## Analytic-1: Find Top-5 Categories
--> @c:new-block
name = "Top-5 Categories"
## Start from the per-category counts saved earlier as "temp-categories"
--> @dm:recall
name = "temp-categories"
## Sort descending on order_id, which presumably carries the per-category count produced by the earlier groupby -- confirm
--> @dm:sort
columns = "order_id" & order = 'descending'
## Keep only the first 5 rows of the sorted result
--> @dm:head
n=5
## Save the result under its own temporary dataset name
--> @dm:save
name="temp-top-5-categories"
## Analytic-2: Find Top-5 Products Sold based on Quantity
--> @c:new-block
name = "Top-5 Products"
## Start again from the trimmed dataset saved as "temp-sample-ecommerce-data"
--> @dm:recall
name = "temp-sample-ecommerce-data"
## Keep only the products column
--> *dm:filter
* get products
## Presumably expands the JSON in products into rows, keeping only product_name and quantity -- confirm @dm:explode-json semantics
--> @dm:explode-json
column = "products" & include_exploded_columns = "product_name|quantity"
## NOTE(review): no agg is given here, unlike the other groupby steps in this pipeline, so the bot's
## default aggregation applies -- confirm it sums quantity per product_name, as the analytic title implies
--> @dm:groupby
columns = "product_name"
## Sort descending on the aggregated quantity
--> @dm:sort
columns = "quantity" & order = "descending"
## Keep only the first 5 rows of the sorted result
--> @dm:head
n = 5
## Save the result under its own temporary dataset name
--> @dm:save
name = "temp-top-5-products"
## Analytic-3: Customer Gender Mix
--> @c:new-block
name = "Customer Gender Mix"
## Start again from the trimmed dataset saved as "temp-sample-ecommerce-data"
--> @dm:recall
name="temp-sample-ecommerce-data"
## Keep only the order_id and customer_gender columns
--> *dm:filter
* get order_id, customer_gender
## Count rows per customer_gender value
--> @dm:groupby
columns = 'customer_gender' & agg = 'count'
## Save the result; this name has no "temp-" prefix and is the dataset listed with write access in the artifacts table
--> @dm:save
name = "ecommerce_customer_gender_spread"
|
Artifacts used in this Pipeline
| Artifact Type | Artifact Name | Access |
|---|---|---|
| dataset | sample-ecommerce-data | read |
| dataset | ecommerce_customer_gender_spread | write |
Bots used in this Pipeline
@dm:recall @dm:selectcolumns @dm:save *dm:filter @dm:eval @dm:explode @dm:groupby @c:new-block @dm:sort @dm:head @dm:explode-json