Thursday, 17 December 2015

Time Series Interactive Plots with R

Time Series Interactive Plots with R dygraphs
R is a leading statistical analysis environment with a large and growing collection of packages. I recently came across “dygraphs”, a package that creates beautiful, interactive time series visualizations.
Plots can be zoomed, series can be highlighted, and much more can be done.
These interactive plots let users play with the data and understand it better.
I find these plots just as dazzling as the ones produced by Tableau and QlikView.
I chose the Walmart sales data set, which you can download from:

Data Processing

#### load the needed libraries####
library("plyr")
library("reshape")
## 
## Attaching package: 'reshape'
## 
## The following objects are masked from 'package:plyr':
## 
##     rename, round_any
library("dygraphs")
library("xts")
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
# Directory that holds the CSV files. Full paths are built with file.path()
# instead of calling setwd(), so running this script does not change the
# session's working directory as a side effect.
data_dir <- "D:\\Personal\\Blogs\\Jan_05_2015"

#### load the datasets####
# Weekly sales per store and department.
walmartSales <- read.csv(file.path(data_dir, "WalmartSales.csv"))
# Additional per-store, per-date columns (merged with the sales data later).
features <- read.csv(file.path(data_dir, "features.csv"))


#### first rows of the raw sales data ####
head(walmartSales, n = 6L)
##   Store Dept       Date Weekly_Sales IsHoliday
## 1     1    1 2010-02-05     24924.50     FALSE
## 2     1    1 2010-02-12     46039.49      TRUE
## 3     1    1 2010-02-19     41595.55     FALSE
## 4     1    1 2010-02-26     19403.54     FALSE
## 5     1    1 2010-03-05     21827.90     FALSE
## 6     1    1 2010-03-12     21043.39     FALSE
#### distinct store and department identifiers ####
unique(walmartSales[["Store"]])
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
## [24] 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
unique(walmartSales[["Dept"]])
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 16 17 18 19 20 21 22 23 24
## [24] 25 26 27 28 29 30 31 32 33 34 35 36 37 38 40 41 42 44 45 46 47 48 49
## [47] 51 52 54 55 56 58 59 60 67 71 72 74 77 78 79 80 81 82 83 85 87 90 91
## [70] 92 93 94 95 96 97 98 99 39 50 43 65
#### Total weekly sales in each store, summed across all departments ###
# NOTE: the object is called `AvgSalesStore` (name kept because later code
# refers to it), but the value computed per Store/Date group is a sum, not
# an average.

# Name the aggregate column "sales" directly instead of renaming it by
# position afterwards.
AvgSalesStore <- ddply(
  walmartSales,
  .(Store, Date),
  summarise,
  sales = sum(Weekly_Sales)
)

## Merge the data sets
# Join the per-store totals with the feature columns on Store and Date.
mergedSales <- merge(AvgSalesStore, features, by = c("Store", "Date"))

## glance of mergedSales
head(mergedSales)
##   Store       Date   sales Temperature Fuel_Price MarkDown1 MarkDown2
## 1     1 2010-02-05 1643691       42.31      2.572        NA        NA
## 2     1 2010-02-12 1641957       38.51      2.548        NA        NA
## 3     1 2010-02-19 1611968       39.93      2.514        NA        NA
## 4     1 2010-02-26 1409728       46.63      2.561        NA        NA
## 5     1 2010-03-05 1554807       46.50      2.625        NA        NA
## 6     1 2010-03-12 1439542       57.79      2.667        NA        NA
##   MarkDown3 MarkDown4 MarkDown5      CPI Unemployment IsHoliday
## 1        NA        NA        NA 211.0964        8.106     FALSE
## 2        NA        NA        NA 211.2422        8.106      TRUE
## 3        NA        NA        NA 211.2891        8.106     FALSE
## 4        NA        NA        NA 211.3196        8.106     FALSE
## 5        NA        NA        NA 211.3501        8.106     FALSE
## 6        NA        NA        NA 211.3806        8.106     FALSE

Store 3 time series plot

## Select store 3
store3 <- mergedSales[mergedSales$Store == 3, ]
## Rename the sales column (third column) after the store so the series
## carries the store's name.
names(store3)[3] <- "store3"

## Rows 100-106 of the store's data (head() prints the first six of them)
head(store3[100:106, ])
##      Store       Date   store3 Temperature Fuel_Price MarkDown1 MarkDown2
## 3246     3 2011-12-30 410553.9       48.29      3.129   1449.16  14356.07
## 3247     3 2012-01-06 398178.2       52.42      3.157   3824.22   9389.98
## 3248     3 2012-01-13 367438.6       51.86      3.261   3927.76   3323.64
## 3249     3 2012-01-20 365818.6       56.20      3.268    720.82   1629.26
## 3250     3 2012-01-27 349518.1       58.06      3.290    774.55    349.99
## 3251     3 2012-02-03 424960.7       59.33      3.360   9667.50    268.29
##      MarkDown3 MarkDown4 MarkDown5      CPI Unemployment IsHoliday
## 3246     66.66     81.43    777.35 223.0091        7.197      TRUE
## 3247     15.00    415.36   1476.85 223.1923        6.833     FALSE
## 3248        NA    226.97   1725.19 223.3755        6.833     FALSE
## 3249      1.46     16.05    799.50 223.4701        6.833     FALSE
## 3250        NA      4.00    328.72 223.5646        6.833     FALSE
## 3251      0.60   8368.15    804.24 223.6591        6.833     FALSE
## Create xts object: all columns except Date (column 2), indexed by Date
store3_xts <- xts(store3[, -2], order.by = as.POSIXct(store3$Date))

class(store3_xts)
## [1] "xts" "zoo"
## Time Series plot
## Column 2 of the xts object is the store's sales series. No second argument
## is passed: a bare `col` here would resolve to base R's col() function, be
## taken as the plot title, and trigger a "converting an R function to JSON
## as null" warning.
dygraph(store3_xts[, 2])

Store 16 time series plot

## Rows of the merged data that belong to store 16
store16 <- mergedSales[mergedSales[["Store"]] == 16, ]
## Label the sales column (third column) with the store's name
colnames(store16)[3] <- "store16"

## Peek at rows 100-106 (head() prints the first six of them)
head(store16[seq(100, 106), ])
##      Store       Date  store16 Temperature Fuel_Price MarkDown1 MarkDown2
## 1101    16 2011-12-30 665861.1       23.91      3.119    921.67  11778.68
## 1102    16 2012-01-06 564538.1       26.49      3.095    443.59   5524.49
## 1103    16 2012-01-13 508520.1       19.55      3.077   1353.58   2551.73
## 1104    16 2012-01-20 474389.8       29.30      3.055   2087.19   1399.47
## 1105    16 2012-01-27 453979.2       28.17      3.038    612.01   1011.32
## 1106    16 2012-02-03 475905.1       25.53      3.031  32702.46    747.82
##      MarkDown3 MarkDown4 MarkDown5      CPI Unemployment IsHoliday
## 1101    101.87    272.51   1673.25 196.3586        6.232      TRUE
## 1102     15.70     35.25   3296.96 196.5458        6.162     FALSE
## 1103      0.24    289.67   1899.49 196.7331        6.162     FALSE
## 1104      5.58     75.83   4742.74 196.7797        6.162     FALSE
## 1105        NA      8.41   2482.91 196.8263        6.162     FALSE
## 1106      2.39   8063.07   2940.68 196.8729        6.162     FALSE
## Build the xts object: every column except Date (column 2), indexed by Date
store16_xts <- xts(
  store16[, -2],
  order.by = as.POSIXct(store16[["Date"]])
)

class(store16_xts)
## [1] "xts" "zoo"
## Interactive plot of the store's sales series (second xts column)
dygraph(store16_xts[, "store16"])

Store 20 time series plot

    ## Rows of the merged data that belong to store 20
    store20 <- mergedSales[mergedSales[["Store"]] == 20, ]
    ## Label the sales column (third column) with the store's name
    colnames(store20)[3] <- "store20"

    ## Peek at rows 100-106 (head() prints the first six of them)
    head(store20[seq(100, 106), ])
##      Store       Date store20 Temperature Fuel_Price MarkDown1 MarkDown2
## 1816    20 2011-12-30 2043245       36.35      3.389   6918.65  97740.99
## 1817    20 2012-01-06 1964702       33.42      3.422   9273.46  45971.43
## 1818    20 2012-01-13 1911511       37.79      3.513   3627.81  18632.14
## 1819    20 2012-01-20 1892776       27.65      3.533   2465.98   5030.40
## 1820    20 2012-01-27 1761017       37.19      3.567   1023.60   3044.15
## 1821    20 2012-02-03 2203523       39.93      3.617  58928.52   8766.77
##      MarkDown3 MarkDown4 MarkDown5      CPI Unemployment IsHoliday
## 1816    270.38   1648.03   2852.19 212.4036        7.082      TRUE
## 1817    207.60   1365.38   5460.86 212.5711        6.961     FALSE
## 1818    108.48    847.70   3614.04 212.7386        6.961     FALSE
## 1819    106.54    999.54   3380.92 212.8336        6.961     FALSE
## 1820    106.08     52.87   2447.73 212.9286        6.961     FALSE
## 1821    226.27  53603.99   8301.25 213.0236        6.961     FALSE
    ## Build the xts object: every column except Date (column 2), indexed by Date
    store20_xts <- xts(
      store20[, -2],
      order.by = as.POSIXct(store20[["Date"]])
    )

    class(store20_xts)
## [1] "xts" "zoo"
    ## Interactive plot of the store's sales series (second xts column)
    dygraph(store20_xts[, "store20"])

Store 8 time series plot

## Rows of the merged data that belong to store 8
store8 <- mergedSales[mergedSales[["Store"]] == 8, ]
## Label the sales column (third column) with the store's name
colnames(store8)[3] <- "store8"

## Peek at rows 100-106 (head() prints the first six of them)
head(store8[seq(100, 106), ])
##      Store       Date   store8 Temperature Fuel_Price MarkDown1 MarkDown2
## 6249     8 2011-12-30 858572.2       36.33      3.129   3701.08  36381.83
## 6250     8 2012-01-06 872113.2       43.47      3.157   3093.39  26001.46
## 6251     8 2012-01-13 817661.8       36.46      3.261   6725.29  12764.99
## 6252     8 2012-01-20 813954.8       46.81      3.268   1643.08   1055.47
## 6253     8 2012-01-27 778178.5       45.52      3.290   6065.01    730.16
## 6254     8 2012-02-03 927610.7       45.56      3.360  41524.03   1412.41
##      MarkDown3 MarkDown4 MarkDown5      CPI Unemployment IsHoliday
## 6249    164.34    178.40   1508.56 223.0577        6.123      TRUE
## 6250     96.35    500.91   4468.63 223.2410        5.825     FALSE
## 6251     15.98    299.73   3851.69 223.4243        5.825     FALSE
## 6252      9.02    411.88   3161.92 223.5188        5.825     FALSE
## 6253     15.18     30.28  24241.12 223.6134        5.825     FALSE
## 6254     83.15  18723.56   3886.31 223.7079        5.825     FALSE
## Build the xts object: every column except Date (column 2), indexed by Date
store8_xts <- xts(
  store8[, -2],
  order.by = as.POSIXct(store8[["Date"]])
)

class(store8_xts)
## [1] "xts" "zoo"
## Interactive plot of the store's sales series (second xts column)
dygraph(store8_xts[, "store8"])

Store 6 time series plot

## Rows of the merged data that belong to store 6
store6 <- mergedSales[mergedSales[["Store"]] == 6, ]
## Label the sales column (third column) with the store's name
colnames(store6)[3] <- "store6"

## Peek at rows 100-106 (head() prints the first six of them)
head(store6[seq(100, 106), ])
##      Store       Date  store6 Temperature Fuel_Price MarkDown1 MarkDown2
## 5963     6 2011-12-30 1598081       46.80      3.129   6098.54  82881.16
## 5964     6 2012-01-06 1395340       50.82      3.157   3220.17  32056.43
## 5965     6 2012-01-13 1344243       48.33      3.261   4779.48  11967.81
## 5966     6 2012-01-20 1326256       55.37      3.268   3413.91   1540.99
## 5967     6 2012-01-27 1315611       53.95      3.290   1510.59    630.92
## 5968     6 2012-02-03 1496306       57.45      3.360  36848.38   1772.68
##      MarkDown3 MarkDown4 MarkDown5      CPI Unemployment IsHoliday
## 5963    326.68    814.58   2728.99 221.1283        6.551      TRUE
## 5964    108.24    985.76   6650.63 221.3088        6.132     FALSE
## 5965     65.72    575.13   4512.18 221.4893        6.132     FALSE
## 5966     33.02    449.06   6587.63 221.5831        6.132     FALSE
## 5967     37.10     17.00   2641.90 221.6769        6.132     FALSE
## 5968    161.55  28199.34   7151.33 221.7707        6.132     FALSE
## Build the xts object: every column except Date (column 2), indexed by Date
store6_xts <- xts(
  store6[, -2],
  order.by = as.POSIXct(store6[["Date"]])
)

class(store6_xts)
## [1] "xts" "zoo"
## Interactive plot of the store's sales series (second xts column)
dygraph(store6_xts[, "store6"])

Comparison of sales across 5 stores

## Side-by-side comparison of the sales series of the five Walmart stores

## Bind each store's sales column (named after the store) into one xts object
AllStores <- cbind(
  store3_xts[, "store3"],
  store16_xts[, "store16"],
  store20_xts[, "store20"],
  store8_xts[, "store8"],
  store6_xts[, "store6"]
)

## Plot all series together, then apply the legend width and the
## highlight settings
dyHighlight(
  dyLegend(dygraph(AllStores), width = 200),
  highlightCircleSize = 3,
  highlightSeriesBackgroundAlpha = 0.2,
  hideOnMouseOut = FALSE
)

The data set contains many stores and many departments. You can explore further using the basic code and data provided here.

Author: Saikumar Allaka

No comments:

Post a Comment