R: ts function gotchas...
> # It appears R replicates (recycles) the data when creating a time series
> # where the requested date range exceeds the available data.
>
> # The moral of the story is
make sure there are no gaps in data
> # prior to creating the
time series!!!
>
> df = data.frame()
>
> x = 10; yr = 2016; mo = 01;
df = rbind(df, data.frame(x, yr, mo))
> x = 12; yr = 2016; mo = 02;
df = rbind(df, data.frame(x, yr, mo))
> x = 15; yr = 2016; mo = 03;
df = rbind(df, data.frame(x, yr, mo))
> x = 19; yr = 2016; mo = 04;
df = rbind(df, data.frame(x, yr, mo))
>
> x = 11; yr = 2016; mo = 05;
df = rbind(df, data.frame(x, yr, mo))
> x = 13; yr = 2016; mo = 06;
df = rbind(df, data.frame(x, yr, mo))
> x = 16; yr = 2016; mo = 07;
df = rbind(df, data.frame(x, yr, mo))
> x = 20; yr = 2016; mo = 08;
df = rbind(df, data.frame(x, yr, mo))
>
> x = 12; yr = 2016; mo = 09;
df = rbind(df, data.frame(x, yr, mo))
> x = 14; yr = 2016; mo = 10;
df = rbind(df, data.frame(x, yr, mo))
> x = 17; yr = 2016; mo = 11;
df = rbind(df, data.frame(x, yr, mo))
> x = 21; yr = 2016; mo = 12;
df = rbind(df, data.frame(x, yr, mo))
>
> x = 13; yr = 2017; mo = 01;
df = rbind(df, data.frame(x, yr, mo))
> x = 15; yr = 2017; mo = 02;
df = rbind(df, data.frame(x, yr, mo))
> x = 18; yr = 2017; mo = 03;
df = rbind(df, data.frame(x, yr, mo))
> x = 22; yr = 2017; mo = 04;
df = rbind(df, data.frame(x, yr, mo))
>
> print(df)
x
yr mo
1 10 2016
1
2 12 2016
2
3 15 2016
3
4 19 2016
4
5 11 2016
5
6 13 2016
6
7 16 2016
7
8 20 2016
8
9 12 2016
9
10 14 2016 10
11 17 2016 11
12 21 2016 12
13 13 2017 1
14 15 2017 2
15 18 2017 3
16 22 2017 4
>
> # get first and last month
and year
> min_yr = min(df$yr)
> min_mo= min(subset(df,
yr==min_yr)$mo)
> max_yr = max(df$yr)
> max_mo= max(subset(df,
yr==max_yr)$mo)
>
> print(paste0("Have
values from ", min_mo, "/", min_yr, " thru ", max_mo,
"/", max_yr))
[1] "Have values from
1/2016 thru 4/2017"
>
> ts = ts(df$x,
start=c(min_yr, min_mo), end=c(max_yr, max_mo), frequency=12)
> print(ts)
Jan Feb Mar Apr May Jun Jul Aug Sep Oct
Nov Dec
2016 10
12 15 19
11 13 16
20 12 14
17 21
2017 13
15 18 22
>
> max_mo = 8 # data does not really go out that far
> ts2 = ts(df$x,
start=c(min_yr, min_mo), end=c(max_yr, max_mo), frequency=12)
> print(ts2)
Jan Feb Mar Apr May Jun Jul Aug Sep Oct
Nov Dec
2016 10
12 15 19
11 13 16
20 12 14
17 21
2017 13
15 18 22
10 12 15
19
> # N O T E: I do not have data for May-Aug 2017, so ts() recycles the Jan-Apr 2016 values ! ! !
>
> # drop one row, Oct 2016
> df = subset(df, !(yr ==
2016 & mo == 10))
> print(df)
x
yr mo
1 10 2016
1
2 12 2016
2
3 15 2016
3
4 19 2016
4
5 11 2016
5
6 13 2016
6
7 16 2016
7
8 20 2016
8
9 12 2016
9
11 17 2016 11
12 21 2016 12
13 13 2017 1
14 15 2017 2
15 18 2017 3
16 22 2017 4
> max_mo = 4 # undo prior test
> ts3 = ts(df$x,
start=c(min_yr, min_mo), end=c(max_yr, max_mo), frequency=12)
> print(ts3)
Jan Feb Mar Apr May Jun Jul Aug Sep Oct
Nov Dec
2016 10
12 15 19
11 13 16
20 12 17
21 13
2017 15
18 22 10
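> # N O T E: This shows df dates are ignored when creating time series (only df$x is passed): values after the gap shift one month earlier and the first value is recycled to fill Apr 2017 ! ! !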
No comments:
Post a Comment