Time Series in R: Quick Reference
Jun 26, 2018
Rodrigo Mariscal
6 minute read

Read and Define the Time Series (TS) Objects


Read from a Saved File

These functions read a file without a TS structure and then define the TS object.

# Read from a raw csv file
raw.data <- read.table("./data/data.us.csv", sep = ",", header = TRUE)

# When you have the dates in the original csv file: parse them for the index
# and keep only the data columns. Leaving the (non-numeric) date column in the
# data would coerce the whole xts matrix to character.
xts.data <- xts(raw.data[, setdiff(names(raw.data), "date"), drop = FALSE],
                order.by = as.Date(raw.data$date, "%m/%d/%Y"))

# When you don't have the dates in the original csv file but know the starting date:
# build a quarterly date index (one date every 3 months) of the right length
date <- seq(as.Date("1960/3/1"), by = "3 month", length.out = nrow(raw.data))
# frequency is the number of observations per year (4 for quarterly data)
xts.data <- xts(raw.data[, -1], order.by = date, frequency = 4)

This function reads the file and declares the TS structure from the beginning.

# Note that this is a TS with a zoo structure: the first column of the csv is
# parsed as the date index using the given format
ts.data <- read.zoo("./data/data.us.csv", index.column = 1, sep = ",",
                    header = TRUE, format = "%m/%d/%Y")
# Or... build a base-R quarterly ts from columns 2 to 4 of the raw data.
# This assigns to the same name, so run only one of the two alternatives.
ts.data <- ts(raw.data[, 2:4], frequency = 4, start = c(1960, 1))

# One can convert the TS-zoo into a xts...
xts.data <- as.xts(ts.data)

Read from Online Sources

There are two main ways to get data into R: save the data to an Excel or csv file and read it in, or download it directly from an online source. There are ready-made packages that fetch the data directly from the web in a predefined format. The table below shows the most popular sources and the packages that one can use.

Sources R-Package Web Pages
Yahoo, FRED, Google, Oanda quantmod Link
International Monetary Fund (IMF)1 IMFData or imfr Link
World Bank’s WDI WDI Link
OECD2 rsdmx Link
Penn World Tables pwt Link
International Labor Organization (ILO) rsdmx Link

After looking up the series identifier on the source's web page, one can use the getSymbols function to download the data directly into R.

# Download the three FRED series in a single call; each one is created as an
# object named after its symbol (they are used under those names below)
getSymbols(c("GDPC1", "PCEPILFE", "FEDFUNDS"), src = "FRED")

# Give every series a descriptive column name
names(GDPC1)    <- "US Real GDP"
names(PCEPILFE) <- "Core PCE"
names(FEDFUNDS) <- "FED Rate"

Subset and Extract

# Federal funds rate, monthly data from January to March 1980
FEDFUNDS["1980-01-01/1980-03-01"]
##            FED Rate
## 1980-01-01    13.82
## 1980-02-01    14.13
## 1980-03-01    17.19
# Real GDP, quarterly data for 2006
GDPC1["2006"]
##            US Real GDP
## 2006-01-01    15267.03
## 2006-04-01    15302.70
## 2006-07-01    15326.37
## 2006-10-01    15456.93
# End of period (December) core PCE from 2000 to 2005.
# Restrict the window first and build the month filter on that same object:
# a filter computed on the 2000/2005 subset but applied to the full series
# picks rows by position and returns the Decembers of the first six years of
# the sample instead.
pce.sub <- PCEPILFE["2000/2005"]
pce.sub[format(index(pce.sub), "%m") == "12"]
## (returns the six December observations, 2000-12-01 through 2005-12-01)

Identify ’NA’s, Fill and Splice

# Create an example series with missing values: blank out all of 2001
gdp.miss <- GDPC1["2000/2002"]
gdp.miss["2001"] <- NA
# Identify the NAs
gdp.miss[is.na(gdp.miss)]

# Show numbers without NAs
na.omit(gdp.miss)
# Fill missing values with the last observation (carried forward) or with the
# next non-missing observation (carried backward, fromLast = TRUE)
cbind(gdp.miss, na.locf(gdp.miss), na.locf(gdp.miss, fromLast = TRUE))
##            US.Real.GDP US.Real.GDP.1 US.Real.GDP.2
## 2000-01-01    12924.18      12924.18      12924.18
## 2000-04-01    13160.84      13160.84      13160.84
## 2000-07-01    13178.42      13178.42      13178.42
## 2000-10-01    13260.51      13260.51      13260.51
## 2001-01-01          NA      13260.51      13397.00
## 2001-04-01          NA      13260.51      13397.00
## 2001-07-01          NA      13260.51      13397.00
## 2001-10-01          NA      13260.51      13397.00
## 2002-01-01    13397.00      13397.00      13397.00
## 2002-04-01    13478.15      13478.15      13478.15
## 2002-07-01    13538.07      13538.07      13538.07
## 2002-10-01    13559.03      13559.03      13559.03
# Fill missing values with linear interpolation and cubic spline
cbind(gdp.miss, na.approx(gdp.miss), na.spline(gdp.miss, method = "fmm"))
##            US.Real.GDP US.Real.GDP.1 US.Real.GDP.2
## 2000-01-01    12924.18      12924.18      12924.18
## 2000-04-01    13160.84      13160.84      13160.84
## 2000-07-01    13178.42      13178.42      13178.42
## 2000-10-01    13260.51      13260.51      13260.51
## 2001-01-01          NA      13287.98      13320.38
## 2001-04-01          NA      13314.87      13338.72
## 2001-07-01          NA      13342.05      13341.20
## 2001-10-01          NA      13369.52      13352.20
## 2002-01-01    13397.00      13397.00      13397.00
## 2002-04-01    13478.15      13478.15      13478.15
## 2002-07-01    13538.07      13538.07      13538.07
## 2002-10-01    13559.03      13559.03      13559.03


Transformations, Combine and Change Frequency


Basic Function Transformations
Transformation Command
Logarithm log(y)
Lag: \(L^{n} y_{t} = y_{t-n}\) lag(y,n)
Difference: \(\Delta y_{t} = y_{t} - y_{t-1}\) diff(y)
Moving average: \(\bar{y}^{n}_{t} = \frac{1}{n} \sum^{n-1}_{i=0} y_{t-i}\) rollapply(y, n, FUN = mean)
Cumulative sum: \(y^{s}_{t} = \sum^{t}_{i=0} y_{i}\) cumsum(y)
# Transformations: each result is stored as a new column of xts.gdp
# Natural logarithm of the level
xts.gdp$lgdp <- log(xts.gdp$gdp)
# One-period lag of the log level
xts.gdp$lgdp_1 <- lag(xts.gdp$lgdp, k = 1)
# First difference of the log level
xts.gdp$dlgdp <- diff(xts.gdp$lgdp)
# 5-period moving average. The window is explicitly right-aligned (current and
# four previous observations) so that it matches the trailing formula in the
# table above regardless of the class-specific default alignment.
xts.gdp$mov.avg5_lgdp <- rollapply(xts.gdp$lgdp, width = 5, FUN = mean,
                                   align = "right")
# Cumulative sum of the log level
xts.gdp$cu.sum_lgdp <- cumsum(xts.gdp$lgdp)

Period aggregation
# Check the frequency of the data, then get the row positions that close
# each calendar year
periodicity(xts.gdp)
## Quarterly periodicity from 1947-03-01 to 2018-09-01
years <- endpoints(xts.gdp, on = "years")

# Small wrapper: collapse every year of xts.gdp with the summary function `fun`
agg.by.year <- function(fun) {
  period.apply(xts.gdp, INDEX = years, FUN = fun)
}

# First / last observation of the period
xts.gdp.a.firs <- agg.by.year(first)
xts.gdp.a.last <- agg.by.year(last)
# Average of the period
xts.gdp.a.mean <- agg.by.year(mean)
# Sum of the period
xts.gdp.a.sum <- agg.by.year(sum)
# Minimum / maximum of the period
xts.gdp.a.min <- agg.by.year(min)
xts.gdp.a.max <- agg.by.year(max)
# Putting all together...
cbind(
  xts.gdp["2000/2001"],
  xts.gdp.a.firs["2000/2001"],
  xts.gdp.a.last["2000/2001"],
  xts.gdp.a.mean["2000/2001"],
  xts.gdp.a.sum["2000/2001"],
  xts.gdp.a.min["2000/2001"],
  xts.gdp.a.max["2000/2001"]
)
##             QRT.GDP  FOP.GDP  EOP.GDP  AVG.GDP  SUM.GDP  MIN.GDP  MAX.GDP
## 2000-03-01 12924.18       NA       NA       NA       NA       NA       NA
## 2000-06-01 13160.84       NA       NA       NA       NA       NA       NA
## 2000-09-01 13178.42       NA       NA       NA       NA       NA       NA
## 2000-12-01 13260.51 12924.18 13260.51 13130.99 52523.95 12924.18 13260.51
## 2001-03-01 13222.69       NA       NA       NA       NA       NA       NA
## 2001-06-01 13299.98       NA       NA       NA       NA       NA       NA
## 2001-09-01 13244.78       NA       NA       NA       NA       NA       NA
## 2001-12-01 13280.86 13222.69 13280.86 13262.08 53048.32 13222.69 13299.98

Combine series
# Aggregate the monthly data to quarterly averages: endpoints() gives the row
# positions that close each quarter and period.apply() averages within them
quarts <- endpoints(xts.inf, on = "quarters")
xts.inf.q.avg <- period.apply(xts.inf, INDEX = quarts, FUN = mean)

# Merge monthly and quarterly data. The left join keeps every monthly date, so
# the quarterly average is NA except on the quarter-end months (output below).
# Note that xts.inf is overwritten with the merged two-column object.
xts.inf <- merge(xts.inf, xts.inf.q.avg, join = "left")
colnames(xts.inf) <- c("EOP.inf", "AVG.inf")
xts.inf["2001"]
##            EOP.inf  AVG.inf
## 2001-01-01  81.698       NA
## 2001-02-01  81.827       NA
## 2001-03-01  81.909 81.81133
## 2001-04-01  82.044       NA
## 2001-05-01  82.067       NA
## 2001-06-01  82.253 82.12133
## 2001-07-01  82.465       NA
## 2001-08-01  82.511       NA
## 2001-09-01  82.049 82.34167
## 2001-10-01  82.637       NA
## 2001-11-01  82.810       NA
## 2001-12-01  82.829 82.75867
# Merge two series and exclude the missing cases from both sides: the inner
# join keeps only the dates that appear in both objects
merge(xts.gdp["2001"], xts.inf["2001"], join = "inner")
##             QRT.GDP EOP.inf  AVG.inf
## 2001-03-01 13222.69  81.909 81.81133
## 2001-06-01 13299.98  82.253 82.12133
## 2001-09-01 13244.78  82.049 82.34167
## 2001-12-01 13280.86  82.829 82.75867

Summary Charts

# Plot separate series under the zoo TS structure (one panel per series)
plot(
  ts.data[, 1:2],
  plot.type = "multiple",
  col = c("blue", "red"),
  lty = rep(1, 2),
  lwd = rep(2, 2),
  main = "",
  ylab = c("FED Rate", "Inflation"),
  xlab = "Date"
)
legend(
  x = "topright",
  legend = c("FED Rate", "Inflation"),
  col = c("blue", "red"),
  lty = rep(1, 2),
  lwd = rep(2, 2)
)
# Plot series together under the zoo TS structure (both lines in one panel,
# sharing a fixed 0-20 vertical axis)
plot(
  ts.data[, 1:2],
  plot.type = "single",
  ylim = c(0, 20),
  col = c("blue", "red"),
  lty = rep(1, 2),
  lwd = rep(2, 2),
  ylab = "Percentage points",
  xlab = "Date"
)
legend(
  x = "topright",
  legend = c("Fed Rate", "Inflation"),
  col = c("blue", "red"),
  lty = rep(1, 2),
  lwd = rep(2, 2)
)
# Same chart with ggplot2: both lines share the data and the x aesthetic, and
# the colour is mapped to a label so that each series gets a legend entry
ggplot(data = xts.data, mapping = aes(x = Index)) +
  geom_line(aes(y = ffr, color = "Fed Rate"), linetype = 1, size = 1) +
  geom_line(aes(y = infl, color = "Inflation"), linetype = 1, size = 1) +
  scale_color_manual(
    labels = c("Fed Rate", "Inflation"),
    breaks = c("Fed Rate", "Inflation"),
    values = c("Fed Rate" = "red", "Inflation" = "blue")
  ) +
  scale_y_continuous(limits = c(0, 20), breaks = seq(0, 20, 5)) +
  scale_x_date(
    limits = as.Date(c("1960-03-01", "2018-03-01")),
    date_breaks = "10 years",
    date_labels = "%Y"
  ) +
  theme_hc() +
  theme(
    # Legend placed inside the panel, laid out horizontally
    legend.position = c(0.82, 0.85),
    legend.direction = "horizontal",
    legend.background = element_rect(fill = "transparent"),
    # Thin grey horizontal grid lines; vertical grid lines made invisible
    panel.grid.major.y = element_line(size = 0.1, colour = "grey", linetype = 3),
    panel.grid.major.x = element_line(colour = "transparent"),
    panel.grid.minor.x = element_line(colour = "transparent")
  ) +
  labs(
    x = "",
    y = "",
    color = "",
    title = "Federal Funds Rate and PCE Inflation",
    subtitle = "Percentage points",
    caption = "Source: U.S. Bureau of Economic Analysis."
  )


  1. For more information check the official web site and these other useful sites: site 1, site 2 and site 3.

  2. Organisation for Economic Co-operation and Development, for more information check the web site.