USGDPpresidents
This document describes the process for updating Ecdat::USGDPpresidents
.
First decide the directory in which we want to work and copy this
vignette (*.Rmd
file) into that directory.
(RStudio
does not allow setwd
inside code
chunks to work as one might naively expect. Therefore, it’s best NOT to
try to change the working directory but instead to copy this vignette
into the desired working directory.)
Start by checking the span of years in
USGDPpresidents
:
library(Ecdat)
## Loading required package: Ecfun
##
## Attaching package: 'Ecfun'
## The following object is masked from 'package:base':
##
## sign
##
## Attaching package: 'Ecdat'
## The following object is masked from 'package:datasets':
##
## Orange
# Year span currently stored in the packaged dataset.  rngYrs[2] is compared
# with the newest downloaded year further down to decide whether an update
# is needed; the outer parentheses make the assignment print its value.
(rngYrs <- range(USGDPpresidents$Year))
## [1] 1610 2021
Next download “GDP - US” and “CPI - US” from Measuring Worth. On 2022-02-16 this produced two csv files, which I downloaded and copied into a directory in which we wish to work.
# Show the working directory, then look there for the downloaded csv files.
getwd()
## [1] "/private/var/folders/2n/zqk768wj3818l8x2wttbc5kw0000gn/T/RtmprGKc3V/Rbuild3888337032d2/Ecfun/vignettes"
# Every file in the working directory whose name ends in ".csv".
(csv2 <- dir(pattern='\\.csv$'))
## character(0)
# Files whose names start with "USCPI" ...
(CPIcsvs <- grep('^USCPI', csv2, value=TRUE))
## character(0)
# ... keeping only the last one listed (length 0 if none was found).
(CPIcsv <- tail(CPIcsvs, 1))
## character(0)
# Same two steps for files whose names start with "USGDP".
(GDPcsvs <- grep('^USGDP', csv2, value=TRUE))
## character(0)
(GDPcsv <- tail(GDPcsvs, 1))
## character(0)
# Update0 is TRUE only when exactly one CPI file and exactly one GDP file
# were found.  Both comparisons are scalars, so the short-circuit `&&` is
# the appropriate operator, and the logical result is assigned directly.
Update0 <- (length(CPIcsv) == 1) && (length(GDPcsv) == 1)
We must verify by visual inspection that CPIcsv
and
GDPcsv
are both of length 1 and are the files we want.
Read them:
# Read the two downloaded csv files and decide whether they reach beyond the
# last year already stored in USGDPpresidents.  Update stays FALSE unless
# both files were found (Update0) AND they contain a later year.
Update <- FALSE
if (Update0) {
  # The CPI file is read after skipping 2 leading lines, the GDP file after 1.
  USCPI <- read.csv(CPIcsv, skip = 2)
  str(USCPI)
  USGDP. <- read.csv(GDPcsv, skip = 1)
  str(USGDP.)

  # asNumericDF() is provided by Ecfun.
  library(Ecfun)
  USGDP <- asNumericDF(USGDP.)

  rngCPIyrs <- range(USCPI$Year)
  print(rngCPIyrs)
  rngGDPyrs <- range(USGDP$Year)
  print(rngGDPyrs)

  # Latest year present in either download.
  endYr <- max(rngCPIyrs, rngGDPyrs)
  if (endYr > rngYrs[2]) {
    Update <- TRUE
    print(Update)
  }
}
If Update, create a local copy of USGDPpresidents
with
the additional rows required to hold the new data:
# Build the working copy USGDPp2: all existing rows followed by one extra
# row per new year.  The extra rows start out as copies of the last row and
# are overwritten in the following steps.
if (Update) {
  Nold <- nrow(USGDPpresidents)
  rowsNeeded <- endYr - rngYrs[2]
  iRep <- c(1:Nold, rep(Nold, times = rowsNeeded))
  USGDPp2 <- USGDPpresidents[iRep, ]
}
Fix the Year and insert NAs for all other columns for the new rows:
# Prepare the appended rows: blank every column except the first, give the
# rows their proper years, and relabel all rows by year.
if (Update) {
  # Row indices of the appended rows.
  iNew <- Nold + (1:rowsNeeded)

  # Column 1 is excluded here; it is expected to be Year, which is set next.
  USGDPp2[iNew, -1] <- NA

  USGDPp2$Year[iNew] <- (rngYrs[2] + 1):endYr
  rownames(USGDPp2) <- USGDPp2$Year
}
Now replace CPI by the new numbers:
# Replace the CPI column with the freshly downloaded series, matched by year.
if (Update) {
  # Rows of USGDPp2 whose year appears in the download.
  selCPI <- (USGDPp2$Year %in% USCPI$Year)

  # An existing CPI value for a year the download lacks would survive the
  # update unnoticed, so stop and ask for manual review.  The column is
  # addressed by name, like the other series below, not by position.
  if (any(!is.na(USGDPp2[!selCPI, 'CPI']))) {
    stop('ERROR: There are CPI numbers ',
         'in the current USGDPpresidents ',
         'that are not in the new. ',
         'Manual review required.')
  }

  # Use only download rows whose year exists in USGDPp2, so both sides of
  # the assignment have the same length (same pattern as the BLS and budget
  # steps).  Assumes both tables are sorted by Year and that column 2 of
  # USCPI holds the index -- TODO confirm against the csv header.
  selCPInew <- (USCPI$Year %in% USGDPp2$Year)
  USGDPp2$CPI[selCPI] <- USCPI[selCPInew, 2]
}
Does USGDPpresidents.Rd
need to be updated to reflect
the proper reference years for the CPI?
# Display the first four lines of the CPI csv so the reference period stated
# in its header can be checked by eye.
if (Update) readLines(CPIcsv, n = 4)
If this says “Average 1982-84 = 100”, it should be good. Otherwise that (and this) should be updated.
Now let’s update GDPdeflator
:
# Replace GDPdeflator with the column of the GDP download whose name
# contains "Deflator", and print that column name so its index year can be
# compared with the help file.
if (Update) {
  # Rows of USGDPp2 whose year appears in the GDP download; reused by the
  # population, real GDP per capita and outlay steps.
  selGDP <- (USGDPp2$Year %in% USGDP$Year)

  # Refuse to continue if an existing deflator value has no counterpart in
  # the new download.
  if (!all(is.na(USGDPp2[!selGDP, 'GDPdeflator']))) {
    stop('ERROR: There are GDPdeflator numbers ',
         'in the current USGDPpresidents ',
         'that are not in the new. ',
         'Manual review required.')
  }

  selDefl <- grep('Deflator', names(USGDP))
  USGDPp2$GDPdeflator[selGDP] <- USGDP[, selDefl]
  print(names(USGDP)[selDefl])
}
Compare the index year of “GDP.Deflator” with that in
USGDPpresidents.Rd
: If they are different, fix
USGDPpresidents.Rd
.
Now update population:
# Copy the download column whose name contains "Population" into
# population.K for the years covered by the GDP download (selGDP, computed
# in the GDPdeflator step), then print the source column name.
if (Update) {
  selPop <- grep('Population', names(USGDP))
  USGDPp2[['population.K']][selGDP] <- USGDP[, selPop]
  print(names(USGDP)[selPop])
}
Now realGDPperCapita
. This also has a reference year, so
we need to make sure we get them all:
# Replace realGDPperCapita with the matching column of the GDP download.
# Uses selGDP (rows of USGDPp2 whose year is in the download), computed in
# the GDPdeflator step.
if (Update) {
  # Refuse to continue if an existing value has no counterpart in the new
  # download.  The column name must be spelled 'realGDPperCapita': the
  # former 'readGDPperCapita' names no column, so the lookup itself failed
  # with "undefined columns selected".
  if (any(!is.na(USGDPp2[!selGDP, 'realGDPperCapita']))) {
    stop('ERROR: There are realGDPperCapita numbers ',
         'in the current USGDPpresidents ',
         'that are not in the new. ',
         'Manual review required.')
  }

  # Pick the download column by name pattern and print the matched name so
  # its reference year can be compared with the help file.
  selGDPperC <- grep('Real.GDP.per.c', names(USGDP))
  USGDPp2$realGDPperCapita[selGDP] <- USGDP[, selGDPperC]
  print(names(USGDP)[selGDPperC])
}
Compare the index year of Real.GDP.per.capita
with that
in USGDPpresidents.Rd
: If they are different, fix
USGDPpresidents.Rd
.
Next: executive:
NOTE: THIS MAY NEED TO BE CHANGED MANUALLY HERE BEFORE EXECUTING,
BECAUSE IT IS NOT IN USGDP
… BOTH: ** WHO WAS PRESIDENT
SINCE THE PREVIOUS VERSION? ** WAS THAT PERSON NOT IN THE PREVIOUS
VERSION?
# Fill in the executive for the years that have none yet.
if (Update) {
  # EDIT BEFORE RUNNING: one name per year still lacking an executive,
  # in year order.
  newExec <- c('Trump', 'Trump', 'Biden')

  exec <- as.character(USGDPp2$executive)
  missExec <- is.na(exec)

  # A length mismatch would otherwise be recycled (with only a warning) and
  # assign presidents to the wrong years.
  if (sum(missExec) != length(newExec)) {
    stop('ERROR: ', sum(missExec), ' years lack an executive but ',
         length(newExec), ' names were supplied. ',
         'Manual review required.')
  }
  exec[missExec] <- newExec

  # union() appends only names that are not already levels; repeating an
  # existing level (e.g. 'Biden' once it is in the data) would make
  # ordered() fail with a duplicated-level error.
  lvlexec <- union(levels(USGDPp2$executive), newExec)
  USGDPp2$executive <- ordered(exec, lvlexec)
}
Similarly: war
NOTE: IF THERE HAS BEEN A MAJOR WAR SINCE THE LAST VERSION, THEN THIS TEXT NEEDS TO BE CHANGED, BECAUSE IT ASSUMES THERE HAS NOT BEEN A MAJOR WAR.
# Mark every year without a war entry as '' (no major war) and rebuild the
# ordered factor with its original level set.
if (Update) {
  lvlwar <- levels(USGDPp2$war)
  war <- as.character(USGDPp2$war)
  war[is.na(war)] <- ''
  USGDPp2$war <- ordered(war, levels = lvlwar)
}
Next: battleDeaths
and battleDeathsPMP
:
NOTE: battleDeaths
ARE ONLY BATTLE DEATHS IN MAJOR WARS
as defined in help(USGDPpresidents)
.
Otherwise, they are 0.
# New years get zero battle deaths; the derived column is then recomputed
# for every row as 1000 * battleDeaths / population.K.
if (Update) {
  USGDPp2[['battleDeaths']][iNew] <- 0
  USGDPp2$battleDeathsPMP <-
    1000 * USGDPp2$battleDeaths / USGDPp2$population.K
}
Keynes (per help(USGDPpresidents)
):
# The Keynes column is set to 0 for every newly added year.
if (Update) {
  USGDPp2[['Keynes']][iNew] <- 0
}
Unemployment figures came from different sources for different years.
Since 1940 the source has been the Bureau of Labor Statistics (BLS),
series LNS14000000
from the Current Population Survey.
These data are available as a monthly series from the Current Population Survey of the Bureau
of Labor Statistics.
Download the most recent years as an Excel file, compute row averages,
and transfer the numbers for the most recent years here.
NOTE: When I did that on 2022-02-22 I found minor discrepancies in
earlier years. Pushing this further I found that I could download data
back to 1948. The average unemployment per this BLS computation for 1948
and 1947 were 3.75 and 6.05 percent, respectively, vs. 0.038 and 0.059
in the previous version of USGDPpresidents
for those years.
I therefore decided to read that *.xlsx
file and replace
all those numbers.
# Locate the BLS download: an .xlsx file in the working directory whose
# name starts with "Series".  Only the selected name is displayed.
if (Update) {
  xls <- dir(pattern = '\\.xlsx$')
  BLSxls <- grep('^Series', xls, value = TRUE)
  BLSxls
}
library(readxl)
# Read the sheet, skipping its first 11 lines, and show its structure.
if (Update) {
  BLS <- read_excel(BLSxls, skip = 11)
  str(BLS)
}
Compute the average unemployment here, so I don’t have to do this separately.
# Average columns 2 to 13 of the BLS table within each row, giving one
# value per row.  NOTE(review): columns 2:13 are presumably the twelve
# monthly figures -- confirm against the spreadsheet layout.
if (Update) {
  UNEMP <- as.matrix(BLS[2:13])
  unemp <- apply(UNEMP, MARGIN = 1, FUN = mean)
  str(unemp)
}
Store these unemp
numbers
# Write the BLS annual averages into USGDPp2 for every year present in both
# tables, and label the new years with the same source as the last old year.
if (Update) {
  # Matching rows on each side of the year overlap.
  selBLS <- (BLS$Year %in% USGDPp2$Year)
  selU4GDP <- (USGDPp2$Year %in% BLS$Year)
  USGDPp2[selU4GDP, 'unemployment'] <- unemp[selBLS]

  # (An earlier version hard-coded the new values instead:
  #  USGDPp2$unemployment[iNew] <- c(4.875, 4.35, 3.89166666666667))

  # iNew[1] - 1 is the row just before the first appended row.
  USGDPp2$unempSource[iNew] <- USGDPp2$unempSource[iNew[1] - 1]

  tail(USGDPp2)
}
fedReceipts
, fedOutlays
We get fedReceipts
and fedOutlays
from two
different sources. Let’s start with the historical data first.
We manually copied the historical data from series Y 335 and 336 in
United
States Census Bureau (1975) Bicentennial Edition: Historical Statistics
of the United States, Colonial Times to 1970, Part 2. Chapter Y.
Government into a LibreOffice *.ods
file. We need to
read that once and add it to USGDPp2
:
# Locate the hand-entered historical statistics: an .ods file in the
# working directory whose name starts with "hstat".
if (Update) {
  # The pattern is anchored with `$`, like the csv and xlsx searches, so
  # that only names that END in ".ods" match (not e.g. "x.ods.bak").
  (odsFile <- dir(pattern = '\\.ods$'))
  (odsF <- grep('^hstat', odsFile, value = TRUE))
}
# Read the 'Receipts' sheet of the ods file (skipping its first 2 lines)
# and show its structure.
if (Update) {
  library(readODS)
  hstat <- read_ods(odsF, sheet = 'Receipts', skip = 2)
  str(hstat)
}
# Keep the first three columns of the rows that have a Year, sort them by
# Year, and display the first few rows.
if (Update) {
  Hstat <- hstat[!is.na(hstat$Year), 1:3]
  oOld <- order(Hstat$Year)
  Hst <- Hstat[oOld, ]
  head(Hst)
}
Add as new variables to USGDPp2
:
# Create fedReceipts and fedOutlays (all NA), then fill the years covered by
# the historical table with its columns 2 and 3 divided by 1000.
if (Update) {
  USGDPp2$fedOutlays <- NA
  USGDPp2$fedReceipts <- NA

  selGDP4Hst <- (USGDPp2$Year %in% Hst$Year)
  USGDPp2[selGDP4Hst, c("fedReceipts", "fedOutlays")] <- Hst[2:3] / 1000

  # Display the result for visual inspection.
  USGDPp2[c('Year', 'fedReceipts', 'fedOutlays')]
}
Now let’s add the new data.
# Pick the budget workbook: among the xlsx files found earlier (xls), those
# starting with "BUDGET" and containing "2-1"; the last one is used and
# displayed.
if (Update) {
  BudgetFiles <- grep('^BUDGET', xls, value = TRUE)
  BudgetF2_1 <- grep('2-1', BudgetFiles, value = TRUE)
  BudgetFile <- tail(BudgetF2_1, 1)
  BudgetFile
}
# Read it, skipping its first 3 lines.
if (Update) {
  Budget <- read_excel(BudgetFile, skip = 3)
  str(Budget)
}
# Drop the first two rows, keep the first three columns, and convert with
# Ecfun's asNumericDF().
if (Update) {
  library(Ecfun)
  Budg <- asNumericDF(Budget[-(1:2), 1:3])
  str(Budg)
}
# Copy columns 2 and 3 of the budget table into fedReceipts / fedOutlays for
# the years present in both tables (column 1 of Budg holds the year).
if (Update) {
  selBudg <- (Budg[, 1] %in% USGDPp2$Year)
  selGDP4budg <- (USGDPp2$Year %in% Budg[, 1])
  USGDPp2[selGDP4budg, c('fedReceipts', 'fedOutlays')] <-
    Budg[selBudg, 2:3]
}
Finally: fedOutlays_pGDP
# Compute federal outlays as a fraction of nominal GDP for the years covered
# by the GDP download (selGDP), with two diagnostic plots on a log y axis.
if (Update) {
  # Logical flag for 1843.  Dividing by (1 + flag) halves nominal GDP for
  # that single year and leaves all other years unchanged.
  # NOTE(review): presumably 1843 outlays cover only part of a year --
  # confirm the reason against the data sources.
  i1843 <- (USGDP$Year == 1843)
  sum(i1843)
  GDPnom <- USGDP$Nominal.GDP..million.of.Dollars. / (1 + i1843)

  plot(USGDP$Year, GDPnom, type = 'l', log = 'y')
  abline(v = 1843)

  fedOp <- USGDPp2$fedOutlays[selGDP] / GDPnom
  plot(USGDP$Year, fedOp, type = 'l', log = 'y')

  # Years outside the GDP download stay NA.
  USGDPp2$fedOutlays_pGDP <- NA
  USGDPp2$fedOutlays_pGDP[selGDP] <- fedOp
}
# Adopt the working copy as the new USGDPpresidents and plot outlays as a
# percentage of GDP, marking war years and the 1929-1933 interval.
if (Update) {
  USGDPpresidents <- USGDPp2

  # Plot only years that have an outlay share.
  sel <- !is.na(USGDPpresidents$fedOutlays_pGDP)
  plot(100 * fedOutlays_pGDP ~ Year,
       data = USGDPpresidents[sel, ],
       type = 'l', log = 'y',
       xlab = '', ylab = 'US federal outlays, % of GDP')
  abline(h = 2:3)

  # Dotted gray verticals at years with a non-empty war label.  This
  # overwrites the character vector `war` built earlier.
  war <- (USGDPpresidents$war != '')
  abline(v = USGDPpresidents$Year[war],
         lty = 'dotted', col = 'light gray')

  abline(v = c(1929, 1933), col = 'red', lty = 'dotted')
  text(1931, 22, 'Hoover', srt = 90, col = 'red')
}
# Write the updated object to USGDPpresidents.rda in the working directory
# and display that directory so the file can be found for copying.
if (Update) {
  save(USGDPpresidents, file = 'USGDPpresidents.rda')
  getwd()
}
Now copy this file from the current working directory to
~Ecdat\data
, overwriting the previous version.