#Nathaniel D. Mark
#Recitation 6 R Script
#Intro to Econometrics
#October, 2018

#Preliminaries
#The following two packages are to create heteroskedastic standard errors: 
install.packages("lmtest")
library(lmtest)
install.packages("sandwich")
library(sandwich)
#Panel Linear Model Packages:
install.packages("plm")
library(plm)
set.seed(1)


#Generating Data
#I patients:
I<-1000
#T years:
T<-25  

#Main Question: Are patient costs higher in medicare than private insurance?
patient<-c(seq(1,I),rep(0,(T-1)*I))
year<-c(rep(1971,I),rep(0,(T-1)*I))
Age<-c(rbinom(I, 100,.5),rep(0,(T-1)*I))
Medicare<-c(ifelse(Age[1:I]>=65,1,0),rep(0,(T-1)*I))
HMO<-c(ifelse(Age[1:I]>=65,0,rbinom(I,1,.9)),rep(0,(T-1)*I))
Other<-c(rep(1,I)-Medicare[1:I]-HMO[1:I],rep(0,(T-1)*I))
HealthIndex<-rep(rnorm(I,0,.5)*Age[1:I]/100+rnorm(I,0,.75),T)
Cost<-c(apply(matrix(c(3000+4000*HealthIndex[1:I]-300*Medicare[1:I]+100*rgeom(I,.1),rep(0,I)),I,2,byrow = FALSE),1,max),rep(0,(T-1)*I))

for (t in 2:T){
  patient[((t-1)*I+1):(t*I)]<-patient[((t-2)*I+1):((t-1)*I)]
  year[((t-1)*I+1):(t*I)]<-year[((t-2)*I+1):((t-1)*I)]+1
  Age[((t-1)*I+1):(t*I)]<-Age[((t-2)*I+1):((t-1)*I)]+1
  Medicare[((t-1)*I+1):(t*I)]<-ifelse(Age[((t-1)*I+1):(t*I)]>=65,1,0)
  HMO[((t-1)*I+1):(t*I)]<-ifelse(Age[((t-1)*I+1):(t*I)]>=65,0,rbinom(I,1,(HMO[((t-2)*I+1):((t-1)*I)])*.85))
  Other[((t-1)*I+1):(t*I)]<-rep(1,I)-Medicare[((t-1)*I+1):(t*I)]-HMO[((t-1)*I+1):(t*I)]
  Cost[((t-1)*I+1):(t*I)]<-apply(matrix(c(3000+4000*as.numeric(year[((t-1)*I+1):(t*I)]>=1981)+4000*HealthIndex[((t-1)*I+1):(t*I)]+Age[((t-1)*I+1):(t*I)]-300*Medicare[((t-1)*I+1):(t*I)]+100*rgeom(I,.1),rep(0,I)),I,2,byrow = FALSE),1,max) 
} 

#Cost = 3000 + 4000(after 1981?) + 4000 * Health Index - 300 *medicare + epsilon

Data <- data.frame(patient = patient, year = year, Age = Age, Medicare = Medicare, HMO = HMO, Other = Other, Cost = Cost)

#Looking at the data:
hist(Age)
hist(HealthIndex)
hist(Cost)
pie(c(sum(Medicare), sum(HMO), sum(Other)), c("Medicare", "HMO", "Other"), main="Insurance Policy Make-up")


#Estimating Panel Data Models in R:
#For each, I use the plm (panel data linear model) package:
#General framework:
#Regression:
  #plm(Y ~ Xvars, data = dataframename, index = c("name of entity var", "name of time var"), model="type of model", effect = "if model = within, type of effects")
#Standard Errors:
  #coeftest(modelname, vcov=vcovHC(modelname, type = "type of heteroskedastic robustness", cluster = "group if entity clusters; time if time clusters"))

#Method One: "Pooled" Model:
#This is just OLS -- like we are ignoring that our data is panel:
PooledModel <- lm(Cost ~ Age + Medicare + HMO + Other)
summary(PooledModel)

#Adding clustered heteroskedastic-robust 
PooledModel2 <- plm(Cost ~ Age + Medicare + HMO + Other, data = Data, index = c("patient", "year"), model = "pooling")
coeftest(PooledModel, vcov = vcovHC(PooledModel, type = "HC0", cluster = "group"))

#Method Two: First Differencing Model:
FDModel <- plm(Cost ~ Age + Medicare + HMO + Other, data = Data, index=c("patient", "year"), model = "fd")
coeftest(FDModel, vcov = vcovHC(FDModel, type = "HC0"))
  #Note: Age was colinear with the intercept (time-trend)

#Method Three: Time Effects:
TimeModel<- plm(Cost ~ Age + Medicare + HMO + Other, data = Data, index = c("patient", "year"), effect = "time", model = "within")
coeftest(TimeModel, vcov = vcovHC(TimeModel, type = "HC0"))

#Method Four: Entity fixed effects:
  #NOTE: this is the default plm model.
EntityModel <- plm(Cost ~ Age + Medicare + HMO + Other, data = Data, index = c("patient", "year"), effect= "individual" , model = "within")
coeftest(EntityModel, vcov = vcovHC(EntityModel, type = "HC0", cluster = "group"))

#Testimg to show equivalency to adding all dummy variables:
DummyModel <- lm(Cost ~ Age + Medicare + HMO + Other + as.factor(patient) + as.factor(year) - 1 )
summary(DummyModel)

#Method Five: All (entity and time fixed Effects) Model:
AllModel<- plm(Cost ~ Age + Medicare + HMO + Other, data = Data, index=c("patient", "year"), effect = "twoway", model = "within")
summary(AllModel)