#Nathaniel D. Mark #Recitation 6 R Script #Intro to Econometrics #October, 2018 #Preliminaries #The following two packages are to create heteroskedastic standard errors: install.packages("lmtest") library(lmtest) install.packages("sandwich") library(sandwich) #Panel Linear Model Packages: install.packages("plm") library(plm) set.seed(1) #Generating Data #I patients: I<-1000 #T years: T<-25 #Main Question: Are patient costs higher in medicare than private insurance? patient<-c(seq(1,I),rep(0,(T-1)*I)) year<-c(rep(1971,I),rep(0,(T-1)*I)) Age<-c(rbinom(I, 100,.5),rep(0,(T-1)*I)) Medicare<-c(ifelse(Age[1:I]>=65,1,0),rep(0,(T-1)*I)) HMO<-c(ifelse(Age[1:I]>=65,0,rbinom(I,1,.9)),rep(0,(T-1)*I)) Other<-c(rep(1,I)-Medicare[1:I]-HMO[1:I],rep(0,(T-1)*I)) HealthIndex<-rep(rnorm(I,0,.5)*Age[1:I]/100+rnorm(I,0,.75),T) Cost<-c(apply(matrix(c(3000+4000*HealthIndex[1:I]-300*Medicare[1:I]+100*rgeom(I,.1),rep(0,I)),I,2,byrow = FALSE),1,max),rep(0,(T-1)*I)) for (t in 2:T){ patient[((t-1)*I+1):(t*I)]<-patient[((t-2)*I+1):((t-1)*I)] year[((t-1)*I+1):(t*I)]<-year[((t-2)*I+1):((t-1)*I)]+1 Age[((t-1)*I+1):(t*I)]<-Age[((t-2)*I+1):((t-1)*I)]+1 Medicare[((t-1)*I+1):(t*I)]<-ifelse(Age[((t-1)*I+1):(t*I)]>=65,1,0) HMO[((t-1)*I+1):(t*I)]<-ifelse(Age[((t-1)*I+1):(t*I)]>=65,0,rbinom(I,1,(HMO[((t-2)*I+1):((t-1)*I)])*.85)) Other[((t-1)*I+1):(t*I)]<-rep(1,I)-Medicare[((t-1)*I+1):(t*I)]-HMO[((t-1)*I+1):(t*I)] Cost[((t-1)*I+1):(t*I)]<-apply(matrix(c(3000+4000*as.numeric(year[((t-1)*I+1):(t*I)]>=1981)+4000*HealthIndex[((t-1)*I+1):(t*I)]+Age[((t-1)*I+1):(t*I)]-300*Medicare[((t-1)*I+1):(t*I)]+100*rgeom(I,.1),rep(0,I)),I,2,byrow = FALSE),1,max) } #Cost = 3000 + 4000(after 1981?) + 4000 * Health Index - 300 *medicare + epsilon Data <- data.frame(patient = patient, year = year, Age = Age, Medicare = Medicare, HMO = HMO, Other = Other, Cost = Cost) #Looking at the data: hist(Age) hist(HealthIndex) hist(Cost) pie(c(sum(Medicare), sum(HMO), sum(Other)), c("Medicare", "HMO", "Other"), main="Insurance Policy Make-up") #Estimating Panel Data Models in R: #For each, I use the plm (panel data linear model) package: #General framework: #Regression: #plm(Y ~ Xvars, data = dataframename, index = c("name of entity var", "name of time var"), model="type of model", effect = "if model = within, type of effects") #Standard Errors: #coeftest(modelname, vcov=vcovHC(modelname, type = "type of heteroskedastic robustness", cluster = "group if entity clusters; time if time clusters")) #Method One: "Pooled" Model: #This is just OLS -- like we are ignoring that our data is panel: PooledModel <- lm(Cost ~ Age + Medicare + HMO + Other) summary(PooledModel) #Adding clustered heteroskedastic-robust PooledModel2 <- plm(Cost ~ Age + Medicare + HMO + Other, data = Data, index = c("patient", "year"), model = "pooling") coeftest(PooledModel, vcov = vcovHC(PooledModel, type = "HC0", cluster = "group")) #Method Two: First Differencing Model: FDModel <- plm(Cost ~ Age + Medicare + HMO + Other, data = Data, index=c("patient", "year"), model = "fd") coeftest(FDModel, vcov = vcovHC(FDModel, type = "HC0")) #Note: Age was colinear with the intercept (time-trend) #Method Three: Time Effects: TimeModel<- plm(Cost ~ Age + Medicare + HMO + Other, data = Data, index = c("patient", "year"), effect = "time", model = "within") coeftest(TimeModel, vcov = vcovHC(TimeModel, type = "HC0")) #Method Four: Entity fixed effects: #NOTE: this is the default plm model. EntityModel <- plm(Cost ~ Age + Medicare + HMO + Other, data = Data, index = c("patient", "year"), effect= "individual" , model = "within") coeftest(EntityModel, vcov = vcovHC(EntityModel, type = "HC0", cluster = "group")) #Testimg to show equivalency to adding all dummy variables: DummyModel <- lm(Cost ~ Age + Medicare + HMO + Other + as.factor(patient) + as.factor(year) - 1 ) summary(DummyModel) #Method Five: All (entity and time fixed Effects) Model: AllModel<- plm(Cost ~ Age + Medicare + HMO + Other, data = Data, index=c("patient", "year"), effect = "twoway", model = "within") summary(AllModel)