I would like to conduct multivariate regressions analyses of survey data with the item count technique using ictreg but without success as I keep seeing the following error message "Error in xtfrm.data.frame(x) : cannot xtfrm data frames". I am very new to R and not sure how to fix the issue.
Please find below the sample data on which I run the code on (only 20 observations, I don't have access to the remaining dataset), as well as the code.
structure(list(sex = structure(c(1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 1), label = "1=female)", format.stata = "%9.0g", labels = c(`0. Male` = 0,
`1. Female` = 1), class = c("haven_labelled", "vctrs_vctr", "double"
)), age = structure(c(2.40000009536743, 2.40000009536743, 2.40000009536743,
2.40000009536743, 2.40000009536743, 2.40000009536743, 2.40000009536743,
2.40000009536743, 2.40000009536743, 2.40000009536743, 2.40000009536743,
2.40000009536743, 2.40000009536743, 2.40000009536743, 2.40000009536743,
2.40000009536743, 2.40000009536743, 2.40000009536743, 2.40000009536743,
2.40000009536743), label = "Age (year)", format.stata = "%2.0f"),
P217_OCCUPATION = structure(c(9, 5, 7, 5, 5, 9, 5, 5, 5,
5, 8, 9, 9, 9, 5, 5, 8, 9, 5, 5), label = "P217 Occupation", format.stata = "%52.0f", labels = c(`1. LEGISLATORS,SENIOR OFFICIALS AND MANAGERS` = 1,
`2. PROFESSIONALS` = 2, `3. TECHNICIANS AND ASSOCIATE PROFESSIONALS` = 3,
`4. CLERKS` = 4, `5. SERVICE WORKERS AND SHOP AND MARKET SALES WORKERS` = 5,
`6. SKILLED AGRICULTURAL AND FISHERY WORKERS` = 6, `7. CRAFTS AND RELATED TRADES WORKERS` = 7,
`8. PLANT AND MACHINE OPERATORS ASSEMBLERS` = 8, `9. ELEMENTARY OCCUPATION` = 9,
`10. NOT STATED` = 10), class = c("haven_labelled", "vctrs_vctr",
"double")), industryGroup = structure(c(7, 5, 7, 5, 5, 7,
5, 7, 7, 7, 6, 6, 7, 8, 5, 7, 7, 7, 5, 5), label = "P3102 Industry Group", format.stata = "%45.0f", labels = c(`1. Agriculture, Hunting,.Forestry and Fishing` = 1,
`2. Mining and Quarrying` = 2, `3. Manufacturing` = 3, `4. Construction` = 4,
`5. Trade, Hotels & Restaurants` = 5, `6. Transport` = 6,
`7. Community & Personal Services` = 7, `8. Not Stated` = 8
), class = c("haven_labelled", "vctrs_vctr", "double")),
ownership = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1), label = "What is the form of ownership of this activity/enterprise?", format.stata = "%46.0f", labels = c(`1. Sole ownership or members of the household` = 1,
`2. Partnership` = 2, `3. Other /specify/` = 3, `9. Not stated` = 9
), class = c("haven_labelled", "vctrs_vctr", "double")),
bookAcct = structure(c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), label = "Have book of account", format.stata = "%34.0f", labels = c(`1. Yes some accounts, but not full` = 1,
`2. No, no accounts kept` = 2), class = c("haven_labelled",
"vctrs_vctr", "double")), easyAvoid = structure(c(1, 1, NA,
1, 0, 1, 1, 1, 1, 1, 1, 1, 0, NA, 0, 0, 0, 0, 1, 1), label = "Difficult evade (y/n)", format.stata = "%29.0f", labels = c(`1. Very easy` = 1,
`2. Easy` = 2, `3. Neither easy nor difficult` = 3, `4. Difficult` = 4,
`5. Very difficult` = 5, `98. .Don?t know` = 98, `99. Refuse to answer` = 99
), class = c("haven_labelled", "vctrs_vctr", "double")),
easyComply = structure(c(0, 0, 0, 0, 1, 0, NA, 0, 0, 0, 1,
0, 0, NA, 0, 1, 0, 0, 0, 0), label = "Easy comply tax obl (y/n)", format.stata = "%29.0f", labels = c(`1. Very easy` = 1,
`2. Easy` = 2, `3. Neither easy nor difficult` = 3, `4. Difficult` = 4,
`5. Very difficult` = 5, `98. .Don?t know` = 98, `99. Refuse to answer` = 99
), class = c("haven_labelled", "vctrs_vctr", "double")),
trustGVT = structure(c(0, 1, NA, 0, 0, 1, 1, 1, 1, 1, 0,
1, 1, 1, 1, 1, 0, 0, NA, 0), label = "Trust government (y/n)", format.stata = "%24.0f", labels = c(`1. A great deal of trust` = 1,
`2. Trust somewhat` = 2, `3. Trust just a little` = 3, `4. No trust at all` = 4,
`5. Have never heard of` = 5, `98. Don?t know` = 98, `99. Refuse to answer` = 99
), class = c("haven_labelled", "vctrs_vctr", "double")),
moral0 = structure(c(1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1), label = "Should always pay taxes (y/n)", format.stata = "%46.0f", labels = c(`1. Agree much more with the FIRST statement` = 1,
`2. Agree a bit more with the FIRST statement` = 2, `3. Agree a bit more with the SECOND statement` = 3,
`4. Agree much more with the SECOND statement` = 4, `5. Agree with neither statement` = 5,
`888. Refuse to answer` = 888, `999. Don?t know` = 999), class = c("haven_labelled",
"vctrs_vctr", "double")), opinion1 = structure(c(1, 1, 0,
1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0), label = "Wrong not paying tax (y/n)", format.stata = "%27.0f", labels = c(`1. Not wrong at all` = 1,
`2. Wrong but understandable` = 2, `3. Wrong and punishable` = 3,
`98. Don?t know` = 98, `99. Refuse to answer` = 99), class = c("haven_labelled",
"vctrs_vctr", "double")), opinion2 = structure(c(1, 1, 1,
0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, NA), label = "Tax auth right to make pay (y/n)", format.stata = "%29.0f", labels = c(`1. Completely agree` = 1,
`2. Somewhat agree` = 2, `3. Neither agree nor disagree` = 3,
`4. Somewhat disagree` = 4, `5. Completely disagree` = 5,
`98. Don?t know` = 98, `99. Refuse to answer` = 99), class = c("haven_labelled",
"vctrs_vctr", "double")), moral1 = structure(c(0, 1, 1, 0,
1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, NA), label = "Refuse to pay until better service (y/n)", format.stata = "%29.0f", labels = c(`1. Completely agree` = 1,
`2. Somewhat agree` = 2, `3. Neither agree nor disagree` = 3,
`4. Somewhat disagree` = 4, `5. Completely disagree` = 5,
`98. Don?t know` = 98, `99. Refuse to answer` = 99), class = c("haven_labelled",
"vctrs_vctr", "double")), direct_reporting = structure(c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, NA, 0, 0), label = "Do you always pay all taxes you owe to the government?", format.stata = "%19.0f", labels = c(No = 0,
Yes = 1), class = c("haven_labelled", "vctrs_vctr", "double"
)), head = structure(c(0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1,
0, 1, 1, 1, 1, 1, 1, 1), label = "Owner is HH head", format.stata = "%9.0g"),
norm = structure(c(0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1,
NA, 1, 1, 0, 0, 0, 0), label = "Social costs (y/n)", format.stata = "%27.0g", labels = c(`1. Very likely` = 1,
`2. Likely` = 2, `3. May or may not be likely` = 3, `4. Unlikely` = 4,
`5. Very unlikely` = 5, `98. Don?t know` = 98, `99. Refuse to answer` = 99
), class = c("haven_labelled", "vctrs_vctr", "double")),
treat = structure(c(0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0,
0, 1, 1, 0, 1, 0, 1), label = "RECODE of expgroup", format.stata = "%12.0g", labels = c(Control = 0,
Treatment = 1), class = c("haven_labelled", "vctrs_vctr",
"double")), married = structure(c(1, 1, 0, 0, 1, 1, 0, 0,
0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0), label = "Married", format.stata = "%9.0g"),
secondary = structure(c(0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
1, 1, 0, 0, 0, 0, 1, 0, 0), format.stata = "%9.0g"), log_hhsizeTot = structure(c(1.0986123085022,
1.0986123085022, 1.7917594909668, 0.6931471824646, 0.6931471824646,
1.0986123085022, 1.3862943649292, 0, 0, 1.3862943649292,
0.6931471824646, 1.3862943649292, 1.3862943649292, 1.3862943649292,
0.6931471824646, 1.3862943649292, 1.0986123085022, 0, 0,
1.3862943649292), format.stata = "%9.0g"), log_GROSS_VALUE = structure(c(11.1462001800537,
10.1105012893677, 9.07107830047607, 9.61580562591553, 9.50300979614258,
8.4990291595459, 11.0020999908447, 9.80972576141357, 9.9776668548584,
10.2691717147827, 11.0974102020264, 10.4744672775269, 9.95227813720703,
8.64822101593018, 10.1464338302612, 11.1844215393066, 10.5713167190552,
11.0974102020264, 8.69951438903809, 11.0974102020264), format.stata = "%9.0g"),
log_REVENUFROMSALE = structure(c(11.1418619155884, 10.1105012893677,
9.07107830047607, 9.61580562591553, 9.57498359680176, 8.4763708114624,
11.0020999908447, 9.80145454406738, 9.95418071746826, 10.2691717147827,
11.0974102020264, 10.4744672775269, 9.95227813720703, 8.64822101593018,
10.1345996856689, 11.1844215393066, 10.0858087539673, 10.8967390060425,
8.69951438903809, 11.0974102020264), format.stata = "%9.0g"),
y = structure(c(3, 2, 3, 2, 2, 2, 3, 2, 2, 2, 3, 3, 2, 3,
2, 1, 2, 2, 2, 3), format.stata = "%10.0g")), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
### Load the list experiment package (including required sandwich package)
library(sandwich)
library(list)
# Load package allowing to upload .dta
## First foreign to pull .dta
library(foreign)
## Then install haven package to upload .dta from >=Stata 13 versions
install.packages("readstata13")
library(readstata13)
myTryCatch <- function(expr) {
warn <- err <- NULL
value <- withCallingHandlers(
tryCatch(expr, error=function(e) {
err <<- e
NULL
}), warning=function(w) {
warn <<- w
invokeRestart("muffleWarning")
})
list(value=value, warning=warn, error=err)
}
## ####
## Table 1 - "Observed Data from the List Experiment"
rm(list=ls())
require(list)
## Load .dta data (change directory)
mydata <- readstata13::read.dta13("XXX.dta")
## Drop observations with missing data from key variables
### define key vars
key_vars <- c("easyAvoid", "easyComply", "trustGVT", "norm", "sex", "age", "head", "ownership", "bookAcct", "married", "secondary","P217_OCCUPATION", "log_hhsizeTot", "log_GROSS_VALUE","log_REVENUFROMSALE")
### use complete.cases(dat[key_vars]) - gives a boolean vector that allows to subset the rows
dat <- mydata[complete.cases(mydata[key_vars]), ] ## subset rows for complete cases in key_vars
## make factor
dat <- transform(dat, P217_OCCUPATION=as.factor(P217_OCCUPATION))
# Count the number of rows kept
nrow(dat)
table(dat$y, dat$treat)
## ####
## Table 2 - "Estimated Coefficients from the Logistic Regression Models"
### Fit standard design ML model
# Since the calls to list::ictreg() are identical except the model formulae, list()
## put formulae in named list
frml <- list(
## ...
noboundary0=y ~ easyAvoid + easyComply + trustGVT + norm + sex + age +
head + ownership + bookAcct + married + secondary + P217_OCCUPATION +
log_hhsizeTot + log_GROSS_VALUE + log_REVENUFROMSALE,
## Enforcement + Trust
noboundary1=y ~ easyAvoid + trustGVT,
## Enforcement + Facilitation + Trust
noboundary2=y ~ easyAvoid + easyComply + trustGVT,
## Enforcement + Trust + Social norm
noboundary3=y ~ easyAvoid + trustGVT + norm,
## Enforcement + Facilitation + Trust + Social norm
noboundary4=y ~ easyAvoid + easyComply + trustGVT + norm,
## ...
noboundary5=y ~ ~ easyAvoid + easyComply + trustGVT + sex + age + head +
ownership + bookAcct + married + secondary + P217_OCCUPATION +
log_hhsizeTot + log_GROSS_VALUE + log_REVENUFROMSALE,
## ...
noboundary6=y ~ easyAvoid + trustGVT + norm + sex + age + head + ownership +
bookAcct + married + secondary + P217_OCCUPATION + log_hhsizeTot +
log_GROSS_VALUE + log_REVENUFROMSALE,
## ...
noboundary7=y ~ easyAvoid + easyComply + trustGVT + norm + sex + age + head +
ownership + bookAcct + married + secondary + P217_OCCUPATION +
log_hhsizeTot + log_GROSS_VALUE + log_REVENUFROMSALE
)
# and lapply() over it. For cases ictreg fails in an interation (which would stop the loop)
# provide NA as a substitute using tryCatch
## run regressions
library(list)
fits <- lapply(frml, \(x) {
tryCatch(ictreg(x, treat="treat", J=4, data=dat, method="lm",
overdispersed=FALSE),
error=\(e) NA)
})
# fits is now a list that contains regression results
names(fits)
# [1] "noboundary0" "noboundary1" "noboundary2" "noboundary3" "noboundary4"
# [6] "noboundary5" "noboundary6" "noboundary7"
# which is easily accessible
fits$noboundary1
summary(fits$noboundary1)
sink('foo.txt') ## open sink
lapply(frml, \(x) {
+ myTryCatch(ictreg(x, treat="treat", J=4, data=dat, method="lm", overdispersed=FALSE))
})
sink() ## close sink