截面数据DID操作程序指南, 一步一步教你做
凡是搞计量经济的,都关注这个号了
邮箱:econometrics666@sina.cn
之前,咱们圈子引荐过“截面数据DID讲述, 截面做双重差分政策评估的范式”。今天,咱们就将截面数据DID操作程序指南呈现给各位学者。它主要来自于咱们社群群友引荐,而社群里也有完整的do程序和数据帮助你实现文章里的结果。
**********************************************************************
****** Data cleaning
**********************************************************************
*** 0. Program setup
**********************************************************************
// Run the commands below under Stata 11.2 interpretation rules.
version 11.2
// Start from an empty session: no data in memory, no leftover macros.
clear all
macro drop _all
// Wrap log output at 80 columns and never pause for --more--.
set linesize 80
set more off
**********************************************************************
*** 1. Identify sample
**********************************************************************
*********************************
*** 1.1. open data
*********************************
// Path is relative to the current working directory.
// NOTE(review): file name suggests the 2005 CGSS survey -- confirm.
use "Datasets/Source/cgss2005", clear
// Report the number of observations before any sample restriction.
count
*********************************
*** 1.2. current residence: urban / rural
*********************************
// Show the raw numeric codes of qs2c, missing values included.
tabulate qs2c, nolabel missing
// 0/1 indicator: raw code 1 becomes 0 (urban), raw code 2 becomes 1 (rural).
recode qs2c (2 = 1) (1 = 0), generate(residence)
label variable residence "current places of residence"
label define rural 0 "0 urban" 1 "1 rural"
label values residence rural
tabulate residence, missing
*********************************
*** 1.3. birth year
*********************************
tabulate qa3_01, missing
generate birthyear = qa3_01
label variable birthyear "year of birth"
// Keep only respondents born 1955-1966 (the three cohorts used later).
keep if inrange(qa3_01, 1955, 1966)
count
*********************************
*** 1.4. birth place: rural if qc01c_1 == .
*********************************
// Same coding for father (item suffix _1) and mother (item suffix _2):
// a system-missing qc01c_* is coded rural (1), any observed value is coded
// urban (0); other missing codes stay missing.
forvalues k = 1/2 {
    local p   : word `k' of f m
    local who : word `k' of father mother
    tabulate qc01c_`k', missing
    generate rural_b1`p' = (qc01c_`k' == .) if qc01c_`k' <= .
    label variable rural_b1`p' "birth place based on `who': .=rural"
    label values rural_b1`p' rural
    tabulate rural_b1`p', missing
    // Cross-check against the respondent's current residence.
    tabulate rural_b1`p' residence, missing
}
*********************************
*** 1.5. more refined birth place
*********************************
// Refinement rules (applied when qc01c_* is system missing):
//  - occupation is set to missing if the danwei information is
//    "don't know" / "unwilling to say";
//  - occupation is coded "not doing farm work" if danwei information is
//    present but occupation is missing (peasants were not asked either the
//    danwei or the occupation question).
forvalues k = 1/2 {
    local p   : word `k' of f m
    local who : word `k' of father mother
    generate rural_new`p' = rural_b1`p'
    // Non-farm evidence from danwei / occupation items: recode to urban.
    replace rural_new`p' = 0 if qc01c_`k' == . & (qc01d_`k' <= 8 | qc01e_`k' <= 7)
    // Uninformative danwei / occupation answers: set to missing (this rule
    // runs last and therefore overrides the previous one).
    replace rural_new`p' = . if qc01c_`k' == . & (inlist(qc01d_`k', 10, 11) | inlist(qc01e_`k', 9, 10))
    label variable rural_new`p' "refined birth place based on `who'"
    label values rural_new`p' rural
    tabulate rural_new`p', missing
}
// 1 when both parents are coded rural, 0 otherwise.
// NOTE(review): despite the "refined" label this uses the unrefined
// rural_b1f / rural_b1m rather than rural_newf / rural_newm -- confirm this
// is intended before changing, since the analysis sample depends on it.
generate rural_newfm = (rural_b1f == 1 & rural_b1m == 1) if !missing(rural_b1f)
label variable rural_newfm "refined birth place based on both father's and mother's info"
**********************************************************************
*** 2. Father's and mother's background information
**********************************************************************
*********************************
*** 2.1. party membership
*********************************
// Father uses item suffix _1, mother uses _2; identical coding for both:
// code 1 -> member (1), codes 2-3 -> non-member (0), codes 4-6 -> missing.
forvalues k = 1/2 {
    local p   : word `k' of f m
    local who : word `k' of father mother
    tabulate qc01b_`k', missing nolabel
    recode qc01b_`k' (4/6 = .) (2/3 = 0) (1 = 1), generate(`p'party)
    label variable `p'party "`who''s party membership"
    tabulate `p'party, missing
}
// Either parent a member; defined only when the father's status is known.
generate fmparty = (fparty == 1 | mparty == 1) if !missing(fparty)
label variable fmparty "father's or mother's party membership"
*********************************
*** 2.2. education
*********************************
// Convert each parent's education category into years of schooling and
// flag "at least junior high" (9+ years). Father = suffix _1, mother = _2.
forvalues k = 1/2 {
    local p   : word `k' of f m
    local who : word `k' of father mother
    tabulate qc01a_`k', missing nolabel
    recode qc01a_`k' (1 = 0) (2 12 = 6) (3 = 9) (4/6 = 12) (7/8 = 14) (9/10 = 16) (11 = 19) (13 15 16 = .), generate(`p'edu)
    label variable `p'edu "`who''s years of schooling"
    generate `p'junior = (`p'edu >= 9) if !missing(`p'edu)
    label variable `p'junior "`who' at least junior high educated"
}
// Highest schooling among the parents. max() ignores a missing argument, and
// the condition restricts the result to cases where at least one parent has
// observed schooling and the other is observed or system missing.
generate fmedu = max(fedu, medu) if (fedu < . & medu <= .) | (fedu <= . & medu < .)
label variable fmedu "highest schooling years of parents"
generate fmjunior = (fmedu >= 9) if !missing(fmedu)
label variable fmjunior "father or mother at least junior high educated"
**********************************************************************
*** 3. Key predictors for diff-in-diff
**********************************************************************
*********************************
*** 3.1. province
*********************************
tabulate qs2a, missing
tabulate qa6_01, missing
// Author's note: 95.66% of respondents have the same permanent hukou as
// where they reside now; only 0.82% have inter-province migration.
tabulate qs2a if qa6_01 == 4
// Province of residence, with Hainan (46) folded into Guangdong (44)
// because Hainan was part of Guangdong province in 1955-1966.
generate province = cond(qs2a == 46, 44, qs2a)
label variable province "province"
label define province ///
    11 "Beijing" 12 "Tianjin" 13 "Hebei" 14 "Shanxi" 15 "Neimenggu" ///
    21 "Liaoning" 22 "Jilin" 23 "Heilongjiang" ///
    31 "Shanghai" 32 "Jiangsu" 33 "Zhejiang" 34 "Anhui" 35 "Fujian" ///
    36 "Jiangxi" 37 "Shandong" ///
    41 "Henan" 42 "Hubei" 43 "Hunan" 44 "Guangdong" 45 "Guangxi" 46 "Hainan" ///
    51 "Sichuan" 52 "Guizhou" 53 "Yunnan" ///
    61 "Shaanxi" 62 "Gansu" 65 "Xinjiang" 50 "Chongqing"
label values province province
tabulate province, missing
*********************************
*** 3.2. excess death rate 1: from Huang (2012, Table 2)
*********************************
// Lookup table of "province-code rate" pairs. Hainan (46) shares Guangdong's
// (44) value and Chongqing (50) shares Sichuan's (51) value. Provinces not
// listed keep a missing edr1.
local edr_pairs ///
    11 1.67   12 1.80   13 3.12   14 0.95   15 0.47 ///
    21 5.55   22 2.22   23 1.75 ///
    31 0.62   32 5.10   33 1.88   34 21.07  35 3.68   36 2.37   37 7.87 ///
    41 10.22  42 5.02   43 8.80   44 3.37   46 3.37   45 10.90 ///
    50 28.63  51 28.63  52 16.38  53 3.15 ///
    61 0.13   62 10.48  65 2.63
generate edr1 = .
// Consume the table two tokens at a time: province code, then its rate.
while "`edr_pairs'" != "" {
    gettoken prov_code edr_pairs : edr_pairs
    gettoken prov_rate edr_pairs : edr_pairs
    replace edr1 = `prov_rate' if qs2a == `prov_code'
}
label variable edr1 "excess mortality index"
tabulate edr1, missing
*********************************
*** 3.3. birth year dummy, and interaction of birth year and excess death rate 1
*********************************
// One 0/1 indicator per birth year 1955-1966.
foreach yr of numlist 1955/1966 {
    generate by`yr' = (birthyear == `yr')
    label variable by`yr' "dummy for birthyear `yr'"
}
// Interactions are created in a second pass so that all by* variables
// precede all edr1* variables in the dataset.
foreach yr of numlist 1955/1966 {
    generate edr1`yr' = edr1 * by`yr'
    label variable edr1`yr' "interaction of edr1 and birthyear"
}
**********************************************************************
*** 4. Health outcomes: functional health status (FH)
**********************************************************************
local fh_raw qd1 qd2 qd3 qd4 qd5 qd6 qd7 qd8
codebook `fh_raw', compact
tab1 `fh_raw', missing
// Reverse-score every item onto a 1-5 scale. Items 1 and 4 have six answer
// categories (spread evenly over 5..1); the others have five (code 3 is left
// unchanged). FH1..FH8 are created in order so ranges like FH1-FH8 work.
forvalues q = 1/8 {
    if inlist(`q', 1, 4) {
        recode qd`q' 1=5 2=4.2 3=3.4 4=2.6 5=1.8 6=1, gen(FH`q')
    }
    else {
        recode qd`q' 1=5 2=4 4=2 5=1, gen(FH`q')
    }
}
// Sum of the eight items; a missing item makes the sum missing.
generate FH = FH1 + FH2 + FH3 + FH4 + FH5 + FH6 + FH7 + FH8
label variable FH "functional health"
**********************************************************************
*** 5. Demographic variables
**********************************************************************
*********************************
*** 5.1. gender
*********************************
tabulate qa2_01, missing nolabel
// Raw code 2 becomes 0 (female); code 1 (male) is left as is.
recode qa2_01 (2 = 0), generate(male)
label variable male "gender: female as ref."
label define male 0 "0 female" 1 "1 male"
label values male male
tabulate male, missing
*********************************
*** 5.2. age, cohort, and interaction terms
*********************************
generate age = 2005 - birthyear
label variable age "age at 2005"
// Three birth cohorts defined on the raw birth-year item.
generate pre = inrange(qa3_01, 1955, 1958)
label variable pre "pre-famine cohort: born 1955-1958"
generate famine = inrange(qa3_01, 1959, 1962)
label variable famine "famine cohort: born 1959-1962"
generate post = inrange(qa3_01, 1963, 1966)
label variable post "post-famine cohort: born 1963-1966"
tab1 pre famine post, missing
// Single categorical version of the three (mutually exclusive) indicators.
generate cohort = cond(pre == 1, 1, cond(famine == 1, 2, cond(post == 1, 3, .)))
label variable cohort "3 cohorts"
label define cohort 1 "1 pre-famine: 55-58" 2 "2 famine: 59-62" 3 "3 post-famine:63-66"
label values cohort cohort
tabulate cohort, missing
// Cohort x excess-death-rate interactions: preedr1, famineedr1, postedr1.
foreach grp in pre famine post {
    generate `grp'edr1 = `grp' * edr1
    label variable `grp'edr1 "`grp' * edr1"
}
**********************************************************************
*** 6. Respondents' SES
**********************************************************************
*********************************
*** 6.1. work hour
*********************************
tabulate qb08, missing
// Who are the respondents with no reported work hours?
tabulate qa7_01 if qb08 == .
// System-missing work hours are treated as zero hours.
generate workhour = cond(qb08 == ., 0, qb08)
label variable workhour "work hour per week"
histogram workhour, normal
// Offset of 0.1 keeps zero-hour respondents defined after the log.
generate lnwh = log(workhour + 0.1)
label variable lnwh "log work hour"
histogram lnwh, normal
codebook lnwh
*********************************
*** 6.2. individual income
*********************************
tabulate qa7_01, missing
tabulate qb12a, missing
tabulate qb12b, missing
generate inc_month = qb12a
generate inc_year = qb12b
label variable inc_month "income last month"
label variable inc_year "income last year"
tabulate qa7_01 if qb12a == .
tabulate qa7_01 if qb12b == .
// +1 keeps zero incomes defined after the log.
generate lninc = ln(inc_year + 1)
label variable lninc "logged income last year"
*********************************
*** 6.3. marital status
*********************************
tabulate qb01, missing
tabulate qb01, missing nolabel
// Codes 2, 4, 6 -> currently married (1); codes 1, 3, 5 -> unmarried (0).
recode qb01 (1 3 5 = 0) (2 4 6 = 1), generate(marital)
label define marital 0 "0 unmarried" 1 "1 currently married"
label values marital marital
label variable marital "marital status"
tabulate marital, missing
*********************************
*** 6.4. party membership
*********************************
tabulate qb04a, missing
tabulate qb04a, nolabel missing
generate ccp = (qb04a == 1) if !missing(qb04a)
label variable ccp "party member/mass(ref)"
label define ccp 0 "0 Mass" 1 "1 Party"
label values ccp ccp
tabulate ccp, missing
*********************************
*** 6.5. education
*********************************
tabulate qb03a, missing
tabulate qb03a, missing nolabel
// Map the education category to years of schooling.
generate edu = .
replace edu = 0 if qb03a < 3
replace edu = qb03a - 2 if inrange(qb03a, 3, 14)
replace edu = 12 if inlist(qb03a, 15, 16)
replace edu = 15 if inlist(qb03a, 17, 18)
replace edu = 16 if inlist(qb03a, 19, 20)
replace edu = 19 if qb03a == 21
label variable edu "years of schooling"
tabulate edu, missing
// Mutually exclusive attainment dummies derived from edu.
generate illiterate = (edu == 0) if !missing(edu)
label variable illiterate "edu = 0: illiterate"
recode edu 0=0 1/6=1 7/19=0, generate(elementary)
label variable elementary "0 < edu <= 6: elementary"
recode edu 0/6=0 7/9=1 10/19=0, generate(junior)
label variable junior "7 <= edu <= 9: junior"
recode edu 0/9=0 10/19=1, generate(hs)
label variable hs "10 <= edu <= 19: High School and above"
tab1 illiterate elementary junior hs, missing
**********************************************************************
*** 7. Generate 4 groups based on parental party membership and education
**********************************************************************
// group1 crosses gender with parental education (fmjunior);
// group2 crosses gender with parental party membership (fmparty).
// Coding: 1 = male & 0, 2 = male & 1, 3 = female & 0, 4 = female & 1.
// Each group variable is followed by its indicator set (groupK1..groupK4).
forvalues k = 1/2 {
    local ses : word `k' of fmjunior fmparty
    generate group`k' = cond(male == 1, 1, 3) + `ses' if inlist(male, 0, 1) & inlist(`ses', 0, 1)
    tabulate group`k', generate(group`k')
}
**********************************************************************
*** 8. Save data
**********************************************************************
// Move every derived variable (residence through group24) to the front.
order residence - group24
quietly compress
// `replace' lets the do-file be re-run: without it `save' stops with
// r(602) "file already exists" on the second and every later run.
// NOTE(review): the analysis scripts later in this file `use' this dataset
// by bare file name (no "Datasets/Derived/" prefix) -- confirm the working
// directory they are meant to run from.
save "Datasets/Derived/2014-02-24_clean10a_SocScienceRes_58notinfamine", replace
*************************************************************************
*****Table 1
**************************************************************************
// Task: descriptive analysis; 2005 cgss; rural sample
// Project: Chinese Famine & Self-rated Health
**********************************************************************
*** 0. Program setup
**********************************************************************
// Interpret this script under Stata 13.0 rules and start from a clean
// session (no data, no macros, no --more-- pauses).
version 13.0
clear all
macro drop _all
set linesize 80
set more off
**********************************************************************
*** 1. Identify sample
**********************************************************************
*********************************
*** 1.1. open data
*********************************
// Cleaned dataset, opened by bare file name from the working directory.
use "2014-02-24_clean10a_SocScienceRes_58notinfamine", clear
*********************************
*** 1.2. sample
*********************************
// Analysis sample: both parents coded rural and parental party membership
// observed.
keep if rural_newfm == 1 & fmparty < .
// Drop anyone with a parent coded urban.
// NOTE(review): appears redundant if rural_newfm == 1 already implies
// rural_b1f == 1 and rural_b1m == 1 -- confirm against the cleaning script.
drop if rural_b1f == 0 | rural_b1m == 0
count
/* n = 1716*/
**********************************************************************
*** 2. EDR and health
**********************************************************************
tabulate edr1, missing
// Factor analysis and reliability (Cronbach's alpha) of the 8 health items.
local fh_items FH1 FH2 FH3 FH4 FH5 FH6 FH7 FH8
factor `fh_items'
alpha `fh_items'
**********************************************************************
*** 3. Descriptives
**********************************************************************
tab1 cohort male pre famine post FH1-FH lninc edu edr1 fmparty age, missing
// Variables compared across gender / cohort, and the shared tabstat layout.
local outcomes FH FH1-FH8 edu lninc fmparty age
local layout by(male) stat(mean sd count) long nototal format(%9.2f)
// Whole sample (this table additionally reports edr1), then each cohort.
tabstat FH FH1-FH8 edu lninc edr1 fmparty age, `layout'
forvalues k = 1/3 {
    tabstat `outcomes' if cohort == `k', `layout'
}
// Gender differences, whole sample.
foreach var of varlist `outcomes' {
    display "variable == `var'"
    ttest `var', by(male)
}
// Gender differences within each cohort.
foreach c of varlist pre famine post {
    foreach v of varlist `outcomes' {
        display "cohort = `c'; variable = `v'"
        ttest `v' if `c' == 1, by(male)
    }
}
// Differences across the three cohorts.
foreach v of varlist `outcomes' {
    display "==> `v'"
    anova `v' cohort
}
************************************************************
***Table 2-Table 3
****************************************************************************
// Task: diff-in-diff models
// Project: Chinese Famine & Self-rated Health
*****************************************************************
*** 0. Program setup
*****************************************************************
// Interpret this script under Stata 11.2 rules and start from a clean
// session.
version 11.2
clear all
macro drop _all
// Memory allocation request (1000 MB).
set mem 1000m
set linesize 80
set more off
*****************************************************************
*** 1. Open data, drop unused cases, and global
*****************************************************************
**************************************************
*** 1.1. open data and drop unused cases
**************************************************
// Same sample restriction as the Table 1 script: both parents coded rural
// and parental party membership observed.
use "2014-02-24_clean10a_SocScienceRes_58notinfamine", clear
keep if rural_newfm == 1 & fmparty < .
drop if rural_b1f == 0 | rural_b1m == 0
// Fold Chongqing (50) into Sichuan (51) so province fixed effects and
// clustering treat them as one unit.
replace province = 51 if province == 50 //chongqing in Sichuan around the famine period
**************************************************
*** 1.2. global
**************************************************
// Outcome list for the health models.
global health "FH"
// Birth-year dummies 1955-1962 and their interactions with edr1;
// 1963-1966 are left out of both lists.
global by "by1955 by1956 by1957 by1958 by1959 by1960 by1961 by1962"
global edr1by "edr11955 edr11956 edr11957 edr11958 edr11959 edr11960 edr11961 edr11962"
*****************************************************************
*** 2. Diff-in-diff: health
*****************************************************************
**************************************************
*** 2.1. models (Table 2)
**************************************************
// Shared right-hand side and outreg2 formatting for every model below.
local rhs pre famine edr1 preedr1 famineedr1 age
local fmt excel bdec(3) alpha(0.001, 0.01, 0.05, 0.1) symbol(***, **, *, +)
// By gender (men first, then women), with province fixed effects.
foreach var in $health {
    foreach sex of numlist 1 0 {
        reg `var' `rhs' i.province if male == `sex', vce(cluster province)
        outreg2 using "MainEffects_health", `fmt'
    }
}
// By gender x parental party membership (group2 = 1..4). Province fixed
// effects enter only for groups 1 and 3; groups 2 and 4 are fitted
// without them.
foreach var in $health {
    forvalues g = 1/4 {
        local fe ""
        if inlist(`g', 1, 3) local fe "i.province"
        reg `var' `rhs' `fe' if group2 == `g', vce(cluster province)
        outreg2 using "MainEffects_health", `fmt'
    }
}
**************************************************
*** 2.2. tests
**************************************************
// Cross-group tests of the famine x edr1 coefficient, one pass per outcome
// in $health. The four group models are estimated INSIDE the loop from the
// loop variable, so the tests always refer to the outcome being iterated
// (previously the models were hard-coded to FH outside the loop and the
// loop variable was never used).
foreach var in $health {
    display "outcome = `var'"
    // Unclustered fits; suest supplies the cluster-robust joint covariance.
    reg `var' pre famine edr1 preedr1 famineedr1 age ib11.province if group2 == 1
    est store m5
    reg `var' pre famine edr1 preedr1 famineedr1 age if group2 == 2
    est store m6
    reg `var' pre famine edr1 preedr1 famineedr1 age ib11.province if group2 == 3
    est store m7
    reg `var' pre famine edr1 preedr1 famineedr1 age if group2 == 4
    est store m8
    // Joint equality across all four groups, then the difference-in-gaps
    // (men's SES gap vs. women's SES gap).
    suest m5 m6 m7 m8, vce(cluster province)
    test [m5_mean]famineedr1 = [m6_mean]famineedr1 = [m7_mean]famineedr1 = [m8_mean]famineedr1
    test [m5_mean]famineedr1 - [m6_mean]famineedr1 = [m7_mean]famineedr1 - [m8_mean]famineedr1
    // SES difference in men
    suest m5 m6, vce(cluster province)
    test [m5_mean]famineedr1 = [m6_mean]famineedr1
    // SES difference in women
    suest m7 m8, vce(cluster province)
    test [m7_mean]famineedr1 = [m8_mean]famineedr1
}
est clear
**************************************************
*** 2.3. sensitivity: use birthyear (Appendix D)
**************************************************
// Same four group models as Table 2, but with single birth-year dummies
// ($by) and their edr1 interactions ($edr1by) instead of cohort indicators.
// Province fixed effects enter only for groups 1 and 3.
local fmt excel bdec(3) alpha(0.001, 0.01, 0.05, 0.1) symbol(***, **, *, +)
foreach var in $health {
    forvalues g = 1/4 {
        local fe ""
        if inlist(`g', 1, 3) local fe "i.province"
        reg `var' $by edr1 $edr1by age `fe' if group2 == `g', vce(cluster province)
        outreg2 using "MainEffects_health2", `fmt'
    }
}
**********************************************************************
*** 3. Diff-in-diff: SES outcomes
**********************************************************************
**************************************************
*** 3.1. models (Table 3)
**************************************************
// Education and log income, fitted separately for each gender x parental
// party group (group2 = 1..4). Province fixed effects enter only for
// groups 1 and 3.
local rhs pre famine edr1 preedr1 famineedr1 age
local fmt excel bdec(3) alpha(0.001, 0.01, 0.05, 0.1) symbol(***, **, *, +)
foreach var in edu lninc {
    forvalues g = 1/4 {
        local fe ""
        if inlist(`g', 1, 3) local fe "i.province"
        reg `var' `rhs' `fe' if group2 == `g', vce(cluster province)
        outreg2 using "MainEffects_ses", `fmt'
    }
}
**************************************************
*** 3.2. tests
**************************************************
// Cross-group tests of the famine x edr1 coefficient for EACH SES outcome.
// The four group models are re-estimated inside the loop from the loop
// variable. Previously they were fitted once, for edu only, before the
// loop, so the loop ran the edu tests twice and never tested lninc.
foreach var in edu lninc {
    display "outcome = `var'"
    // Unclustered fits; suest supplies the cluster-robust joint covariance.
    // `est store' overwrites m5-m8 from the previous outcome.
    reg `var' pre famine edr1 preedr1 famineedr1 age ib11.province if group2 == 1
    est store m5
    reg `var' pre famine edr1 preedr1 famineedr1 age if group2 == 2
    est store m6
    reg `var' pre famine edr1 preedr1 famineedr1 age ib11.province if group2 == 3
    est store m7
    reg `var' pre famine edr1 preedr1 famineedr1 age if group2 == 4
    est store m8
    // Joint equality across all four groups, then the difference-in-gaps
    // (men's SES gap vs. women's SES gap).
    suest m5 m6 m7 m8, vce(cluster province)
    test [m5_mean]famineedr1 = [m6_mean]famineedr1 = [m7_mean]famineedr1 = [m8_mean]famineedr1
    test [m5_mean]famineedr1 - [m6_mean]famineedr1 = [m7_mean]famineedr1 - [m8_mean]famineedr1
    // SES difference in men
    suest m5 m6, vce(cluster province)
    test [m5_mean]famineedr1 = [m6_mean]famineedr1
    // SES difference in women
    suest m7 m8, vce(cluster province)
    test [m7_mean]famineedr1 = [m8_mean]famineedr1
}
est clear
拓展性阅读:
11.高效使用Stata的115页Tips, PDF版本可打印使用
3.2卷RDD断点回归使用手册, 含Stata和R软件操作流程
8.DID, 合成控制, 匹配, RDD四种方法比较, 适用范围和特征
10.在教育领域使用IV, RDD, DID, PSM多吗?
13.PSM-DID, DID, RDD, Stata程序百科全书式的宝典
其他名家专栏文章,建议全部阅读
4.必须反对实证主义--评陆铭《如何把实证研究进行到底》
8.陈强: 计量经济学实证论文写作全解析
10.陆蓉: 计量工具让经济学科学化了吗
12.于晓华: 计量经济模型进行实证分析的正确打开方式
13.方汉明: 美国经济学教育体系和对中国的启示
2年,计量经济圈公众号近1000篇文章,
Econometrics Circle
数据系列:空间矩阵 | 工企数据 | PM2.5 | 市场化指数 | CO2数据 | 夜间灯光 | 官员方言 | 微观数据 |
计量系列:匹配方法 | 内生性 | 工具变量 | DID | 面板数据 | 常用TOOL | 中介调节 | 时间序列 | RDD断点 | 合成控制 |
数据处理:Stata | R | Python | 缺失值 | CHIP/ CHNS/CHARLS/CFPS/CGSS等 |
干货系列:能源环境 | 效率研究 | 空间计量 | 国际经贸 | 计量软件 | 商科研究 | 机器学习 | SSCI | CSSCI | SSCI查询 |
计量经济圈组织了一个计量社群,有如下特征:热情互助最多、前沿趋势最多、社科资料最多、社科数据最多、科研牛人最多、海外名校最多。因此,建议积极进取和有强烈研习激情的中青年学者到社群交流探讨,始终坚信优秀是通过感染优秀而互相成就彼此的。