forked from wbuchanan/eda
-
Notifications
You must be signed in to change notification settings - Fork 0
/
edaheat.ado
158 lines (113 loc) · 4.84 KB
/
edaheat.ado
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
********************************************************************************
* Description of the Program - *
* EDA subroutine used to create heatmap of correlations between continuous *
* variables in the dataset. *
* *
* Program Output - *
* Creates heatmap PDF (plus a GPH file when keepgph is specified) as well as entries in the LaTeX document *
* *
* Lines - *
* 157 *
* *
********************************************************************************
*! edaheat
*! v 0.0.0
*! 28OCT2015
// Drop program from memory if already loaded
cap prog drop edaheat

// edaheat - correlation heatmap for a set of variables
//
// Syntax:
//     edaheat varlist [if] [in], root(string) [ keepgph ]
//
// Arguments / options:
//     varlist  - two or more variables to correlate (passed to pwcorr)
//     if / in  - optional sample restriction
//     root()   - root directory; the graph is written to <root>/graphs/
//     keepgph  - also save the graph as a Stata .gph file
//
// Side effects:
//     - writes LaTeX section/subsection/figure entries to the file handle
//       named doc (assumes the caller has already opened it for writing)
//     - exports <root>/graphs/edaheatmap.pdf (and edaheatmap.gph with keepgph)
//     - calls texclean, which must be installed and return r(clntex)
//
// Returns:
//     r(edacorr) - the correlation matrix used to draw the heatmap
prog def edaheat, rclass

	// Stata version used to interpret syntax
	version 14

	// Define the syntax structure of the program
	syntax varlist(min=2) [if] [in], root(string asis) [ keepgph ]

	// Mark observations to use. novarlist keeps observations that are missing
	// on some of the variables so that pwcorr below can really use pairwise
	// deletion; without it marksample applies listwise deletion across the
	// whole varlist before pwcorr ever sees the data.
	marksample touse, novarlist

	// Add section header to LaTeX file
	file write doc "\section{Correlations} \newpage\clearpage" _n

	// Add subsection header
	file write doc "\subsection{Correlations Between Continuous Variables} \newpage\clearpage" _n

	// Preserve current state of the data (restored automatically on error)
	preserve

	// Keep only cases satisfying if/in condition
	keep if `touse'

	// Keep only the listed variables
	keep `varlist'

	// Number of variables in the list
	loc nvars : word count `varlist'

	// Build one axis label per variable, keyed by its position in varlist.
	// Index-keyed macros (lab1, lab2, ...) are used instead of names built
	// from the variable name because a local macro name is limited to 31
	// characters, which a long variable name plus a suffix would exceed.
	forv i = 1/`nvars' {

		// Name of the ith variable
		loc v : word `i' of `varlist'

		// Clean the variable label
		texclean `"`: var l `v''"'

		// Store the cleaned variable label
		loc lab`i' `"`r(clntex)'"'

		// Fall back to the variable name when there is no variable label
		if `"`lab`i''"' == "" {

			// Clean the variable name
			texclean `"`v'"', r

			// Use the cleaned variable name as the label
			loc lab`i' `"`r(clntex)'"'

		} // End IF Block for null variable label handling

	} // End Loop to get variable labels

	// Get pairwise correlation coefficient estimates
	pwcorr `varlist'

	// Store the correlation matrix
	mat edaheatmat = r(C)

	// Assign rownames to matrix
	mat rownames edaheatmat = `varlist'

	// Assign column names to matrix
	mat colnames edaheatmat = `varlist'

	// Clear existing data from memory
	clear

	// Load the correlation matrix as the data (edaheatmat1, edaheatmat2, ...)
	qui: svmat edaheatmat

	// Generate an id variable (row index of the correlation matrix)
	qui: g xvar = _n

	// Store the maximum value of _n (one row per variable)
	loc maxn = `c(N)'

	// Normalize the data to one row per (xvar, yvar) cell
	reshape long edaheatmat, i(xvar) j(yvar)

	// Loop over ids to assign the label of the ith variable to value i
	forv i = 1/`maxn' {

		// Look the label up by position; it was stored in lab`i' above
		la def xvar `i' `"`lab`i''"', modify

	} // End Loop to define value labels

	// Assign value labels to both axes
	la val xvar xvar
	la val yvar xvar

	// Change end of line delimiter to semicolon
	#d ;

	// Create a contour plot for the correlations
	cap: qui: tw contour edaheatmat yvar xvar, heatmap xlab(1(1)`maxn', val
	labsize(tiny) angle(90)) ylab(1(1)`maxn', val labsize(tiny) angle(0)
	nogrid) graphr(ic(white) fc(white) lc(white)) ccut(-1(.2)1) ysca(rev)
	ccolor("127 59 8" "179 88 6" "224 130 20" "253 184 99" "254 224 182"
	"216 218 235" "178 171 210" "128 115 172");

	// This is the one used for inclusion to work around the issues with
	// twoway contour
	tw contour edaheatmat yvar xvar, heatmap xlab(1(1)`maxn', val
	labsize(tiny) angle(90)) ylab(1(1)`maxn', val labsize(tiny) angle(0)
	nogrid) graphr(ic(white) fc(white) lc(white)) ccut(-1(.2)1) ysca(rev)
	ccolor("127 59 8" "179 88 6" "224 130 20" "253 184 99" "254 224 182"
	"247 247 247" "216 218 235" "178 171 210" "128 115 172" "84 39 136"
	"45 0 75") xti("Continuous Variables") yti("Continuous Variables")
	zti("Estimated" "Correlation Coefficient")
	ti("Correlations Between Continuous Variables");

	// End of line delimiter back to carriage return
	#d cr

	// Export the graph to pdf
	gr export `"`root'/graphs/edaheatmap.pdf"', as(pdf) replace

	// Save the Stata GPH file only when the keepgph option was specified
	if "`keepgph'" != "" {

		// Write the GPH file next to the PDF
		qui: gr save `"`root'/graphs/edaheatmap.gph"', replace

	} // End IF Block for gph save definition

	// Include in the LaTeX document
	file write doc "\begin{figure}[h!]" _n
	file write doc `"\caption{Correlation Heatmap \label{fig:heatmap}}"' _n
	file write doc `"\includegraphics[width=\textwidth]{edaheatmap.pdf}"' _n
	file write doc "\end{figure} \newpage\clearpage" _n

	// Return the matrix used for the heat map (moves edaheatmat into r())
	ret mat edacorr = edaheatmat

	// Restore data to previous state
	restore

// End of program definition
end