|
124 | 124 |
|
125 | 125 | to include =readLine()= even though we extended =RemoteFlowSource=. |
126 | 126 |
|
127 | | -** TODO supplement codeql: Add to models-as-data |
128 | | - - schema in codeql: [[../ql/java/ql/lib/semmle/code/java/dataflow/internal/ExternalFlowExtensions.qll]] |
129 | | - |
130 | | - - data sample: [[../.github/codeql/extensions/jedis-db-local-java/models/redis.clients.jedis.model.yml]] |
131 | | - |
132 | | - In the model editor, we see a java.io.*Console.*readline' (using =show already modeled= option) |
133 | | - #+BEGIN_SRC sh |
134 | | - 1:$ rg -i 'java.io.*Console.*readline' ql/java |
135 | | - ql/java/ql/lib/ext/generated/java.io.model.yml |
136 | | - 16: - ["java.io", "Console", False, "readLine", "()", "", "Argument[this]", "ReturnValue", "taint", "df-generated"] |
137 | | - 17: - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument[0]", "Argument[this]", "taint", "df-generated"] |
138 | | - 18: - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument[1].ArrayElement", "Argument[this]", "taint", "df-generated"] |
139 | | - 19: - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument[this]", "ReturnValue", "taint", "df-generated"] |
140 | | - #+END_SRC |
141 | | - note: this file is in the generated/ tree. There are others. |
142 | | - |
143 | | - The current readline modeling is in the =summaryModel= section; we need it |
144 | | - in a =sourceModel= |
145 | | - #+BEGIN_SRC yaml |
146 | | - extensions: |
147 | | - - addsTo: |
148 | | - pack: codeql/java-all |
149 | | - extensible: summaryModel |
150 | | - data: |
151 | | - ... |
152 | | - - ["java.io", "Console", False, "readLine", "()", "", "Argument[this]", "ReturnValue", "taint", "df-generated"] |
153 | | - - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument[0]", "Argument[this]", "taint", "df-generated"] |
154 | | - - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument[1].ArrayElement", "Argument[this]", "taint", "df-generated"] |
155 | | - - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument |
156 | | - #+END_SRC |
157 | | - |
158 | | - The model editor will not show this because its already modeled. To |
159 | | - illustrate text-based additions, we'll use plain text. |
160 | | - Starting from |
161 | | - #+BEGIN_SRC yaml |
162 | | - extensions: |
163 | | - - addsTo: |
164 | | - pack: codeql/java-all |
165 | | - extensible: summaryModel |
166 | | - data: |
167 | | - ... |
168 | | - - ["java.io", "Console", False, "readLine", "()", "", "Argument[this]", "ReturnValue", "taint", "df-generated"] |
169 | | - - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument[0]", "Argument[this]", "taint", "df-generated"] |
170 | | - - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument[1].ArrayElement", "Argument[this]", "taint", "df-generated"] |
171 | | - - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument |
172 | | - #+END_SRC |
173 | | - and the field information |
174 | | - #+BEGIN_SRC java |
175 | | - extensible predicate sourceModel( |
176 | | - string package, string type, boolean subtypes, string name, string signature, string ext, |
177 | | - string output, string kind, string provenance, QlBuiltins::ExtensionId madId |
178 | | - ); |
179 | | - #+END_SRC |
180 | | - Starting from =summaryModel= |
181 | | - #+BEGIN_SRC yaml |
182 | | - # summaryModel |
183 | | - # string package, string type, boolean subtypes, string name, string signature, string ext, string input, string output, string kind, string provenance, QlBuiltins::ExtensionId madId |
184 | | - - ["java.io", "Console", False, "readLine", "()", "", "Argument[this]", "ReturnValue", "taint", "df-generated"] |
185 | | - #+END_SRC |
186 | | - |
187 | | - we can construct the =sourceModel= |
188 | | - #+BEGIN_SRC yaml |
189 | | - extensions: |
190 | | - - addsTo: |
191 | | - pack: codeql/java-all |
192 | | - extensible: sourceModel |
193 | | - data: |
194 | | - # sourceModel |
195 | | - # string package, string type, boolean subtypes, string name, string signature, string ext, string output, string kind, string provenance, QlBuiltins::ExtensionId madId |
196 | | - - ["java.io", "Console", False, "readLine", "()", "", "ReturnValue", "remote", "manual"] |
197 | | - |
198 | | - # # from original |
199 | | - # # summaryModel |
200 | | - # # string package, string type, boolean subtypes, string name, string signature, string ext, string input, string output, string kind, string provenance, QlBuiltins::ExtensionId madId |
201 | | - # - ["java.io", "Console", False, "readLine", "()", "", "Argument[this]", "ReturnValue", "taint", "df-generated"] |
202 | | - |
203 | | - #+END_SRC |
204 | | - |
205 | | - and move this into [[../.github/codeql/extensions/sqlite-db/models/sqlite.model.yml]] |
206 | | - |
207 | | - |
208 | | - |
209 | | - To ensure that these model extensions are applied during query runs, include |
210 | | - this setting |
211 | | - #+begin_src javascript |
212 | | - { |
| 127 | +** Supplement CodeQL: Add to models-as-data |
| 128 | + |
| 129 | + To modify the dataflow configuration using the models-as-data mechanism, we will explicitly define a new source model for =java.io.Console.readLine=. This method is already modeled in CodeQL, but only by =summaryModel= rows, which describe how taint flows through a call (for example from =Argument[this]= to =ReturnValue=). For SQL injection tracking, we also need a =sourceModel= row so that its return value is treated as a taint source. Because the method is already covered by auto-generated data, the model editor does not list it unless the =show already modeled= option is enabled. |
| 130 | + |
| 131 | + We begin by locating the existing model: |
| 132 | + |
| 133 | + - The model schema used for extensions is defined here: |
| 134 | + [[../ql/java/ql/lib/semmle/code/java/dataflow/internal/ExternalFlowExtensions.qll]] |
| 135 | + |
| 136 | + - For reference, see a sample manually written model YAML: |
| 137 | + [[../.github/codeql/extensions/jedis-db-local-java/models/redis.clients.jedis.model.yml]] |
| 138 | + |
| 139 | + - To verify that =readLine= is already modeled, use ripgrep: |
| 140 | + #+BEGIN_SRC sh |
| 141 | + 1:$ rg -i 'java.io.*Console.*readline' ql/java |
| 142 | + ql/java/ql/lib/ext/generated/java.io.model.yml |
| 143 | + 16: - ["java.io", "Console", False, "readLine", "()", "", "Argument[this]", "ReturnValue", "taint", "df-generated"] |
| 144 | + 17: - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument[0]", "Argument[this]", "taint", "df-generated"] |
| 145 | + 18: - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument[1].ArrayElement", "Argument[this]", "taint", "df-generated"] |
| 146 | + 19: - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument[this]", "ReturnValue", "taint", "df-generated"] |
| 147 | + #+END_SRC |
| 148 | + |
| 149 | + Note: this model is auto-generated (=df-generated=) and appears under =summaryModel=. |
| 150 | + |
| 151 | + Here is an example of that structure: |
| 152 | + #+BEGIN_SRC yaml |
| 153 | + extensions: |
| 154 | + - addsTo: |
| 155 | + pack: codeql/java-all |
| 156 | + extensible: summaryModel |
| 157 | + data: |
| 158 | + ... |
| 159 | + - ["java.io", "Console", False, "readLine", "()", "", "Argument[this]", "ReturnValue", "taint", "df-generated"] |
| 160 | + - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument[0]", "Argument[this]", "taint", "df-generated"] |
| 161 | + - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument[1].ArrayElement", "Argument[this]", "taint", "df-generated"] |
| 162 | + - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument[this]", "ReturnValue", "taint", "df-generated"] |
| 163 | + #+END_SRC |
| 164 | + |
| 165 | + Because this modeling is already present, the model editor UI hides the |
| 166 | + method by default. To supplement it, we’ll add a new source model manually in plain YAML. |
| 167 | + |
| 168 | + First, recall the schema definition for =sourceModel=: |
| 169 | + |
| 170 | + #+BEGIN_SRC java |
| 171 | + extensible predicate sourceModel( |
| 172 | + string package, string type, boolean subtypes, string name, string signature, string ext, |
| 173 | + string output, string kind, string provenance, QlBuiltins::ExtensionId madId |
| 174 | + ); |
| 175 | + #+END_SRC |
| 176 | + |
| 177 | + Starting from the existing =summaryModel=, |
| 178 | + |
| 179 | + #+BEGIN_SRC yaml |
| 180 | + # summaryModel |
| 181 | + # string package, string type, boolean subtypes, string name, string signature, string ext, string input, string output, string kind, string provenance, QlBuiltins::ExtensionId madId |
| 182 | + - ["java.io", "Console", False, "readLine", "()", "", "Argument[this]", "ReturnValue", "taint", "df-generated"] |
| 183 | + #+END_SRC |
| 184 | + |
| 185 | + we construct the following =sourceModel= definition instead: |
| 186 | + |
| 187 | + #+BEGIN_SRC yaml |
| 188 | + extensions: |
| 189 | + - addsTo: |
| 190 | + pack: codeql/java-all |
| 191 | + extensible: sourceModel |
| 192 | + data: |
| 193 | + # sourceModel |
| 194 | + # string package, string type, boolean subtypes, string name, string signature, string ext, string output, string kind, string provenance, QlBuiltins::ExtensionId madId |
| 195 | + - ["java.io", "Console", False, "readLine", "()", "", "ReturnValue", "remote", "manual"] |
| 196 | + |
| 197 | + # # from original |
| 198 | + # # summaryModel |
| 199 | + # # string package, string type, boolean subtypes, string name, string signature, string ext, string input, string output, string kind, string provenance, QlBuiltins::ExtensionId madId |
| 200 | + # - ["java.io", "Console", False, "readLine", "()", "", "Argument[this]", "ReturnValue", "taint", "df-generated"] |
| 201 | + #+END_SRC |
| 202 | + |
| 203 | + Place this in: |
| 204 | + |
| 205 | + [[../.github/codeql/extensions/sqlite-db/models/sqlite.model.yml]] |
| 206 | + |
| 207 | + To ensure the model extension is applied during query runs, configure the |
| 208 | + CodeQL VS Code extension to use all extension packs. In =qllab.code-workspace=, add: |
| 209 | + |
| 210 | + #+BEGIN_SRC javascript |
| 211 | + { |
| 212 | + ..., |
| 213 | + "settings": { |
213 | 214 | ..., |
214 | | - "settings": { |
215 | | - ..., |
216 | | - "codeQL.runningQueries.useExtensionPacks": "all" |
217 | | - } |
| 215 | + "codeQL.runningQueries.useExtensionPacks": "all" |
218 | 216 | } |
219 | | - #+end_src |
| 217 | + } |
| 218 | + #+END_SRC |
220 | 219 |
|
221 | | - in the workspace configuration file [[../qllab.code-workspace]] |
| 220 | + In some environments (e.g., older VS Code versions), you may also need to add this setting to =.vscode/settings.json=, where it simplifies to: |
222 | 221 |
|
223 | | - In some environments (e.g., older VS Code versions), you may also need to |
224 | | - replicate this setting in [[../.vscode/settings.json]]; there it simplifies to |
225 | | - #+begin_src javascript |
226 | | - "codeQL.runningQueries.useExtensionPacks": "all" |
227 | | - #+end_src |
| 222 | + #+BEGIN_SRC javascript |
| 223 | + "codeQL.runningQueries.useExtensionPacks": "all" |
| 224 | + #+END_SRC |
228 | 225 |
|
229 | | - Now we can run [[../ql/java/ql/src/Security/CWE/CWE-089/SqlTainted.ql]] again. |
| 226 | + Now re-run the query: |
| 227 | + |
| 228 | + [[../ql/java/ql/src/Security/CWE/CWE-089/SqlTainted.ql]] |
230 | 229 |
|
| 230 | + You should see flows that originate at =readLine()= and reach the SQL sink. This confirms that your manual =sourceModel= extension is effective. |
231 | 231 |
|
232 | 232 |
|
0 commit comments