|
1 | 1 | * Using sqlite to illustrate models-as-data |
2 | | - This description uses / recycles a codeql workshop. |
3 | | -** Build the codeql database |
4 | | - To get started, build the codeql database (adjust paths to your setup): |
| 2 | + |
| 3 | + This section demonstrates the use of the models-as-data system by analyzing a |
| 4 | + small Java application that uses the SQLite JDBC driver. The example is adapted |
| 5 | + from a CodeQL workshop. |
| 6 | + |
| 7 | +** Build the CodeQL Database |
| 8 | + |
| 9 | + To get started, build the CodeQL database for the SQLite-backed Java sample. Adjust paths as needed. |
| 10 | + |
5 | 11 | #+BEGIN_SRC sh |
6 | | - # Build the db with source commit id. |
7 | 12 | SRCDIR=$(pwd) |
8 | 13 | DB=$SRCDIR/java-sqlite-$(cd $SRCDIR && git rev-parse --short HEAD).db |
9 | 14 |
|
10 | 15 | echo $DB |
11 | 16 | test -d "$DB" && rm -fR "$DB" |
12 | 17 | mkdir -p "$DB" |
13 | 18 |
|
14 | | - # Use the correct codeql |
| 19 | + # Ensure the correct CodeQL version is in your PATH |
15 | 20 | export PATH="$(cd ../codeql && pwd):$PATH" |
16 | 21 | codeql database create --language=java -s . -j 8 -v $DB --command='./build.sh' |
17 | 22 |
|
18 | | - # Check for AddUser in the db |
| 23 | + # Check for presence of AddUser.java in the resulting database |
19 | 24 | unzip -v $DB/src.zip | grep AddUser |
20 | 25 | #+END_SRC |
| 26 | + |
21 | 27 | Then add this database directory to your VS Code =DATABASES= tab. |
22 | | -** Tests using a default query |
23 | | - You can run the stdlib query |
24 | | - [[../ql/java/ql/src/Security/CWE/CWE-089/SqlTainted.ql]] but will get no results. |
25 | | - It does point at classes to inspect -- in particular, the source and sink |
26 | | - classes. Run [[./Illustrations.ql]]; from the command line or vs studio code. |
27 | | - Via cli: |
| 28 | + |
| 29 | +** Tests Using a Default Query |
| 30 | + |
| 31 | + You can run the standard SQL injection query: |
| 32 | + |
| 33 | + [[../ql/java/ql/src/Security/CWE/CWE-089/SqlTainted.ql]] |
| 34 | + |
| 35 | + but it will return no results. It does, however, point at the classes to inspect — in particular, the source and sink classes. To see what those classes actually match in this database, run the diagnostic query: |
| 36 | + |
| 37 | + [[./Illustrations.ql]] |
| 38 | + |
| 39 | + You can run it from the CLI: |
| 40 | + |
28 | 41 | #+BEGIN_SRC sh |
29 | | - # run query |
30 | 42 | codeql query run \ |
31 | 43 | -v \ |
32 | 44 | --database java-sqlite-e2e555c.db \ |
|
35 | 47 | --ram=14000 \ |
36 | 48 | Illustrations.ql |
37 | 49 |
|
38 | | - # format results |
39 | 50 | codeql bqrs decode --format=text result.bqrs | sed -n '/^Result set: #select/,$p' |
40 | 51 | #+END_SRC |
41 | | - This shows |
| 52 | + |
| 53 | + The result will look like: |
| 54 | + |
42 | 55 | #+BEGIN_SRC text |
43 | 56 | Result set: #select |
44 | 57 | | ui | qsi | |
45 | 58 | +------+-------+ |
46 | 59 | | args | query | |
47 | 60 | #+END_SRC |
48 | | - In the editor, these link to |
49 | | - 1. =main(ARGS)= and |
50 | | - 2. =conn.createStatement().executeUpdate(QUERY);= |
51 | | - The second is correct, but =System.console().readLine();= is not found. |
52 | | - Thus, =SqlTainted.ql= will not find anything. |
53 | | - |
54 | | -** TODO supplement sources via the model editor |
55 | | - - [ ] We have no flow |
56 | | - + check source, sink |
57 | | - + we have a sink |
58 | | - + but ActiveThreatModelSource finds no source |
59 | | - - [ ] We can supplement in different ways |
60 | | -** supplement codeql: Write full manual query: already in workshop |
61 | | -** TODO supplement codeql: Add to FlowSource or a subclass |
62 | | - |
63 | | - Note: this /one area/ that just has to be known. Browsing source will *not* |
64 | | - help you. |
65 | | - |
66 | | - CodeQL reading hint: |
67 | | - : class ActiveThreatModelSource extends DataFlow::Node |
68 | | - uses |
69 | | - : this.(SourceNode).getThreatModel() |
70 | | - So following the cast (SourceNode) may be useful: |
71 | | - #+BEGIN_SRC java |
72 | | - /** |
73 | | - ,* A data flow source. |
74 | | - ,*/ |
75 | | - abstract class SourceNode extends DataFlow::Node |
76 | | - #+END_SRC |
77 | | - Following the =abstract class= is promising: |
78 | | - #+BEGIN_SRC java |
79 | | - abstract class RemoteFlowSource extends SourceNode |
80 | | - #+END_SRC |
81 | | - and others. |
82 | | - |
83 | | - In |
84 | | - [[../ql/java/ql/lib/Customizations.qll]] |
85 | | - notice the comments mentioning RemoteFlowSource. |
86 | | - Use imports from [[../ql/java/ql/src/Security/CWE/CWE-089/SqlTainted.ql]] |
87 | | - but note that there are conflicts. you will use |
88 | | - : private import semmle.code.java.dataflow.FlowSources |
89 | | - Follow this to FlowSources, and find the mentioned RemoteFlowSource |
90 | | - : abstract class RemoteFlowSource extends SourceNode |
91 | | - |
92 | | - Add the custom source. The modified [[../ql/java/ql/lib/Customizations.qll]] is |
93 | | - #+BEGIN_SRC java |
94 | | - import java |
95 | | - private import semmle.code.java.dataflow.FlowSources |
96 | | - |
97 | | - class ReadLine extends RemoteFlowSource { |
98 | | - ReadLine() { |
99 | | - exists(Call read | |
100 | | - read.getCallee().getName() = "readLine" and |
101 | | - read = this.asExpr() |
102 | | - ) |
103 | | - } |
104 | | - |
105 | | - override string getSourceType() { result = "Console readline" } |
106 | | - } |
107 | | - #+END_SRC |
108 | 61 |
|
109 | | - Note that the predicate |
110 | | - #+BEGIN_SRC java |
111 | | - module QueryInjectionFlowConfig implements DataFlow::ConfigSig { |
112 | | - predicate isSource(DataFlow::Node src) { src instanceof ActiveThreatModelSource } |
113 | | - ...; |
114 | | - } |
115 | | - #+END_SRC |
116 | | - now also returns the readLine() result -- although we extended |
117 | | - RemoteFlowSource, not ActiveThreatModelSource |
| 62 | + In the editor, these correspond to: |
| 63 | + 1. =main(String[] args)= — the =args= parameter, reported as a source (column =ui=) |
| 64 | + 2. =conn.createStatement().executeUpdate(query)= — the =query= argument, reported as a sink (column =qsi=) |
| 65 | + |
| 66 | + However, =System.console().readLine()= is not detected as a source. Therefore, =SqlTainted.ql= cannot find a complete flow. |
| 67 | + |
| 68 | +** Supplement Sources via the Model Editor |
| 69 | + |
| 70 | + - [ ] We observe no flow from source to sink |
| 71 | + - A sink exists (=executeUpdate=) |
| 72 | + - But no recognized source is found |
| 73 | + - [ ] There are two ways to fix this: |
| 74 | + 1. Add a new source in =Customizations.qll= |
| 75 | + 2. Add a new source in the models-as-data YAML format |
| 76 | + |
| 77 | +** Supplement CodeQL: Write a Full Manual Query |
| 78 | + |
| 79 | + A manual dataflow query is already available: |
| 80 | + |
| 81 | + [[./full-query.ql]] |
| 82 | + |
| 83 | + Because it is written by hand rather than relying on the standard source and sink definitions, it can trace the flow even when the standard query finds nothing. |
| 84 | + |
| 85 | +** Supplement CodeQL: Add to FlowSource or a Subclass |
| 86 | + |
| 87 | + Sometimes, the only way to identify how to extend a source is to understand how CodeQL internally resolves source nodes. |
| 88 | + |
| 89 | + Key class hierarchies: |
| 90 | + #+BEGIN_SRC java |
| 91 | + abstract class SourceNode extends DataFlow::Node |
| 92 | + abstract class RemoteFlowSource extends SourceNode |
| 93 | + #+END_SRC |
| 94 | + |
| 95 | + Follow usage in: |
| 96 | + - [[../ql/java/ql/lib/Customizations.qll]] |
| 97 | + - [[../ql/java/ql/src/Security/CWE/CWE-089/SqlTainted.ql]] |
| 98 | + |
| 99 | + Then modify =Customizations.qll= by adding the custom source. The modified |
| 100 | + [[../ql/java/ql/lib/Customizations.qll]] is: |
| 101 | + #+BEGIN_SRC java |
| 102 | + import java |
| 103 | + private import semmle.code.java.dataflow.FlowSources |
| 104 | + |
| 105 | + class ReadLine extends RemoteFlowSource { |
| 106 | + ReadLine() { |
| 107 | + exists(Call read | |
| 108 | + read.getCallee().getName() = "readLine" and |
| 109 | + read = this.asExpr() |
| 110 | + ) |
| 111 | + } |
| 112 | + |
| 113 | + override string getSourceType() { result = "Console readline" } |
| 114 | + } |
| 115 | + #+END_SRC |
| 116 | + |
| 117 | + This allows |
| 118 | + |
| 119 | + #+BEGIN_SRC java |
| 120 | + predicate isSource(DataFlow::Node src) { |
| 121 | + src instanceof ActiveThreatModelSource |
| 122 | + } |
| 123 | + #+END_SRC |
| 124 | + |
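| 125 | + to include =readLine()= even though we extended =RemoteFlowSource=, not =ActiveThreatModelSource=: =ActiveThreatModelSource= selects its nodes via =this.(SourceNode).getThreatModel()=, and =RemoteFlowSource= is a subclass of =SourceNode=. |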
118 | 126 |
|
119 | 127 | ** TODO Supplement CodeQL: Add to Models-as-Data |
120 | 128 | - Schema in CodeQL: [[../ql/java/ql/lib/semmle/code/java/dataflow/internal/ExternalFlowExtensions.qll]] |
|
130 | 138 | 18: - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument[1].ArrayElement", "Argument[this]", "taint", "df-generated"] |
131 | 139 | 19: - ["java.io", "Console", False, "readLine", "(String,Object[])", "", "Argument[this]", "ReturnValue", "taint", "df-generated"] |
132 | 140 | #+END_SRC |
133 | | - note: this file is in the generated/ tree. |
| 141 | + Note: this file is in the =generated/= tree. Other model files exist as well. |
134 | 142 |
|
135 | 143 | The current =readLine= modeling is in the =summaryModel= section; we need it |
136 | 144 | in a =sourceModel= section instead. |
|
219 | 227 | #+end_src |
220 | 228 |
|
221 | 229 | Now we can run [[../ql/java/ql/src/Security/CWE/CWE-089/SqlTainted.ql]] again. |
| 230 | + |
| 231 | + |
| 232 | + |
0 commit comments