Skip to content

Commit 307d912

Browse files
authored
[clang][analyzer] Add taintedness to argv (llvm#178054)
If the execution environment is untrusted, we assume that the argv, argc and envp parameters of the main function are attacker controlled values and set them as taint analysis sources.
1 parent ab5205c commit 307d912

11 files changed

Lines changed: 315 additions & 2 deletions

File tree

clang/docs/analyzer/checkers.rst

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1482,6 +1482,13 @@ For a more detailed description of configuration options, please see the
14821482
advanced users can fully customize their taint configuration model.
14831483
Default: ``true``.
14841484
1485+
* If the analyzer option ``assume-controlled-environment`` is set to ``false``,
1486+
it is assumed that the command line arguments and the environment
1487+
variables of the program are attacker controlled.
1488+
In particular, the ``argv``, ``argc`` and ``envp`` arguments of the
1489+
``main`` function and the return value of the ``getenv()``
1490+
function are assumed to hold tainted values.
1491+
14851492
**Related Guidelines**
14861493
14871494
* `CWE Data Neutralization Issues
@@ -3788,7 +3795,7 @@ Check that ``[[clang::annotate_type("webkit.nodelete")]]`` annotation does not a
37883795
Foo [[clang::annotate_type("webkit.nodelete")]] trivialFunction(RefCountable* obj) {
37893796
return obj->anotherTrivialFunction();
37903797
};
3791-
3798+
37923799
``[[clang::annotate_type("webkit.nodelete")]]`` annotation makes the function ignored for the purpose of other WebKit smart pointer checkers.
37933800
For example, ``alpha.webkit.UncountedCallArgsChecker`` will ignore a function call with this annotation.
37943801

clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp

Lines changed: 89 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,10 +378,12 @@ struct GenericTaintRuleParser {
378378
CheckerManager &Mgr;
379379
};
380380

381-
class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {
381+
class GenericTaintChecker
382+
: public Checker<check::PreCall, check::PostCall, check::BeginFunction> {
382383
public:
383384
void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
384385
void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
386+
void checkBeginFunction(CheckerContext &C) const;
385387

386388
void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
387389
const char *Sep) const override;
@@ -829,8 +831,94 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
829831
std::make_move_iterator(Rules.end()));
830832
}
831833

834+
bool isPointerToCharArray(const QualType &QT) {
835+
if (!QT->isPointerType())
836+
return false;
837+
QualType PointeeType = QT->getPointeeType();
838+
return PointeeType->isPointerType() &&
839+
PointeeType->getPointeeType()->isCharType();
840+
}
841+
842+
// The incoming parameters of the main function get tainted
843+
// if the program called in an untrusted environment.
844+
void GenericTaintChecker::checkBeginFunction(CheckerContext &C) const {
845+
if (!C.inTopFrame() || C.getAnalysisManager()
846+
.getAnalyzerOptions()
847+
.ShouldAssumeControlledEnvironment)
848+
return;
849+
850+
const auto *FD = dyn_cast<FunctionDecl>(C.getLocationContext()->getDecl());
851+
if (!FD || !FD->isMain() || FD->param_size() < 2)
852+
return;
853+
854+
if (!FD->parameters()[0]->getType()->isIntegerType())
855+
return;
856+
857+
if (!isPointerToCharArray(FD->parameters()[1]->getType()))
858+
return;
859+
ProgramStateRef State = C.getState();
860+
861+
const MemRegion *ArgcReg =
862+
State->getRegion(FD->parameters()[0], C.getLocationContext());
863+
SVal ArgcSVal = State->getSVal(ArgcReg);
864+
State = addTaint(State, ArgcSVal);
865+
StringRef ArgcName = FD->parameters()[0]->getName();
866+
if (auto N = ArgcSVal.getAs<NonLoc>()) {
867+
ConstraintManager &CM = C.getConstraintManager();
868+
// The upper bound is the ARG_MAX on an arbitrary Linux
869+
// to model that is is typically smaller than INT_MAX.
870+
State = CM.assumeInclusiveRange(State, *N, llvm::APSInt::getUnsigned(1),
871+
llvm::APSInt::getUnsigned(2097152), true);
872+
}
873+
874+
const MemRegion *ArgvReg =
875+
State->getRegion(FD->parameters()[1], C.getLocationContext());
876+
SVal ArgvSVal = State->getSVal(ArgvReg);
877+
State = addTaint(State, ArgvSVal);
878+
StringRef ArgvName = FD->parameters()[1]->getName();
879+
880+
bool HaveEnvp = FD->param_size() > 2;
881+
SVal EnvpSVal;
882+
StringRef EnvpName;
883+
if (HaveEnvp && !isPointerToCharArray(FD->parameters()[2]->getType()))
884+
return;
885+
if (HaveEnvp) {
886+
const MemRegion *EnvPReg =
887+
State->getRegion(FD->parameters()[2], C.getLocationContext());
888+
EnvpSVal = State->getSVal(EnvPReg);
889+
EnvpName = FD->parameters()[2]->getName();
890+
State = addTaint(State, EnvpSVal);
891+
}
892+
893+
const NoteTag *OriginatingTag =
894+
C.getNoteTag([ArgvSVal, ArgcSVal, ArgcName, ArgvName, EnvpSVal,
895+
EnvpName](PathSensitiveBugReport &BR) -> std::string {
896+
if ((!BR.isInteresting(ArgcSVal) && !BR.isInteresting(ArgvSVal) &&
897+
!BR.isInteresting(EnvpSVal)))
898+
return "";
899+
if (BR.getBugType().getCategory() != categories::TaintedData)
900+
return "";
901+
std::string Message = "";
902+
if (BR.isInteresting(ArgvSVal))
903+
Message += "'" + ArgvName.str() + "'";
904+
if (BR.isInteresting(ArgcSVal)) {
905+
if (Message.size() > 0)
906+
Message += ", ";
907+
Message += "'" + ArgcName.str() + "'";
908+
}
909+
if (BR.isInteresting(EnvpSVal)) {
910+
if (Message.size() > 0)
911+
Message += ", ";
912+
Message += "'" + EnvpName.str() + "'";
913+
}
914+
return "Taint originated in " + Message;
915+
});
916+
C.addTransition(State, OriginatingTag);
917+
}
918+
832919
void GenericTaintChecker::checkPreCall(const CallEvent &Call,
833920
CheckerContext &C) const {
921+
834922
initTaintRules(C);
835923

836924
// FIXME: this should be much simpler.

clang/test/Analysis/taint-generic.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ char *stpcpy(char *restrict s1, const char *restrict s2);
116116
char *strncpy( char * destination, const char * source, size_t num );
117117
char *strndup(const char *s, size_t n);
118118
char *strncat(char *restrict s1, const char *restrict s2, size_t n);
119+
char *strcat( char *dest, const char *src );
119120

120121
void *malloc(size_t);
121122
void *calloc(size_t nmemb, size_t size);
@@ -1396,3 +1397,13 @@ void testAcceptPropagates() {
13961397
int acceptSocket = accept(listenSocket, 0, 0);
13971398
clang_analyzer_isTainted_int(acceptSocket); // expected-warning {{YES}}
13981399
}
1400+
1401+
int main(int argc, char * argv[]) {
1402+
if (argc < 2)
1403+
return 1;
1404+
char cmd[2048] = "/bin/cat ";
1405+
clang_analyzer_isTainted_char(*argv[1]); // expected-warning{{YES}}
1406+
strncat(cmd, argv[1], sizeof(cmd) - strlen(cmd)-1);
1407+
system(cmd);// expected-warning {{Untrusted data is passed to a system call}}
1408+
return 0;
1409+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound \
2+
// RUN: -analyzer-config assume-controlled-environment=false -analyzer-output=text -verify %s
3+
4+
// This file is for testing enhanced diagnostics produced by the
5+
// GenericTaintChecker
6+
7+
// In an untrusted environment the argc is also tainted.
8+
int main(int argc, char *argv[], char *envp[]) { // expected-note {{Taint originated in 'argc'}}
9+
char cmd[2048] = "/bin/cat ";
10+
char filename[1024];
11+
int foo = 100 / argc; // no-warning argc >= 1
12+
if (!envp[0]) // expected-note {{Assuming the condition is false}}
13+
// expected-note@-1 {{Taking false branch}}
14+
return 1;
15+
int v[5]={1,2,3,4,5};
16+
return v[argc];// expected-warning {{Potential out of bound access to 'v' with tainted index}}
17+
// expected-note@-1 {{Access of 'v' with a tainted index that may be too large}}
18+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound \
2+
// RUN: -analyzer-config assume-controlled-environment=false -analyzer-output=text -verify %s
3+
4+
// This file is for testing enhanced diagnostics produced by the
5+
// GenericTaintChecker
6+
7+
typedef __typeof(sizeof(int)) size_t;
8+
int system(const char *command);
9+
size_t strlen(const char *str);
10+
char *strncat(char *destination, const char *source, size_t num);
11+
char *strncpy(char *destination, const char *source, size_t num);
12+
13+
// In an untrusted environment the the environment variables
14+
// coming through the envp are also tainted.
15+
int main(int argc, char *argv[], char *envp[]) { // expected-note {{Taint originated in 'envp'}}
16+
char cmd[2048] = "/bin/cat ";
17+
char filename[1024];
18+
if (!envp[0]) // expected-note {{Assuming the condition is false}}
19+
// expected-note@-1 {{Taking false branch}}
20+
return 1;
21+
strncpy(filename, envp[0], sizeof(filename) - 1); // expected-note {{Taint propagated to the 1st argument}}
22+
strncat(cmd, filename, sizeof(cmd) - strlen(cmd) - 1); // expected-note {{Taint propagated to the 1st argument}}
23+
system(cmd); // expected-warning {{Untrusted data is passed to a system call}}
24+
// expected-note@-1 {{Untrusted data is passed to a system call}}
25+
return 0;
26+
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound \
2+
// RUN: -analyzer-config assume-controlled-environment=false -analyzer-output=text -verify %s
3+
4+
// This file is for testing enhanced diagnostics produced by the
5+
// GenericTaintChecker
6+
7+
typedef __typeof(sizeof(int)) size_t;
8+
int system(const char *command);
9+
size_t strlen(const char *str);
10+
char *strncat(char *destination, const char *source, size_t num);
11+
char *strncpy(char *destination, const char *source, size_t num);
12+
13+
// In an untrusted environment the the environment variables
14+
// coming through the envp are also tainted.
15+
// This differs from envp.c because main parameters are declared
16+
// with alternative pointer to pointer declaration.
17+
int main(int argc, char **argv, char **envp) { // expected-note {{Taint originated in 'envp'}}
18+
char cmd[2048] = "/bin/cat ";
19+
char filename[1024];
20+
if (!envp[0]) // expected-note {{Assuming the condition is false}}
21+
// expected-note@-1 {{Taking false branch}}
22+
return 1;
23+
strncpy(filename, envp[0], sizeof(filename) - 1); // expected-note {{Taint propagated to the 1st argument}}
24+
strncat(cmd, filename, sizeof(cmd) - strlen(cmd) - 1); // expected-note {{Taint propagated to the 1st argument}}
25+
system(cmd); // expected-warning {{Untrusted data is passed to a system call}}
26+
// expected-note@-1 {{Untrusted data is passed to a system call}}
27+
return 0;
28+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config \
2+
// RUN: assume-controlled-environment=false -analyzer-output=text -verify %s
3+
4+
typedef __typeof(sizeof(int)) size_t;
5+
int system(const char *command);
6+
size_t strlen(const char *str);
7+
char *strncat(char *destination, const char *source, size_t num);
8+
9+
// When main is declared with incorrect signature,
10+
// the analyzer won't crash and we get a compilation error.
11+
12+
int main(char* argv[], int argc) {
13+
//expected-error@-1 {{first parameter of 'main' (argument count) must be of type 'int'}}
14+
//expected-error@-2 {{second parameter of 'main' (argument array) must be of type 'char **'}}
15+
char cmd[2048] = "/bin/cat ";
16+
char filename[1024];
17+
strncat(cmd, filename, sizeof(cmd) - strlen(cmd) - 1);
18+
system(cmd);
19+
return 0;
20+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config \
2+
// RUN: assume-controlled-environment=false -analyzer-output=text -verify %s
3+
// expected-no-diagnostics
4+
5+
// This file is for testing enhanced
6+
// diagnostics produced by the GenericTaintChecker
7+
8+
typedef __typeof(sizeof(int)) size_t;
9+
int system(const char *command);
10+
size_t strlen(const char *str);
11+
char *strncat(char *destination, const char *source, size_t num);
12+
13+
int main(void) {
14+
char cmd[2048] = "/bin/cat ";
15+
char filename[1024];
16+
strncat(cmd, filename, sizeof(cmd) - strlen(cmd) - 1);
17+
system(cmd);
18+
return 0;
19+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound \
2+
// RUN: -analyzer-config assume-controlled-environment=false -analyzer-output=text -verify %s
3+
4+
// This file is for testing enhanced diagnostics produced by the GenericTaintChecker
5+
6+
typedef __typeof(sizeof(int)) size_t;
7+
int system(const char *command);
8+
size_t strlen(const char *str);
9+
char *strncat(char *destination, const char *source, size_t num);
10+
char *strncpy(char *destination, const char *source, size_t num);
11+
12+
// In an untrusted environment the cmd line arguments
13+
// are assumed to be tainted.
14+
int main(int argc, char *argv[]) { // expected-note {{Taint originated in 'argv'}}
15+
if (argc < 2) // expected-note {{'argc' is >= 2}}
16+
// expected-note@-1 {{Taking false branch}}
17+
return 1;
18+
char cmd[2048] = "/bin/cat ";
19+
char filename[1024];
20+
strncpy(filename, argv[1], sizeof(filename) - 1); // expected-note {{Taint propagated to the 1st argument}}
21+
strncat(cmd, filename, sizeof(cmd) - strlen(cmd) - 1); // expected-note {{Taint propagated to the 1st argument}}
22+
system(cmd); // expected-warning {{Untrusted data is passed to a system call}}
23+
// expected-note@-1 {{Untrusted data is passed to a system call}}
24+
return 0;
25+
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config \
2+
// RUN: assume-controlled-environment=false -analyzer-output=text -verify %s
3+
4+
// This file is for testing enhanced diagnostics produced by the
5+
// GenericTaintChecker
6+
7+
typedef __typeof(sizeof(int)) size_t;
8+
int system(const char *command);
9+
size_t strlen(const char *str);
10+
char *strncat(char *destination, const char *source, size_t num);
11+
char *strncpy(char *destination, const char *source, size_t num);
12+
13+
// In an untrusted environment the cmd line arguments
14+
// are assumed to be tainted.
15+
int main(int argc, char *argv[]) { // expected-note {{Taint originated in 'argv'}}
16+
if (argc < 2) // expected-note {{'argc' is >= 2}}
17+
// expected-note@-1 {{Taking false branch}}
18+
return 1;
19+
char cmd[2048] = "/bin/cat ";
20+
char filename[1024];
21+
strncpy(filename, argv[1], sizeof(filename) - 1); // expected-note {{Taint propagated to the 1st argument}}
22+
strncat(cmd, filename, sizeof(cmd) - strlen(cmd) - 1); // expected-note {{Taint propagated to the 1st argument}}
23+
system(cmd); // expected-warning {{Untrusted data is passed to a system call}}
24+
// expected-note@-1 {{Untrusted data is passed to a system call}}
25+
return 0;
26+
}
27+
28+
// A function declared inside a class or namespace may be named "main" but it
29+
// cannot be _the_ `main` function that is executed at startup. Validate that
30+
// in a case like this the arguments are not marked as tainted and no warning
31+
// is produced.
32+
class MyClass {
33+
int main(int argc, char *argv[]) {
34+
if (argc < 2)
35+
return 1;
36+
char cmd[2048] = "/bin/cat ";
37+
char filename[1024];
38+
strncpy(filename, argv[1], sizeof(filename) - 1);
39+
strncat(cmd, filename, sizeof(cmd) - strlen(cmd) - 1);
40+
system(cmd);
41+
return 0;
42+
}
43+
};

0 commit comments

Comments
 (0)