//== CStringSyntaxChecker.cpp - Checks for C string API misuse -*- C++ -*-==//
2*67e74705SXin Li //
3*67e74705SXin Li // The LLVM Compiler Infrastructure
4*67e74705SXin Li //
5*67e74705SXin Li // This file is distributed under the University of Illinois Open Source
6*67e74705SXin Li // License. See LICENSE.TXT for details.
7*67e74705SXin Li //
8*67e74705SXin Li //===----------------------------------------------------------------------===//
9*67e74705SXin Li //
10*67e74705SXin Li // An AST checker that looks for common pitfalls when using C string APIs.
11*67e74705SXin Li // - Identifies erroneous patterns in the last argument to strncat - the number
12*67e74705SXin Li // of bytes to copy.
13*67e74705SXin Li //
14*67e74705SXin Li //===----------------------------------------------------------------------===//
15*67e74705SXin Li #include "ClangSACheckers.h"
16*67e74705SXin Li #include "clang/AST/Expr.h"
17*67e74705SXin Li #include "clang/AST/OperationKinds.h"
18*67e74705SXin Li #include "clang/AST/StmtVisitor.h"
19*67e74705SXin Li #include "clang/Analysis/AnalysisContext.h"
20*67e74705SXin Li #include "clang/Basic/TargetInfo.h"
21*67e74705SXin Li #include "clang/Basic/TypeTraits.h"
22*67e74705SXin Li #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
23*67e74705SXin Li #include "clang/StaticAnalyzer/Core/Checker.h"
24*67e74705SXin Li #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
25*67e74705SXin Li #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
26*67e74705SXin Li #include "llvm/ADT/SmallString.h"
27*67e74705SXin Li #include "llvm/Support/raw_ostream.h"
28*67e74705SXin Li
29*67e74705SXin Li using namespace clang;
30*67e74705SXin Li using namespace ento;
31*67e74705SXin Li
32*67e74705SXin Li namespace {
33*67e74705SXin Li class WalkAST: public StmtVisitor<WalkAST> {
34*67e74705SXin Li const CheckerBase *Checker;
35*67e74705SXin Li BugReporter &BR;
36*67e74705SXin Li AnalysisDeclContext* AC;
37*67e74705SXin Li
38*67e74705SXin Li /// Check if two expressions refer to the same declaration.
sameDecl(const Expr * A1,const Expr * A2)39*67e74705SXin Li inline bool sameDecl(const Expr *A1, const Expr *A2) {
40*67e74705SXin Li if (const DeclRefExpr *D1 = dyn_cast<DeclRefExpr>(A1->IgnoreParenCasts()))
41*67e74705SXin Li if (const DeclRefExpr *D2 = dyn_cast<DeclRefExpr>(A2->IgnoreParenCasts()))
42*67e74705SXin Li return D1->getDecl() == D2->getDecl();
43*67e74705SXin Li return false;
44*67e74705SXin Li }
45*67e74705SXin Li
46*67e74705SXin Li /// Check if the expression E is a sizeof(WithArg).
isSizeof(const Expr * E,const Expr * WithArg)47*67e74705SXin Li inline bool isSizeof(const Expr *E, const Expr *WithArg) {
48*67e74705SXin Li if (const UnaryExprOrTypeTraitExpr *UE =
49*67e74705SXin Li dyn_cast<UnaryExprOrTypeTraitExpr>(E))
50*67e74705SXin Li if (UE->getKind() == UETT_SizeOf)
51*67e74705SXin Li return sameDecl(UE->getArgumentExpr(), WithArg);
52*67e74705SXin Li return false;
53*67e74705SXin Li }
54*67e74705SXin Li
55*67e74705SXin Li /// Check if the expression E is a strlen(WithArg).
isStrlen(const Expr * E,const Expr * WithArg)56*67e74705SXin Li inline bool isStrlen(const Expr *E, const Expr *WithArg) {
57*67e74705SXin Li if (const CallExpr *CE = dyn_cast<CallExpr>(E)) {
58*67e74705SXin Li const FunctionDecl *FD = CE->getDirectCallee();
59*67e74705SXin Li if (!FD)
60*67e74705SXin Li return false;
61*67e74705SXin Li return (CheckerContext::isCLibraryFunction(FD, "strlen") &&
62*67e74705SXin Li sameDecl(CE->getArg(0), WithArg));
63*67e74705SXin Li }
64*67e74705SXin Li return false;
65*67e74705SXin Li }
66*67e74705SXin Li
67*67e74705SXin Li /// Check if the expression is an integer literal with value 1.
isOne(const Expr * E)68*67e74705SXin Li inline bool isOne(const Expr *E) {
69*67e74705SXin Li if (const IntegerLiteral *IL = dyn_cast<IntegerLiteral>(E))
70*67e74705SXin Li return (IL->getValue().isIntN(1));
71*67e74705SXin Li return false;
72*67e74705SXin Li }
73*67e74705SXin Li
getPrintableName(const Expr * E)74*67e74705SXin Li inline StringRef getPrintableName(const Expr *E) {
75*67e74705SXin Li if (const DeclRefExpr *D = dyn_cast<DeclRefExpr>(E->IgnoreParenCasts()))
76*67e74705SXin Li return D->getDecl()->getName();
77*67e74705SXin Li return StringRef();
78*67e74705SXin Li }
79*67e74705SXin Li
80*67e74705SXin Li /// Identify erroneous patterns in the last argument to strncat - the number
81*67e74705SXin Li /// of bytes to copy.
82*67e74705SXin Li bool containsBadStrncatPattern(const CallExpr *CE);
83*67e74705SXin Li
84*67e74705SXin Li public:
WalkAST(const CheckerBase * checker,BugReporter & br,AnalysisDeclContext * ac)85*67e74705SXin Li WalkAST(const CheckerBase *checker, BugReporter &br, AnalysisDeclContext *ac)
86*67e74705SXin Li : Checker(checker), BR(br), AC(ac) {}
87*67e74705SXin Li
88*67e74705SXin Li // Statement visitor methods.
89*67e74705SXin Li void VisitChildren(Stmt *S);
VisitStmt(Stmt * S)90*67e74705SXin Li void VisitStmt(Stmt *S) {
91*67e74705SXin Li VisitChildren(S);
92*67e74705SXin Li }
93*67e74705SXin Li void VisitCallExpr(CallExpr *CE);
94*67e74705SXin Li };
95*67e74705SXin Li } // end anonymous namespace
96*67e74705SXin Li
97*67e74705SXin Li // The correct size argument should look like following:
98*67e74705SXin Li // strncat(dst, src, sizeof(dst) - strlen(dest) - 1);
99*67e74705SXin Li // We look for the following anti-patterns:
100*67e74705SXin Li // - strncat(dst, src, sizeof(dst) - strlen(dst));
101*67e74705SXin Li // - strncat(dst, src, sizeof(dst) - 1);
102*67e74705SXin Li // - strncat(dst, src, sizeof(dst));
containsBadStrncatPattern(const CallExpr * CE)103*67e74705SXin Li bool WalkAST::containsBadStrncatPattern(const CallExpr *CE) {
104*67e74705SXin Li if (CE->getNumArgs() != 3)
105*67e74705SXin Li return false;
106*67e74705SXin Li const Expr *DstArg = CE->getArg(0);
107*67e74705SXin Li const Expr *SrcArg = CE->getArg(1);
108*67e74705SXin Li const Expr *LenArg = CE->getArg(2);
109*67e74705SXin Li
110*67e74705SXin Li // Identify wrong size expressions, which are commonly used instead.
111*67e74705SXin Li if (const BinaryOperator *BE =
112*67e74705SXin Li dyn_cast<BinaryOperator>(LenArg->IgnoreParenCasts())) {
113*67e74705SXin Li // - sizeof(dst) - strlen(dst)
114*67e74705SXin Li if (BE->getOpcode() == BO_Sub) {
115*67e74705SXin Li const Expr *L = BE->getLHS();
116*67e74705SXin Li const Expr *R = BE->getRHS();
117*67e74705SXin Li if (isSizeof(L, DstArg) && isStrlen(R, DstArg))
118*67e74705SXin Li return true;
119*67e74705SXin Li
120*67e74705SXin Li // - sizeof(dst) - 1
121*67e74705SXin Li if (isSizeof(L, DstArg) && isOne(R->IgnoreParenCasts()))
122*67e74705SXin Li return true;
123*67e74705SXin Li }
124*67e74705SXin Li }
125*67e74705SXin Li // - sizeof(dst)
126*67e74705SXin Li if (isSizeof(LenArg, DstArg))
127*67e74705SXin Li return true;
128*67e74705SXin Li
129*67e74705SXin Li // - sizeof(src)
130*67e74705SXin Li if (isSizeof(LenArg, SrcArg))
131*67e74705SXin Li return true;
132*67e74705SXin Li return false;
133*67e74705SXin Li }
134*67e74705SXin Li
VisitCallExpr(CallExpr * CE)135*67e74705SXin Li void WalkAST::VisitCallExpr(CallExpr *CE) {
136*67e74705SXin Li const FunctionDecl *FD = CE->getDirectCallee();
137*67e74705SXin Li if (!FD)
138*67e74705SXin Li return;
139*67e74705SXin Li
140*67e74705SXin Li if (CheckerContext::isCLibraryFunction(FD, "strncat")) {
141*67e74705SXin Li if (containsBadStrncatPattern(CE)) {
142*67e74705SXin Li const Expr *DstArg = CE->getArg(0);
143*67e74705SXin Li const Expr *LenArg = CE->getArg(2);
144*67e74705SXin Li PathDiagnosticLocation Loc =
145*67e74705SXin Li PathDiagnosticLocation::createBegin(LenArg, BR.getSourceManager(), AC);
146*67e74705SXin Li
147*67e74705SXin Li StringRef DstName = getPrintableName(DstArg);
148*67e74705SXin Li
149*67e74705SXin Li SmallString<256> S;
150*67e74705SXin Li llvm::raw_svector_ostream os(S);
151*67e74705SXin Li os << "Potential buffer overflow. ";
152*67e74705SXin Li if (!DstName.empty()) {
153*67e74705SXin Li os << "Replace with 'sizeof(" << DstName << ") "
154*67e74705SXin Li "- strlen(" << DstName <<") - 1'";
155*67e74705SXin Li os << " or u";
156*67e74705SXin Li } else
157*67e74705SXin Li os << "U";
158*67e74705SXin Li os << "se a safer 'strlcat' API";
159*67e74705SXin Li
160*67e74705SXin Li BR.EmitBasicReport(FD, Checker, "Anti-pattern in the argument",
161*67e74705SXin Li "C String API", os.str(), Loc,
162*67e74705SXin Li LenArg->getSourceRange());
163*67e74705SXin Li }
164*67e74705SXin Li }
165*67e74705SXin Li
166*67e74705SXin Li // Recurse and check children.
167*67e74705SXin Li VisitChildren(CE);
168*67e74705SXin Li }
169*67e74705SXin Li
VisitChildren(Stmt * S)170*67e74705SXin Li void WalkAST::VisitChildren(Stmt *S) {
171*67e74705SXin Li for (Stmt *Child : S->children())
172*67e74705SXin Li if (Child)
173*67e74705SXin Li Visit(Child);
174*67e74705SXin Li }
175*67e74705SXin Li
176*67e74705SXin Li namespace {
177*67e74705SXin Li class CStringSyntaxChecker: public Checker<check::ASTCodeBody> {
178*67e74705SXin Li public:
179*67e74705SXin Li
checkASTCodeBody(const Decl * D,AnalysisManager & Mgr,BugReporter & BR) const180*67e74705SXin Li void checkASTCodeBody(const Decl *D, AnalysisManager& Mgr,
181*67e74705SXin Li BugReporter &BR) const {
182*67e74705SXin Li WalkAST walker(this, BR, Mgr.getAnalysisDeclContext(D));
183*67e74705SXin Li walker.Visit(D->getBody());
184*67e74705SXin Li }
185*67e74705SXin Li };
186*67e74705SXin Li }
187*67e74705SXin Li
registerCStringSyntaxChecker(CheckerManager & mgr)188*67e74705SXin Li void ento::registerCStringSyntaxChecker(CheckerManager &mgr) {
189*67e74705SXin Li mgr.registerChecker<CStringSyntaxChecker>();
190*67e74705SXin Li }
191*67e74705SXin Li
192