| 1 | //== MIGChecker.cpp - MIG calling convention checker ------------*- C++ -*--==// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file defines MIGChecker, a Mach Interface Generator calling convention |
| 10 | // checker. Namely, in MIG callback implementation the following rules apply: |
| 11 | // - When a server routine returns an error code that represents success, it |
| 12 | // must take ownership of resources passed to it (and eventually release |
| 13 | // them). |
| 14 | // - Additionally, when returning success, all out-parameters must be |
| 15 | // initialized. |
| 16 | // - When it returns any other error code, it must not take ownership, |
| 17 | // because the message and its out-of-line parameters will be destroyed |
| 18 | // by the client that called the function. |
| 19 | // For now we only check the last rule, as its violations lead to dangerous |
| 20 | // use-after-free exploits. |
| 21 | // |
| 22 | //===----------------------------------------------------------------------===// |
| 23 | |
| 24 | #include "clang/AST/Attr.h" |
| 25 | #include "clang/Analysis/AnyCall.h" |
| 26 | #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" |
| 27 | #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" |
| 28 | #include "clang/StaticAnalyzer/Core/Checker.h" |
| 29 | #include "clang/StaticAnalyzer/Core/CheckerManager.h" |
| 30 | #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" |
| 31 | #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" |
| 32 | #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" |
| 33 | #include <optional> |
| 34 | |
| 35 | using namespace clang; |
| 36 | using namespace ento; |
| 37 | |
| 38 | namespace { |
| 39 | class MIGChecker : public Checker<check::PostCall, check::PreStmt<ReturnStmt>, |
| 40 | check::EndFunction> { |
| 41 | BugType BT{this, "Use-after-free (MIG calling convention violation)" , |
| 42 | categories::MemoryError}; |
| 43 | |
| 44 | // The checker knows that an out-of-line object is deallocated if it is |
| 45 | // passed as an argument to one of these functions. If this object is |
| 46 | // additionally an argument of a MIG routine, the checker keeps track of that |
| 47 | // information and issues a warning when an error is returned from the |
| 48 | // respective routine. |
| 49 | CallDescriptionMap<unsigned> Deallocators = { |
| 50 | #define CALL(required_args, deallocated_arg, ...) \ |
| 51 | {{CDM::SimpleFunc, {__VA_ARGS__}, required_args}, deallocated_arg} |
| 52 | // E.g., if the checker sees a C function 'vm_deallocate' that has |
| 53 | // exactly 3 parameters, it knows that argument #1 (starting from 0, i.e. |
| 54 | // the second argument) is going to be consumed in the sense of the MIG |
| 55 | // consume-on-success convention. |
| 56 | CALL(3, 1, "vm_deallocate" ), |
| 57 | CALL(3, 1, "mach_vm_deallocate" ), |
| 58 | CALL(2, 0, "mig_deallocate" ), |
| 59 | CALL(2, 1, "mach_port_deallocate" ), |
| 60 | CALL(1, 0, "device_deallocate" ), |
| 61 | CALL(1, 0, "iokit_remove_connect_reference" ), |
| 62 | CALL(1, 0, "iokit_remove_reference" ), |
| 63 | CALL(1, 0, "iokit_release_port" ), |
| 64 | CALL(1, 0, "ipc_port_release" ), |
| 65 | CALL(1, 0, "ipc_port_release_sonce" ), |
| 66 | CALL(1, 0, "ipc_voucher_attr_control_release" ), |
| 67 | CALL(1, 0, "ipc_voucher_release" ), |
| 68 | CALL(1, 0, "lock_set_dereference" ), |
| 69 | CALL(1, 0, "memory_object_control_deallocate" ), |
| 70 | CALL(1, 0, "pset_deallocate" ), |
| 71 | CALL(1, 0, "semaphore_dereference" ), |
| 72 | CALL(1, 0, "space_deallocate" ), |
| 73 | CALL(1, 0, "space_inspect_deallocate" ), |
| 74 | CALL(1, 0, "task_deallocate" ), |
| 75 | CALL(1, 0, "task_inspect_deallocate" ), |
| 76 | CALL(1, 0, "task_name_deallocate" ), |
| 77 | CALL(1, 0, "thread_deallocate" ), |
| 78 | CALL(1, 0, "thread_inspect_deallocate" ), |
| 79 | CALL(1, 0, "upl_deallocate" ), |
| 80 | CALL(1, 0, "vm_map_deallocate" ), |
| 81 | #undef CALL |
| 82 | #define CALL(required_args, deallocated_arg, ...) \ |
| 83 | {{CDM::CXXMethod, {__VA_ARGS__}, required_args}, deallocated_arg} |
| 84 | // E.g., if the checker sees a method 'releaseAsyncReference64()' that is |
| 85 | // defined on class 'IOUserClient' that takes exactly 1 argument, it knows |
| 86 | // that the argument is going to be consumed in the sense of the MIG |
| 87 | // consume-on-success convention. |
| 88 | CALL(1, 0, "IOUserClient" , "releaseAsyncReference64" ), |
| 89 | CALL(1, 0, "IOUserClient" , "releaseNotificationPort" ), |
| 90 | #undef CALL |
| 91 | }; |
| 92 | |
| 93 | CallDescription OsRefRetain{CDM::SimpleFunc, {"os_ref_retain" }, 1}; |
| 94 | |
| 95 | void checkReturnAux(const ReturnStmt *RS, CheckerContext &C) const; |
| 96 | |
| 97 | public: |
| 98 | void checkPostCall(const CallEvent &Call, CheckerContext &C) const; |
| 99 | |
| 100 | // HACK: We're making two attempts to find the bug: checkEndFunction |
| 101 | // should normally be enough but it fails when the return value is a literal |
| 102 | // that never gets put into the Environment and ends of function with multiple |
| 103 | // returns get agglutinated across returns, preventing us from obtaining |
| 104 | // the return value. The problem is similar to https://reviews.llvm.org/D25326 |
| 105 | // but now we step into it in the top-level function. |
| 106 | void checkPreStmt(const ReturnStmt *RS, CheckerContext &C) const { |
| 107 | checkReturnAux(RS, C); |
| 108 | } |
| 109 | void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const { |
| 110 | checkReturnAux(RS, C); |
| 111 | } |
| 112 | |
| 113 | }; |
| 114 | } // end anonymous namespace |
| 115 | |
| 116 | // A flag that says that the programmer has called a MIG destructor |
| 117 | // for at least one parameter. |
| 118 | REGISTER_TRAIT_WITH_PROGRAMSTATE(ReleasedParameter, bool) |
| 119 | // A set of parameters for which the check is suppressed because |
| 120 | // reference counting is being performed. |
| 121 | REGISTER_SET_WITH_PROGRAMSTATE(RefCountedParameters, const ParmVarDecl *) |
| 122 | |
| 123 | static const ParmVarDecl *getOriginParam(SVal V, CheckerContext &C, |
| 124 | bool IncludeBaseRegions = false) { |
| 125 | // TODO: We should most likely always include base regions here. |
| 126 | SymbolRef Sym = V.getAsSymbol(IncludeBaseRegions); |
| 127 | if (!Sym) |
| 128 | return nullptr; |
| 129 | |
| 130 | // If we optimistically assume that the MIG routine never re-uses the storage |
| 131 | // that was passed to it as arguments when it invalidates it (but at most when |
| 132 | // it assigns to parameter variables directly), this procedure correctly |
| 133 | // determines if the value was loaded from the transitive closure of MIG |
| 134 | // routine arguments in the heap. |
| 135 | while (const MemRegion *MR = Sym->getOriginRegion()) { |
| 136 | const auto *VR = dyn_cast<VarRegion>(Val: MR); |
| 137 | if (VR && VR->hasMemorySpace<StackArgumentsSpaceRegion>(State: C.getState()) && |
| 138 | VR->getStackFrame()->inTopFrame()) |
| 139 | return cast<ParmVarDecl>(Val: VR->getDecl()); |
| 140 | |
| 141 | const SymbolicRegion *SR = MR->getSymbolicBase(); |
| 142 | if (!SR) |
| 143 | return nullptr; |
| 144 | |
| 145 | Sym = SR->getSymbol(); |
| 146 | } |
| 147 | |
| 148 | return nullptr; |
| 149 | } |
| 150 | |
| 151 | static bool isInMIGCall(CheckerContext &C) { |
| 152 | const LocationContext *LC = C.getLocationContext(); |
| 153 | assert(LC && "Unknown location context" ); |
| 154 | |
| 155 | const StackFrameContext *SFC; |
| 156 | // Find the top frame. |
| 157 | while (LC) { |
| 158 | SFC = LC->getStackFrame(); |
| 159 | LC = SFC->getParent(); |
| 160 | } |
| 161 | |
| 162 | const Decl *D = SFC->getDecl(); |
| 163 | |
| 164 | if (std::optional<AnyCall> AC = AnyCall::forDecl(D)) { |
| 165 | // Even though there's a Sema warning when the return type of an annotated |
| 166 | // function is not a kern_return_t, this warning isn't an error, so we need |
| 167 | // an extra check here. |
| 168 | // FIXME: AnyCall doesn't support blocks yet, so they remain unchecked |
| 169 | // for now. |
| 170 | if (!AC->getReturnType(Ctx&: C.getASTContext()) |
| 171 | .getCanonicalType()->isSignedIntegerType()) |
| 172 | return false; |
| 173 | } |
| 174 | |
| 175 | if (D->hasAttr<MIGServerRoutineAttr>()) |
| 176 | return true; |
| 177 | |
| 178 | // See if there's an annotated method in the superclass. |
| 179 | if (const auto *MD = dyn_cast<CXXMethodDecl>(Val: D)) |
| 180 | for (const auto *OMD: MD->overridden_methods()) |
| 181 | if (OMD->hasAttr<MIGServerRoutineAttr>()) |
| 182 | return true; |
| 183 | |
| 184 | return false; |
| 185 | } |
| 186 | |
| 187 | void MIGChecker::checkPostCall(const CallEvent &Call, CheckerContext &C) const { |
| 188 | if (OsRefRetain.matches(Call)) { |
| 189 | // If the code is doing reference counting over the parameter, |
| 190 | // it opens up an opportunity for safely calling a destructor function. |
| 191 | // TODO: We should still check for over-releases. |
| 192 | if (const ParmVarDecl *PVD = |
| 193 | getOriginParam(V: Call.getArgSVal(Index: 0), C, /*IncludeBaseRegions=*/true)) { |
| 194 | // We never need to clean up the program state because these are |
| 195 | // top-level parameters anyway, so they're always live. |
| 196 | C.addTransition(State: C.getState()->add<RefCountedParameters>(K: PVD)); |
| 197 | } |
| 198 | return; |
| 199 | } |
| 200 | |
| 201 | if (!isInMIGCall(C)) |
| 202 | return; |
| 203 | |
| 204 | const unsigned *ArgIdxPtr = Deallocators.lookup(Call); |
| 205 | if (!ArgIdxPtr) |
| 206 | return; |
| 207 | |
| 208 | ProgramStateRef State = C.getState(); |
| 209 | unsigned ArgIdx = *ArgIdxPtr; |
| 210 | SVal Arg = Call.getArgSVal(Index: ArgIdx); |
| 211 | const ParmVarDecl *PVD = getOriginParam(V: Arg, C); |
| 212 | if (!PVD || State->contains<RefCountedParameters>(key: PVD)) |
| 213 | return; |
| 214 | |
| 215 | const NoteTag *T = |
| 216 | C.getNoteTag(Cb: [this, PVD](PathSensitiveBugReport &BR) -> std::string { |
| 217 | if (&BR.getBugType() != &BT) |
| 218 | return "" ; |
| 219 | SmallString<64> Str; |
| 220 | llvm::raw_svector_ostream OS(Str); |
| 221 | OS << "Value passed through parameter '" << PVD->getName() |
| 222 | << "\' is deallocated" ; |
| 223 | return std::string(OS.str()); |
| 224 | }); |
| 225 | C.addTransition(State: State->set<ReleasedParameter>(true), Tag: T); |
| 226 | } |
| 227 | |
| 228 | // Returns true if V can potentially represent a "successful" kern_return_t. |
| 229 | static bool mayBeSuccess(SVal V, CheckerContext &C) { |
| 230 | ProgramStateRef State = C.getState(); |
| 231 | |
| 232 | // Can V represent KERN_SUCCESS? |
| 233 | if (!State->isNull(V).isConstrainedFalse()) |
| 234 | return true; |
| 235 | |
| 236 | SValBuilder &SVB = C.getSValBuilder(); |
| 237 | ASTContext &ACtx = C.getASTContext(); |
| 238 | |
| 239 | // Can V represent MIG_NO_REPLY? |
| 240 | static const int MigNoReply = -305; |
| 241 | V = SVB.evalEQ(state: C.getState(), lhs: V, rhs: SVB.makeIntVal(integer: MigNoReply, type: ACtx.IntTy)); |
| 242 | if (!State->isNull(V).isConstrainedTrue()) |
| 243 | return true; |
| 244 | |
| 245 | // If none of the above, it's definitely an error. |
| 246 | return false; |
| 247 | } |
| 248 | |
| 249 | void MIGChecker::checkReturnAux(const ReturnStmt *RS, CheckerContext &C) const { |
| 250 | // It is very unlikely that a MIG callback will be called from anywhere |
| 251 | // within the project under analysis and the caller isn't itself a routine |
| 252 | // that follows the MIG calling convention. Therefore we're safe to believe |
| 253 | // that it's always the top frame that is of interest. There's a slight chance |
| 254 | // that the user would want to enforce the MIG calling convention upon |
| 255 | // a random routine in the middle of nowhere, but given that the convention is |
| 256 | // fairly weird and hard to follow in the first place, there's relatively |
| 257 | // little motivation to spread it this way. |
| 258 | if (!C.inTopFrame()) |
| 259 | return; |
| 260 | |
| 261 | if (!isInMIGCall(C)) |
| 262 | return; |
| 263 | |
| 264 | // We know that the function is non-void, but what if the return statement |
| 265 | // is not there in the code? It's not a compile error, we should not crash. |
| 266 | if (!RS) |
| 267 | return; |
| 268 | |
| 269 | ProgramStateRef State = C.getState(); |
| 270 | if (!State->get<ReleasedParameter>()) |
| 271 | return; |
| 272 | |
| 273 | SVal V = C.getSVal(S: RS); |
| 274 | if (mayBeSuccess(V, C)) |
| 275 | return; |
| 276 | |
| 277 | ExplodedNode *N = C.generateErrorNode(); |
| 278 | if (!N) |
| 279 | return; |
| 280 | |
| 281 | auto R = std::make_unique<PathSensitiveBugReport>( |
| 282 | args: BT, |
| 283 | args: "MIG callback fails with error after deallocating argument value. " |
| 284 | "This is a use-after-free vulnerability because the caller will try to " |
| 285 | "deallocate it again" , |
| 286 | args&: N); |
| 287 | |
| 288 | R->addRange(R: RS->getSourceRange()); |
| 289 | bugreporter::trackExpressionValue( |
| 290 | N, E: RS->getRetValue(), R&: *R, |
| 291 | Opts: {.Kind: bugreporter::TrackingKind::Thorough, /*EnableNullFPSuppression=*/false}); |
| 292 | C.emitReport(R: std::move(R)); |
| 293 | } |
| 294 | |
| 295 | void ento::registerMIGChecker(CheckerManager &Mgr) { |
| 296 | Mgr.registerChecker<MIGChecker>(); |
| 297 | } |
| 298 | |
| 299 | bool ento::shouldRegisterMIGChecker(const CheckerManager &mgr) { |
| 300 | return true; |
| 301 | } |
| 302 | |