/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// ELF file parser
//
// NOTE(review): this copy of the header looks like it went through a
// markup-stripping step. Every `#include` has lost its target, and template
// parameter / argument lists are missing (bare `template`, `std::pair`,
// `std::unordered_map`, `std::initializer_list`, `is_nothrow_invocable_v`,
// `at(...)`, `valueAt(...)`, `static_cast(...)`, `reinterpret_cast(...)`,
// `struct hash`). The code tokens below are reproduced exactly as found;
// restore them from a pristine copy before building.

#pragma once

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#if FOLLY_HAVE_ELF

#include
#include // For ElfW()

namespace folly {
namespace symbolizer {

// Select the spelling of the platform's ELF type names: ElfW(name) where that
// macro is defined, otherwise Elf_<name> on FreeBSD.
#if defined(ElfW)
#define FOLLY_ELF_ELFW(name) ElfW(name)
#elif defined(__FreeBSD__)
#define FOLLY_ELF_ELFW(name) Elf_##name
#endif

// Short aliases for the native ELF structures used throughout this header.
using ElfAddr = FOLLY_ELF_ELFW(Addr);
using ElfEhdr = FOLLY_ELF_ELFW(Ehdr);
using ElfOff = FOLLY_ELF_ELFW(Off);
using ElfPhdr = FOLLY_ELF_ELFW(Phdr);
using ElfShdr = FOLLY_ELF_ELFW(Shdr);
using ElfSym = FOLLY_ELF_ELFW(Sym);
using ElfRel = FOLLY_ELF_ELFW(Rel);
using ElfRela = FOLLY_ELF_ELFW(Rela);
using ElfDyn = FOLLY_ELF_ELFW(Dyn);

// ElfFileId is supposed to uniquely identify any instance of an ELF binary.
// It does that by combining the file's dev ID, inode, size and modification
// time (ns). Using only dev and inode is not unique enough: a file can be
// overwritten with new contents while keeping the same dev and inode, so the
// modification time and file size are taken into account to minimize the risk
// of a collision.
struct ElfFileId {
  dev_t dev;
  ino_t inode;
  off_t size;
  uint64_t mtime;
};

// Two ids are equal only when all four fields match.
inline bool operator==(const ElfFileId& lhs, const ElfFileId& rhs) {
  return lhs.dev == rhs.dev && lhs.inode == rhs.inode && lhs.size == rhs.size &&
      lhs.mtime == rhs.mtime;
}

/**
 * ELF file parser.
 *
 * We handle native files only (32-bit files on a 32-bit platform, 64-bit files
 * on a 64-bit platform), and only executables (ET_EXEC), shared objects
 * (ET_DYN), core files (ET_CORE) and relocatable files (ET_REL).
 *
 * Instances are movable but not copyable (the copy operations are deleted in
 * the private section).
 */
class ElfFile {
 public:
  /**
   * Options accepted by the constructors and open functions.
   *
   * `writable` defaults to false. The setter returns *this so calls can be
   * chained.
   * NOTE(review): presumably `writable` selects a writable open/mapping of
   * the file -- confirm against the implementation, which is not in this
   * header.
   */
  class Options {
   public:
    constexpr Options() noexcept {}

    constexpr bool writable() const noexcept { return writable_; }

    constexpr Options& writable(bool const value) noexcept {
      writable_ = value;
      return *this;
    }

   private:
    bool writable_ = false;
  };

  ElfFile() noexcept;

  // Note: may throw, call openNoThrow() explicitly if you don't want to throw
  explicit ElfFile(const char* name, Options const& options = Options());

  // Result codes for opening an ELF file.
  // kSuccess is 0; kSystemError (guaranteed to be -1) is reported on IO error
  // (errno is set); kInvalidElfFile is reported for an invalid ELF file (errno
  // is set to EINVAL).
  enum OpenResultCode : int {
    kSuccess = 0,
    kSystemError = -1,
    kInvalidElfFile = -2,
  };

  // Result of an open attempt: the code plus, on error, a static string
  // describing what failed. Converts implicitly to its OpenResultCode.
  struct OpenResult {
    OpenResultCode code{};
    char const* msg{};

    /* implicit */ constexpr operator OpenResultCode() const noexcept {
      return code;
    }
  };

  // Open the ELF file. Does not throw on error.
  OpenResult openNoThrow(
      const char* name, Options const& options = Options()) noexcept;

  // Like openNoThrow, but follow .gnu_debuglink if present
  OpenResult openAndFollow(
      const char* name, Options const& options = Options()) noexcept;

  // Open the ELF file. Throws on error.
  void open(const char* name, Options const& options = Options());

  ~ElfFile();

  ElfFile(ElfFile&& other) noexcept;
  ElfFile& operator=(ElfFile&& other) noexcept;

  /** Retrieve the ELF header (read at offset 0 of the file) */
  const ElfEhdr& elfHeader() const noexcept { return at(0); }

  /**
   * Get the base address, the address where the file should be loaded if
   * no relocations happened.
   */
  uintptr_t getBaseAddress() const noexcept { return baseAddress_; }

  /** Find a section given its name */
  const ElfShdr* getSectionByName(const char* name) const noexcept;

  /** Find a section given its index in the section header table */
  const ElfShdr* getSectionByIndex(size_t idx) const noexcept;

  /** Retrieve the name of a section */
  const char* getSectionName(const ElfShdr& section) const noexcept;

  /** Get the actual section body */
  folly::StringPiece getSectionBody(const ElfShdr& section) const noexcept;

  /** Retrieve a string from a string table section */
  const char* getString(
      const ElfShdr& stringTable, size_t offset) const noexcept;

  /**
   * Iterate over all strings in a string table section for as long as
   * fn(str) returns false.
   * Returns the current ("found") string when fn returned true, or nullptr
   * if fn returned false for all strings in the table.
   */
  template
  const char* iterateStrings(const ElfShdr& stringTable, Fn fn) const
      noexcept(is_nothrow_invocable_v);

  /**
   * Iterate over program headers as long as fn(header) returns false.
   * Returns a pointer to the current ("found") program header when fn
   * returned true, or nullptr if fn returned false for all of them.
   */
  template
  const ElfPhdr* iterateProgramHeaders(Fn fn) const
      noexcept(is_nothrow_invocable_v);

  /**
   * Iterate over all sections for as long as fn(section) returns false.
   * Returns a pointer to the current ("found") section when fn returned
   * true, or nullptr if fn returned false for all sections.
   */
  template
  const ElfShdr* iterateSections(Fn fn) const
      noexcept(is_nothrow_invocable_v);

  /**
   * Iterate over all sections with a given type. Similar to
   * iterateSections(), but filtered only for sections with the given type.
   */
  template
  const ElfShdr* iterateSectionsWithType(uint32_t type, Fn fn) const
      noexcept(is_nothrow_invocable_v);

  /**
   * Iterate over all sections with the given types. Similar to
   * iterateSectionsWithType(), but filtered on multiple types.
   */
  template
  const ElfShdr* iterateSectionsWithTypes(
      std::initializer_list types, Fn fn) const
      noexcept(is_nothrow_invocable_v);

  /**
   * Iterate over all symbols within a given section.
   *
   * Returns a pointer to the current ("found") symbol when fn returned true,
   * or nullptr if fn returned false for all symbols.
   */
  template
  const ElfSym* iterateSymbols(const ElfShdr& section, Fn fn) const
      noexcept(is_nothrow_invocable_v);

  // NOTE(review): presumably like iterateSymbols(), restricted to symbols of
  // the given type -- the definition is not in this header; confirm there.
  template
  const ElfSym* iterateSymbolsWithType(
      const ElfShdr& section, uint32_t type, Fn fn) const
      noexcept(is_nothrow_invocable_v);

  // NOTE(review): presumably like iterateSymbolsWithType(), but accepting
  // several types -- the definition is not in this header; confirm there.
  template
  const ElfSym* iterateSymbolsWithTypes(
      const ElfShdr& section, std::initializer_list types, Fn fn) const
      noexcept(is_nothrow_invocable_v);

  /**
   * Iterate over entries within a given section.
   *
   * Returns a pointer to the current ("found") entry when fn returned
   * true, or nullptr if fn returned false for all entries.
   */
  template
  const E* iterateSectionEntries(const ElfShdr& section, Fn&& fn) const
      noexcept(is_nothrow_invocable_v);

  /**
   * A symbol together with the section it was found in. Used as a
   * (section pointer, symbol pointer) pair by the functions below;
   * {nullptr, nullptr} means "not found".
   */
  typedef std::pair Symbol;

  /**
   * Find symbol definition by address.
   * Note that this is the file virtual address, so you need to undo
   * any relocation that might have happened.
   *
   * Returns {nullptr, nullptr} if not found.
   */
  Symbol getDefinitionByAddress(uintptr_t address) const noexcept;

  /**
   * Find symbol definition by name. Optionally specify the symbol types to
   * consider.
   *
   * If a symbol with this name cannot be found, a {nullptr, nullptr} Symbol
   * will be returned. This is O(N) in the number of symbols in the file.
   *
   * Returns {nullptr, nullptr} if not found.
   */
  Symbol getSymbolByName(
      const char* name,
      std::initializer_list types = {
          STT_OBJECT, STT_FUNC, STT_GNU_IFUNC}) const noexcept;

  /**
   * Find multiple symbol definitions by name. Because searching for a symbol is
   * O(N) this method enables searching for multiple symbols in a single pass.
   *
   * Returns a map containing a key for each unique symbol name in the provided
   * names container. The corresponding value is either the found Symbol or
   * {nullptr, nullptr} if the symbol was not found. Only the first match seen
   * for each name is recorded.
   */
  template
  std::unordered_map getSymbolsByName(
      const C& names,
      std::initializer_list types = {
          STT_OBJECT, STT_FUNC, STT_GNU_IFUNC}) const noexcept {
    std::unordered_map result(names.size());
    if (names.empty()) {
      return result;
    }

    // Seed every requested name with the "not found" value so that each name
    // has a key in the result even if it is never matched.
    for (const std::string& name : names) {
      result[name] = {nullptr, nullptr};
    }
    // Number of requested names that have been resolved so far.
    size_t seenCount = 0;

    // Per-symbol callback: records the symbol if its name was requested and is
    // still unresolved. Returns true once every requested name is resolved,
    // which stops the symbol iteration of the current section.
    auto findSymbol = [&](const folly::symbolizer::ElfShdr& section,
                          const folly::symbolizer::ElfSym& sym) -> bool {
      // NOTE(review): `§ion` looks like a mis-encoded `&section` (the
      // address of the lambda's `section` parameter) -- confirm and restore.
      auto symbol = folly::symbolizer::ElfFile::Symbol(§ion, &sym);
      auto name = getSymbolName(symbol);
      if (name == nullptr) {
        return false;
      }
      auto itr = result.find(name);
      if (itr != result.end() && itr->second.first == nullptr &&
          itr->second.second == nullptr) {
        itr->second = symbol;
        ++seenCount;
      }
      return seenCount == result.size();
    };

    // Per-section callback: scans the section's symbols of the requested
    // types. It always returns false, so the section iteration is never
    // stopped early from here.
    auto iterSection = [&](const folly::symbolizer::ElfShdr& section) -> bool {
      iterateSymbolsWithTypes(section, types, [&](const auto& sym) -> bool {
        return findSymbol(section, sym);
      });
      return false;
    };
    // Try the .dynsym section first if it exists, it's smaller.
    // NOTE(review): because iterSection always returns false, the first call
    // presumably yields nullptr, so the SHT_SYMTAB pass runs as well.
    iterateSectionsWithType(SHT_DYNSYM, iterSection) ||
        iterateSectionsWithType(SHT_SYMTAB, iterSection);

    return result;
  }

  /**
   * Get the value of a symbol.
   *
   * Looks up the section named by symbol->st_shndx and resolves
   * symbol->st_value inside it via valueAt(). Returns nullptr if that section
   * cannot be found or valueAt() rejects the address.
   *
   * `symbol` is dereferenced unconditionally, so it must not be null.
   */
  template
  const T* getSymbolValue(const ElfSym* symbol) const noexcept {
    const ElfShdr* section = getSectionByIndex(symbol->st_shndx);
    if (section == nullptr) {
      return nullptr;
    }

    return valueAt(*section, symbol->st_value);
  }

  /**
   * Get the value of the object stored at the given address.
   *
   * This is the function that you want to use in conjunction with
   * getSymbolValue() to follow pointers. For example, to get the value of
   * a char* symbol, you'd do something like this:
   *
   *  auto sym = getSymbolByName("someGlobalValue");
   *  auto addrPtr = getSymbolValue(sym.second);
   *  const char* str = getAddressValue(*addrPtr);
   *
   * Returns nullptr if no section contains the address or valueAt() rejects
   * it.
   */
  template
  const T* getAddressValue(const ElfAddr addr) const noexcept {
    const ElfShdr* section = getSectionContainingAddress(addr);
    if (section == nullptr) {
      return nullptr;
    }

    return valueAt(*section, addr);
  }

  /**
   * Retrieve symbol name.
   */
  const char* getSymbolName(const Symbol& symbol) const noexcept;

  /** Find the section containing the given address */
  const ElfShdr* getSectionContainingAddress(ElfAddr addr) const noexcept;

  /** Path stored in the internal filepath_ buffer */
  const char* filepath() const { return filepath_; }

  /** Identity (dev, inode, size, mtime) stored for this file */
  const ElfFileId& getFileId() const { return fileId_; }

  /**
   * Announce an intention to access file data in a specific pattern in the
   * future. https://man7.org/linux/man-pages/man2/posix_fadvise.2.html
   */
  std::pair posixFadvise(
      off_t offset, off_t len, int const advice) const noexcept;
  std::pair posixFadvise(
      int const advice) const noexcept;

 private:
  OpenResult init() noexcept;
  void reset() noexcept;
  // Non-copyable.
  ElfFile(const ElfFile&) = delete;
  ElfFile& operator=(const ElfFile&) = delete;

  void validateStringTable(const ElfShdr& stringTable) const noexcept;

  /**
   * Reinterpret the bytes at `offset` within the mapped file as a T.
   *
   * T must be standard-layout and trivial. The access is guarded by a
   * FOLLY_SAFE_CHECK that offset + sizeof(T) does not exceed length_.
   * NOTE(review): that sum could wrap around for offsets close to the
   * maximum of its type -- confirm callers bound `offset` first.
   */
  template
  const T& at(ElfOff offset) const noexcept {
    static_assert(
        std::is_standard_layout::value && std::is_trivial::value,
        "non-pod");
    FOLLY_SAFE_CHECK(
        offset + sizeof(T) <= length_,
        "Offset (",
        static_cast(offset),
        " + ",
        sizeof(T),
        ") is not contained within our mapped file (",
        filepath_,
        ") of length ",
        length_);

    return *reinterpret_cast(file_ + offset);
  }

  /**
   * Resolve virtual address `addr` inside `section` to a pointer to a T in
   * the mapped file.
   *
   * Returns nullptr when the file is not ET_EXEC / ET_DYN / ET_CORE, when
   * [addr, addr + sizeof(T)) is not inside the section's address range, or
   * when the translated file offset does not fit within length_.
   */
  template
  const T* valueAt(const ElfShdr& section, const ElfAddr addr) const noexcept {
    // For executables and shared objects, st_value holds a virtual address
    // that refers to the memory owned by sections. Since we didn't map the
    // sections into the addresses that they're expecting (sh_addr), but
    // instead just mmapped the entire file directly, we need to translate
    // between addresses and offsets into the file.
    //
    // TODO: For other file types, st_value holds a file offset directly. Since
    //       I don't have a use-case for that right now, just return nullptr
    //       for them. We can always add it later.
    if (!(elfHeader().e_type == ET_EXEC || elfHeader().e_type == ET_DYN ||
          elfHeader().e_type == ET_CORE)) {
      return nullptr;
    }
    // The whole object must lie inside the section's address range.
    // NOTE(review): addr + sizeof(T) and sh_addr + sh_size could wrap around
    // for extreme values -- confirm inputs are bounded.
    if (!(addr >= section.sh_addr &&
          (addr + sizeof(T)) <= (section.sh_addr + section.sh_size))) {
      return nullptr;
    }

    // SHT_NOBITS: a section that occupies no space in the file but otherwise
    // resembles SHT_PROGBITS. Although this section contains no bytes, the
    // sh_offset member contains the conceptual file offset. Typically used
    // for zero-initialized data sections like .bss.
    if (section.sh_type == SHT_NOBITS) {
      // Nothing to read from the file: hand back a value-initialized T kept
      // in a function-local static.
      static T t = {};
      return &t;
    }

    // Translate the virtual address into an offset into the mapped file.
    ElfOff offset = section.sh_offset + (addr - section.sh_addr);

    return (offset + sizeof(T) <= length_) ? &at(offset) : nullptr;
  }

  // filepath_ is a fixed-size, zero-initialized buffer of this many bytes.
  static constexpr size_t kFilepathMaxLen = 512;
  char filepath_[kFilepathMaxLen] = {};
  int fd_;
  char* file_; // mmap() location
  size_t length_; // mmap() length
  ElfFileId fileId_;

  uintptr_t baseAddress_;
};

} // namespace symbolizer
} // namespace folly

namespace std {
// Hash for folly::symbolizer::ElfFileId: combines all four fields through
// folly::hash::hash_combine.
template <>
struct hash {
  size_t operator()(const folly::symbolizer::ElfFileId fileId) const {
    return folly::hash::hash_combine(
        fileId.dev, fileId.inode, fileId.size, fileId.mtime);
  }
};
} // namespace std

// NOTE(review): presumably the companion file holding the definitions of the
// templates declared above -- the include target was stripped; restore it.
#include

#endif // FOLLY_HAVE_ELF