Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2018 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
Martin Stjernholm | fdb2f60 | 2019-01-09 14:47:00 +0000 | [diff] [blame] | 17 | #include "art_api/dex_file_external.h" |
| 18 | |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 19 | #include <inttypes.h> |
| 20 | #include <stdint.h> |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 21 | #include <sys/stat.h> |
| 22 | #include <sys/types.h> |
| 23 | #include <unistd.h> |
| 24 | |
| 25 | #include <cerrno> |
| 26 | #include <cstring> |
David Srbecky | e778fa6 | 2020-04-07 17:52:16 +0100 | [diff] [blame] | 27 | #include <deque> |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 28 | #include <map> |
| 29 | #include <memory> |
| 30 | #include <string> |
| 31 | #include <utility> |
| 32 | #include <vector> |
| 33 | |
| 34 | #include <android-base/logging.h> |
| 35 | #include <android-base/macros.h> |
| 36 | #include <android-base/mapped_file.h> |
| 37 | #include <android-base/stringprintf.h> |
| 38 | |
| 39 | #include <dex/class_accessor-inl.h> |
| 40 | #include <dex/code_item_accessors-inl.h> |
| 41 | #include <dex/dex_file-inl.h> |
| 42 | #include <dex/dex_file_loader.h> |
| 43 | |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 44 | extern "C" { |
Martin Stjernholm | d3e9ff3 | 2021-03-16 00:44:25 +0000 | [diff] [blame] | 45 | |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 46 | struct ADexFile_Method { |
| 47 | ADexFile* adex; |
| 48 | uint32_t index; |
| 49 | size_t offset; |
| 50 | size_t size; |
| 51 | }; |
David Srbecky | 2ddb98b | 2021-03-09 00:37:04 +0000 | [diff] [blame] | 52 | |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 53 | // Opaque implementation of ADexFile for the C interface. |
| 54 | struct ADexFile { |
| 55 | explicit ADexFile(std::unique_ptr<const art::DexFile> dex_file) |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 56 | : dex_file_(std::move(dex_file)) {} |
| 57 | |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 58 | inline bool FindMethod(uint32_t dex_offset, /*out*/ ADexFile_Method* result) { |
David Srbecky | e778fa6 | 2020-04-07 17:52:16 +0100 | [diff] [blame] | 59 | uint32_t class_def_index; |
| 60 | if (GetClassDefIndex(dex_offset, &class_def_index)) { |
| 61 | art::ClassAccessor accessor(*dex_file_, class_def_index); |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 62 | for (const art::ClassAccessor::Method& method : accessor.GetMethods()) { |
| 63 | art::CodeItemInstructionAccessor code = method.GetInstructions(); |
| 64 | if (!code.HasCodeItem()) { |
| 65 | continue; |
| 66 | } |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 67 | size_t offset = reinterpret_cast<const uint8_t*>(code.Insns()) - dex_file_->Begin(); |
| 68 | size_t size = code.InsnsSizeInBytes(); |
| 69 | if (offset <= dex_offset && dex_offset < offset + size) { |
| 70 | *result = ADexFile_Method { |
| 71 | .adex = this, |
| 72 | .index = method.GetIndex(), |
| 73 | .offset = offset, |
| 74 | .size = size, |
| 75 | }; |
David Srbecky | 2ddb98b | 2021-03-09 00:37:04 +0000 | [diff] [blame] | 76 | return true; |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 77 | } |
| 78 | } |
| 79 | } |
David Srbecky | 2ddb98b | 2021-03-09 00:37:04 +0000 | [diff] [blame] | 80 | return false; |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 81 | } |
David Srbecky | e778fa6 | 2020-04-07 17:52:16 +0100 | [diff] [blame] | 82 | |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 83 | void CreateClassCache() { |
| 84 | // Create binary search table with (end_dex_offset, class_def_index) entries. |
| 85 | // That is, we don't assume that dex code of given class is consecutive. |
| 86 | std::deque<std::pair<uint32_t, uint32_t>> cache; |
| 87 | for (art::ClassAccessor accessor : dex_file_->GetClasses()) { |
| 88 | for (const art::ClassAccessor::Method& method : accessor.GetMethods()) { |
| 89 | art::CodeItemInstructionAccessor code = method.GetInstructions(); |
| 90 | if (code.HasCodeItem()) { |
| 91 | int32_t offset = reinterpret_cast<const uint8_t*>(code.Insns()) - dex_file_->Begin(); |
| 92 | DCHECK_NE(offset, 0); |
| 93 | cache.emplace_back(offset + code.InsnsSizeInBytes(), accessor.GetClassDefIndex()); |
| 94 | } |
| 95 | } |
| 96 | } |
| 97 | std::sort(cache.begin(), cache.end()); |
| 98 | |
| 99 | // If two consecutive methods belong to same class, we can merge them. |
| 100 | // This tends to reduce the number of entries (used memory) by 10x. |
| 101 | size_t num_entries = cache.size(); |
| 102 | if (cache.size() > 1) { |
| 103 | for (auto it = std::next(cache.begin()); it != cache.end(); it++) { |
| 104 | if (std::prev(it)->second == it->second) { |
| 105 | std::prev(it)->first = 0; // Clear entry with lower end_dex_offset (mark to remove). |
| 106 | num_entries--; |
| 107 | } |
| 108 | } |
| 109 | } |
| 110 | |
| 111 | // The cache is immutable now. Store it as continuous vector to save space. |
| 112 | class_cache_.reserve(num_entries); |
| 113 | auto pred = [](auto it) { return it.first != 0; }; // Entries to copy (not cleared above). |
| 114 | std::copy_if(cache.begin(), cache.end(), std::back_inserter(class_cache_), pred); |
| 115 | } |
| 116 | |
| 117 | inline bool GetClassDefIndex(uint32_t dex_offset, uint32_t* class_def_index) { |
David Srbecky | e778fa6 | 2020-04-07 17:52:16 +0100 | [diff] [blame] | 118 | if (class_cache_.empty()) { |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 119 | CreateClassCache(); |
David Srbecky | e778fa6 | 2020-04-07 17:52:16 +0100 | [diff] [blame] | 120 | } |
| 121 | |
| 122 | // Binary search in the class cache. First element of the pair is the key. |
| 123 | auto comp = [](uint32_t value, const auto& it) { return value < it.first; }; |
| 124 | auto it = std::upper_bound(class_cache_.begin(), class_cache_.end(), dex_offset, comp); |
| 125 | if (it != class_cache_.end()) { |
| 126 | *class_def_index = it->second; |
| 127 | return true; |
| 128 | } |
| 129 | return false; |
| 130 | } |
| 131 | |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 132 | // The underlying ART object. |
| 133 | std::unique_ptr<const art::DexFile> dex_file_; |
| 134 | |
David Srbecky | e778fa6 | 2020-04-07 17:52:16 +0100 | [diff] [blame] | 135 | // Binary search table with (end_dex_offset, class_def_index) entries. |
| 136 | std::vector<std::pair<uint32_t, uint32_t>> class_cache_; |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 137 | |
| 138 | // Used as short lived temporary when needed. Avoids alloc/free. |
| 139 | std::string temporary_qualified_name_; |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 140 | }; |
| 141 | |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 142 | ADexFile_Error ADexFile_create(const void* _Nonnull address, |
| 143 | size_t size, |
| 144 | size_t* _Nullable new_size, |
| 145 | const char* _Nonnull location, |
| 146 | /*out*/ ADexFile* _Nullable * _Nonnull out_dex_file) { |
| 147 | *out_dex_file = nullptr; |
| 148 | |
| 149 | if (size < sizeof(art::DexFile::Header)) { |
| 150 | if (new_size != nullptr) { |
| 151 | *new_size = sizeof(art::DexFile::Header); |
| 152 | } |
| 153 | return ADEXFILE_ERROR_NOT_ENOUGH_DATA; |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 154 | } |
| 155 | |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 156 | const art::DexFile::Header* header = reinterpret_cast<const art::DexFile::Header*>(address); |
David Srbecky | b8a0d8b | 2023-03-06 15:54:38 +0000 | [diff] [blame] | 157 | uint32_t dex_size = header->file_size_; // Size of "one dex file" excluding any shared data. |
| 158 | uint32_t full_size = dex_size; // Includes referenced shared data past the end of dex. |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 159 | if (art::CompactDexFile::IsMagicValid(header->magic_)) { |
| 160 | // Compact dex files store the data section separately so that it can be shared. |
| 161 | // Therefore we need to extend the read memory range to include it. |
| 162 | // TODO: This might be wasteful as we might read data in between as well. |
| 163 | // In practice, this should be fine, as such sharing only happens on disk. |
| 164 | uint32_t computed_file_size; |
| 165 | if (__builtin_add_overflow(header->data_off_, header->data_size_, &computed_file_size)) { |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 166 | return ADEXFILE_ERROR_INVALID_HEADER; |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 167 | } |
David Srbecky | b8a0d8b | 2023-03-06 15:54:38 +0000 | [diff] [blame] | 168 | if (computed_file_size > full_size) { |
| 169 | full_size = computed_file_size; |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 170 | } |
| 171 | } else if (!art::StandardDexFile::IsMagicValid(header->magic_)) { |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 172 | return ADEXFILE_ERROR_INVALID_HEADER; |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 173 | } |
| 174 | |
David Srbecky | b8a0d8b | 2023-03-06 15:54:38 +0000 | [diff] [blame] | 175 | if (size < full_size) { |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 176 | if (new_size != nullptr) { |
David Srbecky | b8a0d8b | 2023-03-06 15:54:38 +0000 | [diff] [blame] | 177 | *new_size = full_size; |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 178 | } |
| 179 | return ADEXFILE_ERROR_NOT_ENOUGH_DATA; |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 180 | } |
| 181 | |
| 182 | std::string loc_str(location); |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 183 | std::string error_msg; |
David Srbecky | b8a0d8b | 2023-03-06 15:54:38 +0000 | [diff] [blame] | 184 | art::DexFileLoader loader(static_cast<const uint8_t*>(address), dex_size, loc_str); |
David Srbecky | 052f5fb | 2023-02-13 12:42:12 +0000 | [diff] [blame] | 185 | std::unique_ptr<const art::DexFile> dex_file = loader.Open(header->checksum_, |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 186 | /*oat_dex_file=*/nullptr, |
| 187 | /*verify=*/false, |
| 188 | /*verify_checksum=*/false, |
| 189 | &error_msg); |
| 190 | if (dex_file == nullptr) { |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 191 | LOG(ERROR) << "Can not open dex file " << loc_str << ": " << error_msg; |
| 192 | return ADEXFILE_ERROR_INVALID_DEX; |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 193 | } |
| 194 | |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 195 | *out_dex_file = new ADexFile(std::move(dex_file)); |
| 196 | return ADEXFILE_ERROR_OK; |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 197 | } |
| 198 | |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 199 | void ADexFile_destroy(ADexFile* self) { |
| 200 | delete self; |
| 201 | } |
| 202 | |
| 203 | size_t ADexFile_findMethodAtOffset(ADexFile* self, |
| 204 | size_t dex_offset, |
| 205 | ADexFile_MethodCallback* callback, |
| 206 | void* callback_data) { |
| 207 | const art::DexFile* dex_file = self->dex_file_.get(); |
| 208 | if (!dex_file->IsInDataSection(dex_file->Begin() + dex_offset)) { |
| 209 | return 0; // The DEX offset is not within the bytecode of this dex file. |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 210 | } |
| 211 | |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 212 | if (dex_file->IsCompactDexFile()) { |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 213 | // The data section of compact dex files might be shared. |
| 214 | // Check the subrange unique to this compact dex. |
| 215 | const art::CompactDexFile::Header& cdex_header = |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 216 | dex_file->AsCompactDexFile()->GetHeader(); |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 217 | uint32_t begin = cdex_header.data_off_ + cdex_header.OwnedDataBegin(); |
| 218 | uint32_t end = cdex_header.data_off_ + cdex_header.OwnedDataEnd(); |
| 219 | if (dex_offset < begin || dex_offset >= end) { |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 220 | return 0; // The DEX offset is not within the bytecode of this dex file. |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 221 | } |
| 222 | } |
| 223 | |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 224 | ADexFile_Method info; |
| 225 | if (!self->FindMethod(dex_offset, &info)) { |
| 226 | return 0; |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 227 | } |
| 228 | |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 229 | callback(callback_data, &info); |
| 230 | return 1; |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 231 | } |
| 232 | |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 233 | size_t ADexFile_forEachMethod(ADexFile* self, |
| 234 | ADexFile_MethodCallback* callback, |
| 235 | void* callback_data) { |
| 236 | size_t count = 0; |
| 237 | for (art::ClassAccessor accessor : self->dex_file_->GetClasses()) { |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 238 | for (const art::ClassAccessor::Method& method : accessor.GetMethods()) { |
| 239 | art::CodeItemInstructionAccessor code = method.GetInstructions(); |
David Srbecky | 2ddb98b | 2021-03-09 00:37:04 +0000 | [diff] [blame] | 240 | if (code.HasCodeItem()) { |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 241 | size_t offset = reinterpret_cast<const uint8_t*>(code.Insns()) - self->dex_file_->Begin(); |
| 242 | ADexFile_Method info { |
| 243 | .adex = self, |
| 244 | .index = method.GetIndex(), |
| 245 | .offset = offset, |
David Srbecky | 2ddb98b | 2021-03-09 00:37:04 +0000 | [diff] [blame] | 246 | .size = code.InsnsSizeInBytes(), |
David Srbecky | 2ddb98b | 2021-03-09 00:37:04 +0000 | [diff] [blame] | 247 | }; |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 248 | callback(callback_data, &info); |
| 249 | count++; |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 250 | } |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 251 | } |
| 252 | } |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 253 | return count; |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 254 | } |
| 255 | |
David Srbecky | 30d4d2f | 2021-03-24 16:11:49 +0000 | [diff] [blame] | 256 | size_t ADexFile_Method_getCodeOffset(const ADexFile_Method* self, |
| 257 | size_t* out_size) { |
| 258 | if (out_size != nullptr) { |
| 259 | *out_size = self->size; |
| 260 | } |
| 261 | return self->offset; |
| 262 | } |
| 263 | |
| 264 | const char* ADexFile_Method_getName(const ADexFile_Method* self, |
| 265 | size_t* out_size) { |
| 266 | const char* name = self->adex->dex_file_->GetMethodName(self->index); |
| 267 | if (out_size != nullptr) { |
| 268 | *out_size = strlen(name); |
| 269 | } |
| 270 | return name; |
| 271 | } |
| 272 | |
| 273 | const char* ADexFile_Method_getQualifiedName(const ADexFile_Method* self, |
| 274 | int with_params, |
| 275 | size_t* out_size) { |
| 276 | std::string& temp = self->adex->temporary_qualified_name_; |
| 277 | temp.clear(); |
| 278 | self->adex->dex_file_->AppendPrettyMethod(self->index, with_params, &temp); |
| 279 | if (out_size != nullptr) { |
| 280 | *out_size = temp.size(); |
| 281 | } |
| 282 | return temp.data(); |
| 283 | } |
| 284 | |
| 285 | const char* ADexFile_Method_getClassDescriptor(const ADexFile_Method* self, |
| 286 | size_t* out_size) { |
| 287 | const art::dex::MethodId& method_id = self->adex->dex_file_->GetMethodId(self->index); |
| 288 | const char* name = self->adex->dex_file_->GetMethodDeclaringClassDescriptor(method_id); |
| 289 | if (out_size != nullptr) { |
| 290 | *out_size = strlen(name); |
| 291 | } |
| 292 | return name; |
| 293 | } |
| 294 | |
| 295 | const char* ADexFile_Error_toString(ADexFile_Error self) { |
| 296 | switch (self) { |
| 297 | case ADEXFILE_ERROR_OK: return "Ok"; |
| 298 | case ADEXFILE_ERROR_INVALID_DEX: return "Dex file is invalid."; |
| 299 | case ADEXFILE_ERROR_NOT_ENOUGH_DATA: return "Not enough data. Incomplete dex file."; |
| 300 | case ADEXFILE_ERROR_INVALID_HEADER: return "Invalid dex file header."; |
| 301 | } |
| 302 | return nullptr; |
| 303 | } |
Martin Stjernholm | b3d2e83 | 2018-11-15 18:09:35 +0000 | [diff] [blame] | 304 | |
| 305 | } // extern "C" |