Verifier now generates register maps, though nothing uses them yet.
The code to generate the maps is enabled for testing, but currently
nothing is done with them. Will work out what to do with them later.
Change-Id: Ifa5b7a9dd1233813d4f4040cacfb83e9b4a5330b
diff --git a/src/dex_verifier.cc b/src/dex_verifier.cc
index 0a93109..3380981 100644
--- a/src/dex_verifier.cc
+++ b/src/dex_verifier.cc
@@ -145,7 +145,7 @@
}
/*
- * Sanity-check the register counts. ins + locals = registers, so make
+ * Sanity-check the register counts. ins + locals = registers, so make
* sure that ins <= registers.
*/
if (code_item->ins_size_ > code_item->registers_size_) {
@@ -204,7 +204,7 @@
while (width < insns_size) {
if (!VerifyInstruction(vdata, inst, width)) {
- LOG(ERROR) << "VFY: rejecting opcode 0x" << std::hex
+ LOG(ERROR) << "VFY: rejecting opcode 0x" << std::hex
<< (int) inst->Opcode() << " at 0x" << width << std::dec;
return false;
}
@@ -316,6 +316,7 @@
const DexFile::CodeItem* code_item = vdata->code_item_;
uint16_t registers_size = code_item->registers_size_;
uint32_t insns_size = code_item->insns_size_;
+ bool generate_register_map = true;
RegisterTable reg_table;
if (registers_size * insns_size > 4*1024*1024) {
@@ -324,7 +325,8 @@
}
/* Create and initialize register lists. */
- if (!InitRegisterTable(vdata, ®_table, kTrackRegsGcPoints)) {
+ if (!InitRegisterTable(vdata, ®_table,
+ generate_register_map ? kTrackRegsGcPoints : kTrackRegsBranches)) {
return false;
}
@@ -347,8 +349,16 @@
return false;
}
- /* TODO: Generate a register map. */
-
+ /* Generate a register map. */
+ if (generate_register_map) {
+ RegisterMap* map = GenerateRegisterMapV(vdata);
+ /*
+ * Tuck the map into the Method. It will either get used directly or, if
+ * we're in dexopt, will be packed up and appended to the DEX file.
+ */
+ // TODO: Put the map somewhere...
+ delete map;
+ }
return true;
}
@@ -792,7 +802,7 @@
}
/*
- * Check for 32-bit overflow. This isn't strictly necessary if we can
+ * Check for 32-bit overflow. This isn't strictly necessary if we can
* depend on the VM to have identical "wrap-around" behavior, but
* it's unwise to depend on that.
*/
@@ -824,7 +834,7 @@
uint32_t i;
/*
- * Every address gets a RegisterLine struct. This is wasteful, but
+ * Every address gets a RegisterLine struct. This is wasteful, but
* not so much that it's worth chasing through an extra level of
* indirection.
*/
@@ -932,17 +942,15 @@
Class* DexVerifier::LookupClassByDescriptor(const Method* method,
const char* descriptor, VerifyError* failure) {
/*
- * The compiler occasionally puts references to nonexistent
- * classes in signatures. For example, if you have a non-static
- * inner class with no constructor, the compiler provides
- * a private <init> for you. Constructing the class
- * requires <init>(parent), but the outer class can't call
- * that because the method is private. So the compiler
- * generates a package-scope <init>(parent,bogus) method that
- * just calls the regular <init> (the "bogus" part being necessary
- * to distinguish the signature of the synthetic method).
- * Treating the bogus class as an instance of java.lang.Object
- * allows the verifier to process the class successfully.
+ * The compiler occasionally puts references to nonexistent classes in
+ * signatures. For example, if you have a non-static inner class with no
+ * constructor, the compiler provides a private <init> for you.
+ * Constructing the class requires <init>(parent), but the outer class can't
+ * call that because the method is private. So the compiler generates a
+ * package-scope <init>(parent,bogus) method that just calls the regular
+ * <init> (the "bogus" part being necessary to distinguish the signature of
+ * the synthetic method). Treating the bogus class as an instance of
+ * java.lang.Object allows the verifier to process the class successfully.
*/
ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
const ClassLoader* class_loader =
@@ -971,12 +979,11 @@
}
/*
- * Try to continue with base array type. This will let
- * us pass basic stuff (e.g. get array len) that wouldn't
- * fly with an Object. This is NOT correct if the
- * missing type is a primitive array, but we should never
- * have a problem loading those. (I'm not convinced this
- * is correct or even useful. Just use Object here?)
+ * Try to continue with base array type. This will let us pass basic
+ * stuff (e.g. get array len) that wouldn't fly with an Object. This
+ * is NOT correct if the missing type is a primitive array, but we
+ * should never have a problem loading those. (I'm not convinced this
+ * is correct or even useful. Just use Object here?)
*/
klass = class_linker->FindClass("[Ljava/lang/Object;", class_loader);
} else if (descriptor[0] == 'L') {
@@ -1061,7 +1068,7 @@
if (!method->IsStatic()) {
/*
* If this is a constructor for a class other than java.lang.Object,
- * mark the first ("this") argument as uninitialized. This restricts
+ * mark the first ("this") argument as uninitialized. This restricts
* field access until the superclass constructor is called.
*/
ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
@@ -1100,12 +1107,11 @@
case 'L':
case '[':
/*
- * We assume that reference arguments are initialized. The
- * only way it could be otherwise (assuming the caller was
- * verified) is if the current method is <init>, but in that
- * case it's effectively considered initialized the instant
- * we reach here (in the sense that we can return without
- * doing anything or call virtual methods).
+ * We assume that reference arguments are initialized. The only way
+ * it could be otherwise (assuming the caller was verified) is if
+ * the current method is <init>, but in that case it's effectively
+ * considered initialized the instant we reach here (in the sense
+ * that we can return without doing anything or call virtual methods).
*/
{
Class* klass =
@@ -1166,8 +1172,8 @@
const char* descriptor = dex_file->GetReturnTypeDescriptor(proto_id);
/*
- * Validate return type. We don't do the type lookup; just want to make
- * sure that it has the right format. Only major difference from the
+ * Validate return type. We don't do the type lookup; just want to make
+ * sure that it has the right format. Only major difference from the
* method argument format is that 'V' is supported.
*/
switch (*descriptor) {
@@ -1216,7 +1222,7 @@
int idx;
assert(klass != NULL);
- /* TODO: binary search when numEntries > 8 */
+ /* TODO: binary search when num_entries > 8 */
for (idx = uninit_map->num_entries_ - 1; idx >= 0; idx--) {
if (uninit_map->map_[idx].addr_ == addr) {
if (uninit_map->map_[idx].klass_ != NULL &&
@@ -1253,7 +1259,7 @@
/* Continue until no instructions are marked "changed". */
while (true) {
/*
- * Find the first marked one. Use "start_guess" as a way to find
+ * Find the first marked one. Use "start_guess" as a way to find
* one quickly.
*/
for (insn_idx = start_guess; insn_idx < insns_size; insn_idx++) {
@@ -1273,15 +1279,14 @@
}
/*
- * We carry the working set of registers from instruction to
- * instruction. If this address can be the target of a branch
- * (or throw) instruction, or if we're skipping around chasing
- * "changed" flags, we need to load the set of registers from
- * the table.
+ * We carry the working set of registers from instruction to instruction.
+ * If this address can be the target of a branch (or throw) instruction,
+ * or if we're skipping around chasing "changed" flags, we need to load
+ * the set of registers from the table.
*
- * Because we always prefer to continue on to the next instruction,
- * we should never have a situation where we have a stray
- * "changed" flag set on an instruction that isn't a branch target.
+ * Because we always prefer to continue on to the next instruction, we
+ * should never have a situation where we have a stray "changed" flag set
+ * on an instruction that isn't a branch target.
*/
if (InsnIsBranchTarget(insn_flags, insn_idx)) {
RegisterLine* work_line = ®_table->work_line_;
@@ -1320,7 +1325,7 @@
if (DEAD_CODE_SCAN && ((method->GetAccessFlags() & kAccWritable) == 0)) {
/*
- * Scan for dead code. There's nothing "evil" about dead code
+ * Scan for dead code. There's nothing "evil" about dead code
* (besides the wasted space), but it indicates a flaw somewhere
* down the line, possibly in the verifier.
*
@@ -1331,7 +1336,7 @@
for (insn_idx = 0; insn_idx < insns_size;
insn_idx += InsnGetWidth(insn_flags, insn_idx)) {
/*
- * Switch-statement data doesn't get "visited" by scanner. It
+ * Switch-statement data doesn't get "visited" by scanner. It
* may or may not be preceded by a padding NOP (for alignment).
*/
if (insns[insn_idx] == Instruction::kPackedSwitchSignature ||
@@ -1392,14 +1397,14 @@
/*
* Once we finish decoding the instruction, we need to figure out where
- * we can go from here. There are three possible ways to transfer
+ * we can go from here. There are three possible ways to transfer
* control to another statement:
*
- * (1) Continue to the next instruction. Applies to all but
+ * (1) Continue to the next instruction. Applies to all but
* unconditional branches, method returns, and exception throws.
- * (2) Branch to one or more possible locations. Applies to branches
+ * (2) Branch to one or more possible locations. Applies to branches
* and switch statements.
- * (3) Exception handlers. Applies to any instruction that can
+ * (3) Exception handlers. Applies to any instruction that can
* throw an exception that is handled by an encompassing "try"
* block.
*
@@ -1422,7 +1427,7 @@
VerifyError failure = VERIFY_ERROR_NONE;
/*
- * Make a copy of the previous register state. If the instruction
+ * Make a copy of the previous register state. If the instruction
* can throw an exception, we will copy/merge this into the "catch"
* address rather than work_line, because we don't want the result
* from the "successful" code path (e.g. a check-cast that "improves"
@@ -1442,7 +1447,7 @@
switch (dec_insn.opcode_) {
case Instruction::NOP:
/*
- * A "pure" NOP has no effect on anything. Data tables start with
+ * A "pure" NOP has no effect on anything. Data tables start with
* a signature that looks like a NOP; if we see one of these in
* the course of executing code then we have a problem.
*/
@@ -1472,12 +1477,12 @@
/*
* The move-result instructions copy data out of a "pseudo-register"
- * with the results from the last method invocation. In practice we
+ * with the results from the last method invocation. In practice we
* might want to hold the result in an actual CPU register, so the
* Dalvik spec requires that these only appear immediately after an
* invoke or filled-new-array.
*
- * These calls invalidate the "result" register. (This is now
+ * These calls invalidate the "result" register. (This is now
* redundant with the reset done below, but it can make the debug info
* easier to read in some cases.)
*/
@@ -1497,7 +1502,7 @@
/*
* This statement can only appear as the first instruction in an
* exception handler (though not all exception handlers need to
- * have one of these). We verify that as part of extracting the
+ * have one of these). We verify that as part of extracting the
* exception type from the catch block list.
*
* "res_class" will hold the closest common superclass of all
@@ -1585,7 +1590,7 @@
/*
* Verify that the reference in vAA is an instance of the type
- * in "return_type". The Zero type is allowed here. If the
+ * in "return_type". The Zero type is allowed here. If the
* method is declared to return an interface, then any
* initialized reference is acceptable.
*
@@ -1659,21 +1664,21 @@
break;
case Instruction::MONITOR_EXIT:
/*
- * monitor-exit instructions are odd. They can throw exceptions,
+ * monitor-exit instructions are odd. They can throw exceptions,
* but when they do they act as if they succeeded and the PC is
- * pointing to the following instruction. (This behavior goes back
+ * pointing to the following instruction. (This behavior goes back
* to the need to handle asynchronous exceptions, a now-deprecated
* feature that Dalvik doesn't support.)
*
- * In practice we don't need to worry about this. The only
+ * In practice we don't need to worry about this. The only
* exceptions that can be thrown from monitor-exit are for a
- * null reference and -exit without a matching -enter. If the
+ * null reference and -exit without a matching -enter. If the
* structured locking checks are working, the former would have
* failed on the -enter instruction, and the latter is impossible.
*
* This is fortunate, because issue 3221411 prevents us from
* chasing the "can throw" path when monitor verification is
- * enabled. If we can fully verify the locking we can ignore
+ * enabled. If we can fully verify the locking we can ignore
* some catch blocks (which will show up as "dead" code when
* we skip them here); if we can't, then the code path could be
* "live" so we still need to check it.
@@ -1686,7 +1691,7 @@
case Instruction::CHECK_CAST:
/*
* If this instruction succeeds, we will promote register vA to
- * the type in vB. (This could be a demotion -- not expected, so
+ * the type in vB. (This could be a demotion -- not expected, so
* we don't try to address it.)
*
* If it fails, an exception is thrown, which we deal with later
@@ -2100,7 +2105,7 @@
}
} else {
/*
- * Null array ref; this code path will fail at runtime. We
+ * Null array ref; this code path will fail at runtime. We
* know this is either long or double, so label it const.
*/
dst_type = kRegTypeConstLo;
@@ -2134,7 +2139,7 @@
assert(res_class->GetComponentType() != NULL);
/*
- * Find the element class. res_class->GetComponentType() indicates
+ * Find the element class. res_class->GetComponentType() indicates
* the basic type, which won't be what we want for a
* multi-dimensional array.
*/
@@ -2158,7 +2163,7 @@
} else {
/*
* The array reference is NULL, so the current code path will
- * throw an exception. For proper merging with later code
+ * throw an exception. For proper merging with later code
* paths, and correct handling of "if-eqz" tests on the
* result of the array get, we want to treat this as a null
* reference.
@@ -2292,7 +2297,7 @@
Class* element_class;
/*
- * Get the array class. If the array ref is null, we won't
+ * Get the array class. If the array ref is null, we won't
* have type information (and we'll crash at runtime with a
* null pointer exception).
*/
@@ -2308,12 +2313,12 @@
}
/*
- * Find the element class. res_class->GetComponentType() indicates
+ * Find the element class. res_class->GetComponentType() indicates
* the basic type, which won't be what we want for a
* multi-dimensional array.
*
* All we want to check here is that the element type is a
- * reference class. We *don't* check instanceof here, because
+ * reference class. We *don't* check instanceof here, because
* you can still put a String into a String[] after the latter
* has been cast to an Object[].
*/
@@ -2730,7 +2735,7 @@
break;
/*
- * Get type of field we're storing into. We know that the
+ * Get type of field we're storing into. We know that the
* contents of the register match the instruction, but we also
* need to ensure that the instruction matches the field type.
* Using e.g. sput-short to write into a 32-bit integer field
@@ -2899,9 +2904,9 @@
break;
/*
- * Some additional checks when calling <init>. We know from
+ * Some additional checks when calling <init>. We know from
* the invocation arg check that the "this" argument is an
- * instance of called_method->klass. Now we further restrict
+ * instance of called_method->klass. Now we further restrict
* that to require that called_method->klass is the same as
* this->klass or this->super, allowing the latter only if
* the "this" argument is the same as the "this" argument to
@@ -2950,7 +2955,7 @@
/*
* Replace the uninitialized reference with an initialized
- * one, and clear the entry in the uninit map. We need to
+ * one, and clear the entry in the uninit map. We need to
* do this for all registers that have the same object
* instance in them, not just the "this" register.
*/
@@ -2998,7 +3003,7 @@
#if 0 /* can't do this here, fails on dalvik test 052-verifier-fun */
/*
* Get the type of the "this" arg, which should always be an
- * interface class. Because we don't do a full merge on
+ * interface class. Because we don't do a full merge on
* interface classes, this might have reduced to Object.
*/
this_type = GetInvocationThis(work_line, &dec_insn, &failure);
@@ -3020,7 +3025,7 @@
/*
* Either "this_class" needs to be the interface class that
* defined abs_method, or abs_method's class needs to be one
- * of the interfaces implemented by "this_class". (Or, if
+ * of the interfaces implemented by "this_class". (Or, if
* we couldn't complete the merge, this will be Object.)
*/
if (this_class != abs_method->GetDeclaringClass() &&
@@ -3038,7 +3043,7 @@
/*
* We don't have an object instance, so we can't find the
- * concrete method. However, all of the type information is
+ * concrete method. However, all of the type information is
* in the abstract method, so we're good.
*/
return_type = GetMethodReturnType(dex_file, abs_method);
@@ -3265,7 +3270,7 @@
/*
* This falls into the general category of "optimized" instructions,
- * which don't generally appear during verification. Because it's
+ * which don't generally appear during verification. Because it's
* inserted in the course of verification, we can expect to see it here.
*/
//case Instruction::THROW_VERIFICATION_ERROR:
@@ -3274,32 +3279,32 @@
/*
* Verifying "quickened" instructions is tricky, because we have
- * discarded the original field/method information. The byte offsets
+ * discarded the original field/method information. The byte offsets
* and vtable indices only have meaning in the context of an object
* instance.
*
* If a piece of code declares a local reference variable, assigns
* null to it, and then issues a virtual method call on it, we
- * cannot evaluate the method call during verification. This situation
+ * cannot evaluate the method call during verification. This situation
* isn't hard to handle, since we know the call will always result in an
- * NPE, and the arguments and return value don't matter. Any code that
+ * NPE, and the arguments and return value don't matter. Any code that
* depends on the result of the method call is inaccessible, so the
* fact that we can't fully verify anything that comes after the bad
* call is not a problem.
*
* We must also consider the case of multiple code paths, only some of
- * which involve a null reference. We can completely verify the method
+ * which involve a null reference. We can completely verify the method
* if we sidestep the results of executing with a null reference.
* For example, if on the first pass through the code we try to do a
* virtual method invocation through a null ref, we have to skip the
* method checks and have the method return a "wildcard" type (which
- * merges with anything to become that other thing). The move-result
+ * merges with anything to become that other thing). The move-result
* will tell us if it's a reference, single-word numeric, or double-word
- * value. We continue to perform the verification, and at the end of
+ * value. We continue to perform the verification, and at the end of
* the function any invocations that were never fully exercised are
* marked as null-only.
*
- * We would do something similar for the field accesses. The field's
+ * We would do something similar for the field accesses. The field's
* type, once known, can be used to recover the width of short integers.
* If the object reference was null, the field-get returns the "wildcard"
* type, which is acceptable for any operation.
@@ -3332,9 +3337,9 @@
/*
* These instructions are equivalent (from the verifier's point of view)
- * to the original form. The change was made for correctness rather
+ * to the original form. The change was made for correctness rather
* than improved performance (except for invoke-object-init, which
- * provides both). The substitution takes place after verification
+ * provides both). The substitution takes place after verification
* completes, though, so we don't expect to see them here.
*/
case Instruction::UNUSED_F0:
@@ -3385,7 +3390,7 @@
break;
/*
- * DO NOT add a "default" clause here. Without it the compiler will
+ * DO NOT add a "default" clause here. Without it the compiler will
* complain if an instruction is missing (which is desirable).
*/
}
@@ -3418,9 +3423,9 @@
}
/*
- * If we didn't just set the result register, clear it out. This
+ * If we didn't just set the result register, clear it out. This
* ensures that you can only use "move-result" immediately after the
- * result is set. (We could check this statically, but it's not
+ * result is set. (We could check this statically, but it's not
* expensive and it makes our debugging output cleaner.)
*/
if (!just_set_result) {
@@ -3430,7 +3435,7 @@
}
/*
- * Handle "continue". Tag the next consecutive instruction.
+ * Handle "continue". Tag the next consecutive instruction.
*/
if ((opcode_flag & Instruction::kContinue) != 0) {
size_t insn_width = InsnGetWidth(insn_flags, insn_idx);
@@ -3442,7 +3447,7 @@
/*
* The only way to get to a move-exception instruction is to get
- * thrown there. Make sure the next instruction isn't one.
+ * thrown there. Make sure the next instruction isn't one.
*/
if (!CheckMoveException(code_item->insns_, insn_idx + insn_width))
return false;
@@ -3458,7 +3463,7 @@
} else {
/*
* We're not recording register data for the next instruction,
- * so we don't know what the prior state was. We have to
+ * so we don't know what the prior state was. We have to
* assume that something has changed and re-evaluate it.
*/
InsnSetChanged(insn_flags, insn_idx + insn_width, true);
@@ -3466,13 +3471,13 @@
}
/*
- * Handle "branch". Tag the branch target.
+ * Handle "branch". Tag the branch target.
*
* NOTE: instructions like Instruction::EQZ provide information about the
- * state of the register when the branch is taken or not taken. For example,
+ * state of the register when the branch is taken or not taken. For example,
* somebody could get a reference field, check it for zero, and if the
* branch is taken immediately store that register in a boolean field
- * since the value is known to be zero. We do not currently account for
+ * since the value is known to be zero. We do not currently account for
* that, and will reject the code.
*
* TODO: avoid re-fetching the branch target
@@ -3499,7 +3504,7 @@
}
/*
- * Handle "switch". Tag all possible branch targets.
+ * Handle "switch". Tag all possible branch targets.
*
* We've already verified that the table is structurally sound, so we
* just need to walk through and tag the targets.
@@ -3541,7 +3546,7 @@
/*
* Handle instructions that can throw and that are sitting in a
- * "try" block. (If they're not in a "try" block when they throw,
+ * "try" block. (If they're not in a "try" block when they throw,
* control transfers out of the method.)
*/
if ((opcode_flag & Instruction::kThrow) != 0 &&
@@ -3555,10 +3560,10 @@
has_catch_all = true;
/*
- * Merge registers into the "catch" block. We want to
- * use the "savedRegs" rather than "work_regs", because
- * at runtime the exception will be thrown before the
- * instruction modifies any registers.
+ * Merge registers into the "catch" block. We want to use the
+ * "savedRegs" rather than "work_regs", because at runtime the
+ * exception will be thrown before the instruction modifies any
+ * registers.
*/
if (!UpdateRegisters(insn_flags, reg_table, iterator.Get().address_,
®_table->saved_line_))
@@ -3567,7 +3572,7 @@
/*
* If the monitor stack depth is nonzero, there must be a "catch all"
- * handler for this instruction. This does apply to monitor-exit
+ * handler for this instruction. This does apply to monitor-exit
* because of async exception handling.
*/
if (work_line->monitor_stack_top_ != 0 && !has_catch_all) {
@@ -3587,9 +3592,7 @@
}
}
- /*
- * If we're returning from the method, make sure our monitor stack is empty.
- */
+ /* If we're returning from the method, make sure monitor stack is empty. */
if ((opcode_flag & Instruction::kReturn) != 0 &&
work_line->monitor_stack_top_ != 0) {
LOG(ERROR) << "VFY: return with stack depth="
@@ -3599,8 +3602,8 @@
}
/*
- * Update start_guess. Advance to the next instruction of that's
- * possible, otherwise use the branch target if one was found. If
+   * Update start_guess.  Advance to the next instruction if that's
+ * possible, otherwise use the branch target if one was found. If
* neither of those exists we're in a return or throw; leave start_guess
* alone and let the caller sort it out.
*/
@@ -3769,7 +3772,7 @@
/*
* Confirm that the entry at the top of the stack is associated with
- * the register. Pop the top entry off.
+ * the register. Pop the top entry off.
*/
work_line->monitor_stack_top_--;
#ifdef BUG_3215458_FIXED
@@ -3840,7 +3843,6 @@
must_be_local = true;
}
- //if (!obj_class->InstanceOf(field->GetDeclaringClass())) {
if (!field->GetDeclaringClass()->IsAssignableFrom(obj_class)) {
LOG(ERROR) << "VFY: invalid field access (field "
<< field->GetDeclaringClass()->GetDescriptor()->ToModifiedUtf8()
@@ -3927,10 +3929,10 @@
LOG(ERROR) << "VFY: unable to resolve exception class "
<< handler.type_idx_ << " ("
<< dex_file->dexStringByTypeIdx(handler.type_idx_) << ")";
- /* TODO: do we want to keep going? If we don't fail
- * this we run the risk of having a non-Throwable
- * introduced at runtime. However, that won't pass
- * an instanceof test, so is essentially harmless.
+ /* TODO: do we want to keep going? If we don't fail this we run
+ * the risk of having a non-Throwable introduced at runtime.
+ * However, that won't pass an instanceof test, so is essentially
+ * harmless.
*/
} else {
if (common_super == NULL)
@@ -4054,9 +4056,9 @@
/*
* In most circumstances we won't see a reference to a primitive
* class here (e.g. "D"), since that would mean the object in the
- * register is actually a primitive type. It can happen as the
+ * register is actually a primitive type. It can happen as the
* result of an assumed-successful check-cast instruction in
- * which the second argument refers to a primitive class. (In
+ * which the second argument refers to a primitive class. (In
* practice, such an instruction will always throw an exception.)
*
* This is not an issue for instructions like const-class, where
@@ -4274,7 +4276,7 @@
}
/*
- * Implement "move-result-wide". Copy the category-2 value from the result
+ * Implement "move-result-wide". Copy the category-2 value from the result
* register to another register, and reset the result register.
*/
void DexVerifier::CopyResultRegister2(RegisterLine* register_line,
@@ -4358,7 +4360,7 @@
if (!has_primitive && array_dim1 == array_dim2) {
/*
- * Two arrays of reference types with equal dimensions. Try to
+ * Two arrays of reference types with equal dimensions. Try to
* find a good match.
*/
common_elem = FindCommonSuperclass(c1->GetComponentType(),
@@ -4366,7 +4368,7 @@
num_dims = array_dim1;
} else {
/*
- * Mismatched array depths and/or array(s) of primitives. We want
+ * Mismatched array depths and/or array(s) of primitives. We want
* Object, or an Object array with appropriate dimensions.
*
* We initialize array_class to Object here, because it's possible
@@ -4380,7 +4382,7 @@
}
/*
- * Find an appropriately-dimensioned array class. This is easiest
+ * Find an appropriately-dimensioned array class. This is easiest
* to do iteratively, using the array class found by the current round
* as the element type for the next round.
*/
@@ -4431,8 +4433,8 @@
* from the table with a reference type.
*
* Uninitialized references are composed of the enum ORed with an
- * index value. The uninitialized table entry at index zero *will*
- * show up as a simple kRegTypeUninit value. Since this cannot be
+ * index value. The uninitialized table entry at index zero *will*
+ * show up as a simple kRegTypeUninit value. Since this cannot be
* merged with anything but itself, the rules do the right thing.
*/
if (type1 < kRegTypeMAX) {
@@ -4491,8 +4493,8 @@
if (!InsnIsVisitedOrChanged(insn_flags, next_insn)) {
/*
* We haven't processed this instruction before, and we haven't
- * touched the registers here, so there's nothing to "merge". Copy
- * the registers over and mark it as changed. (This is the only
+ * touched the registers here, so there's nothing to "merge". Copy
+ * the registers over and mark it as changed. (This is the only
* way a register can transition out of "unknown", so this is not
* just an optimization.)
*/
@@ -4651,7 +4653,7 @@
VerifyError* failure) {
if (*failure == VERIFY_ERROR_NONE) {
/*
- * The 1nr types are interchangeable at this level. We could
+ * The 1nr types are interchangeable at this level. We could
* do something special if we can definitively identify it as a
* float, but there's no real value in doing so.
*/
@@ -4909,8 +4911,8 @@
}
/*
- * Verify each register. If "arg_count" is bad, VerifyRegisterType()
- * will run off the end of the list and fail. It's legal, if silly,
+ * Verify each register. If "arg_count" is bad, VerifyRegisterType()
+ * will run off the end of the list and fail. It's legal, if silly,
* for arg_count to be zero.
*/
for (ui = 0; ui < arg_count; ui++) {
@@ -4965,7 +4967,7 @@
int actual_args;
/*
- * Resolve the method. This could be an abstract or concrete method
+ * Resolve the method. This could be an abstract or concrete method
* depending on what sort of call we're making.
*/
res_method = class_linker->ResolveMethod(*dex_file, dec_insn->vB_, dex_cache,
@@ -4985,7 +4987,7 @@
/*
* Only time you can explicitly call a method starting with '<' is when
- * making a "direct" invocation on "<init>". There are additional
+ * making a "direct" invocation on "<init>". There are additional
* restrictions but we don't enforce them here.
*/
if (res_method->GetName()->Equals("<init>")) {
@@ -5039,7 +5041,7 @@
/*
* We use vAA as our expected arg count, rather than res_method->insSize,
- * because we need to match the call to the signature. Also, we might
+ * because we need to match the call to the signature. Also, we might
* might be calling through an abstract method definition (which doesn't
* have register count values).
*/
@@ -5064,7 +5066,7 @@
/*
* Check the "this" argument, which must be an instance of the class
- * that declared the method. For an interface class, we don't do the
+ * that declared the method. For an interface class, we don't do the
* full interface merge, so we can't do a rigorous check here (which
* is okay since we have to do it at runtime).
*/
@@ -5098,7 +5100,7 @@
}
/*
- * Process the target method's signature. This signature may or may not
+ * Process the target method's signature. This signature may or may not
* have been verified, so we can't assume it's properly formed.
*/
for (sig_offset = 1; sig_offset < sig.size(); sig_offset++) {
@@ -5220,4 +5222,595 @@
return NULL;
}
+/*
+ * Generate the register map for a method that has just been verified.
+ *
+ * Returns a pointer to a newly-allocated RegisterMap, or NULL on failure.
+ * The caller owns the result.
+ */
+DexVerifier::RegisterMap* DexVerifier::GenerateRegisterMapV(VerifierData* vdata)
+{
+  const DexFile::CodeItem* code_item = vdata->code_item_;
+  int i, bytes_for_addr, gc_point_count;
+
+  if (code_item->registers_size_ >= 2048) {
+    LOG(ERROR) << "ERROR: register map can't handle "
+        << code_item->registers_size_ << " registers";
+    return NULL;
+  }
+  /*
+   * NOTE(review): registers_size_ values in [2041,2047] pass the check above
+   * but make (registers_size_ + 7) / 8 == 256, which truncates to 0 in an
+   * 8-bit reg_width -- confirm the intended limit.
+   */
+  uint8_t reg_width = (code_item->registers_size_ + 7) / 8;
+
+  /*
+   * Decide if we need 8 or 16 bits to hold the address. Strictly speaking
+   * we only need 16 bits if we actually encode an address >= 256 -- if
+   * the method has a section at the end without GC points (e.g. array
+   * data) we don't need to count it. The situation is unusual, and
+   * detecting it requires scanning the entire method, so we don't bother.
+   */
+  RegisterMapFormat format;
+  if (code_item->insns_size_ < 256) {
+    format = kRegMapFormatCompact8;
+    bytes_for_addr = 1;
+  } else {
+    format = kRegMapFormatCompact16;
+    bytes_for_addr = 2;
+  }
+
+  /*
+   * Count up the number of GC point instructions.
+   *
+   * NOTE: this does not automatically include the first instruction,
+   * since we don't count method entry as a GC point.
+   */
+  gc_point_count = 0;
+  for (i = 0; i < (int) code_item->insns_size_; i++) {
+    if (InsnIsGcPoint(vdata->insn_flags_.get(), i))
+      gc_point_count++;
+  }
+  if (gc_point_count >= 65536) {
+    /* We could handle this, but in practice we don't get near this. */
+    LOG(ERROR) << "ERROR: register map can't handle " << gc_point_count
+        << " gc points in one method";
+    return NULL;
+  }
+
+  /* Calculate size of buffer to hold the map data. */
+  uint32_t data_size = gc_point_count * (bytes_for_addr + reg_width);
+
+  RegisterMap* map = new RegisterMap(format, reg_width, gc_point_count,
+      true, data_size);
+
+  /* Populate it. */
+  uint8_t* map_data = map->data_.get();
+  for (i = 0; i < (int) vdata->code_item_->insns_size_; i++) {
+    if (InsnIsGcPoint(vdata->insn_flags_.get(), i)) {
+      assert(vdata->register_lines_[i].reg_types_.get() != NULL);
+      if (format == kRegMapFormatCompact8) {
+        *map_data++ = i;
+      } else /*kRegMapFormatCompact16*/ {
+        *map_data++ = i & 0xff;
+        *map_data++ = i >> 8;
+      }
+      OutputTypeVector(vdata->register_lines_[i].reg_types_.get(),
+          code_item->registers_size_, map_data);
+      map_data += reg_width;
+    }
+  }
+
+  /* Compare pointer difference directly; casting each pointer to uint32_t
+   * truncated on 64-bit hosts. */
+  assert((size_t) (map_data - map->data_.get()) == data_size);
+
+  // TODO: Remove this check when it's really running...
+#if 1
+  if (!VerifyMap(vdata, map)) {
+    LOG(ERROR) << "Map failed to verify";
+    delete map; /* was leaked on this failure path */
+    return NULL;
+  }
+#endif
+
+  /* Try to compress the map. */
+  RegisterMap* compress_map = CompressMapDifferential(map);
+  if (compress_map != NULL) {
+    // TODO: Remove this check when it's really running...
+#if 1
+    /*
+     * Expand the compressed map we just created, and compare it
+     * to the original. Abort the VM if it doesn't match up.
+     */
+    UniquePtr<RegisterMap> uncompressed_map(UncompressMapDifferential(compress_map));
+    if (uncompressed_map.get() == NULL) {
+      LOG(ERROR) << "Map failed to uncompress - "
+          << vdata->method_->GetDeclaringClass()->GetDescriptor()->ToModifiedUtf8()
+          << "." << vdata->method_->GetName()->ToModifiedUtf8();
+      delete map;
+      delete compress_map;
+      /* bad - compression is broken or we're out of memory */
+      return NULL;
+    } else {
+      if (!CompareMaps(map, uncompressed_map.get())) {
+        LOG(ERROR) << "Map comparison failed - "
+            << vdata->method_->GetDeclaringClass()->GetDescriptor()->ToModifiedUtf8()
+            << "." << vdata->method_->GetName()->ToModifiedUtf8();
+        delete map;
+        delete compress_map;
+        /* bad - compression is broken */
+        return NULL;
+      }
+    }
+#endif
+    delete map;
+    map = compress_map;
+  }
+
+  return map;
+}
+
+/*
+ * Pack the reference-ness of "insn_reg_count" registers into a bit vector
+ * at "data", which must hold at least (insn_reg_count + 7) / 8 bytes.
+ *
+ * Register N lands in bit (N & 7) of byte N/8, lowest-numbered register
+ * in the low bit of each byte; VerifyMap() reads the vector back in the
+ * same order.
+ */
+void DexVerifier::OutputTypeVector(const RegType* regs, int insn_reg_count,
+    uint8_t* data) {
+  uint8_t val = 0;
+  int i;
+
+  for (i = 0; i < insn_reg_count; i++) {
+    RegType type = *regs++;
+    /* Shift accumulated bits down, then mark this register in the top bit. */
+    val >>= 1;
+    if (IsReferenceType(type))
+      val |= 0x80; /* set hi bit */
+
+    /* Emit a full byte every 8 registers. */
+    if ((i & 0x07) == 7)
+      *data++ = val;
+  }
+  if ((i & 0x07) != 0) {
+    /* Flush bits from last byte. */
+    val >>= 8 - (i & 0x07);
+    *data++ = val;
+  }
+}
+
+/*
+ * Double-check a generated map against the verifier's register lines:
+ * every entry's bit vector must agree with IsReferenceType() for each
+ * register at that address. Returns true if the map checks out.
+ */
+bool DexVerifier::VerifyMap(VerifierData* vdata, const RegisterMap* map) {
+  const uint8_t* raw_map = map->data_.get();
+  uint8_t format = map->format_;
+  const int num_entries = map->num_entries_;
+  int ent;
+
+  if ((vdata->code_item_->registers_size_ + 7) / 8 != map->reg_width_) {
+    LOG(ERROR) << "GLITCH: registersSize=" << vdata->code_item_->registers_size_
+        << ", reg_width=" << map->reg_width_;
+    return false;
+  }
+
+  for (ent = 0; ent < num_entries; ent++) {
+    int addr;
+
+    switch (format) {
+      case kRegMapFormatCompact8:
+        addr = *raw_map++;
+        break;
+      case kRegMapFormatCompact16:
+        addr = *raw_map++;
+        addr |= (*raw_map++) << 8;
+        break;
+      default:
+        /* Cast so the uint8_t is logged as a number, not a raw char. */
+        LOG(FATAL) << "GLITCH: bad format (" << (int) format << ")";
+        return false;
+    }
+
+    const RegType* regs = vdata->register_lines_[addr].reg_types_.get();
+    if (regs == NULL) {
+      LOG(ERROR) << "GLITCH: addr " << addr << " has no data";
+      return false;
+    }
+
+    uint8_t val = 0;
+    int i;
+
+    for (i = 0; i < vdata->code_item_->registers_size_; i++) {
+      bool bit_is_ref, reg_is_ref;
+
+      val >>= 1;
+      if ((i & 0x07) == 0) {
+        /* Load next byte of data. */
+        val = *raw_map++;
+      }
+
+      bit_is_ref = val & 0x01;
+
+      RegType type = regs[i];
+      reg_is_ref = IsReferenceType(type);
+
+      if (bit_is_ref != reg_is_ref) {
+        LOG(ERROR) << "GLITCH: addr " << addr << " reg " << i << ": bit="
+            << bit_is_ref << " reg=" << reg_is_ref << "(" << type << ")";
+        return false;
+      }
+    }
+    /* Raw_map now points to the address field of the next entry. */
+  }
+
+  return true;
+}
+
+/*
+ * Compare two register maps for equality: same computed size, same header
+ * fields, and identical data. Returns true if they match.
+ */
+bool DexVerifier::CompareMaps(const RegisterMap* map1, const RegisterMap* map2)
+{
+  size_t size1, size2;
+
+  size1 = ComputeRegisterMapSize(map1);
+  size2 = ComputeRegisterMapSize(map2);
+  if (size1 != size2) {
+    LOG(ERROR) << "CompareMaps: size mismatch (" << size1 << " vs " << size2
+        << ")";
+    return false;
+  }
+
+  if (map1->format_ != map2->format_ || map1->reg_width_ != map2->reg_width_ ||
+      map1->num_entries_ != map2->num_entries_ ||
+      map1->format_on_heap_ != map2->format_on_heap_) {
+    LOG(ERROR) << "CompareMaps: fields mismatch";
+    return false; /* was falling through and reporting success anyway */
+  }
+  if (memcmp(map1->data_.get(), map2->data_.get(), size1) != 0) {
+    LOG(ERROR) << "CompareMaps: data mismatch";
+    return false;
+  }
+
+  return true;
+}
+
+/*
+ * Compute the size, in bytes, of a register map's data area.
+ *
+ * For the differential format this is the decoded ULEB128 length stored
+ * at the front of the data; for the compact formats it is derived from
+ * the entry count and widths.
+ */
+size_t DexVerifier::ComputeRegisterMapSize(const RegisterMap* map) {
+  assert(map != NULL); /* check before dereferencing, not after */
+
+  uint8_t format = map->format_;
+  uint16_t num_entries = map->num_entries_;
+
+  switch (format) {
+    case kRegMapFormatNone:
+      return 1;
+    case kRegMapFormatCompact8:
+      return (1 + map->reg_width_) * num_entries;
+    case kRegMapFormatCompact16:
+      return (2 + map->reg_width_) * num_entries;
+    case kRegMapFormatDifferential:
+      {
+        /* Decoded ULEB128 length. */
+        const uint8_t* ptr = map->data_.get();
+        return DecodeUnsignedLeb128(&ptr);
+      }
+    default:
+      /* Cast so the uint8_t is logged as a number, not a raw char. */
+      LOG(FATAL) << "Bad register map format " << (int) format;
+      return 0;
+  }
+}
+
+/*
+ * Compute the difference between two bit vectors of "byte_width" bytes.
+ *
+ * Returns the number of bytes of ULEB128-encoded bit indices needed to
+ * describe every changed bit; if "leb_out_buf" is non-NULL the encoded
+ * indices are also written there. "first_bit_changed_ptr" (index of the
+ * first differing bit, or -1 if none) and "num_bits_changed_ptr" (total
+ * count) may each be NULL if the caller doesn't need them.
+ */
+int DexVerifier::ComputeBitDiff(const uint8_t* bits1, const uint8_t* bits2,
+    int byte_width, int* first_bit_changed_ptr, int* num_bits_changed_ptr,
+    uint8_t* leb_out_buf) {
+  int num_bits_changed = 0;
+  int first_bit_changed = -1;
+  int leb_size = 0;
+  int byte_num;
+
+  /*
+   * Run through the vectors, first comparing them at the byte level. This
+   * will yield a fairly quick result if nothing has changed between them.
+   */
+  for (byte_num = 0; byte_num < byte_width; byte_num++) {
+    uint8_t byte1 = *bits1++;
+    uint8_t byte2 = *bits2++;
+    if (byte1 != byte2) {
+      /* Walk through the byte, identifying the changed bits. */
+      int bit_num;
+      for (bit_num = 0; bit_num < 8; bit_num++) {
+        if (((byte1 >> bit_num) & 0x01) != ((byte2 >> bit_num) & 0x01)) {
+          int bit_offset = (byte_num << 3) + bit_num;
+
+          if (first_bit_changed < 0)
+            first_bit_changed = bit_offset;
+          num_bits_changed++;
+
+          if (leb_out_buf == NULL) {
+            /* Size-only pass: just accumulate the encoded length. */
+            leb_size += UnsignedLeb128Size(bit_offset);
+          } else {
+            /* Emit pass: write the index and count the bytes produced. */
+            uint8_t* cur_buf = leb_out_buf;
+            leb_out_buf = WriteUnsignedLeb128(leb_out_buf, bit_offset);
+            leb_size += leb_out_buf - cur_buf;
+          }
+        }
+      }
+    }
+  }
+
+  if (num_bits_changed > 0)
+    assert(first_bit_changed >= 0);
+
+  if (first_bit_changed_ptr != NULL)
+    *first_bit_changed_ptr = first_bit_changed;
+  if (num_bits_changed_ptr != NULL)
+    *num_bits_changed_ptr = num_bits_changed;
+
+  return leb_size;
+}
+
+/*
+ * Compress a compact (8- or 16-bit address) register map with the
+ * differential encoding: each entry after the first stores its address
+ * delta and the set of bits that changed relative to the previous entry.
+ *
+ * Returns a newly-allocated map on success, or NULL if the input cannot
+ * be compressed (wrong format, fewer than two entries, starting address
+ * >= 128) or the compressed form would not be smaller than the original.
+ * The input map is unmodified; the caller owns the result.
+ */
+DexVerifier::RegisterMap* DexVerifier::CompressMapDifferential(
+    const RegisterMap* map) {
+  int orig_size = ComputeRegisterMapSize(map);
+  uint8_t* tmp_ptr;
+  int addr_width;
+
+  uint8_t format = map->format_;
+  switch (format) {
+    case kRegMapFormatCompact8:
+      addr_width = 1;
+      break;
+    case kRegMapFormatCompact16:
+      addr_width = 2;
+      break;
+    default:
+      /* Cast so the uint8_t is logged as a number, not a raw char. */
+      LOG(ERROR) << "ERROR: can't compress map with format=" << (int) format;
+      return NULL;
+  }
+
+  int reg_width = map->reg_width_;
+  int num_entries = map->num_entries_;
+
+  if (num_entries <= 1) {
+    return NULL;
+  }
+
+  /*
+   * We don't know how large the compressed data will be. It's possible
+   * for it to expand and become larger than the original. The header
+   * itself is variable-sized, so we generate everything into a temporary
+   * buffer and then copy it to form-fitting storage once we know how big
+   * it will be (and that it's smaller than the original).
+   *
+   * If we use a size that is equal to the size of the input map plus
+   * a value longer than a single entry can possibly expand to, we need
+   * only check for overflow at the end of each entry. The worst case
+   * for a single line is (1 + <ULEB8 address> + <full copy of vector>).
+   * Addresses are 16 bits, so that's (1 + 3 + reg_width).
+   *
+   * The initial address offset and bit vector will take up less than
+   * or equal to the amount of space required when uncompressed -- large
+   * initial offsets are rejected.
+   */
+  UniquePtr<uint8_t[]> tmp_buf(new uint8_t[orig_size + (1 + 3 + reg_width)]);
+
+  tmp_ptr = tmp_buf.get();
+
+  const uint8_t* map_data = map->data_.get();
+  const uint8_t* prev_bits;
+  uint16_t addr, prev_addr;
+
+  addr = *map_data++;
+  if (addr_width > 1)
+    addr |= (*map_data++) << 8;
+
+  if (addr >= 128) {
+    LOG(ERROR) << "Can't compress map with starting address >= 128";
+    return NULL;
+  }
+
+  /*
+   * Start by writing the initial address and bit vector data. The high
+   * bit of the initial address is used to indicate the required address
+   * width (which the decoder can't otherwise determine without parsing
+   * the compressed data).
+   */
+  *tmp_ptr++ = addr | (addr_width > 1 ? 0x80 : 0x00);
+  memcpy(tmp_ptr, map_data, reg_width);
+
+  prev_bits = map_data;
+  prev_addr = addr;
+
+  tmp_ptr += reg_width;
+  map_data += reg_width;
+
+  /* Loop over all following entries. */
+  for (int entry = 1; entry < num_entries; entry++) {
+    int addr_diff;
+    uint8_t key;
+
+    /* Pull out the address and figure out how to encode it. */
+    addr = *map_data++;
+    if (addr_width > 1)
+      addr |= (*map_data++) << 8;
+
+    addr_diff = addr - prev_addr;
+    assert(addr_diff > 0);
+    if (addr_diff < 8) {
+      /* Small difference, encode in 3 bits. */
+      key = addr_diff - 1; /* set 00000AAA */
+    } else {
+      /* Large difference, output escape code. */
+      key = 0x07; /* escape code for AAA */
+    }
+
+    int num_bits_changed, first_bit_changed, leb_size;
+
+    leb_size = ComputeBitDiff(prev_bits, map_data, reg_width,
+        &first_bit_changed, &num_bits_changed, NULL);
+
+    if (num_bits_changed == 0) {
+      /* set B to 1 and CCCC to zero to indicate no bits were changed */
+      key |= 0x08;
+    } else if (num_bits_changed == 1 && first_bit_changed < 16) {
+      /* set B to 0 and CCCC to the index of the changed bit */
+      key |= first_bit_changed << 4;
+    } else if (num_bits_changed < 15 && leb_size < reg_width) {
+      /* set B to 1 and CCCC to the number of bits */
+      key |= 0x08 | (num_bits_changed << 4);
+    } else {
+      /* set B to 1 and CCCC to 0x0f so we store the entire vector */
+      key |= 0x08 | 0xf0;
+    }
+
+    /*
+     * Encode output. Start with the key, follow with the address
+     * diff (if it didn't fit in 3 bits), then the changed bit info.
+     */
+    *tmp_ptr++ = key;
+    if ((key & 0x07) == 0x07)
+      tmp_ptr = WriteUnsignedLeb128(tmp_ptr, addr_diff);
+
+    if ((key & 0x08) != 0) {
+      int bit_count = key >> 4;
+      if (bit_count == 0) {
+        /* nothing changed, no additional output required */
+      } else if (bit_count == 15) {
+        /* full vector is most compact representation */
+        memcpy(tmp_ptr, map_data, reg_width);
+        tmp_ptr += reg_width;
+      } else {
+        /* write bit indices in ULEB128 format */
+        (void) ComputeBitDiff(prev_bits, map_data, reg_width,
+            NULL, NULL, tmp_ptr);
+        tmp_ptr += leb_size;
+      }
+    } else {
+      /* single-bit changed, value encoded in key byte */
+    }
+
+    prev_bits = map_data;
+    prev_addr = addr;
+    map_data += reg_width;
+
+    /* See if we've run past the original size. */
+    if (tmp_ptr - tmp_buf.get() >= orig_size) {
+      return NULL;
+    }
+  }
+
+  /*
+   * Create a RegisterMap with the contents.
+   *
+   * TODO: consider using a threshold other than merely ">=". We would
+   * get poorer compression but potentially use less native heap space.
+   */
+  int new_data_size = tmp_ptr - tmp_buf.get();
+  int new_map_size = new_data_size + UnsignedLeb128Size(new_data_size);
+
+  if (new_map_size >= orig_size) {
+    return NULL;
+  }
+
+  RegisterMap* new_map = new RegisterMap(kRegMapFormatDifferential, reg_width,
+      num_entries, true, new_map_size);
+
+  tmp_ptr = new_map->data_.get();
+  tmp_ptr = WriteUnsignedLeb128(tmp_ptr, new_data_size);
+  memcpy(tmp_ptr, tmp_buf.get(), new_data_size);
+
+  return new_map;
+}
+
+/*
+ * Expand a map compressed with CompressMapDifferential() back into the
+ * corresponding compact (8- or 16-bit address) format.
+ *
+ * Returns a newly-allocated map, or NULL if the input is not in the
+ * differential format or the data is malformed. The caller owns the
+ * result.
+ */
+DexVerifier::RegisterMap* DexVerifier::UncompressMapDifferential(
+    const RegisterMap* map) {
+  uint8_t format = map->format_;
+  RegisterMapFormat new_format;
+  int reg_width, num_entries, new_addr_width, new_data_size;
+
+  if (format != kRegMapFormatDifferential) {
+    /* Cast so the uint8_t is logged as a number, not a raw char. */
+    LOG(ERROR) << "Not differential (" << (int) format << ")";
+    return NULL;
+  }
+
+  reg_width = map->reg_width_;
+  num_entries = map->num_entries_;
+
+  /* Get the data size; we can check this at the end. */
+  const uint8_t* src_ptr = map->data_.get();
+  int expected_src_len = DecodeUnsignedLeb128(&src_ptr);
+  const uint8_t* src_start = src_ptr;
+
+  /* Get the initial address and the 16-bit address flag. */
+  int addr = *src_ptr & 0x7f;
+  if ((*src_ptr & 0x80) == 0) {
+    new_format = kRegMapFormatCompact8;
+    new_addr_width = 1;
+  } else {
+    new_format = kRegMapFormatCompact16;
+    new_addr_width = 2;
+  }
+  src_ptr++;
+
+  /* Now we know enough to allocate the new map. */
+  new_data_size = (new_addr_width + reg_width) * num_entries;
+  RegisterMap* new_map = new RegisterMap(new_format, reg_width, num_entries,
+      true, new_data_size);
+
+  /* Write the start address and initial bits to the new map. */
+  uint8_t* dst_ptr = new_map->data_.get();
+
+  *dst_ptr++ = addr & 0xff;
+  if (new_addr_width > 1)
+    *dst_ptr++ = (uint8_t) (addr >> 8);
+
+  memcpy(dst_ptr, src_ptr, reg_width);
+
+  int prev_addr = addr;
+  const uint8_t* prev_bits = dst_ptr; /* point at uncompressed data */
+
+  dst_ptr += reg_width;
+  src_ptr += reg_width;
+
+  /* Walk through, uncompressing one line at a time. */
+  int entry;
+  for (entry = 1; entry < num_entries; entry++) {
+    int addr_diff;
+    uint8_t key;
+
+    key = *src_ptr++;
+
+    /* Get the address. */
+    if ((key & 0x07) == 7) {
+      /* Address diff follows in ULEB128. */
+      addr_diff = DecodeUnsignedLeb128(&src_ptr);
+    } else {
+      addr_diff = (key & 0x07) + 1;
+    }
+
+    addr = prev_addr + addr_diff;
+    *dst_ptr++ = addr & 0xff;
+    if (new_addr_width > 1)
+      *dst_ptr++ = (uint8_t) (addr >> 8);
+
+    /* Unpack the bits. */
+    if ((key & 0x08) != 0) {
+      int bit_count = (key >> 4);
+      if (bit_count == 0) {
+        /* No bits changed, just copy previous. */
+        memcpy(dst_ptr, prev_bits, reg_width);
+      } else if (bit_count == 15) {
+        /* Full copy of bit vector is present; ignore prev_bits. */
+        memcpy(dst_ptr, src_ptr, reg_width);
+        src_ptr += reg_width;
+      } else {
+        /* Copy previous bits and modify listed indices. */
+        memcpy(dst_ptr, prev_bits, reg_width);
+        while (bit_count--) {
+          int bit_index = DecodeUnsignedLeb128(&src_ptr);
+          ToggleBit(dst_ptr, bit_index);
+        }
+      }
+    } else {
+      /* Copy previous bits and modify the specified one. */
+      memcpy(dst_ptr, prev_bits, reg_width);
+
+      /* One bit, from 0-15 inclusive, was changed. */
+      ToggleBit(dst_ptr, key >> 4);
+    }
+
+    prev_addr = addr;
+    prev_bits = dst_ptr;
+    dst_ptr += reg_width;
+  }
+
+  if (dst_ptr - new_map->data_.get() != new_data_size) {
+    LOG(ERROR) << "ERROR: output " << dst_ptr - new_map->data_.get()
+        << " bytes, expected " << new_data_size;
+    /* new_map was allocated with new; free() skipped the destructor (UB)
+     * and leaked data_. */
+    delete new_map;
+    return NULL;
+  }
+
+  if (src_ptr - src_start != expected_src_len) {
+    LOG(ERROR) << "ERROR: consumed " << src_ptr - src_start
+        << " bytes, expected " << expected_src_len;
+    delete new_map;
+    return NULL;
+  }
+
+  return new_map;
+}
+
} // namespace art
diff --git a/src/dex_verifier.h b/src/dex_verifier.h
index c89dfaf..a8351df 100644
--- a/src/dex_verifier.h
+++ b/src/dex_verifier.h
@@ -14,7 +14,7 @@
#define kMaxMonitorStackDepth (sizeof(MonitorEntries) * 8)
/*
- * Set this to enable dead code scanning. This is not required, but it's
+ * Set this to enable dead code scanning. This is not required, but it's
* very useful when testing changes to the verifier (to make sure we're not
* skipping over stuff). The only reason not to do it is that it slightly
* increases the time required to perform verification.
@@ -26,7 +26,7 @@
#endif
/*
- * We need an extra "pseudo register" to hold the return type briefly. It
+ * We need an extra "pseudo register" to hold the return type briefly. It
* can be category 1 or 2, so we need two slots.
*/
#define kExtraRegs 2
@@ -36,7 +36,7 @@
public:
/*
* RegType holds information about the type of data held in a register.
- * For most types it's a simple enum. For reference types it holds a
+ * For most types it's a simple enum. For reference types it holds a
* pointer to the ClassObject, and for uninitialized references it holds
* an index into the UninitInstanceMap.
*/
@@ -44,7 +44,7 @@
/*
* A bit vector indicating which entries in the monitor stack are
- * associated with this register. The low bit corresponds to the stack's
+ * associated with this register. The low bit corresponds to the stack's
* bottom-most entry.
*/
typedef uint32_t MonitorEntries;
@@ -71,12 +71,12 @@
};
/*
- * "Direct" and "virtual" methods are stored independently. The type of call
+ * "Direct" and "virtual" methods are stored independently. The type of call
* used to invoke the method determines which list we search, and whether
* we travel up into superclasses.
*
* (<clinit>, <init>, and methods declared "private" or "static" are stored
- * in the "direct" list. All others are stored in the "virtual" list.)
+ * in the "direct" list. All others are stored in the "virtual" list.)
*/
enum MethodType {
METHOD_UNKNOWN = 0,
@@ -98,7 +98,7 @@
};
/*
- * Enumeration for register type values. The "hi" piece of a 64-bit value
+ * Enumeration for register type values. The "hi" piece of a 64-bit value
* MUST immediately follow the "lo" piece in the enumeration, so we can check
* that hi==lo+1.
*
@@ -125,7 +125,7 @@
*
* In addition, all of the above can convert to "float".
*
- * We're more careful with integer values than the spec requires. The
+ * We're more careful with integer values than the spec requires. The
* motivation is to restrict byte/char/short to the correct range of values.
* For example, if a method takes a byte argument, we don't want to allow
* the code to load the constant "1024" and pass it in.
@@ -136,7 +136,7 @@
kRegTypeConflict, /* merge clash makes this reg's type unknowable */
/*
- * Category-1nr types. The order of these is chiseled into a couple
+ * Category-1nr types. The order of these is chiseled into a couple
* of tables, so don't add, remove, or reorder if you can avoid it.
*/
#define kRegType1nrSTART kRegTypeZero
@@ -167,9 +167,9 @@
/*
* Enumeration max; this is used with "full" (32-bit) RegType values.
*
- * Anything larger than this is a ClassObject or uninit ref. Mask off
+ * Anything larger than this is a ClassObject or uninit ref. Mask off
* all but the low 8 bits; if you're left with kRegTypeUninit, pull
- * the uninit index out of the high 24. Because kRegTypeUninit has an
+ * the uninit index out of the high 24. Because kRegTypeUninit has an
* odd value, there is no risk of a particular ClassObject pointer bit
* pattern being confused for it (assuming our class object allocator
* uses word alignment).
@@ -183,9 +183,9 @@
* Register type categories, for type checking.
*
* The spec says category 1 includes boolean, byte, char, short, int, float,
- * reference, and returnAddress. Category 2 includes long and double.
+ * reference, and returnAddress. Category 2 includes long and double.
*
- * We treat object references separately, so we have "category1nr". We
+ * We treat object references separately, so we have "category1nr". We
* don't support jsr/ret, so there is no "returnAddress" type.
*/
enum TypeCategory {
@@ -225,13 +225,24 @@
#define kVerifyErrorRefTypeShift 6
/*
+ * Format enumeration for RegisterMap data area.
+ */
+  enum RegisterMapFormat {
+    /*
+     * NOTE(review): format_ is stored in a uint8_t and a map may be mapped
+     * out of a post-dexopt DEX file, so these values look like part of a
+     * serialized format -- confirm before renumbering.
+     */
+    kRegMapFormatUnknown = 0,
+    kRegMapFormatNone, /* indicates no map data follows */
+    kRegMapFormatCompact8, /* compact layout, 8-bit addresses */
+    kRegMapFormatCompact16, /* compact layout, 16-bit addresses */
+    kRegMapFormatDifferential, /* compressed, differential encoding */
+  };
+
+ /*
* During verification, we associate one of these with every "interesting"
- * instruction. We track the status of all registers, and (if the method
+ * instruction. We track the status of all registers, and (if the method
* has any monitor-enter instructions) maintain a stack of entered monitors
* (identified by code unit offset).
*
* If live-precise register maps are enabled, the "liveRegs" vector will
- * be populated. Unlike the other lists of registers here, we do not
+ * be populated. Unlike the other lists of registers here, we do not
* track the liveness of the method result register (which is not visible
* to the GC).
*/
@@ -242,7 +253,8 @@
uint32_t monitor_stack_top_;
RegisterLine()
- : reg_types_(NULL), monitor_entries_(NULL), monitor_stack_(NULL), monitor_stack_top_(0) {
+ : reg_types_(NULL), monitor_entries_(NULL), monitor_stack_(NULL),
+ monitor_stack_top_(0) {
}
/* Allocate space for the fields. */
@@ -258,14 +270,14 @@
/* Big fat collection of register data. */
struct RegisterTable {
/*
- * Array of RegisterLine structs, one per address in the method. We only
+ * Array of RegisterLine structs, one per address in the method. We only
* set the pointers for certain addresses, based on instruction widths
* and what we're trying to accomplish.
*/
UniquePtr<RegisterLine[]> register_lines_;
/*
- * Number of registers we track for each instruction. This is equal
+ * Number of registers we track for each instruction. This is equal
* to the method's declared "registersSize" plus kExtraRegs (2).
*/
size_t insn_reg_count_plus_;
@@ -291,7 +303,7 @@
/*
* Table that maps uninitialized instances to classes, based on the
- * address of the new-instance instruction. One per method.
+ * address of the new-instance instruction. One per method.
*/
struct UninitInstanceMap {
int num_entries_;
@@ -326,9 +338,9 @@
UniquePtr<UninitInstanceMap> uninit_map_;
/*
- * Array of RegisterLine structs, one entry per code unit. We only need
+ * Array of RegisterLine structs, one entry per code unit. We only need
* entries for code units that hold the start of an "interesting"
- * instruction. For register map generation, we're only interested
+ * instruction. For register map generation, we're only interested
* in GC points.
*/
RegisterLine* register_lines_;
@@ -341,15 +353,45 @@
const DexFile::CodeItem* code_item)
: method_(method), dex_file_(dex_file), code_item_(code_item),
insn_flags_(NULL), uninit_map_(NULL), register_lines_(NULL),
- new_instance_count_(0), monitor_enter_count_(0) { }
+ new_instance_count_(0), monitor_enter_count_(0) {
+ }
};
/*
- * Merge result table for primitive values. The table is symmetric along
+ * This is a single variable-size structure. It may be allocated on the
+ * heap or mapped out of a (post-dexopt) DEX file.
+ *
+ * 32-bit alignment of the structure is NOT guaranteed. This makes it a
+ * little awkward to deal with as a structure; to avoid accidents we use
+ * only byte types. Multi-byte values are little-endian.
+ *
+ * Size of (format==FormatNone): 1 byte
+ * Size of (format==FormatCompact8): 4 + (1 + reg_width) * num_entries
+ * Size of (format==FormatCompact16): 4 + (2 + reg_width) * num_entries
+ *
+ * NOTE(review): the "4 +" above presumably counts a serialized 4-byte
+ * header; ComputeRegisterMapSize() returns only the data-area size,
+ * (1|2 + reg_width) * num_entries -- confirm which form is meant here.
+ */
+  struct RegisterMap {
+    /* header */
+    uint8_t format_; /* enum RegisterMapFormat; MUST be first entry */
+    uint8_t reg_width_; /* bytes per register line, 1+ */
+    uint16_t num_entries_; /* number of entries */
+    bool format_on_heap_; /* indicates allocation on heap */
+
+    /* raw data starts here; need not be aligned */
+    UniquePtr<uint8_t[]> data_;
+
+    /* Allocates a zero-initialized data area of "data_size" bytes
+     * (value-initializing new[]). */
+    RegisterMap(uint8_t format, uint8_t reg_width, uint16_t num_entries,
+        bool format_on_heap, uint32_t data_size)
+        : format_(format), reg_width_(reg_width), num_entries_(num_entries),
+          format_on_heap_(format_on_heap), data_(new uint8_t[data_size]()) {
+    }
+  };
+
+ /*
+ * Merge result table for primitive values. The table is symmetric along
* the diagonal.
*
- * Note that 32-bit int/float do not merge into 64-bit long/double. This
- * is a register merge, not a widening conversion. Only the "implicit"
+ * Note that 32-bit int/float do not merge into 64-bit long/double. This
+ * is a register merge, not a widening conversion. Only the "implicit"
* widening within a category, e.g. byte to short, is allowed.
*
* Dalvik does not draw a distinction between int and float, but we enforce
@@ -357,11 +399,11 @@
* versa. We do not allow free exchange between 32-bit int/float and 64-bit
* long/double.
*
- * Note that Uninit+Uninit=Uninit. This holds true because we only
+ * Note that Uninit+Uninit=Uninit. This holds true because we only
* use this when the RegType value is exactly equal to kRegTypeUninit, which
* can only happen for the zeroeth entry in the table.
*
- * "Unknown" never merges with anything known. The only time a register
+ * "Unknown" never merges with anything known. The only time a register
* transitions from "unknown" to "known" is when we're executing code
* for the first time, and we handle that with a simple copy.
*/
@@ -510,7 +552,7 @@
* (3) Iterate through the method, checking type safety and looking
* for code flow problems.
*
- * Some checks may be bypassed depending on the verification mode. We can't
+ * Some checks may be bypassed depending on the verification mode. We can't
* turn this stuff off completely if we want to do "exact" GC.
*
* Confirmed here:
@@ -571,7 +613,7 @@
/*
* Compute the width of the instruction at each address in the instruction
- * stream, and store it in vdata->insn_flags. Addresses that are in the
+ * stream, and store it in vdata->insn_flags. Addresses that are in the
* middle of an instruction, or that are part of switch table data, are not
* touched (so the caller should probably initialize "insn_flags" to zero).
*
@@ -609,14 +651,14 @@
bool* pConditional, bool* selfOkay);
/*
- * Verify an array data table. "cur_offset" is the offset of the
+ * Verify an array data table. "cur_offset" is the offset of the
* fill-array-data instruction.
*/
static bool CheckArrayData(const DexFile::CodeItem* code_item,
uint32_t cur_offset);
/*
- * Perform static checks on a "new-instance" instruction. Specifically,
+ * Perform static checks on a "new-instance" instruction. Specifically,
* make sure the class reference isn't for an array class.
*
* We don't need the actual class, just a pointer to the class name.
@@ -624,7 +666,7 @@
static bool CheckNewInstance(const DexFile* dex_file, uint32_t idx);
/*
- * Perform static checks on a "new-array" instruction. Specifically, make
+ * Perform static checks on a "new-array" instruction. Specifically, make
* sure they aren't creating an array of arrays that causes the number of
* dimensions to exceed 255.
*/
@@ -637,13 +679,13 @@
static bool CheckTypeIndex(const DexFile* dex_file, uint32_t idx);
/*
- * Perform static checks on a field get or set instruction. All we do
+ * Perform static checks on a field get or set instruction. All we do
* here is ensure that the field index is in the valid range.
*/
static bool CheckFieldIndex(const DexFile* dex_file, uint32_t idx);
/*
- * Perform static checks on a method invocation instruction. All we do
+ * Perform static checks on a method invocation instruction. All we do
* here is ensure that the method index is in the valid range.
*/
static bool CheckMethodIndex(const DexFile* dex_file, uint32_t idx);
@@ -667,7 +709,7 @@
*
* There are some tests we don't do here, e.g. we don't try to verify
* that invoking a method that takes a double is done with consecutive
- * registers. This requires parsing the target method signature, which
+ * registers. This requires parsing the target method signature, which
* we will be doing later on during the code flow analysis.
*/
static bool CheckVarArgRegs(const DexFile::CodeItem* code_item, uint32_t vA,
@@ -696,7 +738,7 @@
*
* We don't expect code to jump directly into an exception handler, but
* it's valid to do so as long as the target isn't a "move-exception"
- * instruction. We verify that in a later stage.
+ * instruction. We verify that in a later stage.
*
* The dex format forbids certain instructions from branching to itself.
*
@@ -762,7 +804,7 @@
#ifndef NDEBUG
/*
- * Compare two register lines. Returns 0 if they match.
+ * Compare two register lines. Returns 0 if they match.
*
* Using this for a sort is unwise, since the value can change based on
* machine endianness.
@@ -801,11 +843,11 @@
/*
* Create a new uninitialized instance map.
*
- * The map is allocated and populated with address entries. The addresses
+ * The map is allocated and populated with address entries. The addresses
* appear in ascending order to allow binary searching.
*
* Very few methods have 10 or more new-instance instructions; the
- * majority have 0 or 1. Occasionally a static initializer will have 200+.
+ * majority have 0 or 1. Occasionally a static initializer will have 200+.
*
* TODO: merge this into the static pass or initRegisterTable; want to
* avoid walking through the instructions yet again just to set up this table
@@ -824,7 +866,7 @@
const char* descriptor, VerifyError* failure);
/*
- * Look up a class reference in a signature. Could be an arg or the
+ * Look up a class reference in a signature. Could be an arg or the
* return value.
*
* Advances "*sig" to the last character in the signature (that is, to
@@ -836,7 +878,7 @@
VerifyError* failure);
/*
- * Look up an array class reference in a signature. Could be an arg or the
+ * Look up an array class reference in a signature. Could be an arg or the
* return value.
*
* Advances "*sig" to the last character in the signature.
@@ -896,14 +938,14 @@
* because it's easier to do them here.)
*
* We need an array of RegType values, one per register, for every
- * instruction. If the method uses monitor-enter, we need extra data
+ * instruction. If the method uses monitor-enter, we need extra data
* for every register, and a stack for every "interesting" instruction.
* In theory this could become quite large -- up to several megabytes for
* a monster function.
*
* NOTE:
* The spec forbids backward branches when there's an uninitialized reference
- * in a register. The idea is to prevent something like this:
+ * in a register. The idea is to prevent something like this:
* loop:
* move r1, r0
* new-instance r0, MyClass
@@ -912,9 +954,9 @@
* initialize r0
*
* This leaves us with two different instances, both allocated by the
- * same instruction, but only one is initialized. The scheme outlined in
+ * same instruction, but only one is initialized. The scheme outlined in
* v3 4.11.1.4 wouldn't catch this, so they work around it by preventing
- * backward branches. We achieve identical results without restricting
+ * backward branches. We achieve identical results without restricting
* code reordering by specifying that you can't execute the new-instance
* instruction if a register contains an uninitialized instance created
* by that same instrutcion.
@@ -929,24 +971,24 @@
* it has on registers.
*
* Finds zero or more following instructions and sets the "changed" flag
- * if execution at that point needs to be (re-)evaluated. Register changes
- * are merged into "reg_types_" at the target addresses. Does not set or
+ * if execution at that point needs to be (re-)evaluated. Register changes
+ * are merged into "reg_types_" at the target addresses. Does not set or
* clear any other flags in "insn_flags".
*/
static bool CodeFlowVerifyInstruction(VerifierData* vdata,
RegisterTable* reg_table, uint32_t insn_idx, size_t* start_guess);
/*
- * Replace an instruction with "throw-verification-error". This allows us to
+ * Replace an instruction with "throw-verification-error". This allows us to
* defer error reporting until the code path is first used.
*
* This is expected to be called during "just in time" verification, not
- * from within dexopt. (Verification failures in dexopt will result in
+ * from within dexopt. (Verification failures in dexopt will result in
* postponement of verification to first use of the class.)
*
- * The throw-verification-error instruction requires two code units. Some
+ * The throw-verification-error instruction requires two code units. Some
* of the replaced instructions require three; the third code unit will
- * receive a "nop". The instruction's length will be left unchanged
+ * receive a "nop". The instruction's length will be left unchanged
* in "insn_flags".
*
* The VM postpones setting of debugger breakpoints in unverified classes,
@@ -967,15 +1009,15 @@
/*
* Look up an instance field, specified by "field_idx", that is going to be
- * accessed in object "obj_type". This resolves the field and then verifies
+ * accessed in object "obj_type". This resolves the field and then verifies
* that the class containing the field is an instance of the reference in
* "obj_type".
*
* It is possible for "obj_type" to be kRegTypeZero, meaning that we might
- * have a null reference. This is a runtime problem, so we allow it,
+ * have a null reference. This is a runtime problem, so we allow it,
* skipping some of the type checks.
*
- * In general, "obj_type" must be an initialized reference. However, we
+ * In general, "obj_type" must be an initialized reference. However, we
* allow it to be uninitialized if this is an "<init>" method and the field
* is declared within the "obj_type" class.
*
@@ -995,7 +1037,7 @@
/*
* For the "move-exception" instruction at "insn_idx", which must be at an
* exception handler address, determine the first common superclass of
- * all exceptions that can land here. (For javac output, we're probably
+ * all exceptions that can land here. (For javac output, we're probably
* looking at multiple spans of bytecode covered by one "try" that lands
* at an exception-specific "catch", but in general the handler could be
* shared for multiple exceptions.)
@@ -1018,7 +1060,7 @@
}
/*
- * Return the register type for the method. We can't just use the
+ * Return the register type for the method. We can't just use the
* already-computed DalvikJniReturnType, because if it's a reference type
* we need to do the class lookup.
*
@@ -1030,7 +1072,7 @@
const Method* method);
/*
- * Get the value from a register, and cast it to a Class. Sets
+ * Get the value from a register, and cast it to a Class. Sets
* "*failure" if something fails.
*
* This fails if the register holds an uninitialized class.
@@ -1041,20 +1083,20 @@
uint32_t vsrc, VerifyError* failure);
/*
- * Get the "this" pointer from a non-static method invocation. This
+ * Get the "this" pointer from a non-static method invocation. This
* returns the RegType so the caller can decide whether it needs the
- * reference to be initialized or not. (Can also return kRegTypeZero
+ * reference to be initialized or not. (Can also return kRegTypeZero
* if the reference can only be zero at this point.)
*
* The argument count is in vA, and the first argument is in vC, for both
- * "simple" and "range" versions. We just need to make sure vA is >= 1
+ * "simple" and "range" versions. We just need to make sure vA is >= 1
* and then return vC.
*/
static RegType GetInvocationThis(const RegisterLine* register_line,
const Instruction::DecodedInstruction* dec_insn, VerifyError* failure);
/*
- * Set the type of register N, verifying that the register is valid. If
+ * Set the type of register N, verifying that the register is valid. If
* "new_type" is the "Lo" part of a 64-bit value, register N+1 will be
* set to "new_type+1".
*
@@ -1074,7 +1116,7 @@
* derived from a constant to prevent mixing of int/float and long/double.
*
* If "vsrc" is a reference, both it and the "vsrc" register must be
- * initialized ("vsrc" may be Zero). This will verify that the value in
+ * initialized ("vsrc" may be Zero). This will verify that the value in
* the register is an instance of check_type, or if check_type is an
* interface, verify that the register implements check_type.
*/
@@ -1087,7 +1129,7 @@
/*
* Update all registers holding "uninit_type" to instead hold the
- * corresponding initialized reference type. This is called when an
+ * corresponding initialized reference type. This is called when an
* appropriate <init> method is invoked -- all copies of the reference
* must be marked as initialized.
*/
@@ -1096,21 +1138,21 @@
VerifyError* failure);
/*
- * Implement category-1 "move" instructions. Copy a 32-bit value from
+ * Implement category-1 "move" instructions. Copy a 32-bit value from
* "vsrc" to "vdst".
*/
static void CopyRegister1(RegisterLine* register_line, uint32_t vdst,
uint32_t vsrc, TypeCategory cat, VerifyError* failure);
/*
- * Implement category-2 "move" instructions. Copy a 64-bit value from
- * "vsrc" to "vdst". This copies both halves of the register.
+ * Implement category-2 "move" instructions. Copy a 64-bit value from
+ * "vsrc" to "vdst". This copies both halves of the register.
*/
static void CopyRegister2(RegisterLine* register_line, uint32_t vdst,
uint32_t vsrc, VerifyError* failure);
/*
- * Implement "move-result". Copy the category-1 value from the result
+ * Implement "move-result". Copy the category-1 value from the result
* register to another register, and reset the result register.
*/
static void CopyResultRegister1(RegisterLine* register_line,
@@ -1118,22 +1160,22 @@
VerifyError* failure);
/*
- * Implement "move-result-wide". Copy the category-2 value from the result
+ * Implement "move-result-wide". Copy the category-2 value from the result
* register to another register, and reset the result register.
*/
static void CopyResultRegister2(RegisterLine* register_line,
const int insn_reg_count, uint32_t vdst, VerifyError* failure);
/*
- * Compute the "class depth" of a class. This is the distance from the
- * class to the top of the tree, chasing superclass links. java.lang.Object
+ * Compute the "class depth" of a class. This is the distance from the
+ * class to the top of the tree, chasing superclass links. java.lang.Object
* has a class depth of 0.
*/
static int GetClassDepth(Class* klass);
/*
* Given two classes, walk up the superclass tree to find a common
- * ancestor. (Called from findCommonSuperclass().)
+ * ancestor. (Called from findCommonSuperclass().)
*
* TODO: consider caching the class depth in the class object so we don't
* have to search for it here.
@@ -1141,9 +1183,9 @@
static Class* DigForSuperclass(Class* c1, Class* c2);
/*
- * Merge two array classes. We can't use the general "walk up to the
+ * Merge two array classes. We can't use the general "walk up to the
* superclass" merge because the superclass of an array is always Object.
- * We want String[] + Integer[] = Object[]. This works for higher dimensions
+ * We want String[] + Integer[] = Object[]. This works for higher dimensions
* as well, e.g. String[][] + Integer[][] = Object[][].
*
* If Foo1 and Foo2 are subclasses of Foo, Foo1[] + Foo2[] = Foo[].
@@ -1154,19 +1196,19 @@
* with the least dimension, e.g. String[][] + String[][][][] = Object[][].
*
* Arrays of primitive types effectively have one less dimension when
- * merging. int[] + float[] = Object, int[] + String[] = Object,
- * int[][] + float[][] = Object[], int[][] + String[] = Object[]. (The
+ * merging. int[] + float[] = Object, int[] + String[] = Object,
+ * int[][] + float[][] = Object[], int[][] + String[] = Object[]. (The
* only time this function doesn't return an array class is when one of
* the arguments is a 1-dimensional primitive array.)
*
* This gets a little awkward because we may have to ask the VM to create
- * a new array type with the appropriate element and dimensions. However, we
+ * a new array type with the appropriate element and dimensions. However, we
* shouldn't be doing this often.
*/
static Class* FindCommonArraySuperclass(Class* c1, Class* c2);
/*
- * Find the first common superclass of the two classes. We're not
+ * Find the first common superclass of the two classes. We're not
* interested in common interfaces.
*
* The easiest way to do this for concrete classes is to compute the "class
@@ -1177,21 +1219,21 @@
* element type.
*
* If one class is an interface, we check to see if the other class/interface
- * (or one of its predecessors) implements the interface. If so, we return
+ * (or one of its predecessors) implements the interface. If so, we return
* the interface; otherwise, we return Object.
*
- * NOTE: we continue the tradition of "lazy interface handling". To wit,
+ * NOTE: we continue the tradition of "lazy interface handling". To wit,
* suppose we have three classes:
* One implements Fancy, Free
* Two implements Fancy, Free
* Three implements Free
- * where Fancy and Free are unrelated interfaces. The code requires us
- * to merge One into Two. Ideally we'd use a common interface, which
+ * where Fancy and Free are unrelated interfaces. The code requires us
+ * to merge One into Two. Ideally we'd use a common interface, which
* gives us a choice between Fancy and Free, and no guidance on which to
- * use. If we use Free, we'll be okay when Three gets merged in, but if
- * we choose Fancy, we're hosed. The "ideal" solution is to create a
+ * use. If we use Free, we'll be okay when Three gets merged in, but if
+ * we choose Fancy, we're hosed. The "ideal" solution is to create a
* set of common interfaces and carry that around, merging further references
- * into it. This is a pain. The easy solution is to simply boil them
+ * into it. This is a pain. The easy solution is to simply boil them
* down to Objects and let the runtime invokeinterface call fail, which
* is what we do.
*/
@@ -1216,9 +1258,9 @@
MonitorEntries ents2, bool* changed);
/*
- * We're creating a new instance of class C at address A. Any registers
+ * We're creating a new instance of class C at address A. Any registers
* holding instances previously created at address A must be initialized
- * by now. If not, we mark them as "conflict" to prevent them from being
+ * by now. If not, we mark them as "conflict" to prevent them from being
* used (otherwise, MarkRefsAsInitialized would mark the old ones and the
* new ones at the same time).
*/
@@ -1284,11 +1326,11 @@
VerifyError* failure);
/*
- * Check constraints on constructor return. Specifically, make sure that
+ * Check constraints on constructor return. Specifically, make sure that
* the "this" argument got initialized.
*
* The "this" argument to <init> uses code offset kUninitThisArgAddr, which
- * puts it at the start of the list in slot 0. If we see a register with
+ * puts it at the start of the list in slot 0. If we see a register with
* an uninitialized slot 0 reference, we know it somehow didn't get
* initialized.
*
@@ -1298,7 +1340,7 @@
const RegisterLine* register_line, const int insn_reg_count);
/*
- * Verify that the target instruction is not "move-exception". It's important
+ * Verify that the target instruction is not "move-exception". It's important
* that the only way to execute a move-exception is as the first instruction
* of an exception handler.
*
@@ -1308,11 +1350,11 @@
static bool CheckMoveException(const uint16_t* insns, int insn_idx);
/*
- * See if "type" matches "cat". All we're really looking for here is that
+ * See if "type" matches "cat". All we're really looking for here is that
* we're not mixing and matching 32-bit and 64-bit quantities, and we're
- * not mixing references with numerics. (For example, the arguments to
+ * not mixing references with numerics. (For example, the arguments to
* "a < b" could be integers of different sizes, but they must both be
- * integers. Dalvik is less specific about int vs. float, so we treat them
+ * integers. Dalvik is less specific about int vs. float, so we treat them
* as equivalent here.)
*
* For category 2 values, "type" must be the "low" half of the value.
@@ -1351,7 +1393,7 @@
VerifyError* failure);
/*
- * Verify types for a binary "2addr" operation. "src_type1"/"src_type2"
+ * Verify types for a binary "2addr" operation. "src_type1"/"src_type2"
* are verified against vA/vB, then "dst_type" is stored into vA.
*/
static void CheckBinop2addr(RegisterLine* register_line,
@@ -1365,26 +1407,26 @@
* For example, right-shifting an int 24 times results in a value that can
* be treated as a byte.
*
- * Things get interesting when contemplating sign extension. Right-
+ * Things get interesting when contemplating sign extension. Right-
* shifting an integer by 16 yields a value that can be represented in a
* "short" but not a "char", but an unsigned right shift by 16 yields a
- * value that belongs in a char rather than a short. (Consider what would
+ * value that belongs in a char rather than a short. (Consider what would
* happen if the result of the shift were cast to a char or short and then
- * cast back to an int. If sign extension, or the lack thereof, causes
+ * cast back to an int. If sign extension, or the lack thereof, causes
* a change in the 32-bit representation, then the conversion was lossy.)
*
- * A signed right shift by 17 on an integer results in a short. An unsigned
+ * A signed right shift by 17 on an integer results in a short. An unsigned
* right shfit by 17 on an integer results in a posshort, which can be
* assigned to a short or a char.
*
* An unsigned right shift on a short can actually expand the result into
- * a 32-bit integer. For example, 0xfffff123 >>> 8 becomes 0x00fffff1,
+ * a 32-bit integer. For example, 0xfffff123 >>> 8 becomes 0x00fffff1,
* which can't be represented in anything smaller than an int.
*
* javac does not generate code that takes advantage of this, but some
- * of the code optimizers do. It's generally a peephole optimization
+ * of the code optimizers do. It's generally a peephole optimization
* that replaces a particular sequence, e.g. (bipush 24, ishr, i2b) is
- * replaced by (bipush 24, ishr). Knowing that shifting a short 8 times
+ * replaced by (bipush 24, ishr). Knowing that shifting a short 8 times
* to the right yields a byte is really more than we need to handle the
* code that's out there, but support is not much more complex than just
* handling integer.
@@ -1398,16 +1440,16 @@
/*
* We're performing an operation like "and-int/2addr" that can be
- * performed on booleans as well as integers. We get no indication of
+ * performed on booleans as well as integers. We get no indication of
* boolean-ness, but we can infer it from the types of the arguments.
*
* Assumes we've already validated reg1/reg2.
*
- * TODO: consider generalizing this. The key principle is that the
+ * TODO: consider generalizing this. The key principle is that the
* result of a bitwise operation can only be as wide as the widest of
- * the operands. You can safely AND/OR/XOR two chars together and know
+ * the operands. You can safely AND/OR/XOR two chars together and know
* you still have a char, so it's reasonable for the compiler or "dx"
- * to skip the int-to-char instruction. (We need to do this for boolean
+ * to skip the int-to-char instruction. (We need to do this for boolean
* because there is no int-to-boolean operation.)
*
* Returns true if both args are Boolean, Zero, or One.
@@ -1417,7 +1459,7 @@
/*
* Verify types for A two-register instruction with a literal constant
- * (e.g. "add-int/lit8"). "dst_type" is stored into vA, and "src_type" is
+ * (e.g. "add-int/lit8"). "dst_type" is stored into vA, and "src_type" is
* verified against vB.
*
* If "check_boolean_op" is set, we use the constant value in vC.
@@ -1440,18 +1482,18 @@
static bool IsCorrectInvokeKind(MethodType method_type, Method* res_method);
/*
- * Verify the arguments to a method. We're executing in "method", making
+ * Verify the arguments to a method. We're executing in "method", making
* a call to the method reference in vB.
*
- * If this is a "direct" invoke, we allow calls to <init>. For calls to
- * <init>, the first argument may be an uninitialized reference. Otherwise,
+ * If this is a "direct" invoke, we allow calls to <init>. For calls to
+ * <init>, the first argument may be an uninitialized reference. Otherwise,
* calls to anything starting with '<' will be rejected, as will any
* uninitialized reference arguments.
*
* For non-static method calls, this will verify that the method call is
* appropriate for the "this" argument.
*
- * The method reference is in vBBBB. The "is_range" parameter determines
+ * The method reference is in vBBBB. The "is_range" parameter determines
* whether we use 0-4 "args" values or a range of registers defined by
* vAA and vCCCC.
*
@@ -1466,6 +1508,95 @@
const Instruction::DecodedInstruction* dec_insn, MethodType method_type,
bool is_range, bool is_super, VerifyError* failure);
+ /*
+ * Generate the register map for a method that has just been verified
+ * (i.e. we're doing this as part of verification).
+ *
+ * For type-precise determination we have all the data we need, so we
+ * just need to encode it in some clever fashion.
+ *
+ * Returns a pointer to a newly-allocated RegisterMap, or NULL on failure.
+ */
+ static RegisterMap* GenerateRegisterMapV(VerifierData* vdata);
+
+ /*
+ * Determine if the RegType value is a reference type.
+ *
+ * Ordinarily we include kRegTypeZero in the "is it a reference"
+ * check. There's no value in doing so here, because we know
+ * the register can't hold anything but zero.
+ */
+ static inline bool IsReferenceType(RegType type) {
+ return (type > kRegTypeMAX || type == kRegTypeUninit);
+ }
+
+ /* Toggle the value of the "idx"th bit in "ptr". */
+ static inline void ToggleBit(uint8_t* ptr, int idx) {
+ ptr[idx >> 3] ^= 1 << (idx & 0x07);
+ }
+
+ /*
+ * Given a line of registers, output a bit vector that indicates whether
+ * or not the register holds a reference type (which could be null).
+ *
+ * We use '1' to indicate it's a reference, '0' for anything else (numeric
+ * value, uninitialized data, merge conflict). Register 0 will be found
+ * in the low bit of the first byte.
+ */
+ static void OutputTypeVector(const RegType* regs, int insn_reg_count,
+ uint8_t* data);
+
+ /*
+ * Double-check the map.
+ *
+ * We run through all of the data in the map, and compare it to the original.
+ * Only works on uncompressed data.
+ */
+ static bool VerifyMap(VerifierData* vdata, const RegisterMap* map);
+
+ /* Compare two register maps. Returns true if they're equal, false if not. */
+ static bool CompareMaps(const RegisterMap* map1, const RegisterMap* map2);
+
+ /* Compute the size, in bytes, of a register map. */
+ static size_t ComputeRegisterMapSize(const RegisterMap* map);
+
+ /*
+ * Compute the difference between two bit vectors.
+ *
+ * If "leb_out_buf" is non-NULL, we output the bit indices in ULEB128 format
+ * as we go. Otherwise, we just generate the various counts.
+ *
+ * The bit vectors are compared byte-by-byte, so any unused bits at the
+ * end must be zero.
+ *
+ * Returns the number of bytes required to hold the ULEB128 output.
+ *
+ * If "first_bit_changed_ptr" or "num_bits_changed_ptr" are non-NULL, they
+ * will receive the index of the first changed bit and the number of changed
+ * bits, respectively.
+ */
+ static int ComputeBitDiff(const uint8_t* bits1, const uint8_t* bits2,
+ int byte_width, int* first_bit_changed_ptr, int* num_bits_changed_ptr,
+ uint8_t* leb_out_buf);
+
+ /*
+ * Compress the register map with differential encoding.
+ *
+ * On success, returns a newly-allocated RegisterMap. If the map is not
+ * compatible for some reason, or fails to get smaller, this will return NULL.
+ */
+ static RegisterMap* CompressMapDifferential(const RegisterMap* map);
+
+ /*
+ * Expand a compressed map to an uncompressed form.
+ *
+ * Returns a newly-allocated RegisterMap on success, or NULL on failure.
+ *
+ * TODO: consider using the linear allocator or a custom allocator with
+ * LRU replacement for these instead of the native heap.
+ */
+ static RegisterMap* UncompressMapDifferential(const RegisterMap* map);
+
DISALLOW_COPY_AND_ASSIGN(DexVerifier);
};
diff --git a/src/leb128.h b/src/leb128.h
index bc5dd1a..ad55199 100644
--- a/src/leb128.h
+++ b/src/leb128.h
@@ -79,6 +79,32 @@
return result;
}
+// Returns the number of bytes needed to encode the value in unsigned LEB128.
+static inline uint32_t UnsignedLeb128Size(uint32_t data) {
+ uint32_t count = 0;
+ do {
+ data >>= 7;
+ count++;
+ } while (data != 0);
+ return count;
+}
+
+// Writes a 32-bit value in ULEB128 format.
+// Returns the updated pointer.
+static inline uint8_t* WriteUnsignedLeb128(uint8_t* ptr, uint32_t data) {
+ while (true) {
+ uint8_t out = data & 0x7f;
+ if (out != data) {
+ *ptr++ = out | 0x80;
+ data >>= 7;
+ } else {
+ *ptr++ = out;
+ break;
+ }
+ }
+ return ptr;
+}
+
} // namespace art
#endif // ART_SRC_LEB128_H_