diff options
| author | 2018-09-06 11:30:36 +0100 | |
|---|---|---|
| committer | 2018-09-12 12:58:57 +0100 | |
| commit | ff74a74a7c0535a51d677c36fd6a37c34d30ea38 (patch) | |
| tree | 156c0e1cf4334f05e7bcea05b5e8e1ff5efce7bf /runtime/interpreter/mterp/gen_mterp.py | |
| parent | ee6ad006e25eb07f4866935255d13b3d87cd46e5 (diff) | |
Rewrite the mterp code generator and the template snippets.
This changes the code generation, but the generated assembly
files are unchanged (except for some whitespace differences).
This replaces the custom template meta-language with python.
All the architecture-specific template files are concatenated to create
one big python script. This generated python script is then executed to
produce the final assembly file. The template syntax is:
* Lines starting with % are python code. They will be copied as-is to
the script (without the %) and thus executed during the generation.
* Other lines are text, and they are essentially syntactic sugar for
out.write('''(line text)''') and thus they write the main output.
* Within a text line, $ can be used to insert variables from code.
This makes the code generation simpler and it will make it possible
to use the full power of python within the snippets to simplify code.
Test: test-art-host-gtest
Change-Id: I8325ca406b328f82163241b3d698f94de5e38bff
Diffstat (limited to 'runtime/interpreter/mterp/gen_mterp.py')
| -rwxr-xr-x | runtime/interpreter/mterp/gen_mterp.py | 697 |
1 files changed, 100 insertions, 597 deletions
diff --git a/runtime/interpreter/mterp/gen_mterp.py b/runtime/interpreter/mterp/gen_mterp.py index 75c5174bcb..cf69bcecc3 100755 --- a/runtime/interpreter/mterp/gen_mterp.py +++ b/runtime/interpreter/mterp/gen_mterp.py @@ -14,605 +14,108 @@ # See the License for the specific language governing permissions and # limitations under the License. -# -# Using instructions from an architecture-specific config file, generate C -# and assembly source files for the Dalvik interpreter. -# - -import sys, string, re, time -from string import Template - -interp_defs_file = "../../../libdexfile/dex/dex_instruction_list.h" # need opcode list -kNumPackedOpcodes = 256 - -splitops = False -verbose = False -handler_size_bits = -1000 -handler_size_bytes = -1000 -in_op_start = 0 # 0=not started, 1=started, 2=ended -in_alt_op_start = 0 # 0=not started, 1=started, 2=ended -default_op_dir = None -default_alt_stub = None -opcode_locations = {} -alt_opcode_locations = {} -asm_stub_text = [] -fallback_stub_text = [] -label_prefix = ".L" # use ".L" to hide labels from gdb -alt_label_prefix = ".L_ALT" # use ".L" to hide labels from gdb -style = None # interpreter style -generate_alt_table = False -function_type_format = ".type %s, %%function" -function_size_format = ".size %s, .-%s" -global_name_format = "%s" - -# Exception class. -class DataParseError(SyntaxError): - "Failure when parsing data file" - -# -# Set any omnipresent substitution values. -# -def getGlobalSubDict(): - return { "handler_size_bits":handler_size_bits, - "handler_size_bytes":handler_size_bytes } - -# -# Parse arch config file -- -# Set interpreter style. 
-# -def setHandlerStyle(tokens): - global style - if len(tokens) != 2: - raise DataParseError("handler-style requires one argument") - style = tokens[1] - if style != "computed-goto": - raise DataParseError("handler-style (%s) invalid" % style) - -# -# Parse arch config file -- -# Set handler_size_bytes to the value of tokens[1], and handler_size_bits to -# log2(handler_size_bytes). Throws an exception if "bytes" is not 0 or -# a power of two. -# -def setHandlerSize(tokens): - global handler_size_bits, handler_size_bytes - if style != "computed-goto": - print "Warning: handler-size valid only for computed-goto interpreters" - if len(tokens) != 2: - raise DataParseError("handler-size requires one argument") - if handler_size_bits != -1000: - raise DataParseError("handler-size may only be set once") - - # compute log2(n), and make sure n is 0 or a power of 2 - handler_size_bytes = bytes = int(tokens[1]) - bits = -1 - while bytes > 0: - bytes //= 2 # halve with truncating division - bits += 1 - - if handler_size_bytes == 0 or handler_size_bytes != (1 << bits): - raise DataParseError("handler-size (%d) must be power of 2" \ - % orig_bytes) - handler_size_bits = bits - -# -# Parse arch config file -- -# Copy a file in to asm output file. -# -def importFile(tokens): - if len(tokens) != 2: - raise DataParseError("import requires one argument") - source = tokens[1] - if source.endswith(".S"): - appendSourceFile(tokens[1], getGlobalSubDict(), asm_fp, None) - else: - raise DataParseError("don't know how to import %s (expecting .cpp/.S)" - % source) - -# -# Parse arch config file -- -# Copy a file in to the C or asm output file. 
-# -def setAsmStub(tokens): - global asm_stub_text - if len(tokens) != 2: - raise DataParseError("import requires one argument") - try: - stub_fp = open(tokens[1]) - asm_stub_text = stub_fp.readlines() - except IOError, err: - stub_fp.close() - raise DataParseError("unable to load asm-stub: %s" % str(err)) - stub_fp.close() - -# -# Parse arch config file -- -# Copy a file in to the C or asm output file. -# -def setFallbackStub(tokens): - global fallback_stub_text - if len(tokens) != 2: - raise DataParseError("import requires one argument") - try: - stub_fp = open(tokens[1]) - fallback_stub_text = stub_fp.readlines() - except IOError, err: - stub_fp.close() - raise DataParseError("unable to load fallback-stub: %s" % str(err)) - stub_fp.close() -# -# Parse arch config file -- -# Record location of default alt stub -# -def setAsmAltStub(tokens): - global default_alt_stub, generate_alt_table - if len(tokens) != 2: - raise DataParseError("import requires one argument") - default_alt_stub = tokens[1] - generate_alt_table = True -# -# Change the default function type format -# -def setFunctionTypeFormat(tokens): - global function_type_format - function_type_format = tokens[1] -# -# Change the default function size format -# -def setFunctionSizeFormat(tokens): - global function_size_format - function_size_format = tokens[1] -# -# Change the global name format -# -def setGlobalNameFormat(tokens): - global global_name_format - global_name_format = tokens[1] -# -# Parse arch config file -- -# Start of opcode list. -# -def opStart(tokens): - global in_op_start - global default_op_dir - if len(tokens) != 2: - raise DataParseError("opStart takes a directory name argument") - if in_op_start != 0: - raise DataParseError("opStart can only be specified once") - default_op_dir = tokens[1] - in_op_start = 1 +import sys, re +from os import listdir +from cStringIO import StringIO -# -# Parse arch config file -- -# Set location of a single alt opcode's source file. 
-# -def altEntry(tokens): - global generate_alt_table - if len(tokens) != 3: - raise DataParseError("alt requires exactly two arguments") - if in_op_start != 1: - raise DataParseError("alt statements must be between opStart/opEnd") - try: - index = opcodes.index(tokens[1]) - except ValueError: - raise DataParseError("unknown opcode %s" % tokens[1]) - if alt_opcode_locations.has_key(tokens[1]): - print "Note: alt overrides earlier %s (%s -> %s)" \ - % (tokens[1], alt_opcode_locations[tokens[1]], tokens[2]) - alt_opcode_locations[tokens[1]] = tokens[2] - generate_alt_table = True +# This file is included verbatim at the start of the in-memory python script. +SCRIPT_SETUP_CODE = "common/gen_setup.py" -# -# Parse arch config file -- -# Set location of a single opcode's source file. -# -def opEntry(tokens): - #global opcode_locations - if len(tokens) != 3: - raise DataParseError("op requires exactly two arguments") - if in_op_start != 1: - raise DataParseError("op statements must be between opStart/opEnd") - try: - index = opcodes.index(tokens[1]) - except ValueError: - raise DataParseError("unknown opcode %s" % tokens[1]) - if opcode_locations.has_key(tokens[1]): - print "Note: op overrides earlier %s (%s -> %s)" \ - % (tokens[1], opcode_locations[tokens[1]], tokens[2]) - opcode_locations[tokens[1]] = tokens[2] +INTERP_DEFS_FILE = "../../../libdexfile/dex/dex_instruction_list.h" # need opcode list +NUM_PACKED_OPCODES = 256 -# -# Parse arch config file -- -# End of opcode list; emit instruction blocks. 
-# -def opEnd(tokens): - global in_op_start - if len(tokens) != 1: - raise DataParseError("opEnd takes no arguments") - if in_op_start != 1: - raise DataParseError("opEnd must follow opStart, and only appear once") - in_op_start = 2 - - loadAndEmitOpcodes() - if splitops == False: - if generate_alt_table: - loadAndEmitAltOpcodes() - -def genaltop(tokens): - if in_op_start != 2: - raise DataParseError("alt-op can be specified only after op-end") - if len(tokens) != 1: - raise DataParseError("opEnd takes no arguments") - if generate_alt_table: - loadAndEmitAltOpcodes() - -# # Extract an ordered list of instructions from the VM sources. We use the -# "goto table" definition macro, which has exactly kNumPackedOpcodes -# entries. -# +# "goto table" definition macro, which has exactly NUM_PACKED_OPCODES entries. def getOpcodeList(): - opcodes = [] - opcode_fp = open(interp_defs_file) - opcode_re = re.compile(r"^\s*V\((....), (\w+),.*", re.DOTALL) - for line in opcode_fp: - match = opcode_re.match(line) - if not match: - continue - opcodes.append("op_" + match.group(2).lower()) - opcode_fp.close() - - if len(opcodes) != kNumPackedOpcodes: - print "ERROR: found %d opcodes in Interp.h (expected %d)" \ - % (len(opcodes), kNumPackedOpcodes) - raise SyntaxError, "bad opcode count" - return opcodes - -def emitAlign(): - if style == "computed-goto": - asm_fp.write(" .balign %d\n" % handler_size_bytes) - -# -# Load and emit opcodes for all kNumPackedOpcodes instructions. -# -def loadAndEmitOpcodes(): - sister_list = [] - assert len(opcodes) == kNumPackedOpcodes - need_dummy_start = False - - loadAndEmitGenericAsm("instruction_start") - - for i in xrange(kNumPackedOpcodes): - op = opcodes[i] - - if opcode_locations.has_key(op): - location = opcode_locations[op] - else: - location = default_op_dir - - if location == "FALLBACK": - emitFallback(i) - else: - loadAndEmitAsm(location, i, sister_list) - - # For a 100% C implementation, there are no asm handlers or stubs. 
We - # need to have the MterpAsmInstructionStart label point at op_nop, and it's - # too annoying to try to slide it in after the alignment psuedo-op, so - # we take the low road and just emit a dummy op_nop here. - if need_dummy_start: - emitAlign() - asm_fp.write(label_prefix + "_op_nop: /* dummy */\n"); - - emitAlign() - - loadAndEmitGenericAsm("instruction_end") - - if style == "computed-goto": - emitSectionComment("Sister implementations", asm_fp) - loadAndEmitGenericAsm("instruction_start_sister") - asm_fp.writelines(sister_list) - loadAndEmitGenericAsm("instruction_end_sister") - -# -# Load an alternate entry stub -# -def loadAndEmitAltStub(source, opindex): - op = opcodes[opindex] - if verbose: - print " alt emit %s --> stub" % source - dict = getGlobalSubDict() - dict.update({ "opcode":op, "opnum":opindex }) - - emitAsmHeader(asm_fp, dict, alt_label_prefix) - appendSourceFile(source, dict, asm_fp, None) - -# -# Load and emit alternate opcodes for all kNumPackedOpcodes instructions. -# -def loadAndEmitAltOpcodes(): - assert len(opcodes) == kNumPackedOpcodes - start_label = global_name_format % "artMterpAsmAltInstructionStart" - end_label = global_name_format % "artMterpAsmAltInstructionEnd" - - loadAndEmitGenericAsm("instruction_start_alt") - - for i in xrange(kNumPackedOpcodes): - op = opcodes[i] - if alt_opcode_locations.has_key(op): - source = "%s/alt_%s.S" % (alt_opcode_locations[op], op) - else: - source = default_alt_stub - loadAndEmitAltStub(source, i) - - emitAlign() - - loadAndEmitGenericAsm("instruction_end_alt") - -# -# Load an assembly fragment and emit it. 
-# -def loadAndEmitAsm(location, opindex, sister_list): - op = opcodes[opindex] - source = "%s/%s.S" % (location, op) - dict = getGlobalSubDict() - dict.update({ "opcode":op, "opnum":opindex }) - if verbose: - print " emit %s --> asm" % source - - emitAsmHeader(asm_fp, dict, label_prefix) - appendSourceFile(source, dict, asm_fp, sister_list) - -# -# Load a non-handler assembly fragment and emit it. -# -def loadAndEmitGenericAsm(name): - source = "%s/%s.S" % (default_op_dir, name) - dict = getGlobalSubDict() - appendSourceFile(source, dict, asm_fp, None) - -# -# Emit fallback fragment -# -def emitFallback(opindex): - op = opcodes[opindex] - dict = getGlobalSubDict() - dict.update({ "opcode":op, "opnum":opindex }) - emitAsmHeader(asm_fp, dict, label_prefix) - for line in fallback_stub_text: - asm_fp.write(line) - asm_fp.write("\n") - -# -# Output the alignment directive and label for an assembly piece. -# -def emitAsmHeader(outfp, dict, prefix): - outfp.write("/* ------------------------------ */\n") - # The alignment directive ensures that the handler occupies - # at least the correct amount of space. We don't try to deal - # with overflow here. - emitAlign() - # Emit a label so that gdb will say the right thing. We prepend an - # underscore so the symbol name doesn't clash with the Opcode enum. - outfp.write(prefix + "_%(opcode)s: /* 0x%(opnum)02x */\n" % dict) - -# -# Output a generic instruction stub that updates the "glue" struct and -# calls the C implementation. -# -def emitAsmStub(outfp, dict): - emitAsmHeader(outfp, dict, label_prefix) - for line in asm_stub_text: - templ = Template(line) - outfp.write(templ.substitute(dict)) - -# -# Append the file specified by "source" to the open "outfp". Each line will -# be template-replaced using the substitution dictionary "dict". -# -# If the first line of the file starts with "%" it is taken as a directive. 
-# A "%include" line contains a filename and, optionally, a Python-style -# dictionary declaration with substitution strings. (This is implemented -# with recursion.) -# -# If "sister_list" is provided, and we find a line that contains only "&", -# all subsequent lines from the file will be appended to sister_list instead -# of copied to the output. -# -# This may modify "dict". -# -def appendSourceFile(source, dict, outfp, sister_list): - outfp.write("/* File: %s */\n" % source) - infp = open(source, "r") - in_sister = False - for line in infp: - if line.startswith("%include"): - # Parse the "include" line - tokens = line.strip().split(' ', 2) - if len(tokens) < 2: - raise DataParseError("malformed %%include in %s" % source) - - alt_source = tokens[1].strip("\"") - if alt_source == source: - raise DataParseError("self-referential %%include in %s" - % source) - - new_dict = dict.copy() - if len(tokens) == 3: - new_dict.update(eval(tokens[2])) - #print " including src=%s dict=%s" % (alt_source, new_dict) - appendSourceFile(alt_source, new_dict, outfp, sister_list) - continue - - elif line.startswith("%default"): - # copy keywords into dictionary - tokens = line.strip().split(' ', 1) - if len(tokens) < 2: - raise DataParseError("malformed %%default in %s" % source) - defaultValues = eval(tokens[1]) - for entry in defaultValues: - dict.setdefault(entry, defaultValues[entry]) - continue - - elif line.startswith("%break") and sister_list != None: - # allow more than one %break, ignoring all following the first - if style == "computed-goto" and not in_sister: - in_sister = True - sister_list.append("\n/* continuation for %(opcode)s */\n"%dict) - continue - - # perform keyword substitution if a dictionary was provided - if dict != None: - templ = Template(line) - try: - subline = templ.substitute(dict) - except KeyError, err: - raise DataParseError("keyword substitution failed in %s: %s" - % (source, str(err))) - except: - print "ERROR: substitution failed: " + line - 
raise - else: - subline = line - - # write output to appropriate file - if in_sister: - sister_list.append(subline) - else: - outfp.write(subline) - outfp.write("\n") - infp.close() - -# -# Emit a C-style section header comment. -# -def emitSectionComment(str, fp): - equals = "========================================" \ - "===================================" - - fp.write("\n/*\n * %s\n * %s\n * %s\n */\n" % - (equals, str, equals)) - - -# -# =========================================================================== -# "main" code -# - -# -# Check args. -# -if len(sys.argv) != 3: - print "Usage: %s target-arch output-dir" % sys.argv[0] - sys.exit(2) - -target_arch = sys.argv[1] -output_dir = sys.argv[2] - -# -# Extract opcode list. -# -opcodes = getOpcodeList() -#for op in opcodes: -# print " %s" % op - -# -# Open config file. -# -try: - config_fp = open("config_%s" % target_arch) -except: - print "Unable to open config file 'config_%s'" % target_arch - sys.exit(1) - -# -# Open and prepare output files. -# -try: - asm_fp = open("%s/mterp_%s.S" % (output_dir, target_arch), "w") -except: - print "Unable to open output files" - print "Make sure directory '%s' exists and existing files are writable" \ - % output_dir - # Ideally we'd remove the files to avoid confusing "make", but if they - # failed to open we probably won't be able to remove them either. - sys.exit(1) - -print "Generating %s" % (asm_fp.name) - -file_header = """/* - * This file was generated automatically by gen-mterp.py for '%s'. - * - * --> DO NOT EDIT <-- - */ - -""" % (target_arch) - -asm_fp.write(file_header) - -# -# Process the config file. 
-# -failed = False -try: - for line in config_fp: - line = line.strip() # remove CRLF, leading spaces - tokens = line.split(' ') # tokenize - #print "%d: %s" % (len(tokens), tokens) - if len(tokens[0]) == 0: - #print " blank" - pass - elif tokens[0][0] == '#': - #print " comment" - pass - else: - if tokens[0] == "handler-size": - setHandlerSize(tokens) - elif tokens[0] == "import": - importFile(tokens) - elif tokens[0] == "asm-stub": - setAsmStub(tokens) - elif tokens[0] == "asm-alt-stub": - setAsmAltStub(tokens) - elif tokens[0] == "op-start": - opStart(tokens) - elif tokens[0] == "op-end": - opEnd(tokens) - elif tokens[0] == "alt": - altEntry(tokens) - elif tokens[0] == "op": - opEntry(tokens) - elif tokens[0] == "handler-style": - setHandlerStyle(tokens) - elif tokens[0] == "alt-ops": - genaltop(tokens) - elif tokens[0] == "split-ops": - splitops = True - elif tokens[0] == "fallback-stub": - setFallbackStub(tokens) - elif tokens[0] == "function-type-format": - setFunctionTypeFormat(tokens) - elif tokens[0] == "function-size-format": - setFunctionSizeFormat(tokens) - elif tokens[0] == "global-name-format": - setGlobalNameFormat(tokens) - else: - raise DataParseError, "unrecognized command '%s'" % tokens[0] - if style == None: - print "tokens[0] = %s" % tokens[0] - raise DataParseError, "handler-style must be first command" -except DataParseError, err: - print "Failed: " + str(err) - # TODO: remove output files so "make" doesn't get confused - failed = True - asm_fp.close() - asm_fp = None - -config_fp.close() - -# -# Done! 
-# -if asm_fp: - asm_fp.close() - -sys.exit(failed) + opcodes = [] + opcode_fp = open(INTERP_DEFS_FILE) + opcode_re = re.compile(r"^\s*V\((....), (\w+),.*", re.DOTALL) + for line in opcode_fp: + match = opcode_re.match(line) + if not match: + continue + opcodes.append("op_" + match.group(2).lower()) + opcode_fp.close() + + if len(opcodes) != NUM_PACKED_OPCODES: + print "ERROR: found %d opcodes in Interp.h (expected %d)" \ + % (len(opcodes), NUM_PACKED_OPCODES) + raise SyntaxError, "bad opcode count" + return opcodes + +indent_re = re.compile(r"^%( *)") + +# Finds variable references in text: $foo or ${foo} +escape_re = re.compile(r''' + (?<!\$) # Look-back: must not be preceded by another $. + \$ + (\{)? # May be enclosed by { } pair. + (?P<name>\w+) # Save the symbol in named group. + (?(1)\}) # Expect } if and only if { was present. +''', re.VERBOSE) + +def generate_script(arch, setup_code): + # Create new python script and write the initial setup code. + script = StringIO() # File-like in-memory buffer. + script.write("# DO NOT EDIT: This file was generated by gen-mterp.py.\n") + script.write('arch = "' + arch + '"\n') + script.write(setup_code) + opcodes = getOpcodeList() + script.write("def opcodes(is_alt):\n") + for i in xrange(NUM_PACKED_OPCODES): + script.write(' write_opcode({0}, "{1}", {1}, is_alt)\n'.format(i, opcodes[i])) + + # Find all template files and translate them into python code. 
+ files = listdir(arch) + for file in sorted(files): + f = open(arch + "/" + file, "r") + indent = "" + for line in f.readlines(): + line = line.rstrip() + if line.startswith("%"): + script.write(line.lstrip("%") + "\n") + indent = indent_re.match(line).group(1) + if line.endswith(":"): + indent += " " + else: + line = escape_re.sub(r"''' + \g<name> + '''", line) + line = line.replace("\\", "\\\\") + line = line.replace("$$", "$") + script.write(indent + "write_line('''" + line + "''')\n") + script.write("\n") + f.close() + + # TODO: Remove the concept of sister snippets. It is barely used. + script.write("def write_sister():\n") + if arch == "arm": + script.write(" op_float_to_long_sister_code()\n") + script.write(" op_double_to_long_sister_code()\n") + if arch == "mips": + script.write(" global opnum, opcode\n") + names = [ + "op_float_to_long", + "op_double_to_long", + "op_mul_long", + "op_shl_long", + "op_shr_long", + "op_ushr_long", + "op_shl_long_2addr", + "op_shr_long_2addr", + "op_ushr_long_2addr" + ] + for name in names: + script.write(' opcode = "' + name + '"\n') + script.write(" " + name + "_sister_code()\n") + script.write(" pass\n") + + script.write('generate()\n') + script.seek(0) + return script.read() + +# Generate the script for each architecture and execute it. +for arch in ["arm", "arm64", "mips", "mips64", "x86", "x86_64"]: + with open(SCRIPT_SETUP_CODE, "r") as setup_code_file: + script = generate_script(arch, setup_code_file.read()) + filename = "out/mterp_" + arch + ".py" # Name to report in error messages. + # open(filename, "w").write(script) # Write the script to disk for debugging. + exec(compile(script, filename, mode='exec')) |