diff options
author | 2014-03-31 16:50:12 -0700 | |
---|---|---|
committer | 2014-04-30 19:25:51 -0700 | |
commit | 5a4fa82ab42af6e728a60e3261963aa243c3e2cd (patch) | |
tree | 69da9eb204c294be63f30d49cf0233e1dc9b6e93 /compiler/utils/assembler_test.h | |
parent | 0a3b13fc401bcf21225e30654012fe98806b0873 (diff) |
x86_64 Assembler Test Infrastructure, fix x86_64 assembler
Some infrastructure to do real assembler testing. Need to extend to
other assemblers, and a lot more tests.
Fix some of the cases of the x86_64 assembler.
Change-Id: I15b5f3a094af469130db68a95a66602cf30d8fc4
Diffstat (limited to 'compiler/utils/assembler_test.h')
-rw-r--r-- | compiler/utils/assembler_test.h | 687 |
1 files changed, 687 insertions, 0 deletions
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h new file mode 100644 index 0000000000..ce1c4de2fa --- /dev/null +++ b/compiler/utils/assembler_test.h @@ -0,0 +1,687 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_UTILS_ASSEMBLER_TEST_H_ +#define ART_COMPILER_UTILS_ASSEMBLER_TEST_H_ + +#include "assembler.h" + +#include "gtest/gtest.h" + +#include <cstdio> +#include <cstdlib> +#include <fstream> +#include <iostream> +#include <iterator> +#include <sys/stat.h> + +namespace art { + +template<typename Ass, typename Reg, typename Imm> +class AssemblerTest : public testing::Test { + public: + Ass* GetAssembler() { + return assembler_.get(); + } + + typedef std::string (*TestFn)(Ass* assembler); + + void DriverFn(TestFn f, std::string test_name) { + Driver(f(assembler_.get()), test_name); + } + + // This driver assumes the assembler has already been called. + void DriverStr(std::string assembly_string, std::string test_name) { + Driver(assembly_string, test_name); + } + + std::string RepeatR(void (Ass::*f)(Reg), std::string fmt) { + const std::vector<Reg*> registers = GetRegisters(); + std::string str; + for (auto reg : registers) { + (assembler_.get()->*f)(*reg); + std::string base = fmt; + + size_t reg_index = base.find("{reg}"); + if (reg_index != std::string::npos) { + std::ostringstream sreg; + sreg << *reg; + std::string reg_string = sreg.str(); + base.replace(reg_index, 5, reg_string); + } + + if (str.size() > 0) { + str += "\n"; + } + str += base; + } + // Add a newline at the end. + str += "\n"; + return str; + } + + std::string RepeatRR(void (Ass::*f)(Reg, Reg), std::string fmt) { + const std::vector<Reg*> registers = GetRegisters(); + std::string str; + for (auto reg1 : registers) { + for (auto reg2 : registers) { + (assembler_.get()->*f)(*reg1, *reg2); + std::string base = fmt; + + size_t reg1_index = base.find("{reg1}"); + if (reg1_index != std::string::npos) { + std::ostringstream sreg; + sreg << *reg1; + std::string reg_string = sreg.str(); + base.replace(reg1_index, 6, reg_string); + } + + size_t reg2_index = base.find("{reg2}"); + if (reg2_index != std::string::npos) { + std::ostringstream sreg; + sreg << *reg2; + std::string reg_string = sreg.str(); + base.replace(reg2_index, 6, reg_string); + } + + if (str.size() > 0) { + str += "\n"; + } + str += base; + } + } + // Add a newline at the end. + str += "\n"; + return str; + } + + std::string RepeatRI(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, std::string fmt) { + const std::vector<Reg*> registers = GetRegisters(); + std::string str; + std::vector<int64_t> imms = CreateImmediateValues(imm_bytes); + for (auto reg : registers) { + for (int64_t imm : imms) { + Imm* new_imm = CreateImmediate(imm); + (assembler_.get()->*f)(*reg, *new_imm); + delete new_imm; + std::string base = fmt; + + size_t reg_index = base.find("{reg}"); + if (reg_index != std::string::npos) { + std::ostringstream sreg; + sreg << *reg; + std::string reg_string = sreg.str(); + base.replace(reg_index, 5, reg_string); + } + + size_t imm_index = base.find("{imm}"); + if (imm_index != std::string::npos) { + std::ostringstream sreg; + sreg << imm; + std::string imm_string = sreg.str(); + base.replace(imm_index, 5, imm_string); + } + + if (str.size() > 0) { + str += "\n"; + } + str += base; + } + } + // Add a newline at the end. + str += "\n"; + return str; + } + + std::string RepeatI(void (Ass::*f)(const Imm&), size_t imm_bytes, std::string fmt) { + std::string str; + std::vector<int64_t> imms = CreateImmediateValues(imm_bytes); + for (int64_t imm : imms) { + Imm* new_imm = CreateImmediate(imm); + (assembler_.get()->*f)(*new_imm); + delete new_imm; + std::string base = fmt; + + size_t imm_index = base.find("{imm}"); + if (imm_index != std::string::npos) { + std::ostringstream sreg; + sreg << imm; + std::string imm_string = sreg.str(); + base.replace(imm_index, 5, imm_string); + } + + if (str.size() > 0) { + str += "\n"; + } + str += base; + } + // Add a newline at the end. + str += "\n"; + return str; + } + + // This is intended to be run as a test. + bool CheckTools() { + if (!FileExists(GetAssemblerCommand())) { + return false; + } + LOG(INFO) << "Chosen assembler command: " << GetAssemblerCommand(); + + if (!FileExists(GetObjdumpCommand())) { + return false; + } + LOG(INFO) << "Chosen objdump command: " << GetObjdumpCommand(); + + // Disassembly is optional. + std::string disassembler = GetDisassembleCommand(); + if (disassembler.length() != 0) { + if (!FileExists(disassembler)) { + return false; + } + LOG(INFO) << "Chosen disassemble command: " << GetDisassembleCommand(); + } else { + LOG(INFO) << "No disassembler given."; + } + + return true; + } + + protected: + void SetUp() OVERRIDE { + assembler_.reset(new Ass()); + + SetUpHelpers(); + } + + // Override this to set up any architecture-specific things, e.g., register vectors. + virtual void SetUpHelpers() {} + + virtual std::vector<Reg*> GetRegisters() = 0; + + // Get the typically used name for this architecture, e.g., aarch64, x86_64, ... + virtual std::string GetArchitectureString() = 0; + + // Get the name of the assembler, e.g., "as" by default. + virtual std::string GetAssemblerCmdName() { + return "as"; + } + + // Switches to the assembler command. Default none. + virtual std::string GetAssemblerParameters() { + return ""; + } + + // Return the host assembler command for this test. + virtual std::string GetAssemblerCommand() { + // Already resolved it once? + if (resolved_assembler_cmd_.length() != 0) { + return resolved_assembler_cmd_; + } + + std::string line = FindTool(GetAssemblerCmdName()); + if (line.length() == 0) { + return line; + } + + resolved_assembler_cmd_ = line + GetAssemblerParameters(); + + return line; + } + + // Get the name of the objdump, e.g., "objdump" by default. + virtual std::string GetObjdumpCmdName() { + return "objdump"; + } + + // Switches to the objdump command. Default is " -h". + virtual std::string GetObjdumpParameters() { + return " -h"; + } + + // Return the host objdump command for this test. + virtual std::string GetObjdumpCommand() { + // Already resolved it once? + if (resolved_objdump_cmd_.length() != 0) { + return resolved_objdump_cmd_; + } + + std::string line = FindTool(GetObjdumpCmdName()); + if (line.length() == 0) { + return line; + } + + resolved_objdump_cmd_ = line + GetObjdumpParameters(); + + return line; + } + + // Get the name of the objdump, e.g., "objdump" by default. + virtual std::string GetDisassembleCmdName() { + return "objdump"; + } + + // Switches to the objdump command. As it's a binary, one needs to push the architecture and + // such to objdump, so it's architecture-specific and there is no default. + virtual std::string GetDisassembleParameters() = 0; + + // Return the host disassembler command for this test. + virtual std::string GetDisassembleCommand() { + // Already resolved it once? + if (resolved_disassemble_cmd_.length() != 0) { + return resolved_disassemble_cmd_; + } + + std::string line = FindTool(GetDisassembleCmdName()); + if (line.length() == 0) { + return line; + } + + resolved_disassemble_cmd_ = line + GetDisassembleParameters(); + + return line; + } + + // Create a couple of immediate values up to the number of bytes given. + virtual std::vector<int64_t> CreateImmediateValues(size_t imm_bytes) { + std::vector<int64_t> res; + res.push_back(0); + res.push_back(-1); + res.push_back(0x12); + if (imm_bytes >= 2) { + res.push_back(0x1234); + res.push_back(-0x1234); + if (imm_bytes >= 4) { + res.push_back(0x12345678); + res.push_back(-0x12345678); + if (imm_bytes >= 6) { + res.push_back(0x123456789ABC); + res.push_back(-0x123456789ABC); + if (imm_bytes >= 8) { + res.push_back(0x123456789ABCDEF0); + res.push_back(-0x123456789ABCDEF0); + } + } + } + } + return res; + } + + // Create an immediate from the specific value. + virtual Imm* CreateImmediate(int64_t imm_value) = 0; + + private: + // Driver() assembles and compares the results. If the results are not equal and we have a + // disassembler, disassemble both and check whether they have the same mnemonics (in which case + // we just warn). + void Driver(std::string assembly_text, std::string test_name) { + EXPECT_NE(assembly_text.length(), 0U) << "Empty assembly"; + + NativeAssemblerResult res; + Compile(assembly_text, &res, test_name); + + EXPECT_TRUE(res.ok) << res.error_msg; + if (!res.ok) { + // No way of continuing. + return; + } + + size_t cs = assembler_->CodeSize(); + UniquePtr<std::vector<uint8_t> > data(new std::vector<uint8_t>(cs)); + MemoryRegion code(&(*data)[0], data->size()); + assembler_->FinalizeInstructions(code); + + if (*data == *res.code) { + Clean(&res); + } else { + if (DisassembleBinaries(*data, *res.code, test_name)) { + if (data->size() > res.code->size()) { + LOG(WARNING) << "Assembly code is not identical, but disassembly of machine code is " + "equal: this implies sub-optimal encoding! Our code size=" << data->size() << + ", gcc size=" << res.code->size(); + } else { + LOG(INFO) << "GCC chose a different encoding than ours, but the overall length is the " + "same."; + } + } else { + // This will output the assembly. + EXPECT_EQ(*data, *res.code) << "Outputs (and disassembly) not identical."; + } + } + } + + // Structure to store intermediates and results. + struct NativeAssemblerResult { + bool ok; + std::string error_msg; + std::string base_name; + UniquePtr<std::vector<uint8_t>> code; + uintptr_t length; + }; + + // Compile the assembly file from_file to a binary file to_file. Returns true on success. + bool Assemble(const char* from_file, const char* to_file, std::string* error_msg) { + bool have_assembler = FileExists(GetAssemblerCommand()); + EXPECT_TRUE(have_assembler) << "Cannot find assembler:" << GetAssemblerCommand(); + if (!have_assembler) { + return false; + } + + std::vector<std::string> args; + + args.push_back(GetAssemblerCommand()); + args.push_back("-o"); + args.push_back(to_file); + args.push_back(from_file); + + return Exec(args, error_msg); + } + + // Runs objdump -h on the binary file and extracts the first line with .text. + // Returns "" on failure. + std::string Objdump(std::string file) { + bool have_objdump = FileExists(GetObjdumpCommand()); + EXPECT_TRUE(have_objdump) << "Cannot find objdump: " << GetObjdumpCommand(); + if (!have_objdump) { + return ""; + } + + std::string error_msg; + std::vector<std::string> args; + + args.push_back(GetObjdumpCommand()); + args.push_back(file); + args.push_back(">"); + args.push_back(file+".dump"); + std::string cmd = Join(args, ' '); + + args.clear(); + args.push_back("/bin/sh"); + args.push_back("-c"); + args.push_back(cmd); + + if (!Exec(args, &error_msg)) { + EXPECT_TRUE(false) << error_msg; + } + + std::ifstream dump(file+".dump"); + + std::string line; + bool found = false; + while (std::getline(dump, line)) { + if (line.find(".text") != line.npos) { + found = true; + break; + } + } + + dump.close(); + + if (found) { + return line; + } else { + return ""; + } + } + + // Disassemble both binaries and compare the text. + bool DisassembleBinaries(std::vector<uint8_t>& data, std::vector<uint8_t>& as, + std::string test_name) { + std::string disassembler = GetDisassembleCommand(); + if (disassembler.length() == 0) { + LOG(WARNING) << "No dissassembler command."; + return false; + } + + std::string data_name = WriteToFile(data, test_name + ".ass"); + std::string error_msg; + if (!DisassembleBinary(data_name, &error_msg)) { + LOG(INFO) << "Error disassembling: " << error_msg; + std::remove(data_name.c_str()); + return false; + } + + std::string as_name = WriteToFile(as, test_name + ".gcc"); + if (!DisassembleBinary(as_name, &error_msg)) { + LOG(INFO) << "Error disassembling: " << error_msg; + std::remove(data_name.c_str()); + std::remove((data_name + ".dis").c_str()); + std::remove(as_name.c_str()); + return false; + } + + bool result = CompareFiles(data_name + ".dis", as_name + ".dis"); + + if (result) { + std::remove(data_name.c_str()); + std::remove(as_name.c_str()); + std::remove((data_name + ".dis").c_str()); + std::remove((as_name + ".dis").c_str()); + } + + return result; + } + + bool DisassembleBinary(std::string file, std::string* error_msg) { + std::vector<std::string> args; + + args.push_back(GetDisassembleCommand()); + args.push_back(file); + args.push_back("| sed -n \'/<.data>/,$p\' | sed -e \'s/.*://\'"); + args.push_back(">"); + args.push_back(file+".dis"); + std::string cmd = Join(args, ' '); + + args.clear(); + args.push_back("/bin/sh"); + args.push_back("-c"); + args.push_back(cmd); + + return Exec(args, error_msg); + } + + std::string WriteToFile(std::vector<uint8_t>& buffer, std::string test_name) { + std::string file_name = GetTmpnam() + std::string("---") + test_name; + const char* data = reinterpret_cast<char*>(buffer.data()); + std::ofstream s_out(file_name + ".o"); + s_out.write(data, buffer.size()); + s_out.close(); + return file_name + ".o"; + } + + bool CompareFiles(std::string f1, std::string f2) { + std::ifstream f1_in(f1); + std::ifstream f2_in(f2); + + bool result = std::equal(std::istreambuf_iterator<char>(f1_in), + std::istreambuf_iterator<char>(), + std::istreambuf_iterator<char>(f2_in)); + + f1_in.close(); + f2_in.close(); + + return result; + } + + // Compile the given assembly code and extract the binary, if possible. Put result into res. + bool Compile(std::string assembly_code, NativeAssemblerResult* res, std::string test_name) { + res->ok = false; + res->code.reset(nullptr); + + res->base_name = GetTmpnam() + std::string("---") + test_name; + + // TODO: Lots of error checking. + + std::ofstream s_out(res->base_name + ".S"); + s_out << assembly_code; + s_out.close(); + + if (!Assemble((res->base_name + ".S").c_str(), (res->base_name + ".o").c_str(), + &res->error_msg)) { + res->error_msg = "Could not compile."; + return false; + } + + std::string odump = Objdump(res->base_name + ".o"); + if (odump.length() == 0) { + res->error_msg = "Objdump failed."; + return false; + } + + std::istringstream iss(odump); + std::istream_iterator<std::string> start(iss); + std::istream_iterator<std::string> end; + std::vector<std::string> tokens(start, end); + + if (tokens.size() < OBJDUMP_SECTION_LINE_MIN_TOKENS) { + res->error_msg = "Objdump output not recognized: too few tokens."; + return false; + } + + if (tokens[1] != ".text") { + res->error_msg = "Objdump output not recognized: .text not second token."; + return false; + } + + std::string lengthToken = "0x" + tokens[2]; + std::istringstream(lengthToken) >> std::hex >> res->length; + + std::string offsetToken = "0x" + tokens[5]; + uintptr_t offset; + std::istringstream(offsetToken) >> std::hex >> offset; + + std::ifstream obj(res->base_name + ".o"); + obj.seekg(offset); + res->code.reset(new std::vector<uint8_t>(res->length)); + obj.read(reinterpret_cast<char*>(&(*res->code)[0]), res->length); + obj.close(); + + res->ok = true; + return true; + } + + // Remove temporary files. + void Clean(const NativeAssemblerResult* res) { + std::remove((res->base_name + ".S").c_str()); + std::remove((res->base_name + ".o").c_str()); + std::remove((res->base_name + ".o.dump").c_str()); + } + + // Check whether file exists. Is used for commands, so strips off any parameters: anything after + // the first space. We skip to the last slash for this, so it should work with directories with + // spaces. + static bool FileExists(std::string file) { + if (file.length() == 0) { + return false; + } + + // Need to strip any options. + size_t last_slash = file.find_last_of('/'); + if (last_slash == std::string::npos) { + // No slash, start looking at the start. + last_slash = 0; + } + size_t space_index = file.find(' ', last_slash); + + if (space_index == std::string::npos) { + std::ifstream infile(file.c_str()); + return infile.good(); + } else { + std::string copy = file.substr(0, space_index - 1); + + struct stat buf; + return stat(copy.c_str(), &buf) == 0; + } + } + + static std::string GetGCCRootPath() { + return "prebuilts/gcc/linux-x86"; + } + + static std::string GetRootPath() { + // 1) Check ANDROID_BUILD_TOP + char* build_top = getenv("ANDROID_BUILD_TOP"); + if (build_top != nullptr) { + return std::string(build_top) + "/"; + } + + // 2) Do cwd + char temp[1024]; + return getcwd(temp, 1024) ? std::string(temp) + "/" : std::string(""); + } + + std::string FindTool(std::string tool_name) { + // Find the current tool. Wild-card pattern is "arch-string*tool-name". + std::string gcc_path = GetRootPath() + GetGCCRootPath(); + std::vector<std::string> args; + args.push_back("find"); + args.push_back(gcc_path); + args.push_back("-name"); + args.push_back(GetArchitectureString() + "*" + tool_name); + args.push_back("|"); + args.push_back("sort"); + args.push_back("|"); + args.push_back("tail"); + args.push_back("-n"); + args.push_back("1"); + std::string tmp_file = GetTmpnam(); + args.push_back(">"); + args.push_back(tmp_file); + std::string sh_args = Join(args, ' '); + + args.clear(); + args.push_back("/bin/sh"); + args.push_back("-c"); + args.push_back(sh_args); + + std::string error_msg; + if (!Exec(args, &error_msg)) { + EXPECT_TRUE(false) << error_msg; + return ""; + } + + std::ifstream in(tmp_file.c_str()); + std::string line; + if (!std::getline(in, line)) { + in.close(); + std::remove(tmp_file.c_str()); + return ""; + } + in.close(); + std::remove(tmp_file.c_str()); + return line; + } + + // Use a consistent tmpnam, so store it. + std::string GetTmpnam() { + if (tmpnam_.length() == 0) { + tmpnam_ = std::string(tmpnam(nullptr)); + } + return tmpnam_; + } + + UniquePtr<Ass> assembler_; + + std::string resolved_assembler_cmd_; + std::string resolved_objdump_cmd_; + std::string resolved_disassemble_cmd_; + std::string tmpnam_; + + static constexpr size_t OBJDUMP_SECTION_LINE_MIN_TOKENS = 6; +}; + +} // namespace art + +#endif // ART_COMPILER_UTILS_ASSEMBLER_TEST_H_ |