blob: 8adcdb9e85b60f02caf27bc91181e58003abee30 [file] [log] [blame]
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Module ELF contains ELF, Symbol, Section classes for manipulation over ELF files.
It can parse, and change ELF file. This version works only with vmlinux and doesn't properly work with ELF that contains
UND symbols
"""
import subprocess
import re
import os
import struct
from Utils import Utils
from collections import OrderedDict
from binascii import unhexlify
__author__ = "Vadym Stupakov"
__copyright__ = "Copyright (c) 2017 Samsung Electronics"
__credits__ = ["Vadym Stupakov"]
__version__ = "1.0"
__maintainer__ = "Vadym Stupakov"
__email__ = "v.stupakov@samsung.com"
__status__ = "Production"
class Symbol:
def __init__(self, name=str(), sym_type=str(), bind=str(), visibility=str(), addr=int(), size=int(), ndx=str()):
self.utils = Utils()
self.name = str(name)
self.type = str(sym_type)
self.bind = str(bind)
self.ndx = str(ndx)
self.visibility = str(visibility)
self.addr = self.utils.to_int(addr)
self.size = self.utils.to_int(size)
def __str__(self):
return "name: '{}', type: '{}', bind: '{}', ndx: '{}', visibility: '{}', address: '{}', size: '{}'".format(
self.name, self.type, self.bind, self.ndx, self.visibility, hex(self.addr), hex(self.size)
)
def __lt__(self, other):
return self.addr <= other.addr
class Section:
def __init__(self, name=str(), sec_type=str(), addr=int(), offset=int(), size=int()):
self.utils = Utils()
self.name = str(name)
self.type = str(sec_type)
self.addr = self.utils.to_int(addr)
self.offset = self.utils.to_int(offset)
self.size = self.utils.to_int(size)
def __str__(self):
return "name: '{}', type: '{}', address: '{}', offset: '{}', size: '{}'".format(
self.name, self.type, hex(self.addr), hex(self.offset), hex(self.size)
)
def __lt__(self, other):
return self.addr <= other.addr
class ELF:
"""
Utils for manipulating over ELF
"""
def __init__(self, elf_file, readelf_path="readelf"):
self.__elf_file = elf_file
self.utils = Utils()
self.__readelf_path = readelf_path
self.__sections = OrderedDict()
self.__symbols = OrderedDict()
self.__relocs = list()
self.__re_hexdecimal = "\s*[0-9A-Fa-f]+\s*"
self.__re_sec_name = "\s*[._a-zA-Z]+\s*"
self.__re_type = "\s*[A-Z]+\s*"
def __readelf_raw(self, options):
"""
Execute readelf with options and print raw output
:param options readelf options: ["opt1", "opt2", "opt3", ..., "optN"]
:returns raw output
"""
ret = subprocess.Popen(args=[self.__readelf_path] + options,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stdout, stderr = ret.communicate()
if "readelf: Error: the PHDR segment is not covered by a LOAD segment" in stderr.decode("utf-8").strip():
ret.returncode = 0
if ret.returncode != 0:
raise ChildProcessError(stderr.decode("utf-8") + stdout.decode("utf-8"))
return stdout.decode("utf-8")
def set_elf_file(self, elf_file):
if os.path.abspath(self.__elf_file) != os.path.abspath(elf_file):
self.__elf_file = os.path.abspath(elf_file)
self.__sections.clear()
self.__symbols.clear()
self.__relocs.clear()
def get_elf_file(self):
return os.path.abspath(self.__elf_file)
def get_sections(self):
""""
Execute -> parse -> transform to dict() readelf output
:returns dict: {sec_addr : Section()}
"""
if len(self.__sections) == 0:
sec_header = self.__readelf_raw(["-SW", self.__elf_file]).strip()
secs = re.compile("^.*\[.*\](" + self.__re_sec_name + self.__re_type + self.__re_hexdecimal +
self.__re_hexdecimal + self.__re_hexdecimal + ")", re.MULTILINE)
found = secs.findall(sec_header)
for line in found:
line = line.split()
if len(line) == 5:
self.__sections[int(line[2], 16)] = Section(name=line[0], sec_type=line[1], addr=int(line[2], 16),
offset=int(line[3], 16), size=int(line[4], 16))
self.__sections = OrderedDict(sorted(self.__sections.items()))
return self.__sections
def get_symbols(self):
""""
Execute -> parse -> transform to dict() readelf output
:returns dict: {sym_addr : Symbol()}
"""
if len(self.__symbols) == 0:
sym_tab = self.__readelf_raw(["-sW", self.__elf_file])
syms = re.compile(r"^.*\d+:\s(.*$)", re.MULTILINE)
found = syms.findall(sym_tab.strip())
for line in found:
line = line.split()
if len(line) == 7:
size = line[1]
# This needs, because readelf prints sizes in hex if size is large
if size[:2].upper() == "0X":
size = int(size, 16)
else:
size = int(size, 10)
"""
Do not include additional compiler information with name $d and $x
$d and $x is mapping symbols, their virtual addresses may coincide with virtual addresses
of the real objects
"""
if line[6] not in ("$d", "$x"):
self.__symbols[int(line[0], 16)] = Symbol(addr=int(line[0], 16), size=size, sym_type=line[2],
bind=line[3], visibility=line[4], ndx=line[5],
name=line[6])
self.__symbols = OrderedDict(sorted(self.__symbols.items()))
return self.__symbols
def get_relocs(self, start_addr=None, end_addr=None):
""""
:param start_addr: start address :int
:param end_addr: end address: int
:returns list: [reloc1, reloc2, reloc3, ..., relocN]
"""
if len(self.__relocs) == 0:
relocs = self.__readelf_raw(["-rW", self.__elf_file])
rel = re.compile(r"^(" + self.__re_hexdecimal + ")\s*", re.MULTILINE)
self.__relocs = [self.utils.to_int(el) for el in rel.findall(relocs.strip())]
if start_addr and end_addr is not None:
ranged_rela = list()
for el in self.__relocs:
if self.utils.to_int(start_addr) <= self.utils.to_int(el) <= self.utils.to_int(end_addr):
ranged_rela.append(el)
return ranged_rela
return self.__relocs
def get_altinstructions(self, start_addr=None, end_addr=None):
"""
:param start_addr: start address :int
:param end_addr: end address: int
:returns list: [[alt_inst1_addr, length1], [alt_inst2_addr, length2], ...]
.altinstructions section contains an array of struct alt_instr.
As instance, for kernel 4.14 from /arch/arm64/include/asm/alternative.h
struct alt_instr {
s32 orig_offset; /* offset to original instruction */
s32 alt_offset; /* offset to replacement instruction */
u16 cpufeature; /* cpufeature bit set for replacement */
u8 orig_len; /* size of original instruction(s) */
u8 alt_len; /* size of new instruction(s), <= orig_len */
};
Later, address of original instruction can be calculated as
at runtime : &(alt_instr->orig_offset) + alt_instr->orig_offset + kernel offset
ELF processing : address of .altinstruction section + in section offset of alt_instr structure + value of alt_instr.orig_offset
details in /arch/arm64/kernel/alternative.c, void __apply_alternatives(void *, bool)
"""
# The struct_format should reflect <struct alt_instr> content
struct_format = '<iiHBB'
pattern_altinst_section_content = "^ *0x[0-9A-Fa-f]{16} (.*) .*.{16}$"
pattern_altinstr_section_addr = "^ *(0x[0-9A-Fa-f]{16}).*.*.{16}$"
ranged_altinst = list()
__hex_dump = self.__readelf_raw(["--hex-dump=.altinstructions", self.__elf_file])
if len(__hex_dump) == 0:
return ranged_altinst
# .altinstruction section start addr in ELF
__altinstr_section_addr = int(re.findall(pattern_altinstr_section_addr, __hex_dump, re.MULTILINE)[0], 16)
# To provide .altinstruction section content using host readelf only
# some magic with string parcing is needed
hex_dump_list = re.findall(pattern_altinst_section_content, __hex_dump, re.MULTILINE)
__hex_dump_str = ''.join(hex_dump_list).replace(" ", "")
__altinstr_section_bin = unhexlify(__hex_dump_str)
__struct_size = struct.calcsize(struct_format)
if (len(__altinstr_section_bin) % __struct_size) != 0:
return ranged_altinst
if start_addr and end_addr is not None:
__i = 0
while __i < (len(__altinstr_section_bin) - __struct_size):
__struct_byte = __altinstr_section_bin[__i: __i + __struct_size]
__struct_value = list(struct.unpack(struct_format, __struct_byte))
# original instruction addr (going to be replaced) considered as "gap"
__original_instruction_addr = __struct_value[0] + __altinstr_section_addr + __i
# derive the target ARM instruction(s) length.
__target_instruction_len = __struct_value[4]
if self.utils.to_int(start_addr) <= __original_instruction_addr <= self.utils.to_int(end_addr):
ranged_altinst.append([__original_instruction_addr, __target_instruction_len])
__i = __i + __struct_size
return ranged_altinst
def get_symbol_by_name(self, sym_names):
"""
Get symbol by_name
:param sym_names: "sym_name" : str or list
:return: Symbol() or [Symbol()]
"""
if isinstance(sym_names, str):
for addr, symbol_obj in self.get_symbols().items():
if symbol_obj.name == sym_names:
return symbol_obj
elif isinstance(sym_names, list):
symbols = [self.get_symbol_by_name(sym_name) for sym_name in sym_names]
return symbols
else:
raise ValueError
return None
def get_symbol_by_vaddr(self, vaddrs=None):
"""
Get symbol by virtual address
:param vaddrs: vaddr : int or list
:return: Symbol() or [Symbol()]
"""
if isinstance(vaddrs, int):
if vaddrs in self.get_symbols():
return self.get_symbols()[vaddrs]
for addr, symbol_obj in self.get_symbols().items():
if (addr + symbol_obj.size) >= vaddrs >= addr:
return symbol_obj
elif isinstance(vaddrs, list):
symbol = [self.get_symbol_by_vaddr(vaddr) for vaddr in vaddrs]
return symbol
else:
raise ValueError
return None
def get_section_by_name(self, sec_names=None):
"""
Get section by_name
:param sec_names: "sec_name" : str or list
:return: Section() or [Section()]
"""
if isinstance(sec_names, str):
for addr, section_obj in self.get_sections().items():
if section_obj.name == sec_names:
return section_obj
elif isinstance(sec_names, list):
sections = [self.get_section_by_name(sec_name) for sec_name in sec_names]
return sections
else:
raise ValueError
return None
def get_section_by_vaddr(self, vaddrs=None):
"""
Get section by virtual address
:param vaddrs: vaddr : int or list
:return: Section() or [Section()]
"""
if isinstance(vaddrs, int):
if vaddrs in self.get_sections():
return self.get_sections()[vaddrs]
for addr, section_obj in self.get_sections().items():
if (addr + section_obj.size) >= vaddrs >= addr:
return section_obj
elif isinstance(vaddrs, list):
sections = [self.get_symbol_by_vaddr(vaddr) for vaddr in vaddrs]
return sections
else:
raise ValueError
return None
def vaddr_to_file_offset(self, vaddrs):
"""
Transform virtual address to file offset
:param vaddrs: addr string or int or list
:returns file offset or list
"""
if isinstance(vaddrs, str) or isinstance(vaddrs, int):
section = self.get_section_by_vaddr(vaddrs)
return self.utils.to_int(vaddrs, 16) - section.addr + section.offset
elif isinstance(vaddrs, list):
return [self.vaddr_to_file_offset(vaddr) for vaddr in vaddrs]
else:
raise ValueError
def read_data_from_vaddr(self, vaddr, size, out_file):
with open(self.__elf_file, "rb") as elf_fp:
elf_fp.seek(self.vaddr_to_file_offset(vaddr))
with open(out_file, "wb") as out_fp:
out_fp.write(elf_fp.read(size))