Source code for pycca.asm.util

# -'- coding: utf-8 -'-

import os, re, sys, pickle, tempfile, subprocess, atexit

try:
    from __builtin__ import long
except ImportError:
    long = int


def phex(code):
    """Print hexadecimal representation of machine code.
    """
    if not isinstance(code, list):
        code = [code]
    for instr in code:
        for c in bytearray(instr):
            sys.stdout.write('%02x' % c)
        print('')

def pbin(code):
    """Print binary representation of machine code.
    """
    if not isinstance(code, list):
        code = [code]
    for instr in code:
        for c in bytearray(instr):
            sys.stdout.write(format(c, '08b'))
        print('')

[docs]def phexbin(code): """Print hexadecimal and binary representations of machine code. Argument may be string, bytes, or bytearray. """ if not isinstance(code, list): code = [code] for instr in code: line = '' for c in bytearray(instr): line += '%02x ' % c line += ' ' * (40 - len(line)) for c in bytearray(instr): line += format(c, '08b') + ' ' print(line)
[docs]def compare(instr): """Print instruction's code beside the output of GNU-as. Accepts a single :class:`Instruction <pycca.asm.Instruction>` argument. This is used to determine the machine code differences (hopefully there are none!) between the output of an Instruction and the equivalent output from the GNU assembler. """ asm = str(instr) print("asm: " + asm) try: code1 = instr.code except Exception as exc1: try: code2 = as_code(asm) except Exception as exc2: print(exc1.message) print("[pycca and gnu-as both failed.]") return print("[pycca failed; gnu-as did not]") phexbin(code2) raise try: code2 = as_code(asm) sys.stdout.write('py: ') phexbin(code1) sys.stdout.write('gnu: ') phexbin(code2) if code1 == code2: print("[codes match]") except Exception as exc2: phexbin(code1) print("[gnu-as failed; pycca did not]") raise
def run_as(asm, quiet=False, check_invalid_reg=False): """Use GNU assembler to compile the *asm* string argument. This prepends the given code with ".intel_syntax noprefix\n" before compiling and returns only the relevant machine code output. If the compile fails, then an exception is raised. """ #asm = """ #.section .text #.globl _start #.align 4 #_start: #""" + asm + '\n' if check_invalid_reg: for reg in invalid_regs(): if reg.name in asm: raise Exception("asm '%s' contains invalid register '%s'" % (asm, reg.name)) asm = ".intel_syntax noprefix\n" + asm + "\n" #print asm fname = tempfile.mktemp('.s') open(fname, 'w').write(asm) cmd = 'as {file} -o {file}.o 2>&1 && objdump -d {file}.o; rm -f {file} {file}.o'.format(file=fname) #print cmd out = subprocess.check_output(cmd, shell=True) out = out.decode('ascii').split('\n') for i,line in enumerate(out): if "Disassembly of section .text:" in line: return out[i+3:] if not quiet: print("--- code: ---") print(asm) print("--- output: ---") print('\n'.join(out)) print("-------------") errmsg = re.search(r'Error:\s*(.*)\n', '\n'.join(out)) if errmsg is None: errmsg = "Error running 'as' or 'objdump' (see above)." else: errmsg = errmsg.groups()[0] exc = Exception(errmsg) exc.asm = asm exc.output = '\n'.join(out) raise exc
[docs]def as_code(asm, quiet=False, check_invalid_reg=False, cache=False): """Use GNU assembler to compile the *asm* string argument. This prepends the given code with ``.intel_syntax noprefix`` before compiling and returns the machine code output converted to a bytearray. If the compile fails, then an exception is raised. If *check_invalid_reg* is True, then an exception will be raised if the instruction makes use of a register that is not supported on the current architecture (by default, GNU-as silently ignores such symbols). If *cache* is True, then the result will be cached in ``pycca/asm/gnu_as_cache.pk`` to speed up subsequent requests for the same instruction. """ # First try returning cached output if cache: return as_code_cached(asm, quiet, check_invalid_reg) # execute GAS, return compiled bytecode (or raise exception) code = b'' for line in run_as(asm, quiet=quiet, check_invalid_reg=check_invalid_reg): if line.strip() == '': continue m = re.match(r'\s*[a-f0-9]+:\s+(([a-f0-9][a-f0-9]\s+)+)', line) if m is None: raise Exception("Can't parse objdump output: \"%s\"" % line) byts = re.split(r'\s+', m.groups()[0]) for byt in byts: if byt == '': continue code += bytearray.fromhex(byt) return code
_as_code_cache = None def as_code_cached(asm, quiet, check_invalid_reg): # return cached output of as_code(). This returns the compiled machine # code or raises an exception with a cached error message. global _as_code_cache path = os.path.dirname(__file__) cachefile = os.path.join(path, 'gnu_as_cache.pk') key = (asm, check_invalid_reg) if _as_code_cache is None: if os.path.exists(cachefile): if sys.version_info.major == 2: _as_code_cache = pickle.load(open(cachefile, 'rb')) else: _as_code_cache = pickle.load(open(cachefile, 'rb'), fix_imports=True) else: _as_code_cache = {'__counter__': 0} atexit.register(write_as_code_cache) if key not in _as_code_cache: try: _as_code_cache[key] = (True, as_code(asm, quiet, check_invalid_reg, cache=False)) except Exception as err: _as_code_cache[key] = (False, (err.message, err.output)) raise finally: cnt = (_as_code_cache['__counter__'] + 1) % 100 _as_code_cache['__counter__'] = cnt if cnt == 0: write_as_code_cache() ok, output = _as_code_cache[key] if ok: return output else: err = Exception(output[0]) err.output = output[1] raise err def write_as_code_cache(): # write the cache of as_code() results to disk. global _as_code_cache path = os.path.dirname(__file__) cachefile = os.path.join(path, 'gnu_as_cache.pk') if sys.version_info.major == 2: pk = pickle.dumps(_as_code_cache, protocol=0) else: pk = pickle.dumps(_as_code_cache, protocol=0, fix_imports=True) try: open(cachefile, 'wb').write(pk) except IOError: # probably no write permission; skip caching. pass def all_registers(): """Return all registers defined in asm.register (excluding st(i) registers) """ from . import register regs = [] for name in dir(register): obj = getattr(register, name) if isinstance(obj, register.Register): regs.append(obj) return regs _invalid_regs = None def invalid_regs(): """Return a list of registers that are invalid for GNU-as on this arch. When running GNU-as in 32-bit mode, the rxx registers do not exist. This would be fine except that instead of generating an error, the compiler simply treats the unknown name as a null pointer [0x0]. To work around this, we first probe AS to see which registers it doesn't know about, then raise an exception when attempting to compile using those registers. """ from . import register global _invalid_regs if _invalid_regs is not None: return _invalid_regs _invalid_regs = [] nullptr = as_code('push [0x0]') for reg in all_registers(): try: code = as_code('push %s' % reg.name, quiet=True) if code == nullptr: _invalid_regs.append(reg) except: pass return _invalid_regs def check_valid_pointer(instr='push', pre=None, post=None): """Print a table indicating valid pointer modes for each register. """ from . import instructions regs = all_registers() regs.sort(key=lambda a: (a.bits, a.name)) checks = ['{reg}', '[{reg}]', '[2*{reg}]', '[{reg}+{reg}]', '[2*{reg}+{reg}]', '[{reg}+1]', '[2*{reg}+1]', '[{reg}+{reg}+1]', '[2*{reg}+{reg}+1]'] line = ' ' cols = [len(line)] for check in checks: add = check.format(reg='reg')+' ' cols.append(len(add)) line += add print(line) icls = getattr(instructions, instr) for reg in regs: if reg in invalid_regs() or 'mm' in reg.name: continue line = reg.name + ':' line += ' '*(cols[0]-len(line)) for i,check in enumerate(checks): arg = check.format(reg=reg.name) arg = eval(arg, {reg.name: reg}) args = [x for x in [pre, arg, post] if x is not None] instr = icls(*args) try: code1 = instr.code err1 = False except: err1 = True try: code2 = as_code(str(instr), quiet=True, check_invalid_reg=True) err2 = False except: err2 = True if err1 and err2: add = '.' elif err1: add = 'GNU' elif err2: add = 'PY' else: if code1 != code2: add = 'XXX' else: add = '+++' line += add + ' '*(cols[i+1]-len(add)) print(line)