diff --git a/meta/asttools/serialize.py b/meta/asttools/serialize.py index 2090010..b508d4f 100644 --- a/meta/asttools/serialize.py +++ b/meta/asttools/serialize.py @@ -9,16 +9,19 @@ import _ast import sys -node_name = '0__node_name__' +node_name = 0 not_py3 = sys.version_info.major < 3 class DictAst(NodeVisitor): + def __init__(self, attributes=True): + self.attributes = attributes def generic_visit(self, node): dct = {node_name: type(node).__name__} - for attr in node._attributes: - dct[attr] = getattr(node,attr) + if self.attributes: + for attr in node._attributes: + dct[attr] = getattr(node,attr) for field in node._fields: value = getattr(node,field) if isinstance(value, list): @@ -29,13 +32,13 @@ def generic_visit(self, node): dct[field] = value return dct -def serialize(node): +def serialize(node, attributes=True): ''' :param node: an _ast.AST object searialize an ast into a dictionary object ''' - return DictAst().visit(node) + return DictAst(attributes=attributes).visit(node) def deserialize(obj): ''' diff --git a/meta/asttools/tests/test_sourcegen.py b/meta/asttools/tests/test_sourcegen.py index 44422a5..ba62949 100644 --- a/meta/asttools/tests/test_sourcegen.py +++ b/meta/asttools/tests/test_sourcegen.py @@ -375,7 +375,6 @@ def test_logical(self): source = '(a and b and c)' self.assertSame(source) -# @unittest.expectedFailure def test_issue_9(self): source = '''def hi(): if a: diff --git a/meta/asttools/visitors/print_visitor.py b/meta/asttools/visitors/print_visitor.py index 6acdd38..dc070e0 100644 --- a/meta/asttools/visitors/print_visitor.py +++ b/meta/asttools/visitors/print_visitor.py @@ -61,12 +61,13 @@ def ast_items(node): class ASTPrinter(Visitor): - def __init__(self, indent=' ', level=0, newline='\n'): + def __init__(self, indent=' ', level=0, newline='\n', show_labels=True): self.out = StringIO() self._indent = '' self.one_indent = indent self.level = level self.newline = newline + self.show_labels=show_labels def dump(self, file=sys.stdout): 
self.out.seek(0) @@ -110,7 +111,10 @@ def visitDefault(self, node): while children: attr, child = children.pop(0) if isinstance(child, (list, tuple)): - text = '{attr}=['.format(attr=attr) + if self.show_labels: + text = '{attr}=['.format(attr=attr) + else: + text = '[' self.print(text) with self.indent(len(text)): for j, inner_child in enumerate(child): @@ -123,7 +127,10 @@ def visitDefault(self, node): self.print(']') else: - text = '{attr}='.format(attr=attr) + if self.show_labels: + text = '{attr}='.format(attr=attr) + else: + text = '' self.print(text) with self.indent(len(text)): @@ -156,7 +163,7 @@ def dump_ast(ast, indent=' ', newline='\n'): visitor.visit(ast) return visitor.dumps() -def print_ast(ast, indent=' ', initlevel=0, newline='\n', file=sys.stdout): +def print_ast(ast, indent=' ', initlevel=0, newline='\n', file=sys.stdout, show_labels=True): ''' Pretty print an ast node. @@ -173,6 +180,6 @@ def print_ast(ast, indent=' ', initlevel=0, newline='\n', file=sys.stdout): ''' - visitor = ASTPrinter(indent=indent, level=initlevel, newline=newline) + visitor = ASTPrinter(indent=indent, level=initlevel, newline=newline, show_labels=show_labels) visitor.visit(ast) visitor.dump(file=file) diff --git a/meta/decompiler/__init__.py b/meta/decompiler/__init__.py index e358a11..f2c9d65 100644 --- a/meta/decompiler/__init__.py +++ b/meta/decompiler/__init__.py @@ -4,7 +4,7 @@ This module can decompile arbitrary code objects into a python ast. 
''' -from meta.decompiler.instructions import make_module, make_function +#from meta.decompiler.instructions import make_module, make_function import _ast import struct diff --git a/meta/decompiler/assignments.py b/meta/decompiler/assignments.py new file mode 100644 index 0000000..0954691 --- /dev/null +++ b/meta/decompiler/assignments.py @@ -0,0 +1,183 @@ +''' +Created on Nov 30, 2012 + +@author: sean +''' + +from __future__ import print_function + +import _ast + +from meta.utils import py3, py3op +from meta.decompiler.transformers import mkexpr +from meta.decompiler import extra_nodes as nodes +from ast import copy_location + +if py3: + class _ast_Print: pass +else: + _ast_Print = _ast.Print + + +def mkindex(index): + c = lambda node: copy_location(node, index) + if isinstance(index, _ast.Tuple): + dims = [] + have_slice = False + for dim in index.elts: + if not isinstance(dim, _ast.Slice): + dim = c(_ast.Index(value=dim)) + else: + have_slice = True + dims.append(dim) + + if have_slice: + index = c(_ast.ExtSlice(dims=dims)) + else: + index = c(_ast.Index(value=index)) + + elif not isinstance(index, _ast.Slice): + index = c(_ast.Index(value=index)) + return index + + +class AssignmentsMixin(object): + + def visit_UNPACK_SEQUENCE(self, instr): + + node = nodes.Unpack(nargs=instr.oparg, elts=[]) + self.push_ast_item(node) + +# def visit_STORE_ATTR(self, instr): +# +# attrname = instr.arg +# node = self.pop_ast_item() +# expr = self.pop_ast_item() +# expr = self.process_ifexpr(expr) +# +# assattr = _ast.Attribute(value=node, attr=attrname, ctx=_ast.Store(), lineno=instr.lineno, col_offset=0) +# set_attr = _ast.Assign(targets=[assattr], value=expr, lineno=instr.lineno, col_offset=0) +# +# self.push_ast_item(set_attr) + + def _STORE_IMPORT(self, instr, value): + if isinstance(value, _ast.ImportFrom): + as_name = instr.arg + name = value.names[-1].name + if as_name != name: + value.names[-1].asname = as_name + else: + as_name = instr.arg + if value.names[0].asname is 
None: + base_name = value.names[0].name.split('.')[0] + if base_name != as_name: + value.names[0].asname = as_name + + self.push_ast_item(value) + + + def visit_STORE_NAME(self, instr): + + value = self.pop_ast_item() + + if isinstance(value, (_ast.Import, _ast.ImportFrom)): + return self._STORE_IMPORT(instr, value) + elif isinstance(value, (_ast.ClassDef, _ast.FunctionDef)): + return self._STORE_CLS(instr, value) + + value = mkexpr(value) + ctx = _ast.Store() + + if instr.opname == 'STORE_ATTR': + name = nodes.cpy_loc(_ast.Attribute(value, instr.arg, ctx), instr) + value = self.pop_ast_item() + elif instr.opname == 'STORE_SUBSCR': + subj = self.pop_ast_item() + index = mkindex(value) + name = nodes.cpy_loc(_ast.Subscript(subj, index, ctx), instr) + value = self.pop_ast_item() + else: + name = nodes.cpy_loc(_ast.Name(instr.arg, ctx), instr) + + value = mkexpr(value) + + if isinstance(value, nodes.Unpack) and value.nargs: + value.elts.append(name) + value.nargs -= 1 + if value.nargs: + self.push_ast_item(value) + return + + if isinstance(value, nodes.Unpack): + assert value.nargs == 0, value.nargs + tgt = _ast.Tuple(value.elts, ctx) + targets = [nodes.cpy_loc(tgt, instr)] + value = self.pop_ast_item() + else: + targets = [name] + + if isinstance(value, _ast.AugAssign): + self.push_ast_item(value) + return + + if isinstance(value, _ast.Assign): + value.targets.extend(targets) + targets = value.targets + value = value.value + other_item = self.pop_ast_item() + + if other_item is not value: + are_tuples = len(targets) == 2 and isinstance(targets[0], _ast.Tuple) and isinstance(value, _ast.Tuple) + if are_tuples and len(targets[0].elts) == len(value.elts): + # This is not nessesary, just makes the bytecode the same + targets[0].elts.append(targets.pop()) + value.elts.append(other_item) + else: + targets = [nodes.cpy_loc(_ast.Tuple(targets, ctx), instr)] + value = nodes.cpy_loc(_ast.Tuple([value, other_item], _ast.Load()), instr) + + assign = _ast.Assign(targets=targets, 
value=value, lineno=instr.lineno, col_offset=0) + + self.push_ast_item(assign) + return + + visit_STORE_ATTR = visit_STORE_NAME + visit_STORE_SUBSCR = visit_STORE_NAME + + visit_STORE_FAST = visit_STORE_NAME + visit_STORE_DEREF = visit_STORE_NAME + + +# def visit_STORE_SUBSCR(self, instr): +# index = self.pop_ast_item() +# value = self.pop_ast_item() +# expr = self.pop_ast_item() +# +# expr = mkexpr(expr) +# +# if isinstance(expr, _ast.AugAssign): +# self.push_ast_item(expr) +# else: +# kw = dict(lineno=instr.lineno, col_offset=0) +# +# index = self.format_slice(index, kw) +# +# subscr = _ast.Subscript(value=value, slice=index, ctx=_ast.Store(), **kw) +# +# assign = _ast.Assign(targets=[subscr], value=expr, **kw) +# self.push_ast_item(assign) + + @py3op + def visit_STORE_LOCALS(self, instr): + 'remove Locals from class def' + self.pop_ast_item() + + def visit_STORE_GLOBAL(self, instr): + + if not isinstance(self._ast_stack[0], _ast.Global): + self._ast_stack.insert(0, _ast.Global(names=[])) + + if instr.arg not in self._ast_stack[0].names: + self._ast_stack[0].names.append(instr.arg) + + self.visit_STORE_NAME(instr) diff --git a/meta/decompiler/extra_nodes.py b/meta/decompiler/extra_nodes.py new file mode 100644 index 0000000..94d0748 --- /dev/null +++ b/meta/decompiler/extra_nodes.py @@ -0,0 +1,262 @@ +''' +Created on Nov 30, 2012 + +@author: sean +''' + + +import _ast +from meta.decompiler.transformers import mkexpr +from ast import copy_location +from meta.asttools.visitors.print_visitor import print_ast + +def cpy_loc(node, instr): + node.lineno = instr.lineno + node.col_offset = 0 + if instr.is_jump: + node.to = instr.to + node.label = None + + return node + +_ast.stmt.__repr__ = lambda self: '%s()' % (type(self).__name__) +_ast.expr.__repr__ = lambda self: '%s()' % (type(self).__name__) +_ast.Name.__repr__ = lambda self: 'Name(%s)' % (self.id) +_ast.BoolOp.__repr__ = lambda self: '%s(%r)' % (type(self.op).__name__, self.values) +_ast.Assign.__repr__ = lambda self: 
'Assign(%r, %r)' % (self.targets, self.value) +_ast.Import.__repr__ = lambda self: 'Import(%r)' % (self.names,) +_ast.ImportFrom.__repr__ = lambda self: 'ImportFrom(%r, %r)' % (self.module, self.names,) +_ast.alias.__repr__ = lambda self: 'alias(%r, %r)' % (self.name, self.asname) if self.asname else repr(self.name) +_ast.Compare.__repr__ = lambda self: 'Compare(%r, %r, %r)' % (self.left, self.ops, self.comparators) +_ast.cmpop.__repr__ = lambda self: type(self).__name__ +_ast.Num.__repr__ = lambda self: 'Num(%r)' %(self.n,) + +class Tmp(_ast.AST): + def __repr__(self): + return '' + +class Unpack(_ast.AST): + def __repr__(self): + return 'Unpack(%r)' % (self.elts,) + +class Iter(_ast.AST): + _attributes = 'lineno', 'col_offset' + _fields = 'value', + +class CtlFlow(_ast.AST): + _attributes = 'lineno', 'col_offset', 'to' + cond = True + + +class Block(CtlFlow): + _attributes = 'lineno', 'col_offset', 'to' + +class Loop(Block): + _fields = 'body', 'orelse' + +class Cond(CtlFlow): + _attributes = 'lineno', 'col_offset', 'to' + def __repr__(self): + return 'Jump%s(%r)' % (self.cond, self.test,) + + +class JumpX(CtlFlow): + _attributes = 'lineno', 'col_offset', 'to' + _fields = 'test', 'cond', 'body', 'orelse' + + def __repr__(self): + return '%s(%r)' % (type(self).__name__, self.test) + + def merge(self, node, span, stack): + assert isinstance(node, JumpX), node + assert not span + + orelse = node.orelse + while orelse: + orelse = orelse[0].orelse + + orelse.append(self) + return node + + def insert_orelse(self, node, span, stack): + print_ast(self) + print_ast(node) + print(span) + + assert False + +class Jump(CtlFlow): + _attributes = 'lineno', 'col_offset', 'to' + _fields = 'test', 'cond', 'body', 'orelse' + special_case = False + def __repr__(self): + return '%s()' % (type(self).__name__) + + + def detect_special_case(self, node, span): + if self.special_case: + return True + elif len(span) == 1 and isinstance(span[0], _ast.Compare) and isinstance(node, JumpOrPop) 
and isinstance(node.test, _ast.Compare): + return True + + return False + + def special_case_merge(self,node, span, stack): + + if self.special_case: + bool_op = mkboolop(node.test, self.special_case, node.cond) + self.special_case = copy_location(bool_op, node) + return + else: + assert not self.body, self.body + assert not self.orelse, self.orelse + + self.special_case = node.test + node.test.comparators.extend(span[0].comparators) + node.test.ops.extend(span[0].ops) + stack.extend([None,None]) + return + + def special_case_finalize(self, orelse, stack): + assert not self.body, self.body + assert not self.orelse, self.orelse + + assert len(orelse) == 2 + assert orelse[0] is None + return self.special_case + + def merge(self, node, span, stack): + + #======================================================================= + # Special Case for chained comparison + #======================================================================= + if self.detect_special_case(node, span): + self.special_case_merge(node, span, stack) + return + #======================================================================= + # End + #======================================================================= + + if isinstance(node, Jump): +# assert not span, span + print_ast(node) + print_ast(self) +# adsff + + print(span) + + if span: + print_ast(span[0]) + self.orelse = [node] + + else: + assert not self.test + assert not self.body + + body = span + orelse = [] + + if node.cond: + body,orelse = orelse, body + + + jif = JumpX(test=node.test, body=body, orelse=orelse, to=self.to) + return copy_location(jif, node) + + self.test = node.test + self.body = span + self.cond = node.cond + + def finalize(self, orelse, stack): + + if self.special_case: + return self.special_case_finalize(orelse, stack) + + if self.orelse: + # This happens for nest if exprs (e.g. 
1 if a else 2 if b else 3) + assert len(self.orelse) == 1 + orelse = [self.orelse[0].finalize(orelse, stack)] + + if self.cond: + body = orelse + orelse = self.body + else: + body = self.body + + _if = _ast.If(self.test, body, orelse) + return copy_location(_if, self) + + +class JumpIf(CtlFlow): + pass + +def mkboolop(left, right, cond): + op = _ast.Or() if cond else _ast.And() + + left, right = mkexpr(left), mkexpr(right) + + if isinstance(left, _ast.BoolOp) and isinstance(left.op, type(op)): + left.values.append(right) + bool_op = left + else: + bool_op = _ast.BoolOp(op, [left, right]) + return bool_op + + +class PopJumpIf(JumpIf): + _attributes = 'lineno', 'col_offset', 'to', 'cond' + _fields = 'test', + + def __repr__(self): + return '%s(%r)' % (type(self).__name__, self.test) + + def merge(self, node, span, stack): + assert not span, span + if isinstance(node, PopJumpIf): #Boolean and/or + bool_op = mkboolop(node.test, self.test, node.cond) + self.test = copy_location(bool_op, node) + elif isinstance(node, JumpX): #This is the elif clause in an if statement + node.insert_orelse(self, span, stack) + return node + else: + assert False, node + +class JumpOrPop(JumpIf): + _attributes = 'lineno', 'col_offset', 'to', 'cond' + _fields = 'test', + def __repr__(self): + return '%s(%r)' % (type(self).__name__, self.test) + + def merge(self, node, span, stack): + assert not span, span + assert isinstance(node, (JumpIf)), node + + if isinstance(node.test, _ast.Compare) and isinstance(self.test, _ast.Compare) and node.test.comparators[-1] is self.test.left: + node.test.comparators.extend(self.test.comparators) + node.test.ops.extend(self.test.ops) + self.test = node.test + else: + bool_op = mkboolop(node.test, self.test, node.cond) + self.test = copy_location(bool_op, node) + + def finalize(self, span, stack): + + assert len(span) == 1 + node = span[0] + bool_op = mkboolop(self.test, node, self.cond) + return copy_location(bool_op, node) + +def rsplit(lst, key): + matches 
= [x for x in lst if key(x)] + if matches: + idx = lst.index(matches[-1]) + return lst[:idx], lst[idx], lst[idx + 1:] + else: + return lst[:], None, [] + + +class BUILD_MAP(_ast.AST): + _attributes = 'lineno', 'col_offset', 'nremain' + _fields = 'keys', 'values' + + diff --git a/meta/decompiler/instructions.py b/meta/decompiler/instructions.py index dab4171..7133ae7 100644 --- a/meta/decompiler/instructions.py +++ b/meta/decompiler/instructions.py @@ -5,13 +5,15 @@ ''' from __future__ import print_function -from meta.decompiler.simple_instructions import SimpleInstructions -from meta.decompiler.control_flow_instructions import CtrlFlowInstructions +#from meta.decompiler.simple_instructions import SimpleInstructions +#from meta.decompiler.control_flow_instructions import CtrlFlowInstructions import _ast from meta.asttools import print_ast from meta.utils import py3, py3op, py2op from meta.decompiler.expression_mutator import ExpressionMutator from ast import copy_location as cpy_loc +from meta.decompiler.nextgen import InstructionVisitor +from meta.decompiler.transformers import mkstmnt, pop_top function_ops = ['CALL_FUNCTION', 'CALL_FUNCTION_KW', 'CALL_FUNCTION_VAR', 'CALL_FUNCTION_VAR_KW'] @@ -61,9 +63,10 @@ def pop_return(stmnts): def make_module(code): - from meta.decompiler.disassemble import disassemble - instructions = Instructions(disassemble(code)) - stmnts = instructions.stmnt() + stmnts = InstructionVisitor(code).make_ast() + stmnts = [pop_top(stmnt) for stmnt in stmnts] +# instructions = Instructions(disassemble(code)) +# stmnts = instructions.stmnt() doc = pop_doc(stmnts) pop_return(stmnts) @@ -79,11 +82,8 @@ def make_module(code): @py2op def make_function(code, defaults=None, lineno=0): - from meta.decompiler.disassemble import disassemble - - instructions = Instructions(disassemble(code)) - - stmnts = instructions.stmnt() + + stmnts = InstructionVisitor(code).make_ast() if code.co_flags & 2: vararg = None @@ -130,11 +130,8 @@ def make_function(code, 
defaults=None, lineno=0): @make_function.py3op def make_function(code, defaults=None, annotations=(), kw_defaults=(), lineno=0): - from meta.decompiler.disassemble import disassemble - instructions = Instructions(disassemble(code)) - - stmnts = instructions.stmnt() + stmnts = InstructionVisitor(code).make_ast() if code.co_flags & 2: vararg = None @@ -260,7 +257,8 @@ def bitrange(x, start, stop): return ((1 << (stop - start)) - 1) & (x >> start) level = 0 -class Instructions(CtrlFlowInstructions, SimpleInstructions): + +class _Instructions_(object): def __init__(self, ilst, stack_items=None, jump_map=False, outer_scope=None): self.ilst_processed = [] @@ -301,7 +299,7 @@ def push_ast_item(self, item): self._ast_stack.append(item) def decompile_block(self, ilst, stack_items=None, jump_map=False): - return Instructions(ilst, stack_items=stack_items, jump_map=jump_map, outer_scope=self) + return _Instructions_(ilst, stack_items=stack_items, jump_map=jump_map, outer_scope=self) def stmnt(self): diff --git a/meta/decompiler/nextgen.py b/meta/decompiler/nextgen.py new file mode 100644 index 0000000..bf9fb72 --- /dev/null +++ b/meta/decompiler/nextgen.py @@ -0,0 +1,191 @@ +from meta.decompiler.disassemble import disassemble +import _ast +from meta.asttools.visitors.print_visitor import print_ast +from meta.asttools.visitors.pysourcegen import dump_python_source +from meta.decompiler import extra_nodes as nodes +from meta.decompiler.transformers import mkstmnt, mkexpr + +from meta.decompiler.simple_instructions import SimpleInstructionMixin +from meta.decompiler.assignments import AssignmentsMixin + + +DEBG = False + +def indexof(lst, test, start=0): + items = [item for item in lst[start:] if test(item)] + if not items: + return -1 + fist_item = items[0] + return lst.index(fist_item, start) + +def POP_JUMP_IF(cond): + def visitor(self, instr): + test = self.pop_ast_item() + node = nodes.PopJumpIf(test=test, to=instr.to, cond=cond) + nodes.cpy_loc(node, instr) + 
self.push_ast_item(node) + return visitor + + +def JUMP_IF_X_OR_POP(cond): + def visitor(self, instr): + test = self.pop_ast_item() + node = nodes.JumpOrPop(test=test, to=instr.to, cond=cond) + nodes.cpy_loc(node, instr) + self.push_ast_item(node) + return visitor + +class InstructionVisitor(SimpleInstructionMixin, AssignmentsMixin): + def __init__(self, code): + self.instructions = disassemble(code) + self.code = code + self._ast_stack = [] + self.labels = {} + self.jump_or_pop_ctx = False + + def make_ast(self): + for idx, instr in enumerate(self.instructions): + self.idx = idx + self.visit(instr) + return [mkstmnt(node) for node in self._ast_stack] + + def visit(self, instr): + if instr.is_jump: + self.labels.setdefault(instr.to, []) + + if instr.i in self.labels: + # self.handle_label(instr.i) + self.merge_control_flow(instr.i) + + method_name = 'visit_%s' % instr.opname.replace('+', '_') + visitor_method = getattr(self, method_name) + result = visitor_method(instr) + + if DEBG: self.print_stack_state(instr) + + return result + + def print_stack_state(self, msg, width=50): + tmplt = '%%-%is --> %%s' % (width,) + print tmplt % (msg, self._ast_stack) + + def pop_ast_item(self): + node = self._ast_stack.pop() + push_back = [] + while isinstance(node, nodes.CtlFlow): #Don't pop + push_back.insert(0, node) + node = self._ast_stack.pop() + self._ast_stack.extend(push_back) + return node + + + def push_ast_item(self, item): + self._ast_stack.append(item) + + def merge_control_flow(self, i): + first_branch = lambda node: isinstance(node, nodes.CtlFlow) and node.to == i + next_branch = lambda node: isinstance(node, nodes.CtlFlow) + + while True: + idx = indexof(self._ast_stack, first_branch) + if idx == -1: + break + node = self._ast_stack.pop(idx) + + next_idx = indexof(self._ast_stack, next_branch, idx) + if next_idx == -1: + span = self._ast_stack[idx:] + del self._ast_stack[idx:] + self.push_ast_item(node.finalize(span, self._ast_stack)) + else: + next_node = 
self._ast_stack.pop(next_idx) + span = self._ast_stack[idx:next_idx] + del self._ast_stack[idx:next_idx] + new_node = next_node.merge(node, span, self._ast_stack) + if new_node is None: + new_node = next_node + + self._ast_stack.insert(idx, new_node) + + + if DEBG: self.print_stack_state("Merge Flow %02i" % (i)) + + + visit_POP_JUMP_IF_FALSE = POP_JUMP_IF(False) + visit_POP_JUMP_IF_TRUE = POP_JUMP_IF(True) + + visit_JUMP_IF_TRUE_OR_POP = JUMP_IF_X_OR_POP(True) + visit_JUMP_IF_FALSE_OR_POP = JUMP_IF_X_OR_POP(False) + + def visit_POP_TOP(self, instr): + value = self.pop_ast_item() + + if value is None: + self.push_ast_item(value) + return + elif isinstance(value, (_ast.ImportFrom)): + self.push_ast_item(value) + return + + value = mkexpr(value) + + node = _ast.Expr(value) + nodes.cpy_loc(node, instr) + self.push_ast_item(node) + + + def visit_JUMP(self, instr): + node = nodes.cpy_loc(nodes.Jump(test=None, body=[], orelse=[], cond=False, to=instr.to), instr) + self.push_ast_item(node) + + + visit_JUMP_ABSOLUTE = visit_JUMP + visit_JUMP_FORWARD = visit_JUMP + + + def visit_SETUP_LOOP(self, instr): + node = nodes.Loop(body=None, orelse=None) + nodes.cpy_loc(node, instr) + self.push_ast_item(node) + + def visit_POP_BLOCK(self, instr): + left, block, right = nodes.rsplit(self._ast_stack, key=lambda node: isinstance(node, nodes.Block) and not node.body) + self._ast_stack = left + assert not block.body + block.body = right + + self.push_ast_item(block) + + def visit_GET_ITER(self, instr): + value = self.pop_ast_item() + node = nodes.Iter(value) + nodes.cpy_loc(node, instr) + self.push_ast_item(node) + + def visit_FOR_ITER(self, instr): + value = self.pop_ast_item() + node = _ast.For(None, value, [], []) + nodes.cpy_loc(node, instr) + self.push_ast_item(node) + +if __name__ == '__main__': + DEBG = True + def foo(): + if a: + b + elif c: + d + elif e: + f + statements = InstructionVisitor(foo.func_code).make_ast() + + print '----' + for stmnt in statements: + print_ast(stmnt) 
+ print '----' + print '----' + for stmnt in statements: + print dump_python_source(stmnt) + print '----' + + # print "statements", [serialize(stmnt) for stmnt in statements] diff --git a/meta/decompiler/print_statements.py b/meta/decompiler/print_statements.py new file mode 100644 index 0000000..a8029f5 --- /dev/null +++ b/meta/decompiler/print_statements.py @@ -0,0 +1,69 @@ +''' +Created on Nov 30, 2012 + +@author: sean +''' +from _ast import Print as _ast_Print + +class PrintMixin(object): + + + + + def visit_PRINT_ITEM(self, instr): + + item = self.pop_ast_item() + + if self._ast_stack: + print_ = self._ast_stack[-1] + else: + print_ = None + + if isinstance(print_, _ast_Print) and not print_.nl and print_.dest is None: + print_.values.append(item) + else: + print_ = _ast_Print(dest=None, values=[item], nl=False, lineno=instr.lineno, col_offset=0) + self.push_ast_item(print_) + + def visit_PRINT_NEWLINE(self, instr): + item = self._ast_stack[-1] if self._ast_stack else None + + if isinstance(item, _ast_Print) and not item.nl and item.dest is None: + item.nl = True + else: + print_ = _ast_Print(dest=None, values=[], nl=True, lineno=instr.lineno, col_offset=0) + self.push_ast_item(print_) + + def visit_PRINT_ITEM_TO(self, instr): + + stream = self.pop_ast_item() + + print_ = None + + if isinstance(stream, _ast_Print) and not stream.nl: + print_ = stream + stream = self.pop_ast_item() + dup_print = self.pop_ast_item() + assert dup_print is print_ + self.push_ast_item(stream) + else: + print_ = _ast_Print(dest=stream, values=[], nl=False, lineno=instr.lineno, col_offset=0) + + item = self.pop_ast_item() + + print_.values.append(item) + self.push_ast_item(print_) + + def visit_PRINT_NEWLINE_TO(self, instr): + + item = self.pop_ast_item() + stream = self.pop_ast_item() + + self.push_ast_item(item) + + if isinstance(item, _ast_Print) and not item.nl and item.dest is stream: + item.nl = True + else: + print_ = _ast_Print(dest=stream, values=[], nl=True, lineno=instr.lineno, col_offset=0) + 
self.push_ast_item(print_) + diff --git a/meta/decompiler/simple_instructions.py b/meta/decompiler/simple_instructions.py index 26b86ed..dbb56a2 100644 --- a/meta/decompiler/simple_instructions.py +++ b/meta/decompiler/simple_instructions.py @@ -13,6 +13,8 @@ from meta.asttools.visitors.print_visitor import print_ast, dump_ast from meta.asttools import cmp_ast from meta.decompiler.expression_mutator import ExpressionMutator +from meta.decompiler.transformers import mkexpr +from meta.decompiler import extra_nodes as nodes if py3: class _ast_Print: pass @@ -29,18 +31,18 @@ def isNone(node): def BINARY_(OP): - def BINARY_OP(self, instr): + def visit_BINARY_OP(self, instr): right = self.pop_ast_item() left = self.pop_ast_item() add = _ast.BinOp(left=left, right=right, op=OP(), lineno=instr.lineno, col_offset=0) self.push_ast_item(add) - return BINARY_OP + return visit_BINARY_OP def INPLACE_(OP): - def INPLACE_OP(self, instr): + def visit_INPLACE_OP(self, instr): right = self.pop_ast_item() left = self.pop_ast_item() @@ -49,18 +51,18 @@ def INPLACE_OP(self, instr): self.push_ast_item(aug_assign) - return INPLACE_OP + return visit_INPLACE_OP def UNARY_(OP): - def UNARY_OP(self, instr): + def visit_UNARY_OP(self, instr): expr = self.pop_ast_item() not_ = _ast.UnaryOp(op=OP(), operand=expr, lineno=instr.lineno, col_offset=0) self.push_ast_item(not_) - return UNARY_OP + return visit_UNARY_OP CMP_OPMAP = {'>=' :_ast.GtE, '<=' :_ast.LtE, @@ -92,49 +94,48 @@ def make_const(arg, lineno=0, col_offset=0): const = arg return const - -class SimpleInstructions(object): - def LOAD_CONST(self, instr): - const = make_const(instr.arg, lineno=instr.lineno, col_offset=0) +class SimpleInstructionMixin(object): + def visit_LOAD_CONST(self, instr): + const = make_const(instr.arg, lineno=instr.lineno, col_offset=0) self.push_ast_item(const) - def LOAD_NAME(self, instr): + def visit_LOAD_NAME(self, instr): name = _ast.Name(id=instr.arg, ctx=_ast.Load(), lineno=instr.lineno, col_offset=0) 
self.push_ast_item(name) - def LOAD_DEREF(self, instr): + def visit_LOAD_DEREF(self, instr): name = _ast.Name(id=instr.arg, ctx=_ast.Load(), lineno=instr.lineno, col_offset=0) self.push_ast_item(name) - def CALL_FUNCTION_VAR(self, instr): + def visit_CALL_FUNCTION_VAR(self, instr): arg = self.pop_ast_item() - self.CALL_FUNCTION(instr) + self.visit_CALL_FUNCTION(instr) callfunc = self.pop_ast_item() callfunc.starargs = arg self.push_ast_item(callfunc) - def CALL_FUNCTION_KW(self, instr): + def visit_CALL_FUNCTION_KW(self, instr): kwarg = self.pop_ast_item() - self.CALL_FUNCTION(instr) + self.visit_CALL_FUNCTION(instr) callfunc = self.pop_ast_item() callfunc.kwargs = kwarg self.push_ast_item(callfunc) - def CALL_FUNCTION_VAR_KW(self, instr): + def visit_CALL_FUNCTION_VAR_KW(self, instr): kwarg = self.pop_ast_item() arg = self.pop_ast_item() - self.CALL_FUNCTION(instr) + self.visit_CALL_FUNCTION(instr) callfunc = self.pop_ast_item() callfunc.starargs = arg @@ -142,7 +143,7 @@ def CALL_FUNCTION_VAR_KW(self, instr): self.push_ast_item(callfunc) - def CALL_FUNCTION(self, instr): + def visit_CALL_FUNCTION(self, instr): nkwargs = instr.oparg >> 8 nargs = (~(nkwargs << 8)) & instr.oparg @@ -181,94 +182,23 @@ def CALL_FUNCTION(self, instr): self.push_ast_item(callfunc) - def LOAD_FAST(self, instr): + def visit_LOAD_FAST(self, instr): name = _ast.Name(id=instr.arg, ctx=_ast.Load(), lineno=instr.lineno, col_offset=0) self.push_ast_item(name) - def LOAD_GLOBAL(self, instr): + def visit_LOAD_GLOBAL(self, instr): name = _ast.Name(id=instr.arg, ctx=_ast.Load(), lineno=instr.lineno, col_offset=0) self.push_ast_item(name) - def STORE_FAST(self, instr): - self.STORE_NAME(instr) - - def STORE_DEREF(self, instr): - self.STORE_NAME(instr) - - def STORE_NAME(self, instr): - - value = self.pop_ast_item() - value = self.process_ifexpr(value) - - if isinstance(value, _ast.Import): - - if value.from_: - assert isinstance(self._ast_stack[-1], _ast.ImportFrom) - from_ = self.pop_ast_item() - - 
as_name = instr.arg - name = from_.names[0].name - if as_name != name: - from_.names[0].asname = as_name - - self.push_ast_item(from_) - else: - as_name = instr.arg - if value.names[0].asname is None: - base_name = value.names[0].name.split('.')[0] - if base_name != as_name: - value.names[0].asname = as_name - - self.push_ast_item(value) - - elif isinstance(value, (_ast.Attribute)) and isinstance(value.value, (_ast.Import)): - asname = instr.arg - value = value.value - value.names[0].asname = asname - - self.push_ast_item(value) - - elif isinstance(value, (_ast.ClassDef, _ast.FunctionDef)): - as_name = instr.arg - value.name = as_name - self.push_ast_item(value) - elif isinstance(value, _ast.AugAssign): - self.push_ast_item(value) - elif isinstance(value, _ast.Assign): - _ = self.pop_ast_item() - assname = _ast.Name(instr.arg, _ast.Store(), lineno=instr.lineno, col_offset=0) - value.targets.append(assname) - self.push_ast_item(value) - else: - - assname = _ast.Name(instr.arg, _ast.Store(), lineno=instr.lineno, col_offset=0) - - assign = _ast.Assign(targets=[assname], value=value, lineno=instr.lineno, col_offset=0) - self.push_ast_item(assign) - @py3op - def STORE_LOCALS(self, instr): - 'remove Locals from class def' - self.pop_ast_item() - - def STORE_GLOBAL(self, instr): - - if not isinstance(self._ast_stack[0], _ast.Global): - self._ast_stack.insert(0, _ast.Global(names=[])) - - if instr.arg not in self._ast_stack[0].names: - self._ast_stack[0].names.append(instr.arg) - - self.STORE_NAME(instr) - - def RETURN_VALUE(self, instr): + def visit_RETURN_VALUE(self, instr): value = self.pop_ast_item() - value = self.process_ifexpr(value) + value = mkexpr(value) ret = _ast.Return(value=value, lineno=instr.lineno, col_offset=0) self.push_ast_item(ret) - def LOAD_ATTR(self, instr): + def visit_LOAD_ATTR(self, instr): name = self.pop_ast_item() @@ -278,19 +208,7 @@ def LOAD_ATTR(self, instr): self.push_ast_item(get_attr) - def STORE_ATTR(self, instr): - - attrname = 
instr.arg - node = self.pop_ast_item() - expr = self.pop_ast_item() - expr = self.process_ifexpr(expr) - - assattr = _ast.Attribute(value=node, attr=attrname, ctx=_ast.Store(), lineno=instr.lineno, col_offset=0) - set_attr = _ast.Assign(targets=[assattr], value=expr, lineno=instr.lineno, col_offset=0) - - self.push_ast_item(set_attr) - - def IMPORT_NAME(self, instr): + def visit_IMPORT_NAME(self, instr): from_ = self.pop_ast_item() @@ -303,17 +221,23 @@ def IMPORT_NAME(self, instr): self.push_ast_item(import_) - def IMPORT_FROM(self, instr): + def visit_IMPORT_FROM(self, instr): import_ = self.pop_ast_item() - - names = [_ast.alias(instr.arg, None)] - modname = import_.names[0].name - from_ = _ast.ImportFrom(module=modname, names=names, level=0, lineno=instr.lineno, col_offset=0) + + alias = _ast.alias(instr.arg, None) + assert len(import_.names) == 1, import_.names + + if isinstance(import_, _ast.ImportFrom): + from_ = import_ + from_.names.append(alias) + else: + modname = import_.names[0].name + from_ = _ast.ImportFrom(module=modname, names=[alias], level=0, lineno=instr.lineno, col_offset=0) self.push_ast_item(from_) - self.push_ast_item(import_) +# self.push_ast_item(import_) - def IMPORT_STAR(self, instr): + def visit_IMPORT_STAR(self, instr): import_ = self.pop_ast_item() names = import_.names @@ -323,11 +247,11 @@ def IMPORT_STAR(self, instr): self.push_ast_item(from_) - def process_ifexpr(self, node): - if node == 'LOAD_LOCALS': #Special directive - return node - - return ExpressionMutator().visit(node) +# def visit_process_ifexpr(self, node): +# if node == 'LOAD_LOCALS': #Special directive +# return node +# +# return ExpressionMutator().visit(node) def POP_TOP(self, instr): @@ -345,7 +269,7 @@ def POP_TOP(self, instr): discard = _ast.Expr(value=node, lineno=instr.lineno, col_offset=0) self.push_ast_item(discard) - def ROT_TWO(self, instr): + def _visit_ROT_TWO(self, instr): one = self.pop_ast_item() two = self.pop_ast_item() @@ -374,42 +298,42 @@ def 
ROT_TWO(self, instr): self.push_ast_item(one) self.push_ast_item(two) - BINARY_ADD = BINARY_(_ast.Add) - BINARY_SUBTRACT = BINARY_(_ast.Sub) - BINARY_DIVIDE = BINARY_(_ast.Div) - BINARY_TRUE_DIVIDE = BINARY_(_ast.Div) - BINARY_MULTIPLY = BINARY_(_ast.Mult) - BINARY_FLOOR_DIVIDE = BINARY_(_ast.FloorDiv) - BINARY_POWER = BINARY_(_ast.Pow) - - BINARY_AND = BINARY_(_ast.BitAnd) - BINARY_OR = BINARY_(_ast.BitOr) - BINARY_XOR = BINARY_(_ast.BitXor) - - BINARY_LSHIFT = BINARY_(_ast.LShift) - BINARY_RSHIFT = BINARY_(_ast.RShift) - BINARY_MODULO = BINARY_(_ast.Mod) - - INPLACE_ADD = INPLACE_(_ast.Add) - INPLACE_SUBTRACT = INPLACE_(_ast.Sub) - INPLACE_DIVIDE = INPLACE_(_ast.Div) - INPLACE_FLOOR_DIVIDE = INPLACE_(_ast.FloorDiv) - INPLACE_MULTIPLY = INPLACE_(_ast.Mult) - - INPLACE_AND = INPLACE_(_ast.BitAnd) - INPLACE_OR = INPLACE_(_ast.BitOr) - INPLACE_LSHIFT = INPLACE_(_ast.LShift) - INPLACE_RSHIFT = INPLACE_(_ast.RShift) - INPLACE_POWER = INPLACE_(_ast.Pow) - INPLACE_MODULO = INPLACE_(_ast.Mod) - INPLACE_XOR = INPLACE_(_ast.BitXor) - - UNARY_NOT = UNARY_(_ast.Not) - UNARY_NEGATIVE = UNARY_(_ast.USub) - UNARY_INVERT = UNARY_(_ast.Invert) - UNARY_POSITIVE = UNARY_(_ast.UAdd) - - def COMPARE_OP(self, instr): + visit_BINARY_ADD = BINARY_(_ast.Add) + visit_BINARY_SUBTRACT = BINARY_(_ast.Sub) + visit_BINARY_DIVIDE = BINARY_(_ast.Div) + visit_BINARY_TRUE_DIVIDE = BINARY_(_ast.Div) + visit_BINARY_MULTIPLY = BINARY_(_ast.Mult) + visit_BINARY_FLOOR_DIVIDE = BINARY_(_ast.FloorDiv) + visit_BINARY_POWER = BINARY_(_ast.Pow) + + visit_BINARY_AND = BINARY_(_ast.BitAnd) + visit_BINARY_OR = BINARY_(_ast.BitOr) + visit_BINARY_XOR = BINARY_(_ast.BitXor) + + visit_BINARY_LSHIFT = BINARY_(_ast.LShift) + visit_BINARY_RSHIFT = BINARY_(_ast.RShift) + visit_BINARY_MODULO = BINARY_(_ast.Mod) + + visit_INPLACE_ADD = INPLACE_(_ast.Add) + visit_INPLACE_SUBTRACT = INPLACE_(_ast.Sub) + visit_INPLACE_DIVIDE = INPLACE_(_ast.Div) + visit_INPLACE_FLOOR_DIVIDE = INPLACE_(_ast.FloorDiv) + visit_INPLACE_MULTIPLY 
= INPLACE_(_ast.Mult) + + visit_INPLACE_AND = INPLACE_(_ast.BitAnd) + visit_INPLACE_OR = INPLACE_(_ast.BitOr) + visit_INPLACE_LSHIFT = INPLACE_(_ast.LShift) + visit_INPLACE_RSHIFT = INPLACE_(_ast.RShift) + visit_INPLACE_POWER = INPLACE_(_ast.Pow) + visit_INPLACE_MODULO = INPLACE_(_ast.Mod) + visit_INPLACE_XOR = INPLACE_(_ast.BitXor) + + visit_UNARY_NOT = UNARY_(_ast.Not) + visit_UNARY_NEGATIVE = UNARY_(_ast.USub) + visit_UNARY_INVERT = UNARY_(_ast.Invert) + visit_UNARY_POSITIVE = UNARY_(_ast.UAdd) + + def visit_COMPARE_OP(self, instr): op = instr.arg @@ -420,12 +344,9 @@ def COMPARE_OP(self, instr): OP = CMP_OPMAP[op] compare = _ast.Compare(left=expr, ops=[OP()], comparators=[right], lineno=instr.lineno, col_offset=0) - self.push_ast_item(compare) - - - def YIELD_VALUE(self, instr): + def visit_YIELD_VALUE(self, instr): value = self.pop_ast_item() yield_ = _ast.Yield(value=value, lineno=instr.lineno, col_offset=0) @@ -434,7 +355,7 @@ def YIELD_VALUE(self, instr): self.seen_yield = True - def BUILD_LIST(self, instr): + def visit_BUILD_LIST(self, instr): nitems = instr.oparg @@ -445,7 +366,7 @@ def BUILD_LIST(self, instr): self.push_ast_item(list_) - def BUILD_TUPLE(self, instr): + def visit_BUILD_TUPLE(self, instr): nitems = instr.oparg @@ -460,7 +381,7 @@ def BUILD_TUPLE(self, instr): self.push_ast_item(list_) - def BUILD_SET(self, instr): + def visit_BUILD_SET(self, instr): nitems = instr.oparg @@ -471,72 +392,43 @@ def BUILD_SET(self, instr): self.push_ast_item(list_) - def BUILD_MAP(self, instr): - - nitems = instr.oparg - keys = [] - values = [] - for i in range(nitems): - map_instrs = [] - while 1: - new_instr = self.ilst.pop(0) - - if new_instr.opname == 'STORE_MAP': - break - - map_instrs.append(new_instr) - - items = self.decompile_block(map_instrs).stmnt() - assert len(items) == 2 - - values.append(items[0]) - keys.append(items[1]) - - - list_ = _ast.Dict(keys=keys, values=values, lineno=instr.lineno, col_offset=0) - self.push_ast_item(list_) - - def 
UNPACK_SEQUENCE(self, instr): - nargs = instr.oparg - - nodes = [] - ast_tuple = _ast.Tuple(elts=nodes, ctx=_ast.Store(), lineno=instr.lineno, col_offset=0) - for i in range(nargs): - nex_instr = self.ilst.pop(0) - self.push_ast_item(None) - self.visit(nex_instr) - - node = self.pop_ast_item() - nodes.append(node.targets[0]) - - expr = self.pop_ast_item() - if isinstance(expr, _ast.Assign): - assgn = expr - assgn.targets.append(ast_tuple) - - value_dup = self.pop_ast_item() - - assert cmp_ast(assgn.value, value_dup) + def visit_BUILD_MAP(self, instr): + build_map = nodes.BUILD_MAP(keys=[], values=[], nremain=instr.oparg) + self.push_ast_item(nodes.cpy_loc(build_map, instr)) + + def visit_STORE_MAP(self, instr): + + key = self.pop_ast_item() + value = self.pop_ast_item() + + build_map = self.pop_ast_item() + build_map.nremain -= 1 + + build_map.keys.append(key) + build_map.values.append(value) + + if build_map.nremain > 0: + node = build_map + if build_map.nremain == 0: + node = _ast.Dict(keys=build_map.keys, values=build_map.values, lineno=instr.lineno, col_offset=0) - else: - assgn = _ast.Assign(targets=[ast_tuple], value=expr, lineno=instr.lineno, col_offset=0) - self.push_ast_item(assgn) + self.push_ast_item(node) - def DELETE_NAME(self, instr): + def visit_DELETE_NAME(self, instr): name = _ast.Name(id=instr.arg, ctx=_ast.Del(), lineno=instr.lineno, col_offset=0) delete = _ast.Delete(targets=[name], lineno=instr.lineno, col_offset=0) self.push_ast_item(delete) - def DELETE_FAST(self, instr): + def visit_DELETE_FAST(self, instr): name = _ast.Name(id=instr.arg, ctx=_ast.Del(), lineno=instr.lineno, col_offset=0) delete = _ast.Delete(targets=[name], lineno=instr.lineno, col_offset=0) self.push_ast_item(delete) - def DELETE_ATTR(self, instr): + def visit_DELETE_ATTR(self, instr): expr = self.pop_ast_item() attr = _ast.Attribute(value=expr, attr=instr.arg, ctx=_ast.Del(), lineno=instr.lineno, col_offset=0) @@ -544,10 +436,10 @@ def DELETE_ATTR(self, instr): delete = 
_ast.Delete(targets=[attr], lineno=instr.lineno, col_offset=0) self.push_ast_item(delete) - def EXEC_STMT(self, instr): - locals_ = self.pop_ast_item() - globals_ = self.pop_ast_item() - expr = self.pop_ast_item() + def visit_EXEC_STMT(self, instr): + locals_ = mkexpr(self.pop_ast_item()) + globals_ = mkexpr(self.pop_ast_item()) + expr = mkexpr(self.pop_ast_item()) if locals_ is globals_: locals_ = None @@ -559,7 +451,7 @@ def EXEC_STMT(self, instr): self.push_ast_item(exec_) - def DUP_TOP(self, instr): + def visit_DUP_TOP(self, instr): expr = self.pop_ast_item() @@ -567,7 +459,7 @@ def DUP_TOP(self, instr): self.push_ast_item(expr) @py3op - def DUP_TOP_TWO(self, instr): + def visit_DUP_TOP_TWO(self, instr): expr1 = self.pop_ast_item() expr2 = self.pop_ast_item() @@ -578,7 +470,7 @@ def DUP_TOP_TWO(self, instr): self.push_ast_item(expr1) - def DUP_TOPX(self, instr): + def visit_DUP_TOPX(self, instr): exprs = [] for i in range(instr.oparg): @@ -588,7 +480,13 @@ def DUP_TOPX(self, instr): self._ast_stack.extend(exprs) self._ast_stack.extend(exprs) - def ROT_THREE(self, instr): + def visit_ROT_TWO(self, instr): + n0 = self.pop_ast_item() + n1 = self.pop_ast_item() + self.push_ast_item(n0) + self.push_ast_item(n1) + + def visit_ROT_THREE(self, instr): expr1 = self.pop_ast_item() expr2 = self.pop_ast_item() expr3 = self.pop_ast_item() @@ -598,7 +496,7 @@ def ROT_THREE(self, instr): self.push_ast_item(expr2) - def ROT_FOUR(self, instr): + def visit_ROT_FOUR(self, instr): expr1 = self.pop_ast_item() expr2 = self.pop_ast_item() expr3 = self.pop_ast_item() @@ -609,67 +507,6 @@ def ROT_FOUR(self, instr): self.push_ast_item(expr3) self.push_ast_item(expr2) - - - - def PRINT_ITEM(self, instr): - - item = self.pop_ast_item() - - if self._ast_stack: - print_ = self._ast_stack[-1] - else: - print_ = None - - if isinstance(print_, _ast_Print) and not print_.nl and print_.dest == None: - print_.values.append(item) - else: - print_ = _ast_Print(dest=None, values=[item], nl=False, 
lineno=instr.lineno, col_offset=0) - self.push_ast_item(print_) - - def PRINT_NEWLINE(self, instr): - item = self._ast_stack[-1] - - if isinstance(item, _ast_Print) and not item.nl and item.dest == None: - item.nl = True - else: - print_ = _ast_Print(dest=None, values=[], nl=True, lineno=instr.lineno, col_offset=0) - self.push_ast_item(print_) - - def PRINT_ITEM_TO(self, instr): - - stream = self.pop_ast_item() - - print_ = None - - if isinstance(stream, _ast_Print) and not stream.nl: - print_ = stream - stream = self.pop_ast_item() - dup_print = self.pop_ast_item() - assert dup_print is print_ - self.push_ast_item(stream) - else: - print_ = _ast_Print(dest=stream, values=[], nl=False, lineno=instr.lineno, col_offset=0) - - item = self.pop_ast_item() - - print_.values.append(item) - self.push_ast_item(print_) - - def PRINT_NEWLINE_TO(self, instr): - - item = self.pop_ast_item() - stream = self.pop_ast_item() - - self.push_ast_item(item) - - if isinstance(item, _ast_Print) and not item.nl and item.dest is stream: - item.nl = True - else: - print_ = _ast_Print(dest=stream, values=[], nl=True, lineno=instr.lineno, col_offset=0) - self.push_ast_item(print_) - - def format_slice(self, index, kw): if isinstance(index, _ast.Tuple): @@ -691,7 +528,7 @@ def format_slice(self, index, kw): index = _ast.Index(value=index, **kw) return index - def BINARY_SUBSCR(self, instr): + def visit_BINARY_SUBSCR(self, instr): index = self.pop_ast_item() value = self.pop_ast_item() @@ -704,7 +541,7 @@ def BINARY_SUBSCR(self, instr): self.push_ast_item(subscr) - def SLICE_0(self, instr): + def visit_SLICE_0(self, instr): 'obj[:]' value = self.pop_ast_item() @@ -714,7 +551,7 @@ def SLICE_0(self, instr): self.push_ast_item(subscr) - def SLICE_1(self, instr): + def visit_SLICE_1(self, instr): 'obj[lower:]' lower = self.pop_ast_item() value = self.pop_ast_item() @@ -725,7 +562,7 @@ def SLICE_1(self, instr): self.push_ast_item(subscr) - def SLICE_2(self, instr): + def visit_SLICE_2(self, instr): 
'obj[:stop]' upper = self.pop_ast_item() value = self.pop_ast_item() @@ -737,7 +574,7 @@ def SLICE_2(self, instr): self.push_ast_item(subscr) - def SLICE_3(self, instr): + def visit_SLICE_3(self, instr): 'obj[lower:upper]' upper = self.pop_ast_item() lower = self.pop_ast_item() @@ -750,7 +587,7 @@ def SLICE_3(self, instr): self.push_ast_item(subscr) - def BUILD_SLICE(self, instr): + def visit_BUILD_SLICE(self, instr): step = None upper = None @@ -771,7 +608,7 @@ def BUILD_SLICE(self, instr): self.push_ast_item(slice) - def STORE_SLICE_0(self, instr): + def visit_STORE_SLICE_0(self, instr): 'obj[:] = expr' value = self.pop_ast_item() expr = self.pop_ast_item() @@ -783,7 +620,7 @@ def STORE_SLICE_0(self, instr): assign = _ast.Assign(targets=[subscr], value=expr, **kw) self.push_ast_item(assign) - def STORE_SLICE_1(self, instr): + def visit_STORE_SLICE_1(self, instr): 'obj[lower:] = expr' lower = self.pop_ast_item() value = self.pop_ast_item() @@ -797,7 +634,7 @@ def STORE_SLICE_1(self, instr): self.push_ast_item(assign) - def STORE_SLICE_2(self, instr): + def visit_STORE_SLICE_2(self, instr): 'obj[:upper] = expr' upper = self.pop_ast_item() value = self.pop_ast_item() @@ -810,7 +647,7 @@ def STORE_SLICE_2(self, instr): assign = _ast.Assign(targets=[subscr], value=expr, **kw) self.push_ast_item(assign) - def STORE_SLICE_3(self, instr): + def visit_STORE_SLICE_3(self, instr): 'obj[lower:upper] = expr' upper = self.pop_ast_item() @@ -832,7 +669,7 @@ def STORE_SLICE_3(self, instr): self.push_ast_item(assign) - def DELETE_SLICE_0(self, instr): + def visit_DELETE_SLICE_0(self, instr): 'obj[:] = expr' value = self.pop_ast_item() @@ -843,7 +680,7 @@ def DELETE_SLICE_0(self, instr): delete = _ast.Delete(targets=[subscr], **kw) self.push_ast_item(delete) - def DELETE_SLICE_1(self, instr): + def visit_DELETE_SLICE_1(self, instr): 'obj[lower:] = expr' lower = self.pop_ast_item() value = self.pop_ast_item() @@ -856,7 +693,7 @@ def DELETE_SLICE_1(self, instr): 
self.push_ast_item(delete) - def DELETE_SLICE_2(self, instr): + def visit_DELETE_SLICE_2(self, instr): 'obj[:upper] = expr' upper = self.pop_ast_item() value = self.pop_ast_item() @@ -868,7 +705,7 @@ def DELETE_SLICE_2(self, instr): delete = _ast.Delete(targets=[subscr], **kw) self.push_ast_item(delete) - def DELETE_SLICE_3(self, instr): + def visit_DELETE_SLICE_3(self, instr): 'obj[lower:upper] = expr' upper = self.pop_ast_item() lower = self.pop_ast_item() @@ -881,26 +718,7 @@ def DELETE_SLICE_3(self, instr): delete = _ast.Delete(targets=[subscr], **kw) self.push_ast_item(delete) - def STORE_SUBSCR(self, instr): - index = self.pop_ast_item() - value = self.pop_ast_item() - expr = self.pop_ast_item() - - expr = self.process_ifexpr(expr) - - if isinstance(expr, _ast.AugAssign): - self.push_ast_item(expr) - else: - kw = dict(lineno=instr.lineno, col_offset=0) - - index = self.format_slice(index, kw) - - subscr = _ast.Subscript(value=value, slice=index, ctx=_ast.Store(), **kw) - - assign = _ast.Assign(targets=[subscr], value=expr, **kw) - self.push_ast_item(assign) - - def DELETE_SUBSCR(self, instr): + def visit_DELETE_SUBSCR(self, instr): index = self.pop_ast_item() value = self.pop_ast_item() @@ -914,7 +732,7 @@ def DELETE_SUBSCR(self, instr): self.push_ast_item(delete) @py2op - def RAISE_VARARGS(self, instr): + def visit_RAISE_VARARGS(self, instr): nargs = instr.oparg tback = None @@ -931,8 +749,8 @@ def RAISE_VARARGS(self, instr): lineno=instr.lineno, col_offset=0) self.push_ast_item(raise_) - @RAISE_VARARGS.py3op - def RAISE_VARARGS(self, instr): + @visit_RAISE_VARARGS.py3op + def visit_RAISE_VARARGS(self, instr): nargs = instr.oparg cause = None @@ -948,7 +766,7 @@ def RAISE_VARARGS(self, instr): self.push_ast_item(raise_) @py3op - def EXTENDED_ARG(self, instr): + def visit_EXTENDED_ARG(self, instr): code = self.pop_ast_item() argument_names = self.pop_ast_item() @@ -957,14 +775,14 @@ def EXTENDED_ARG(self, instr): kw = dict(lineno=instr.lineno, col_offset=0) 
for argument_name in argument_names.elts[::-1]: annotation = self.pop_ast_item() - arg = _ast.arg(annotation=annotation, arg=argument_name.s, **kw) #@UndefinedVariable + arg = _ast.arg(annotation=annotation, arg=argument_name.s, **kw) # @UndefinedVariable args.append(arg) for arg in args: self.push_ast_item(arg) self.push_ast_item(code) - @EXTENDED_ARG.py2op - def EXTENDED_ARG(self, instr): + @visit_EXTENDED_ARG.py2op + def visit_EXTENDED_ARG(self, instr): raise Exception("This is not available in python 2.x") diff --git a/meta/decompiler/tests/__init__.py b/meta/decompiler/tests/__init__.py index c3d1b7f..cf449b2 100644 --- a/meta/decompiler/tests/__init__.py +++ b/meta/decompiler/tests/__init__.py @@ -1,7 +1,7 @@ import unittest import sys import _ast -from meta.decompiler import make_module +from meta.decompiler.instructions import make_module from meta.asttools import cmp_ast, print_ast from meta.testing import py2, py2only from meta.asttools.visitors.pysourcegen import dump_python_source diff --git a/meta/decompiler/tests/test_decompiler.py b/meta/decompiler/tests/test_decompiler.py index 5e71198..8518dda 100644 --- a/meta/decompiler/tests/test_decompiler.py +++ b/meta/decompiler/tests/test_decompiler.py @@ -19,7 +19,7 @@ def test_logic1(self): def test_logic2(self): 'a or (b or c)' - self.statement('a or (b or c)') + self.statement('a or (b or c)', 'a or b or c') def test_if_expr_discard(self): @@ -212,7 +212,7 @@ def test_if4(self): self.statement('if a or b: c') def test_if5(self): - self.statement('if not a: c') + self.statement('if not a: c', 'if a: pass; else: c') def test_if6(self): self.statement('if not a or b: c') diff --git a/meta/decompiler/tests/test_logical_expr.py b/meta/decompiler/tests/test_logical_expr.py index f5d91c7..bea541c 100644 --- a/meta/decompiler/tests/test_logical_expr.py +++ b/meta/decompiler/tests/test_logical_expr.py @@ -38,10 +38,10 @@ def test_chain1_or(self): self.statement('x or a == b > f') def test_chain1_and2(self): - 
self.statement('x and a == b > f and z', 'x and (((a == b) and (b > f)) and z)') + self.statement('x and a == b > f and z', 'x and (a == b > f) and z') def test_chain1_or2(self): - self.statement('x or a == b > f or z', 'x or (((a == b) and (b > f)) or z)') + self.statement('x or a == b > f or z', 'x or (a == b > f) or z') def test_chain2_or(self): self.statement('x or a == b > f < e') diff --git a/meta/decompiler/tests/test_simple.py b/meta/decompiler/tests/test_simple.py index 2f2902f..2601bbe 100644 --- a/meta/decompiler/tests/test_simple.py +++ b/meta/decompiler/tests/test_simple.py @@ -241,7 +241,7 @@ def test_subscr(self): self.statement(stmnt) def test_subscr_assign(self): - stmnt = 'x[y] =z' + stmnt = 'x[y] = z' self.statement(stmnt) def test_subscr_del(self): diff --git a/meta/decompiler/transformers.py b/meta/decompiler/transformers.py new file mode 100644 index 0000000..669b0f2 --- /dev/null +++ b/meta/decompiler/transformers.py @@ -0,0 +1,92 @@ +''' +Created on Nov 30, 2012 + +@author: sean +''' +import _ast +from ast import copy_location, NodeTransformer + + +class ExprTransformer(NodeTransformer): + + def visit_If(self, node): + assert len(node.body) == 1 + assert len(node.orelse) == 1 + + body = self.visit(node.body[0]) + orelse = self.visit(node.orelse[0]) + _if_exp = _ast.IfExp(node.test, body, orelse) + copy_location(_if_exp, node) + return _if_exp + +# visit_If = visit_POP_JUMP_IF_FALSE +# +# def visit_POP_JUMP_IF_TRUE(self, node): +# +# assert len(node.body) == 1 +# assert len(node.orelse) == 1 +# +# not_test = _ast.UnaryOp(_ast.Not() , node.test) +# copy_location(not_test, node) +# +# _if_exp = _ast.IfExp(not_test, node.body[0], node.orelse[0]) +# copy_location(_if_exp, node) +# return _if_exp + +# def visit_BoolOp(self, node): +# +# if isinstance(node.op, _ast.And): +# i = 0 +# while i < len(node.values) - 1: +# left = mkexpr(node.values[i]) +# right = mkexpr(node.values[i + 1]) +# if isinstance(left, _ast.Compare) and isinstance(right, 
_ast.Compare): +# if left.comparators[-1] is right.left: +# node.values.pop(i + 1) +# left.comparators.extend(right.comparators) +# left.ops.extend(right.ops) +# i += 1 +# if len(node.values) == 1: +# return node.values[0] +## print 'ret', node +# +# return node + + def visit_BUILD_MAP(self, node): + return copy_location(_ast.Dict([],[]), node) + +class StatementTransformer(NodeTransformer): + pass +# def generic_visit(self, node): +# +# if isinstance(node, _ast.stmt): +# return NodeTransformer.generic_visit(self, node) +# else: +# return node +# +# def visit_Expr(self, node): +# new_node = mkexpr(node.value) +# copy_location(new_node, node) +# return new_node + +# def visit_POP_JUMP_IF_FALSE(self, node): +# _if = _ast.If(node.test, node.body, node.orelse) +# copy_location(_if, node) +# return _if +# +# def visit_POP_JUMP_IF_TRUE(self, node): +# not_test = _ast.UnaryOp(_ast.Not() , node.test) +# copy_location(not_test, node) +# _if = _ast.If(not_test, node.body, node.orelse) +# copy_location(_if, node) +# return _if + +mkexpr = lambda node: ExprTransformer().visit(node) +mkstmnt = lambda node: StatementTransformer().visit(node) + +def pop_top(stmnt): + if isinstance(stmnt, _ast.expr): + node = _ast.Expr(stmnt) + return copy_location(node, stmnt) + + return stmnt diff --git a/meta/scripts/depyc.py b/meta/scripts/depyc.py index beab575..b87197b 100644 --- a/meta/scripts/depyc.py +++ b/meta/scripts/depyc.py @@ -22,8 +22,17 @@ from meta import asttools from meta.asttools.visitors.pysourcegen import dump_python_source from meta.decompiler.recompile import dump_pyc +from ast import NodeVisitor, NodeTransformer py3 = sys.version_info.major >= 3 + +def dum_ast(node, fd, compact): + if compact: + print_ast(node, file=fd) + else: + json.dump(serialize(node), fd, indent=2) + + def depyc(args): @@ -66,7 +75,7 @@ def src_tool(args): print_code(code) return elif args.output_type == 'ast': - json.dump(serialize(mod_ast), args.output, indent=2) + dum_ast(mod_ast, args.output, 
args.compact) return elif args.output_type == 'python': print(source.decode(), file=args.output) @@ -87,6 +96,8 @@ def src_tool(args): raise Exception("unknow output type %r" % args.output_type) return + + def ast_tool(args): print("Reconstructing AST %r" % (args.input.name,), file=sys.stderr) @@ -125,6 +136,10 @@ def setup_parser(parser): parser.add_argument('-t', '--input-type', default='from_filename', dest='input_type', choices=['from_filename', 'python', 'pyc', 'ast']) parser.add_argument('-o', '--output', default='-', type=FileType('wb')) + + parser.add_argument('--compact', action='store_true', + help='print ast in a compact format (this is not reloadable)', + ) group = parser.add_mutually_exclusive_group() group.add_argument('--python', default='python', action='store_const', const='python',