From b64d04dc3affddd60ea21e54e12f7e137372ae1d Mon Sep 17 00:00:00 2001 From: JoYo <> Date: Wed, 19 Jan 2022 13:33:20 -0500 Subject: [PATCH] checking for unnecessary disassembly --- subdisassem/disassemble.py | 51 ++++++++++++++++++++++++-------------- subdisassem/schema.py | 3 +-- subdisassem/scripts.py | 29 +++++++++++++--------- 3 files changed, 51 insertions(+), 32 deletions(-) diff --git a/subdisassem/disassemble.py b/subdisassem/disassemble.py index 06dcdd8..d9bd1c7 100644 --- a/subdisassem/disassemble.py +++ b/subdisassem/disassemble.py @@ -30,20 +30,42 @@ import logging class _CapstoneBase: - def __init__(self, payload: bytes, offset: int = 0): - self.disassembly = list() - - for opcode in self.capstone.disasm(payload, offset): - self.disassembly.append(opcode) + def __init__(self): + self.arch = self.__class__.__name__ + pass def __repr__(self) -> str: return self.objdump def __len__(self) -> int: + if not self.disassembly: + logging.debug( + f"payload_missing: use {self.__class__}.load(payload=bytes) prior" + ) + return 0 + return len(self.disassembly) + def load(self, payload: bytes, offset: int = 0): + disassembly = list() + + for opcode in self.capstone.disasm(payload, offset): + disassembly.append(opcode) + + if disassembly: + self.disassembly = disassembly + else: + logging.debug("disassembly_empty") + self.disassembly = list() + @property def objdump(self) -> str: + if not self.disassembly: + logging.debug( + f"payload_missing: use {self.__class__}.load(payload=bytes) prior" + ) + return "" + opcodes = str() for opcode in self.disassembly: @@ -53,6 +75,12 @@ class _CapstoneBase: @property def disasm(self) -> list: + if not self.disassembly: + logging.debug( + f"payload_missing: use {self.__class__}.load(payload=bytes) prior" + ) + return [] + opcodes = list() for opcode in self.disassembly: @@ -70,64 +98,51 @@ class _CapstoneBase: class X86_intel(_CapstoneBase): capstone = Cs(CS_ARCH_X86, CS_MODE_16) - arch = "x86-16" class X86(_CapstoneBase): 
capstone = Cs(CS_ARCH_X86, CS_MODE_32) - arch = "x86-32" class X86_64(_CapstoneBase): capstone = Cs(CS_ARCH_X86, CS_MODE_64) - arch = "x86-64" class ARM(_CapstoneBase): capstone = Cs(CS_ARCH_ARM, CS_MODE_ARM) - arch = "ARM" class Thumb(_CapstoneBase): capstone = Cs(CS_ARCH_ARM, CS_MODE_THUMB) - arch = "Thumb" class ARM_64(_CapstoneBase): capstone = Cs(CS_ARCH_ARM64, CS_MODE_ARM) - arch = "ARM 64" class MIPS_32_eb(_CapstoneBase): capstone = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32 + CS_MODE_BIG_ENDIAN) - arch = "MIPS-32 (Big-endian)" class MIPS_64_el(_CapstoneBase): capstone = Cs(CS_ARCH_MIPS, CS_MODE_MIPS64 + CS_MODE_LITTLE_ENDIAN) - arch = "MIPS-64-EL (Little-endian)" class PPC_64(_CapstoneBase): capstone = Cs(CS_ARCH_PPC, CS_MODE_BIG_ENDIAN) - arch = "PPC-64" class Sparc(_CapstoneBase): capstone = Cs(CS_ARCH_SPARC, CS_MODE_BIG_ENDIAN) - arch = "Sparc" class SparcV9(_CapstoneBase): capstone = Cs(CS_ARCH_SPARC, CS_MODE_BIG_ENDIAN + CS_MODE_V9) - arch = "SparcV9" class SystemZ(_CapstoneBase): capstone = Cs(CS_ARCH_SYSZ, 0) - arch = "SystemZ" class XCore(_CapstoneBase): capstone = Cs(CS_ARCH_XCORE, 0) - arch = "XCore" diff --git a/subdisassem/schema.py b/subdisassem/schema.py index 28db24e..7294bf2 100644 --- a/subdisassem/schema.py +++ b/subdisassem/schema.py @@ -33,12 +33,11 @@ class Disassembly(Base): @property def values(self) -> dict: values_dict = { + "id": self.id, "arch": self.arch, - "checksum": self.checksum, "count": self.count, "size": self.size, "offset": self.offset, - "path": self.path, } return values_dict diff --git a/subdisassem/scripts.py b/subdisassem/scripts.py index 9291a5e..da8e213 100644 --- a/subdisassem/scripts.py +++ b/subdisassem/scripts.py @@ -89,7 +89,22 @@ def subdisassem_script(): for arch in archs: for offset in range(args.fuzz): - disasembler = arch(payload=raw_bytes, offset=offset) + disasembler = arch() + exists = ( + session.query(Disassembly) + .filter(Disassembly.checksum == checksum) + .filter(Disassembly.offset == offset) + 
.filter(Disassembly.arch == disasembler.arch) + .first() + ) + + if exists: + logging.debug( + f"subdiassembly_exists: {[disasembler.arch, checksum, offset]}" + ) + continue + + disasembler.load(payload=raw_bytes, offset=offset) row = Disassembly() row.arch = disasembler.arch row.checksum = checksum @@ -98,17 +113,7 @@ def subdisassem_script(): row.offset = offset row.opcodes = disasembler.objdump row.path = str(args.bin_path.absolute()) - - exists = ( - session.query(Disassembly) - .filter(Disassembly.checksum == row.checksum) - .filter(Disassembly.offset == row.offset) - .filter(Disassembly.arch == row.arch) - .first() - ) - - if not exists: - session.add(row) + session.add(row) session.commit()