diff --git a/Dockerfile b/Dockerfile index b097fe0..e7c394c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,12 +2,22 @@ FROM ubuntu:22.04 ENV DEBIAN_FRONTEND noninteractive RUN apt-get update && apt-get install --yes \ - python3-capstone \ + build-essential \ + git \ python3-setuptools \ python3-sqlalchemy \ && apt-get clean +RUN git clone -b next https://github.com/capstone-engine/capstone.git /capstone/ +WORKDIR /capstone/ +RUN sh /capstone/make.sh +RUN sh /capstone/make.sh install + COPY setup.py /app/ COPY subdisassem /app/subdisassem/ WORKDIR /app/ RUN python3 setup.py install + +RUN useradd -m subdisassem +WORKDIR /home/subdisassem +ENV HOME /home/subdisassem diff --git a/subdisassem/disassemble.py b/subdisassem/disassemble.py index 25f9b4b..f2c1eac 100644 --- a/subdisassem/disassemble.py +++ b/subdisassem/disassemble.py @@ -32,6 +32,9 @@ import logging class _CapstoneBase: def __init__(self, payload: bytes, offset: int = 0): self.arch = self.__class__.__name__ + self.capstone.skipdata = True + self.capstone.skipdata_setup = ("unknown", None, None) + disassembly = list() for opcode in self.capstone.disasm(payload, offset): @@ -66,65 +69,74 @@ class _CapstoneBase: opcodes = list() for opcode in self.disassembly: - opcodes.append( - [ - opcode.address, - opcode.mnemonic, - opcode.op_str, - opcode.size, - ] - ) + opcodes.append([opcode.address, opcode.mnemonic, opcode.op_str]) return opcodes + @property + def rates(self) -> list: + mnemonics = list() -class X86_intel(_CapstoneBase): + for opcode in self.disassembly: + mnemonics.append(opcode.mnemonic) + + _rates = dict() + + for mnemonic in set(mnemonics): + _rates[mnemonic] = mnemonics.count(mnemonic) + + listed = sorted(((value, key) for (key, value) in _rates.items()), reverse=True) + + return listed + + +class x86_16(_CapstoneBase): capstone = Cs(CS_ARCH_X86, CS_MODE_16) -class X86(_CapstoneBase): +class x86_32(_CapstoneBase): capstone = Cs(CS_ARCH_X86, CS_MODE_32) -class X86_64(_CapstoneBase): +class x86_64(_CapstoneBase): capstone = Cs(CS_ARCH_X86, CS_MODE_64) -class ARM(_CapstoneBase): +class armv7(_CapstoneBase): capstone = Cs(CS_ARCH_ARM, CS_MODE_ARM) -class Thumb(_CapstoneBase): +class thumb2(_CapstoneBase): capstone = Cs(CS_ARCH_ARM, CS_MODE_THUMB) -class ARM_64(_CapstoneBase): +class aarch64(_CapstoneBase): capstone = Cs(CS_ARCH_ARM64, CS_MODE_ARM) -class MIPS_32_eb(_CapstoneBase): +class mips32(_CapstoneBase): capstone = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32 + CS_MODE_BIG_ENDIAN) -class MIPS_64_el(_CapstoneBase): +class mips64_el(_CapstoneBase): capstone = Cs(CS_ARCH_MIPS, CS_MODE_MIPS64 + CS_MODE_LITTLE_ENDIAN) -class PPC_64(_CapstoneBase): +class ppc64(_CapstoneBase): capstone = Cs(CS_ARCH_PPC, CS_MODE_BIG_ENDIAN) -class Sparc(_CapstoneBase): +class sparc(_CapstoneBase): capstone = Cs(CS_ARCH_SPARC, CS_MODE_BIG_ENDIAN) -class SparcV9(_CapstoneBase): +class sparcv9(_CapstoneBase): capstone = Cs(CS_ARCH_SPARC, CS_MODE_BIG_ENDIAN + CS_MODE_V9) -class SystemZ(_CapstoneBase): +class systemz(_CapstoneBase): capstone = Cs(CS_ARCH_SYSZ, 0) -class XCore(_CapstoneBase): +class xcore(_CapstoneBase): capstone = Cs(CS_ARCH_XCORE, 0) diff --git a/subdisassem/schema.py b/subdisassem/schema.py index a43ffba..9513d5d 100644 --- a/subdisassem/schema.py +++ b/subdisassem/schema.py @@ -20,6 +20,7 @@ class Disassembly(Base): arch = Column(String, nullable=False) checksum = Column(String, nullable=False) count = Column(Integer, nullable=False) + rates = Column(String, nullable=False) size = Column(Integer, nullable=False) offset = Column(Integer, nullable=False) opcodes = Column(String, nullable=False) @@ -40,6 +41,7 @@ class Disassembly(Base): "id": self.id, "arch": self.arch, "count": self.count, + "rates": self.rates, "size": self.size, "offset": self.offset, } diff --git a/subdisassem/scripts.py b/subdisassem/scripts.py index 29c862f..1038e1f 100644 --- a/subdisassem/scripts.py +++ b/subdisassem/scripts.py @@ -5,19 +5,19 @@ from sqlalchemy import desc import logging from .disassemble import ( - X86_intel, - X86, - X86_64, - ARM, - Thumb, - ARM_64, - MIPS_32_eb, - MIPS_64_el, - PPC_64, - Sparc, - SparcV9, - SystemZ, - XCore, + x86_16, + x86_32, + x86_64, + armv7, + thumb2, + aarch64, + mips32, + mips64_el, + ppc64, + sparc, + sparcv9, + systemz, + xcore, ) from .schema import db_config, Disassembly @@ -28,7 +28,9 @@ def subdisassem_script(): parser.add_argument("-v", "--verbose", action="count", help="verbose logging") parser.add_argument("-b", "--bin-path", required=True) parser.add_argument("-l", "--log", action="store_true", help="log to file") - parser.add_argument("-f", "--fuzz", default=64, help="offset bruteforce max") + parser.add_argument( + "-f", "--fuzz", type=int, default=64, help="offset bruteforce max" + ) args = parser.parse_args() args.bin_path = Path(args.bin_path) @@ -72,19 +74,19 @@ def subdisassem_script(): logging.info(f"sha1sum: {checksum}") archs = [ - X86_intel, - X86, - X86_64, - ARM, - Thumb, - ARM_64, - MIPS_32_eb, - MIPS_64_el, - PPC_64, - Sparc, - SparcV9, - SystemZ, - XCore, + x86_16, + x86_32, + x86_64, + armv7, + thumb2, + aarch64, + mips32, + mips64_el, + ppc64, + sparc, + sparcv9, + systemz, + xcore, ] for arch in archs: @@ -108,6 +110,7 @@ def subdisassem_script(): row.arch = disasembler.arch row.checksum = checksum row.count = len(disasembler) + row.rates = str(disasembler.rates[:5]) row.size = len(raw_bytes) - offset row.offset = offset row.opcodes = disasembler.objdump