capstone skipdata, publish mnemonic rates

master
JoYo 2022-01-21 15:02:52 -05:00
parent 8640a29b3b
commit e5d446c22f
4 changed files with 76 additions and 49 deletions

View File

@ -2,12 +2,22 @@ FROM ubuntu:22.04
ENV DEBIAN_FRONTEND noninteractive ENV DEBIAN_FRONTEND noninteractive
RUN apt-get update && apt-get install --yes \ RUN apt-get update && apt-get install --yes \
python3-capstone \ build-essential \
git \
python3-setuptools \ python3-setuptools \
python3-sqlalchemy \ python3-sqlalchemy \
&& apt-get clean && apt-get clean
RUN git clone -b next https://github.com/capstone-engine/capstone.git /capstone/
WORKDIR /capstone/
RUN sh /capstone/make.sh
RUN sh /capstone/make.sh install
COPY setup.py /app/ COPY setup.py /app/
COPY subdisassem /app/subdisassem/ COPY subdisassem /app/subdisassem/
WORKDIR /app/ WORKDIR /app/
RUN python3 setup.py install RUN python3 setup.py install
RUN useradd -m subdisassem
WORKDIR /home/subdisassem
ENV HOME /home/subdisassem

View File

@ -32,6 +32,9 @@ import logging
class _CapstoneBase: class _CapstoneBase:
def __init__(self, payload: bytes, offset: int = 0): def __init__(self, payload: bytes, offset: int = 0):
self.arch = self.__class__.__name__ self.arch = self.__class__.__name__
self.capstone.skipdata = True
self.capstone.skipdata_setup = ("unknown", None, None)
disassembly = list() disassembly = list()
for opcode in self.capstone.disasm(payload, offset): for opcode in self.capstone.disasm(payload, offset):
@ -66,65 +69,74 @@ class _CapstoneBase:
opcodes = list() opcodes = list()
for opcode in self.disassembly: for opcode in self.disassembly:
opcodes.append( opcodes.append([opcode.address, opcode.mnemonic, opcode.op_str])
[
opcode.address,
opcode.mnemonic,
opcode.op_str,
opcode.size,
]
)
return opcodes return opcodes
@property
def rates(self) -> list:
mnemonics = list()
class X86_intel(_CapstoneBase): for opcode in self.disassembly:
mnemonics.append(opcode.mnemonic)
_rates = dict()
for mnemonic in set(mnemonics):
_rates[mnemonic] = mnemonics.count(mnemonic)
listed = sorted(((value, key) for (key, value) in _rates.items()), reverse=True)
return listed
class x86_16(_CapstoneBase):
capstone = Cs(CS_ARCH_X86, CS_MODE_16) capstone = Cs(CS_ARCH_X86, CS_MODE_16)
class X86(_CapstoneBase): class x86_32(_CapstoneBase):
capstone = Cs(CS_ARCH_X86, CS_MODE_32) capstone = Cs(CS_ARCH_X86, CS_MODE_32)
class X86_64(_CapstoneBase): class x86_64(_CapstoneBase):
capstone = Cs(CS_ARCH_X86, CS_MODE_64) capstone = Cs(CS_ARCH_X86, CS_MODE_64)
class ARM(_CapstoneBase): class armv7(_CapstoneBase):
capstone = Cs(CS_ARCH_ARM, CS_MODE_ARM) capstone = Cs(CS_ARCH_ARM, CS_MODE_ARM)
class Thumb(_CapstoneBase): class thumb2(_CapstoneBase):
capstone = Cs(CS_ARCH_ARM, CS_MODE_THUMB) capstone = Cs(CS_ARCH_ARM, CS_MODE_THUMB)
class ARM_64(_CapstoneBase): class aarch64(_CapstoneBase):
capstone = Cs(CS_ARCH_ARM64, CS_MODE_ARM) capstone = Cs(CS_ARCH_ARM64, CS_MODE_ARM)
class MIPS_32_eb(_CapstoneBase): class mips32(_CapstoneBase):
capstone = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32 + CS_MODE_BIG_ENDIAN) capstone = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32 + CS_MODE_BIG_ENDIAN)
class MIPS_64_el(_CapstoneBase): class mips64_el(_CapstoneBase):
capstone = Cs(CS_ARCH_MIPS, CS_MODE_MIPS64 + CS_MODE_LITTLE_ENDIAN) capstone = Cs(CS_ARCH_MIPS, CS_MODE_MIPS64 + CS_MODE_LITTLE_ENDIAN)
class PPC_64(_CapstoneBase): class ppc64(_CapstoneBase):
capstone = Cs(CS_ARCH_PPC, CS_MODE_BIG_ENDIAN) capstone = Cs(CS_ARCH_PPC, CS_MODE_BIG_ENDIAN)
class Sparc(_CapstoneBase): class sparc(_CapstoneBase):
capstone = Cs(CS_ARCH_SPARC, CS_MODE_BIG_ENDIAN) capstone = Cs(CS_ARCH_SPARC, CS_MODE_BIG_ENDIAN)
class SparcV9(_CapstoneBase): class sparcv9(_CapstoneBase):
capstone = Cs(CS_ARCH_SPARC, CS_MODE_BIG_ENDIAN + CS_MODE_V9) capstone = Cs(CS_ARCH_SPARC, CS_MODE_BIG_ENDIAN + CS_MODE_V9)
class SystemZ(_CapstoneBase): class systemz(_CapstoneBase):
capstone = Cs(CS_ARCH_SYSZ, 0) capstone = Cs(CS_ARCH_SYSZ, 0)
class XCore(_CapstoneBase): class xcore(_CapstoneBase):
capstone = Cs(CS_ARCH_XCORE, 0) capstone = Cs(CS_ARCH_XCORE, 0)

View File

@ -20,6 +20,7 @@ class Disassembly(Base):
arch = Column(String, nullable=False) arch = Column(String, nullable=False)
checksum = Column(String, nullable=False) checksum = Column(String, nullable=False)
count = Column(Integer, nullable=False) count = Column(Integer, nullable=False)
rates = Column(String, nullable=False)
size = Column(Integer, nullable=False) size = Column(Integer, nullable=False)
offset = Column(Integer, nullable=False) offset = Column(Integer, nullable=False)
opcodes = Column(String, nullable=False) opcodes = Column(String, nullable=False)
@ -40,6 +41,7 @@ class Disassembly(Base):
"id": self.id, "id": self.id,
"arch": self.arch, "arch": self.arch,
"count": self.count, "count": self.count,
"rates": self.rates,
"size": self.size, "size": self.size,
"offset": self.offset, "offset": self.offset,
} }

View File

@ -5,19 +5,19 @@ from sqlalchemy import desc
import logging import logging
from .disassemble import ( from .disassemble import (
X86_intel, x86_16,
X86, x86_32,
X86_64, x86_64,
ARM, armv7,
Thumb, thumb2,
ARM_64, aarch64,
MIPS_32_eb, mips32,
MIPS_64_el, mips64_el,
PPC_64, ppc64,
Sparc, sparc,
SparcV9, sparcv9,
SystemZ, systemz,
XCore, xcore,
) )
from .schema import db_config, Disassembly from .schema import db_config, Disassembly
@ -28,7 +28,9 @@ def subdisassem_script():
parser.add_argument("-v", "--verbose", action="count", help="verbose logging") parser.add_argument("-v", "--verbose", action="count", help="verbose logging")
parser.add_argument("-b", "--bin-path", required=True) parser.add_argument("-b", "--bin-path", required=True)
parser.add_argument("-l", "--log", action="store_true", help="log to file") parser.add_argument("-l", "--log", action="store_true", help="log to file")
parser.add_argument("-f", "--fuzz", default=64, help="offset bruteforce max") parser.add_argument(
"-f", "--fuzz", type=int, default=64, help="offset bruteforce max"
)
args = parser.parse_args() args = parser.parse_args()
args.bin_path = Path(args.bin_path) args.bin_path = Path(args.bin_path)
@ -72,19 +74,19 @@ def subdisassem_script():
logging.info(f"sha1sum: {checksum}") logging.info(f"sha1sum: {checksum}")
archs = [ archs = [
X86_intel, x86_16,
X86, x86_32,
X86_64, x86_64,
ARM, armv7,
Thumb, thumb2,
ARM_64, aarch64,
MIPS_32_eb, mips32,
MIPS_64_el, mips64_el,
PPC_64, ppc64,
Sparc, sparc,
SparcV9, sparcv9,
SystemZ, systemz,
XCore, xcore,
] ]
for arch in archs: for arch in archs:
@ -108,6 +110,7 @@ def subdisassem_script():
row.arch = disasembler.arch row.arch = disasembler.arch
row.checksum = checksum row.checksum = checksum
row.count = len(disasembler) row.count = len(disasembler)
row.rates = str(disasembler.rates[:5])
row.size = len(raw_bytes) - offset row.size = len(raw_bytes) - offset
row.offset = offset row.offset = offset
row.opcodes = disasembler.objdump row.opcodes = disasembler.objdump