single arch from rizin
parent
2ad8d47d3a
commit
e0c42b8406
|
@ -2,20 +2,11 @@ FROM rizin/rizin:latest
|
|||
|
||||
USER root
|
||||
RUN apt-get update && apt-get install --yes \
|
||||
build-essential \
|
||||
git \
|
||||
python3-setuptools \
|
||||
python3-sqlalchemy \
|
||||
&& apt-get clean
|
||||
|
||||
RUN git clone -b next https://github.com/capstone-engine/capstone.git /capstone/
|
||||
WORKDIR /capstone/
|
||||
RUN sh /capstone/make.sh
|
||||
RUN sh /capstone/make.sh install
|
||||
|
||||
COPY setup.py /app/
|
||||
COPY subdisassem /app/subdisassem/
|
||||
WORKDIR /app/
|
||||
RUN python3 setup.py install
|
||||
|
||||
USER rizin
|
||||
|
|
1
setup.py
1
setup.py
|
@ -12,6 +12,7 @@ setup(
|
|||
python_requires=">3",
|
||||
install_requires=[
|
||||
"capstone",
|
||||
"rzpipe",
|
||||
"SQLAlchemy",
|
||||
],
|
||||
)
|
||||
|
|
|
@ -31,7 +31,6 @@ import logging
|
|||
|
||||
class _CapstoneBase:
|
||||
def __init__(self, payload: bytes, offset: int = 0):
|
||||
self.arch = self.__class__.__name__
|
||||
self.capstone.skipdata = True
|
||||
self.capstone.skipdata_setup = ("unknown", None, None)
|
||||
|
||||
|
|
|
@ -0,0 +1,114 @@
|
|||
import json
import logging
from collections import Counter
from pathlib import Path

import rzpipe
|
||||
|
||||
|
||||
class _RizinBase:
|
||||
def __init__(self, path: Path):
|
||||
rz_pipe = rzpipe.open(str(path.absolute()))
|
||||
|
||||
for cmd in self.arch_cmds:
|
||||
rz_pipe.cmd(cmd)
|
||||
|
||||
rz_pipe.cmd("aa")
|
||||
result = rz_pipe.cmd("pdj")
|
||||
rz_pipe.quit()
|
||||
self.disassembly = json.loads(result)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return self.objdump
|
||||
|
||||
def __len__(self) -> int:
|
||||
return len(self.disassembly)
|
||||
|
||||
def __lt__(self, other):
|
||||
return len(self) < len(other)
|
||||
|
||||
@property
|
||||
def objdump(self) -> str:
|
||||
if hasattr(self, "_objdump"):
|
||||
return self._objdump
|
||||
|
||||
self._objdump = str()
|
||||
|
||||
for each in self.disassembly:
|
||||
offset = each.get("offset")
|
||||
opcode = each.get("opcode")
|
||||
self._objdump += f"{offset:#02x}:\t{opcode}\n"
|
||||
|
||||
return self._objdump
|
||||
|
||||
@property
|
||||
def disasm(self) -> list:
|
||||
if hasattr(self, "_disasm"):
|
||||
return self._disasm
|
||||
|
||||
self._disasm = list()
|
||||
|
||||
for each in self.disassembly:
|
||||
offset = each.get("offset")
|
||||
opcode = each.get("opcode")
|
||||
|
||||
if opcode:
|
||||
mnemonic = opcode.split(" ")[0]
|
||||
opcode = opcode.split(" ")[1:]
|
||||
else:
|
||||
mnemonic = None
|
||||
|
||||
self._disasm.append([offset, mnemonic, opcode])
|
||||
|
||||
return self._disasm
|
||||
|
||||
@property
|
||||
def rets(self) -> list:
|
||||
if hasattr(self, "_rets"):
|
||||
return self._rets
|
||||
|
||||
self._rets = list()
|
||||
|
||||
for each in self.disasm:
|
||||
_, mnemonic, _ = each
|
||||
|
||||
if mnemonic and "ret" in mnemonic:
|
||||
self._rets.append(mnemonic)
|
||||
|
||||
return self._rets
|
||||
|
||||
@property
|
||||
def ret_rates(self) -> list:
|
||||
rates = dict()
|
||||
|
||||
for mnemonic in set(self.rets):
|
||||
rates[mnemonic] = self.rets.count(mnemonic)
|
||||
|
||||
_ret_rates = sorted(
|
||||
((value, key) for (key, value) in rates.items()), reverse=True
|
||||
)
|
||||
|
||||
return _ret_rates
|
||||
|
||||
@property
|
||||
def mnemonic_rates(self) -> list:
|
||||
mnemonics = list()
|
||||
|
||||
for each in self.disasm:
|
||||
_, mnemonic, _ = each
|
||||
|
||||
if mnemonic:
|
||||
mnemonics.append(mnemonic)
|
||||
|
||||
rates = dict()
|
||||
|
||||
for mnemonic in set(mnemonics):
|
||||
rates[mnemonic] = mnemonics.count(mnemonic)
|
||||
|
||||
_mnemonic_rates = sorted(
|
||||
((value, key) for (key, value) in rates.items()), reverse=True
|
||||
)
|
||||
|
||||
return _mnemonic_rates
|
||||
|
||||
|
||||
class x86_16(_RizinBase):
    """Rizin disassembly configured with ``asm.arch=x86`` and ``asm.bits=16``."""

    # Commands sent to the pipe by ``_RizinBase.__init__`` before analysis.
    arch_cmds = [
        "e asm.arch=x86",
        "e asm.bits=16",
    ]
|
|
@ -5,6 +5,7 @@ from sqlalchemy import desc
|
|||
import logging
|
||||
|
||||
from . import capstone_wrapper
|
||||
from . import rizin_wrapper
|
||||
from .schema import db_config, Disassembly
|
||||
|
||||
|
||||
|
@ -21,9 +22,11 @@ def subdisassem_script():
|
|||
args.bin_path = Path(args.bin_path)
|
||||
|
||||
if args.verbose:
|
||||
print_count = -1
|
||||
level = logging.DEBUG
|
||||
format = "%(asctime)s %(filename)s:%(lineno)d %(message)s"
|
||||
else:
|
||||
print_count = 5
|
||||
level = logging.INFO
|
||||
format = "%(asctime)s %(message)s"
|
||||
|
||||
|
@ -58,7 +61,7 @@ def subdisassem_script():
|
|||
|
||||
logging.info(f"sha1sum: {checksum}")
|
||||
|
||||
archs = [
|
||||
capstone_archs = [
|
||||
capstone_wrapper.x86_16,
|
||||
capstone_wrapper.x86_32,
|
||||
capstone_wrapper.x86_64,
|
||||
|
@ -74,13 +77,14 @@ def subdisassem_script():
|
|||
capstone_wrapper.xcore,
|
||||
]
|
||||
|
||||
for arch in archs:
|
||||
for arch in capstone_archs:
|
||||
for offset in range(args.fuzz):
|
||||
exists = (
|
||||
session.query(Disassembly)
|
||||
.filter(Disassembly.checksum == checksum)
|
||||
.filter(Disassembly.offset == offset)
|
||||
.filter(Disassembly.arch == arch.__name__)
|
||||
.filter(Disassembly.engine == str(arch.__bases__))
|
||||
.first()
|
||||
)
|
||||
|
||||
|
@ -92,15 +96,52 @@ def subdisassem_script():
|
|||
|
||||
disasembler = arch(payload=raw_bytes, offset=offset)
|
||||
row = Disassembly()
|
||||
row.arch = disasembler.arch
|
||||
row.arch = disasembler.__class__.__name__
|
||||
row.checksum = checksum
|
||||
row.count = len(disasembler)
|
||||
row.engine = str(arch.__bases__)
|
||||
row.mnemonic_rates = str(disasembler.mnemonic_rates[:5])
|
||||
row.mnemonic_rates = str(disasembler.mnemonic_rates[:print_count])
|
||||
row.offset = offset
|
||||
row.opcodes = disasembler.objdump
|
||||
row.path = str(args.bin_path.absolute())
|
||||
row.ret_rates = str(disasembler.ret_rates[:5])
|
||||
row.ret_rates = str(disasembler.ret_rates[:print_count])
|
||||
row.size = len(raw_bytes) - offset
|
||||
session.add(row)
|
||||
|
||||
session.commit()
|
||||
|
||||
rizin_archs = [
|
||||
rizin_wrapper.x86_16,
|
||||
]
|
||||
|
||||
for arch in rizin_archs:
|
||||
for offset in range(args.fuzz):
|
||||
exists = (
|
||||
session.query(Disassembly)
|
||||
.filter(Disassembly.checksum == checksum)
|
||||
.filter(Disassembly.offset == offset)
|
||||
.filter(Disassembly.arch == arch.__name__)
|
||||
.filter(Disassembly.engine == str(arch.__bases__))
|
||||
.first()
|
||||
)
|
||||
|
||||
if exists:
|
||||
logging.debug(
|
||||
f"subdiassembly_exists: {[arch.__name__, checksum, offset]}"
|
||||
)
|
||||
continue
|
||||
|
||||
disasembler = arch(path=args.bin_path)
|
||||
row = Disassembly()
|
||||
row.arch = disasembler.__class__.__name__
|
||||
row.checksum = checksum
|
||||
row.count = len(disasembler)
|
||||
row.engine = str(arch.__bases__)
|
||||
row.mnemonic_rates = str(disasembler.mnemonic_rates[:print_count])
|
||||
row.offset = offset
|
||||
row.opcodes = disasembler.objdump
|
||||
row.path = str(args.bin_path.absolute())
|
||||
row.ret_rates = str(disasembler.ret_rates[:print_count])
|
||||
row.size = len(raw_bytes) - offset
|
||||
session.add(row)
|
||||
|
||||
|
@ -108,7 +149,16 @@ def subdisassem_script():
|
|||
|
||||
tops = list()
|
||||
|
||||
for arch in archs:
|
||||
for arch in capstone_archs:
|
||||
top = (
|
||||
session.query(Disassembly)
|
||||
.filter(Disassembly.arch == arch.__name__)
|
||||
.order_by(desc("count"))
|
||||
.first()
|
||||
)
|
||||
tops.append(top)
|
||||
|
||||
for arch in rizin_archs:
|
||||
top = (
|
||||
session.query(Disassembly)
|
||||
.filter(Disassembly.arch == arch.__name__)
|
||||
|
@ -119,5 +169,5 @@ def subdisassem_script():
|
|||
|
||||
tops = sorted(tops, key=len, reverse=True)
|
||||
|
||||
for top in tops[:5]:
|
||||
for top in tops[:print_count]:
|
||||
logging.info(top)
|
||||
|
|
Loading…
Reference in New Issue