Compare commits
4 Commits
e5d446c22f
...
e0c42b8406
Author | SHA1 | Date |
---|---|---|
JoYo | e0c42b8406 | |
JoYo | 2ad8d47d3a | |
JoYo | 8cb6a2f393 | |
JoYo | c370df827c |
15
Dockerfile
15
Dockerfile
|
@ -1,23 +1,12 @@
|
||||||
FROM ubuntu:22.04
|
FROM rizin/rizin:latest
|
||||||
ENV DEBIAN_FRONTEND noninteractive
|
|
||||||
|
|
||||||
|
USER root
|
||||||
RUN apt-get update && apt-get install --yes \
|
RUN apt-get update && apt-get install --yes \
|
||||||
build-essential \
|
|
||||||
git \
|
|
||||||
python3-setuptools \
|
python3-setuptools \
|
||||||
python3-sqlalchemy \
|
python3-sqlalchemy \
|
||||||
&& apt-get clean
|
&& apt-get clean
|
||||||
|
|
||||||
RUN git clone -b next https://github.com/capstone-engine/capstone.git /capstone/
|
|
||||||
WORKDIR /capstone/
|
|
||||||
RUN sh /capstone/make.sh
|
|
||||||
RUN sh /capstone/make.sh install
|
|
||||||
|
|
||||||
COPY setup.py /app/
|
COPY setup.py /app/
|
||||||
COPY subdisassem /app/subdisassem/
|
COPY subdisassem /app/subdisassem/
|
||||||
WORKDIR /app/
|
WORKDIR /app/
|
||||||
RUN python3 setup.py install
|
RUN python3 setup.py install
|
||||||
|
|
||||||
RUN useradd -m subdisassem
|
|
||||||
WORKDIR /home/subdisassem
|
|
||||||
ENV HOME /home/subdisassem
|
|
||||||
|
|
1
setup.py
1
setup.py
|
@ -12,6 +12,7 @@ setup(
|
||||||
python_requires=">3",
|
python_requires=">3",
|
||||||
install_requires=[
|
install_requires=[
|
||||||
"capstone",
|
"capstone",
|
||||||
|
"rzpipe",
|
||||||
"SQLAlchemy",
|
"SQLAlchemy",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
|
@ -31,7 +31,6 @@ import logging
|
||||||
|
|
||||||
class _CapstoneBase:
|
class _CapstoneBase:
|
||||||
def __init__(self, payload: bytes, offset: int = 0):
|
def __init__(self, payload: bytes, offset: int = 0):
|
||||||
self.arch = self.__class__.__name__
|
|
||||||
self.capstone.skipdata = True
|
self.capstone.skipdata = True
|
||||||
self.capstone.skipdata_setup = ("unknown", None, None)
|
self.capstone.skipdata_setup = ("unknown", None, None)
|
||||||
|
|
||||||
|
@ -74,18 +73,42 @@ class _CapstoneBase:
|
||||||
return opcodes
|
return opcodes
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def rates(self) -> list:
|
def rets(self) -> list:
|
||||||
|
if hasattr(self, "_rets"):
|
||||||
|
return self._rets
|
||||||
|
|
||||||
|
self._rets = list()
|
||||||
|
|
||||||
|
for opcode in self.disassembly:
|
||||||
|
if "ret" in opcode.mnemonic:
|
||||||
|
self._rets.append(opcode.mnemonic)
|
||||||
|
|
||||||
|
return self._rets
|
||||||
|
|
||||||
|
@property
|
||||||
|
def ret_rates(self) -> list:
|
||||||
|
rates = dict()
|
||||||
|
|
||||||
|
for mnemonic in set(self.rets):
|
||||||
|
rates[mnemonic] = self.rets.count(mnemonic)
|
||||||
|
|
||||||
|
listed = sorted(((value, key) for (key, value) in rates.items()), reverse=True)
|
||||||
|
|
||||||
|
return listed
|
||||||
|
|
||||||
|
@property
|
||||||
|
def mnemonic_rates(self) -> list:
|
||||||
mnemonics = list()
|
mnemonics = list()
|
||||||
|
|
||||||
for opcode in self.disassembly:
|
for opcode in self.disassembly:
|
||||||
mnemonics.append(opcode.mnemonic)
|
mnemonics.append(opcode.mnemonic)
|
||||||
|
|
||||||
_rates = dict()
|
rates = dict()
|
||||||
|
|
||||||
for mnemonic in set(mnemonics):
|
for mnemonic in set(mnemonics):
|
||||||
_rates[mnemonic] = mnemonics.count(mnemonic)
|
rates[mnemonic] = mnemonics.count(mnemonic)
|
||||||
|
|
||||||
listed = sorted(((value, key) for (key, value) in _rates.items()), reverse=True)
|
listed = sorted(((value, key) for (key, value) in rates.items()), reverse=True)
|
||||||
|
|
||||||
return listed
|
return listed
|
||||||
|
|
|
@ -0,0 +1,114 @@
|
||||||
|
from pathlib import Path
|
||||||
|
import logging
|
||||||
|
import rzpipe
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
class _RizinBase:
    """Disassemble a file through an rzpipe session and summarise the result.

    Concrete subclasses choose the target architecture by defining
    ``arch_cmds``: a list of command strings that are sent to the pipe
    before the analysis and disassembly commands.

    Attributes:
        disassembly: The decoded JSON returned by the ``pdj`` command. The
            properties below treat it as an iterable of dicts and read the
            ``"offset"`` and ``"opcode"`` keys from each entry.
    """

    def __init__(self, path: Path):
        """Open *path* in rzpipe, run the architecture commands and ``pdj``.

        Args:
            path: File to open; it is resolved to an absolute path first.

        Raises:
            AttributeError: If the concrete class does not define
                ``arch_cmds``.
            json.JSONDecodeError: If the ``pdj`` output is not valid JSON.
        """
        rz_pipe = rzpipe.open(str(path.absolute()))

        # The pipe must be closed even when a command (or the ``arch_cmds``
        # lookup) fails, otherwise the session is leaked.
        try:
            for cmd in self.arch_cmds:
                rz_pipe.cmd(cmd)

            rz_pipe.cmd("aa")
            result = rz_pipe.cmd("pdj")
        finally:
            rz_pipe.quit()

        self.disassembly = json.loads(result)

    def __repr__(self) -> str:
        """Return the objdump-style listing."""
        return self.objdump

    def __len__(self) -> int:
        """Return the number of entries in ``disassembly``."""
        return len(self.disassembly)

    def __lt__(self, other):
        """Order instances by their number of disassembled entries."""
        return len(self) < len(other)

    @staticmethod
    def _rank_by_count(mnemonics) -> list:
        """Return ``(count, mnemonic)`` tuples sorted in descending order.

        Counting is done in a single pass; ties on the count fall back to
        reverse alphabetical order of the mnemonic because whole tuples are
        compared.
        """
        counts = dict()

        for mnemonic in mnemonics:
            counts[mnemonic] = counts.get(mnemonic, 0) + 1

        return sorted(
            ((count, mnemonic) for (mnemonic, count) in counts.items()),
            reverse=True,
        )

    @property
    def objdump(self) -> str:
        """Return one ``<hex offset>:<TAB><opcode>`` line per entry.

        The listing is built on first access and cached in ``_objdump``.
        An entry without an integer ``"offset"`` is shown with ``?`` in place
        of the address instead of raising ``TypeError`` while formatting.
        """
        if hasattr(self, "_objdump"):
            return self._objdump

        lines = list()

        for each in self.disassembly:
            offset = each.get("offset")
            opcode = each.get("opcode")
            address = f"{offset:#02x}" if isinstance(offset, int) else "?"
            lines.append(f"{address}:\t{opcode}\n")

        self._objdump = "".join(lines)

        return self._objdump

    @property
    def disasm(self) -> list:
        """Return ``[offset, mnemonic, operands]`` triples, cached on first use.

        The opcode text is split on single spaces: the first token becomes the
        mnemonic and the remaining tokens the operand list. When an entry has
        no (or an empty) opcode, the mnemonic is ``None`` and the third item
        is the raw opcode value as found in the entry.
        """
        if hasattr(self, "_disasm"):
            return self._disasm

        self._disasm = list()

        for each in self.disassembly:
            offset = each.get("offset")
            opcode = each.get("opcode")

            if opcode:
                # Split once; a non-empty string always yields a first token.
                mnemonic, *operands = opcode.split(" ")
                opcode = operands
            else:
                mnemonic = None

            self._disasm.append([offset, mnemonic, opcode])

        return self._disasm

    @property
    def rets(self) -> list:
        """Return every mnemonic containing ``"ret"``, in listing order.

        The match is a substring test, so e.g. ``retf`` is included as well.
        The list is cached in ``_rets`` after the first access.
        """
        if hasattr(self, "_rets"):
            return self._rets

        self._rets = [
            mnemonic
            for (_, mnemonic, _) in self.disasm
            if mnemonic and "ret" in mnemonic
        ]

        return self._rets

    @property
    def ret_rates(self) -> list:
        """Return ``(count, mnemonic)`` for the ``rets`` list, most common first."""
        return self._rank_by_count(self.rets)

    @property
    def mnemonic_rates(self) -> list:
        """Return ``(count, mnemonic)`` for all mnemonics, most common first.

        Entries whose mnemonic is ``None`` or empty are ignored.
        """
        mnemonics = [mnemonic for (_, mnemonic, _) in self.disasm if mnemonic]

        return self._rank_by_count(mnemonics)


class x86_16(_RizinBase):
    """Disassembler variant that sets ``asm.arch=x86`` and ``asm.bits=16``."""

    arch_cmds = ["e asm.arch=x86", "e asm.bits=16"]
|
|
@ -20,11 +20,13 @@ class Disassembly(Base):
|
||||||
arch = Column(String, nullable=False)
|
arch = Column(String, nullable=False)
|
||||||
checksum = Column(String, nullable=False)
|
checksum = Column(String, nullable=False)
|
||||||
count = Column(Integer, nullable=False)
|
count = Column(Integer, nullable=False)
|
||||||
rates = Column(String, nullable=False)
|
engine = Column(String, nullable=False)
|
||||||
size = Column(Integer, nullable=False)
|
mnemonic_rates = Column(String, nullable=False)
|
||||||
offset = Column(Integer, nullable=False)
|
offset = Column(Integer, nullable=False)
|
||||||
opcodes = Column(String, nullable=False)
|
opcodes = Column(String, nullable=False)
|
||||||
path = Column(String, nullable=False)
|
path = Column(String, nullable=False)
|
||||||
|
ret_rates = Column(String, nullable=False)
|
||||||
|
size = Column(Integer, nullable=False)
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return f"<Disassembly {json.dumps(self.values, indent=1)}>"
|
return f"<Disassembly {json.dumps(self.values, indent=1)}>"
|
||||||
|
@ -40,10 +42,11 @@ class Disassembly(Base):
|
||||||
values_dict = {
|
values_dict = {
|
||||||
"id": self.id,
|
"id": self.id,
|
||||||
"arch": self.arch,
|
"arch": self.arch,
|
||||||
|
"engine": self.engine,
|
||||||
"count": self.count,
|
"count": self.count,
|
||||||
"rates": self.rates,
|
|
||||||
"size": self.size,
|
"size": self.size,
|
||||||
"offset": self.offset,
|
"mnemonic_rates": self.mnemonic_rates,
|
||||||
|
"ret_rates": self.ret_rates,
|
||||||
}
|
}
|
||||||
|
|
||||||
return values_dict
|
return values_dict
|
||||||
|
|
|
@ -4,22 +4,8 @@ from pathlib import Path
|
||||||
from sqlalchemy import desc
|
from sqlalchemy import desc
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from .disassemble import (
|
from . import capstone_wrapper
|
||||||
x86_16,
|
from . import rizin_wrapper
|
||||||
x86_32,
|
|
||||||
x86_64,
|
|
||||||
armv7,
|
|
||||||
thumb2,
|
|
||||||
aarch64,
|
|
||||||
mips32,
|
|
||||||
mips64_el,
|
|
||||||
ppc64,
|
|
||||||
sparc,
|
|
||||||
sparcv9,
|
|
||||||
systemz,
|
|
||||||
xcore,
|
|
||||||
)
|
|
||||||
|
|
||||||
from .schema import db_config, Disassembly
|
from .schema import db_config, Disassembly
|
||||||
|
|
||||||
|
|
||||||
|
@ -29,16 +15,18 @@ def subdisassem_script():
|
||||||
parser.add_argument("-b", "--bin-path", required=True)
|
parser.add_argument("-b", "--bin-path", required=True)
|
||||||
parser.add_argument("-l", "--log", action="store_true", help="log to file")
|
parser.add_argument("-l", "--log", action="store_true", help="log to file")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-f", "--fuzz", type=int, default=64, help="offset bruteforce max"
|
"-f", "--fuzz", type=int, default=1, help="offset bruteforce max"
|
||||||
)
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
args.bin_path = Path(args.bin_path)
|
args.bin_path = Path(args.bin_path)
|
||||||
|
|
||||||
if args.verbose:
|
if args.verbose:
|
||||||
|
print_count = -1
|
||||||
level = logging.DEBUG
|
level = logging.DEBUG
|
||||||
format = "%(asctime)s %(filename)s:%(lineno)d %(message)s"
|
format = "%(asctime)s %(filename)s:%(lineno)d %(message)s"
|
||||||
else:
|
else:
|
||||||
|
print_count = 5
|
||||||
level = logging.INFO
|
level = logging.INFO
|
||||||
format = "%(asctime)s %(message)s"
|
format = "%(asctime)s %(message)s"
|
||||||
|
|
||||||
|
@ -73,29 +61,30 @@ def subdisassem_script():
|
||||||
|
|
||||||
logging.info(f"sha1sum: {checksum}")
|
logging.info(f"sha1sum: {checksum}")
|
||||||
|
|
||||||
archs = [
|
capstone_archs = [
|
||||||
x86_16,
|
capstone_wrapper.x86_16,
|
||||||
x86_32,
|
capstone_wrapper.x86_32,
|
||||||
x86_64,
|
capstone_wrapper.x86_64,
|
||||||
armv7,
|
capstone_wrapper.armv7,
|
||||||
thumb2,
|
capstone_wrapper.thumb2,
|
||||||
aarch64,
|
capstone_wrapper.aarch64,
|
||||||
mips32,
|
capstone_wrapper.mips32,
|
||||||
mips64_el,
|
capstone_wrapper.mips64_el,
|
||||||
ppc64,
|
capstone_wrapper.ppc64,
|
||||||
sparc,
|
capstone_wrapper.sparc,
|
||||||
sparcv9,
|
capstone_wrapper.sparcv9,
|
||||||
systemz,
|
capstone_wrapper.systemz,
|
||||||
xcore,
|
capstone_wrapper.xcore,
|
||||||
]
|
]
|
||||||
|
|
||||||
for arch in archs:
|
for arch in capstone_archs:
|
||||||
for offset in range(args.fuzz):
|
for offset in range(args.fuzz):
|
||||||
exists = (
|
exists = (
|
||||||
session.query(Disassembly)
|
session.query(Disassembly)
|
||||||
.filter(Disassembly.checksum == checksum)
|
.filter(Disassembly.checksum == checksum)
|
||||||
.filter(Disassembly.offset == offset)
|
.filter(Disassembly.offset == offset)
|
||||||
.filter(Disassembly.arch == arch.__name__)
|
.filter(Disassembly.arch == arch.__name__)
|
||||||
|
.filter(Disassembly.engine == str(arch.__bases__))
|
||||||
.first()
|
.first()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -107,21 +96,69 @@ def subdisassem_script():
|
||||||
|
|
||||||
disasembler = arch(payload=raw_bytes, offset=offset)
|
disasembler = arch(payload=raw_bytes, offset=offset)
|
||||||
row = Disassembly()
|
row = Disassembly()
|
||||||
row.arch = disasembler.arch
|
row.arch = disasembler.__class__.__name__
|
||||||
row.checksum = checksum
|
row.checksum = checksum
|
||||||
row.count = len(disasembler)
|
row.count = len(disasembler)
|
||||||
row.rates = str(disasembler.rates[:5])
|
row.engine = str(arch.__bases__)
|
||||||
row.size = len(raw_bytes) - offset
|
row.mnemonic_rates = str(disasembler.mnemonic_rates[:print_count])
|
||||||
row.offset = offset
|
row.offset = offset
|
||||||
row.opcodes = disasembler.objdump
|
row.opcodes = disasembler.objdump
|
||||||
row.path = str(args.bin_path.absolute())
|
row.path = str(args.bin_path.absolute())
|
||||||
|
row.ret_rates = str(disasembler.ret_rates[:print_count])
|
||||||
|
row.size = len(raw_bytes) - offset
|
||||||
|
session.add(row)
|
||||||
|
|
||||||
|
session.commit()
|
||||||
|
|
||||||
|
rizin_archs = [
|
||||||
|
rizin_wrapper.x86_16,
|
||||||
|
]
|
||||||
|
|
||||||
|
for arch in rizin_archs:
|
||||||
|
for offset in range(args.fuzz):
|
||||||
|
exists = (
|
||||||
|
session.query(Disassembly)
|
||||||
|
.filter(Disassembly.checksum == checksum)
|
||||||
|
.filter(Disassembly.offset == offset)
|
||||||
|
.filter(Disassembly.arch == arch.__name__)
|
||||||
|
.filter(Disassembly.engine == str(arch.__bases__))
|
||||||
|
.first()
|
||||||
|
)
|
||||||
|
|
||||||
|
if exists:
|
||||||
|
logging.debug(
|
||||||
|
f"subdiassembly_exists: {[arch.__name__, checksum, offset]}"
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
disasembler = arch(path=args.bin_path)
|
||||||
|
row = Disassembly()
|
||||||
|
row.arch = disasembler.__class__.__name__
|
||||||
|
row.checksum = checksum
|
||||||
|
row.count = len(disasembler)
|
||||||
|
row.engine = str(arch.__bases__)
|
||||||
|
row.mnemonic_rates = str(disasembler.mnemonic_rates[:print_count])
|
||||||
|
row.offset = offset
|
||||||
|
row.opcodes = disasembler.objdump
|
||||||
|
row.path = str(args.bin_path.absolute())
|
||||||
|
row.ret_rates = str(disasembler.ret_rates[:print_count])
|
||||||
|
row.size = len(raw_bytes) - offset
|
||||||
session.add(row)
|
session.add(row)
|
||||||
|
|
||||||
session.commit()
|
session.commit()
|
||||||
|
|
||||||
tops = list()
|
tops = list()
|
||||||
|
|
||||||
for arch in archs:
|
for arch in capstone_archs:
|
||||||
|
top = (
|
||||||
|
session.query(Disassembly)
|
||||||
|
.filter(Disassembly.arch == arch.__name__)
|
||||||
|
.order_by(desc("count"))
|
||||||
|
.first()
|
||||||
|
)
|
||||||
|
tops.append(top)
|
||||||
|
|
||||||
|
for arch in rizin_archs:
|
||||||
top = (
|
top = (
|
||||||
session.query(Disassembly)
|
session.query(Disassembly)
|
||||||
.filter(Disassembly.arch == arch.__name__)
|
.filter(Disassembly.arch == arch.__name__)
|
||||||
|
@ -132,5 +169,5 @@ def subdisassem_script():
|
||||||
|
|
||||||
tops = sorted(tops, key=len, reverse=True)
|
tops = sorted(tops, key=len, reverse=True)
|
||||||
|
|
||||||
for top in tops[:3]:
|
for top in tops[:print_count]:
|
||||||
logging.info(top)
|
logging.info(top)
|
||||||
|
|
Loading…
Reference in New Issue