Compare commits
	
		
			4 Commits 
		
	
	
		
			e5d446c22f
			...
			e0c42b8406
		
	
	| Author | SHA1 | Date | 
|---|---|---|
| 
							
							
								 | 
						e0c42b8406 | |
| 
							
							
								 | 
						2ad8d47d3a | |
| 
							
							
								 | 
						8cb6a2f393 | |
| 
							
							
								 | 
						c370df827c | 
							
								
								
									
										15
									
								
								Dockerfile
								
								
								
								
							
							
						
						
									
										15
									
								
								Dockerfile
								
								
								
								
							| 
						 | 
				
			
			@ -1,23 +1,12 @@
 | 
			
		|||
FROM ubuntu:22.04
 | 
			
		||||
ENV DEBIAN_FRONTEND noninteractive
 | 
			
		||||
FROM rizin/rizin:latest
 | 
			
		||||
 | 
			
		||||
USER root
 | 
			
		||||
RUN apt-get update && apt-get install --yes \
 | 
			
		||||
    build-essential \
 | 
			
		||||
    git \
 | 
			
		||||
    python3-setuptools \
 | 
			
		||||
    python3-sqlalchemy \
 | 
			
		||||
    && apt-get clean
 | 
			
		||||
 | 
			
		||||
RUN git clone -b next https://github.com/capstone-engine/capstone.git /capstone/
 | 
			
		||||
WORKDIR /capstone/
 | 
			
		||||
RUN sh /capstone/make.sh
 | 
			
		||||
RUN sh /capstone/make.sh install
 | 
			
		||||
 | 
			
		||||
COPY setup.py /app/
 | 
			
		||||
COPY subdisassem /app/subdisassem/
 | 
			
		||||
WORKDIR /app/
 | 
			
		||||
RUN python3 setup.py install
 | 
			
		||||
 | 
			
		||||
RUN useradd -m subdisassem
 | 
			
		||||
WORKDIR /home/subdisassem
 | 
			
		||||
ENV HOME /home/subdisassem
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										1
									
								
								setup.py
								
								
								
								
							
							
						
						
									
										1
									
								
								setup.py
								
								
								
								
							| 
						 | 
				
			
			@ -12,6 +12,7 @@ setup(
 | 
			
		|||
    python_requires=">3",
 | 
			
		||||
    install_requires=[
 | 
			
		||||
        "capstone",
 | 
			
		||||
        "rzpipe",
 | 
			
		||||
        "SQLAlchemy",
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -31,7 +31,6 @@ import logging
 | 
			
		|||
 | 
			
		||||
class _CapstoneBase:
 | 
			
		||||
    def __init__(self, payload: bytes, offset: int = 0):
 | 
			
		||||
        self.arch = self.__class__.__name__
 | 
			
		||||
        self.capstone.skipdata = True
 | 
			
		||||
        self.capstone.skipdata_setup = ("unknown", None, None)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -74,18 +73,42 @@ class _CapstoneBase:
 | 
			
		|||
        return opcodes
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def rates(self) -> list:
 | 
			
		||||
    def rets(self) -> list:
 | 
			
		||||
        if hasattr(self, "_rets"):
 | 
			
		||||
            return self._rets
 | 
			
		||||
 | 
			
		||||
        self._rets = list()
 | 
			
		||||
 | 
			
		||||
        for opcode in self.disassembly:
 | 
			
		||||
            if "ret" in opcode.mnemonic:
 | 
			
		||||
                self._rets.append(opcode.mnemonic)
 | 
			
		||||
 | 
			
		||||
        return self._rets
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def ret_rates(self) -> list:
 | 
			
		||||
        rates = dict()
 | 
			
		||||
 | 
			
		||||
        for mnemonic in set(self.rets):
 | 
			
		||||
            rates[mnemonic] = self.rets.count(mnemonic)
 | 
			
		||||
 | 
			
		||||
        listed = sorted(((value, key) for (key, value) in rates.items()), reverse=True)
 | 
			
		||||
 | 
			
		||||
        return listed
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def mnemonic_rates(self) -> list:
 | 
			
		||||
        mnemonics = list()
 | 
			
		||||
 | 
			
		||||
        for opcode in self.disassembly:
 | 
			
		||||
            mnemonics.append(opcode.mnemonic)
 | 
			
		||||
 | 
			
		||||
        _rates = dict()
 | 
			
		||||
        rates = dict()
 | 
			
		||||
 | 
			
		||||
        for mnemonic in set(mnemonics):
 | 
			
		||||
            _rates[mnemonic] = mnemonics.count(mnemonic)
 | 
			
		||||
            rates[mnemonic] = mnemonics.count(mnemonic)
 | 
			
		||||
 | 
			
		||||
        listed = sorted(((value, key) for (key, value) in _rates.items()), reverse=True)
 | 
			
		||||
        listed = sorted(((value, key) for (key, value) in rates.items()), reverse=True)
 | 
			
		||||
 | 
			
		||||
        return listed
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -0,0 +1,114 @@
 | 
			
		|||
from pathlib import Path
 | 
			
		||||
import logging
 | 
			
		||||
import rzpipe
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class _RizinBase:
    """Disassemble a file through rizin and expose mnemonic statistics.

    Subclasses must define ``arch_cmds``: a list of rizin command strings
    that are sent to the pipe before the disassembly is requested.

    After construction ``self.disassembly`` holds the decoded JSON output of
    the ``pdj`` command; the properties below treat it as a list of dicts
    and read the ``"offset"`` and ``"opcode"`` keys of each entry.
    """

    def __init__(self, path: Path):
        """Run rizin on ``path`` and store the decoded ``pdj`` output.

        Args:
            path: file to open with rizin (resolved to an absolute path).
        """
        rz_pipe = rzpipe.open(str(path.absolute()))

        try:
            for cmd in self.arch_cmds:
                rz_pipe.cmd(cmd)

            rz_pipe.cmd("aa")
            result = rz_pipe.cmd("pdj")
        finally:
            # Always shut the pipe down, even when one of the commands above
            # raises; otherwise the pipe is never closed.
            rz_pipe.quit()

        # An empty reply is not valid JSON; treat it as "nothing disassembled"
        # instead of failing with JSONDecodeError.
        if result and result.strip():
            self.disassembly = json.loads(result)
        else:
            self.disassembly = []

    def __repr__(self) -> str:
        """Return the objdump-style listing."""
        return self.objdump

    def __len__(self) -> int:
        """Return the number of entries in the disassembly."""
        return len(self.disassembly)

    def __lt__(self, other):
        """Order disassemblies by their number of entries."""
        return len(self) < len(other)

    @staticmethod
    def _ranked_counts(mnemonics) -> list:
        """Count ``mnemonics`` in one pass and rank them.

        Returns:
            A list of ``(count, mnemonic)`` tuples sorted in descending
            order (highest count first, ties broken by mnemonic, descending).
        """
        counts = {}

        for mnemonic in mnemonics:
            counts[mnemonic] = counts.get(mnemonic, 0) + 1

        return sorted(
            ((count, mnemonic) for (mnemonic, count) in counts.items()),
            reverse=True,
        )

    @property
    def objdump(self) -> str:
        """Return one ``<hex offset>:\\t<opcode>`` line per entry (cached)."""
        if hasattr(self, "_objdump"):
            return self._objdump

        lines = []

        for each in self.disassembly:
            offset = each.get("offset")
            opcode = each.get("opcode")
            lines.append(f"{offset:#02x}:\t{opcode}\n")

        # Assign only once the whole listing is built so a failure midway
        # never leaves a truncated listing cached on the instance.
        self._objdump = "".join(lines)

        return self._objdump

    @property
    def disasm(self) -> list:
        """Return ``[offset, mnemonic, operands]`` for every entry (cached).

        For an entry with a non-empty opcode string, ``mnemonic`` is the text
        before the first space and ``operands`` the remaining space-separated
        pieces. For an entry without one, ``mnemonic`` is ``None`` and the
        third item is the raw (falsy) opcode value.
        """
        if hasattr(self, "_disasm"):
            return self._disasm

        parsed = []

        for each in self.disassembly:
            offset = each.get("offset")
            opcode = each.get("opcode")

            if opcode:
                mnemonic, *operands = opcode.split(" ")
                parsed.append([offset, mnemonic, operands])
            else:
                parsed.append([offset, None, opcode])

        self._disasm = parsed

        return self._disasm

    @property
    def rets(self) -> list:
        """Return every mnemonic containing ``"ret"``, in order (cached)."""
        if hasattr(self, "_rets"):
            return self._rets

        self._rets = [
            mnemonic for (_, mnemonic, _) in self.disasm if mnemonic and "ret" in mnemonic
        ]

        return self._rets

    @property
    def ret_rates(self) -> list:
        """Return ``(count, mnemonic)`` tuples for ``rets``, highest first."""
        return self._ranked_counts(self.rets)

    @property
    def mnemonic_rates(self) -> list:
        """Return ``(count, mnemonic)`` tuples for all mnemonics, highest first."""
        return self._ranked_counts(
            mnemonic for (_, mnemonic, _) in self.disasm if mnemonic
        )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class x86_16(_RizinBase):
    """Rizin-backed disassembler with the x86 / 16-bit settings applied."""

    # Commands sent to the rizin pipe by _RizinBase.__init__ before
    # the disassembly is requested.
    arch_cmds = [
        "e asm.arch=x86",
        "e asm.bits=16",
    ]
 | 
			
		||||
| 
						 | 
				
			
			@ -20,11 +20,13 @@ class Disassembly(Base):
 | 
			
		|||
    arch = Column(String, nullable=False)
 | 
			
		||||
    checksum = Column(String, nullable=False)
 | 
			
		||||
    count = Column(Integer, nullable=False)
 | 
			
		||||
    rates = Column(String, nullable=False)
 | 
			
		||||
    size = Column(Integer, nullable=False)
 | 
			
		||||
    engine = Column(String, nullable=False)
 | 
			
		||||
    mnemonic_rates = Column(String, nullable=False)
 | 
			
		||||
    offset = Column(Integer, nullable=False)
 | 
			
		||||
    opcodes = Column(String, nullable=False)
 | 
			
		||||
    path = Column(String, nullable=False)
 | 
			
		||||
    ret_rates = Column(String, nullable=False)
 | 
			
		||||
    size = Column(Integer, nullable=False)
 | 
			
		||||
 | 
			
		||||
    def __repr__(self):
 | 
			
		||||
        return f"<Disassembly {json.dumps(self.values, indent=1)}>"
 | 
			
		||||
| 
						 | 
				
			
			@ -40,10 +42,11 @@ class Disassembly(Base):
 | 
			
		|||
        values_dict = {
 | 
			
		||||
            "id": self.id,
 | 
			
		||||
            "arch": self.arch,
 | 
			
		||||
            "engine": self.engine,
 | 
			
		||||
            "count": self.count,
 | 
			
		||||
            "rates": self.rates,
 | 
			
		||||
            "size": self.size,
 | 
			
		||||
            "offset": self.offset,
 | 
			
		||||
            "mnemonic_rates": self.mnemonic_rates,
 | 
			
		||||
            "ret_rates": self.ret_rates,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        return values_dict
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -4,22 +4,8 @@ from pathlib import Path
 | 
			
		|||
from sqlalchemy import desc
 | 
			
		||||
import logging
 | 
			
		||||
 | 
			
		||||
from .disassemble import (
 | 
			
		||||
    x86_16,
 | 
			
		||||
    x86_32,
 | 
			
		||||
    x86_64,
 | 
			
		||||
    armv7,
 | 
			
		||||
    thumb2,
 | 
			
		||||
    aarch64,
 | 
			
		||||
    mips32,
 | 
			
		||||
    mips64_el,
 | 
			
		||||
    ppc64,
 | 
			
		||||
    sparc,
 | 
			
		||||
    sparcv9,
 | 
			
		||||
    systemz,
 | 
			
		||||
    xcore,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
from . import capstone_wrapper
 | 
			
		||||
from . import rizin_wrapper
 | 
			
		||||
from .schema import db_config, Disassembly
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -29,16 +15,18 @@ def subdisassem_script():
 | 
			
		|||
    parser.add_argument("-b", "--bin-path", required=True)
 | 
			
		||||
    parser.add_argument("-l", "--log", action="store_true", help="log to file")
 | 
			
		||||
    parser.add_argument(
 | 
			
		||||
        "-f", "--fuzz", type=int, default=64, help="offset bruteforce max"
 | 
			
		||||
        "-f", "--fuzz", type=int, default=1, help="offset bruteforce max"
 | 
			
		||||
    )
 | 
			
		||||
    args = parser.parse_args()
 | 
			
		||||
 | 
			
		||||
    args.bin_path = Path(args.bin_path)
 | 
			
		||||
 | 
			
		||||
    if args.verbose:
 | 
			
		||||
        print_count = -1
 | 
			
		||||
        level = logging.DEBUG
 | 
			
		||||
        format = "%(asctime)s %(filename)s:%(lineno)d %(message)s"
 | 
			
		||||
    else:
 | 
			
		||||
        print_count = 5
 | 
			
		||||
        level = logging.INFO
 | 
			
		||||
        format = "%(asctime)s %(message)s"
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -73,29 +61,30 @@ def subdisassem_script():
 | 
			
		|||
 | 
			
		||||
    logging.info(f"sha1sum: {checksum}")
 | 
			
		||||
 | 
			
		||||
    archs = [
 | 
			
		||||
        x86_16,
 | 
			
		||||
        x86_32,
 | 
			
		||||
        x86_64,
 | 
			
		||||
        armv7,
 | 
			
		||||
        thumb2,
 | 
			
		||||
        aarch64,
 | 
			
		||||
        mips32,
 | 
			
		||||
        mips64_el,
 | 
			
		||||
        ppc64,
 | 
			
		||||
        sparc,
 | 
			
		||||
        sparcv9,
 | 
			
		||||
        systemz,
 | 
			
		||||
        xcore,
 | 
			
		||||
    capstone_archs = [
 | 
			
		||||
        capstone_wrapper.x86_16,
 | 
			
		||||
        capstone_wrapper.x86_32,
 | 
			
		||||
        capstone_wrapper.x86_64,
 | 
			
		||||
        capstone_wrapper.armv7,
 | 
			
		||||
        capstone_wrapper.thumb2,
 | 
			
		||||
        capstone_wrapper.aarch64,
 | 
			
		||||
        capstone_wrapper.mips32,
 | 
			
		||||
        capstone_wrapper.mips64_el,
 | 
			
		||||
        capstone_wrapper.ppc64,
 | 
			
		||||
        capstone_wrapper.sparc,
 | 
			
		||||
        capstone_wrapper.sparcv9,
 | 
			
		||||
        capstone_wrapper.systemz,
 | 
			
		||||
        capstone_wrapper.xcore,
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
    for arch in archs:
 | 
			
		||||
    for arch in capstone_archs:
 | 
			
		||||
        for offset in range(args.fuzz):
 | 
			
		||||
            exists = (
 | 
			
		||||
                session.query(Disassembly)
 | 
			
		||||
                .filter(Disassembly.checksum == checksum)
 | 
			
		||||
                .filter(Disassembly.offset == offset)
 | 
			
		||||
                .filter(Disassembly.arch == arch.__name__)
 | 
			
		||||
                .filter(Disassembly.engine == str(arch.__bases__))
 | 
			
		||||
                .first()
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -107,21 +96,69 @@ def subdisassem_script():
 | 
			
		|||
 | 
			
		||||
            disasembler = arch(payload=raw_bytes, offset=offset)
 | 
			
		||||
            row = Disassembly()
 | 
			
		||||
            row.arch = disasembler.arch
 | 
			
		||||
            row.arch = disasembler.__class__.__name__
 | 
			
		||||
            row.checksum = checksum
 | 
			
		||||
            row.count = len(disasembler)
 | 
			
		||||
            row.rates = str(disasembler.rates[:5])
 | 
			
		||||
            row.size = len(raw_bytes) - offset
 | 
			
		||||
            row.engine = str(arch.__bases__)
 | 
			
		||||
            row.mnemonic_rates = str(disasembler.mnemonic_rates[:print_count])
 | 
			
		||||
            row.offset = offset
 | 
			
		||||
            row.opcodes = disasembler.objdump
 | 
			
		||||
            row.path = str(args.bin_path.absolute())
 | 
			
		||||
            row.ret_rates = str(disasembler.ret_rates[:print_count])
 | 
			
		||||
            row.size = len(raw_bytes) - offset
 | 
			
		||||
            session.add(row)
 | 
			
		||||
 | 
			
		||||
    session.commit()
 | 
			
		||||
 | 
			
		||||
    rizin_archs = [
 | 
			
		||||
        rizin_wrapper.x86_16,
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
    for arch in rizin_archs:
 | 
			
		||||
        for offset in range(args.fuzz):
 | 
			
		||||
            exists = (
 | 
			
		||||
                session.query(Disassembly)
 | 
			
		||||
                .filter(Disassembly.checksum == checksum)
 | 
			
		||||
                .filter(Disassembly.offset == offset)
 | 
			
		||||
                .filter(Disassembly.arch == arch.__name__)
 | 
			
		||||
                .filter(Disassembly.engine == str(arch.__bases__))
 | 
			
		||||
                .first()
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
            if exists:
 | 
			
		||||
                logging.debug(
 | 
			
		||||
                    f"subdiassembly_exists: {[arch.__name__, checksum, offset]}"
 | 
			
		||||
                )
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            disasembler = arch(path=args.bin_path)
 | 
			
		||||
            row = Disassembly()
 | 
			
		||||
            row.arch = disasembler.__class__.__name__
 | 
			
		||||
            row.checksum = checksum
 | 
			
		||||
            row.count = len(disasembler)
 | 
			
		||||
            row.engine = str(arch.__bases__)
 | 
			
		||||
            row.mnemonic_rates = str(disasembler.mnemonic_rates[:print_count])
 | 
			
		||||
            row.offset = offset
 | 
			
		||||
            row.opcodes = disasembler.objdump
 | 
			
		||||
            row.path = str(args.bin_path.absolute())
 | 
			
		||||
            row.ret_rates = str(disasembler.ret_rates[:print_count])
 | 
			
		||||
            row.size = len(raw_bytes) - offset
 | 
			
		||||
            session.add(row)
 | 
			
		||||
 | 
			
		||||
    session.commit()
 | 
			
		||||
 | 
			
		||||
    tops = list()
 | 
			
		||||
 | 
			
		||||
    for arch in archs:
 | 
			
		||||
    for arch in capstone_archs:
 | 
			
		||||
        top = (
 | 
			
		||||
            session.query(Disassembly)
 | 
			
		||||
            .filter(Disassembly.arch == arch.__name__)
 | 
			
		||||
            .order_by(desc("count"))
 | 
			
		||||
            .first()
 | 
			
		||||
        )
 | 
			
		||||
        tops.append(top)
 | 
			
		||||
 | 
			
		||||
    for arch in rizin_archs:
 | 
			
		||||
        top = (
 | 
			
		||||
            session.query(Disassembly)
 | 
			
		||||
            .filter(Disassembly.arch == arch.__name__)
 | 
			
		||||
| 
						 | 
				
			
			@ -132,5 +169,5 @@ def subdisassem_script():
 | 
			
		|||
 | 
			
		||||
    tops = sorted(tops, key=len, reverse=True)
 | 
			
		||||
 | 
			
		||||
    for top in tops[:3]:
 | 
			
		||||
    for top in tops[:print_count]:
 | 
			
		||||
        logging.info(top)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue