..

52pojie2024 Android高级逆向复现

这个题太复杂了,过年的时候没什么时间做,想着后面复现。但是一直也没找到时间,终于最近五一假期有了点时间。

间接跳转、间接调用反混淆

这个题主要逻辑都在 native 层,混淆特别严重,目测有间接跳转、间接调用、间接取数据、字符串加密、控制流平坦化、虚假控制流,而且混淆还是过程间混淆,有点难蚌。毫无疑问其中影响最大的是间接跳转,然后就是间接调用,至于虚假控制流、间接取数据这些可以用插件或者直接改data段只读解决。所以,这里优先处理间接跳转、间接调用,主要思路也很简单就是直接上符号执行跑,然后拿 blr/br 指令的约束求解目标地址,实现如下:

import angr
import claripy
import pyvex
from capstone import *
import lief


class AntiObf(object):
    """Rewrite indirect ``br``/``blr`` instructions of an ELF as direct branches.

    angr is used to symbolically execute a function and resolve the register
    targets of ``br`` and ``blr`` instructions; keystone assembles the
    replacement ``b`` / ``b.<cond>`` / ``bl`` instructions and LIEF patches
    them into the binary, which can then be written out with :meth:`export`.
    """
    def __init__(self, filename):
        """Load *filename* into angr (base address 0, no shared libs) and into LIEF."""
        self.proj = angr.Project(filename, main_opts={'base_addr': 0}, load_options={'auto_load_libs': False})
        self.capstone = self.proj.arch.capstone
        self.keystone = self.proj.arch.keystone
        self.elf: lief.ELF.Binary = lief.parse(filename)
        # Addresses of functions that were already processed (prevents re-entry/recursion loops).
        self.deobfed_func = set()

    def fuck_indirect(self, func_addr, key=0):
        """Resolve and patch the indirect branches/calls reachable from *func_addr*.

        :param func_addr: address of the function to symbolically execute.
        :param key: 32-bit value; it is stored little-endian in a buffer whose
            pointer is passed as the first argument of the function.

        Functions reached through a resolved ``blr`` that lie inside the main
        object are collected together with the uint32 read from ``[x0]`` at
        the call site and processed recursively afterwards.
        """
        if func_addr in self.deobfed_func:
            return
        self.deobfed_func.add(func_addr)

        # (callee address, key) pairs discovered at blr sites of this function.
        obf_func_arr = set()

        # Bookkeeping for the symbolic 1-bit condition variables injected below:
        # variable name -> (address of the instruction that produced it, the variable).
        cond_var_count = 0
        cond_var_dic = dict()

        def set_variable(state, var_idx):
            """Overwrite VEX temp *var_idx* with a fresh 1-bit symbolic variable and record it."""
            nonlocal cond_var_count, cond_var_dic
            cond_var = claripy.BVS(f"cond_{cond_var_count}", 1)
            state.scratch.temps[var_idx] = cond_var
            # Remember where the variable was created so the br handler can find the instruction
            cond_var_name = list(cond_var.variables)[0]
            cond_var_dic[cond_var_name] = (state.scratch.ins_addr, cond_var)
            cond_var_count += 1

        def statement_hook(state: angr.SimState):
            """Before each VEX statement: make the condition of every ITE expression symbolic."""
            statement = state.scratch.irsb.statements[state.inspect.statement]
            for expr in statement.expressions:
                if isinstance(expr, pyvex.expr.ITE):
                    set_variable(state, expr.cond.tmp)

        def instruction_handler(state: angr.SimState):
            """Before each instruction: patch ``br``/``blr`` whose target can be resolved."""
            inst_addr = state.inspect.instruction
            inst: CsInsn = state.block(inst_addr).capstone.insns[0]
            if inst.mnemonic == 'br':  # once the first br has executed, init_state is complete
                target_addr = getattr(state.regs, self.capstone.reg_name(inst.operands[0].reg))
                if not state.solver.symbolic(target_addr):
                    # Concrete target: replace the br with an unconditional direct branch.
                    target_func_addr = state.solver.eval(target_addr)
                    code, _ = self.keystone.asm(f'b 0x{target_func_addr:x}', inst_addr)
                    self.elf.patch_address(inst_addr, code)
                    return
                dep_var_names = list(target_addr.variables)
                assert len(dep_var_names) == 1  # exactly one condition variable is expected; anything else is unsupported
                cond_inst_addr, dep_var = cond_var_dic[dep_var_names[0]]
                # Recover the condition code from the instruction that produced the variable
                cond_inst: CsInsn = state.block(cond_inst_addr).capstone.insns[0]
                if cond_inst.mnemonic == 'csel' or cond_inst.mnemonic == 'cset':
                    # The condition is the last operand of csel/cset.
                    cond_name = cond_inst.op_str.split(', ')[-1]
                else:
                    raise Exception(f"{cond_inst_addr:x} 我不认识 {cond_inst.mnemonic}")
                # Target address when the condition holds; the conditional branch is
                # written 4 bytes before the br, overwriting the instruction located there
                true_state = state.copy()
                true_state.solver.add(dep_var == 1)
                true_successor = true_state.solver.eval(target_addr)
                code, _ = self.keystone.asm(f'b.{cond_name} 0x{true_successor:x}', inst_addr - 4)
                self.elf.patch_address(inst_addr - 4, code)
                # Target address when the condition does not hold; replaces the br itself
                false_state = state.copy()
                false_state.solver.add(dep_var == 0)
                false_successor = false_state.solver.eval(target_addr)
                code, _ = self.keystone.asm(f'b 0x{false_successor:x}', inst_addr)
                self.elf.patch_address(inst_addr, code)
            if inst.mnemonic == 'blr':
                target_addr = getattr(state.regs, self.capstone.reg_name(inst.operands[0].reg))
                if not state.solver.symbolic(target_addr):
                    target_func_addr = state.solver.eval(target_addr)
                    main_obj = self.proj.loader.main_object
                    # flag stays True only when the callee lies inside the main object.
                    flag = True
                    if main_obj.min_addr < target_func_addr < main_obj.max_addr:
                        code, _ = self.keystone.asm(f'bl 0x{target_func_addr:x}', inst_addr)
                    else:
                        # Target outside the main object: look up its symbol name and
                        # call the main object's PLT entry of that name instead.
                        symbol = self.proj.loader.find_symbol(target_func_addr).name
                        target_func_addr = self.proj.loader.main_object.plt[symbol]
                        code, _ = self.keystone.asm(f'bl 0x{target_func_addr:x}', inst_addr)
                        flag = False
                    self.elf.patch_address(inst_addr, code)
                    if target_func_addr not in self.deobfed_func and flag:
                        # Queue the callee; its key is the uint32 that x0 points to,
                        # or 0 when x0 is symbolic at the call site.
                        x0 = state.regs.x0
                        if state.solver.symbolic(x0):
                            obf_func_arr.add((target_func_addr, 0))
                            return
                        x0 = state.solver.eval(x0)
                        key_val = state.solver.eval(state.mem[x0].uint32_t.resolved)
                        obf_func_arr.add((target_func_addr, key_val))

        # Call state whose first argument points to a 4-byte little-endian buffer holding
        # `key`; created with angr's CALLLESS option.
        blk_state = self.proj.factory.call_state(func_addr, angr.PointerWrapper(key.to_bytes(4, 'little'), buffer=True),
                                                 add_options={angr.options.CALLLESS})
        blk_state.inspect.b("instruction", when=angr.BP_BEFORE, action=instruction_handler)
        blk_state.inspect.b("statement", when=angr.BP_BEFORE, action=statement_hook)
        # Instruction-pointer values already seen by the stash filter below.
        addr_visited = set()

        def s1nk_filter(state):
            """Stash filter: keep a state "active" only the first time its ip value is seen."""
            nonlocal addr_visited
            cur_addr = state.solver.eval(state.regs.ip)
            if isinstance(cur_addr, int) and cur_addr not in addr_visited:
                addr_visited.add(cur_addr)
                return "active"
            return "visited"

        simgr = self.proj.factory.simulation_manager(blk_state)
        # Step until no active state is left.
        while len(simgr.active) > 0:
            simgr.step(filter_func=s1nk_filter)
        # Recurse into the callees discovered at blr sites.
        for (func, key) in obf_func_arr:
            self.fuck_indirect(func, key)

    def export(self, path):
        """Write the patched ELF to *path*."""
        self.elf.write(path)


if __name__ == '__main__':
    # Deobfuscate the two entry functions in turn, then write the patched library.
    deobfuscator = AntiObf('target/lib52pojie.so')
    for entry_point in (0x1a864, 0x1befc):
        deobfuscator.fuck_indirect(entry_point)
    deobfuscator.export('target/lib52pojie_patched.so')

逻辑分析

经过上面的反混淆 + gooMBA去除虚假控制流 + data 段只读去除部分间接数据访问,代码就基本能看了。JNI_OnLoad 就动态注册了个 native 函数,在 0x1BEFC 所以直接看 sub_1BEFC。

native 函数 sub_1BEFC

sub_1BEFC 主要逻辑如下:

  1. base64 解码(sub_24290)输入的注册码

  2. 使用解密函数(sub_1E924)解密base64 解码后的注册码

  3. 一堆位运算再来解密一次

image-20240506234157512

  4. 对比uid的md5和解密完成后的结果

另外还有一个检测函数(sub_1E24C),使用 system_property_get 检测 init.svc.adbd 的值

image-20240506234628508

这里就出现了字符串加密,所以顺便提一下我是咋解密字符串的,我是直接用unicorn模拟执行的。

import hashlib

import lief
from unicorn import *
from unicorn.arm64_const import *

# When true, main() prints every memory region it maps.
DEBUG = True

# Emulated stack region; str_decrypt_func() sets SP to STACK_ADDR + 0x1000.
STACK_ADDR = 0x100000
STACK_SIZE = 0x2000
# Emulated heap region.
HEAP_ADDR = 0x200000
HEAP_SIZE = 0x2000


def align_size(val, alignment=0x1000):
    """Round *val* up to a multiple of *alignment*, then add 0x1000 of padding."""
    mask = -alignment  # same bit pattern as ~(alignment - 1)
    rounded_up = (val + alignment - 1) & mask
    return rounded_up + 0x1000


def align_addr(val, alignment=0x1000):
    """Round *val* down by clearing the bits selected by ``alignment - 1``."""
    low_bits = alignment - 1
    return val & ~low_bits


def str_decrypt_func(uc: Uc, x0, x1, start, end, length):
    """Emulate the code between *start* and *end* and return the bytes at *x0*.

    SP is set to ``STACK_ADDR + 0x1000`` and the registers X0/X1 are loaded
    with *x0*/*x1* before emulation starts; afterwards *length* bytes are read
    back from address *x0* and returned.
    """
    initial_registers = (
        (UC_ARM64_REG_SP, STACK_ADDR + 0x1000),
        (UC_ARM64_REG_X0, x0),
        (UC_ARM64_REG_X1, x1),
    )
    for reg_id, reg_value in initial_registers:
        uc.reg_write(reg_id, reg_value)

    uc.emu_start(start, end)
    decrypted = uc.mem_read(x0, length)
    return decrypted


def main():
    """Map the library into a Unicorn ARM64 emulator and decrypt one string.

    Every LOAD segment of ``./target/lib52pojie.so`` is mapped (page-aligned)
    and filled with the segment content, a stack and a heap region are mapped,
    and then the code between 0x23F28 and 0x240C8 is emulated through
    ``str_decrypt_func``; the 0x20 bytes it leaves at the x0 address are
    printed.
    """
    uc = Uc(UC_ARCH_ARM64, UC_MODE_ARM)
    # Load the ELF: map each LOAD segment and copy its content to its virtual address.
    elf: lief.ELF.Binary = lief.parse('./target/lib52pojie.so')
    for segment in elf.segments:
        segment: lief.ELF.Segment
        if segment.type != lief.ELF.SEGMENT_TYPES.LOAD:
            continue
        # The mapping must start and end on page boundaries.
        addr = align_addr(segment.virtual_address)
        size = align_size(segment.virtual_size)
        uc.mem_map(addr, size)
        if DEBUG:
            print(f"mem_map: ({addr:x}, {size:x}), real: ({segment.virtual_address:x}, {segment.virtual_size:x})")
        uc.mem_write(segment.virtual_address, bytes(segment.content))
    uc.mem_map(STACK_ADDR, STACK_SIZE)
    # The second argument of mem_map is the region size: use HEAP_SIZE here
    # (passing HEAP_ADDR mapped 0x200000 bytes instead of the intended 0x2000).
    uc.mem_map(HEAP_ADDR, HEAP_SIZE)

    # Decrypt the string
    decrypted = str_decrypt_func(uc, 367975, 267577, 0x23F28, 0x240C8, 0x20)
    print(decrypted)

# Run the string-decryption demo only when executed as a script.
if __name__ == '__main__':
    main()

解密函数 sub_1E924

解密函数逻辑主要如下:

  1. 创建了两个大数(常量),使用 sub_2A5C8

    可以用 frida 拦截一手,

        // Hook the bignum-creation routine at offset 0x2A5C8 of lib52pojie.
        // On entry, read args[2] (as int32) bytes from the pointer in args[1]
        // and log them, which dumps the raw bytes each bignum is built from.
        Interceptor.attach(lib52pojie.base.add(0x2A5C8), {
            onEnter: function (args) {
                let v = args[1].readByteArray(args[2].toInt32());
                console.log(v);
            },
        });
    
  2. 根据 text 数据md5、还有一个从 apk 文件计算出来的数据的md5,计算得到第三个大数

image-20240506235806102

具体算法如下,当然也可以 frida hook bypass text段 md5 后直接读

# Fragment: expects `d2` to be a pre-allocated 0x80-byte bytearray and `hashlib` to be imported.
# Bytes 0x00-0x0f: MD5 of the text data (value dumped from the target).
text_segment_md5 = bytes.fromhex('54c6fed759984e8d27bff5d9e6489235')
d2[0:0x10] = text_segment_md5
# Bytes 0x10-0x1f: MD5 of the 4-byte little-endian value computed from the APK.
apk_calc_ret = 0xccf1c7a4
apk_calc_ret_md5 = hashlib.md5(apk_calc_ret.to_bytes(4, byteorder='little')).digest()
d2[0x10:0x20] = apk_calc_ret_md5
# Remaining six 16-byte slots: each one is the MD5 of the previous digest.
for i in range(2, 8):
    apk_calc_ret_md5 = hashlib.md5(apk_calc_ret_md5).digest()
    d2[i * 0x10: (i + 1) * 0x10] = apk_calc_ret_md5
# Clear the two leading bytes, then read the buffer as a big-endian integer.
d2[:2] = b'\x00\x00'
d2 = int.from_bytes(d2, byteorder='big')

read_apk_calac 这个函数 (sub_16FFC)从 maps 里拿了 apk 的路径,然后读取 apk 计算了什么东西,计算逻辑在 sub_3DA80,挺复杂的我懒得看了反正,直接hook拿结果的。

  3. 根据输入创建大数 c,然后就是 m1 = pow(c, d1, n) 和 m2 = pow(c, d2, n),m = mod(m1 * m2, n),事实上就是 m = pow(c, d1 + d2, n),也就是说 RSA 算法私钥是 d1 + d2

  4. 解密完成之后,check了解密结果是否前两字节等于 0002,和之后的第一个0的位置是否大于8,然后把这个0后面的数据返回,查了一下这是 PKCS#1 v1.5 的填充格式

    参考:https://datatracker.ietf.org/doc/html/rfc8017#section-7.2.1

image-20240507001121351

image-20240507002413025

除了这些之外,还有一个对摄像头的获取,用来检测?

image-20240507001745241

解密脚本编写

RSA 已知d 求e ,感觉应该行毕竟e的值一般都很小。但是我也不会,所以我直接用的官方 hint 给的 e,有会的爹可以教教我。

import hashlib
from Crypto.PublicKey import RSA
from Crypto.Cipher import PKCS1_v1_5
import base64


def encrypt(message):
    """Apply the per-byte, position-dependent transform to *message*.

    Every byte goes through a fixed chain of 8-bit negate / subtract / xor /
    add / rotate steps that also mixes in the byte's index. The result is a
    new ``bytearray`` with the same length as *message*.
    """

    def rol8(value, shift):
        # Rotate an 8-bit value left by `shift` bits.
        return ((value << shift) | (value >> (8 - shift))) & 0xff

    out = bytearray(len(message))
    for pos, byte in enumerate(message):
        acc = (-byte - pos) & 0xff
        acc = rol8(acc ^ 0xC8, 5)
        acc = rol8((acc - pos) & 0xff, 5)
        acc = ((acc - pos) & 0xff) ^ pos
        acc = ((acc + 14) & 0xff) ^ 5
        acc = rol8(acc, 3) ^ 0x69
        acc = rol8((acc + pos) & 0xff, 3)
        out[pos] = (acc - pos) & 0xff
    return out


# RSA modulus.
n = 0xbfbdcc68db43621b3d16e308b8cbb66ad6715da8e46b76f1731bcfa26107ff3012fc585565b3efe01d83d30198078a34f80bade1fb5d23c6ccff8026e6d187de723bdfd1f263d700ca8c664919a885783c40f0eb2fc6282233c14b9efe8f0f995623cd6095d266d3edba0520a34d99e07b3412afc7bde7f26c9b937252786d87
# First part of the private exponent; the second part (d2) is derived below.
d1 = 0x3c9bf2cf5f948d635c8be9a69293a75fef179dce9dea355ad7b18158f89b5105e864aeb58088a3f529a1e0c1faf0c7a3b02809d38c160d8d4d4490bec3b5b30200a7afb8f8aa523cfc00cfa2ec35880b688e7fe983f0c3688885ebd0fb27a19c7ac039df7eda59d538d5a479e8ce45658a177a2164c032940644bc1e7fb3961e
# 0x80-byte buffer that is filled with MD5 digests and then turned into the integer d2.
d2 = bytearray(0x80)

# Bytes 0x00-0x0f: MD5 of the text data (value dumped from the target).
text_segment_md5 = bytes.fromhex('54c6fed759984e8d27bff5d9e6489235')
d2[0:0x10] = text_segment_md5
# Bytes 0x10-0x1f: MD5 of the 4-byte little-endian value computed from the APK.
apk_calc_ret = 0xccf1c7a4
apk_calc_ret_md5 = hashlib.md5(apk_calc_ret.to_bytes(4, byteorder='little')).digest()
d2[0x10:0x20] = apk_calc_ret_md5
# Remaining six 16-byte slots: each one is the MD5 of the previous digest.
for i in range(2, 8):
    apk_calc_ret_md5 = hashlib.md5(apk_calc_ret_md5).digest()
    d2[i * 0x10: (i + 1) * 0x10] = apk_calc_ret_md5
# Clear the two leading bytes, then read the buffer as a big-endian integer.
d2[:2] = b'\x00\x00'
d2 = int.from_bytes(d2, byteorder='big')
# The private exponent is the sum of both parts.
d = d1 + d2

# Public exponent (the value used here comes from the official hint).
e = 0x200001
# Transform the MD5 of the uid so that it matches after the native post-processing.
md5_enc = encrypt(hashlib.md5(b'01585780').digest())
rsa = RSA.construct((n, e, d))
# PKCS#1 v1.5 encrypt with the public key and base64-encode: this is the registration code.
ret = base64.b64encode(PKCS1_v1_5.new(rsa).encrypt(md5_enc))
print(ret.decode())