Rizin
unix-like reverse engineering framework and cli tools
analysis_objc.c
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: 2019-2020 pancake
2 // SPDX-License-Identifier: LGPL-3.0-only
3 
4 /* This code has been based on Alvaro's
5  * rzpipe-python script which was based on FireEye script for IDA Pro.
6  *
7  * https://www.fireeye.com/blog/threat-research/2017/03/introduction_to_reve.html
8  */
9 
10 #include <rz_core.h>
11 
12 #include "core_private.h"
13 
14 typedef struct {
16  HtUP *up;
17  size_t word_size;
22 } RzCoreObjc;
23 
/* Layout constants used by objc_find_refs() while walking the __objc_data section. */

/* Stride between consecutive records in the __objc_data section. */
static const size_t objc2ClassSize = 0x28;
/* Offset inside a record of the pointer that is read as classRoVA. */
static const size_t objc2ClassInfoOffs = 0x20;
/* Stride between consecutive entries of a method array. */
static const size_t objc2ClassMethSize = 0x18;
/* Offset from classRoVA of the pointer to the method list. */
static const size_t objc2ClassBaseMethsOffs = 0x20;
/* Offset inside a method entry of the pointer that is read as funcVA. */
static const size_t objc2ClassMethImpOffs = 0x10;
29 
30 static void array_add(RzCoreObjc *o, ut64 va, ut64 xrefs_to) {
31  bool found = false;
32  RzVector *vec = ht_up_find(o->up, va, &found);
33  if (!found || !vec) {
34  vec = rz_vector_new(sizeof(ut64), NULL, NULL);
35  ht_up_insert(o->up, va, vec);
36  }
37  ut64 *addr;
38  rz_vector_foreach(vec, addr) {
39  if (xrefs_to == *addr) {
40  return;
41  }
42  }
43  // extend vector and insert new element
44  rz_vector_push(vec, &xrefs_to);
45 }
46 
47 static void kv_array_free(HtUPKv *kv) {
48  rz_vector_free(kv->value);
49 }
50 
51 static inline bool isValid(ut64 addr) {
52  return (addr != 0LL && addr != UT64_MAX);
53 }
54 
55 static inline bool isInvalid(ut64 addr) {
56  return !isValid(addr);
57 }
58 
59 static inline bool inBetween(RzBinSection *s, ut64 addr) {
60  if (!s || isInvalid(addr)) {
61  return false;
62  }
63  const ut64 from = s->vaddr;
64  const ut64 to = from + s->vsize;
65  return RZ_BETWEEN(from, addr, to);
66 }
67 
68 static ut32 readDword(RzCoreObjc *objc, ut64 addr, bool *success) {
69  ut8 buf[4];
70  *success = rz_io_read_at(objc->core->io, addr, buf, sizeof(buf));
71  return rz_read_le32(buf);
72 }
73 
74 static ut64 readQword(RzCoreObjc *objc, ut64 addr, bool *success) {
75  ut8 buf[8] = { 0 };
76  *success = rz_io_read_at(objc->core->io, addr, buf, sizeof(buf));
77  return rz_read_le64(buf);
78 }
79 
80 static void objc_analyze(RzCore *core) {
81  const char *notify = "Analyzing code to find selfref references";
82  rz_core_notify_begin(core, "%s", notify);
83  (void)rz_core_analysis_refs(core, 0);
84  if (!strcmp("arm", rz_config_get(core->config, "asm.arch"))) {
85  const bool emu_lazy = rz_config_get_i(core->config, "emu.lazy");
86  rz_config_set_i(core->config, "emu.lazy", true);
88  rz_config_set_i(core->config, "emu.lazy", emu_lazy);
89  }
90  rz_core_notify_done(core, "%s", notify);
91 }
92 
93 static ut64 getRefPtr(RzCoreObjc *o, ut64 classMethodsVA, bool *rfound) {
94  *rfound = false;
95 
96  bool readSuccess;
97  ut64 namePtr = readQword(o, classMethodsVA, &readSuccess);
98  if (!readSuccess) {
99  return UT64_MAX;
100  }
101 
102  size_t cnt = 0;
103  ut64 ref = UT64_MAX;
104  bool isMsgRef = false;
105 
106  RzVector *vec = ht_up_find(o->up, namePtr, rfound);
107  if (!*rfound || !vec) {
108  *rfound = false;
109  return false;
110  }
111  ut64 *addr;
112  rz_vector_foreach(vec, addr) {
113  const ut64 at = *addr;
114  if (inBetween(o->_selrefs, at)) {
115  isMsgRef = false;
116  ref = at;
117  } else if (inBetween(o->_msgrefs, at)) {
118  isMsgRef = true;
119  ref = at;
120  } else if (inBetween(o->_const, at)) {
121  cnt++;
122  }
123  }
124  if (cnt > 1 || ref == 0 || ref == UT64_MAX) {
125  *rfound = false;
126  return UT64_MAX;
127  }
128  return isMsgRef ? ref - 8 : ref;
129 }
130 
131 static bool objc_build_refs(RzCoreObjc *objc) {
132  ut64 off;
133  rz_return_val_if_fail(objc->_const && objc->_selrefs, false);
134 
135  const ut64 va_const = objc->_const->vaddr;
136  size_t ss_const = objc->_const->vsize;
137  const ut64 va_selrefs = objc->_selrefs->vaddr;
138  size_t ss_selrefs = objc->_selrefs->vsize;
139 
140  // TODO: check if ss_const or ss_selrefs are too big before going further
141  size_t maxsize = RZ_MAX(ss_const, ss_selrefs);
142  ut8 *buf = calloc(1, maxsize);
143  if (!buf) {
144  return false;
145  }
146  const size_t word_size = objc->word_size; // assuming 8 because of the read_le64
147  if (!rz_io_read_at(objc->core->io, objc->_const->vaddr, buf, ss_const)) {
148  RZ_LOG_ERROR("aao: Cannot read the whole const section %zu\n", ss_const);
149  return false;
150  }
151  for (off = 0; off + word_size < ss_const; off += word_size) {
152  ut64 va = va_const + off;
153  ut64 xrefs_to = rz_read_le64(buf + off);
154  if (isValid(xrefs_to)) {
155  array_add(objc, va, xrefs_to);
156  }
157  }
158  if (!rz_io_read_at(objc->core->io, va_selrefs, buf, ss_selrefs)) {
159  RZ_LOG_ERROR("aao: Cannot read the whole selrefs section\n");
160  return false;
161  }
162  for (off = 0; off + word_size < ss_selrefs; off += word_size) {
163  ut64 va = va_selrefs + off;
164  ut64 xrefs_to = rz_read_le64(buf + off);
165  if (isValid(xrefs_to)) {
166  array_add(objc, xrefs_to, va);
167  }
168  }
169  free(buf);
170  return true;
171 }
172 
175  if (!sections) {
176  return false;
177  }
179  o->core = core;
180  o->word_size = (core->rasm->bits == 64) ? 8 : 4;
181  if (o->word_size != 8) {
182  RZ_LOG_WARN("aao is experimental on 32bit binaries\n");
183  }
184 
185  RzBinSection *s;
186  RzListIter *iter;
187  rz_list_foreach (sections, iter, s) {
188  const char *name = s->name;
189  if (strstr(name, "__objc_data")) {
190  o->_data = s;
191  } else if (strstr(name, "__objc_selrefs")) {
192  o->_selrefs = s;
193  } else if (strstr(name, "__objc_msgrefs")) {
194  o->_msgrefs = s;
195  } else if (strstr(name, "__objc_const")) {
196  o->_const = s;
197  }
198  }
199  if (!o->_const || ((o->_selrefs || o->_msgrefs) && !(o->_data && o->_const))) {
200  free(o);
201  return NULL;
202  }
203  o->up = ht_up_new(NULL, kv_array_free, NULL);
204 
205  return o;
206 }
207 
208 static void core_objc_free(RzCoreObjc *o) {
209  if (o) {
210  ht_up_free(o->up);
211  free(o);
212  }
213 }
214 
215 static bool objc_find_refs(RzCore *core) {
216  RzCoreObjc *objc = core_objc_new(core);
217  if (!objc) {
218  RZ_LOG_DEBUG("Could not find necessary Objective-C sections...\n");
219  return false;
220  }
221 
222  if (!objc_build_refs(objc)) {
223  core_objc_free(objc);
224  return false;
225  }
226  const char *notify = "Parsing metadata in ObjC to find hidden xrefs";
227  rz_core_notify_begin(core, "%s", notify);
228 
229  size_t total_xrefs = 0;
230  bool readSuccess = true;
231  for (ut64 off = 0; off < objc->_data->vsize && readSuccess; off += objc2ClassSize) {
232  if (!readSuccess || rz_cons_is_breaked()) {
233  break;
234  }
235 
236  ut64 va = objc->_data->vaddr + off;
237  // XXX do a single rz_io_read_at() and just rz_read_le64() here
238  ut64 classRoVA = readQword(objc, va + objc2ClassInfoOffs, &readSuccess);
239  if (!readSuccess || isInvalid(classRoVA)) {
240  continue;
241  }
242  ut64 classMethodsVA = readQword(objc, classRoVA + objc2ClassBaseMethsOffs, &readSuccess);
243  if (!readSuccess || isInvalid(classMethodsVA)) {
244  continue;
245  }
246 
247  ut32 count = readDword(objc, classMethodsVA + 4, &readSuccess);
248  if (!readSuccess || ((ut32)count == UT32_MAX)) {
249  continue;
250  }
251 
252  classMethodsVA += 8; // advance to start of class methods array
253  ut64 to = classMethodsVA + (objc2ClassMethSize * count);
254  if (classMethodsVA > to || classMethodsVA + 0xfffff < to) {
255  RZ_LOG_WARN("objc: the input binary might be malformed or this could be a bug.\n");
256  continue;
257  }
258  for (va = classMethodsVA; va < to; va += objc2ClassMethSize) {
259  if (rz_cons_is_breaked()) {
260  break;
261  }
262  bool found = false;
263  ut64 selRefVA = getRefPtr(objc, va, &found);
264  if (!found) {
265  continue;
266  }
267  bool succ = false;
268  ut64 funcVA = readQword(objc, va + objc2ClassMethImpOffs, &succ);
269  if (!succ) {
270  break;
271  }
272 
273  RzList *list = rz_analysis_xrefs_get_to(core->analysis, selRefVA);
274  if (list) {
275  RzListIter *iter;
276  RzAnalysisXRef *xref;
277  rz_list_foreach (list, iter, xref) {
279  total_xrefs++;
280  }
281  }
282  }
283  }
284  rz_core_notify_done(core, "%s", notify);
285 
286  const ut64 va_selrefs = objc->_selrefs->vaddr;
287  const ut64 ss_selrefs = va_selrefs + objc->_selrefs->vsize;
288 
289  rz_core_notify_begin(core, "Found %zu objc xrefs...", total_xrefs);
290  size_t total_words = 0;
291  const size_t word_size = objc->word_size;
292  for (ut64 a = va_selrefs; a < ss_selrefs; a += word_size) {
293  rz_meta_set(core->analysis, RZ_META_TYPE_DATA, a, word_size, NULL);
294  total_words++;
295  }
296  rz_core_notify_done(core, "Found %zu objc xrefs in %zu dwords.", total_xrefs, total_words);
297  core_objc_free(objc);
298  return true;
299 }
300 
301 RZ_API bool cmd_analysis_objc(RzCore *core, bool auto_analysis) {
302  rz_return_val_if_fail(core, 0);
303  if (!auto_analysis) {
304  objc_analyze(core);
305  }
306  return objc_find_refs(core);
307 }
static void objc_analyze(RzCore *core)
Definition: analysis_objc.c:80
static bool objc_find_refs(RzCore *core)
const size_t objc2ClassMethImpOffs
Definition: analysis_objc.c:28
const size_t objc2ClassSize
Definition: analysis_objc.c:24
static void kv_array_free(HtUPKv *kv)
Definition: analysis_objc.c:47
static void core_objc_free(RzCoreObjc *o)
static bool isValid(ut64 addr)
Definition: analysis_objc.c:51
static RzCoreObjc * core_objc_new(RzCore *core)
static ut32 readDword(RzCoreObjc *objc, ut64 addr, bool *success)
Definition: analysis_objc.c:68
static bool isInvalid(ut64 addr)
Definition: analysis_objc.c:55
const size_t objc2ClassMethSize
Definition: analysis_objc.c:26
RZ_API bool cmd_analysis_objc(RzCore *core, bool auto_analysis)
static bool objc_build_refs(RzCoreObjc *objc)
static bool inBetween(RzBinSection *s, ut64 addr)
Definition: analysis_objc.c:59
static void array_add(RzCoreObjc *o, ut64 va, ut64 xrefs_to)
Definition: analysis_objc.c:30
const size_t objc2ClassInfoOffs
Definition: analysis_objc.c:25
const size_t objc2ClassBaseMethsOffs
Definition: analysis_objc.c:27
static ut64 getRefPtr(RzCoreObjc *o, ut64 classMethodsVA, bool *rfound)
Definition: analysis_objc.c:93
static ut64 readQword(RzCoreObjc *objc, ut64 addr, bool *success)
Definition: analysis_objc.c:74
RZ_DEPRECATE RZ_API RZ_BORROW RzList * rz_bin_get_sections(RZ_NONNULL RzBin *bin)
Definition: bin.c:597
RzList * sections(RzBinFile *bf)
Definition: bin_ne.c:110
RZ_API bool rz_core_analysis_refs(RZ_NONNULL RzCore *core, size_t nbytes)
Analyzes xrefs and prints the result.
Definition: canalysis.c:3272
RZ_IPI void rz_core_analysis_esil_default(RzCore *core)
Definition: cil.c:409
RZ_API ut64 rz_config_get_i(RzConfig *cfg, RZ_NONNULL const char *name)
Definition: config.c:119
RZ_API RzConfigNode * rz_config_set_i(RzConfig *cfg, RZ_NONNULL const char *name, const ut64 i)
Definition: config.c:419
RZ_API RZ_BORROW const char * rz_config_get(RzConfig *cfg, RZ_NONNULL const char *name)
Definition: config.c:75
RZ_API bool rz_cons_is_breaked(void)
Definition: cons.c:373
#define RZ_API
#define NULL
Definition: cris-opc.c:27
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void count
Definition: sflib.h:98
uint32_t ut32
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130
RZ_API const KEY_TYPE bool * found
Definition: ht_inc.h:130
voidpf void * buf
Definition: ioapi.h:138
uint8_t ut8
Definition: lh5801.h:11
RZ_API void rz_core_notify_begin(RZ_NONNULL RzCore *core, RZ_NONNULL const char *format,...)
Prints a message defining the beginning of a task.
Definition: core.c:33
RZ_API void rz_core_notify_done(RZ_NONNULL RzCore *core, RZ_NONNULL const char *format,...)
Prints a message defining the end of a task which succeeded.
Definition: core.c:60
static void list(RzEgg *egg)
Definition: rz-gg.c:52
void * calloc(size_t number, size_t size)
Definition: malloc.c:102
RZ_API bool rz_meta_set(RzAnalysis *a, RzAnalysisMetaType type, ut64 addr, ut64 size, const char *str)
Definition: meta.c:191
int off
Definition: pal.c:13
static RzSocket * s
Definition: rtr.c:28
@ RZ_ANALYSIS_XREF_TYPE_CODE
Definition: rz_analysis.h:900
@ RZ_META_TYPE_DATA
Definition: rz_analysis.h:289
#define rz_return_val_if_fail(expr, val)
Definition: rz_assert.h:108
static ut32 rz_read_le32(const void *src)
Definition: rz_endian.h:239
static ut64 rz_read_le64(const void *src)
Definition: rz_endian.h:266
RZ_API bool rz_io_read_at(RzIO *io, ut64 addr, ut8 *buf, int len)
Definition: io.c:300
#define RZ_LOG_WARN(fmtstr,...)
Definition: rz_log.h:56
#define RZ_LOG_DEBUG(fmtstr,...)
Definition: rz_log.h:49
#define RZ_LOG_ERROR(fmtstr,...)
Definition: rz_log.h:58
#define RZ_NEW0(x)
Definition: rz_types.h:284
#define UT32_MAX
Definition: rz_types_base.h:99
#define RZ_MAX(x, y)
#define UT64_MAX
Definition: rz_types_base.h:86
#define RZ_BETWEEN(x, y, z)
RZ_API void * rz_vector_push(RzVector *vec, void *x)
Definition: vector.c:197
#define rz_vector_foreach(vec, it)
Definition: rz_vector.h:169
RZ_API void rz_vector_free(RzVector *vec)
Definition: vector.c:75
RZ_API RzVector * rz_vector_new(size_t elem_size, RzVectorFree free, void *free_user)
Definition: vector.c:42
static struct sockaddr static addrlen static backlog const void static flags void struct sockaddr from
Definition: sfsocketcall.h:123
static struct sockaddr static addrlen static backlog const void static flags void struct sockaddr socklen_t static fromlen const void const struct sockaddr to
Definition: sfsocketcall.h:125
#define a(i)
Definition: sha256.c:41
RzBinSection * _selrefs
Definition: analysis_objc.c:18
RzBinSection * _const
Definition: analysis_objc.c:20
RzCore * core
Definition: analysis_objc.c:15
RzBinSection * _msgrefs
Definition: analysis_objc.c:19
RzBinSection * _data
Definition: analysis_objc.c:21
size_t word_size
Definition: analysis_objc.c:17
Definition: z80asm.h:102
int bits
Definition: rz_asm.h:100
RzBin * bin
Definition: rz_core.h:298
RzAsm * rasm
Definition: rz_core.h:323
RzAnalysis * analysis
Definition: rz_core.h:322
RzIO * io
Definition: rz_core.h:313
RzConfig * config
Definition: rz_core.h:300
ut64(WINAPI *w32_GetEnabledXStateFeatures)()
RZ_API RzList * rz_analysis_xrefs_get_to(RzAnalysis *analysis, ut64 addr)
Definition: xrefs.c:173
RZ_API bool rz_analysis_xrefs_set(RzAnalysis *analysis, ut64 from, ut64 to, RzAnalysisXRefType type)
Definition: xrefs.c:117
static int addr
Definition: z80asm.c:58