Rizin
unix-like reverse engineering framework and cli tools
coresymbolication.c
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: 2020 mrmacete <mrmacete@protonmail.ch>
2 // SPDX-License-Identifier: LGPL-3.0-only
3 
4 #include <rz_types.h>
5 #include <rz_util.h>
6 #include "coresymbolication.h"
7 
8 #define RZ_CS_EL_OFF_SEGS 0x58 /* offset from the element start at which the segment records begin */
9 #define RZ_CS_EL_SIZE_SEG 0x20 /* bytes per segment record: two 64-bit values followed by a 16-byte name */
10 #define RZ_CS_EL_SIZE_SECT_64 0x18 /* bytes per section record when bits is not 32 */
11 #define RZ_CS_EL_SIZE_SECT_32 0x10 /* bytes per section record when bits == 32 */
12 #define RZ_CS_EL_SIZE_SYM 0x18 /* bytes per symbol record (six 32-bit fields) */
13 #define RZ_CS_EL_SIZE_LSYM 0x24 /* bytes per lined-symbol record (nine 32-bit fields) */
14 #define RZ_CS_EL_SIZE_LINFO 0x14 /* bytes per line-info record (five 32-bit fields) */
15 
18  if (hdr && rz_buf_fread_at(buf, off, (ut8 *)hdr, "13i16c5i", 1) == sizeof(RzCoreSymCacheElementHdr)) {
19  return hdr;
20  }
21  free(hdr);
22  return NULL;
23 }
24 
26  if (seg) {
27  free(seg->name);
28  }
29 }
30 
32  if (sec) {
33  free(sec->name);
34  }
35 }
36 
38  if (flc) {
39  free(flc->file);
40  }
41 }
42 
44  if (sym) {
45  free(sym->name);
46  free(sym->mangled_name);
47  }
48 }
49 
51  if (sym) {
54  }
55 }
56 
58  if (line) {
60  }
61 }
62 
64  if (!element) {
65  return;
66  }
67  size_t i;
68  if (element->segments) {
69  for (i = 0; i < element->hdr->n_segments; i++) {
71  }
72  }
73  if (element->sections) {
74  for (i = 0; i < element->hdr->n_sections; i++) {
76  }
77  }
78  if (element->symbols) {
79  for (i = 0; i < element->hdr->n_symbols; i++) {
81  }
82  }
83  if (element->lined_symbols) {
84  for (i = 0; i < element->hdr->n_lined_symbols; i++) {
86  }
87  }
88  if (element->line_info) {
89  for (i = 0; i < element->hdr->n_line_info; i++) {
91  }
92  }
93  free(element->segments);
94  free(element->sections);
95  free(element->symbols);
96  free(element->lined_symbols);
97  free(element->line_info);
98  free(element->hdr);
99  free(element->file_name);
100  free(element->binary_version);
101  free(element);
102 }
103 
105  size_t i;
106  for (i = 0; i < element->hdr->n_segments; i++) {
107  RzCoreSymCacheElementSegment *seg = &element->segments[i];
108  if (seg->size == 0) {
109  continue;
110  }
111  if (seg->paddr < pa && pa < seg->paddr + seg->size) {
112  return pa - seg->paddr + seg->vaddr;
113  }
114  }
115  return pa;
116 }
117 
118 static char *str_dup_safe(const ut8 *b, const ut8 *str, const ut8 *end) {
119  if (str >= b && str < end) {
120  int len = rz_str_nlen((const char *)str, end - str);
121  if (len) {
122  return rz_str_ndup((const char *)str, len);
123  }
124  }
125  return NULL;
126 }
127 
128 static char *str_dup_safe_fixed(const ut8 *b, const ut8 *str, ut64 len, const ut8 *end) {
129  if (str >= b && str + len < end) {
130  char *result = calloc(1, len + 1);
131  if (result) {
132  rz_str_ncpy(result, (const char *)str, len);
133  return result;
134  }
135  }
136  return NULL;
137 }
138 
140  RzCoreSymCacheElement *result = NULL;
141  ut8 *b = NULL;
143  if (!hdr) {
144  return NULL;
145  }
146  if (hdr->version != 1) {
147  RZ_LOG_ERROR("Unsupported CoreSymbolication cache version (%d)\n", hdr->version);
148  goto beach;
149  }
150  if (hdr->size == 0 || hdr->size > rz_buf_size(buf) - off) {
151  RZ_LOG_ERROR("Corrupted CoreSymbolication header: size out of bounds (0x%x)\n", hdr->size);
152  goto beach;
153  }
154  result = RZ_NEW0(RzCoreSymCacheElement);
155  if (!result) {
156  goto beach;
157  }
158  result->hdr = hdr;
159  b = malloc(hdr->size);
160  if (!b) {
161  goto beach;
162  }
163  if (rz_buf_read_at(buf, off, b, hdr->size) != hdr->size) {
164  goto beach;
165  }
166  ut8 *end = b + hdr->size;
167  if (file_name) {
168  result->file_name = strdup(file_name);
169  } else if (hdr->file_name_off) {
170  result->file_name = str_dup_safe(b, b + (size_t)hdr->file_name_off, end);
171  }
172  if (hdr->version_off) {
173  result->binary_version = str_dup_safe(b, b + (size_t)hdr->version_off, end);
174  }
175  const size_t word_size = bits / 8;
176  const ut64 start_of_sections = (ut64)hdr->n_segments * RZ_CS_EL_SIZE_SEG + RZ_CS_EL_OFF_SEGS;
177  const ut64 sect_size = (bits == 32) ? RZ_CS_EL_SIZE_SECT_32 : RZ_CS_EL_SIZE_SECT_64;
178  const ut64 start_of_symbols = start_of_sections + (ut64)hdr->n_sections * sect_size;
179  const ut64 start_of_lined_symbols = start_of_symbols + (ut64)hdr->n_symbols * RZ_CS_EL_SIZE_SYM;
180  const ut64 start_of_line_info = start_of_lined_symbols + (ut64)hdr->n_lined_symbols * RZ_CS_EL_SIZE_LSYM;
181  const ut64 start_of_unknown_pairs = start_of_line_info + (ut64)hdr->n_line_info * RZ_CS_EL_SIZE_LINFO;
182  const ut64 start_of_strings = start_of_unknown_pairs + (ut64)hdr->n_symbols * 8;
183 
184  ut64 page_zero_size = 0;
185  size_t page_zero_idx = 0;
186 
187  if (UT32_MUL_OVFCHK(hdr->n_segments, sizeof(RzCoreSymCacheElementSegment))) {
188  goto beach;
189  } else if (UT32_MUL_OVFCHK(hdr->n_sections, sizeof(RzCoreSymCacheElementSection))) {
190  goto beach;
191  } else if (UT32_MUL_OVFCHK(hdr->n_symbols, sizeof(RzCoreSymCacheElementSymbol))) {
192  goto beach;
193  } else if (UT32_MUL_OVFCHK(hdr->n_lined_symbols, sizeof(RzCoreSymCacheElementLinedSymbol))) {
194  goto beach;
195  } else if (UT32_MUL_OVFCHK(hdr->n_line_info, sizeof(RzCoreSymCacheElementLineInfo))) {
196  goto beach;
197  }
198  if (hdr->n_segments > 0) {
200  if (!result->segments) {
201  goto beach;
202  }
203  size_t i;
204  ut8 *cursor = b + RZ_CS_EL_OFF_SEGS;
205  for (i = 0; i < hdr->n_segments && (cursor + 8) <= end; i++) {
206  RzCoreSymCacheElementSegment *seg = &result->segments[i];
207  seg->paddr = seg->vaddr = rz_read_le64(cursor);
208  cursor += 8;
209  if (cursor >= end) {
210  goto beach;
211  }
212  seg->size = seg->vsize = rz_read_le64(cursor);
213  cursor += 8;
214  if (cursor >= end) {
215  goto beach;
216  }
217  seg->name = str_dup_safe_fixed(b, cursor, 16, end);
218  cursor += 16;
219  if (!seg->name) {
220  goto beach;
221  }
222 
223  if (!strcmp(seg->name, "__PAGEZERO")) {
224  page_zero_size = seg->size;
225  page_zero_idx = i;
226  seg->paddr = seg->vaddr = 0;
227  seg->size = 0;
228  }
229  }
230  for (i = 0; i < hdr->n_segments && page_zero_size > 0; i++) {
231  if (i == page_zero_idx) {
232  continue;
233  }
234  RzCoreSymCacheElementSegment *seg = &result->segments[i];
235  if (seg->vaddr < page_zero_size) {
236  seg->vaddr += page_zero_size;
237  }
238  }
239  }
240  bool relative_to_strings = false;
241  ut8 *string_origin;
242  if (hdr->n_sections > 0) {
244  if (!result->sections) {
245  goto beach;
246  }
247  size_t i;
248  ut8 *cursor = b + start_of_sections;
249  ut8 *upper_boundary = end - word_size;
250  for (i = 0; i < hdr->n_sections && cursor < upper_boundary; i++) {
251  ut8 *sect_start = cursor;
252  RzCoreSymCacheElementSection *sect = &result->sections[i];
253  sect->vaddr = sect->paddr = rz_read_ble(cursor, false, bits);
254  if (sect->vaddr < page_zero_size) {
255  sect->vaddr += page_zero_size;
256  }
257  cursor += word_size;
258  if (cursor >= upper_boundary) {
259  goto beach;
260  }
261  sect->size = rz_read_ble(cursor, false, bits);
262  cursor += word_size;
263  if (cursor >= upper_boundary) {
264  goto beach;
265  }
266  ut64 sect_name_off = rz_read_ble(cursor, false, bits);
267  if (!i && !sect_name_off) {
268  relative_to_strings = true;
269  }
270  cursor += word_size;
271  if (bits == 32) {
272  cursor += word_size;
273  }
274  string_origin = relative_to_strings ? b + start_of_strings : sect_start;
275  sect->name = str_dup_safe(b, string_origin + (size_t)sect_name_off, end);
276  }
277  }
278  if (hdr->n_symbols) {
280  if (!result->symbols) {
281  goto beach;
282  }
283  size_t i;
284  ut8 *cursor = b + start_of_symbols;
285  for (i = 0; i < hdr->n_symbols && cursor + RZ_CS_EL_SIZE_SYM <= end; i++) {
286  RzCoreSymCacheElementSymbol *sym = &result->symbols[i];
287  sym->paddr = rz_read_le32(cursor);
288  sym->size = rz_read_le32(cursor + 0x4);
289  sym->unk1 = rz_read_le32(cursor + 0x8);
290  size_t name_off = rz_read_le32(cursor + 0xc);
291  size_t mangled_name_off = rz_read_le32(cursor + 0x10);
292  sym->unk2 = (st32)rz_read_le32(cursor + 0x14);
293  string_origin = relative_to_strings ? b + start_of_strings : cursor;
294  sym->name = str_dup_safe(b, string_origin + name_off, end);
295  if (!sym->name) {
296  cursor += RZ_CS_EL_SIZE_SYM;
297  goto beach;
298  }
299  string_origin = relative_to_strings ? b + start_of_strings : cursor;
300  sym->mangled_name = str_dup_safe(b, string_origin + mangled_name_off, end);
301  if (!sym->mangled_name) {
302  cursor += RZ_CS_EL_SIZE_SYM;
303  goto beach;
304  }
305  cursor += RZ_CS_EL_SIZE_SYM;
306  }
307  if (i < hdr->n_symbols) {
308  hdr->n_symbols = i;
309  }
310  }
311  if (hdr->n_lined_symbols) {
313  if (!result->lined_symbols) {
314  goto beach;
315  }
316  size_t i;
317  ut8 *cursor = b + start_of_lined_symbols;
318  for (i = 0; i < hdr->n_lined_symbols && cursor + RZ_CS_EL_SIZE_LSYM <= end; i++) {
320  lsym->sym.paddr = rz_read_le32(cursor);
321  lsym->sym.size = rz_read_le32(cursor + 0x4);
322  lsym->sym.unk1 = rz_read_le32(cursor + 0x8);
323  size_t name_off = rz_read_le32(cursor + 0xc);
324  size_t mangled_name_off = rz_read_le32(cursor + 0x10);
325  lsym->sym.unk2 = (st32)rz_read_le32(cursor + 0x14);
326  size_t file_name_off = rz_read_le32(cursor + 0x18);
327  lsym->flc.line = rz_read_le32(cursor + 0x1c);
328  lsym->flc.col = rz_read_le32(cursor + 0x20);
329  string_origin = relative_to_strings ? b + start_of_strings : cursor;
330  lsym->sym.name = str_dup_safe(b, string_origin + name_off, end);
331  if (!lsym->sym.name) {
332  goto beach;
333  }
334  string_origin = relative_to_strings ? b + start_of_strings : cursor;
335  lsym->sym.mangled_name = str_dup_safe(b, string_origin + mangled_name_off, end);
336  if (!lsym->sym.mangled_name) {
337  goto beach;
338  }
339  string_origin = relative_to_strings ? b + start_of_strings : cursor;
340  lsym->flc.file = str_dup_safe(b, string_origin + file_name_off, end);
341  if (!lsym->flc.file) {
342  goto beach;
343  }
344  cursor += RZ_CS_EL_SIZE_LSYM;
345  }
346  if (i < hdr->n_lined_symbols) {
347  hdr->n_lined_symbols = i;
348  }
349  }
350  if (hdr->n_line_info) {
352  if (!result->line_info) {
353  goto beach;
354  }
355  size_t i;
356  ut8 *cursor = b + start_of_line_info;
357  for (i = 0; i < hdr->n_line_info && cursor + RZ_CS_EL_SIZE_LINFO <= end; i++) {
359  info->paddr = rz_read_le32(cursor);
360  info->size = rz_read_le32(cursor + 4);
361  size_t file_name_off = rz_read_le32(cursor + 8);
362  info->flc.line = rz_read_le32(cursor + 0xc);
363  info->flc.col = rz_read_le32(cursor + 0x10);
364  string_origin = relative_to_strings ? b + start_of_strings : cursor;
365  info->flc.file = str_dup_safe(b, string_origin + file_name_off, end);
366  if (!info->flc.file) {
367  goto beach;
368  }
369  cursor += RZ_CS_EL_SIZE_LINFO;
370  }
371  if (i < hdr->n_line_info) {
372  hdr->n_line_info = i;
373  }
374  }
375 
376  /*
377  * TODO:
378  * Figure out the meaning of the 2 arrays of hdr->n_symbols
379  * 32-bit integers located at the end of line info.
380  * Those are the last info before the strings at the end.
381  */
382  free(b);
383  return result;
384 
385 beach:
386  free(b);
388  return NULL;
389 }
size_t len
Definition: 6502dis.c:15
lzma_index ** i
Definition: index.h:629
RzBinInfo * info(RzBinFile *bf)
Definition: bin_ne.c:86
int bits(struct state *s, int need)
Definition: blast.c:72
#define RZ_API
#define RZ_CS_EL_SIZE_SYM
static void rz_coresym_cache_element_lined_symbol_fini(RzCoreSymCacheElementLinedSymbol *sym)
#define RZ_CS_EL_SIZE_SECT_64
#define RZ_CS_EL_SIZE_SEG
static void rz_coresym_cache_element_line_info_fini(RzCoreSymCacheElementLineInfo *line)
#define RZ_CS_EL_SIZE_LSYM
RZ_API void rz_coresym_cache_element_free(RzCoreSymCacheElement *element)
#define RZ_CS_EL_SIZE_SECT_32
RZ_API ut64 rz_coresym_cache_element_pa2va(RzCoreSymCacheElement *element, ut64 pa)
static char * str_dup_safe_fixed(const ut8 *b, const ut8 *str, ut64 len, const ut8 *end)
static void rz_coresym_cache_element_symbol_fini(RzCoreSymCacheElementSymbol *sym)
static RzCoreSymCacheElementHdr * rz_coresym_cache_element_header_new(RzBuffer *buf, size_t off, int bits)
static void rz_coresym_cache_element_segment_fini(RzCoreSymCacheElementSegment *seg)
#define RZ_CS_EL_SIZE_LINFO
static char * str_dup_safe(const ut8 *b, const ut8 *str, const ut8 *end)
#define RZ_CS_EL_OFF_SEGS
static void rz_coresym_cache_element_section_fini(RzCoreSymCacheElementSection *sec)
static void rz_coresym_cache_element_flc_fini(RzCoreSymCacheElementFLC *flc)
RZ_API RzCoreSymCacheElement * rz_coresym_cache_element_new(RzBinFile *bf, RzBuffer *buf, ut64 off, int bits, RZ_OWN char *file_name)
#define NULL
Definition: cris-opc.c:27
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130
voidpf void * buf
Definition: ioapi.h:138
uint8_t ut8
Definition: lh5801.h:11
void * malloc(size_t size)
Definition: malloc.c:123
void * calloc(size_t number, size_t size)
Definition: malloc.c:102
static const char file_name
Definition: sflib.h:131
return strdup("=SP r13\n" "=LR r14\n" "=PC r15\n" "=A0 r0\n" "=A1 r1\n" "=A2 r2\n" "=A3 r3\n" "=ZF zf\n" "=SF nf\n" "=OF vf\n" "=CF cf\n" "=SN or0\n" "gpr lr .32 56 0\n" "gpr pc .32 60 0\n" "gpr cpsr .32 64 0 ____tfiae_________________qvczn\n" "gpr or0 .32 68 0\n" "gpr tf .1 64.5 0 thumb\n" "gpr ef .1 64.9 0 endian\n" "gpr jf .1 64.24 0 java\n" "gpr qf .1 64.27 0 sticky_overflow\n" "gpr vf .1 64.28 0 overflow\n" "gpr cf .1 64.29 0 carry\n" "gpr zf .1 64.30 0 zero\n" "gpr nf .1 64.31 0 negative\n" "gpr itc .4 64.10 0 if_then_count\n" "gpr gef .4 64.16 0 great_or_equal\n" "gpr r0 .32 0 0\n" "gpr r1 .32 4 0\n" "gpr r2 .32 8 0\n" "gpr r3 .32 12 0\n" "gpr r4 .32 16 0\n" "gpr r5 .32 20 0\n" "gpr r6 .32 24 0\n" "gpr r7 .32 28 0\n" "gpr r8 .32 32 0\n" "gpr r9 .32 36 0\n" "gpr r10 .32 40 0\n" "gpr r11 .32 44 0\n" "gpr r12 .32 48 0\n" "gpr r13 .32 52 0\n" "gpr r14 .32 56 0\n" "gpr r15 .32 60 0\n" "gpr r16 .32 64 0\n" "gpr r17 .32 68 0\n")
line
Definition: setup.py:34
int off
Definition: pal.c:13
RZ_API st64 rz_buf_read_at(RZ_NONNULL RzBuffer *b, ut64 addr, RZ_NONNULL RZ_OUT ut8 *buf, ut64 len)
Read len bytes of the buffer at the specified address.
Definition: buf.c:1136
RZ_API st64 rz_buf_fread_at(RZ_NONNULL RzBuffer *b, ut64 addr, RZ_NONNULL ut8 *buf, RZ_NONNULL const char *fmt, int n)
...
Definition: buf.c:1001
RZ_API ut64 rz_buf_size(RZ_NONNULL RzBuffer *b)
Return the size of the buffer.
Definition: buf.c:1225
static ut32 rz_read_le32(const void *src)
Definition: rz_endian.h:239
static ut64 rz_read_le64(const void *src)
Definition: rz_endian.h:266
static ut64 rz_read_ble(const void *src, bool big_endian, int size)
Definition: rz_endian.h:517
#define RZ_LOG_ERROR(fmtstr,...)
Definition: rz_log.h:58
RZ_API char * rz_str_ndup(RZ_NULLABLE const char *ptr, int len)
Create new copy of string ptr limited to size len.
Definition: str.c:1006
RZ_API size_t rz_str_ncpy(char *dst, const char *src, size_t n)
Secure string copy with null terminator.
Definition: str.c:923
RZ_API size_t rz_str_nlen(const char *s, size_t n)
Definition: str.c:1949
#define RZ_OWN
Definition: rz_types.h:62
#define RZ_NEW0(x)
Definition: rz_types.h:284
#define RZ_NEWS0(x, y)
Definition: rz_types.h:282
#define st32
Definition: rz_types_base.h:12
#define b(i)
Definition: sha256.c:42
XX curplugin == o->plugin.
Definition: rz_bin.h:298
char * file
Definition: rz_bin.h:210
RzCoreSymCacheElementSegment * segments
RzCoreSymCacheElementLineInfo * line_info
RzCoreSymCacheElementSection * sections
RzCoreSymCacheElementHdr * hdr
RzCoreSymCacheElementLinedSymbol * lined_symbols
RzCoreSymCacheElementSymbol * symbols
if(dbg->bits==RZ_SYS_BITS_64)
Definition: windows-arm64.h:4
ut64(WINAPI *w32_GetEnabledXStateFeatures)()