Rizin
UNIX-like reverse engineering framework and CLI tools
prof.h
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_tctx_s prof_tctx_t;
typedef struct prof_gctx_s prof_gctx_t;
typedef struct prof_tdata_s prof_tdata_t;

/* Option defaults. */
#ifdef JEMALLOC_PROF
# define PROF_PREFIX_DEFAULT "jeprof"
#else
# define PROF_PREFIX_DEFAULT ""
#endif
#define LG_PROF_SAMPLE_DEFAULT 19
#define LG_PROF_INTERVAL_DEFAULT -1

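/*
 * Illustrative note (not part of the original header): the LG_* defaults are
 * base-2 logarithms. LG_PROF_SAMPLE_DEFAULT == 19 gives a mean of
 * 1 << 19 == 524288 bytes (512 KiB) allocated between samples, and
 * LG_PROF_INTERVAL_DEFAULT == -1 disables interval-triggered dumping.
 * The helper below is hypothetical and only spells out that arithmetic.
 */
#if 0 /* example only */
static size_t
prof_example_default_sample_bytes(void)
{
    return ((size_t)1 << LG_PROF_SAMPLE_DEFAULT); /* 524288 bytes */
}
#endif
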
/*
 * Hard limit on stack backtrace depth. The version of prof_backtrace() that
 * is based on __builtin_return_address() necessarily has a hard-coded number
 * of backtrace frame handlers, and should be kept in sync with this setting.
 */
#define PROF_BT_MAX 128

/* Initial hash table size. */
#define PROF_CKH_MINITEMS 64

/* Size of memory buffer to use when writing dump files. */
#define PROF_DUMP_BUFSIZE 65536

/* Size of stack-allocated buffer used by prof_printf(). */
#define PROF_PRINTF_BUFSIZE 128

/*
 * Number of mutexes shared among all gctx's. No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define PROF_NCTX_LOCKS 1024

/*
 * Number of mutexes shared among all tdata's. No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define PROF_NTDATA_LOCKS 256

/*
 * prof_tdata pointers close to NULL are used to encode state information that
 * is used for cleaning up during thread shutdown.
 */
#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1)
#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2)
#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY
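
/*
 * Illustrative sketch (not part of the original header): because the state
 * sentinels above are small integers cast to pointers, a usable tdata pointer
 * can be told apart from NULL or a sentinel with one unsigned comparison, the
 * same test prof_sample_accum_update() performs below. The helper name is
 * hypothetical.
 */
#if 0 /* example only */
static bool
prof_example_tdata_is_usable(prof_tdata_t *tdata)
{
    return ((uintptr_t)tdata > (uintptr_t)PROF_TDATA_STATE_MAX);
}
#endif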

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

struct prof_bt_s {
    /* Backtrace, stored as len program counters. */
    void **vec;
    unsigned len;
};
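
/*
 * Illustrative sketch (not part of the original header): a prof_bt_t does not
 * own its storage; callers hand it a vector (e.g. tdata->vec, PROF_BT_MAX
 * entries) via bt_init(), and prof_backtrace() then fills in vec/len. The
 * helper name is hypothetical; the call sequence mirrors prof_alloc_prep()
 * below.
 */
#if 0 /* example only */
static void
prof_example_capture(prof_tdata_t *tdata, prof_bt_t *bt)
{
    bt_init(bt, tdata->vec);  /* bind caller-owned storage, reset length */
    prof_backtrace(bt);       /* record up to PROF_BT_MAX frames */
}
#endif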

#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
    prof_bt_t *bt;
    unsigned max;
} prof_unwind_data_t;
#endif

struct prof_cnt_s {
    /* Profiling counters. */
    uint64_t curobjs;
    uint64_t curbytes;
    uint64_t accumobjs;
    uint64_t accumbytes;
};
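
/*
 * Illustrative sketch (not part of the original header): curobjs/curbytes
 * track sampled allocations that are still live, while accumobjs/accumbytes
 * only ever grow (they back the cumulative reporting that opt_prof_accum
 * enables). The helpers are hypothetical and just spell that convention out.
 */
#if 0 /* example only */
static void
prof_example_count_alloc(prof_cnt_t *cnts, size_t usize)
{
    cnts->curobjs++;
    cnts->curbytes += usize;
    cnts->accumobjs++;
    cnts->accumbytes += usize;
}

static void
prof_example_count_free(prof_cnt_t *cnts, size_t usize)
{
    cnts->curobjs--;
    cnts->curbytes -= usize; /* accum* counters are never decremented */
}
#endif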

typedef enum {
    prof_tctx_state_initializing,
    prof_tctx_state_nominal,
    prof_tctx_state_dumping,
    prof_tctx_state_purgatory /* Dumper must finish destroying. */
} prof_tctx_state_t;

struct prof_tctx_s {
    /* Thread data for thread that performed the allocation. */
    prof_tdata_t *tdata;

    /*
     * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be
     * defunct during teardown.
     */
    uint64_t thr_uid;
    uint64_t thr_discrim;

    /* Profiling counters, protected by tdata->lock. */
    prof_cnt_t cnts;

    /* Associated global context. */
    prof_gctx_t *gctx;

    /*
     * UID that distinguishes multiple tctx's created by the same thread,
     * but coexisting in gctx->tctxs. There are two ways that such
     * coexistence can occur:
     * - A dumper thread can cause a tctx to be retained in the purgatory
     *   state.
     * - Although a single "producer" thread must create all tctx's which
     *   share the same thr_uid, multiple "consumers" can each concurrently
     *   execute portions of prof_tctx_destroy(). prof_tctx_destroy() only
     *   gets called once each time cnts.cur{objs,bytes} drop to 0, but this
     *   threshold can be hit again before the first consumer finishes
     *   executing prof_tctx_destroy().
     */
    uint64_t tctx_uid;

    /* Linkage into gctx's tctxs. */
    rb_node(prof_tctx_t) tctx_link;

    /*
     * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents
     * sample vs destroy race.
     */
    bool prepared;

    /* Current dump-related state, protected by gctx->lock. */
    prof_tctx_state_t state;

    /*
     * Copy of cnts snapshotted during early dump phase, protected by
     * dump_mtx.
     */
    prof_cnt_t dump_cnts;
};
typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;

struct prof_gctx_s {
    /* Protects nlimbo, cnt_summed, and tctxs. */
    malloc_mutex_t *lock;

    /*
     * Number of threads that currently cause this gctx to be in a state of
     * limbo due to one of:
     * - Initializing this gctx.
     * - Initializing per thread counters associated with this gctx.
     * - Preparing to destroy this gctx.
     * - Dumping a heap profile that includes this gctx.
     * nlimbo must be 1 (single destroyer) in order to safely destroy the
     * gctx.
     */
    unsigned nlimbo;

    /*
     * Tree of profile counters, one for each thread that has allocated in
     * this context.
     */
    prof_tctx_tree_t tctxs;

    /* Linkage for tree of contexts to be dumped. */
    rb_node(prof_gctx_t) dump_link;

    /* Temporary storage for summation during dump. */
    prof_cnt_t cnt_summed;

    /* Associated backtrace. */
    prof_bt_t bt;

    /* Backtrace vector, variable size, referred to by bt. */
    void *vec[1];
};
typedef rb_tree(prof_gctx_t) prof_gctx_tree_t;
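
/*
 * Illustrative sketch (not part of the original header): vec[1] is an
 * old-style variable-length trailing array, so a gctx presumably gets
 * allocated with enough extra room for its whole backtrace, and bt.vec is
 * pointed at that in-line storage. The helper is hypothetical; offsetof()
 * comes from <stddef.h>.
 */
#if 0 /* example only */
static size_t
prof_example_gctx_size(const prof_bt_t *bt)
{
    return (offsetof(prof_gctx_t, vec) + bt->len * sizeof(void *));
}
#endif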

struct prof_tdata_s {
    malloc_mutex_t *lock;

    /* Monotonically increasing unique thread identifier. */
    uint64_t thr_uid;

    /*
     * Monotonically increasing discriminator among tdata structures
     * associated with the same thr_uid.
     */
    uint64_t thr_discrim;

    /* Included in heap profile dumps if non-NULL. */
    char *thread_name;

    bool attached;
    bool expired;

    rb_node(prof_tdata_t) tdata_link;

    /*
     * Counter used to initialize prof_tctx_t's tctx_uid. No locking is
     * necessary when incrementing this field, because only one thread ever
     * does so.
     */
    uint64_t tctx_uid_next;

    /*
     * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks
     * backtraces for which it has non-zero allocation/deallocation counters
     * associated with thread-specific prof_tctx_t objects. Other threads
     * may write to prof_tctx_t contents when freeing associated objects.
     */
    ckh_t bt2tctx;

    /* Sampling state. */
    uint64_t prng_state;
    uint64_t bytes_until_sample;

    /* State used to avoid dumping while operating on prof internals. */
    bool enq;
    bool enq_idump;
    bool enq_gdump;

    /*
     * Set to true during an early dump phase for tdata's which are
     * currently being dumped. New threads' tdata's have this initialized
     * to false so that they aren't accidentally included in later dump
     * phases.
     */
    bool dumping;

    /*
     * True if profiling is active for this tdata's thread
     * (thread.prof.active mallctl).
     */
    bool active;

    /* Temporary storage for summation during dump. */
    prof_cnt_t cnt_summed;

    /* Backtrace vector, used for calls to prof_backtrace(). */
    void *vec[PROF_BT_MAX];
};
typedef rb_tree(prof_tdata_t) prof_tdata_tree_t;
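
/*
 * Illustrative sketch (not part of the original header): as the comment on
 * tctx_uid_next says, only the owning thread ever creates tctx's for its
 * thr_uid, so handing out tctx_uid values is a plain post-increment with no
 * atomics. The helper name is hypothetical.
 */
#if 0 /* example only */
static uint64_t
prof_example_tctx_uid_alloc(prof_tdata_t *tdata)
{
    return (tdata->tctx_uid_next++);
}
#endif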

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

extern bool opt_prof;
extern bool opt_prof_active;
extern bool opt_prof_thread_active_init;
extern size_t opt_lg_prof_sample;    /* Mean bytes between samples. */
extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */
extern bool opt_prof_gdump;          /* High-water memory dumping. */
extern bool opt_prof_final;          /* Final profile dumping. */
extern bool opt_prof_leak;           /* Dump leak summary at exit. */
extern bool opt_prof_accum;          /* Report cumulative bytes. */
extern char opt_prof_prefix[
    /* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
    PATH_MAX +
#endif
    1];
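
/*
 * Illustrative note (not part of the original header): each opt_* variable
 * corresponds to a jemalloc MALLOC_CONF option named without the "opt_"
 * prefix, so a typical profiling configuration looks something like
 * (values chosen only as an example):
 *
 *   MALLOC_CONF="prof:true,prof_prefix:jeprof,lg_prof_sample:19,prof_leak:true"
 */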

/* Accessed via prof_active_[gs]et{_unlocked,}(). */
extern bool prof_active;

/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */
extern bool prof_gdump_val;

/*
 * Profile dump interval, measured in bytes allocated. Each arena triggers a
 * profile dump when it reaches this threshold. The effect is that the
 * interval between profile dumps averages prof_interval, though the actual
 * interval between dumps will tend to be sporadic, and the interval will be a
 * maximum of approximately (prof_interval * narenas).
 */
extern uint64_t prof_interval;

/*
 * Initialized as opt_lg_prof_sample, and potentially modified during profiling
 * resets.
 */
extern size_t lg_prof_sample;

void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
    prof_tctx_t *tctx);
void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx);
void bt_init(prof_bt_t *bt, void **vec);
void prof_backtrace(prof_bt_t *bt);
prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt);
#ifdef JEMALLOC_JET
size_t prof_tdata_count(void);
size_t prof_bt_count(void);
const prof_cnt_t *prof_cnt_all(void);
typedef int (prof_dump_open_t)(bool, const char *);
extern prof_dump_open_t *prof_dump_open;
typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *);
extern prof_dump_header_t *prof_dump_header;
#endif
void prof_idump(tsdn_t *tsdn);
bool prof_mdump(tsd_t *tsd, const char *filename);
void prof_gdump(tsdn_t *tsdn);
prof_tdata_t *prof_tdata_init(tsd_t *tsd);
prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
void prof_reset(tsd_t *tsd, size_t lg_sample);
void prof_tdata_cleanup(tsd_t *tsd);
bool prof_active_get(tsdn_t *tsdn);
bool prof_active_set(tsdn_t *tsdn, bool active);
const char *prof_thread_name_get(tsd_t *tsd);
int prof_thread_name_set(tsd_t *tsd, const char *thread_name);
bool prof_thread_active_get(tsd_t *tsd);
bool prof_thread_active_set(tsd_t *tsd, bool active);
bool prof_thread_active_init_get(tsdn_t *tsdn);
bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init);
bool prof_gdump_get(tsdn_t *tsdn);
bool prof_gdump_set(tsdn_t *tsdn, bool active);
void prof_boot0(void);
void prof_boot1(void);
bool prof_boot2(tsd_t *tsd);
void prof_prefork0(tsdn_t *tsdn);
void prof_prefork1(tsdn_t *tsdn);
void prof_postfork_parent(tsdn_t *tsdn);
void prof_postfork_child(tsdn_t *tsdn);
void prof_sample_threshold_update(prof_tdata_t *tdata);

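/*
 * Illustrative sketch (not part of the original header): how the declarations
 * above and the inline fast-path helpers below are typically strung together
 * around an allocation. prof_example_malloc() and raw_alloc are hypothetical;
 * only the prof_* calls come from this header.
 */
#if 0 /* example only */
static void *
prof_example_malloc(tsd_t *tsd, size_t usize, void *(*raw_alloc)(size_t))
{
    /* Decide whether to sample; captures a backtrace only when sampling. */
    prof_tctx_t *tctx = prof_alloc_prep(tsd, usize,
        prof_active_get_unlocked(), true);
    /* Perform the underlying allocation. */
    void *ptr = raw_alloc(usize);

    if (ptr == NULL) {
        /* Undo the sampling bookkeeping on failure. */
        prof_alloc_rollback(tsd, tctx, true);
        return (NULL);
    }
    /* Attach the tctx (or the "not sampled" sentinel) to the object. */
    prof_malloc(tsd_tsdn(tsd), ptr, usize, tctx);
    return (ptr);
}
#endif
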
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

#ifndef JEMALLOC_ENABLE_INLINE
bool prof_active_get_unlocked(void);
bool prof_gdump_get_unlocked(void);
prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create);
prof_tctx_t *prof_tctx_get(tsdn_t *tsdn, const void *ptr);
void prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize,
    prof_tctx_t *tctx);
void prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize,
    const void *old_ptr, prof_tctx_t *tctx);
bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit,
    prof_tdata_t **tdata_out);
prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active,
    bool update);
void prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize,
    prof_tctx_t *tctx);
void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize,
    prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr,
    size_t old_usize, prof_tctx_t *old_tctx);
void prof_free(tsd_t *tsd, const void *ptr, size_t usize);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
JEMALLOC_ALWAYS_INLINE bool
prof_active_get_unlocked(void)
{

    /*
     * Even if opt_prof is true, sampling can be temporarily disabled by
     * setting prof_active to false. No locking is used when reading
     * prof_active in the fast path, so there are no guarantees regarding
     * how long it will take for all threads to notice state changes.
     */
    return (prof_active);
}

JEMALLOC_ALWAYS_INLINE bool
prof_gdump_get_unlocked(void)
{

    /*
     * No locking is used when reading prof_gdump_val in the fast path, so
     * there are no guarantees regarding how long it will take for all
     * threads to notice state changes.
     */
    return (prof_gdump_val);
}

JEMALLOC_ALWAYS_INLINE prof_tdata_t *
prof_tdata_get(tsd_t *tsd, bool create)
{
    prof_tdata_t *tdata;

    if (unlikely(!config_prof))
        return NULL;

    tdata = tsd_prof_tdata_get(tsd);
    if (create) {
        if (unlikely(tdata == NULL)) {
            if (tsd_nominal(tsd)) {
                tdata = prof_tdata_init(tsd);
                tsd_prof_tdata_set(tsd, tdata);
            }
        } else if (unlikely(tdata->expired)) {
            tdata = prof_tdata_reinit(tsd, tdata);
            tsd_prof_tdata_set(tsd, tdata);
        }
        /* A tdata returned from here must either be NULL or attached. */
        if (unlikely(tdata != NULL && !tdata->attached))
            return NULL;
    }

    return (tdata);
}

JEMALLOC_ALWAYS_INLINE prof_tctx_t *
prof_tctx_get(tsdn_t *tsdn, const void *ptr)
{
    if (unlikely(!config_prof || ptr == NULL))
        return NULL;
    return (arena_prof_tctx_get(tsdn, ptr));
}

JEMALLOC_ALWAYS_INLINE void
prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx)
{
    cassert(config_prof);
    assert(ptr != NULL);
    arena_prof_tctx_set(tsdn, ptr, usize, tctx);
}

JEMALLOC_ALWAYS_INLINE void
prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, const void *old_ptr,
    prof_tctx_t *old_tctx)
{
    cassert(config_prof);
    assert(ptr != NULL);

    arena_prof_tctx_reset(tsdn, ptr, usize, old_ptr, old_tctx);
}

JEMALLOC_ALWAYS_INLINE bool
prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
    prof_tdata_t **tdata_out)
{
    prof_tdata_t *tdata;

    if (unlikely(!config_prof))
        return false;

    tdata = prof_tdata_get(tsd, true);
    if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX))
        tdata = NULL;

    if (tdata_out != NULL)
        *tdata_out = tdata;

    if (unlikely(tdata == NULL))
        return (true);

    if (likely(tdata->bytes_until_sample >= usize)) {
        if (update)
            tdata->bytes_until_sample -= usize;
        return (true);
    } else {
        /* Compute new sample threshold. */
        if (update)
            prof_sample_threshold_update(tdata);
        return (!tdata->active);
    }
}
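
/*
 * Illustrative sketch (not part of the original header): a true return from
 * prof_sample_accum_update() means "do not sample this allocation", either
 * because the byte threshold was not crossed or because no usable tdata
 * exists; prof_alloc_prep() and prof_realloc() rely on that convention. The
 * helper below is hypothetical and merely inverts it.
 */
#if 0 /* example only */
static bool
prof_example_should_sample(tsd_t *tsd, size_t usize)
{
    return (!prof_sample_accum_update(tsd, usize, true, NULL));
}
#endif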

JEMALLOC_ALWAYS_INLINE prof_tctx_t *
prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update)
{
    prof_tctx_t *ret;
    prof_tdata_t *tdata;
    prof_bt_t bt;

    if (unlikely(usize != s2u(usize)))
        return NULL;

    if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update,
        &tdata)))
        ret = (prof_tctx_t *)(uintptr_t)1U;
    else {
        bt_init(&bt, tdata->vec);
        prof_backtrace(&bt);
        ret = prof_lookup(tsd, &bt);
    }

    return (ret);
}
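
/*
 * Illustrative note (not part of the original header): the sentinel
 * (prof_tctx_t *)(uintptr_t)1U returned above means "allocation not sampled";
 * prof_malloc(), prof_realloc() and prof_free() below all test for a real
 * tctx with (uintptr_t)tctx > (uintptr_t)1U.
 */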

JEMALLOC_ALWAYS_INLINE void
prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx)
{
    cassert(config_prof);
    assert(ptr != NULL);
    assert(usize == isalloc(tsdn, ptr, true));

    if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
        prof_malloc_sample_object(tsdn, ptr, usize, tctx);
    else
        prof_tctx_set(tsdn, ptr, usize, (prof_tctx_t *)(uintptr_t)1U);
}

JEMALLOC_ALWAYS_INLINE void
prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
    bool prof_active, bool updated, const void *old_ptr, size_t old_usize,
    prof_tctx_t *old_tctx)
{
    bool sampled, old_sampled;

    cassert(config_prof);
    assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);

    if (prof_active && !updated && ptr != NULL) {
        assert(usize == isalloc(tsd_tsdn(tsd), ptr, true));
        if (prof_sample_accum_update(tsd, usize, true, NULL)) {
            /*
             * Don't sample. The usize passed to prof_alloc_prep()
             * was larger than what actually got allocated, so a
             * backtrace was captured for this allocation, even
             * though its actual usize was insufficient to cross the
             * sample threshold.
             */
            prof_alloc_rollback(tsd, tctx, true);
            tctx = (prof_tctx_t *)(uintptr_t)1U;
        }
    }

    sampled = ((uintptr_t)tctx > (uintptr_t)1U);
    old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U);

    if (unlikely(sampled))
        prof_malloc_sample_object(tsd_tsdn(tsd), ptr, usize, tctx);
    else
        prof_tctx_reset(tsd_tsdn(tsd), ptr, usize, old_ptr, old_tctx);

    if (unlikely(old_sampled))
        prof_free_sampled_object(tsd, old_usize, old_tctx);
}

JEMALLOC_ALWAYS_INLINE void
prof_free(tsd_t *tsd, const void *ptr, size_t usize)
{
    prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr);

    cassert(config_prof);
    assert(usize == isalloc(tsd_tsdn(tsd), ptr, true));

    if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
        prof_free_sampled_object(tsd, usize, tctx);
}
#endif

#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/