Rizin
unix-like reverse engineering framework and cli tools
assembler.c
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: 2022 Dhruv Maroo <dhruvmaru007@gmail.com>
2 // SPDX-License-Identifier: LGPL-3.0-only
3 
4 #include "assembler.h"
5 #include "regs.h"
6 
7 extern const SHOpRaw sh_op_lookup[];
8 extern const ut32 OPCODE_NUM;
9 
17  return pb.is_param ? pb.param.mode : pb.addr.mode;
18 }
19 
/**
 * \brief Replace all the commas outside operands with spaces (i.e. "space out" the operands)
 *
 * Commas inside a parenthesised operand such as `@(4,r1)` are kept, so only
 * the commas that separate operands are turned into spaces.
 *
 * \param buffer Input instruction string
 * \return char* Newly allocated "spaced out" copy of \p buffer (to be freed by the caller),
 *         or NULL if the copy could not be allocated
 */
static char *sh_op_space_params(const char *buffer) {
	char *spaced = strdup(buffer);
	if (!spaced) {
		return NULL;
	}

	bool inside_paren = false;
	/* The index must be wide enough for any string length: an 8-bit counter
	   wraps around on inputs longer than 255 characters and never reaches the terminator */
	for (size_t i = 0; spaced[i] != '\0'; i++) {
		switch (spaced[i]) {
		// there won't be nested parens so the logic is trivial
		case '(':
			inside_paren = true;
			break;
		case ')':
			inside_paren = false;
			break;
		case ',':
			if (!inside_paren) {
				spaced[i] = ' ';
			}
			break;
		default:
			break;
		}
	}
	return spaced;
}
50 
58 static ut32 sh_op_reg_bits(const char *param, ut8 offset) {
59  const int reg_num = sizeof(sh_registers) / sizeof(char *);
60  for (ut8 i = 0; i < reg_num; i++) {
61  if (!strcmp(sh_registers[i], param)) {
62  if (i >= SH_REG_IND_R0B) {
63  /* In case we encounter a banked register, we should just decode it as it's un-banked counterpart */
64  i -= SH_REG_IND_R0B;
65  }
66  return ((ut32)i) << offset;
67  }
68  }
69  RZ_LOG_ERROR("SuperH: Invalid register encountered by the assembler\n");
70  return 0;
71 }
72 
83 static ut32 sh_op_param_bits(SHParamBuilder shb, const char *param, SHScaling scaling, ut64 pc) {
84  if (shb.is_param) {
85  return 0;
86  }
87 
88  ut32 opcode = 0;
89  struct sh_param_builder_addr_t shba = shb.addr;
90  char *const reg = strdup(param);
91  char *const dup = strdup(param);
92  char *const disp = strdup(param);
93  ut8 d;
94 
95  switch (shba.mode) {
96  case SH_REG_DIRECT:
97  case SH_PC_RELATIVE_REG:
98  // %s
99  opcode = sh_op_reg_bits(reg, shba.start);
100  break;
101  case SH_REG_INDIRECT:
102  // @%s
103  sscanf(param, "@%s", reg);
104  opcode = sh_op_reg_bits(reg, shba.start);
105  break;
106  case SH_REG_INDIRECT_I: {
107  // @%s+
108  char *plus = strchr(dup, '+');
109  if (!plus) {
110  break;
111  }
112  *plus = '\0';
113  sscanf(dup, "@%s", reg);
114  opcode = sh_op_reg_bits(reg, shba.start);
115  break;
116  }
117  case SH_REG_INDIRECT_D:
118  // @-%s
119  sscanf(param, "@-%s", reg);
120  opcode = sh_op_reg_bits(reg, shba.start);
121  break;
122  case SH_REG_INDIRECT_DISP: {
123  // @(%s,%s)
124  char *comma = strchr(dup, ',');
125  if (!comma) {
126  break;
127  }
128  *comma = '\0';
129  sscanf(dup, "@(%s", disp);
130 
131  comma++;
132  char *paren = strchr(comma, ')');
133  if (!paren) {
134  break;
135  }
136  *paren = '\0';
137 
138  d = (rz_num_get(NULL, disp) / sh_scaling_size[scaling]) & 0xf;
139  opcode = d << shba.start;
140  opcode |= sh_op_reg_bits(comma, shba.start + 4);
141  break;
142  }
144  // @(r0,%s)
145  char *paren = strchr(dup, ')');
146  if (!paren) {
147  break;
148  }
149  *paren = '\0';
150  paren = dup + strlen("@(r0,");
151  opcode = sh_op_reg_bits(paren, shba.start);
152  break;
153  }
154  case SH_GBR_INDIRECT_DISP: {
155  // @(%s,gbr)
156  char *comma = strchr(dup, ',');
157  if (!comma) {
158  break;
159  }
160  *comma = '\0';
161  sscanf(dup, "@(%s", disp);
162  d = rz_num_get(NULL, disp) / sh_scaling_size[scaling];
163  opcode = d << shba.start;
164  break;
165  }
166  case SH_PC_RELATIVE_DISP: {
167  // @(%s,pc)
168  char *comma = strchr(dup, ',');
169  if (!comma) {
170  break;
171  }
172  *comma = '\0';
173  sscanf(dup, "@(%s,pc)", disp);
174  d = rz_num_get(NULL, disp) / sh_scaling_size[scaling];
175  opcode = d << shba.start;
176  break;
177  }
178  case SH_PC_RELATIVE8:
179  d = (st16)((st64)rz_num_get(NULL, disp) - (st64)pc - 4) / 2;
180  opcode = d << shba.start;
181  break;
182  case SH_PC_RELATIVE12: {
183  ut16 dd = ((st16)((st64)rz_num_get(NULL, disp) - (st64)pc - 4) / 2) & 0xfff;
184  opcode = dd << shba.start;
185  break;
186  }
187  case SH_IMM_U:
188  case SH_IMM_S:
189  d = rz_num_get(NULL, disp);
190  opcode = d << shba.start;
191  break;
192  default:
193  RZ_LOG_ERROR("SuperH: Invalid addressing mode encountered by the assembler\n");
194  }
195 
196  free(reg);
197  free(disp);
198  free(dup);
199  return opcode;
200 }
201 
209 static ut64 sh_op_movl_param_bits(const char *reg_direct, const char *reg_disp_indirect) {
210  ut64 opcode = sh_op_reg_bits(reg_direct, NIB1);
211 
212  char *const dup = strdup(reg_disp_indirect);
213  char *comma = strchr(dup, ',');
214  if (!comma) {
215  goto fail;
216  }
217  *comma = '\0';
218  char *reg = comma + 1;
219  char *paren = strchr(reg, ')');
220  if (!paren) {
221  goto fail;
222  }
223  *paren = '\0';
224 
225  char *const disp = strdup(reg_disp_indirect);
226  sscanf(dup, "@(%s", disp);
227  ut8 d = (rz_num_get(NULL, disp) / sh_scaling_size[SH_SCALING_L]) & 0xf;
228  opcode |= d << NIB0;
229  opcode |= sh_op_reg_bits(reg, NIB2);
230 
231  free(disp);
232 fail:
233  free(dup);
234  return opcode;
235 }
236 
241 
242 /* This function is NOT robust. It is incapable of detecting invalid operand inputs.
243 If you provide an invalid operand, the behavior is, for all practical purposes, undefined.
244 The resulting assembled instruction will be complete gibberish and should not be used. */
251 static SHAddrHelper sh_op_get_addr_mode(const char *param) {
252  SHAddrHelper ret;
253  // Assume that we don't care about the register index
254  ret.reg = SH_REG_IND_SIZE;
255 
256  const ut8 reg_num = sizeof(sh_registers) / sizeof(char *);
257  /* Check if it is a register or not by iterating through all the register names.
258  This could also have been SH_PC_RELATIVE_REG, and we have no way to know.
259  But we can take care of this case in sh_op_compare, since no instruction
260  can have both SH_REG_DIRECT and SH_PC_RELATIVE_REG as its addressing modes */
261  for (ut8 i = 0; i < reg_num; i++) {
262  if (!strcmp(param, sh_registers[i])) {
263  ret.mode = SH_REG_DIRECT;
264  /* Well in case of `SH_REG_DIRECT` addressing mode, we do care about the register index.
265  This is because there are instructions (like `LDC` and `STC`) which have different
266  opcodes for the same addressing mode but different registers.
267  But, such ambiguous instructions have different opcodes only for non-gpr registers
268  (like sr, gbr, vbr, ssr, spc, dbr), hence we will only set ret.reg if the index is really non-gpr.
269  We will also store if we found a banked register, since we can that way find the correct instruction
270  which corresponds to banked register as a param */
271  if ((i > SH_REG_IND_PC && i < SH_REG_IND_FR0) || i >= SH_REG_IND_R0B) {
272  ret.reg = i;
273  }
274  return ret;
275  }
276  }
277 
278  switch (param[0]) {
279  case '@':
280  switch (param[1]) {
281  case 'r':
282  if (rz_str_endswith(param, "+")) {
283  ret.mode = SH_REG_INDIRECT_I;
284  } else {
285  ret.mode = SH_REG_INDIRECT;
286  }
287  break;
288  case '-':
289  ret.mode = SH_REG_INDIRECT_D;
290  break;
291  case '(':
292  if (strcmp(param, "@(r0,gbr)") == 0) {
294  } else if (rz_str_startswith(param, "@(r0,")) {
296  } else if (rz_str_endswith(param, ",gbr)")) {
298  } else if (rz_str_endswith(param, ",pc)")) {
300  } else {
302  }
303  break;
304  default:
305  // unreachable
307  }
308  break;
309  default:
310  /* If none of the above checks pass, we can assume it is a number
311  In this case, it could be any one of the following:
312  - SH_PC_RELATIVE8
313  - SH_PC_RELATIVE12
314  - SH_IMM_U
315  - SH_IMM_S
316  Again, we will just return SH_IMM_U, and take care of it in sh_op_compare
317  by considering all the above addressing modes to be equal
318  */
319  ret.mode = SH_IMM_U;
320  }
321 
322  return ret;
323 }
324 
333 static bool sh_op_compare(SHOpRaw raw, const char *mnem, SHAddrHelper modes[]) {
334  bool x = true;
335  x &= (strcmp(mnem, raw.str_mnem) == 0);
336 
337  // Quick return
338  if (!x) {
339  return x;
340  }
341 
342  for (ut8 i = 0; i < 2; i++) {
344  switch (md) {
345  case SH_REG_DIRECT:
346  case SH_PC_RELATIVE_REG:
347  md = SH_REG_DIRECT;
348  break;
349  case SH_PC_RELATIVE8:
350  case SH_PC_RELATIVE12:
351  case SH_IMM_U:
352  case SH_IMM_S:
353  md = SH_IMM_U;
354  break;
355  default:
356  break;
357  }
358 
359  x &= (modes[i].mode == md);
360 
361  /* We also need to make sure that we got the instruction corresponding
362  to the correct register by checking the register index in the SHAddrHelper
363  and the register in the SHOpRaw */
364  if (modes[i].reg < SH_REG_IND_R0B) {
365  /* We can only compare the registers if the param_builder is a param, and not an an addr
366  Also, the addressing mode has to be SH_REG_DIRECT, since the ambiguous instructions (`LDC` and `STC`)
367  are only ambiguous for params with direct register addressing */
369  x &= (modes[i].reg == raw.param_builder[i].param.param[0]);
370  } else {
371  /* In any other case, we did not get what we expected, so we can conclude that the instructions are not the same */
372  x &= false;
373  }
374  }
375 
376  /* Check whether this instruction really has banked register as its param */
377  if (modes[i].reg >= SH_REG_IND_R0B && modes[i].reg != SH_REG_IND_SIZE) {
378  /* If it has a banked register, then it must be a addr
379  (at least in case of all implemented instructions) */
380  if (!raw.param_builder[i].is_param) {
381  /* The number of bits to be used for a banked register must be 3
382  (at least in case of all implemented instructions) */
383  x &= (raw.param_builder[i].addr.bits == 3);
384  } else {
385  x &= false;
386  }
387  }
388  }
389 
390  return x;
391 }
392 
402 RZ_IPI ut16 sh_assembler(RZ_NONNULL const char *buffer, ut64 pc, RZ_NULLABLE bool *success) {
404  if (success) {
405  *success = true;
406  }
407 
408  char *mnem = NULL;
409  ut16 opcode = 0;
410  char *spaced = sh_op_space_params(buffer);
411  RzList *tokens = rz_str_split_duplist(spaced, " ", true);
412  free(spaced);
413  if (!tokens) {
414  goto bye;
415  }
416  RzListIter *itr, *tmp;
417  char *tok;
418  rz_list_foreach_safe (tokens, itr, tmp, tok) {
419  if (rz_str_is_whitespace(tok)) {
420  rz_list_delete(tokens, itr);
421  }
422  }
423  ut32 token_num = rz_list_length(tokens);
424  if (token_num == 0 || token_num > 3) {
425  RZ_LOG_ERROR("SuperH: Invalid number of operands in the instruction\n")
426  goto bye;
427  }
428 
429  mnem = (char *)rz_list_pop_head(tokens);
431  ut8 j = 0;
432  rz_list_foreach (tokens, itr, tok) {
433  sham[j] = sh_op_get_addr_mode(tok);
434  j++;
435  }
436 
437  for (ut16 i = 0; i < OPCODE_NUM; i++) {
438  if (!sh_op_compare(sh_op_lookup[i], mnem, sham)) {
439  continue;
440  }
441 
442  SHOpRaw raw = sh_op_lookup[i];
443  opcode = raw.opcode ^ raw.mask;
444  /* Now opcode only has the bits corresponding to the instruction
445  The bits corresponding to the operands are supposed to be calculated */
446 
447  // check for "weird" MOVL
448  if (raw.opcode == MOVL) {
449  char *reg_direct = rz_list_pop_head(tokens);
450  char *reg_disp_indirect = rz_list_pop_head(tokens);
451 
452  opcode |= sh_op_movl_param_bits(reg_direct, reg_disp_indirect);
453 
454  free(reg_direct);
455  free(reg_disp_indirect);
456  goto return_opcode;
457  }
458 
459  RzListIter *itr;
460  char *param;
461  j = 0;
462  rz_list_foreach (tokens, itr, param) {
463  opcode |= sh_op_param_bits(raw.param_builder[j], param, raw.scaling, pc);
464  j++;
465  }
466 
467  return_opcode:
468  rz_list_free(tokens);
469  free(mnem);
470  return opcode;
471  }
472 
473  RZ_LOG_ERROR("SuperH: Failed to assemble: \"%s\"\n", buffer);
474 
475 bye:
476  if (success) {
477  success = false;
478  }
479  rz_list_free(tokens);
480  free(mnem);
481  return 0;
482 }
#define mnem(n, mn)
#define RZ_IPI
Definition: analysis_wasm.c:11
lzma_index ** i
Definition: index.h:629
#define NULL
Definition: cris-opc.c:27
uint16_t ut16
uint32_t ut32
void bye(char *msg1, char *msg2)
Definition: gzappend.c:93
RZ_API void Ht_() free(HtName_(Ht) *ht)
Definition: ht_inc.c:130
voidpf uLong offset
Definition: ioapi.h:144
#define reg(n)
uint8_t ut8
Definition: lh5801.h:11
#define NIB2
Definition: common.h:46
#define NIB0
Definition: common.h:44
#define MOVL
Definition: common.h:70
#define NIB1
Definition: common.h:45
RZ_API void rz_list_delete(RZ_NONNULL RzList *list, RZ_NONNULL RzListIter *iter)
Removes an entry in the list by using the RzListIter pointer.
Definition: list.c:162
RZ_API ut32 rz_list_length(RZ_NONNULL const RzList *list)
Returns the length of the list.
Definition: list.c:109
RZ_API RZ_OWN void * rz_list_pop_head(RZ_NONNULL RzList *list)
Removes and returns the first element of the list.
Definition: list.c:401
RZ_API void rz_list_free(RZ_NONNULL RzList *list)
Empties the list and frees the list pointer.
Definition: list.c:137
static static fork const void static count static fd const char const char static newpath char char char static envp time_t static t const char static mode static whence const char static dir time_t static t unsigned static seconds const char struct utimbuf static buf static inc static sig const char static mode dup
Definition: sflib.h:68
return strdup("=SP r13\n" "=LR r14\n" "=PC r15\n" "=A0 r0\n" "=A1 r1\n" "=A2 r2\n" "=A3 r3\n" "=ZF zf\n" "=SF nf\n" "=OF vf\n" "=CF cf\n" "=SN or0\n" "gpr lr .32 56 0\n" "gpr pc .32 60 0\n" "gpr cpsr .32 64 0 ____tfiae_________________qvczn\n" "gpr or0 .32 68 0\n" "gpr tf .1 64.5 0 thumb\n" "gpr ef .1 64.9 0 endian\n" "gpr jf .1 64.24 0 java\n" "gpr qf .1 64.27 0 sticky_overflow\n" "gpr vf .1 64.28 0 overflow\n" "gpr cf .1 64.29 0 carry\n" "gpr zf .1 64.30 0 zero\n" "gpr nf .1 64.31 0 negative\n" "gpr itc .4 64.10 0 if_then_count\n" "gpr gef .4 64.16 0 great_or_equal\n" "gpr r0 .32 0 0\n" "gpr r1 .32 4 0\n" "gpr r2 .32 8 0\n" "gpr r3 .32 12 0\n" "gpr r4 .32 16 0\n" "gpr r5 .32 20 0\n" "gpr r6 .32 24 0\n" "gpr r7 .32 28 0\n" "gpr r8 .32 32 0\n" "gpr r9 .32 36 0\n" "gpr r10 .32 40 0\n" "gpr r11 .32 44 0\n" "gpr r12 .32 48 0\n" "gpr r13 .32 52 0\n" "gpr r14 .32 56 0\n" "gpr r15 .32 60 0\n" "gpr r16 .32 64 0\n" "gpr r17 .32 68 0\n")
int x
Definition: mipsasm.c:20
static const char * sh_registers[]
Definition: regs.h:10
#define rz_warn_if_reached()
Definition: rz_assert.h:29
#define rz_return_val_if_fail(expr, val)
Definition: rz_assert.h:108
#define RZ_LOG_ERROR(fmtstr,...)
Definition: rz_log.h:58
RZ_API ut64 rz_num_get(RzNum *num, const char *str)
Definition: unum.c:172
RZ_API bool rz_str_is_whitespace(RZ_NONNULL const char *str)
Checks if the whole string is composed of whitespace.
Definition: str.c:2004
RZ_API RzList * rz_str_split_duplist(const char *str, const char *c, bool trim)
Split the string str according to the substring c and returns a RzList with the result.
Definition: str.c:3464
RZ_API bool rz_str_startswith(RZ_NONNULL const char *str, RZ_NONNULL const char *needle)
Checks if a string starts with a specific sequence of characters (case sensitive)
Definition: str.c:3286
RZ_API bool rz_str_endswith(RZ_NONNULL const char *str, RZ_NONNULL const char *needle)
Checks if a string ends with a specific sequence of characters (case sensitive)
Definition: str.c:3329
#define RZ_NULLABLE
Definition: rz_types.h:65
#define RZ_NONNULL
Definition: rz_types.h:64
#define st64
Definition: rz_types_base.h:10
#define st16
Definition: rz_types_base.h:14
static SHAddrMode sh_pb_get_addrmode(SHParamBuilder pb)
Get the addressing mode for pb.
Definition: assembler.c:16
static ut32 sh_op_reg_bits(const char *param, ut8 offset)
Get the bits corresponding to the register param (i.e. register number shifted at offset)
Definition: assembler.c:58
static bool sh_op_compare(SHOpRaw raw, const char *mnem, SHAddrHelper modes[])
Check whether raw and instruction to be formed using mnem and modes will be equivalent.
Definition: assembler.c:333
static char * sh_op_space_params(const char *buffer)
Replace all the commas outside operands with spaces (i.e. "space out" the operands)
Definition: assembler.c:26
static ut64 sh_op_movl_param_bits(const char *reg_direct, const char *reg_disp_indirect)
Special assembler functions for the operands of "weird" MOVL instruction.
Definition: assembler.c:209
static ut32 sh_op_param_bits(SHParamBuilder shb, const char *param, SHScaling scaling, ut64 pc)
Get the opcode bits corresponding to param, scaling, pc and addressing mode (shb.mode) This function ...
Definition: assembler.c:83
struct sh_addr_dissassembler_helper_t SHAddrHelper
RZ_IPI ut16 sh_assembler(RZ_NONNULL const char *buffer, ut64 pc, RZ_NULLABLE bool *success)
Assemble instruction from SuperH-4 ISA. FPU instructions are not implemented yet.
Definition: assembler.c:402
const SHOpRaw sh_op_lookup[]
Definition: lookup.c:7
static SHAddrHelper sh_op_get_addr_mode(const char *param)
Get the addressing mode being used in param.
Definition: assembler.c:251
const ut32 OPCODE_NUM
Definition: lookup.c:195
enum sh_scaling_t SHScaling
@ SH_SCALING_L
long word
Definition: disassembler.h:39
enum sh_addr_mode_t SHAddrMode
enum sh_register_index_t SHRegisterIndex
static const ut8 sh_scaling_size[]
Definition: disassembler.h:43
@ SH_REG_IND_FR0
Definition: disassembler.h:105
@ SH_REG_IND_PC
Definition: disassembler.h:90
@ SH_REG_IND_SIZE
Definition: disassembler.h:149
@ SH_REG_IND_R0B
Definition: disassembler.h:139
@ SH_PC_RELATIVE8
Definition: disassembler.h:28
@ SH_IMM_S
8-bit immediate value (sign-extended)
Definition: disassembler.h:32
@ SH_REG_INDIRECT_DISP
register indirect with displacement
Definition: disassembler.h:23
@ SH_PC_RELATIVE_DISP
Definition: disassembler.h:27
@ SH_REG_DIRECT
Definition: disassembler.h:19
@ SH_PC_RELATIVE_REG
Definition: disassembler.h:30
@ SH_PC_RELATIVE12
Definition: disassembler.h:29
@ SH_REG_INDIRECT_INDEXED
indexed register indirect
Definition: disassembler.h:24
@ SH_ADDR_INVALID
Definition: disassembler.h:18
@ SH_REG_INDIRECT_I
register indirect with post-increment
Definition: disassembler.h:21
@ SH_REG_INDIRECT
Definition: disassembler.h:20
@ SH_IMM_U
8-bit immediate value (zero-extended)
Definition: disassembler.h:31
@ SH_REG_INDIRECT_D
register indirect with pre-decrement
Definition: disassembler.h:22
@ SH_GBR_INDIRECT_DISP
Definition: disassembler.h:25
@ SH_GBR_INDIRECT_INDEXED
Definition: disassembler.h:26
#define d(i)
Definition: sha256.c:44
Definition: buffer.h:15
const char * str_mnem
string mnemonic
Definition: common.h:25
SHScaling scaling
scaling for the opcode
Definition: common.h:29
SHParamBuilder param_builder[2]
param builders for the params
Definition: common.h:30
ut16 opcode
opcode
Definition: common.h:27
ut16 mask
mask for opcode to mask out param bits
Definition: common.h:28
ut8 start
start bit of the param (assuming little-endian)
Definition: common.h:10
SHAddrMode mode
addressing mode being used
Definition: common.h:12
st8 bits
bits to be read (-1, if you want this to be inferred from mode)
Definition: common.h:11
SHParam param
Definition: common.h:19
struct sh_param_builder_addr_t addr
Definition: common.h:18
bool is_param
whether a param was directly passed
Definition: common.h:21
ut16 param[2]
Definition: disassembler.h:238
SHAddrMode mode
Definition: disassembler.h:239
#define fail(test)
Definition: tests.h:29
ut64(WINAPI *w32_GetEnabledXStateFeatures)()
static const z80_opcode dd[]
Definition: z80_tab.h:844
static int comma
Definition: z80asm.c:76