Rizin
unix-like reverse engineering framework and cli tools
memcmplen.h
Go to the documentation of this file.
1 //
5 //
6 // Author: Lasse Collin
7 //
8 // This file has been put into the public domain.
9 // You can do whatever you want with this file.
10 //
12 
13 #ifndef LZMA_MEMCMPLEN_H
14 #define LZMA_MEMCMPLEN_H
15 
16 #include "common.h"
17 
18 #ifdef HAVE_IMMINTRIN_H
19 # include <immintrin.h>
20 #endif
21 
22 
42 static inline uint32_t lzma_attribute((__always_inline__))
43 lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2,
45 {
46  assert(len <= limit);
48 
49 #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
50  && ((TUKLIB_GNUC_REQ(3, 4) && defined(__x86_64__)) \
51  || (defined(__INTEL_COMPILER) && defined(__x86_64__)) \
52  || (defined(__INTEL_COMPILER) && defined(_M_X64)) \
53  || (defined(_MSC_VER) && defined(_M_X64)))
54  // NOTE: This will use 64-bit unaligned access which
55  // TUKLIB_FAST_UNALIGNED_ACCESS wasn't meant to permit, but
56  // it's convenient here at least as long as it's x86-64 only.
57  //
58  // I keep this x86-64 only for now since that's where I know this
59  // to be a good method. This may be fine on other 64-bit CPUs too.
60  // On big endian one should use xor instead of subtraction and switch
61  // to __builtin_clzll().
62 #define LZMA_MEMCMPLEN_EXTRA 8
63  while (len < limit) {
64  const uint64_t x = read64ne(buf1 + len) - read64ne(buf2 + len);
65  if (x != 0) {
66 # if defined(_M_X64) // MSVC or Intel C compiler on Windows
67  unsigned long tmp;
68  _BitScanForward64(&tmp, x);
69  len += (uint32_t)tmp >> 3;
70 # else // GCC, clang, or Intel C compiler
71  len += (uint32_t)__builtin_ctzll(x) >> 3;
72 # endif
73  return my_min(len, limit);
74  }
75 
76  len += 8;
77  }
78 
79  return limit;
80 
81 #elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
82  && defined(HAVE__MM_MOVEMASK_EPI8) \
83  && ((defined(__GNUC__) && defined(__SSE2_MATH__)) \
84  || (defined(__INTEL_COMPILER) && defined(__SSE2__)) \
85  || (defined(_MSC_VER) && defined(_M_IX86_FP) \
86  && _M_IX86_FP >= 2))
87  // NOTE: Like above, this will use 128-bit unaligned access which
88  // TUKLIB_FAST_UNALIGNED_ACCESS wasn't meant to permit.
89  //
90  // SSE2 version for 32-bit and 64-bit x86. On x86-64 the above
91  // version is sometimes significantly faster and sometimes
92  // slightly slower than this SSE2 version, so this SSE2
93  // version isn't used on x86-64.
94 # define LZMA_MEMCMPLEN_EXTRA 16
95  while (len < limit) {
96  const uint32_t x = 0xFFFF ^ _mm_movemask_epi8(_mm_cmpeq_epi8(
97  _mm_loadu_si128((const __m128i *)(buf1 + len)),
98  _mm_loadu_si128((const __m128i *)(buf2 + len))));
99 
100  if (x != 0) {
101  len += ctz32(x);
102  return my_min(len, limit);
103  }
104 
105  len += 16;
106  }
107 
108  return limit;
109 
110 #elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) && !defined(WORDS_BIGENDIAN)
111  // Generic 32-bit little endian method
112 # define LZMA_MEMCMPLEN_EXTRA 4
113  while (len < limit) {
114  uint32_t x = read32ne(buf1 + len) - read32ne(buf2 + len);
115  if (x != 0) {
116  if ((x & 0xFFFF) == 0) {
117  len += 2;
118  x >>= 16;
119  }
120 
121  if ((x & 0xFF) == 0)
122  ++len;
123 
124  return my_min(len, limit);
125  }
126 
127  len += 4;
128  }
129 
130  return limit;
131 
132 #elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) && defined(WORDS_BIGENDIAN)
133  // Generic 32-bit big endian method
134 # define LZMA_MEMCMPLEN_EXTRA 4
135  while (len < limit) {
136  uint32_t x = read32ne(buf1 + len) ^ read32ne(buf2 + len);
137  if (x != 0) {
138  if ((x & 0xFFFF0000) == 0) {
139  len += 2;
140  x <<= 16;
141  }
142 
143  if ((x & 0xFF000000) == 0)
144  ++len;
145 
146  return my_min(len, limit);
147  }
148 
149  len += 4;
150  }
151 
152  return limit;
153 
154 #else
155  // Simple portable version that doesn't use unaligned access.
156 # define LZMA_MEMCMPLEN_EXTRA 0
157  while (len < limit && buf1[len] == buf2[len])
158  ++len;
159 
160  return len;
161 #endif
162 }
163 
164 #endif
#define const
Definition: ansidecl.h:240
static uint32_t lzma_attribute((__always_inline__)) lzma_memcmplen(const uint8_t *buf1
assert(limit<=UINT32_MAX/2)
static uint32_t const uint8_t uint32_t len
Definition: memcmplen.h:44
static uint32_t const uint8_t * buf2
Definition: memcmplen.h:43
static uint32_t const uint8_t uint32_t uint32_t limit
Definition: memcmplen.h:45
int x
Definition: mipsasm.c:20
unsigned int uint32_t
Definition: sftypes.h:29
unsigned long uint64_t
Definition: sftypes.h:28
unsigned char uint8_t
Definition: sftypes.h:31
#define UINT32_MAX
Definitions common to the whole liblzma library.
#define my_min(x, y)
Definition: sysdefs.h:185
static uint32_t ctz32(uint32_t n)
static uint32_t read32ne(const uint8_t *buf)
static uint64_t read64ne(const uint8_t *buf)