/*
 * Width-specific aliases for this code path: the SIMD vector type is
 * __m256i and the lane mask type is __mmask8.
 */
17 #define _T_simd __m256i
18 #define _T_mask __mmask8
/*
 * _SC_(x) builds the name of a constant by prefixing it with ymm_;
 * _SV_(x) reads the .y member of that constant.
 */
21 #define _SC_(x) ymm_##x
22 #define _SV_(x) (ymm_##x.y)
/* _F_(x) builds a function name by appending the _avx512x8 suffix. */
25 #define _F_(x) x##_avx512x8
/* _M_I_(x) expands to the intrinsic name _mm256_<x>. */
33 #define _M_I_(x) _mm256_##x
/* _M_SI_(x) expands to the intrinsic name _mm256_<x>_si256. */
36 #define _M_SI_(x) _mm256_##x##_si256
/* _M_MGI_(x) expands to the intrinsic name _mm256_m<x>. */
39 #define _M_MGI_(x) _mm256_m##x
/*
 * _M_GI_ calls _mm256_<name>; note that it passes base before idx,
 * i.e. the opposite order from its own parameter list.
 */
42 #define _M_GI_(name, idx, base, scale) _mm256_##name(base, idx, scale)
/*
 * _SIMD_MASK_BIT_ is the number of uint32_t-sized elements in one _T_simd.
 * _SIMD_MASK_MAX_ is CNE_LEN2MASK applied to that count
 * (presumably a mask with that many low bits set -- confirm against the
 * CNE_LEN2MASK definition).
 */
45 #define _SIMD_MASK_BIT_ (sizeof(_T_simd) / sizeof(uint32_t))
46 #define _SIMD_MASK_MAX_ CNE_LEN2MASK(_SIMD_MASK_BIT_, uint32_t)
/*
 * _SIMD_FLOW_NUM_ is twice _SIMD_MASK_BIT_; _SIMD_FLOW_MSK_ is that value
 * minus one.
 */
48 #define _SIMD_FLOW_NUM_ (2 * _SIMD_MASK_BIT_)
49 #define _SIMD_FLOW_MSK_ (_SIMD_FLOW_NUM_ - 1)
/*
 * _SIMD_PTR_NUM_ is the number of uintptr_t-sized elements in one _T_simd;
 * _SIMD_PTR_MSK_ is CNE_LEN2MASK applied to that count.
 */
52 #define _SIMD_PTR_NUM_ (sizeof(_T_simd) / sizeof(uintptr_t))
53 #define _SIMD_PTR_MSK_ CNE_LEN2MASK(_SIMD_PTR_NUM_, uint32_t)
/*
 * NOTE(review): the names suggest per-lane masks for the match flag and the
 * index bits of a transition -- confirm against the initializer values.
 */
55 static const cne_ymm_t _SC_(match_mask) = {
69 static const cne_ymm_t _SC_(index_mask) = {
/*
 * NOTE(review): presumably the low and high parts of the idle transition
 * -- confirm against the initializer values.
 */
83 static const cne_ymm_t _SC_(trlo_idle) = {
98 static const cne_ymm_t _SC_(trhi_idle) = {
/*
 * Each 32-bit value repeats a single byte index (0, 4, 8, 12) in all four
 * of its bytes; the same four values are listed twice.
 */
105 static const cne_ymm_t _SC_(shuffle_input) = {
107 0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c,
108 0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c,
/* NOTE(review): presumably the value 4 in every 32-bit element -- confirm. */
112 static const cne_ymm_t _SC_(four_32) = {
119 static const cne_ymm_t _SC_(idx_add) = {
/*
 * Each 32-bit value has 0xff in its upper three bytes and 0x00, 0x04, 0x08
 * or 0x0c in its low byte; the same four values are listed twice.
 */
126 static const cne_ymm_t _SC_(range_base) = {
128 0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c,
129 0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c,
/*
 * Index table passed to _mm256_permutex2var_epi32 by gather_bytes:
 * indices 0-3 followed by 8-11.
 */
133 static const cne_ymm_t _SC_(pminp) = {
135 0x00, 0x01, 0x02, 0x03,
136 0x08, 0x09, 0x0a, 0x0b,
/* Mask constant 0x55: bits 0, 2, 4 and 6 are set. */
140 static const __mmask16 _SC_(pmidx_msk) = 0x55;
/*
 * Two index tables; the listed values put 0..3 and 4..7 in the even
 * positions and 0 in the odd positions.
 * NOTE(review): presumably used together with pmidx_msk -- confirm.
 */
142 static const cne_ymm_t _SC_(pmidx[2]) = {
145 0, 0, 1, 0, 2, 0, 3, 0,
150 4, 0, 5, 0, 6, 0, 7, 0,
/*
 * Read one byte through each of the four 64-bit pointers held in pdata and
 * store it, zero-extended, in the matching 32-bit element of v.
 *
 * pdata: four 64-bit pointers.
 * mask:  lane selector; see the note on the XOR below.
 */
161 static inline __m128i
162 _m256_mask_gather_epi8x4(__m256i pdata, __mmask8 mask)
/* Zero-initialized static word used as a safe load target. */
167 static const uint32_t zero;
/*
 * Lanes whose bit is set in (mask ^ _SIMD_PTR_MSK_) have their pointer
 * replaced with the address of zero; the other lanes keep the pointer from
 * pdata. Assuming _SIMD_PTR_MSK_ has one bit set per pointer lane, the
 * replaced lanes are exactly those NOT selected by mask, so they load 0
 * instead of dereferencing their original pointer.
 */
169 p.y = _mm256_mask_set1_epi64(pdata, mask ^ _SIMD_PTR_MSK_, (uintptr_t)&zero);
/* One scalar byte load per pointer lane. */
171 v.u32[0] = *(uint8_t *)p.u64[0];
172 v.u32[1] = *(uint8_t *)p.u64[1];
173 v.u32[2] = *(uint8_t *)p.u64[2];
174 v.u32[3] = *(uint8_t *)p.u64[3];
/*
 * Gather input through two vectors of pointers and pack the two 128-bit
 * results into one 256-bit vector.
 *
 * zero: vector whose low 128 bits are the pass-through source of the
 *       masked gather.
 * p:    two vectors of 64-bit pointers.
 * m:    one lane mask per pointer vector.
 * bnum: compared against sizeof(uint8_t) to pick the gather method.
 */
183 _F_(gather_bytes)(__m256i zero,
const __m256i p[2],
const uint32_t m[2], uint32_t bnum)
/* Single-byte case: use the byte-wise helper. */
187 if (bnum ==
sizeof(uint8_t)) {
188 inp[0] = _m256_mask_gather_epi8x4(p[0], m[0]);
189 inp[1] = _m256_mask_gather_epi8x4(p[1], m[1]);
/*
 * Masked 32-bit gather: the pointers in p[] are used as 64-bit indices
 * from a NULL base.
 */
191 inp[0] = _mm256_mmask_i64gather_epi32(_mm256_castsi256_si128(zero), m[0], p[0], NULL,
193 inp[1] = _mm256_mmask_i64gather_epi32(_mm256_castsi256_si128(zero), m[1], p[1], NULL,
/*
 * Combine both halves into one register: the pminp indices (0-3, 8-11)
 * select the four 32-bit elements of inp[0] followed by those of inp[1].
 */
198 return _mm256_permutex2var_epi32(_mm256_castsi128_si256(inp[0]), _SV_(pminp),
199 _mm256_castsi128_si256(inp[1]));
202 #include "acl_run_avx512_common.h"
/*
 * Run the trie search for every trie in ctx, then hand the collected
 * matches to a resolver together with results.
 *
 * ctx:           ACL context; num_tries, trans_table and match_index are
 *                read here.
 * data:          input pointers, passed on to acl_set_flow_avx512.
 * results:       output buffer, passed on to the resolver.
 * total_packets: number of inputs; also the per-trie stride in match.
 * categories:    passed to the multi-category resolver.
 */
209 search_avx512x8x2(
const struct cne_acl_ctx *ctx,
const uint8_t **data, uint32_t *results,
210 uint32_t total_packets, uint32_t categories)
213 const struct cne_acl_match_results *pr;
214 struct acl_flow_avx512 flow;
/*
 * Variable-length array with one total_packets-sized slice per trie.
 * NOTE(review): this lives on the stack and its size comes straight from
 * the arguments -- confirm that callers bound total_packets.
 */
215 uint32_t match[ctx->num_tries * total_packets];
/* pm advances by total_packets so each trie gets its own slice of match. */
217 for (i = 0, pm = match; i != ctx->num_tries; i++, pm += total_packets) {
/* Prepare flow for trie i, with pm as its slice of match. */
220 acl_set_flow_avx512(&flow, ctx, i, data, pm, total_packets);
/* Process the trie described by flow. */
223 _F_(search_trie)(&flow);
/* pr points into ctx->trans_table at offset ctx->match_index. */
227 pr = (
const struct cne_acl_match_results *)(ctx->trans_table + ctx->match_index);
/* Single-category resolver (takes no categories argument). */
230 _F_(resolve_single_cat)(results, pr, match, total_packets, ctx->num_tries);
/* General resolver, which also receives categories. */
232 resolve_mcle8_avx512x1(results, pr, match, total_packets, categories, ctx->num_tries);
/*
 * Remove the _SIMD_* helper macros, in reverse order of their definition.
 */
237 #undef _SIMD_PTR_MSK_
238 #undef _SIMD_PTR_NUM_
239 #undef _SIMD_FLOW_MSK_
240 #undef _SIMD_FLOW_NUM_
241 #undef _SIMD_MASK_MAX_
242 #undef _SIMD_MASK_BIT_
/*
 * Defines __cne_always_inline as an empty macro.
 * NOTE(review): if the project already defines this macro as an inline
 * attribute, this line redefines it to nothing -- confirm it is intended.
 */
#define __cne_always_inline