Vector Optimized Library of Kernels 3.1.1
Architecture-tuned implementations of math kernels
 
Loading...
Searching...
No Matches
volk_8u_conv_k7_r2puppet_8u.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10#ifndef INCLUDED_volk_8u_conv_k7_r2puppet_8u_H
11#define INCLUDED_volk_8u_conv_k7_r2puppet_8u_H
12
13#include <string.h>
14#include <volk/volk.h>
16
/*
 * Pointer to one trellis step's decision record, usable either byte-wise (t)
 * or in unsigned int words (w).  The record holds one decision bit per
 * encoder state.
 */
typedef union {
    // decision_t is a BIT vector
    unsigned char* t;
    unsigned int* w;
} p_decision_t;
22
/*
 * Return the parity of x: 1 when an odd number of bits are set, 0 otherwise.
 *
 * x      - value to reduce; its upper bytes are XOR-folded into the low byte.
 * Partab - lookup table with 256 entries, Partab[b] being the parity of the
 *          byte value b.
 */
static inline int parity(int x, unsigned char* Partab)
{
    /* Fold on an unsigned copy so the shifts are well defined for negative x. */
    unsigned int folded = (unsigned int)x;

    folded ^= (folded >> 16);
    folded ^= (folded >> 8);

    /* Only the low byte carries the folded parity.  The upper bytes still hold
     * leftover bits, so indexing with the whole value would run past the end
     * of the table for any x >= 256. */
    return Partab[folded & 0xffu];
}
29
/*
 * Trace the surviving path backwards through the stored decision records and
 * write the decoded bits into data, one bit (0 or 1) per byte.
 *
 * data      - output buffer; slots 0 .. nbits-1 are written.
 * nbits     - number of decoded bits to produce.
 * endstate  - state the traceback starts from; reduced modulo 64 first.
 * tailsize  - number of leading decision records to skip; the record for
 *             step i is read at decisions + (tailsize + i) * 8.
 * decisions - decision records, 8 bytes (one bit per state) per step.
 *
 * Returns the state reached after the first K-1 = 6 traceback steps.  When
 * nbits < 6 those steps do not run and the reduced start state is returned.
 */
static inline int chainback_viterbi(unsigned char* data,
                                    unsigned int nbits,
                                    unsigned int endstate,
                                    unsigned int tailsize,
                                    unsigned char* decisions)
{
    const unsigned int num_states = 1u << 6;
    const unsigned int record_bytes = num_states / 8; /* one bit per state */
    const unsigned int tail_steps = 7 - 1;            /* K - 1 */
    const unsigned int framebits = nbits;
    /* Rotation applied to the output index; zero when tailsize == K - 1. */
    const unsigned int dif = tailsize - tail_steps;
    /* Look past tail */
    const unsigned char* records = decisions + tailsize * record_bytes;

    endstate %= num_states;

    /* Defined result even when the first loop below never executes. */
    int retval = (int)endstate;

    /* First K-1 steps; the state after the last of them is the return value.
     * When framebits < K-1 the unsigned subtraction wraps to a huge value and
     * this loop is skipped entirely. */
    while (nbits-- > framebits - tail_steps) {
        /* Load the 32-bit word holding this state's decision bit with memcpy
         * so the byte buffer is never accessed through an unsigned int
         * lvalue. */
        unsigned int word;
        memcpy(&word,
               records + nbits * record_bytes + (endstate / 32) * sizeof(word),
               sizeof(word));
        const unsigned int k = (word >> (endstate % 32)) & 1u;

        endstate = (endstate >> 1) | (k << (tail_steps - 1));
        data[(nbits + dif) % framebits] = (unsigned char)k;

        retval = (int)endstate;
    }
    /* Undo the extra decrement made by the failed loop test above. */
    nbits += 1;

    /* Remaining steps down to the start of the frame. */
    while (nbits-- != 0) {
        unsigned int word;
        memcpy(&word,
               records + nbits * record_bytes + (endstate / 32) * sizeof(word),
               sizeof(word));
        const unsigned int k = (word >> (endstate % 32)) & 1u;

        endstate = (endstate >> 1) | (k << (tail_steps - 1));
        data[(nbits + dif) % framebits] = (unsigned char)k;
    }

    return retval;
}
90
91
92#if LV_HAVE_SSE3
93
94#include <emmintrin.h>
95#include <mmintrin.h>
96#include <pmmintrin.h>
97#include <stdio.h>
98#include <xmmintrin.h>
99
/*
 * Decode one frame with the SSE3 "spiral" k=7, rate-1/2 Viterbi kernel and
 * trace back the result, exposing the kernel through a plain
 * (dec, syms, framebits) interface.
 *
 * dec       - output buffer handed to chainback_viterbi(); it receives
 *             framebits / 2 - 6 decoded bits, one per byte.
 * syms      - input symbols, forwarded unchanged to the kernel.
 * framebits - frame length; calls with framebits < 12 return without
 *             touching dec.
 *
 * Working buffers and lookup tables live in function-local statics that are
 * set up on the first call and reused afterwards, so this function must not
 * be called concurrently.  If an allocation fails the call returns without
 * decoding.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char* dec,
                                                      unsigned char* syms,
                                                      unsigned int framebits)
{
    if (framebits < 12) {
        return;
    }

    static int once = 1;
    static unsigned char* D;      /* decision records */
    static size_t D_capacity = 0; /* bytes currently allocated for D */
    static unsigned char* Y;      /* second half of the metric buffer */
    static unsigned char* X;      /* path metrics, 2 * d_numstates bytes */
    static unsigned int excess = 6;
    static unsigned char* Branchtab;
    static unsigned char Partab[256];

    int d_numstates = (1 << 6);
    int rate = 2;
    int d_polys[2] = { 79, 109 };

    if (once) {
        /* The casts are kept because this header is also consumed as C++. */
        X = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        Branchtab =
            (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        if (X == NULL || Branchtab == NULL) {
            /* Leave `once` set so that a later call retries the setup. */
            volk_free(X);
            volk_free(Branchtab);
            X = NULL;
            Branchtab = NULL;
            return;
        }
        Y = X + d_numstates;

        int state, i;
        int cnt, ti;

        /* Initialize parity lookup table */
        for (i = 0; i < 256; i++) {
            cnt = 0;
            ti = i;
            while (ti) {
                if (ti & 1)
                    cnt++;
                ti >>= 1;
            }
            Partab[i] = cnt & 1;
        }
        /* Initialize the branch table */
        for (state = 0; state < d_numstates / 2; state++) {
            for (i = 0; i < rate; i++) {
                Branchtab[i * d_numstates / 2 + state] =
                    parity((2 * state) & d_polys[i], Partab) ? 255 : 0;
            }
        }

        once = 0;
    }

    /* The decision buffer depends on framebits, so it has to grow whenever a
     * call asks for a longer frame than any earlier call did. */
    const size_t decision_bytes = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);
    if (decision_bytes > D_capacity) {
        volk_free(D);
        D = (unsigned char*)volk_malloc(decision_bytes, volk_get_alignment());
        if (D == NULL) {
            D_capacity = 0;
            return;
        }
        D_capacity = decision_bytes;
    }

    // unbias the old_metrics
    memset(X, 31, d_numstates);

    // initialize decisions
    memset(D, 0, decision_bytes);

    volk_8u_x4_conv_k7_r2_8u_spiral(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest path metric. */
    unsigned int min = X[0];
    int i = 0, state = 0;
    for (i = 0; i < (d_numstates); ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);

    return;
}
176
177#endif /*LV_HAVE_SSE3*/
178
179
180#if LV_HAVE_NEON
181
183
/*
 * Decode one frame with the NEON "spiral" k=7, rate-1/2 Viterbi kernel and
 * trace back the result, exposing the kernel through a plain
 * (dec, syms, framebits) interface.
 *
 * dec       - output buffer handed to chainback_viterbi(); it receives
 *             framebits / 2 - 6 decoded bits, one per byte.
 * syms      - input symbols, forwarded unchanged to the kernel.
 * framebits - frame length; calls with framebits < 12 return without
 *             touching dec.
 *
 * Working buffers and lookup tables live in function-local statics that are
 * set up on the first call and reused afterwards, so this function must not
 * be called concurrently.  If an allocation fails the call returns without
 * decoding.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_neonspiral(unsigned char* dec,
                                                          unsigned char* syms,
                                                          unsigned int framebits)
{
    if (framebits < 12) {
        return;
    }

    static int once = 1;
    static unsigned char* D;      /* decision records */
    static size_t D_capacity = 0; /* bytes currently allocated for D */
    static unsigned char* Y;      /* second half of the metric buffer */
    static unsigned char* X;      /* path metrics, 2 * d_numstates bytes */
    static unsigned int excess = 6;
    static unsigned char* Branchtab;
    static unsigned char Partab[256];

    int d_numstates = (1 << 6);
    int rate = 2;
    int d_polys[2] = { 79, 109 };

    if (once) {
        /* The casts are kept because this header is also consumed as C++. */
        X = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        Branchtab =
            (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        if (X == NULL || Branchtab == NULL) {
            /* Leave `once` set so that a later call retries the setup. */
            volk_free(X);
            volk_free(Branchtab);
            X = NULL;
            Branchtab = NULL;
            return;
        }
        Y = X + d_numstates;

        int state, i;
        int cnt, ti;

        /* Initialize parity lookup table */
        for (i = 0; i < 256; i++) {
            cnt = 0;
            ti = i;
            while (ti) {
                if (ti & 1)
                    cnt++;
                ti >>= 1;
            }
            Partab[i] = cnt & 1;
        }
        /* Initialize the branch table */
        for (state = 0; state < d_numstates / 2; state++) {
            for (i = 0; i < rate; i++) {
                Branchtab[i * d_numstates / 2 + state] =
                    parity((2 * state) & d_polys[i], Partab) ? 255 : 0;
            }
        }

        once = 0;
    }

    /* The decision buffer depends on framebits, so it has to grow whenever a
     * call asks for a longer frame than any earlier call did. */
    const size_t decision_bytes = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);
    if (decision_bytes > D_capacity) {
        volk_free(D);
        D = (unsigned char*)volk_malloc(decision_bytes, volk_get_alignment());
        if (D == NULL) {
            D_capacity = 0;
            return;
        }
        D_capacity = decision_bytes;
    }

    // unbias the old_metrics
    memset(X, 31, d_numstates);

    // initialize decisions
    memset(D, 0, decision_bytes);

    volk_8u_x4_conv_k7_r2_8u_neonspiral(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest path metric. */
    unsigned int min = X[0];
    int i = 0, state = 0;
    for (i = 0; i < (d_numstates); ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);

    return;
}
260
261#endif /*LV_HAVE_NEON*/
262
263
264#if LV_HAVE_AVX2
265
266#include <immintrin.h>
267#include <stdio.h>
268
/*
 * Decode one frame with the AVX2 k=7, rate-1/2 Viterbi kernel and trace back
 * the result, exposing the kernel through a plain (dec, syms, framebits)
 * interface.
 *
 * dec       - output buffer handed to chainback_viterbi(); it receives
 *             framebits / 2 - 6 decoded bits, one per byte.
 * syms      - input symbols, forwarded unchanged to the kernel.
 * framebits - frame length; calls with framebits < 12 return without
 *             touching dec.
 *
 * Working buffers and lookup tables live in function-local statics that are
 * set up on the first call and reused afterwards, so this function must not
 * be called concurrently.  If an allocation fails the call returns without
 * decoding.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_avx2(unsigned char* dec,
                                                    unsigned char* syms,
                                                    unsigned int framebits)
{
    if (framebits < 12) {
        return;
    }

    static int once = 1;
    static unsigned char* D;      /* decision records */
    static size_t D_capacity = 0; /* bytes currently allocated for D */
    static unsigned char* Y;      /* second half of the metric buffer */
    static unsigned char* X;      /* path metrics, 2 * d_numstates bytes */
    static unsigned int excess = 6;
    static unsigned char* Branchtab;
    static unsigned char Partab[256];

    int d_numstates = (1 << 6);
    int rate = 2;
    int d_polys[2] = { 79, 109 };

    if (once) {
        /* The casts are kept because this header is also consumed as C++. */
        X = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        Branchtab =
            (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        if (X == NULL || Branchtab == NULL) {
            /* Leave `once` set so that a later call retries the setup. */
            volk_free(X);
            volk_free(Branchtab);
            X = NULL;
            Branchtab = NULL;
            return;
        }
        Y = X + d_numstates;

        int state, i;
        int cnt, ti;

        /* Initialize parity lookup table */
        for (i = 0; i < 256; i++) {
            cnt = 0;
            ti = i;
            while (ti) {
                if (ti & 1)
                    cnt++;
                ti >>= 1;
            }
            Partab[i] = cnt & 1;
        }
        /* Initialize the branch table */
        for (state = 0; state < d_numstates / 2; state++) {
            for (i = 0; i < rate; i++) {
                Branchtab[i * d_numstates / 2 + state] =
                    parity((2 * state) & d_polys[i], Partab) ? 255 : 0;
            }
        }

        once = 0;
    }

    /* The decision buffer depends on framebits, so it has to grow whenever a
     * call asks for a longer frame than any earlier call did. */
    const size_t decision_bytes = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);
    if (decision_bytes > D_capacity) {
        volk_free(D);
        D = (unsigned char*)volk_malloc(decision_bytes, volk_get_alignment());
        if (D == NULL) {
            D_capacity = 0;
            return;
        }
        D_capacity = decision_bytes;
    }

    // unbias the old_metrics
    memset(X, 31, d_numstates);

    // initialize decisions
    memset(D, 0, decision_bytes);

    volk_8u_x4_conv_k7_r2_8u_avx2(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest path metric. */
    unsigned int min = X[0];
    int i = 0, state = 0;
    for (i = 0; i < (d_numstates); ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);

    return;
}
345
346#endif /*LV_HAVE_AVX2*/
347
348
349#if LV_HAVE_GENERIC
350
351
/*
 * Decode one frame with the generic (portable C) k=7, rate-1/2 Viterbi kernel
 * and trace back the result, exposing the kernel through a plain
 * (dec, syms, framebits) interface.
 *
 * dec       - output buffer handed to chainback_viterbi(); it receives
 *             framebits / 2 - 6 decoded bits, one per byte.
 * syms      - input symbols, forwarded unchanged to the kernel.
 * framebits - frame length; calls with framebits < 12 return without
 *             touching dec.
 *
 * Working buffers and lookup tables live in function-local statics that are
 * set up on the first call and reused afterwards, so this function must not
 * be called concurrently.  If an allocation fails the call returns without
 * decoding.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char* dec,
                                                       unsigned char* syms,
                                                       unsigned int framebits)
{
    if (framebits < 12) {
        return;
    }

    static int once = 1;
    static unsigned char* Y;      /* second half of the metric buffer */
    static unsigned char* X;      /* path metrics, 2 * d_numstates bytes */
    static unsigned char* D;      /* decision records */
    static size_t D_capacity = 0; /* bytes currently allocated for D */
    static unsigned int excess = 6;
    static unsigned char* Branchtab;
    static unsigned char Partab[256];

    int d_numstates = (1 << 6);
    int rate = 2;
    int d_polys[2] = { 79, 109 };

    if (once) {
        /* The casts are kept because this header is also consumed as C++. */
        X = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        Branchtab =
            (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        if (X == NULL || Branchtab == NULL) {
            /* Leave `once` set so that a later call retries the setup. */
            volk_free(X);
            volk_free(Branchtab);
            X = NULL;
            Branchtab = NULL;
            return;
        }
        Y = X + d_numstates;

        int state, i;
        int cnt, ti;

        /* Initialize parity lookup table */
        for (i = 0; i < 256; i++) {
            cnt = 0;
            ti = i;
            while (ti) {
                if (ti & 1)
                    cnt++;
                ti >>= 1;
            }
            Partab[i] = cnt & 1;
        }
        /* Initialize the branch table */
        for (state = 0; state < d_numstates / 2; state++) {
            for (i = 0; i < rate; i++) {
                Branchtab[i * d_numstates / 2 + state] =
                    parity((2 * state) & d_polys[i], Partab) ? 255 : 0;
            }
        }

        once = 0;
    }

    /* The decision buffer depends on framebits, so it has to grow whenever a
     * call asks for a longer frame than any earlier call did. */
    const size_t decision_bytes = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);
    if (decision_bytes > D_capacity) {
        volk_free(D);
        D = (unsigned char*)volk_malloc(decision_bytes, volk_get_alignment());
        if (D == NULL) {
            D_capacity = 0;
            return;
        }
        D_capacity = decision_bytes;
    }

    // unbias the old_metrics
    memset(X, 31, d_numstates);

    // initialize decisions
    memset(D, 0, decision_bytes);

    volk_8u_x4_conv_k7_r2_8u_generic(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest path metric. */
    unsigned int min = X[0];
    int i = 0, state = 0;
    for (i = 0; i < (d_numstates); ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);

    return;
}
429
430#endif /* LV_HAVE_GENERIC */
431
432#endif /*INCLUDED_volk_8u_conv_k7_r2puppet_8u_H*/
Definition volk_8u_conv_k7_r2puppet_8u.h:17
unsigned int * w
Definition volk_8u_conv_k7_r2puppet_8u.h:20
unsigned char * t
Definition volk_8u_conv_k7_r2puppet_8u.h:19
size_t volk_get_alignment(void)
Get the machine alignment in bytes.
Definition volk.tmpl.c:90
static void volk_8u_conv_k7_r2puppet_8u_neonspiral(unsigned char *dec, unsigned char *syms, unsigned int framebits)
Definition volk_8u_conv_k7_r2puppet_8u.h:184
static int chainback_viterbi(unsigned char *data, unsigned int nbits, unsigned int endstate, unsigned int tailsize, unsigned char *decisions)
Definition volk_8u_conv_k7_r2puppet_8u.h:30
static int parity(int x, unsigned char *Partab)
Definition volk_8u_conv_k7_r2puppet_8u.h:23
static void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char *dec, unsigned char *syms, unsigned int framebits)
Definition volk_8u_conv_k7_r2puppet_8u.h:100
static void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char *dec, unsigned char *syms, unsigned int framebits)
Definition volk_8u_conv_k7_r2puppet_8u.h:352
static void volk_8u_x4_conv_k7_r2_8u_spiral(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition volk_8u_x4_conv_k7_r2_8u.h:322
static void volk_8u_x4_conv_k7_r2_8u_neonspiral(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition volk_8u_x4_conv_k7_r2_8u.h:612
static void volk_8u_x4_conv_k7_r2_8u_generic(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition volk_8u_x4_conv_k7_r2_8u.h:900
for i
Definition volk_config_fixed.tmpl.h:13
__VOLK_DECL_BEGIN VOLK_API void * volk_malloc(size_t size, size_t alignment)
Allocate size bytes of data aligned to alignment.
Definition volk_malloc.c:38