Compare commits

...

4 Commits

Author SHA1 Message Date
Sylvain Munaut
ffdfaa0f27 tests: Re-enable the convolve_test by default
Change-Id: Ia26ef75bb11482fc0db3b790db1c93c8b74229d1
Signed-off-by: Sylvain Munaut <tnt@246tNt.com>
2018-12-21 16:49:09 +01:00
Sylvain Munaut
f4d4591883 arm/convolve: Fix the vfp4 real convolution for h_len=12
Change-Id: Ic73f0746edd3f1f22bb1d79d4c64aa740691dd48
Signed-off-by: Sylvain Munaut <tnt@246tNt.com>
2018-12-21 16:47:12 +01:00
Sylvain Munaut
6333b0fe80 tests: Rework the convolve_test
Besides general cleanup, the major changes are:
 - Always run the reference / base implementation and print
   those results. These should be consistent across platforms
   and check that the base implementation works.
 - Run the optimized version and compare results with small
   error tolerance against the reference results.

Change-Id: I4e203d2c4b778af77d630ed15d4cef6b0c0eb76d
Signed-off-by: Sylvain Munaut <tnt@246tNt.com>
2018-12-21 16:38:31 +01:00
Sylvain Munaut
912207f5ef convolve: Remove support for step, offset parameters
- Those are not used anywhere
 - Those are not supported by the sse/neon accelerated versions
 - And I see very few use cases for those.

Change-Id: Ic850269a0ed5d98c0ea68980afd31016ed555b48
Signed-off-by: Sylvain Munaut <tnt@246tNt.com>
2018-12-21 16:25:46 +01:00
14 changed files with 1065 additions and 310 deletions

View File

@@ -88,7 +88,7 @@ bool Channelizer::rotate(const float *in, size_t len)
convolve_real(hInputs[i], blockLen,
subFilters[i], hLen,
hOutputs[i], blockLen,
0, blockLen, 1, 0);
0, blockLen);
}
cxvec_fft(fftHandle);

View File

@@ -143,7 +143,7 @@ int Resampler::rotate(const float *in, size_t in_len, float *out, size_t out_len
convolve_real(in, in_len,
reinterpret_cast<float *>(partitions[path]),
filt_len, &out[2 * i], out_len - i,
n, 1, 1, 0);
n, 1);
}
return out_len;

View File

@@ -102,7 +102,7 @@ bool Synthesis::rotate(float *out, size_t len)
convolve_real(hInputs[i], blockLen,
subFilters[i], hLen,
hOutputs[i], blockLen,
0, blockLen, 1, 0);
0, blockLen);
}
/* Interleave into output vector */

View File

@@ -29,17 +29,15 @@
int _base_convolve_real(float *x, int x_len,
float *h, int h_len,
float *y, int y_len,
int start, int len,
int step, int offset);
int start, int len);
int _base_convolve_complex(float *x, int x_len,
float *h, int h_len,
float *y, int y_len,
int start, int len,
int step, int offset);
int start, int len);
int bounds_check(int x_len, int h_len, int y_len,
int start, int len, int step);
int start, int len);
#ifdef HAVE_NEON
/* Calls into NEON assembler */
@@ -69,35 +67,32 @@ void convolve_init(void)
int convolve_real(float *x, int x_len,
float *h, int h_len,
float *y, int y_len,
int start, int len,
int step, int offset)
int start, int len)
{
void (*conv_func)(float *, float *, float *, int) = NULL;
if (bounds_check(x_len, h_len, y_len, start, len, step) < 0)
if (bounds_check(x_len, h_len, y_len, start, len) < 0)
return -1;
memset(y, 0, len * 2 * sizeof(float));
#ifdef HAVE_NEON
if (step <= 4) {
switch (h_len) {
case 4:
conv_func = neon_conv_real4;
break;
case 8:
conv_func = neon_conv_real8;
break;
case 12:
conv_func = neon_conv_real12;
break;
case 16:
conv_func = neon_conv_real16;
break;
case 20:
conv_func = neon_conv_real20;
break;
}
switch (h_len) {
case 4:
conv_func = neon_conv_real4;
break;
case 8:
conv_func = neon_conv_real8;
break;
case 12:
conv_func = neon_conv_real12;
break;
case 16:
conv_func = neon_conv_real16;
break;
case 20:
conv_func = neon_conv_real20;
break;
}
#endif
if (conv_func) {
@@ -107,7 +102,7 @@ int convolve_real(float *x, int x_len,
_base_convolve_real(x, x_len,
h, h_len,
y, y_len,
start, len, step, offset);
start, len);
}
return len;
@@ -118,18 +113,17 @@ int convolve_real(float *x, int x_len,
int convolve_complex(float *x, int x_len,
float *h, int h_len,
float *y, int y_len,
int start, int len,
int step, int offset)
int start, int len)
{
void (*conv_func)(float *, float *, float *, int, int) = NULL;
if (bounds_check(x_len, h_len, y_len, start, len, step) < 0)
if (bounds_check(x_len, h_len, y_len, start, len) < 0)
return -1;
memset(y, 0, len * 2 * sizeof(float));
#ifdef HAVE_NEON
if (step <= 4 && !(h_len % 4))
if (!(h_len % 4))
conv_func = neon_conv_cmplx_4n;
#endif
if (conv_func) {
@@ -139,7 +133,7 @@ int convolve_complex(float *x, int x_len,
_base_convolve_complex(x, x_len,
h, h_len,
y, y_len,
start, len, step, offset);
start, len);
}
return len;

View File

@@ -92,8 +92,8 @@ neon_conv_real12:
vld2.32 {q8-q9}, [r4], r6
vld2.32 {q10-q11}, [r5], r6
#ifdef HAVE_NEON_FMA
vfma.f32 q1, q6, q0
vfma.f32 q3, q7, q0
vmul.f32 q1, q6, q0
vmul.f32 q3, q7, q0
vfma.f32 q1, q8, q2
vfma.f32 q3, q9, q2
vfma.f32 q1, q10, q4

View File

@@ -6,26 +6,22 @@ void *convolve_h_alloc(size_t num);
int convolve_real(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len,
int step, int offset);
int start, int len);
int convolve_complex(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len,
int step, int offset);
int start, int len);
int base_convolve_real(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len,
int step, int offset);
int start, int len);
int base_convolve_complex(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len,
int step, int offset);
int start, int len);
void convolve_init(void);

View File

@@ -41,17 +41,17 @@ static void mac_cmplx(const float *x, const float *h, float *y)
/* Base vector complex-real multiply and accumulate */
static void mac_real_vec_n(const float *x, const float *h, float *y,
int len, int step, int offset)
int len)
{
for (int i = offset; i < len; i += step)
for (int i=0; i<len; i++)
mac_real(&x[2 * i], &h[2 * i], y);
}
/* Base vector complex-complex multiply and accumulate */
static void mac_cmplx_vec_n(const float *x, const float *h, float *y,
int len, int step, int offset)
int len)
{
for (int i = offset; i < len; i += step)
for (int i=0; i<len; i++)
mac_cmplx(&x[2 * i], &h[2 * i], y);
}
@@ -59,14 +59,12 @@ static void mac_cmplx_vec_n(const float *x, const float *h, float *y,
int _base_convolve_real(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len,
int step, int offset)
int start, int len)
{
for (int i = 0; i < len; i++) {
mac_real_vec_n(&x[2 * (i - (h_len - 1) + start)],
h,
&y[2 * i], h_len,
step, offset);
&y[2 * i], h_len);
}
return len;
@@ -76,14 +74,13 @@ int _base_convolve_real(const float *x, int x_len,
int _base_convolve_complex(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len,
int step, int offset)
int start, int len)
{
for (int i = 0; i < len; i++) {
mac_cmplx_vec_n(&x[2 * (i - (h_len - 1) + start)],
h,
&y[2 * i],
h_len, step, offset);
h_len);
}
return len;
@@ -91,10 +88,10 @@ int _base_convolve_complex(const float *x, int x_len,
/* Buffer validity checks */
int bounds_check(int x_len, int h_len, int y_len,
int start, int len, int step)
int start, int len)
{
if ((x_len < 1) || (h_len < 1) ||
(y_len < 1) || (len < 1) || (step < 1)) {
(y_len < 1) || (len < 1)) {
fprintf(stderr, "Convolve: Invalid input\n");
return -1;
}
@@ -113,10 +110,9 @@ int bounds_check(int x_len, int h_len, int y_len,
int base_convolve_real(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len,
int step, int offset)
int start, int len)
{
if (bounds_check(x_len, h_len, y_len, start, len, step) < 0)
if (bounds_check(x_len, h_len, y_len, start, len) < 0)
return -1;
memset(y, 0, len * 2 * sizeof(float));
@@ -124,17 +120,16 @@ int base_convolve_real(const float *x, int x_len,
return _base_convolve_real(x, x_len,
h, h_len,
y, y_len,
start, len, step, offset);
start, len);
}
/* API: Non-aligned (no SSE) complex-complex */
int base_convolve_complex(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len,
int step, int offset)
int start, int len)
{
if (bounds_check(x_len, h_len, y_len, start, len, step) < 0)
if (bounds_check(x_len, h_len, y_len, start, len) < 0)
return -1;
memset(y, 0, len * 2 * sizeof(float));
@@ -142,7 +137,7 @@ int base_convolve_complex(const float *x, int x_len,
return _base_convolve_complex(x, x_len,
h, h_len,
y, y_len,
start, len, step, offset);
start, len);
}
/* Aligned filter tap allocation */

View File

@@ -30,25 +30,25 @@
/* Architecture dependent function pointers */
struct convolve_cpu_context {
void (*conv_cmplx_4n) (const float *, int, const float *, int, float *,
int, int, int, int, int);
int, int, int);
void (*conv_cmplx_8n) (const float *, int, const float *, int, float *,
int, int, int, int, int);
int, int, int);
void (*conv_cmplx) (const float *, int, const float *, int, float *,
int, int, int, int, int);
int, int, int);
void (*conv_real4) (const float *, int, const float *, int, float *,
int, int, int, int, int);
int, int, int);
void (*conv_real8) (const float *, int, const float *, int, float *,
int, int, int, int, int);
int, int, int);
void (*conv_real12) (const float *, int, const float *, int, float *,
int, int, int, int, int);
int, int, int);
void (*conv_real16) (const float *, int, const float *, int, float *,
int, int, int, int, int);
int, int, int);
void (*conv_real20) (const float *, int, const float *, int, float *,
int, int, int, int, int);
int, int, int);
void (*conv_real4n) (const float *, int, const float *, int, float *,
int, int, int, int, int);
int, int, int);
void (*conv_real) (const float *, int, const float *, int, float *, int,
int, int, int, int);
int, int);
};
static struct convolve_cpu_context c;
@@ -56,17 +56,15 @@ static struct convolve_cpu_context c;
int _base_convolve_real(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len,
int step, int offset);
int start, int len);
int _base_convolve_complex(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len,
int step, int offset);
int start, int len);
int bounds_check(int x_len, int h_len, int y_len,
int start, int len, int step);
int start, int len);
/* API: Initialize convolve module */
void convolve_init(void)
@@ -99,46 +97,37 @@ void convolve_init(void)
/* API: Aligned complex-real */
int convolve_real(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len, int start, int len, int step, int offset)
float *y, int y_len, int start, int len)
{
if (bounds_check(x_len, h_len, y_len, start, len, step) < 0)
if (bounds_check(x_len, h_len, y_len, start, len) < 0)
return -1;
memset(y, 0, len * 2 * sizeof(float));
if (step <= 4) {
switch (h_len) {
case 4:
c.conv_real4(x, x_len, h, h_len, y, y_len, start, len,
step, offset);
break;
case 8:
c.conv_real8(x, x_len, h, h_len, y, y_len, start, len,
step, offset);
break;
case 12:
c.conv_real12(x, x_len, h, h_len, y, y_len, start, len,
step, offset);
break;
case 16:
c.conv_real16(x, x_len, h, h_len, y, y_len, start, len,
step, offset);
break;
case 20:
c.conv_real20(x, x_len, h, h_len, y, y_len, start, len,
step, offset);
break;
default:
if (!(h_len % 4))
c.conv_real4n(x, x_len, h, h_len, y, y_len,
start, len, step, offset);
else
c.conv_real(x, x_len, h, h_len, y, y_len, start,
len, step, offset);
}
} else
c.conv_real(x, x_len, h, h_len, y, y_len, start, len, step,
offset);
switch (h_len) {
case 4:
c.conv_real4(x, x_len, h, h_len, y, y_len, start, len);
break;
case 8:
c.conv_real8(x, x_len, h, h_len, y, y_len, start, len);
break;
case 12:
c.conv_real12(x, x_len, h, h_len, y, y_len, start, len);
break;
case 16:
c.conv_real16(x, x_len, h, h_len, y, y_len, start, len);
break;
case 20:
c.conv_real20(x, x_len, h, h_len, y, y_len, start, len);
break;
default:
if (!(h_len % 4))
c.conv_real4n(x, x_len, h, h_len, y, y_len,
start, len);
else
c.conv_real(x, x_len, h, h_len, y, y_len, start,
len);
}
return len;
}
@@ -147,26 +136,19 @@ int convolve_real(const float *x, int x_len,
int convolve_complex(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len, int step, int offset)
int start, int len)
{
if (bounds_check(x_len, h_len, y_len, start, len, step) < 0)
if (bounds_check(x_len, h_len, y_len, start, len) < 0)
return -1;
memset(y, 0, len * 2 * sizeof(float));
if (step <= 4) {
if (!(h_len % 8))
c.conv_cmplx_8n(x, x_len, h, h_len, y, y_len, start,
len, step, offset);
else if (!(h_len % 4))
c.conv_cmplx_4n(x, x_len, h, h_len, y, y_len, start,
len, step, offset);
else
c.conv_cmplx(x, x_len, h, h_len, y, y_len, start, len,
step, offset);
} else
c.conv_cmplx(x, x_len, h, h_len, y, y_len, start, len, step,
offset);
if (!(h_len % 8))
c.conv_cmplx_8n(x, x_len, h, h_len, y, y_len, start, len);
else if (!(h_len % 4))
c.conv_cmplx_4n(x, x_len, h, h_len, y, y_len, start, len);
else
c.conv_cmplx(x, x_len, h, h_len, y, y_len, start, len);
return len;
}

View File

@@ -34,12 +34,12 @@
void sse_conv_real4(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len, int step, int offset)
int start, int len)
{
/* NOTE: The parameter list of this function has to match the parameter
* list of _base_convolve_real() in convolve_base.c. This specific
* implementation, ignores some of the parameters of
* _base_convolve_complex(), which are: x_len, y_len, offset, step */
* _base_convolve_complex(), which are: x_len, y_len. */
__m128 m0, m1, m2, m3, m4, m5, m6, m7;
@@ -75,7 +75,7 @@ void sse_conv_real4(const float *x, int x_len,
void sse_conv_real8(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len, int step, int offset)
int start, int len)
{
/* See NOTE in sse_conv_real4() */
@@ -126,7 +126,7 @@ void sse_conv_real8(const float *x, int x_len,
void sse_conv_real12(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len, int step, int offset)
int start, int len)
{
/* See NOTE in sse_conv_real4() */
@@ -192,7 +192,7 @@ void sse_conv_real12(const float *x, int x_len,
void sse_conv_real16(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len, int step, int offset)
int start, int len)
{
/* See NOTE in sse_conv_real4() */
@@ -271,7 +271,7 @@ void sse_conv_real16(const float *x, int x_len,
void sse_conv_real20(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len, int step, int offset)
int start, int len)
{
/* See NOTE in sse_conv_real4() */
@@ -361,7 +361,7 @@ void sse_conv_real20(const float *x, int x_len,
void sse_conv_real4n(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len, int step, int offset)
int start, int len)
{
/* See NOTE in sse_conv_real4() */
@@ -408,12 +408,12 @@ void sse_conv_real4n(const float *x, int x_len,
void sse_conv_cmplx_4n(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len, int step, int offset)
int start, int len)
{
/* NOTE: The parameter list of this function has to match the parameter
* list of _base_convolve_complex() in convolve_base.c. This specific
* implementation, ignores some of the parameters of
* _base_convolve_complex(), which are: x_len, y_len, offset, step. */
* _base_convolve_complex(), which are: x_len, y_len. */
__m128 m0, m1, m2, m3, m4, m5, m6, m7;
@@ -466,7 +466,7 @@ void sse_conv_cmplx_4n(const float *x, int x_len,
void sse_conv_cmplx_8n(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len, int step, int offset)
int start, int len)
{
/* See NOTE in sse_conv_cmplx_4n() */

View File

@@ -23,46 +23,46 @@
void sse_conv_real4(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len, int step, int offset);
int start, int len);
/* 8-tap SSE complex-real convolution */
void sse_conv_real8(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len, int step, int offset);
int start, int len);
/* 12-tap SSE complex-real convolution */
void sse_conv_real12(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len, int step, int offset);
int start, int len);
/* 16-tap SSE complex-real convolution */
void sse_conv_real16(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len, int step, int offset);
int start, int len);
/* 20-tap SSE complex-real convolution */
void sse_conv_real20(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len, int step, int offset);
int start, int len);
/* 4*N-tap SSE complex-real convolution */
void sse_conv_real4n(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len, int step, int offset);
int start, int len);
/* 4*N-tap SSE complex-complex convolution */
void sse_conv_cmplx_4n(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len, int step, int offset);
int start, int len);
/* 8*N-tap SSE complex-complex convolution */
void sse_conv_cmplx_8n(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len,
int start, int len, int step, int offset);
int start, int len);

View File

@@ -285,8 +285,7 @@ enum ConvType {
static signalVector *convolve(const signalVector *x, const signalVector *h,
signalVector *y, ConvType spanType,
size_t start = 0, size_t len = 0,
size_t step = 1, int offset = 0)
size_t start = 0, size_t len = 0)
{
int rc;
size_t head = 0, tail = 0;
@@ -354,22 +353,22 @@ static signalVector *convolve(const signalVector *x, const signalVector *h,
rc = convolve_real((float *) _x->begin(), _x->size(),
(float *) h->begin(), h->size(),
(float *) y->begin(), y->size(),
start, len, step, offset);
start, len);
} else if (!h->isReal() && h->isAligned()) {
rc = convolve_complex((float *) _x->begin(), _x->size(),
(float *) h->begin(), h->size(),
(float *) y->begin(), y->size(),
start, len, step, offset);
start, len);
} else if (h->isReal() && !h->isAligned()) {
rc = base_convolve_real((float *) _x->begin(), _x->size(),
(float *) h->begin(), h->size(),
(float *) y->begin(), y->size(),
start, len, step, offset);
start, len);
} else if (!h->isReal() && !h->isAligned()) {
rc = base_convolve_complex((float *) _x->begin(), _x->size(),
(float *) h->begin(), h->size(),
(float *) y->begin(), y->size(),
start, len, step, offset);
start, len);
} else {
rc = -1;
}
@@ -1482,7 +1481,7 @@ static int detectBurst(const signalVector &burst,
/* Correlate */
if (!convolve(corr_in, sync->sequence, &corr,
CUSTOM, start, len, 1, 0)) {
CUSTOM, start, len)) {
delete dec;
return -1;
}

View File

@@ -1,150 +1,265 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include "convolve.h"
#define TESTVEC_LEN 1000
#define DO_INIT 1
struct test_vec
{
float *x;
float *h;
float *y_ref;
float *y_iut;
float x_vect[TESTVEC_LEN];
float y_vect[TESTVEC_LEN];
float h_vect[TESTVEC_LEN];
float *x;
float *h;
float *y;
int x_len; /* Theses are in # of _floats_ ! */
int h_len; /* Theses are in # of _floats_ ! */
int y_len; /* Theses are in # of _floats_ ! */
};
/* Generate some random values for testing */
void gen_floats(float *vect, int len)
static void
gen_floats(float *vect, int len)
{
int i;
for (i = 0; i < len; i++) {
vect[i] = (float)rand()/(float)(RAND_MAX);
vect[i] = (float)rand()/(float)(RAND_MAX) - 0.5f;
}
}
/* Reset testvectors */
static void reset_testvec(int seed)
{
srand(seed);
memset(x_vect,0,sizeof(x_vect));
memset(y_vect,0,sizeof(y_vect));
memset(h_vect,0,sizeof(h_vect));
x=x_vect + TESTVEC_LEN/2;
y=y_vect + TESTVEC_LEN/2;
h=h_vect + TESTVEC_LEN/2;
gen_floats(x_vect,TESTVEC_LEN);
gen_floats(h_vect,TESTVEC_LEN);
}
/* Show float vector data cut and paste friendly */
static void dump_floats(float *vect, int len, char *name)
static void
dump_floats(float *vect, int len, char *name)
{
int i;
printf("float %s[] = {", name);
printf("float %s[] = {\n\t", name);
for(i = 0; i < len; i++) {
printf("%f",vect[i]);
if(i<len-1)
printf(",");
char *end;
if (i == len-1)
end = "\n";
else if ((i&3) == 3)
end = ",\n\t";
else
end = ", ";
printf("%14.7e%s", vect[i], end);
}
printf("}\n");
}
/*
 * Check two float vectors for element-wise agreement.
 *
 * A pair of elements is accepted when either the absolute difference is
 * below 'delta', or the ratio v0[i] / v1[i] deviates from 1 by less than
 * 'epsilon'.
 *
 * Returns 0 if all 'len' pairs are accepted, 1 as soon as one pair fails
 * both checks.
 */
static int
compare_floats(float *v0, float *v1, int len, float delta, float epsilon)
{
	int idx = 0;

	while (idx < len) {
		const float lhs = v0[idx];
		const float rhs = v1[idx];

		idx++;

		/* The relative check is only evaluated when the absolute
		 * one did not already accept the pair */
		if (!(fabsf(lhs - rhs) < delta) &&
		    !(fabsf(1.0f - (lhs / rhs)) < epsilon))
			return 1;
	}

	return 0;
}
/*
 * Put a test vector set back into a known state: all four buffers are
 * zeroed, then the inputs x and h are refilled with pseudo-random values
 * drawn after seeding the generator with 'seed'.
 */
static void
test_vec_reset(struct test_vec *tv, int seed)
{
	const size_t x_bytes = tv->x_len * sizeof(float);
	const size_t h_bytes = tv->h_len * sizeof(float);
	const size_t y_bytes = tv->y_len * sizeof(float);

	/* Seed before generating so the inputs depend only on 'seed' */
	srand(seed);

	/* Clear both result buffers and both input buffers */
	memset(tv->y_iut, 0, y_bytes);
	memset(tv->y_ref, 0, y_bytes);
	memset(tv->h, 0, h_bytes);
	memset(tv->x, 0, x_bytes);

	/* Fill order matters: x consumes the random sequence before h */
	gen_floats(tv->x, tv->x_len);
	gen_floats(tv->h, tv->h_len);
}
/*
 * Allocate a test vector set.
 *
 * x_len and h_len are stored as the lengths of the x and h buffers; the
 * result buffers y_ref and y_iut get the same length as x. All four
 * buffers come from convolve_h_alloc() and the set is initialised through
 * test_vec_reset() with seed 0.
 *
 * Returns the new set, or NULL if any allocation fails (in which case
 * everything obtained so far is freed). Release with test_vec_release().
 */
static struct test_vec *
test_vec_alloc(int x_len, int h_len)
{
	struct test_vec *tv;

	tv = calloc(1, sizeof(*tv));
	if (!tv)
		return NULL;

	tv->x_len = x_len;
	tv->h_len = h_len;
	tv->y_len = x_len; /* Results can never be longer than x */

	tv->x     = convolve_h_alloc(x_len);
	tv->h     = convolve_h_alloc(h_len);
	tv->y_ref = convolve_h_alloc(tv->y_len);
	tv->y_iut = convolve_h_alloc(tv->y_len);

	/* test_vec_reset() writes to every buffer, so bail out before it
	 * runs if any of them is missing. free(NULL) is a no-op, which
	 * lets us release unconditionally. */
	if (!tv->x || !tv->h || !tv->y_ref || !tv->y_iut) {
		free(tv->x);
		free(tv->h);
		free(tv->y_ref);
		free(tv->y_iut);
		free(tv);
		return NULL;
	}

	test_vec_reset(tv, 0);

	return tv;
}
/*
 * Free a test vector set together with its four buffers.
 * Passing NULL is allowed and does nothing.
 */
static void
test_vec_release(struct test_vec *tv)
{
	if (tv != NULL) {
		/* Buffers first, then the structure that holds them */
		free(tv->y_iut);
		free(tv->y_ref);
		free(tv->h);
		free(tv->x);

		free(tv);
	}
}
/* Test complex convolution */
static void test_convolve_complex(int h_len)
static void
test_convolve_complex(struct test_vec *tv, int h_len)
{
int x_len;
int y_len;
int start;
int len;
int step;
int offset;
int start, len;
int rv;
x_len=34;
y_len=26;
start=8;
len=26;
step=1;
offset=1;
reset_testvec(0);
dump_floats(x,x_len,"x");
printf("\n");
dump_floats(h,h_len,"h");
printf("\n");
convolve_complex(x, x_len, h, h_len, y, y_len, start, len, step, offset);
dump_floats(y,y_len,"y");
printf("\n");
test_vec_reset(tv, 0);
/* Compute params that fit within our test vectors */
x_len = tv->x_len / 2; /* float vs complex */
start = h_len - 1;
len = x_len - start;
/* Run both 'base/ref' implementation and the potentially optimized one */
base_convolve_complex(
tv->x, x_len,
tv->h, h_len,
tv->y_ref, tv->y_len,
start, len
);
convolve_complex(
tv->x, x_len,
tv->h, h_len,
tv->y_iut, tv->y_len,
start, len
);
/* Print the 'ref' results. Those should be consistent across platforms */
dump_floats(tv->y_ref, 2 * len, "y_ref");
/* Compare to 'iut' ones with small tolerance for precision */
rv = compare_floats(tv->y_ref, tv->y_iut, len * 2, 1e-5f, 1e-5f);
printf("IUT: %s\n", rv ? "!!! FAIL !!!" : "PASS");
if (rv)
dump_floats(tv->y_iut, 2 * len, "y_iut");
}
/* Test real convolution */
static void test_convolve_real(int h_len)
static void
test_convolve_real(struct test_vec *tv, int h_len)
{
int x_len;
int y_len;
int start;
int len;
int step;
int offset;
int start, len;
int rv;
x_len=34;
y_len=26;
start=8;
len=26;
step=1;
offset=1;
reset_testvec(0);
dump_floats(x,x_len,"x");
printf("\n");
dump_floats(h,h_len,"h");
printf("\n");
convolve_real(x, x_len, h, h_len, y, y_len, start, len, step, offset);
dump_floats(y,y_len,"y");
printf("\n");
test_vec_reset(tv, 0);
/* Compute params that fit within our test vectors */
x_len = tv->x_len / 2; /* float vs complex */
start = h_len - 1;
len = x_len - start;
/* Run both 'base/ref' implementation and the potentially optimized one */
base_convolve_real(
tv->x, x_len,
tv->h, h_len,
tv->y_ref, tv->y_len,
start, len
);
convolve_real(
tv->x, x_len,
tv->h, h_len,
tv->y_iut, tv->y_len,
start, len
);
/* Print the 'ref' results. Those should be consistent across platforms */
dump_floats(tv->y_ref, 2 * len, "y_ref");
/* Compare to 'iut' ones with small tolerance for precision */
rv = compare_floats(tv->y_ref, tv->y_iut, len * 2, 1e-5f, 1e-5f);
printf("IUT: %s\n", rv ? "!!! FAIL !!!" : "PASS");
if (rv)
dump_floats(tv->y_iut, 2 * len, "y_iut");
}
int main(void)
{
#if DO_INIT == 1
struct test_vec *tv;
int i;
convolve_init();
#endif
printf("==== TEST COMPLEX BASE IMPLEMENTATION ====\n");
test_convolve_complex(17);
/* Alloc test vectors */
/* All *2 is to account for the facts all vectors are actually
* complex and need two floats */
tv = test_vec_alloc(100*2, 25*2);
printf("==== TEST COMPLEX SSE3 IMPLEMENTATION: (h_len%%4=0) ====\n");
test_convolve_complex(20);
/* Dump all input data to make sure we work off the same input data */
printf("==== TEST INPUT DATA ====\n");
dump_floats(tv->x, tv->x_len, "x");
dump_floats(tv->h, tv->h_len, "h");
printf("\n");
printf("\n");
printf("==== TEST COMPLEX SSE3 IMPLEMENTATION: (h_len%%8=0) ====\n");
test_convolve_complex(16);
/* Test complex */
printf("==== TEST COMPLEX ====\n");
for (i=4; i<=24; i+=4)
{
printf(" -- h_len = %d --\n", i);
test_convolve_complex(tv, i);
printf("\n");
}
printf(" -- h_len = %d --\n", 25);
test_convolve_complex(tv, 25);
printf("\n");
printf("\n");
printf("==== TEST REAL BASE IMPLEMENTATION ====\n");
test_convolve_real(17);
/* Test real */
printf("==== TEST REAL ====\n");
printf("==== TEST REAL SSE3 IMPLEMENTATION (hlen=4) ====\n");
test_convolve_real(4);
for (i=4; i<=24; i+=4)
{
printf(" -- h_len = %d --\n", i);
test_convolve_real(tv, i);
printf("\n");
}
printf("==== TEST REAL SSE3 IMPLEMENTATION (hlen=8) ====\n");
test_convolve_real(8);
printf(" -- h_len = %d --\n", 25);
test_convolve_real(tv, 25);
printf("==== TEST REAL SSE3 IMPLEMENTATION (hlen=12) ====\n");
test_convolve_real(12);
printf("\n");
printf("\n");
printf("==== TEST REAL SSE3 IMPLEMENTATION (hlen=16) ====\n");
test_convolve_real(16);
printf("==== TEST REAL SSE3 IMPLEMENTATION (hlen=20) ====\n");
test_convolve_real(20);
printf("==== TEST REAL SSE3 IMPLEMENTATION (h_len%%4=0) ====\n");
test_convolve_real(24);
/* All done ! */
test_vec_release(tv);
return 0;
}

View File

@@ -1,72 +1,748 @@
==== TEST COMPLEX BASE IMPLEMENTATION ====
float x[] = {0.828957,0.675654,0.904170,0.191112,0.394521,0.706067,0.868924,0.547397,0.738959,0.932485,0.233119,0.926576,0.551443,0.933420,0.494407,0.552568,0.939129,0.799646,0.814139,0.594497,0.657201,0.995300,0.935852,0.324541,0.874309,0.589157,0.637771,0.759324,0.775421,0.794910,0.262785,0.604379,0.470564,0.166955}
float h[] = {0.726144,0.746635,0.470674,0.211604,0.963092,0.264553,0.265818,0.725771,0.590649,0.313560,0.547613,0.946811,0.793753,0.690502,0.276120,0.792995,0.446645}
float y[] = {0.389293,10.824917,-0.676577,10.619646,0.283489,11.279525,0.384482,11.586230,0.711259,11.540458,-0.391531,11.281723,0.019900,12.278080,-0.070459,11.104558,0.087938,11.825965,-1.003252,11.698885,0.358887,11.911197,-0.678904,11.933812,0.245140,11.886644}
==== TEST COMPLEX SSE3 IMPLEMENTATION: (h_len%4=0) ====
float x[] = {0.828957,0.675654,0.904170,0.191112,0.394521,0.706067,0.868924,0.547397,0.738959,0.932485,0.233119,0.926576,0.551443,0.933420,0.494407,0.552568,0.939129,0.799646,0.814139,0.594497,0.657201,0.995300,0.935852,0.324541,0.874309,0.589157,0.637771,0.759324,0.775421,0.794910,0.262785,0.604379,0.470564,0.166955}
float h[] = {0.726144,0.746635,0.470674,0.211604,0.963092,0.264553,0.265818,0.725771,0.590649,0.313560,0.547613,0.946811,0.793753,0.690502,0.276120,0.792995,0.446645,0.327805,0.785346,0.676628}
float y[] = {-0.641594,12.367426,-0.970113,12.963129,-0.466783,13.747334,0.637486,13.341836,-0.168561,14.091346,0.306652,15.018833,0.233741,14.726789,-0.011241,15.034849,0.000155,13.639509,0.558827,15.495646,-0.406179,14.103148,-0.000244,15.591370,-0.492319,14.785577}
==== TEST COMPLEX SSE3 IMPLEMENTATION: (h_len%8=0) ====
float x[] = {0.828957,0.675654,0.904170,0.191112,0.394521,0.706067,0.868924,0.547397,0.738959,0.932485,0.233119,0.926576,0.551443,0.933420,0.494407,0.552568,0.939129,0.799646,0.814139,0.594497,0.657201,0.995300,0.935852,0.324541,0.874309,0.589157,0.637771,0.759324,0.775421,0.794910,0.262785,0.604379,0.470564,0.166955}
float h[] = {0.726144,0.746635,0.470674,0.211604,0.963092,0.264553,0.265818,0.725771,0.590649,0.313560,0.547613,0.946811,0.793753,0.690502,0.276120,0.792995}
float y[] = {-0.278295,10.097409,0.919633,11.502825,0.340383,10.979163,0.891132,11.679869,0.425363,11.186544,1.099703,12.121126,0.188196,11.180099,0.228905,12.436676,0.149904,11.522589,0.543155,11.703615,0.033465,12.425473,0.561782,12.373415,-0.218184,12.154579}
==== TEST INPUT DATA ====
float x[] = {
3.4018773e-01, -1.0561708e-01, 2.8309923e-01, 2.9844004e-01,
4.1164738e-01, -3.0244863e-01, -1.6477725e-01, 2.6822960e-01,
-2.2222528e-01, 5.3969979e-02, -2.2602946e-02, 1.2887090e-01,
-1.3521552e-01, 1.3400912e-02, 4.5222974e-01, 4.1619509e-01,
1.3571173e-01, 2.1729696e-01, -3.5839742e-01, 1.0696888e-01,
-4.8369944e-01, -2.5711322e-01, -3.6276841e-01, 3.0417675e-01,
-3.4332091e-01, -9.9055618e-02, -3.7020957e-01, -3.9119118e-01,
4.9892449e-01, -2.8174311e-01, 1.2932420e-02, 3.3911222e-01,
1.1263984e-01, -2.0396838e-01, 1.3755226e-01, 2.4287164e-02,
-6.4170063e-03, 4.7277504e-01, -2.0748320e-01, 2.7135772e-01,
2.6744962e-02, 2.6991385e-01, -9.9771380e-02, 3.9152944e-01,
-2.1668527e-01, -1.4754164e-01, 3.0772454e-01, 4.1902649e-01,
-4.3024471e-01, 4.4932705e-01, 2.5995374e-02, -4.1394415e-01,
-3.0778617e-01, 1.6322690e-01, 3.9023262e-01, -1.5110707e-01,
-4.3582869e-01, -4.7997695e-01, -4.2298257e-02, -4.3690416e-01,
-2.6172006e-01, 4.7063410e-01, 4.0220809e-01, 3.5091978e-01,
-2.3333424e-01, 3.9760351e-02, -1.2479302e-01, 2.6024872e-01,
1.2535393e-02, 1.6772377e-01, 3.1606436e-02, -4.6071965e-01,
-6.2362403e-02, 4.3183506e-01, 4.3080980e-01, 2.2095233e-01,
-2.1570659e-01, 2.3853433e-01, 1.3997883e-01, -1.4595133e-01,
1.8786138e-01, -3.3402583e-01, -5.9895486e-02, 3.8007522e-01,
3.2920110e-01, -1.6966286e-01, -2.7103183e-01, 3.9337242e-01,
-1.4963982e-01, 1.8666989e-01, 4.5646822e-01, 8.8640153e-02,
1.5730405e-01, 3.5867631e-01, -6.0440093e-02, 4.2396981e-01,
-1.0156333e-01, 3.1476688e-01, 1.8421853e-01, 4.1097206e-01,
-1.7509341e-02, -2.8417504e-01, 4.5025235e-01, 4.2012823e-01,
-3.5233998e-01, 3.8106215e-01, 1.4108062e-01, -6.8046570e-02,
1.1959648e-01, -2.1894059e-01, 2.8600210e-01, -1.9254214e-01,
-5.2966416e-02, -2.7389336e-01, -3.1246689e-01, -2.2376531e-01,
5.6443751e-02, -8.3498716e-02, -3.3039290e-01, 4.0680391e-01,
-3.9682883e-01, -3.7392467e-01, -4.5559406e-03, 2.6047522e-01,
4.8475164e-01, 4.3500400e-01, 1.8444502e-01, -1.1681166e-01,
2.4977088e-01, -1.3133645e-01, -2.0583963e-01, -2.6773846e-01,
8.4488511e-02, -2.5558728e-01, -3.4761021e-01, 2.3214853e-01,
-3.7452510e-01, 2.9347038e-01, -3.3589807e-01, 2.4507141e-01,
-4.2547020e-01, 4.5010406e-01, -4.4747072e-01, 2.1563351e-02,
-3.2378936e-01, -2.5993764e-01, 2.9779804e-01, 2.3265439e-01,
1.5656364e-01, 4.6740514e-01, 1.3945836e-01, 2.5973487e-01,
-4.0651953e-01, -3.6509758e-01, 2.0210087e-02, -4.2176786e-01,
-4.3009359e-01, -2.9534492e-01, -3.8579524e-02, 3.1967729e-01,
7.3318601e-02, 2.5558084e-01, -4.4806117e-01, -3.4219289e-01,
4.9999356e-01, -2.9567140e-01, 3.8995564e-01, -3.7453151e-01,
4.9779898e-01, -4.4594243e-01, 3.7053984e-01, -4.2767119e-01,
-4.9583840e-01, 4.2306912e-01, 9.3892157e-02, -3.1962773e-01,
-3.3686849e-01, -1.0830978e-01, 4.1302669e-01, 3.1969517e-01,
-1.4090464e-01, 5.2485049e-02, 7.9429984e-02, -4.7424167e-02,
1.8738741e-01, -4.0035993e-01, 3.0807972e-02, 2.5729382e-01,
-1.9570485e-01, 4.9222845e-01, 7.6971114e-02, 3.7761378e-01,
2.4780929e-01, 1.2890995e-01, -4.6457911e-01, 2.4780285e-01,
3.3323854e-01, 4.2537653e-01, 3.7327135e-01, 3.3103752e-01
}
float h[] = {
4.7943413e-01, 2.4381119e-01, 4.0336633e-01, 4.8359573e-01,
1.6688031e-01, -2.7414858e-03, -3.3603200e-01, 3.3001184e-01,
3.8894874e-01, -4.2300531e-01, 1.4970696e-01, -2.5195587e-01,
1.2947971e-01, -2.7086303e-01, 2.0061994e-01, -1.8313286e-01,
-1.7122295e-01, -2.6857203e-01, -4.2583904e-01, 1.3307220e-01,
-2.7634358e-01, 1.5113211e-01, 1.0685980e-02, 4.7146571e-01,
-2.1995798e-01, 4.6106875e-02, 2.1926856e-01, -3.8671944e-01,
-2.8516561e-02, 9.2539907e-02, 4.4431812e-01, -4.9082428e-02,
-1.6364887e-01, 3.4768444e-01, -6.5486699e-02, -4.9676853e-01,
-1.5505704e-01, 9.8481297e-02, 3.3324331e-01, -2.6610827e-01,
1.7547596e-01, -1.7049730e-02, -1.8064171e-02, -1.9504431e-01,
2.1208727e-01, -3.1744421e-01, 1.2182283e-01, -4.5913571e-01,
-8.6016268e-02, 1.9598377e-01
}
==== TEST COMPLEX ====
-- h_len = 4 --
float y_ref[] = {
1.9343475e-01, 9.3477979e-02, 4.0537333e-01, 2.4291979e-01,
3.0472055e-03, -5.7278976e-02, -2.2253948e-01, -2.4831513e-02,
-5.0298059e-01, 2.4738863e-02, -1.4398363e-01, 3.6272820e-02,
2.1425188e-02, 2.4170342e-01, 2.5287169e-01, 4.0868199e-01,
-2.4411464e-01, -2.5641626e-01, -1.8032607e-01, -4.0198106e-01,
-2.6670712e-01, -3.0024880e-01, -4.7619563e-01, 4.6459973e-02,
-1.3436908e-01, -6.2609112e-01, 2.8793404e-01, 1.2088522e-02,
1.1311802e-01, 1.3249211e-01, -6.3249446e-02, -1.9376054e-02,
1.2786689e-01, -7.4752592e-02, -3.0313748e-01, 1.9675869e-01,
-4.2374337e-01, 1.1469635e-01, -1.7944857e-01, 2.4500139e-01,
-5.6082594e-01, 1.8232608e-01, -1.1055143e-01, -2.2480465e-01,
-8.9124054e-02, 4.1810924e-01, -2.9270607e-01, 2.3528263e-02,
-2.3733716e-01, 1.6376571e-01, 2.7985922e-01, -2.8395247e-01,
1.2747175e-01, 1.8492913e-01, 2.0462361e-01, -6.9898784e-01,
-1.9105226e-01, -4.3899474e-01, -1.1355378e-01, -1.8941317e-01,
-3.3047011e-01, 3.7652293e-01, -8.5748151e-02, 1.6105306e-01,
-1.5378098e-01, 2.0000164e-01, -3.1687921e-01, -7.4600063e-02,
-2.6237980e-02, 5.3075723e-02, -4.0242106e-02, -1.8479857e-01,
-1.0247727e-01, 6.2493140e-01, 2.0375948e-02, 3.5236996e-01,
-1.0939813e-01, -1.3314986e-01, 2.7642086e-01, 1.4951222e-01,
-2.0726323e-02, -2.4084167e-01, 3.7983395e-02, 2.1266100e-01,
-3.0746281e-01, 1.7893985e-01, -4.7129101e-01, 7.0373103e-02,
-6.8367735e-02, 2.0652397e-01, 8.5595772e-03, 3.0616778e-01,
-4.5505679e-01, 3.2760209e-01, -1.9399881e-01, 4.2417210e-01,
-5.4352093e-01, 3.4104413e-01, 1.8741676e-01, -5.6594178e-02,
-4.3370463e-02, 3.8067535e-01, -1.5754598e-01, 3.9581478e-01,
-1.8522340e-01, 2.5978774e-01, 3.9373603e-01, 1.2938097e-02,
4.8844910e-01, -8.8651918e-02, 2.5098252e-01, -1.4847451e-01,
9.5206015e-03, -6.4541638e-01, 1.7062227e-01, -1.2636250e-01,
-4.3425602e-01, -1.7230041e-01, -5.4331875e-01, -1.7097193e-01,
-1.6823083e-01, -1.7737076e-03, -9.0653047e-02, 6.4022112e-01,
4.5608354e-01, 3.6825955e-01, 3.0204356e-01, 1.2642944e-01,
2.5181353e-01, -4.4521901e-01, 9.5902674e-02, -4.2330515e-01,
-1.7935884e-01, -3.1960091e-01, -5.7719946e-01, -2.8603512e-01,
-4.3163729e-01, -9.2835695e-02, -4.9010974e-01, -3.2805949e-03,
-7.3624301e-01, -1.1802941e-01, -3.8121629e-01, -4.2757961e-01,
-1.8941823e-01, 7.0607528e-02, 2.0424317e-01, 4.7988847e-01,
-4.4694379e-02, 5.2305567e-01, 2.6032710e-01, -2.9846272e-01,
-5.8884420e-02, -6.0276937e-01, -3.2680467e-02, -5.3263980e-01,
-2.7922690e-02, -1.2659723e-01, -3.3661029e-01, 4.9088931e-01,
3.2778956e-02, -1.0467410e-02, 2.5721970e-01, 9.9789083e-02,
7.4869239e-01, 2.0786656e-01, 7.8238732e-01, -4.0181538e-01,
6.9601524e-01, 1.2460417e-01, 4.1056134e-02, -3.1220391e-01,
-4.4923437e-01, 1.0146232e-02, 1.3927090e-01, -3.4886351e-01,
-1.5751208e-01, 2.4592637e-01, 1.2013673e-01, 3.9524502e-01,
-9.0466201e-02, -1.3352618e-01, 2.2801059e-01, -2.6137942e-01,
-1.0633877e-01, -4.6385981e-02, -4.7687450e-01, 3.3604053e-01,
-2.4935976e-01, 1.6206253e-01, -3.4675413e-01, 3.8130435e-01,
-3.9775372e-01, 7.9527527e-02
}
IUT: PASS
==== TEST REAL BASE IMPLEMENTATION ====
float x[] = {0.828957,0.675654,0.904170,0.191112,0.394521,0.706067,0.868924,0.547397,0.738959,0.932485,0.233119,0.926576,0.551443,0.933420,0.494407,0.552568,0.939129,0.799646,0.814139,0.594497,0.657201,0.995300,0.935852,0.324541,0.874309,0.589157,0.637771,0.759324,0.775421,0.794910,0.262785,0.604379,0.470564,0.166955}
-- h_len = 8 --
float y_ref[] = {
3.1198353e-01, 2.7249879e-01, 6.7253590e-01, 2.8881702e-01,
1.5280618e-01, 3.1966239e-02, 4.2916775e-02, 9.2015103e-02,
-5.3432661e-01, 3.8334787e-01, -4.2692611e-01, 4.9350762e-01,
-4.7033417e-01, 5.5272996e-01, 5.8690500e-02, 6.5384853e-01,
-5.2055192e-01, -2.2096071e-01, -4.0731701e-01, -5.8656257e-01,
-1.1307193e-01, -6.5060866e-01, -2.5256541e-01, 1.7588115e-01,
-1.4830187e-02, -6.2870300e-01, 5.7129776e-01, 1.7633122e-01,
4.7617540e-01, 5.3651673e-01, 6.5487638e-02, 2.9540601e-01,
4.0660924e-01, 1.6996843e-01, -9.6623927e-02, 5.6359464e-01,
-4.2338252e-01, 2.2107749e-01, 2.5678758e-02, 4.8195598e-01,
-5.8352947e-01, 4.7328997e-01, -4.4623044e-01, -4.3656242e-01,
-3.9437938e-01, 4.6684524e-01, -4.8115996e-01, -6.5126479e-02,
-4.8775205e-01, 2.3525867e-01, 2.6559117e-01, -3.1239271e-01,
3.7656814e-01, 5.7330167e-01, 5.7215315e-01, -5.6904000e-01,
-2.4907845e-01, -3.3432126e-01, -6.1997123e-02, 1.6389854e-02,
-1.3020961e-01, 3.9775756e-01, -5.2837055e-02, 4.8124433e-02,
1.6283178e-01, 3.5327283e-01, -7.2935686e-02, -2.3906565e-01,
-3.3615954e-02, 1.7306522e-01, 2.6595213e-02, -4.2700648e-01,
-6.9573633e-02, 5.0495386e-01, 2.4001123e-01, 6.0639191e-01,
1.4440927e-01, -2.1232469e-01, 5.4238892e-01, 4.1379702e-01,
2.7374083e-01, -1.0671920e-01, 5.1122200e-01, 2.2112623e-01,
1.8754447e-01, 4.4758397e-01, -1.7173983e-01, 2.8306428e-01,
2.4443226e-01, 3.5680223e-01, 3.5102800e-01, 4.2337304e-01,
-3.3537811e-01, 2.7921921e-01, 1.8583983e-01, 4.3009305e-01,
-5.3709638e-01, 4.4081950e-01, 1.0034083e-01, -3.5329488e-01,
-2.7987373e-01, 1.3525811e-01, -3.0972615e-01, 2.0086128e-01,
-4.3193302e-01, 3.3690983e-01, 8.4279850e-02, 1.7117807e-01,
4.2224085e-01, 1.1130261e-01, 3.8781387e-01, 2.2697037e-01,
-4.1848086e-02, -7.1504021e-01, 4.7950408e-01, -2.1731076e-01,
-1.5708333e-01, -3.7279499e-01, -6.4571846e-01, -4.2268100e-01,
-3.1042582e-01, -9.2476785e-02, -3.3923823e-01, 8.1528240e-01,
3.9577165e-01, 6.0557044e-01, 3.0287346e-01, 7.9299420e-01,
2.2274709e-01, 2.0846851e-01, -4.6026833e-02, 1.1982823e-01,
-2.2598577e-01, 1.9759813e-01, -6.3750428e-01, -3.1139046e-02,
-3.4190997e-01, -5.2494712e-02, -1.9467109e-01, -1.1175934e-02,
-6.1602581e-01, -2.4191037e-02, -6.2194943e-01, -3.7835273e-01,
-6.9009584e-01, 1.8178366e-01, 3.7999839e-02, 4.6113190e-01,
-3.3597833e-01, 6.7432904e-01, 3.5137755e-01, -2.1184278e-01,
-8.1254162e-02, -7.9297805e-01, -3.8410002e-01, -9.8119044e-01,
-5.4825153e-02, -9.5365292e-01, -4.7106034e-01, 8.0184937e-03,
-3.8255863e-03, -4.4410813e-01, 9.0961985e-02, -6.1882250e-02,
7.3677480e-01, 5.7637358e-01, 7.2739714e-01, -5.3133565e-01,
6.6477937e-01, 1.8967161e-01, 2.9077831e-01, -4.6150434e-01,
-5.1277536e-01, 6.4325705e-03, 2.0184588e-01, -3.4849805e-01,
8.0386028e-03, 2.2010116e-01, 5.2124852e-01, 6.1381078e-01,
1.6747525e-01, 2.6221049e-01, 6.3898122e-01, -7.9696737e-03,
3.3133650e-01, 1.5950076e-02
}
IUT: PASS
float h[] = {0.726144,0.746635,0.470674,0.211604,0.963092,0.264553,0.265818,0.725771,0.590649,0.313560,0.547613,0.946811,0.793753,0.690502,0.276120,0.792995,0.446645}
-- h_len = 12 --
float y_ref[] = {
5.1073134e-01, -6.4233445e-02, 1.1001339e+00, 1.1007318e-01,
5.7089889e-01, -1.7514147e-01, 6.4569569e-01, 4.1822547e-01,
-5.4747516e-01, 7.7281708e-01, -6.0102689e-01, 8.0545586e-01,
-6.9234514e-01, 4.6278694e-01, -1.3791974e-01, 7.1026200e-01,
-8.5625696e-01, -4.3486625e-01, -5.9517044e-01, -9.2069548e-01,
-1.6659707e-01, -9.8640192e-01, -1.5582368e-01, -1.5324977e-01,
-6.8623766e-02, -7.0452332e-01, 4.4063550e-01, -9.7249970e-02,
5.3327537e-01, 3.0112669e-01, 2.2389071e-01, -1.3262859e-01,
7.8076601e-01, 4.7906137e-01, 3.3731923e-02, 4.0715045e-01,
-7.4400723e-02, 4.3422836e-01, 2.0777807e-02, 5.4539984e-01,
-7.2161388e-01, 8.7671280e-01, -6.9292784e-01, -7.3140836e-01,
-5.0670183e-01, 2.5833350e-01, -4.4549638e-01, -3.6429164e-01,
-2.2987276e-01, 1.2921053e-01, 1.8583019e-01, -2.8588608e-01,
3.1971890e-01, 8.1832850e-01, 1.4488581e-01, -7.8593260e-01,
-2.4141420e-01, -4.6235585e-01, 1.2655887e-01, -1.2093870e-01,
-2.5066316e-01, 5.9214741e-01, -1.0893403e-01, 2.4207115e-01,
-2.3780495e-01, 1.6324958e-01, -1.5237269e-01, -3.9022326e-01,
-9.6253231e-02, 5.2076980e-02, -8.9428246e-02, -3.9848536e-01,
-4.9820563e-01, 4.3686843e-01, -1.2591672e-01, 1.6599865e-01,
-2.8170899e-02, -5.1566052e-01, 6.8874002e-01, 1.1286099e-01,
9.7053885e-02, 8.8207424e-03, 2.6398507e-01, 9.8689049e-03,
-6.0056053e-02, 2.8918642e-01, 3.9130948e-02, -2.4782214e-02,
4.4995353e-01, 6.4529550e-01, 3.6543220e-01, 5.7481551e-01,
-3.5263625e-01, 3.2261267e-01, 3.0429733e-01, 5.3616601e-01,
-6.3705307e-01, 4.3583199e-01, 2.8269362e-01, -5.4143071e-01,
-1.8208441e-01, -3.8809724e-02, -1.6317154e-01, 4.8688924e-01,
-6.3979185e-01, 4.3474686e-01, -7.8023329e-02, 1.8357828e-01,
4.6792713e-01, -4.4948310e-02, 4.5472518e-01, 3.6658123e-01,
-9.4489828e-02, -7.5101864e-01, 3.5948592e-01, -2.8623044e-01,
-1.8304718e-01, -7.9028600e-01, -5.6438643e-01, -8.5815549e-01,
-2.2451524e-02, -5.9062058e-01, 1.4486849e-01, 3.8622388e-01,
7.9944402e-01, 7.3982805e-01, 2.2160605e-01, 1.0366077e+00,
-1.8426970e-01, 2.4351463e-01, -7.3407546e-02, -4.2445374e-01,
1.4537066e-01, 2.7947389e-02, -1.7712221e-01, -9.8013066e-02,
-3.1047958e-01, 3.0333564e-01, -2.4632016e-01, 6.7275390e-02,
-5.5016017e-01, -2.7382451e-01, -2.7444303e-01, -2.6239043e-01,
-3.1040829e-01, 5.4154611e-01, 1.3651639e-02, 1.2248676e+00,
-4.8179182e-01, 1.1706464e+00, -2.1114275e-01, -5.1367894e-02,
-1.6235633e-01, -7.6988780e-01, -3.3773273e-01, -1.3110837e+00,
1.0939775e-01, -5.6715608e-01, -6.0381925e-01, -5.2970968e-02,
-1.3944679e-01, -4.1512328e-01, 3.3509767e-01, -1.5947121e-01,
6.3523126e-01, 7.9222643e-01, 3.9298099e-01, -5.0257689e-01,
2.8029183e-01, -2.2847716e-02, 2.3597348e-01, -7.2399336e-01,
-6.3985258e-01, -3.9040399e-01, 6.1344981e-02, -4.3278030e-01,
-1.4337064e-01, 7.6462559e-02
}
IUT: PASS
float y[] = {5.354852,5.387001,4.829278,5.046340,5.849788,5.775999,5.653334,5.372714,5.999860,5.593828,5.628739,5.178002,6.010774,6.186034,6.337766,5.538046,5.616131,6.289612,5.486091,5.835261,6.277413,5.894117,5.563587,6.082063,5.828556,6.160175}
-- h_len = 16 --
float y_ref[] = {
3.9259350e-01, 2.0336020e-01, 1.2083304e+00, -1.8030062e-01,
6.8609583e-01, -5.3055910e-04, 5.8721924e-01, 4.7836140e-01,
-6.4573079e-01, 8.9168346e-01, -4.4504160e-01, 1.0042783e+00,
-7.0414054e-01, 6.7187703e-01, -1.3145420e-01, 6.1453128e-01,
-5.6792223e-01, -2.1333663e-01, -9.1251004e-01, -7.2268122e-01,
7.9308510e-02, -1.2288902e+00, -2.5458413e-01, 1.3551895e-01,
1.0557353e-02, -1.0434419e+00, 2.3528877e-01, -1.6334504e-03,
6.3718635e-01, -1.5170360e-01, -1.8770680e-01, 2.1240243e-01,
8.8036948e-01, 5.8363783e-01, 4.2129651e-02, 7.6204038e-01,
1.4566663e-01, 3.3907735e-01, -1.2638038e-01, 6.4064759e-01,
-6.2329191e-01, 7.5265288e-01, -5.7464683e-01, -5.5148101e-01,
-5.2435344e-01, 1.6771759e-01, -3.9474785e-01, 7.4480176e-03,
-1.7047867e-02, -1.8528795e-01, 2.0242094e-01, -3.1942075e-01,
3.4801385e-01, 8.6850441e-01, 1.3732195e-01, -1.0011898e+00,
-2.2828943e-01, -5.0333396e-02, 4.2781591e-02, -3.1782448e-01,
-2.8394341e-02, 8.3352113e-01, 3.8200065e-02, 4.3320811e-01,
-1.2285206e-01, 1.5387420e-01, -1.5085199e-01, -2.4370183e-01,
7.9104662e-02, 2.5195837e-01, -6.1237380e-02, -5.0630927e-01,
-4.3581635e-02, 5.5287701e-01, -4.8868680e-01, 2.4484858e-01,
2.8410962e-01, -6.1659271e-01, 6.8510634e-01, 1.7286855e-01,
2.9607964e-01, -2.4299805e-01, 1.5033059e-01, -1.5005481e-01,
-2.1097726e-01, 1.0886001e-01, -8.1835359e-02, -7.1222454e-02,
1.9846845e-01, 9.7944200e-01, 2.0166935e-01, 3.8065171e-01,
-2.1964781e-01, 6.5044796e-01, 3.3935797e-01, 6.6456228e-01,
-4.1065484e-01, 5.3015858e-01, 6.5628278e-01, -7.4122947e-01,
-4.1306469e-01, -2.9108244e-01, -1.3873807e-01, 2.6657555e-01,
-9.5916164e-01, 6.3136345e-01, -2.6429197e-01, 2.5425220e-01,
3.2098943e-01, 2.8284523e-01, 3.7179819e-01, 6.9147599e-01,
-2.3179966e-01, -6.6986769e-01, 3.5698265e-01, -2.3401824e-01,
-2.2967517e-02, -6.6492826e-01, -5.7599217e-01, -5.9439480e-01,
2.4301717e-01, -5.0280505e-01, 5.7166010e-02, 2.5395346e-01,
9.0813631e-01, 4.3165860e-01, -2.1851237e-01, 9.6684217e-01,
-1.9854844e-01, 3.1733474e-01, -2.5028959e-01, -1.3191071e-01,
1.2713718e-01, 2.7541250e-02, 1.8362647e-01, -3.3003873e-01,
-4.0098429e-01, 2.1789065e-01, 8.6135000e-02, -3.1211060e-01,
-5.3512049e-01, -5.6808853e-01, -5.7678592e-01, -1.9351965e-01,
-4.8246443e-01, 2.2094266e-01, -1.2133773e-01, 1.6067516e+00,
-2.7641732e-01, 1.0420220e+00, -4.3420127e-01, 1.8913136e-01,
1.6305616e-01, -8.9072859e-01, -3.8818783e-01, -1.4747425e+00,
1.9507036e-01, -4.8474717e-01, -8.2032663e-01, 2.4695456e-02,
-4.3185152e-02, -1.4206313e-01, 5.4319465e-01, 1.0437109e-02,
6.0523671e-01, 8.8012832e-01, 6.2208772e-01, -5.2706617e-01,
3.4703535e-01, 3.4169286e-01
}
IUT: PASS
==== TEST REAL SSE3 IMPLEMENTATION (hlen=4) ====
float x[] = {0.828957,0.675654,0.904170,0.191112,0.394521,0.706067,0.868924,0.547397,0.738959,0.932485,0.233119,0.926576,0.551443,0.933420,0.494407,0.552568,0.939129,0.799646,0.814139,0.594497,0.657201,0.995300,0.935852,0.324541,0.874309,0.589157,0.637771,0.759324,0.775421,0.794910,0.262785,0.604379,0.470564,0.166955}
-- h_len = 20 --
float y_ref[] = {
4.0563765e-01, 2.7768248e-01, 1.4988428e+00, -1.4390233e-01,
7.1137142e-01, 1.2297645e-01, 5.2460098e-01, 2.6881298e-01,
-2.8073692e-01, 9.4002217e-01, -7.3673451e-01, 1.2523794e+00,
-5.0840402e-01, 1.8347138e-01, -9.8511763e-02, 1.0402999e+00,
-7.3959279e-01, -6.3208389e-01, -9.9019027e-01, -4.8580292e-01,
-4.3206736e-02, -1.6494110e+00, -3.8819546e-01, 8.3392441e-01,
2.5613919e-01, -1.1556400e+00, 4.8095024e-01, 2.1489243e-01,
7.2430336e-01, -4.5171279e-01, -2.9797465e-01, 4.0803951e-01,
9.1165602e-01, 3.5424012e-01, 1.8917093e-01, 8.9390326e-01,
2.3863241e-02, 2.1634607e-01, 1.5027168e-01, 8.7479091e-01,
-6.6391504e-01, 2.8770256e-01, -6.2296259e-01, -4.7120559e-01,
-5.6871128e-01, 2.0659479e-01, -5.0873393e-01, -2.0042220e-01,
2.4109995e-01, 2.0149592e-01, -2.2671439e-02, -5.4053026e-01,
7.4681568e-01, 9.8399460e-01, 2.1579692e-01, -9.8881048e-01,
-2.2159560e-01, -2.4822801e-01, 1.2267539e-01, -2.1502660e-01,
1.9123185e-01, 8.6089939e-01, -8.6792998e-02, 2.3703022e-01,
2.6896733e-01, 1.1151828e-02, -5.9111112e-01, -1.9654661e-02,
4.0600595e-01, -1.1281250e-01, -1.0220084e-01, -3.4878132e-01,
-1.1436477e-01, 2.0784064e-01, -7.2063136e-01, 2.4080434e-01,
9.7794682e-02, -6.2281477e-01, 6.3973004e-01, 3.0914345e-01,
3.0690259e-01, 1.9527674e-01, 1.3511479e-02, -3.5927096e-01,
1.9869280e-01, 3.8655108e-01, -7.6620430e-02, -5.5920191e-02,
4.3548766e-01, 7.8014821e-01, 3.2739228e-01, -6.8113729e-03,
-6.8596470e-01, 6.7433977e-01, 2.8661230e-01, 5.4591709e-01,
-5.6738234e-01, 9.7609603e-01, 6.3488019e-01, -6.6698337e-01,
-2.1748835e-01, 2.6263937e-02, 1.3686508e-02, 4.6669549e-01,
-9.7789866e-01, 6.1850429e-01, -1.5288228e-01, 2.3137388e-01,
5.1109362e-01, 2.8883606e-01, 4.3707043e-01, 8.1758893e-01,
5.2922159e-02, -9.1069019e-01, 1.6946894e-01, -3.1696463e-01,
-9.7715080e-02, -9.0262455e-01, -1.0273845e+00, -2.7913937e-01,
3.9362526e-01, -4.4658157e-01, 1.4889301e-01, 5.7529253e-01,
9.6562743e-01, 3.0147529e-01, -1.0069013e-02, 6.2530518e-01,
-4.5818228e-01, 4.1249818e-01, -2.1397476e-01, -6.4531231e-01,
-8.7075405e-02, -4.2375207e-02, -7.2089776e-02, 2.4483189e-02,
-5.8266813e-01, 6.1949864e-02, 2.9266685e-01, 2.1767689e-01,
-4.8038912e-01, -8.5518450e-01, -6.4128172e-01, 1.1196103e-01,
-2.2738291e-01, -1.6352636e-01, -3.1652111e-01, 1.5964899e+00,
-2.1126030e-01, 1.1061658e+00, -6.0621572e-01, 3.3688825e-01,
5.0539368e-01, -7.8245437e-01, -1.5758494e-01, -1.5151085e+00,
9.8507836e-02, -4.8568571e-01, -6.4452505e-01, -1.7298427e-01,
1.4388496e-01, 1.1540639e-01
}
IUT: PASS
float h[] = {0.726144,0.746635,0.470674,0.211604}
-- h_len = 24 --
float y_ref[] = {
6.3018608e-01, 2.8423050e-01, 1.8153185e+00, 2.1489501e-01,
6.1149991e-01, 2.0270358e-01, 5.9271133e-01, 4.7805154e-01,
-4.6508217e-01, 9.6335644e-01, -9.4050735e-01, 1.2220898e+00,
-1.0467027e+00, 1.4672846e-01, -8.1817716e-02, 1.1990718e+00,
-6.0469484e-01, -6.5175623e-01, -7.2200888e-01, -4.6047974e-01,
4.7492534e-02, -1.4756467e+00, -1.6498202e-01, 1.0429246e+00,
1.1710593e-01, -1.1640908e+00, 5.4710066e-01, 2.3070689e-01,
9.1671783e-01, -4.7983694e-01, 2.9820830e-02, 3.6921877e-01,
9.2340273e-01, 3.8007712e-01, 1.7183515e-01, 7.6075757e-01,
6.0101196e-02, 1.8053912e-01, 1.7395832e-01, 7.4399179e-01,
-3.9787713e-01, 2.6257843e-01, -5.3116250e-01, -2.0372190e-01,
-3.0838940e-01, 1.0527763e-01, -2.0168084e-01, -2.5559211e-01,
5.6158608e-01, 2.5170538e-01, 3.7997139e-01, -3.7583706e-01,
1.1538334e+00, 1.1128649e+00, 3.1257099e-01, -8.9715719e-01,
-3.3971965e-03, -4.4487441e-01, 4.6779409e-01, 1.6864493e-02,
2.8930783e-01, 8.3552551e-01, 2.4338216e-03, 2.2411425e-01,
1.0011864e-01, -1.8143091e-01, -7.4504924e-01, -1.9404535e-01,
1.4155512e-01, -1.3064578e-01, -2.7652603e-01, -3.5652000e-01,
-3.4334987e-02, 3.9129543e-01, -9.5754206e-01, 5.2517068e-01,
1.0776453e-01, -5.0084651e-01, 8.6342621e-01, 3.5743105e-01,
4.9151617e-01, -2.7927294e-02, 6.5405264e-02, -6.2989593e-01,
1.2833342e-01, 3.7550265e-01, -3.1207931e-01, -1.8727586e-01,
4.2959970e-01, 9.0468168e-01, 3.2441798e-01, 3.0514628e-01,
-5.3860873e-01, 1.0568659e+00, 4.5496196e-01, 1.0691149e+00,
-5.6619692e-01, 1.5336677e+00, 4.2874324e-01, -2.7978027e-01,
-2.8027374e-01, 1.3865612e-01, 2.6137283e-01, 4.8582289e-01,
-6.8132639e-01, 5.3316730e-01, -1.1344668e-01, 3.8112825e-01,
2.0137414e-01, 3.2724720e-01, 8.4503561e-02, 1.0122974e+00,
-1.5016308e-01, -8.3359891e-01, 3.3556390e-01, -2.2457816e-01,
-2.3018801e-01, -7.5040269e-01, -1.2586766e+00, -4.3722793e-01,
2.3992740e-01, -7.5550735e-01, -1.8255928e-01, -5.5400416e-02,
7.8102481e-01, -3.0305600e-01, 3.2669067e-02, 5.3476524e-01,
-5.7471651e-01, 4.2629388e-01, -2.3708928e-01, -5.9372860e-01,
-1.3963582e-01, -3.8945466e-02, 1.1991196e-01, 4.2234767e-02,
-6.1405408e-01, -2.4031635e-02, 2.2417614e-01, 1.2318638e-01,
-4.8036790e-01, -9.8540503e-01, -4.1925645e-01, 2.6757696e-01,
1.4580157e-01, -7.0448965e-02, 1.8385217e-02, 1.6279721e+00,
-1.4290724e-02, 1.3662102e+00, -3.4957036e-01, 4.5002824e-01,
1.0109739e+00, -8.2454365e-01
}
IUT: PASS
float y[] = {1.154625,1.856899,1.754012,1.866038,1.759821,1.614741,1.946849,1.905307,2.034228,1.369325,1.929276,1.644739,1.911431,1.455565,1.751712,1.711433,1.206255,1.551974,1.351406,1.252433,1.410497,1.527218,1.666560,1.330974,1.544475,1.701906}
-- h_len = 25 --
float y_ref[] = {
5.7913333e-01, 1.6126008e-01, 1.8942088e+00, 2.5559562e-01,
6.0598469e-01, 1.2834232e-01, 5.8875954e-01, 5.6752849e-01,
-3.3352613e-01, 9.1922688e-01, -8.5124290e-01, 1.2513809e+00,
-1.1164272e+00, 5.4953381e-02, -1.8518874e-01, 1.2477132e+00,
-5.9241670e-01, -7.0090598e-01, -7.6227915e-01, -5.0732279e-01,
1.3543148e-02, -1.4876170e+00, -7.7407114e-02, 1.0887483e+00,
3.7837453e-02, -1.2134576e+00, 4.6674094e-01, 2.9613313e-01,
8.8852322e-01, -5.4262978e-01, 4.6384465e-02, 4.0920654e-01,
9.7270721e-01, 4.4562656e-01, 1.0249857e-01, 7.1632636e-01,
6.5035716e-02, 2.5965095e-01, 1.2017685e-01, 6.5703750e-01,
-4.2158994e-01, 2.1719480e-01, -5.8779824e-01, -1.2188602e-01,
-3.9221483e-01, 1.0525467e-01, -2.7957320e-01, -3.0390570e-01,
5.0863296e-01, 2.0472553e-01, 2.8358173e-01, -3.7508351e-01,
1.2110332e+00, 1.1338770e+00, 1.9150364e-01, -8.4505290e-01,
-4.7772221e-02, -5.4670489e-01, 4.6899489e-01, 5.0367117e-02,
3.2192940e-01, 8.7779695e-01, 1.5568122e-02, 2.9672778e-01,
1.5835327e-01, -1.6825218e-01, -6.7431760e-01, -2.3603633e-01,
1.5306443e-01, -1.1240147e-01, -3.2783383e-01, -4.5626339e-01,
7.3081911e-02, 3.4568703e-01, -1.0081991e+00, 5.0187266e-01,
-1.9185714e-02, -4.4326046e-01, 8.7045413e-01, 4.0362698e-01,
4.9577162e-01, 3.2320812e-02, 1.3558322e-01, -6.4720726e-01,
1.7115699e-01, 4.1404569e-01, -3.2767653e-01, -2.7537036e-01,
4.0429953e-01, 8.0603760e-01, 3.0528066e-01, 2.1823558e-01,
-5.9022444e-01, 9.3476439e-01, 4.8922566e-01, 9.7956312e-01,
-4.8740220e-01, 1.4925691e+00, 3.5753128e-01, -2.4142875e-01,
-3.8534456e-01, 1.2913561e-01, 1.9847333e-01, 4.9081305e-01,
-5.7480592e-01, 4.8490041e-01, -3.2525428e-02, 4.2136800e-01,
2.9625201e-01, 2.6836032e-01, 2.5170468e-02, 9.7723901e-01,
-2.0655937e-01, -8.4121376e-01, 4.4116870e-01, -2.8295672e-01,
-2.1524879e-01, -6.2697953e-01, -1.2188171e+00, -3.2858714e-01,
2.8450605e-01, -6.1958849e-01, -1.3061513e-01, 5.4006055e-02,
7.4076033e-01, -4.3662310e-01, 8.7234661e-02, 5.8065975e-01,
-5.2451336e-01, 3.6958954e-01, -3.3527136e-01, -5.4028106e-01,
-1.3780195e-01, -7.1075052e-02, 1.2237406e-01, 6.1881006e-02,
-5.5170840e-01, 4.7130723e-02, 1.7110074e-01, 1.0709279e-01,
-5.6000286e-01, -1.0660996e+00, -4.9988338e-01, 2.5018114e-01,
9.9221684e-02, -3.2970719e-02, 9.7812414e-03, 1.5156071e+00,
-1.2632155e-01, 1.3949302e+00, -4.4655573e-01, 4.9470875e-01
}
IUT: PASS
==== TEST REAL SSE3 IMPLEMENTATION (hlen=8) ====
float x[] = {0.828957,0.675654,0.904170,0.191112,0.394521,0.706067,0.868924,0.547397,0.738959,0.932485,0.233119,0.926576,0.551443,0.933420,0.494407,0.552568,0.939129,0.799646,0.814139,0.594497,0.657201,0.995300,0.935852,0.324541,0.874309,0.589157,0.637771,0.759324,0.775421,0.794910,0.262785,0.604379,0.470564,0.166955}
float h[] = {0.726144,0.746635,0.470674,0.211604,0.963092,0.264553,0.265818,0.725771}
==== TEST REAL ====
-- h_len = 4 --
float y_ref[] = {
4.0135658e-01, -7.0862226e-02, 3.4894887e-01, 4.7711339e-02,
1.0140251e-01, -7.1107626e-02, -1.2697329e-01, 1.6737098e-01,
-2.9018813e-01, -5.9761293e-02, -3.5513252e-02, 6.3626617e-02,
2.6066792e-01, 1.7462157e-01, 3.7428492e-01, 3.9143768e-01,
-3.8318738e-02, 2.2069588e-03, -3.1210813e-01, 3.1620685e-02,
-3.1112182e-01, 1.1434813e-01, -5.4184294e-01, 1.3526961e-01,
-2.3501489e-01, -3.6625391e-01, -1.1934193e-02, -1.7606504e-01,
2.1699335e-01, -4.0490393e-02, 7.6746441e-02, -7.4506484e-02,
1.7813747e-01, -1.0028081e-01, 1.9746821e-02, 1.5693018e-01,
-4.8778683e-02, 2.4959795e-01, -3.2523207e-02, 3.5388958e-01,
-1.6698776e-01, 1.2190759e-01, 6.0691811e-02, 4.7138259e-02,
-6.0295239e-02, 3.1236699e-01, 8.1751533e-02, 2.5821042e-01,
-3.7828243e-01, 1.2646775e-01, 9.9887021e-02, 3.4521073e-03,
-4.8674144e-02, 8.4020123e-02, 9.2179805e-02, -4.9711126e-01,
-4.0484354e-01, -4.4573051e-01, 1.9680090e-02, 2.5572028e-02,
3.9755218e-02, 2.8637058e-01, 7.3675327e-02, 1.7135073e-01,
-1.7073457e-01, 3.0684435e-01, -2.8543424e-02, -2.9569201e-02,
-1.3641408e-01, -1.0760858e-01, 1.3437629e-01, -8.9979589e-02,
6.0840961e-02, 3.8501221e-01, 7.9768427e-02, 2.9003578e-01,
4.5228135e-03, -1.2797011e-01, 2.2270113e-02, -8.4269598e-02,
2.1191987e-01, -1.6733298e-01, 1.0912660e-01, 1.1670379e-01,
-1.2985474e-01, 7.8696609e-02, -1.6698521e-01, 1.5815806e-01,
1.5894224e-01, 4.2638958e-02, 3.0633986e-01, 1.5215552e-01,
-2.7814813e-02, 2.5740543e-01, -3.3318136e-02, 4.9430701e-01,
-1.2860653e-01, 1.2808253e-01, 2.7479374e-01, 2.4469435e-02,
6.7015857e-02, 1.1967997e-01, 5.7099588e-02, 4.1734684e-01,
-1.8816420e-01, 1.8340994e-01, 1.8140659e-01, -6.1031669e-02,
2.6886207e-01, -1.5314770e-01, 4.4642828e-02, -2.1207440e-01,
-3.0990593e-02, -3.7220663e-01, -4.8828661e-02, 5.2576914e-02,
-1.7090029e-01, -2.5869820e-02, -4.8212141e-01, -5.8500014e-02,
-1.7317493e-01, 3.7640739e-02, 1.4019746e-01, 3.2498634e-01,
4.1765594e-01, 2.2948909e-01, 1.2643699e-01, -6.7774974e-02,
1.6762753e-01, -2.9162568e-01, 3.2363981e-03, -2.9133266e-01,
-4.9335934e-02, -6.2273767e-02, -2.3081020e-01, 1.1932422e-01,
-2.3568818e-01, 3.0742079e-01, -2.9853183e-01, 3.8999829e-01,
-5.3858328e-01, 1.0293538e-01, -3.4805223e-01, -2.1274953e-01,
-5.5849046e-02, -4.0056542e-02, 3.6580348e-01, 4.6610707e-01,
5.6683410e-02, 4.0965819e-01, 5.0782688e-02, 6.1182901e-03,
-2.4555746e-01, -5.0187629e-01, -1.9487143e-01, -3.5387760e-01,
-5.8964893e-02, 1.4498779e-01, -2.3170851e-01, 2.9860672e-01,
-1.9317985e-01, 6.1017931e-02, -1.1533570e-01, -1.9597384e-01,
3.5556856e-01, -2.2353619e-01, 6.1620677e-01, -5.7297599e-01,
2.7382869e-01, -2.0830113e-01, 1.0651228e-01, -5.1332355e-02,
-3.9485583e-01, -5.1595878e-02, 2.5408182e-02, -1.6121480e-01,
-4.5110412e-02, 1.0172162e-01, 9.1470033e-02, 3.0106306e-01,
-1.4596304e-02, -1.4723733e-01, 1.8457155e-01, -3.0669570e-01,
4.3742776e-02, -1.3290963e-01, -1.3459724e-01, 3.4160227e-01,
1.3468790e-01, 3.2655066e-01, -5.2647412e-02, 1.3145214e-01,
-1.3840751e-01, 1.2150692e-01
}
IUT: PASS
float y[] = {2.966950,2.964003,3.035802,3.567513,2.983864,3.487861,3.089418,3.836586,2.979637,3.173361,3.524760,3.308944,3.511707,2.951268,3.500564,3.466951,3.174077,2.778949,3.124344,2.816606,3.196814,2.774090,3.272130,2.980138,2.646414,3.090803}
-- h_len = 8 --
float y_ref[] = {
3.8475716e-01, 5.4654378e-02, 4.0569583e-01, 1.9732465e-01,
6.2182777e-02, 4.6007581e-02, -7.4207053e-02, 3.2404867e-01,
-4.2646566e-01, 6.8503045e-02, -3.6317283e-01, 8.6252578e-02,
-1.0049982e-01, 2.8848641e-02, 2.3394865e-01, 3.8774294e-01,
-1.6008082e-01, -6.3331984e-02, -3.5713595e-01, -1.5972351e-01,
-7.2949402e-02, 3.3994686e-02, -5.0342709e-01, 3.3462417e-01,
-2.1306737e-01, -3.2629669e-01, 1.9106627e-02, -6.5554529e-03,
1.6688257e-01, 2.9751599e-01, -5.6339733e-02, 9.2541061e-02,
2.0728275e-01, 1.2827794e-01, -9.7970009e-02, 4.3152642e-01,
-1.3748240e-01, 2.3007628e-01, -3.5626933e-02, 5.6328601e-01,
-2.9200286e-01, 2.2552194e-01, -1.2183771e-02, -2.0528683e-01,
-1.8650457e-01, 2.0343314e-01, 1.1030240e-01, 1.6542999e-01,
-5.0732625e-01, 5.7128072e-03, 4.9520131e-02, -4.2610306e-02,
-1.4550450e-01, 3.7696707e-01, 2.0004307e-01, -2.8732342e-01,
-5.0631702e-01, -4.6201745e-01, -3.5400115e-02, 1.7888573e-01,
1.2771693e-01, 3.8287488e-01, 8.9138612e-02, 1.3326670e-01,
-1.3034222e-01, 5.0948876e-01, 1.6253988e-01, 6.1701983e-03,
-1.8704908e-01, -3.6797896e-03, 2.7523407e-01, -1.8157889e-01,
1.1319310e-01, 3.6894363e-01, 4.0642001e-02, 5.0084931e-01,
1.6419113e-01, -9.3116745e-02, -1.4887799e-02, 1.8011260e-01,
2.3029631e-01, 5.0040189e-02, 2.8201744e-01, 3.2292062e-01,
-5.3912316e-02, 4.0487966e-01, -1.8535823e-01, 3.6638471e-01,
2.3508060e-01, 2.7408370e-01, 3.6298230e-01, 4.0030634e-01,
1.5463566e-02, 2.4546035e-01, 1.3131972e-01, 6.6202849e-01,
-1.7166492e-01, 1.9913296e-01, 3.7397668e-01, -1.1465289e-01,
8.6804092e-02, -7.4657001e-02, 1.3127592e-01, 2.5572953e-01,
-3.1451899e-01, 1.1418183e-01, -5.4067936e-02, -1.8290916e-01,
1.8905841e-01, -1.2088218e-01, -4.6610184e-02, 1.1169069e-02,
-8.6249888e-02, -4.4575977e-01, 9.5960915e-02, 1.7753810e-01,
3.6300682e-02, 5.5117831e-02, -3.8269129e-01, -2.0953836e-01,
-1.6564047e-01, -4.0044457e-02, -4.7360756e-02, 2.7152115e-01,
2.8259674e-01, 2.5199765e-01, -1.9368437e-01, 1.8848507e-01,
-1.7319113e-01, -7.8186467e-02, -3.1400359e-01, -1.7798539e-01,
-2.6399142e-01, 1.2904029e-01, -3.8335827e-01, 2.1269161e-01,
-2.6879328e-01, 3.5377583e-01, -2.2276390e-01, 5.1084721e-01,
-5.0539166e-01, 1.9172835e-01, -4.3833745e-01, -2.8024608e-01,
-2.7436692e-01, -2.1930990e-01, 3.1899008e-01, 3.5051215e-01,
-1.9677331e-01, 3.0708387e-01, 8.9047387e-02, 6.5093994e-02,
-1.4114627e-01, -5.6711900e-01, -1.4393200e-01, -6.6919637e-01,
3.3267856e-01, -1.6962306e-01, -5.7009470e-02, 1.1567383e-01,
1.0546455e-02, -1.8580100e-01, -1.0087056e-01, -3.6209401e-01,
2.1601290e-01, -5.6721181e-02, 6.2750477e-01, -6.6158593e-01,
2.0232803e-01, -2.0528597e-01, 2.9394233e-01, -5.5906028e-03,
-4.0732571e-01, -3.8501829e-02, 4.9082294e-02, -1.0753187e-01,
2.2487987e-02, 1.2401126e-01, 1.3383606e-01, 5.4958242e-01,
-1.4030990e-01, 1.6715114e-01, 2.5830886e-01, -2.3100130e-02,
1.8861046e-01, 7.5817898e-02
}
IUT: PASS
==== TEST REAL SSE3 IMPLEMENTATION (hlen=12) ====
float x[] = {0.828957,0.675654,0.904170,0.191112,0.394521,0.706067,0.868924,0.547397,0.738959,0.932485,0.233119,0.926576,0.551443,0.933420,0.494407,0.552568,0.939129,0.799646,0.814139,0.594497,0.657201,0.995300,0.935852,0.324541,0.874309,0.589157,0.637771,0.759324,0.775421,0.794910,0.262785,0.604379,0.470564,0.166955}
-- h_len = 12 --
float y_ref[] = {
6.4393055e-01, 4.6198644e-02, 7.6961976e-01, 2.0338218e-01,
3.9040267e-01, -1.6305951e-02, 2.4174322e-01, 4.1924089e-01,
-3.4776792e-01, 3.3352980e-01, -5.1461613e-01, 1.7731960e-01,
-2.2109170e-01, -1.0692810e-02, 1.4568762e-01, 4.1487730e-01,
-2.3838633e-01, -1.6649896e-01, -3.2033300e-01, -4.3731174e-01,
8.0468273e-03, -2.3291506e-01, -4.5403457e-01, 6.3448392e-02,
-1.1199225e-01, -4.9399075e-01, 3.8827535e-02, -1.2175907e-01,
1.9211610e-01, 1.5748601e-02, 6.3713104e-02, -5.4411836e-02,
3.5910544e-01, 1.8089499e-01, -8.3849184e-02, 4.6952319e-01,
-1.3097216e-01, 3.9444518e-01, 9.2041276e-02, 9.1931677e-01,
-1.2274407e-01, 3.6744910e-01, -7.1317488e-03, -4.2744270e-01,
-2.4982110e-01, -3.4792073e-02, 1.7541768e-01, 1.8287193e-02,
-4.1735864e-01, -1.6319177e-01, 5.6148872e-02, -2.6662689e-02,
-1.3927303e-01, 4.2746758e-01, 9.9831104e-02, -4.5083967e-01,
-6.1798984e-01, -6.9752467e-01, -5.3983122e-02, 7.6239705e-02,
5.2488066e-02, 5.0055146e-01, 5.2418858e-03, 1.9365381e-01,
-2.3087139e-01, 4.5591956e-01, 1.0590754e-01, -9.3369529e-02,
-8.1770115e-02, -1.9278076e-01, 2.6090226e-01, -3.4908691e-01,
-9.9682942e-02, 2.0464768e-01, -8.8885106e-02, 2.1913598e-01,
1.9302976e-01, -4.1766539e-01, -1.2384148e-02, -1.4312698e-01,
1.7888883e-01, -9.5843635e-02, 1.2974171e-01, 2.6153788e-01,
-1.4377487e-01, 1.6859873e-01, -1.5012036e-01, 1.4864261e-01,
2.0533815e-01, 2.9625934e-01, 2.0829637e-01, 5.5547190e-01,
-1.1550704e-01, 4.3823749e-01, 1.9185613e-01, 8.7257445e-01,
-4.8663691e-02, 3.6873922e-01, 4.9050367e-01, -1.5719551e-01,
3.2744622e-01, -1.2747797e-01, 3.6327103e-01, 2.7797487e-01,
-3.7661973e-01, -5.4173008e-02, -3.0801520e-01, -3.8187349e-01,
-4.3708272e-02, -1.1218896e-01, -1.2676828e-01, 1.5835470e-01,
-6.8424262e-02, -2.3614784e-01, 1.8728454e-01, 2.7120337e-01,
2.6976854e-01, -7.8457497e-02, -7.5408332e-02, -4.3717277e-01,
1.5431963e-01, -3.1880721e-01, 3.1153014e-01, 2.9150844e-02,
6.3865691e-01, 2.4006522e-01, -5.9806503e-02, 2.3618667e-01,
-2.8634012e-01, -2.5914139e-01, -4.7454682e-01, -4.9253801e-01,
-2.3763049e-01, 3.4789924e-02, -2.4430576e-01, 4.3708864e-01,
-8.9352712e-02, 6.8092680e-01, -3.1629015e-02, 6.2322330e-01,
-4.4037020e-01, 3.1882312e-02, -3.3379188e-01, -3.5241520e-01,
-2.3012178e-01, -3.9647453e-02, 8.0349393e-02, 6.3374585e-01,
-5.8204597e-01, 6.3586301e-01, -2.9739904e-01, 4.4182721e-01,
-2.4614613e-01, -4.2897230e-01, -2.5775841e-02, -6.8895894e-01,
4.7509959e-01, -7.2605282e-02, -4.5277193e-02, 1.2873912e-01,
-6.7869939e-02, -3.1840524e-01, -1.3153505e-01, -4.3035623e-01,
1.5486053e-01, 6.7873567e-02, 5.2350283e-01, -5.4881847e-01,
2.1202806e-01, -3.7828991e-01, 3.5338366e-01, -3.6222893e-01,
-4.8003882e-01, -3.1656075e-01, 6.2320638e-02, -2.9101607e-01,
8.9793704e-02, -1.1759783e-01
}
IUT: PASS
float h[] = {0.726144,0.746635,0.470674,0.211604,0.963092,0.264553,0.265818,0.725771,0.590649,0.313560,0.547613,0.946811}
-- h_len = 16 --
float y_ref[] = {
6.2978989e-01, 1.4091884e-01, 1.0101279e+00, 1.2735325e-01,
3.4140077e-01, 1.3663004e-01, 2.5682330e-01, 5.0929654e-01,
-4.3438864e-01, 4.9080691e-01, -5.2847892e-01, 3.8783157e-01,
-3.1026769e-01, 1.1108337e-01, 1.0375749e-01, 3.3765313e-01,
-1.2323911e-01, 5.0369859e-02, -5.4584050e-01, -3.6808807e-01,
1.4700210e-01, -3.0531892e-01, -7.5355643e-01, 1.5413240e-01,
1.7050791e-01, -7.5538301e-01, -2.3915285e-01, -2.0387162e-01,
3.3901632e-01, -2.3372462e-01, -2.3276597e-01, 9.5151775e-02,
6.3186646e-01, 3.3316955e-01, -2.4707654e-01, 6.7647821e-01,
-3.4007065e-02, 4.8237050e-01, -3.8462095e-02, 9.1794872e-01,
-8.5097685e-02, 2.0627885e-01, -5.5439603e-03, -2.4289967e-01,
-5.2453101e-02, -8.6846918e-02, 4.6663910e-02, 3.1399831e-01,
-2.4083215e-01, -2.8138041e-01, -6.4303502e-03, -1.6721155e-01,
-9.3103498e-02, 5.2139711e-01, 2.5821176e-01, -5.7820058e-01,
-8.0225664e-01, -3.6109373e-01, -2.7383961e-02, 2.7160581e-02,
1.2773320e-01, 6.5818560e-01, 8.8922329e-02, 3.0489790e-01,
-1.2920819e-01, 6.1244529e-01, -3.4073405e-03, 9.3546256e-02,
-4.4875145e-02, -5.0849766e-03, 2.3889390e-01, -5.1130760e-01,
1.6360463e-01, 4.2029971e-01, -3.0263537e-01, 2.2376096e-01,
3.6833948e-01, -3.0413884e-01, -1.3956207e-01, -2.4732138e-01,
4.1098878e-01, -2.7388829e-01, 9.3243912e-02, 1.1229334e-01,
-2.4469408e-01, 8.2925551e-02, -1.9065322e-01, 1.0021861e-01,
6.5237284e-05, 4.9057111e-01, 1.2250574e-01, 4.0864003e-01,
-1.9107516e-01, 6.7219990e-01, 3.9303046e-01, 8.8695705e-01,
1.0575182e-01, 4.4379455e-01, 7.0351452e-01, -1.7413050e-01,
1.6268335e-01, -3.6398947e-01, 4.2087737e-01, 1.6894349e-01,
-6.3355178e-01, 2.6445135e-02, -4.0070909e-01, -2.5525039e-01,
-2.7707762e-01, 9.5453277e-02, -3.1189567e-01, 3.6464140e-01,
-2.4638277e-01, -2.5021696e-01, 3.6770612e-02, 1.9988173e-01,
4.0678823e-01, -6.1948225e-02, 1.3091478e-02, -2.9786986e-01,
3.4833667e-01, -1.0854194e-01, 9.5755681e-02, -8.9162305e-02,
6.5537059e-01, 1.7219633e-02, -3.7129313e-01, -2.0198233e-02,
-1.9736825e-01, -1.2085505e-01, -5.3962123e-01, -3.6008352e-01,
-3.5265970e-01, 1.0517791e-02, 1.5189931e-02, 3.0119994e-01,
-4.4719204e-02, 3.9169803e-01, 3.8661939e-01, 4.4619995e-01,
-3.1440073e-01, -1.6251071e-01, -5.4129058e-01, -1.6764219e-01,
-2.0251128e-01, -1.8941458e-01, -2.6222983e-01, 7.8157192e-01,
-2.5927317e-01, 6.1785626e-01, -4.6630061e-01, 5.0258636e-01,
-4.2175163e-02, -3.5761774e-01, -6.6525750e-02, -9.2430484e-01,
5.3185409e-01, 3.1188786e-02, -1.0949404e-01, 2.6275298e-01,
-6.2551588e-02, -2.0182461e-02, -7.3312223e-02, -3.3251107e-01,
1.2971312e-03, 1.4883006e-01, 7.2222137e-01, -4.2167574e-01,
2.1200123e-01, -2.1735358e-01
}
IUT: PASS
float y[] = {3.906606,3.831477,4.613783,4.371631,4.441847,4.311853,4.446086,5.089131,4.708794,4.314635,4.866886,4.812932,4.678810,4.796319,4.687846,5.426141,4.119072,4.687284,4.516533,4.303559,4.733458,4.146965,5.133350,4.832816,4.598291,4.252030}
-- h_len = 20 --
float y_ref[] = {
5.3420120e-01, 1.8982859e-01, 1.0291221e+00, 1.4028928e-01,
3.1864315e-01, 1.3011315e-01, 2.3228759e-01, 3.3733675e-01,
-2.9608598e-01, 5.8351099e-01, -6.8905240e-01, 4.1818243e-01,
-2.1958399e-01, -9.9827707e-02, -2.5024422e-02, 3.5823420e-01,
1.2323441e-01, -7.1719073e-02, -7.3568404e-01, -4.4755465e-01,
2.2529861e-01, -3.9330691e-01, -8.6953437e-01, 4.3487376e-01,
4.1921544e-01, -6.0425735e-01, -3.5521388e-01, -2.0435575e-01,
3.5010090e-01, -2.5316292e-01, -2.5977921e-01, 5.0659802e-02,
6.8681252e-01, 1.3008146e-01, -2.5315785e-01, 8.3824909e-01,
1.1510594e-01, 4.9176550e-01, -1.7823341e-01, 1.0102953e+00,
-2.3010623e-02, 3.5516348e-02, -2.1020740e-02, -3.8336021e-01,
-7.5408712e-02, 6.2125668e-02, 1.3044539e-01, 2.4428497e-01,
-4.0901759e-01, -9.4211206e-02, -2.6027888e-02, -2.1708845e-01,
4.6089791e-02, 5.2399588e-01, 2.9400715e-01, -5.4901767e-01,
-8.5219324e-01, -3.1177694e-01, -1.3685943e-01, 2.8320648e-02,
1.8308631e-01, 6.5987110e-01, 7.1065076e-02, 5.6479126e-02,
2.8107211e-02, 7.1808910e-01, -2.1963760e-01, 1.0674340e-01,
3.0151665e-02, -6.7855299e-02, 2.0626336e-01, -6.6742498e-01,
2.8878981e-01, 3.3218032e-01, -3.9555237e-01, 1.8781629e-01,
2.3412362e-01, -2.8779972e-01, -1.1563770e-01, -1.9100471e-01,
3.2126588e-01, -6.5900549e-02, 5.9671655e-02, -3.3305369e-02,
-1.7228185e-01, 2.1473107e-01, 5.1648900e-02, 1.6270636e-01,
5.1605098e-02, 4.2832869e-01, 1.4614150e-01, 3.1187207e-01,
-3.8980633e-01, 5.3980416e-01, 4.0656170e-01, 8.7101591e-01,
-5.0582461e-02, 5.9981346e-01, 6.6075850e-01, -5.1777057e-02,
1.1775772e-01, -3.0120179e-01, 4.1258806e-01, 2.2372836e-01,
-6.3340878e-01, -1.0023584e-01, -3.5639426e-01, -4.1479793e-01,
-2.8701484e-02, 1.3821782e-01, -2.1146557e-01, 4.9781999e-01,
-1.9069955e-01, -2.0883378e-01, -1.7931046e-01, -3.0740768e-02,
4.4180271e-01, -2.3938809e-01, -1.2956862e-01, -3.4948981e-01,
4.6737233e-01, 1.3115168e-01, 1.5102869e-01, 3.4803081e-02,
5.6759942e-01, -1.0904521e-01, -1.3368645e-01, -1.3472131e-01,
-1.2760213e-01, -2.1923567e-01, -3.9361662e-01, -3.7525508e-01,
-4.1372731e-01, 1.0058865e-02, -3.0391473e-01, 5.9899330e-01,
-4.2276721e-02, 3.2056916e-01, 2.3163396e-01, 5.0194943e-01,
-4.9533814e-02, -8.7483667e-02, -6.4559376e-01, -1.4032331e-01,
-1.2611310e-01, -2.1656755e-01, -2.7046436e-01, 5.9975326e-01,
-2.6020509e-01, 7.6019281e-01, -5.6156492e-01, 6.6070211e-01,
-1.8862886e-02, -2.5943482e-01, 1.1894502e-02, -1.0142385e+00,
3.6559796e-01, -1.1502273e-02, 4.4767298e-02, 2.9584533e-01,
3.7647784e-05, -1.3147809e-02
}
IUT: PASS
==== TEST REAL SSE3 IMPLEMENTATION (hlen=16) ====
float x[] = {0.828957,0.675654,0.904170,0.191112,0.394521,0.706067,0.868924,0.547397,0.738959,0.932485,0.233119,0.926576,0.551443,0.933420,0.494407,0.552568,0.939129,0.799646,0.814139,0.594497,0.657201,0.995300,0.935852,0.324541,0.874309,0.589157,0.637771,0.759324,0.775421,0.794910,0.262785,0.604379,0.470564,0.166955}
-- h_len = 24 --
float y_ref[] = {
5.3222823e-01, 2.4987461e-01, 1.0283798e+00, 3.5526699e-01,
1.8697870e-01, 1.4152247e-01, 2.6207578e-01, 3.3484158e-01,
-3.8979149e-01, 6.8604469e-01, -6.4926147e-01, 2.5207657e-01,
-3.7822890e-01, -2.2347759e-01, 1.0470502e-02, 3.0506110e-01,
4.1011676e-02, -5.4856278e-03, -6.8150079e-01, -4.5345306e-01,
1.0741763e-01, -2.7692410e-01, -8.1968147e-01, 5.7136154e-01,
3.8703418e-01, -6.2253571e-01, -3.7823236e-01, -2.0682354e-01,
3.9098579e-01, -9.6905150e-02, -1.8801522e-01, 3.7933946e-02,
6.3939112e-01, 2.3467670e-01, -1.2109100e-01, 8.0106556e-01,
1.0727239e-01, 5.1171827e-01, -1.2966292e-01, 1.0506585e+00,
4.7837995e-02, -1.8024493e-02, -1.1318965e-01, -2.0743141e-01,
1.1125680e-02, 7.5636670e-02, 2.0156327e-01, 3.7243477e-01,
-4.1752231e-01, 6.4663544e-02, 2.6038434e-02, -7.9749018e-02,
7.5686403e-02, 6.9610006e-01, 3.2217339e-01, -4.2776427e-01,
-8.2220536e-01, -2.7305549e-01, -5.1647477e-02, 2.4109566e-01,
1.1434041e-01, 6.7494470e-01, 2.0092924e-01, 8.2214177e-02,
2.3937991e-02, 7.1629500e-01, -1.4283688e-01, 2.4555638e-02,
-3.3275224e-03, -1.8814500e-01, 1.9801255e-01, -7.5389367e-01,
2.5686151e-01, 3.2000977e-01, -5.6981736e-01, 1.9078457e-01,
1.6527905e-01, -3.5737312e-01, -1.0835774e-01, -4.6288706e-03,
3.7699357e-01, -5.8192119e-02, 1.1966180e-01, -3.6230266e-02,
-6.2654182e-02, 2.3270245e-01, 4.6139322e-02, 5.6660924e-02,
7.4724287e-02, 3.8419297e-01, -1.0854036e-02, 3.5449454e-01,
-4.8905340e-01, 5.8285779e-01, 2.2925827e-01, 1.0132606e+00,
-2.5498384e-01, 7.4497163e-01, 4.7515452e-01, -4.3996703e-02,
1.8788118e-02, -2.4939571e-01, 4.2214888e-01, 3.3849144e-01,
-6.4541113e-01, -1.9279037e-02, -3.2691205e-01, -3.7180674e-01,
-8.7503098e-02, 8.6730786e-02, -2.2775947e-01, 4.2456102e-01,
-3.5831624e-01, -2.8897566e-01, -1.6724513e-01, -4.8065744e-04,
3.2799438e-01, -2.8447020e-01, -1.7178029e-01, -4.0660527e-01,
6.4187962e-01, 7.3846772e-02, 2.0672068e-01, -1.5366182e-01,
7.9900908e-01, -3.0084166e-01, -5.6068588e-02, -2.3155114e-01,
-1.4066668e-01, -2.3897269e-01, -3.4076381e-01, -5.3892708e-01,
-5.2356058e-01, 1.0604618e-01, -2.1092136e-01, 6.1905998e-01,
-1.2905772e-01, 3.0114239e-01, 3.4632972e-01, 4.9826914e-01,
-3.2198433e-02, -1.3098410e-01, -6.5234804e-01, -2.6879534e-02,
-1.2591730e-01, -1.4107151e-01, -2.1500964e-01, 7.3180163e-01,
-2.9997614e-01, 8.9727402e-01, -6.1047006e-01, 8.2901204e-01,
1.4916256e-01, -1.1074571e-01
}
IUT: PASS
float h[] = {0.726144,0.746635,0.470674,0.211604,0.963092,0.264553,0.265818,0.725771,0.590649,0.313560,0.547613,0.946811,0.793753,0.690502,0.276120,0.792995}
-- h_len = 25 --
float y_ref[] = {
5.6923628e-01, 2.1122517e-01, 1.0261438e+00, 3.9087293e-01,
2.1345332e-01, 1.2748230e-01, 2.2850943e-01, 3.4783924e-01,
-3.5230315e-01, 7.2733051e-01, -6.4562315e-01, 2.8965744e-01,
-3.5571671e-01, -2.6395977e-01, -2.4125937e-02, 2.7487630e-01,
6.1082214e-02, -8.9056650e-03, -6.7076659e-01, -4.7583869e-01,
1.0633938e-01, -2.9135108e-01, -8.2240015e-01, 6.1099094e-01,
3.9239836e-01, -6.5968055e-01, -4.1528901e-01, -2.2582904e-01,
4.0954006e-01, -1.1742298e-01, -2.0005567e-01, 5.0488133e-02,
6.2323201e-01, 2.6340836e-01, -1.1593901e-01, 7.6837289e-01,
7.8955740e-02, 5.2631205e-01, -1.0634977e-01, 1.0168220e+00,
6.0709454e-02, -3.4081139e-02, -1.5245335e-01, -2.1505590e-01,
-2.4050269e-03, 4.4784673e-02, 2.0676211e-01, 3.3596647e-01,
-4.0878621e-01, 3.7588470e-02, 1.0192644e-02, -1.1509930e-01,
7.7192493e-02, 7.2054374e-01, 2.8344434e-01, -4.6390212e-01,
-7.9189837e-01, -3.0583304e-01, -6.3782707e-02, 2.4694878e-01,
1.0405317e-01, 6.9377714e-01, 1.7632841e-01, 9.8775931e-02,
2.8493963e-02, 7.3985428e-01, -1.1595964e-01, 4.3803096e-02,
-8.1826039e-03, -1.8096274e-01, 2.2643171e-01, -7.8888541e-01,
2.9099524e-01, 3.5217339e-01, -5.6942546e-01, 1.6837947e-01,
1.2358252e-01, -3.9479053e-01, -1.2422302e-01, 5.4188324e-03,
3.5550922e-01, -4.6895050e-02, 1.3736735e-01, -1.3200402e-02,
-6.9921568e-02, 2.5468710e-01, 7.6039456e-02, 3.6692373e-02,
1.0693954e-01, 3.5894975e-01, 1.8038662e-02, 3.3341441e-01,
-4.5245603e-01, 5.4414153e-01, 2.6774803e-01, 1.0114058e+00,
-2.2713269e-01, 7.6733053e-01, 4.4953904e-01, -6.4008765e-02,
5.3210976e-03, -2.8960016e-01, 4.1015321e-01, 3.1615001e-01,
-6.1044383e-01, 1.2125295e-02, -3.2865044e-01, -3.3552784e-01,
-5.0508052e-02, 1.1213525e-01, -2.2444099e-01, 3.9706358e-01,
-3.6462283e-01, -3.1095976e-01, -1.2870458e-01, 2.8953498e-02,
2.8498679e-01, -2.5903764e-01, -2.0532282e-01, -3.7438947e-01,
5.9906083e-01, 1.1220507e-01, 1.7484823e-01, -1.1687514e-01,
8.4165925e-01, -3.3723247e-01, -6.4144842e-02, -2.0405796e-01,
-1.1169051e-01, -2.2965629e-01, -3.7629083e-01, -5.6642604e-01,
-5.1144052e-01, 1.0153162e-01, -2.1775363e-01, 6.2313920e-01,
-1.4517608e-01, 3.3557987e-01, 3.4367973e-01, 4.7613770e-01,
-1.5364632e-02, -1.7332375e-01, -6.5896881e-01, -5.9360463e-02,
-1.4723293e-01, -1.5215987e-01, -1.7504829e-01, 7.1048653e-01,
-3.2864007e-01, 8.6068469e-01, -6.4257747e-01, 8.0053741e-01
}
IUT: PASS
float y[] = {4.845784,5.086479,6.160082,6.147918,5.549072,5.538811,6.264142,6.083664,5.942431,5.214122,6.458036,6.120992,6.385656,5.751343,6.099504,6.738166,5.942206,5.756058,6.343914,6.239408,6.090616,6.325348,6.214744,6.674619,5.691174,6.413076}
==== TEST REAL SSE3 IMPLEMENTATION (hlen=20) ====
float x[] = {0.828957,0.675654,0.904170,0.191112,0.394521,0.706067,0.868924,0.547397,0.738959,0.932485,0.233119,0.926576,0.551443,0.933420,0.494407,0.552568,0.939129,0.799646,0.814139,0.594497,0.657201,0.995300,0.935852,0.324541,0.874309,0.589157,0.637771,0.759324,0.775421,0.794910,0.262785,0.604379,0.470564,0.166955}
float h[] = {0.726144,0.746635,0.470674,0.211604,0.963092,0.264553,0.265818,0.725771,0.590649,0.313560,0.547613,0.946811,0.793753,0.690502,0.276120,0.792995,0.446645,0.327805,0.785346,0.676628}
float y[] = {6.148925,6.262301,5.792440,6.652380,6.759685,6.515733,6.943458,6.334218,6.539823,6.542612,7.766725,7.472028,7.258010,6.947061,7.347066,7.503224,7.134092,6.244353,7.690946,7.584768,7.779833,6.845586,7.351567,8.099596,7.393943,7.176465}
==== TEST REAL SSE3 IMPLEMENTATION (h_len%4=0) ====
float x[] = {0.828957,0.675654,0.904170,0.191112,0.394521,0.706067,0.868924,0.547397,0.738959,0.932485,0.233119,0.926576,0.551443,0.933420,0.494407,0.552568,0.939129,0.799646,0.814139,0.594497,0.657201,0.995300,0.935852,0.324541,0.874309,0.589157,0.637771,0.759324,0.775421,0.794910,0.262785,0.604379,0.470564,0.166955}
float h[] = {0.726144,0.746635,0.470674,0.211604,0.963092,0.264553,0.265818,0.725771,0.590649,0.313560,0.547613,0.946811,0.793753,0.690502,0.276120,0.792995,0.446645,0.327805,0.785346,0.676628,0.906507,0.279178,0.015699,0.609179}
float y[] = {7.032490,7.904466,6.745667,7.146502,6.958916,7.972230,7.314566,6.972099,7.773273,7.740826,7.380684,7.907260,8.446323,7.862378,8.022881,7.726059,7.748359,7.602177,8.926439,8.905205,8.569546,7.948394,8.588051,8.850824,8.592319,7.636216}

View File

@@ -52,8 +52,6 @@ AT_CLEANUP
AT_SETUP([convolve_test])
AT_KEYWORDS([convolve_test])
# Different results for i686, x86_64 and ARM. see OS#2826, OS#2828, and https://lists.osmocom.org/pipermail/openbsc/2018-January/011655.html
AT_SKIP_IF(true)
cat $abs_srcdir/Transceiver52M/convolve_test.ok > expout
AT_CHECK([$abs_top_builddir/tests/Transceiver52M/convolve_test], [], [expout], [])
AT_CLEANUP