]> localhost Git - SCSI2SD-V6.git/blob
481197366ac0a4a41235bea49dcda462e37b8907
[SCSI2SD-V6.git] /
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_biquad_cascade_stereo_df2T_f32.c
9 *
10 * Description: Processing function for the floating-point transposed
11 * direct form II Biquad cascade filter. 2 channels
12 *
13 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * - Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * - Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in
22 * the documentation and/or other materials provided with the
23 * distribution.
24 * - Neither the name of ARM LIMITED nor the names of its contributors
25 * may be used to endorse or promote products derived from this
26 * software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
31 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
32 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
33 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
34 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
35 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
36 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
38 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 * POSSIBILITY OF SUCH DAMAGE.
40 * -------------------------------------------------------------------- */
41
42 #include "arm_math.h"
43
44 /**
45 * @ingroup groupFilters
46 */
47
48 /**
49 * @defgroup BiquadCascadeDF2T Biquad Cascade IIR Filters Using a Direct Form II Transposed Structure
50 *
51 * This set of functions implements arbitrary order recursive (IIR) filters using a transposed direct form II structure.
52 * The filters are implemented as a cascade of second order Biquad sections.
53 * These functions provide a slight memory savings as compared to the direct form I Biquad filter functions.
54 * Only floating-point data is supported.
55 *
56 * These functions operate on blocks of input and output data and each call to a function
57 * processes <code>blockSize</code> samples through the filter.
58 * <code>pSrc</code> points to the array of input data and
59 * <code>pDst</code> points to the array of output data.
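60 * Both arrays contain <code>blockSize</code> values (<code>2*blockSize</code> interleaved values for the stereo variant, which handles two channels per sample).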
61 *
62 * \par Algorithm
63 * Each Biquad stage implements a second order filter using the difference equation:
64 * <pre>
65 * y[n] = b0 * x[n] + d1
66 * d1 = b1 * x[n] + a1 * y[n] + d2
67 * d2 = b2 * x[n] + a2 * y[n]
68 * </pre>
69 * where d1 and d2 represent the two state values.
70 *
71 * \par
72 * A Biquad filter using a transposed Direct Form II structure is shown below.
73 * \image html BiquadDF2Transposed.gif "Single transposed Direct Form II Biquad"
74 * Coefficients <code>b0, b1, and b2 </code> multiply the input signal <code>x[n]</code> and are referred to as the feedforward coefficients.
75 * Coefficients <code>a1</code> and <code>a2</code> multiply the output signal <code>y[n]</code> and are referred to as the feedback coefficients.
76 * Pay careful attention to the sign of the feedback coefficients.
77 * Some design tools flip the sign of the feedback coefficients:
78 * <pre>
79 * y[n] = b0 * x[n] + d1;
80 * d1 = b1 * x[n] - a1 * y[n] + d2;
81 * d2 = b2 * x[n] - a2 * y[n];
82 * </pre>
83 * In this case the feedback coefficients <code>a1</code> and <code>a2</code> must be negated when used with the CMSIS DSP Library.
84 *
85 * \par
86 * Higher order filters are realized as a cascade of second order sections.
87 * <code>numStages</code> refers to the number of second order stages used.
88 * For example, an 8th order filter would be realized with <code>numStages=4</code> second order stages.
89 * A 9th order filter would be realized with <code>numStages=5</code> second order stages with the
90 * coefficients for one of the stages configured as a first order filter (<code>b2=0</code> and <code>a2=0</code>).
91 *
92 * \par
93 * <code>pState</code> points to the state variable array.
94 * Each Biquad stage has 2 state variables <code>d1</code> and <code>d2</code>.
95 * The state variables are arranged in the <code>pState</code> array as:
96 * <pre>
97 * {d11, d12, d21, d22, ...}
98 * </pre>
99 * where <code>d1x</code> refers to the state variables for the first Biquad and
100 * <code>d2x</code> refers to the state variables for the second Biquad.
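101 * The state array has a total length of <code>2*numStages</code> values (<code>4*numStages</code> for the stereo variant, which keeps <code>{d1a, d2a, d1b, d2b}</code> for each stage).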
102 * The state variables are updated after each block of data is processed; the coefficients are untouched.
103 *
104 * \par
105 * The CMSIS library contains Biquad filters in both Direct Form I and transposed Direct Form II.
106 * The advantage of the Direct Form I structure is that it is numerically more robust for fixed-point data types.
107 * That is why the Direct Form I structure supports Q15 and Q31 data types.
108 * The transposed Direct Form II structure, on the other hand, requires a wide dynamic range for the state variables <code>d1</code> and <code>d2</code>.
109 * Because of this, the CMSIS library only has a floating-point version of the Direct Form II Biquad.
110 * The advantage of the Direct Form II Biquad is that it requires half the number of state variables, 2 rather than 4, per Biquad stage.
111 *
112 * \par Instance Structure
113 * The coefficients and state variables for a filter are stored together in an instance data structure.
114 * A separate instance structure must be defined for each filter.
115 * Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.
116 *
117 * \par Init Functions
118 * There is also an associated initialization function.
119 * The initialization function performs the following operations:
120 * - Sets the values of the internal structure fields.
121 * - Zeros out the values in the state buffer.
122 * To do this manually without calling the init function, assign the following subfields of the instance structure:
123 * numStages, pCoeffs, pState. Also set all of the values in pState to zero.
124 *
125 * \par
126 * Use of the initialization function is optional.
127 * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
128 * To place an instance structure into a const data section, the instance structure must be manually initialized.
129 * Set the values in the state buffer to zeros before static initialization.
130 * For example, to statically initialize the instance structure use
131 * <pre>
132 * arm_biquad_cascade_df2T_instance_f32 S1 = {numStages, pState, pCoeffs};
133 * </pre>
134 * where <code>numStages</code> is the number of Biquad stages in the filter, <code>pState</code> is the address of the state buffer, and
135 * <code>pCoeffs</code> is the address of the coefficient buffer.
136 *
137 */
138
139 /**
140 * @addtogroup BiquadCascadeDF2T
141 * @{
142 */
143
144 /**
145 * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter.
146 * @param[in] *S points to an instance of the filter data structure.
147 * @param[in] *pSrc points to the block of input data.
148 * @param[out] *pDst points to the block of output data
149 * @param[in] blockSize number of samples to process.
150 * @return none.
151 */
152
153
154 LOW_OPTIMIZATION_ENTER
155 void arm_biquad_cascade_stereo_df2T_f32(
156 const arm_biquad_cascade_stereo_df2T_instance_f32 * S,
157 float32_t * pSrc,
158 float32_t * pDst,
159 uint32_t blockSize)
160 {
161
162 float32_t *pIn = pSrc; /* source pointer */
163 float32_t *pOut = pDst; /* destination pointer */
164 float32_t *pState = S->pState; /* State pointer */
165 float32_t *pCoeffs = S->pCoeffs; /* coefficient pointer */
166 float32_t acc1a, acc1b; /* accumulator */
167 float32_t b0, b1, b2, a1, a2; /* Filter coefficients */
168 float32_t Xn1a, Xn1b; /* temporary input */
169 float32_t d1a, d2a, d1b, d2b; /* state variables */
170 uint32_t sample, stage = S->numStages; /* loop counters */
171
172 #if defined(ARM_MATH_CM7)
173
174 float32_t Xn2a, Xn3a, Xn4a, Xn5a, Xn6a, Xn7a, Xn8a; /* Input State variables */
175 float32_t Xn2b, Xn3b, Xn4b, Xn5b, Xn6b, Xn7b, Xn8b; /* Input State variables */
176 float32_t acc2a, acc3a, acc4a, acc5a, acc6a, acc7a, acc8a; /* Simulates the accumulator */
177 float32_t acc2b, acc3b, acc4b, acc5b, acc6b, acc7b, acc8b; /* Simulates the accumulator */
178
179 do
180 {
181 /* Reading the coefficients */
182 b0 = pCoeffs[0];
183 b1 = pCoeffs[1];
184 b2 = pCoeffs[2];
185 a1 = pCoeffs[3];
186 /* Apply loop unrolling and compute 8 output values simultaneously. */
187 sample = blockSize >> 3u;
188 a2 = pCoeffs[4];
189
190 /*Reading the state values */
191 d1a = pState[0];
192 d2a = pState[1];
193 d1b = pState[2];
194 d2b = pState[3];
195
196 pCoeffs += 5u;
197
198 /* First part of the processing with loop unrolling. Compute 8 outputs at a time.
199 ** a second loop below computes the remaining 1 to 7 samples. */
200 while(sample > 0u) {
201
202 /* y[n] = b0 * x[n] + d1 */
203 /* d1 = b1 * x[n] + a1 * y[n] + d2 */
204 /* d2 = b2 * x[n] + a2 * y[n] */
205
206 /* Read the first 2 inputs. 2 cycles */
207 Xn1a = pIn[0 ];
208 Xn1b = pIn[1 ];
209
210 /* Sample 1. 5 cycles */
211 Xn2a = pIn[2 ];
212 acc1a = b0 * Xn1a + d1a;
213
214 Xn2b = pIn[3 ];
215 d1a = b1 * Xn1a + d2a;
216
217 Xn3a = pIn[4 ];
218 d2a = b2 * Xn1a;
219
220 Xn3b = pIn[5 ];
221 d1a += a1 * acc1a;
222
223 Xn4a = pIn[6 ];
224 d2a += a2 * acc1a;
225
226 /* Sample 2. 5 cycles */
227 Xn4b = pIn[7 ];
228 acc1b = b0 * Xn1b + d1b;
229
230 Xn5a = pIn[8 ];
231 d1b = b1 * Xn1b + d2b;
232
233 Xn5b = pIn[9 ];
234 d2b = b2 * Xn1b;
235
236 Xn6a = pIn[10];
237 d1b += a1 * acc1b;
238
239 Xn6b = pIn[11];
240 d2b += a2 * acc1b;
241
242 /* Sample 3. 5 cycles */
243 Xn7a = pIn[12];
244 acc2a = b0 * Xn2a + d1a;
245
246 Xn7b = pIn[13];
247 d1a = b1 * Xn2a + d2a;
248
249 Xn8a = pIn[14];
250 d2a = b2 * Xn2a;
251
252 Xn8b = pIn[15];
253 d1a += a1 * acc2a;
254
255 pIn += 16;
256 d2a += a2 * acc2a;
257
258 /* Sample 4. 5 cycles */
259 acc2b = b0 * Xn2b + d1b;
260 d1b = b1 * Xn2b + d2b;
261 d2b = b2 * Xn2b;
262 d1b += a1 * acc2b;
263 d2b += a2 * acc2b;
264
265 /* Sample 5. 5 cycles */
266 acc3a = b0 * Xn3a + d1a;
267 d1a = b1 * Xn3a + d2a;
268 d2a = b2 * Xn3a;
269 d1a += a1 * acc3a;
270 d2a += a2 * acc3a;
271
272 /* Sample 6. 5 cycles */
273 acc3b = b0 * Xn3b + d1b;
274 d1b = b1 * Xn3b + d2b;
275 d2b = b2 * Xn3b;
276 d1b += a1 * acc3b;
277 d2b += a2 * acc3b;
278
279 /* Sample 7. 5 cycles */
280 acc4a = b0 * Xn4a + d1a;
281 d1a = b1 * Xn4a + d2a;
282 d2a = b2 * Xn4a;
283 d1a += a1 * acc4a;
284 d2a += a2 * acc4a;
285
286 /* Sample 8. 5 cycles */
287 acc4b = b0 * Xn4b + d1b;
288 d1b = b1 * Xn4b + d2b;
289 d2b = b2 * Xn4b;
290 d1b += a1 * acc4b;
291 d2b += a2 * acc4b;
292
293 /* Sample 9. 5 cycles */
294 acc5a = b0 * Xn5a + d1a;
295 d1a = b1 * Xn5a + d2a;
296 d2a = b2 * Xn5a;
297 d1a += a1 * acc5a;
298 d2a += a2 * acc5a;
299
300 /* Sample 10. 5 cycles */
301 acc5b = b0 * Xn5b + d1b;
302 d1b = b1 * Xn5b + d2b;
303 d2b = b2 * Xn5b;
304 d1b += a1 * acc5b;
305 d2b += a2 * acc5b;
306
307 /* Sample 11. 5 cycles */
308 acc6a = b0 * Xn6a + d1a;
309 d1a = b1 * Xn6a + d2a;
310 d2a = b2 * Xn6a;
311 d1a += a1 * acc6a;
312 d2a += a2 * acc6a;
313
314 /* Sample 12. 5 cycles */
315 acc6b = b0 * Xn6b + d1b;
316 d1b = b1 * Xn6b + d2b;
317 d2b = b2 * Xn6b;
318 d1b += a1 * acc6b;
319 d2b += a2 * acc6b;
320
321 /* Sample 13. 5 cycles */
322 acc7a = b0 * Xn7a + d1a;
323 d1a = b1 * Xn7a + d2a;
324
325 pOut[0 ] = acc1a ;
326 d2a = b2 * Xn7a;
327
328 pOut[1 ] = acc1b ;
329 d1a += a1 * acc7a;
330
331 pOut[2 ] = acc2a ;
332 d2a += a2 * acc7a;
333
334 /* Sample 14. 5 cycles */
335 pOut[3 ] = acc2b ;
336 acc7b = b0 * Xn7b + d1b;
337
338 pOut[4 ] = acc3a ;
339 d1b = b1 * Xn7b + d2b;
340
341 pOut[5 ] = acc3b ;
342 d2b = b2 * Xn7b;
343
344 pOut[6 ] = acc4a ;
345 d1b += a1 * acc7b;
346
347 pOut[7 ] = acc4b ;
348 d2b += a2 * acc7b;
349
350 /* Sample 15. 5 cycles */
351 pOut[8 ] = acc5a ;
352 acc8a = b0 * Xn8a + d1a;
353
354 pOut[9 ] = acc5b;
355 d1a = b1 * Xn8a + d2a;
356
357 pOut[10] = acc6a;
358 d2a = b2 * Xn8a;
359
360 pOut[11] = acc6b;
361 d1a += a1 * acc8a;
362
363 pOut[12] = acc7a;
364 d2a += a2 * acc8a;
365
366 /* Sample 16. 5 cycles */
367 pOut[13] = acc7b;
368 acc8b = b0 * Xn8b + d1b;
369
370 pOut[14] = acc8a;
371 d1b = b1 * Xn8b + d2b;
372
373 pOut[15] = acc8b;
374 d2b = b2 * Xn8b;
375
376 sample--;
377 d1b += a1 * acc8b;
378
379 pOut += 16;
380 d2b += a2 * acc8b;
381 }
382
383 sample = blockSize & 0x7u;
384 while(sample > 0u) {
385 /* Read the input */
386 Xn1a = *pIn++; //Channel a
387 Xn1b = *pIn++; //Channel b
388
389 /* y[n] = b0 * x[n] + d1 */
390 acc1a = (b0 * Xn1a) + d1a;
391 acc1b = (b0 * Xn1b) + d1b;
392
393 /* Store the result in the accumulator in the destination buffer. */
394 *pOut++ = acc1a;
395 *pOut++ = acc1b;
396
397 /* Every time after the output is computed state should be updated. */
398 /* d1 = b1 * x[n] + a1 * y[n] + d2 */
399 d1a = ((b1 * Xn1a) + (a1 * acc1a)) + d2a;
400 d1b = ((b1 * Xn1b) + (a1 * acc1b)) + d2b;
401
402 /* d2 = b2 * x[n] + a2 * y[n] */
403 d2a = (b2 * Xn1a) + (a2 * acc1a);
404 d2b = (b2 * Xn1b) + (a2 * acc1b);
405
406 sample--;
407 }
408
409 /* Store the updated state variables back into the state array */
410 pState[0] = d1a;
411 pState[1] = d2a;
412
413 pState[2] = d1b;
414 pState[3] = d2b;
415
416 /* The current stage input is given as the output to the next stage */
417 pIn = pDst;
418 /* decrement the loop counter */
419 stage--;
420
421 pState += 4u;
422 /*Reset the output working pointer */
423 pOut = pDst;
424
425 } while(stage > 0u);
426
427 #elif defined(ARM_MATH_CM0_FAMILY)
428
429 /* Run the below code for Cortex-M0 */
430
431 do
432 {
433 /* Reading the coefficients */
434 b0 = *pCoeffs++;
435 b1 = *pCoeffs++;
436 b2 = *pCoeffs++;
437 a1 = *pCoeffs++;
438 a2 = *pCoeffs++;
439
440 /*Reading the state values */
441 d1a = pState[0];
442 d2a = pState[1];
443 d1b = pState[2];
444 d2b = pState[3];
445
446
447 sample = blockSize;
448
449 while(sample > 0u)
450 {
451 /* Read the input */
452 Xn1a = *pIn++; //Channel a
453 Xn1b = *pIn++; //Channel b
454
455 /* y[n] = b0 * x[n] + d1 */
456 acc1a = (b0 * Xn1a) + d1a;
457 acc1b = (b0 * Xn1b) + d1b;
458
459 /* Store the result in the accumulator in the destination buffer. */
460 *pOut++ = acc1a;
461 *pOut++ = acc1b;
462
463 /* Every time after the output is computed state should be updated. */
464 /* d1 = b1 * x[n] + a1 * y[n] + d2 */
465 d1a = ((b1 * Xn1a) + (a1 * acc1a)) + d2a;
466 d1b = ((b1 * Xn1b) + (a1 * acc1b)) + d2b;
467
468 /* d2 = b2 * x[n] + a2 * y[n] */
469 d2a = (b2 * Xn1a) + (a2 * acc1a);
470 d2b = (b2 * Xn1b) + (a2 * acc1b);
471
472 /* decrement the loop counter */
473 sample--;
474 }
475
476 /* Store the updated state variables back into the state array */
477 *pState++ = d1a;
478 *pState++ = d2a;
479 *pState++ = d1b;
480 *pState++ = d2b;
481
482 /* The current stage input is given as the output to the next stage */
483 pIn = pDst;
484
485 /*Reset the output working pointer */
486 pOut = pDst;
487
488 /* decrement the loop counter */
489 stage--;
490
491 } while(stage > 0u);
492
493 #else
494
495 float32_t Xn2a, Xn3a, Xn4a; /* Input State variables */
496 float32_t Xn2b, Xn3b, Xn4b; /* Input State variables */
497 float32_t acc2a, acc3a, acc4a; /* accumulator */
498 float32_t acc2b, acc3b, acc4b; /* accumulator */
499 float32_t p0a, p1a, p2a, p3a, p4a, A1a;
500 float32_t p0b, p1b, p2b, p3b, p4b, A1b;
501
502 /* Run the below code for Cortex-M4 and Cortex-M3 */
503 do
504 {
505 /* Reading the coefficients */
506 b0 = *pCoeffs++;
507 b1 = *pCoeffs++;
508 b2 = *pCoeffs++;
509 a1 = *pCoeffs++;
510 a2 = *pCoeffs++;
511
512 /*Reading the state values */
513 d1a = pState[0];
514 d2a = pState[1];
515 d1b = pState[2];
516 d2b = pState[3];
517
518 /* Apply loop unrolling and compute 4 output values simultaneously. */
519 sample = blockSize >> 2u;
520
521 /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
522 ** a second loop below computes the remaining 1 to 3 samples. */
523 while(sample > 0u) {
524
525 /* y[n] = b0 * x[n] + d1 */
526 /* d1 = b1 * x[n] + a1 * y[n] + d2 */
527 /* d2 = b2 * x[n] + a2 * y[n] */
528
529 /* Read the four inputs */
530 Xn1a = pIn[0];
531 Xn1b = pIn[1];
532 Xn2a = pIn[2];
533 Xn2b = pIn[3];
534 Xn3a = pIn[4];
535 Xn3b = pIn[5];
536 Xn4a = pIn[6];
537 Xn4b = pIn[7];
538 pIn += 8;
539
540 p0a = b0 * Xn1a;
541 p0b = b0 * Xn1b;
542 p1a = b1 * Xn1a;
543 p1b = b1 * Xn1b;
544 acc1a = p0a + d1a;
545 acc1b = p0b + d1b;
546 p0a = b0 * Xn2a;
547 p0b = b0 * Xn2b;
548 p3a = a1 * acc1a;
549 p3b = a1 * acc1b;
550 p2a = b2 * Xn1a;
551 p2b = b2 * Xn1b;
552 A1a = p1a + p3a;
553 A1b = p1b + p3b;
554 p4a = a2 * acc1a;
555 p4b = a2 * acc1b;
556 d1a = A1a + d2a;
557 d1b = A1b + d2b;
558 d2a = p2a + p4a;
559 d2b = p2b + p4b;
560
561 p1a = b1 * Xn2a;
562 p1b = b1 * Xn2b;
563 acc2a = p0a + d1a;
564 acc2b = p0b + d1b;
565 p0a = b0 * Xn3a;
566 p0b = b0 * Xn3b;
567 p3a = a1 * acc2a;
568 p3b = a1 * acc2b;
569 p2a = b2 * Xn2a;
570 p2b = b2 * Xn2b;
571 A1a = p1a + p3a;
572 A1b = p1b + p3b;
573 p4a = a2 * acc2a;
574 p4b = a2 * acc2b;
575 d1a = A1a + d2a;
576 d1b = A1b + d2b;
577 d2a = p2a + p4a;
578 d2b = p2b + p4b;
579
580 p1a = b1 * Xn3a;
581 p1b = b1 * Xn3b;
582 acc3a = p0a + d1a;
583 acc3b = p0b + d1b;
584 p0a = b0 * Xn4a;
585 p0b = b0 * Xn4b;
586 p3a = a1 * acc3a;
587 p3b = a1 * acc3b;
588 p2a = b2 * Xn3a;
589 p2b = b2 * Xn3b;
590 A1a = p1a + p3a;
591 A1b = p1b + p3b;
592 p4a = a2 * acc3a;
593 p4b = a2 * acc3b;
594 d1a = A1a + d2a;
595 d1b = A1b + d2b;
596 d2a = p2a + p4a;
597 d2b = p2b + p4b;
598
599 acc4a = p0a + d1a;
600 acc4b = p0b + d1b;
601 p1a = b1 * Xn4a;
602 p1b = b1 * Xn4b;
603 p3a = a1 * acc4a;
604 p3b = a1 * acc4b;
605 p2a = b2 * Xn4a;
606 p2b = b2 * Xn4b;
607 A1a = p1a + p3a;
608 A1b = p1b + p3b;
609 p4a = a2 * acc4a;
610 p4b = a2 * acc4b;
611 d1a = A1a + d2a;
612 d1b = A1b + d2b;
613 d2a = p2a + p4a;
614 d2b = p2b + p4b;
615
616 pOut[0] = acc1a;
617 pOut[1] = acc1b;
618 pOut[2] = acc2a;
619 pOut[3] = acc2b;
620 pOut[4] = acc3a;
621 pOut[5] = acc3b;
622 pOut[6] = acc4a;
623 pOut[7] = acc4b;
624 pOut += 8;
625
626 sample--;
627 }
628
629 sample = blockSize & 0x3u;
630 while(sample > 0u) {
631 Xn1a = *pIn++;
632 Xn1b = *pIn++;
633
634 p0a = b0 * Xn1a;
635 p0b = b0 * Xn1b;
636 p1a = b1 * Xn1a;
637 p1b = b1 * Xn1b;
638 acc1a = p0a + d1a;
639 acc1b = p0b + d1b;
640 p3a = a1 * acc1a;
641 p3b = a1 * acc1b;
642 p2a = b2 * Xn1a;
643 p2b = b2 * Xn1b;
644 A1a = p1a + p3a;
645 A1b = p1b + p3b;
646 p4a = a2 * acc1a;
647 p4b = a2 * acc1b;
648 d1a = A1a + d2a;
649 d1b = A1b + d2b;
650 d2a = p2a + p4a;
651 d2b = p2b + p4b;
652
653 *pOut++ = acc1a;
654 *pOut++ = acc1b;
655
656 sample--;
657 }
658
659 /* Store the updated state variables back into the state array */
660 *pState++ = d1a;
661 *pState++ = d2a;
662 *pState++ = d1b;
663 *pState++ = d2b;
664
665 /* The current stage input is given as the output to the next stage */
666 pIn = pDst;
667
668 /*Reset the output working pointer */
669 pOut = pDst;
670
671 /* decrement the loop counter */
672 stage--;
673
674 } while(stage > 0u);
675
676 #endif
677
678 }
679 LOW_OPTIMIZATION_EXIT
680
681 /**
682 * @} end of BiquadCascadeDF2T group
683 */