localhost Git - SCSI2SD-V6.git/blob

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        19. March 2015
   5 * $Revision:    V.1.4.5
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_biquad_cascade_stereo_df2T_f32.c
   9 *
  10 * Description:  Processing function for the floating-point transposed
  11 *               direct form II Biquad cascade filter. 2 channels
  12 *
  13 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  14 *
  15 * Redistribution and use in source and binary forms, with or without
  16 * modification, are permitted provided that the following conditions
  17 * are met:
  18 *   - Redistributions of source code must retain the above copyright
  19 *     notice, this list of conditions and the following disclaimer.
  20 *   - Redistributions in binary form must reproduce the above copyright
  21 *     notice, this list of conditions and the following disclaimer in
  22 *     the documentation and/or other materials provided with the
  23 *     distribution.
  24 *   - Neither the name of ARM LIMITED nor the names of its contributors
  25 *     may be used to endorse or promote products derived from this
  26 *     software without specific prior written permission.
  27 *
  28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  31 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  32 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  33 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  34 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  35 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  36 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  38 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  39 * POSSIBILITY OF SUCH DAMAGE.
  40 * -------------------------------------------------------------------- */
  41
  42 #include "arm_math.h"
  43
  44 /**
  45 * @ingroup groupFilters
  46 */
  47
  48 /**
  49 * @defgroup BiquadCascadeDF2T Biquad Cascade IIR Filters Using a Direct Form II Transposed Structure
  50 *
  51 * This set of functions implements arbitrary order recursive (IIR) filters using a transposed direct form II structure.
  52 * The filters are implemented as a cascade of second order Biquad sections.
  53 * These functions provide a slight memory savings as compared to the direct form I Biquad filter functions.
  54 * Only floating-point data is supported.
  55 *
  56 * These functions operate on blocks of input and output data and each call to the function
  57 * processes <code>blockSize</code> samples through the filter.
  58 * <code>pSrc</code> points to the array of input data and
  59 * <code>pDst</code> points to the array of output data.
  60 * Both arrays contain <code>blockSize</code> values (the stereo function in this file reads and writes <code>blockSize</code> interleaved pairs, i.e. <code>2*blockSize</code> values).
  61 *
  62 * \par Algorithm
  63 * Each Biquad stage implements a second order filter using the difference equation:
  64 * <pre>
  65 *    y[n] = b0 * x[n] + d1
  66 *    d1 = b1 * x[n] + a1 * y[n] + d2
  67 *    d2 = b2 * x[n] + a2 * y[n]
  68 * </pre>
  69 * where d1 and d2 represent the two state values.
  70 *
  71 * \par
  72 * A Biquad filter using a transposed Direct Form II structure is shown below.
  73 * \image html BiquadDF2Transposed.gif "Single transposed Direct Form II Biquad"
  74 * Coefficients <code>b0, b1, and b2 </code> multiply the input signal <code>x[n]</code> and are referred to as the feedforward coefficients.
  75 * Coefficients <code>a1</code> and <code>a2</code> multiply the output signal <code>y[n]</code> and are referred to as the feedback coefficients.
  76 * Pay careful attention to the sign of the feedback coefficients.
  77 * Some design tools flip the sign of the feedback coefficients:
  78 * <pre>
  79 *    y[n] = b0 * x[n] + d1;
  80 *    d1 = b1 * x[n] - a1 * y[n] + d2;
  81 *    d2 = b2 * x[n] - a2 * y[n];
  82 * </pre>
  83 * In this case the feedback coefficients <code>a1</code> and <code>a2</code> must be negated when used with the CMSIS DSP Library.
  84 *
  85 * \par
  86 * Higher order filters are realized as a cascade of second order sections.
  87 * <code>numStages</code> refers to the number of second order stages used.
  88 * For example, an 8th order filter would be realized with <code>numStages=4</code> second order stages.
  89 * A 9th order filter would be realized with <code>numStages=5</code> second order stages with the
  90 * coefficients for one of the stages configured as a first order filter (<code>b2=0</code> and <code>a2=0</code>).
  91 *
  92 * \par
  93 * <code>pState</code> points to the state variable array.
  94 * Each Biquad stage has 2 state variables <code>d1</code> and <code>d2</code> per channel.
  95 * For a single channel the state variables are arranged in the <code>pState</code> array as:
  96 * <pre>
  97 *     {d11, d12, d21, d22, ...}
  98 * </pre>
  99 * where <code>d1x</code> refers to the state variables for the first Biquad and <code>d2x</code> to those for the second Biquad,
 100 * giving <code>2*numStages</code> values. The stereo function in this file keeps <code>{d1a, d2a, d1b, d2b}</code> for each stage
 101 * (channels a and b), so its state array has a total length of <code>4*numStages</code> values.
 102 * The state variables are updated after each block of data is processed; the coefficients are untouched.
 103 *
 104 * \par
 105 * The CMSIS library contains Biquad filters in both Direct Form I and transposed Direct Form II.
 106 * The advantage of the Direct Form I structure is that it is numerically more robust for fixed-point data types.
 107 * That is why the Direct Form I structure supports Q15 and Q31 data types.
 108 * The transposed Direct Form II structure, on the other hand, requires a wide dynamic range for the state variables <code>d1</code> and <code>d2</code>.
 109 * Because of this, the CMSIS library only has a floating-point version of the Direct Form II Biquad.
 110 * The advantage of the Direct Form II Biquad is that it requires half the number of state variables, 2 rather than 4, per Biquad stage.
 111 *
 112 * \par Instance Structure
 113 * The coefficients and state variables for a filter are stored together in an instance data structure.
 114 * A separate instance structure must be defined for each filter.
 115 * Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.
 116 *
 117 * \par Init Functions
 118 * There is also an associated initialization function.
 119 * The initialization function performs the following operations:
 120 * - Sets the values of the internal structure fields.
 121 * - Zeros out the values in the state buffer.
 122 * To do this manually without calling the init function, assign the following subfields of the instance structure:
 123 * numStages, pCoeffs, pState. Also set all of the values in pState to zero.
 124 *
 125 * \par
 126 * Use of the initialization function is optional.
 127 * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
 128 * To place an instance structure into a const data section, the instance structure must be manually initialized.
 129 * Set the values in the state buffer to zeros before static initialization.
 130 * For example, to statically initialize the instance structure use
 131 * <pre>
 132 *     arm_biquad_cascade_df2T_instance_f32 S1 = {numStages, pState, pCoeffs};
 133 * </pre>
 134 * where <code>numStages</code> is the number of Biquad stages in the filter, <code>pState</code> is the address of the state buffer,
 135 * and <code>pCoeffs</code> is the address of the coefficient buffer.
 136 *
 137 */
 138
 139 /**
 140 * @addtogroup BiquadCascadeDF2T
 141 * @{
 142 */
 143
 144 /**
 145 * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter, applied to two interleaved channels.
 146 * @param[in]  *S        points to an instance of the filter data structure; numStages, pState and pCoeffs are read from it.
 147 * @param[in]  *pSrc     points to the block of input data, read as interleaved channel-a / channel-b values.
 148 * @param[out] *pDst     points to the block of output data, written in the same interleaved order; stages after the first filter this buffer in place.
 149 * @param[in]  blockSize number of samples to process per channel (2*blockSize values are read and written).
 150 * @return none.
 151 */
 152 /* Each stage consumes five coefficients (b0, b1, b2, a1, a2) and four state values (d1a, d2a, d1b, d2b); the state values are written back before the next stage. */
 153 /* numStages is expected to be at least 1: the stage loop is a do/while, so its body runs once before the unsigned stage counter is tested. */
 154 LOW_OPTIMIZATION_ENTER
 155 void arm_biquad_cascade_stereo_df2T_f32(
 156 const arm_biquad_cascade_stereo_df2T_instance_f32 * S,
 157 float32_t * pSrc,
 158 float32_t * pDst,
 159 uint32_t blockSize)
 160 {
 161
 162     float32_t *pIn = pSrc;                         /*  source pointer (switched to pDst after the first stage) */
 163     float32_t *pOut = pDst;                        /*  destination pointer       */
 164     float32_t *pState = S->pState;                 /*  State pointer, 4 values per stage */
 165     float32_t *pCoeffs = S->pCoeffs;               /*  coefficient pointer, 5 values per stage */
 166     float32_t acc1a, acc1b;                        /*  accumulators (outputs) for channel a / channel b */
 167     float32_t b0, b1, b2, a1, a2;                  /*  Filter coefficients of the current stage */
 168     float32_t Xn1a, Xn1b;                          /*  current input, channel a / channel b */
 169     float32_t d1a, d2a, d1b, d2b;                  /*  state variables d1, d2 for channel a and channel b */
 170     uint32_t sample, stage = S->numStages;         /*  loop counters: inner (frames) and outer (stages) */
 171
 172 #if defined(ARM_MATH_CM7)
 173     /* ARM_MATH_CM7 build: inner loop unrolled to 8 frames (16 values) per iteration, with loads and stores placed between the arithmetic statements. */
 174     float32_t Xn2a, Xn3a, Xn4a, Xn5a, Xn6a, Xn7a, Xn8a;         /*  unrolled inputs, channel a  */
 175     float32_t Xn2b, Xn3b, Xn4b, Xn5b, Xn6b, Xn7b, Xn8b;         /*  unrolled inputs, channel b  */
 176     float32_t acc2a, acc3a, acc4a, acc5a, acc6a, acc7a, acc8a;  /*  unrolled outputs, channel a */
 177     float32_t acc2b, acc3b, acc4b, acc5b, acc6b, acc7b, acc8b;  /*  unrolled outputs, channel b */
 178
 179     do
 180     {
 181         /* Reading the coefficients */
 182         b0 = pCoeffs[0];
 183         b1 = pCoeffs[1];
 184         b2 = pCoeffs[2];
 185         a1 = pCoeffs[3];
 186         /* Apply loop unrolling and compute 8 output frames (16 values) per iteration. */
 187         sample = blockSize >> 3u;
 188         a2 = pCoeffs[4];
 189
 190         /*Reading the state values */
 191         d1a = pState[0];
 192         d2a = pState[1];
 193         d1b = pState[2];
 194         d2b = pState[3];
 195
 196         pCoeffs += 5u;
 197
 198         /* First part of the processing with loop unrolling.  Compute 8 frames at a time.
 199         ** a second loop below computes the remaining 0 to 7 frames. */
 200         while(sample > 0u) {
 201
 202             /* y[n] = b0 * x[n] + d1 */
 203             /* d1 = b1 * x[n] + a1 * y[n] + d2 */
 204             /* d2 = b2 * x[n] + a2 * y[n] */
 205
 206             /* Read the first 2 inputs. 2 cycles */
 207             Xn1a  = pIn[0 ];
 208             Xn1b  = pIn[1 ];
 209
 210             /* Sample 1 (frame 1, channel a). 5 cycles */
 211             Xn2a  = pIn[2 ];
 212             acc1a = b0 * Xn1a + d1a;
 213
 214             Xn2b  = pIn[3 ];
 215             d1a = b1 * Xn1a + d2a;
 216
 217             Xn3a  = pIn[4 ];
 218             d2a = b2 * Xn1a;
 219
 220             Xn3b  = pIn[5 ];
 221             d1a += a1 * acc1a;
 222
 223             Xn4a  = pIn[6 ];
 224             d2a += a2 * acc1a;
 225
 226             /* Sample 2 (frame 1, channel b). 5 cycles */
 227             Xn4b  = pIn[7 ];
 228             acc1b = b0 * Xn1b + d1b;
 229
 230             Xn5a  = pIn[8 ];
 231             d1b = b1 * Xn1b + d2b;
 232
 233             Xn5b = pIn[9 ];
 234             d2b = b2 * Xn1b;
 235
 236             Xn6a = pIn[10];
 237             d1b += a1 * acc1b;
 238
 239             Xn6b = pIn[11];
 240             d2b += a2 * acc1b;
 241
 242             /* Sample 3 (frame 2, channel a). 5 cycles */
 243             Xn7a = pIn[12];
 244             acc2a = b0 * Xn2a + d1a;
 245
 246             Xn7b = pIn[13];
 247             d1a = b1 * Xn2a + d2a;
 248
 249             Xn8a = pIn[14];
 250             d2a = b2 * Xn2a;
 251
 252             Xn8b = pIn[15];
 253             d1a += a1 * acc2a;
 254
 255             pIn += 16;
 256             d2a += a2 * acc2a;
 257             /* All 16 inputs of this iteration have been read at this point. */
 258             /* Sample 4 (frame 2, channel b). 5 cycles */
 259             acc2b = b0 * Xn2b + d1b;
 260             d1b = b1 * Xn2b + d2b;
 261             d2b = b2 * Xn2b;
 262             d1b += a1 * acc2b;
 263             d2b += a2 * acc2b;
 264
 265             /* Sample 5 (frame 3, channel a). 5 cycles */
 266             acc3a = b0 * Xn3a + d1a;
 267             d1a = b1 * Xn3a + d2a;
 268             d2a = b2 * Xn3a;
 269             d1a += a1 * acc3a;
 270             d2a += a2 * acc3a;
 271
 272             /* Sample 6 (frame 3, channel b). 5 cycles */
 273             acc3b = b0 * Xn3b + d1b;
 274             d1b = b1 * Xn3b + d2b;
 275             d2b = b2 * Xn3b;
 276             d1b += a1 * acc3b;
 277             d2b += a2 * acc3b;
 278
 279             /* Sample 7 (frame 4, channel a). 5 cycles */
 280             acc4a = b0 * Xn4a + d1a;
 281             d1a = b1 * Xn4a + d2a;
 282             d2a = b2 * Xn4a;
 283             d1a += a1 * acc4a;
 284             d2a += a2 * acc4a;
 285
 286             /* Sample 8 (frame 4, channel b). 5 cycles */
 287             acc4b = b0 * Xn4b + d1b;
 288             d1b = b1 * Xn4b + d2b;
 289             d2b = b2 * Xn4b;
 290             d1b += a1 * acc4b;
 291             d2b += a2 * acc4b;
 292
 293             /* Sample 9 (frame 5, channel a). 5 cycles */
 294             acc5a = b0 * Xn5a + d1a;
 295             d1a = b1 * Xn5a + d2a;
 296             d2a = b2 * Xn5a;
 297             d1a += a1 * acc5a;
 298             d2a += a2 * acc5a;
 299
 300             /* Sample 10 (frame 5, channel b). 5 cycles */
 301             acc5b = b0 * Xn5b + d1b;
 302             d1b = b1 * Xn5b + d2b;
 303             d2b = b2 * Xn5b;
 304             d1b += a1 * acc5b;
 305             d2b += a2 * acc5b;
 306
 307             /* Sample 11 (frame 6, channel a). 5 cycles */
 308             acc6a = b0 * Xn6a + d1a;
 309             d1a = b1 * Xn6a + d2a;
 310             d2a = b2 * Xn6a;
 311             d1a += a1 * acc6a;
 312             d2a += a2 * acc6a;
 313
 314             /* Sample 12 (frame 6, channel b). 5 cycles */
 315             acc6b = b0 * Xn6b + d1b;
 316             d1b = b1 * Xn6b + d2b;
 317             d2b = b2 * Xn6b;
 318             d1b += a1 * acc6b;
 319             d2b += a2 * acc6b;
 320
 321             /* Sample 13 (frame 7, channel a). 5 cycles; output stores start here, between the arithmetic statements */
 322             acc7a = b0 * Xn7a + d1a;
 323             d1a = b1 * Xn7a + d2a;
 324
 325             pOut[0 ] = acc1a ;
 326             d2a = b2 * Xn7a;
 327
 328             pOut[1 ] = acc1b ;
 329             d1a += a1 * acc7a;
 330
 331             pOut[2 ] = acc2a ;
 332             d2a += a2 * acc7a;
 333
 334             /* Sample 14 (frame 7, channel b). 5 cycles */
 335             pOut[3 ] = acc2b ;
 336             acc7b = b0 * Xn7b + d1b;
 337
 338             pOut[4 ] = acc3a ;
 339             d1b = b1 * Xn7b + d2b;
 340
 341             pOut[5 ] = acc3b ;
 342             d2b = b2 * Xn7b;
 343
 344             pOut[6 ] = acc4a ;
 345             d1b += a1 * acc7b;
 346
 347             pOut[7 ] = acc4b ;
 348             d2b += a2 * acc7b;
 349
 350             /* Sample 15 (frame 8, channel a). 5 cycles */
 351             pOut[8 ] = acc5a ;
 352             acc8a = b0 * Xn8a + d1a;
 353
 354             pOut[9 ] = acc5b;
 355             d1a = b1 * Xn8a + d2a;
 356
 357             pOut[10] = acc6a;
 358             d2a = b2 * Xn8a;
 359
 360             pOut[11] = acc6b;
 361             d1a += a1 * acc8a;
 362
 363             pOut[12] = acc7a;
 364             d2a += a2 * acc8a;
 365
 366             /* Sample 16 (frame 8, channel b). 5 cycles */
 367             pOut[13] = acc7b;
 368             acc8b = b0 * Xn8b + d1b;
 369
 370             pOut[14] = acc8a;
 371             d1b = b1 * Xn8b + d2b;
 372
 373             pOut[15] = acc8b;
 374             d2b = b2 * Xn8b;
 375
 376             sample--;
 377             d1b += a1 * acc8b;
 378
 379             pOut += 16;
 380             d2b += a2 * acc8b;
 381         }
 382         /* Remaining blockSize % 8 frames, processed one frame (two values) per iteration. */
 383         sample = blockSize & 0x7u;
 384         while(sample > 0u) {
 385             /* Read the input */
 386             Xn1a = *pIn++; //Channel a
 387             Xn1b = *pIn++; //Channel b
 388
 389             /* y[n] = b0 * x[n] + d1 */
 390             acc1a = (b0 * Xn1a) + d1a;
 391             acc1b = (b0 * Xn1b) + d1b;
 392
 393             /* Store the result in the accumulator in the destination buffer. */
 394             *pOut++ = acc1a;
 395             *pOut++ = acc1b;
 396
 397             /* Every time after the output is computed state should be updated. */
 398             /* d1 = b1 * x[n] + a1 * y[n] + d2 */
 399             d1a = ((b1 * Xn1a) + (a1 * acc1a)) + d2a;
 400             d1b = ((b1 * Xn1b) + (a1 * acc1b)) + d2b;
 401
 402             /* d2 = b2 * x[n] + a2 * y[n] */
 403             d2a = (b2 * Xn1a) + (a2 * acc1a);
 404             d2b = (b2 * Xn1b) + (a2 * acc1b);
 405
 406             sample--;
 407         }
 408
 409         /* Store the updated state variables back into the state array */
 410         pState[0] = d1a;
 411         pState[1] = d2a;
 412
 413         pState[2] = d1b;
 414         pState[3] = d2b;
 415
 416         /* The next stage reads its input from the destination buffer, i.e. it filters this stage's output in place */
 417         pIn = pDst;
 418         /* decrement the loop counter */
 419         stage--;
 420         /* Advance to the four state values of the next stage */
 421         pState += 4u;
 422         /*Reset the output working pointer */
 423         pOut = pDst;
 424
 425     } while(stage > 0u);
 426
 427 #elif defined(ARM_MATH_CM0_FAMILY)
 428
 429     /* Run the below code for Cortex-M0: no loop unrolling, one frame (two values) per iteration */
 430
 431     do
 432     {
 433         /* Reading the coefficients */
 434         b0 = *pCoeffs++;
 435         b1 = *pCoeffs++;
 436         b2 = *pCoeffs++;
 437         a1 = *pCoeffs++;
 438         a2 = *pCoeffs++;
 439
 440         /*Reading the state values */
 441         d1a = pState[0];
 442         d2a = pState[1];
 443         d1b = pState[2];
 444         d2b = pState[3];
 445
 446
 447         sample = blockSize;
 448
 449         while(sample > 0u)
 450         {
 451             /* Read the input */
 452             Xn1a = *pIn++; //Channel a
 453             Xn1b = *pIn++; //Channel b
 454
 455             /* y[n] = b0 * x[n] + d1 */
 456             acc1a = (b0 * Xn1a) + d1a;
 457             acc1b = (b0 * Xn1b) + d1b;
 458
 459             /* Store the result in the accumulator in the destination buffer. */
 460             *pOut++ = acc1a;
 461             *pOut++ = acc1b;
 462
 463             /* Every time after the output is computed state should be updated. */
 464             /* d1 = b1 * x[n] + a1 * y[n] + d2 */
 465             d1a = ((b1 * Xn1a) + (a1 * acc1a)) + d2a;
 466             d1b = ((b1 * Xn1b) + (a1 * acc1b)) + d2b;
 467
 468             /* d2 = b2 * x[n] + a2 * y[n] */
 469             d2a = (b2 * Xn1a) + (a2 * acc1a);
 470             d2b = (b2 * Xn1b) + (a2 * acc1b);
 471
 472             /* decrement the loop counter */
 473             sample--;
 474         }
 475
 476         /* Store the updated state variables back into the state array; pState ends up at the next stage's state */
 477         *pState++ = d1a;
 478         *pState++ = d2a;
 479         *pState++ = d1b;
 480         *pState++ = d2b;
 481
 482         /* The next stage reads its input from the destination buffer, i.e. it filters this stage's output in place */
 483         pIn = pDst;
 484
 485         /*Reset the output working pointer */
 486         pOut = pDst;
 487
 488         /* decrement the loop counter */
 489         stage--;
 490
 491     } while(stage > 0u);
 492
 493 #else
 494
 495     float32_t Xn2a, Xn3a, Xn4a;                          /*  unrolled inputs, channel a   */
 496     float32_t Xn2b, Xn3b, Xn4b;                          /*  unrolled inputs, channel b   */
 497     float32_t acc2a, acc3a, acc4a;                       /*  unrolled outputs, channel a  */
 498     float32_t acc2b, acc3b, acc4b;                       /*  unrolled outputs, channel b  */
 499     float32_t p0a, p1a, p2a, p3a, p4a, A1a;              /*  channel a: p0=b0*x, p1=b1*x, p2=b2*x, p3=a1*y, p4=a2*y, A1=p1+p3 */
 500     float32_t p0b, p1b, p2b, p3b, p4b, A1b;              /*  channel b: same intermediates */
 501
 502     /* Run the below code for Cortex-M4 and Cortex-M3 */
 503     do
 504     {
 505         /* Reading the coefficients */
 506         b0 = *pCoeffs++;
 507         b1 = *pCoeffs++;
 508         b2 = *pCoeffs++;
 509         a1 = *pCoeffs++;
 510         a2 = *pCoeffs++;
 511
 512         /*Reading the state values */
 513         d1a = pState[0];
 514         d2a = pState[1];
 515         d1b = pState[2];
 516         d2b = pState[3];
 517
 518         /* Apply loop unrolling and compute 4 output frames (8 values) per iteration. */
 519         sample = blockSize >> 2u;
 520
 521         /* First part of the processing with loop unrolling.  Compute 4 frames at a time.
 522         ** a second loop below computes the remaining 0 to 3 frames. */
 523         while(sample > 0u) {
 524
 525             /* y[n] = b0 * x[n] + d1 */
 526             /* d1 = b1 * x[n] + a1 * y[n] + d2 */
 527             /* d2 = b2 * x[n] + a2 * y[n] */
 528
 529             /* Read four input frames (eight interleaved values) */
 530             Xn1a = pIn[0];
 531             Xn1b = pIn[1];
 532             Xn2a = pIn[2];
 533             Xn2b = pIn[3];
 534             Xn3a = pIn[4];
 535             Xn3b = pIn[5];
 536             Xn4a = pIn[6];
 537             Xn4b = pIn[7];
 538             pIn += 8;
 539             /* Frame 1: output and state update for both channels; b0 * Xn2 is computed ahead for frame 2 */
 540             p0a = b0 * Xn1a;
 541             p0b = b0 * Xn1b;
 542             p1a = b1 * Xn1a;
 543             p1b = b1 * Xn1b;
 544             acc1a = p0a + d1a;
 545             acc1b = p0b + d1b;
 546             p0a = b0 * Xn2a;
 547             p0b = b0 * Xn2b;
 548             p3a = a1 * acc1a;
 549             p3b = a1 * acc1b;
 550             p2a = b2 * Xn1a;
 551             p2b = b2 * Xn1b;
 552             A1a = p1a + p3a;
 553             A1b = p1b + p3b;
 554             p4a = a2 * acc1a;
 555             p4b = a2 * acc1b;
 556             d1a = A1a + d2a;
 557             d1b = A1b + d2b;
 558             d2a = p2a + p4a;
 559             d2b = p2b + p4b;
 560             /* Frame 2: same update; b0 * Xn3 is computed ahead for frame 3 */
 561             p1a = b1 * Xn2a;
 562             p1b = b1 * Xn2b;
 563             acc2a = p0a + d1a;
 564             acc2b = p0b + d1b;
 565             p0a = b0 * Xn3a;
 566             p0b = b0 * Xn3b;
 567             p3a = a1 * acc2a;
 568             p3b = a1 * acc2b;
 569             p2a = b2 * Xn2a;
 570             p2b = b2 * Xn2b;
 571             A1a = p1a + p3a;
 572             A1b = p1b + p3b;
 573             p4a = a2 * acc2a;
 574             p4b = a2 * acc2b;
 575             d1a = A1a + d2a;
 576             d1b = A1b + d2b;
 577             d2a = p2a + p4a;
 578             d2b = p2b + p4b;
 579             /* Frame 3: same update; b0 * Xn4 is computed ahead for frame 4 */
 580             p1a = b1 * Xn3a;
 581             p1b = b1 * Xn3b;
 582             acc3a = p0a + d1a;
 583             acc3b = p0b + d1b;
 584             p0a = b0 * Xn4a;
 585             p0b = b0 * Xn4b;
 586             p3a = a1 * acc3a;
 587             p3b = a1 * acc3b;
 588             p2a = b2 * Xn3a;
 589             p2b = b2 * Xn3b;
 590             A1a = p1a + p3a;
 591             A1b = p1b + p3b;
 592             p4a = a2 * acc3a;
 593             p4b = a2 * acc3b;
 594             d1a = A1a + d2a;
 595             d1b = A1b + d2b;
 596             d2a = p2a + p4a;
 597             d2b = p2b + p4b;
 598             /* Frame 4: uses the b0 * Xn4 products computed during frame 3 */
 599             acc4a = p0a + d1a;
 600             acc4b = p0b + d1b;
 601             p1a = b1 * Xn4a;
 602             p1b = b1 * Xn4b;
 603             p3a = a1 * acc4a;
 604             p3b = a1 * acc4b;
 605             p2a = b2 * Xn4a;
 606             p2b = b2 * Xn4b;
 607             A1a = p1a + p3a;
 608             A1b = p1b + p3b;
 609             p4a = a2 * acc4a;
 610             p4b = a2 * acc4b;
 611             d1a = A1a + d2a;
 612             d1b = A1b + d2b;
 613             d2a = p2a + p4a;
 614             d2b = p2b + p4b;
 615             /* Write the four output frames, interleaved channel a / channel b */
 616             pOut[0] = acc1a;
 617             pOut[1] = acc1b;
 618             pOut[2] = acc2a;
 619             pOut[3] = acc2b;
 620             pOut[4] = acc3a;
 621             pOut[5] = acc3b;
 622             pOut[6] = acc4a;
 623             pOut[7] = acc4b;
 624             pOut += 8;
 625
 626             sample--;
 627         }
 628         /* Remaining blockSize % 4 frames, processed one frame (two values) per iteration. */
 629         sample = blockSize & 0x3u;
 630         while(sample > 0u) {
 631             Xn1a = *pIn++;
 632             Xn1b = *pIn++;
 633
 634             p0a = b0 * Xn1a;
 635             p0b = b0 * Xn1b;
 636             p1a = b1 * Xn1a;
 637             p1b = b1 * Xn1b;
 638             acc1a = p0a + d1a;
 639             acc1b = p0b + d1b;
 640             p3a = a1 * acc1a;
 641             p3b = a1 * acc1b;
 642             p2a = b2 * Xn1a;
 643             p2b = b2 * Xn1b;
 644             A1a = p1a + p3a;
 645             A1b = p1b + p3b;
 646             p4a = a2 * acc1a;
 647             p4b = a2 * acc1b;
 648             d1a = A1a + d2a;
 649             d1b = A1b + d2b;
 650             d2a = p2a + p4a;
 651             d2b = p2b + p4b;
 652
 653             *pOut++ = acc1a;
 654             *pOut++ = acc1b;
 655
 656             sample--;
 657         }
 658
 659         /* Store the updated state variables back into the state array; pState ends up at the next stage's state */
 660         *pState++ = d1a;
 661         *pState++ = d2a;
 662         *pState++ = d1b;
 663         *pState++ = d2b;
 664
 665         /* The next stage reads its input from the destination buffer, i.e. it filters this stage's output in place */
 666         pIn = pDst;
 667
 668         /*Reset the output working pointer */
 669         pOut = pDst;
 670
 671         /* decrement the loop counter */
 672         stage--;
 673
 674     } while(stage > 0u);
 675
 676 #endif
 677
 678 }
 679 LOW_OPTIMIZATION_EXIT
 680
 681 /**
 682    * @} end of BiquadCascadeDF2T group
 683    */