localhost Git - SCSI2SD-V6.git/blob

   1 /* ----------------------------------------------------------------------
   2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
   3 *
   4 * $Date:        19. March 2015
   5 * $Revision:    V.1.4.5
   6 *
   7 * Project:          CMSIS DSP Library
   8 * Title:            arm_biquad_cascade_df2T_f32.c
   9 *
  10 * Description:  Processing function for the floating-point transposed
  11 *               direct form II Biquad cascade filter.
  12 *
  13 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  14 *
  15 * Redistribution and use in source and binary forms, with or without
  16 * modification, are permitted provided that the following conditions
  17 * are met:
  18 *   - Redistributions of source code must retain the above copyright
  19 *     notice, this list of conditions and the following disclaimer.
  20 *   - Redistributions in binary form must reproduce the above copyright
  21 *     notice, this list of conditions and the following disclaimer in
  22 *     the documentation and/or other materials provided with the
  23 *     distribution.
  24 *   - Neither the name of ARM LIMITED nor the names of its contributors
  25 *     may be used to endorse or promote products derived from this
  26 *     software without specific prior written permission.
  27 *
  28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  31 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  32 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  33 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  34 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  35 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  36 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  38 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  39 * POSSIBILITY OF SUCH DAMAGE.
  40 * -------------------------------------------------------------------- */
  41
  42 #include "arm_math.h"
  43
  44 /**
  45 * @ingroup groupFilters
  46 */
  47
  48 /**
  49 * @defgroup BiquadCascadeDF2T Biquad Cascade IIR Filters Using a Direct Form II Transposed Structure
  50 *
  51 * This set of functions implements arbitrary order recursive (IIR) filters using a transposed direct form II structure.
  52 * The filters are implemented as a cascade of second order Biquad sections.
  53 * These functions provide a slight memory savings as compared to the direct form I Biquad filter functions.
  54 * Only floating-point data is supported.
  55 *
  56 * This function operate on blocks of input and output data and each call to the function
  57 * processes <code>blockSize</code> samples through the filter.
  58 * <code>pSrc</code> points to the array of input data and
  59 * <code>pDst</code> points to the array of output data.
  60 * Both arrays contain <code>blockSize</code> values.
  61 *
  62 * \par Algorithm
  63 * Each Biquad stage implements a second order filter using the difference equation:
  64 * <pre>
  65 *    y[n] = b0 * x[n] + d1
  66 *    d1 = b1 * x[n] + a1 * y[n] + d2
  67 *    d2 = b2 * x[n] + a2 * y[n]
  68 * </pre>
  69 * where d1 and d2 represent the two state values.
  70 *
  71 * \par
  72 * A Biquad filter using a transposed Direct Form II structure is shown below.
  73 * \image html BiquadDF2Transposed.gif "Single transposed Direct Form II Biquad"
  74 * Coefficients <code>b0, b1, and b2 </code> multiply the input signal <code>x[n]</code> and are referred to as the feedforward coefficients.
  75 * Coefficients <code>a1</code> and <code>a2</code> multiply the output signal <code>y[n]</code> and are referred to as the feedback coefficients.
  76 * Pay careful attention to the sign of the feedback coefficients.
  77 * Some design tools flip the sign of the feedback coefficients:
  78 * <pre>
  79 *    y[n] = b0 * x[n] + d1;
  80 *    d1 = b1 * x[n] - a1 * y[n] + d2;
  81 *    d2 = b2 * x[n] - a2 * y[n];
  82 * </pre>
  83 * In this case the feedback coefficients <code>a1</code> and <code>a2</code> must be negated when used with the CMSIS DSP Library.
  84 *
  85 * \par
  86 * Higher order filters are realized as a cascade of second order sections.
  87 * <code>numStages</code> refers to the number of second order stages used.
  88 * For example, an 8th order filter would be realized with <code>numStages=4</code> second order stages.
  89 * A 9th order filter would be realized with <code>numStages=5</code> second order stages with the
  90 * coefficients for one of the stages configured as a first order filter (<code>b2=0</code> and <code>a2=0</code>).
  91 *
  92 * \par
  93 * <code>pState</code> points to the state variable array.
  94 * Each Biquad stage has 2 state variables <code>d1</code> and <code>d2</code>.
  95 * The state variables are arranged in the <code>pState</code> array as:
  96 * <pre>
  97 *     {d11, d12, d21, d22, ...}
  98 * </pre>
  99 * where <code>d1x</code> refers to the state variables for the first Biquad and
 100 * <code>d2x</code> refers to the state variables for the second Biquad.
 101 * The state array has a total length of <code>2*numStages</code> values.
 102 * The state variables are updated after each block of data is processed; the coefficients are untouched.
 103 *
 104 * \par
 105 * The CMSIS library contains Biquad filters in both Direct Form I and transposed Direct Form II.
 106 * The advantage of the Direct Form I structure is that it is numerically more robust for fixed-point data types.
 107 * That is why the Direct Form I structure supports Q15 and Q31 data types.
 108 * The transposed Direct Form II structure, on the other hand, requires a wide dynamic range for the state variables <code>d1</code> and <code>d2</code>.
 109 * Because of this, the CMSIS library only has a floating-point version of the Direct Form II Biquad.
 110 * The advantage of the Direct Form II Biquad is that it requires half the number of state variables, 2 rather than 4, per Biquad stage.
 111 *
 112 * \par Instance Structure
 113 * The coefficients and state variables for a filter are stored together in an instance data structure.
 114 * A separate instance structure must be defined for each filter.
 115 * Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.
 116 *
 117 * \par Init Functions
 118 * There is also an associated initialization function.
 119 * The initialization function performs following operations:
 120 * - Sets the values of the internal structure fields.
 121 * - Zeros out the values in the state buffer.
 122 * To do this manually without calling the init function, assign the follow subfields of the instance structure:
 123 * numStages, pCoeffs, pState. Also set all of the values in pState to zero.
 124 *
 125 * \par
 126 * Use of the initialization function is optional.
 127 * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
 128 * To place an instance structure into a const data section, the instance structure must be manually initialized.
 129 * Set the values in the state buffer to zeros before static initialization.
 130 * For example, to statically initialize the instance structure use
 131 * <pre>
 132 *     arm_biquad_cascade_df2T_instance_f32 S1 = {numStages, pState, pCoeffs};
 133 * </pre>
 134 * where <code>numStages</code> is the number of Biquad stages in the filter; <code>pState</code> is the address of the state buffer.
 135 * <code>pCoeffs</code> is the address of the coefficient buffer;
 136 *
 137 */
 138
 139 /**
 140 * @addtogroup BiquadCascadeDF2T
 141 * @{
 142 */
 143
 144 /**
 145 * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter.
 146 * @param[in]  *S        points to an instance of the filter data structure.
 147 * @param[in]  *pSrc     points to the block of input data.
 148 * @param[out] *pDst     points to the block of output data
 149 * @param[in]  blockSize number of samples to process.
 150 * @return none.
 151 */
 152
 153
 154 LOW_OPTIMIZATION_ENTER
 155 void arm_biquad_cascade_df2T_f32(
 156 const arm_biquad_cascade_df2T_instance_f32 * S,
 157 float32_t * pSrc,
 158 float32_t * pDst,
 159 uint32_t blockSize)
 160 {
 161
 162    float32_t *pIn = pSrc;                         /*  source pointer            */
 163    float32_t *pOut = pDst;                        /*  destination pointer       */
 164    float32_t *pState = S->pState;                 /*  State pointer             */
 165    float32_t *pCoeffs = S->pCoeffs;               /*  coefficient pointer       */
 166    float32_t acc1;                                /*  accumulator               */
 167    float32_t b0, b1, b2, a1, a2;                  /*  Filter coefficients       */
 168    float32_t Xn1;                                 /*  temporary input           */
 169    float32_t d1, d2;                              /*  state variables           */
 170    uint32_t sample, stage = S->numStages;         /*  loop counters             */
 171
 172 #if defined(ARM_MATH_CM7)
 173
 174    float32_t Xn2, Xn3, Xn4, Xn5, Xn6, Xn7, Xn8;   /*  Input State variables     */
 175    float32_t Xn9, Xn10, Xn11, Xn12, Xn13, Xn14, Xn15, Xn16;
 176    float32_t acc2, acc3, acc4, acc5, acc6, acc7;  /*  Simulates the accumulator */
 177    float32_t acc8, acc9, acc10, acc11, acc12, acc13, acc14, acc15, acc16;
 178
 179    do
 180    {
 181       /* Reading the coefficients */
 182       b0 = pCoeffs[0];
 183       b1 = pCoeffs[1];
 184       b2 = pCoeffs[2];
 185       a1 = pCoeffs[3];
 186       /* Apply loop unrolling and compute 16 output values simultaneously. */
 187       sample = blockSize >> 4u;
 188       a2 = pCoeffs[4];
 189
 190       /*Reading the state values */
 191       d1 = pState[0];
 192       d2 = pState[1];
 193
 194       pCoeffs += 5u;
 195
 196
 197       /* First part of the processing with loop unrolling.  Compute 16 outputs at a time.
 198        ** a second loop below computes the remaining 1 to 15 samples. */
 199       while(sample > 0u) {
 200
 201          /* y[n] = b0 * x[n] + d1 */
 202          /* d1 = b1 * x[n] + a1 * y[n] + d2 */
 203          /* d2 = b2 * x[n] + a2 * y[n] */
 204
 205          /* Read the first 2 inputs. 2 cycles */
 206          Xn1  = pIn[0 ];
 207          Xn2  = pIn[1 ];
 208
 209          /* Sample 1. 5 cycles */
 210          Xn3  = pIn[2 ];
 211          acc1 = b0 * Xn1 + d1;
 212
 213          Xn4  = pIn[3 ];
 214          d1 = b1 * Xn1 + d2;
 215
 216          Xn5  = pIn[4 ];
 217          d2 = b2 * Xn1;
 218
 219          Xn6  = pIn[5 ];
 220          d1 += a1 * acc1;
 221
 222          Xn7  = pIn[6 ];
 223          d2 += a2 * acc1;
 224
 225          /* Sample 2. 5 cycles */
 226          Xn8  = pIn[7 ];
 227          acc2 = b0 * Xn2 + d1;
 228
 229          Xn9  = pIn[8 ];
 230          d1 = b1 * Xn2 + d2;
 231
 232          Xn10 = pIn[9 ];
 233          d2 = b2 * Xn2;
 234
 235          Xn11 = pIn[10];
 236          d1 += a1 * acc2;
 237
 238          Xn12 = pIn[11];
 239          d2 += a2 * acc2;
 240
 241          /* Sample 3. 5 cycles */
 242          Xn13 = pIn[12];
 243          acc3 = b0 * Xn3 + d1;
 244
 245          Xn14 = pIn[13];
 246          d1 = b1 * Xn3 + d2;
 247
 248          Xn15 = pIn[14];
 249          d2 = b2 * Xn3;
 250
 251          Xn16 = pIn[15];
 252          d1 += a1 * acc3;
 253
 254          pIn += 16;
 255          d2 += a2 * acc3;
 256
 257          /* Sample 4. 5 cycles */
 258          acc4 = b0 * Xn4 + d1;
 259          d1 = b1 * Xn4 + d2;
 260          d2 = b2 * Xn4;
 261          d1 += a1 * acc4;
 262          d2 += a2 * acc4;
 263
 264          /* Sample 5. 5 cycles */
 265          acc5 = b0 * Xn5 + d1;
 266          d1 = b1 * Xn5 + d2;
 267          d2 = b2 * Xn5;
 268          d1 += a1 * acc5;
 269          d2 += a2 * acc5;
 270
 271          /* Sample 6. 5 cycles */
 272          acc6 = b0 * Xn6 + d1;
 273          d1 = b1 * Xn6 + d2;
 274          d2 = b2 * Xn6;
 275          d1 += a1 * acc6;
 276          d2 += a2 * acc6;
 277
 278          /* Sample 7. 5 cycles */
 279          acc7 = b0 * Xn7 + d1;
 280          d1 = b1 * Xn7 + d2;
 281          d2 = b2 * Xn7;
 282          d1 += a1 * acc7;
 283          d2 += a2 * acc7;
 284
 285          /* Sample 8. 5 cycles */
 286          acc8 = b0 * Xn8 + d1;
 287          d1 = b1 * Xn8 + d2;
 288          d2 = b2 * Xn8;
 289          d1 += a1 * acc8;
 290          d2 += a2 * acc8;
 291
 292          /* Sample 9. 5 cycles */
 293          acc9 = b0 * Xn9 + d1;
 294          d1 = b1 * Xn9 + d2;
 295          d2 = b2 * Xn9;
 296          d1 += a1 * acc9;
 297          d2 += a2 * acc9;
 298
 299          /* Sample 10. 5 cycles */
 300          acc10 = b0 * Xn10 + d1;
 301          d1 = b1 * Xn10 + d2;
 302          d2 = b2 * Xn10;
 303          d1 += a1 * acc10;
 304          d2 += a2 * acc10;
 305
 306          /* Sample 11. 5 cycles */
 307          acc11 = b0 * Xn11 + d1;
 308          d1 = b1 * Xn11 + d2;
 309          d2 = b2 * Xn11;
 310          d1 += a1 * acc11;
 311          d2 += a2 * acc11;
 312
 313          /* Sample 12. 5 cycles */
 314          acc12 = b0 * Xn12 + d1;
 315          d1 = b1 * Xn12 + d2;
 316          d2 = b2 * Xn12;
 317          d1 += a1 * acc12;
 318          d2 += a2 * acc12;
 319
 320          /* Sample 13. 5 cycles */
 321          acc13 = b0 * Xn13 + d1;
 322          d1 = b1 * Xn13 + d2;
 323          d2 = b2 * Xn13;
 324
 325          pOut[0 ] = acc1 ;
 326          d1 += a1 * acc13;
 327
 328          pOut[1 ] = acc2 ;
 329          d2 += a2 * acc13;
 330
 331          /* Sample 14. 5 cycles */
 332          pOut[2 ] = acc3 ;
 333          acc14 = b0 * Xn14 + d1;
 334
 335          pOut[3 ] = acc4 ;
 336          d1 = b1 * Xn14 + d2;
 337
 338          pOut[4 ] = acc5 ;
 339          d2 = b2 * Xn14;
 340
 341          pOut[5 ] = acc6 ;
 342          d1 += a1 * acc14;
 343
 344          pOut[6 ] = acc7 ;
 345          d2 += a2 * acc14;
 346
 347          /* Sample 15. 5 cycles */
 348          pOut[7 ] = acc8 ;
 349          pOut[8 ] = acc9 ;
 350          acc15 = b0 * Xn15 + d1;
 351
 352          pOut[9 ] = acc10;
 353          d1 = b1 * Xn15 + d2;
 354
 355          pOut[10] = acc11;
 356          d2 = b2 * Xn15;
 357
 358          pOut[11] = acc12;
 359          d1 += a1 * acc15;
 360
 361          pOut[12] = acc13;
 362          d2 += a2 * acc15;
 363
 364          /* Sample 16. 5 cycles */
 365          pOut[13] = acc14;
 366          acc16 = b0 * Xn16 + d1;
 367
 368          pOut[14] = acc15;
 369          d1 = b1 * Xn16 + d2;
 370
 371          pOut[15] = acc16;
 372          d2 = b2 * Xn16;
 373
 374          sample--;
 375          d1 += a1 * acc16;
 376
 377          pOut += 16;
 378          d2 += a2 * acc16;
 379       }
 380
 381       sample = blockSize & 0xFu;
 382       while(sample > 0u) {
 383          Xn1 = *pIn;
 384          acc1 = b0 * Xn1 + d1;
 385
 386          pIn++;
 387          d1 = b1 * Xn1 + d2;
 388
 389          *pOut = acc1;
 390          d2 = b2 * Xn1;
 391
 392          pOut++;
 393          d1 += a1 * acc1;
 394
 395          sample--;
 396          d2 += a2 * acc1;
 397       }
 398
 399       /* Store the updated state variables back into the state array */
 400       pState[0] = d1;
 401       /* The current stage input is given as the output to the next stage */
 402       pIn = pDst;
 403
 404       pState[1] = d2;
 405       /* decrement the loop counter */
 406       stage--;
 407
 408       pState += 2u;
 409
 410       /*Reset the output working pointer */
 411       pOut = pDst;
 412
 413    } while(stage > 0u);
 414
 415 #elif defined(ARM_MATH_CM0_FAMILY)
 416
 417    /* Run the below code for Cortex-M0 */
 418
 419    do
 420    {
 421       /* Reading the coefficients */
 422       b0 = *pCoeffs++;
 423       b1 = *pCoeffs++;
 424       b2 = *pCoeffs++;
 425       a1 = *pCoeffs++;
 426       a2 = *pCoeffs++;
 427
 428       /*Reading the state values */
 429       d1 = pState[0];
 430       d2 = pState[1];
 431
 432
 433       sample = blockSize;
 434
 435       while(sample > 0u)
 436       {
 437          /* Read the input */
 438          Xn1 = *pIn++;
 439
 440          /* y[n] = b0 * x[n] + d1 */
 441          acc1 = (b0 * Xn1) + d1;
 442
 443          /* Store the result in the accumulator in the destination buffer. */
 444          *pOut++ = acc1;
 445
 446          /* Every time after the output is computed state should be updated. */
 447          /* d1 = b1 * x[n] + a1 * y[n] + d2 */
 448          d1 = ((b1 * Xn1) + (a1 * acc1)) + d2;
 449
 450          /* d2 = b2 * x[n] + a2 * y[n] */
 451          d2 = (b2 * Xn1) + (a2 * acc1);
 452
 453          /* decrement the loop counter */
 454          sample--;
 455       }
 456
 457       /* Store the updated state variables back into the state array */
 458       *pState++ = d1;
 459       *pState++ = d2;
 460
 461       /* The current stage input is given as the output to the next stage */
 462       pIn = pDst;
 463
 464       /*Reset the output working pointer */
 465       pOut = pDst;
 466
 467       /* decrement the loop counter */
 468       stage--;
 469
 470    } while(stage > 0u);
 471
 472 #else
 473
 474    float32_t Xn2, Xn3, Xn4;                       /*  Input State variables     */
 475    float32_t acc2, acc3, acc4;                            /*  accumulator               */
 476
 477
 478    float32_t p0, p1, p2, p3, p4, A1;
 479
 480    /* Run the below code for Cortex-M4 and Cortex-M3 */
 481    do
 482    {
 483       /* Reading the coefficients */
 484       b0 = *pCoeffs++;
 485       b1 = *pCoeffs++;
 486       b2 = *pCoeffs++;
 487       a1 = *pCoeffs++;
 488       a2 = *pCoeffs++;
 489
 490
 491       /*Reading the state values */
 492       d1 = pState[0];
 493       d2 = pState[1];
 494
 495       /* Apply loop unrolling and compute 4 output values simultaneously. */
 496       sample = blockSize >> 2u;
 497
 498       /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
 499    ** a second loop below computes the remaining 1 to 3 samples. */
 500       while(sample > 0u) {
 501
 502          /* y[n] = b0 * x[n] + d1 */
 503          /* d1 = b1 * x[n] + a1 * y[n] + d2 */
 504          /* d2 = b2 * x[n] + a2 * y[n] */
 505
 506          /* Read the four inputs */
 507          Xn1 = pIn[0];
 508          Xn2 = pIn[1];
 509          Xn3 = pIn[2];
 510          Xn4 = pIn[3];
 511          pIn += 4;
 512
 513          p0 = b0 * Xn1;
 514          p1 = b1 * Xn1;
 515          acc1 = p0 + d1;
 516          p0 = b0 * Xn2;
 517          p3 = a1 * acc1;
 518          p2 = b2 * Xn1;
 519          A1 = p1 + p3;
 520          p4 = a2 * acc1;
 521          d1 = A1 + d2;
 522          d2 = p2 + p4;
 523
 524          p1 = b1 * Xn2;
 525          acc2 = p0 + d1;
 526          p0 = b0 * Xn3;
 527          p3 = a1 * acc2;
 528          p2 = b2 * Xn2;
 529          A1 = p1 + p3;
 530          p4 = a2 * acc2;
 531          d1 = A1 + d2;
 532          d2 = p2 + p4;
 533
 534          p1 = b1 * Xn3;
 535          acc3 = p0 + d1;
 536          p0 = b0 * Xn4;
 537          p3 = a1 * acc3;
 538          p2 = b2 * Xn3;
 539          A1 = p1 + p3;
 540          p4 = a2 * acc3;
 541          d1 = A1 + d2;
 542          d2 = p2 + p4;
 543
 544          acc4 = p0 + d1;
 545          p1 = b1 * Xn4;
 546          p3 = a1 * acc4;
 547          p2 = b2 * Xn4;
 548          A1 = p1 + p3;
 549          p4 = a2 * acc4;
 550          d1 = A1 + d2;
 551          d2 = p2 + p4;
 552
 553          pOut[0] = acc1;
 554          pOut[1] = acc2;
 555          pOut[2] = acc3;
 556          pOut[3] = acc4;
 557                  pOut += 4;
 558
 559          sample--;
 560       }
 561
 562       sample = blockSize & 0x3u;
 563       while(sample > 0u) {
 564          Xn1 = *pIn++;
 565
 566          p0 = b0 * Xn1;
 567          p1 = b1 * Xn1;
 568          acc1 = p0 + d1;
 569          p3 = a1 * acc1;
 570          p2 = b2 * Xn1;
 571          A1 = p1 + p3;
 572          p4 = a2 * acc1;
 573          d1 = A1 + d2;
 574          d2 = p2 + p4;
 575
 576          *pOut++ = acc1;
 577
 578          sample--;
 579       }
 580
 581       /* Store the updated state variables back into the state array */
 582       *pState++ = d1;
 583       *pState++ = d2;
 584
 585       /* The current stage input is given as the output to the next stage */
 586       pIn = pDst;
 587
 588       /*Reset the output working pointer */
 589       pOut = pDst;
 590
 591       /* decrement the loop counter */
 592       stage--;
 593
 594    } while(stage > 0u);
 595
 596 #endif
 597
 598 }
 599 LOW_OPTIMIZATION_EXIT
 600
 601 /**
 602    * @} end of BiquadCascadeDF2T group
 603    */