localhost Git - SCSI2SD-V6.git/commitdiff
Performance improvements for 2021 boards
author Michael McMaster <michael@codesrc.com>
Sun, 30 Jan 2022 08:32:21 +0000 (18:32 +1000)
committer Michael McMaster <michael@codesrc.com>
Fri, 4 Feb 2022 06:04:06 +0000 (16:04 +1000)
CHANGELOG
src/firmware/bsp_driver_sd.c
src/firmware/disk.c
src/firmware/scsi.c
src/firmware/scsiPhy.c
src/firmware/sd.c
src/firmware/sd.h
src/firmware/time.h

index a385783242f88993dec1be4301c0640ce2492f67..89dd11aed11080e524f7fe3d194b86c14cc3643e 100644 (file)
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,8 @@
 2022xxxx        6.4.14
     - Fix firmware version displaying as "0.0" in scsi2sd-util when there is no
     SD card inserted.
+    - Reduce some delays for slight performance improvements
+    - Use SD High-Speed mode on V6 2021 hardware.
 
 20220121        6.4.13
     - Fix SCSI writes with sector sizes larger than 512.
index dc09fed11a3cdc15849ca24d6559e5097484bb43..51f047181b3b7eced23b9c5a1d1e2899aac73ed3 100755 (executable)
 /* Includes ------------------------------------------------------------------*/
 #include "bsp_driver_sd.h"
 #include "sd.h"
+#include "time.h"
 
 /* Extern variables ---------------------------------------------------------*/ 
   
 extern SD_HandleTypeDef hsd;
 
+static uint8_t HighSpeedSwitch();
+
 /**
   * @brief  Initializes the SD card device.
   * @param  None
@@ -47,111 +50,114 @@ extern SD_HandleTypeDef hsd;
   */
 uint8_t BSP_SD_Init(void)
 {
-  uint8_t SD_state = MSD_OK;
-  /* Check if the SD card is plugged in the slot */
-  if (BSP_SD_IsDetected() != SD_PRESENT)
-  {
-    return MSD_ERROR;
-  }
-  SD_state = HAL_SD_Init(&hsd);
-#ifdef BUS_4BITS
-  if (SD_state == HAL_OK)
-  {
-    if (HAL_SD_ConfigWideBusOperation(&hsd, SDIO_BUS_WIDE_4B) != HAL_OK)
+    uint8_t SD_state = MSD_OK;
+    /* Check if the SD card is plugged in the slot */
+    if (BSP_SD_IsDetected() != SD_PRESENT)
     {
-      SD_state = MSD_ERROR;
+        return MSD_ERROR;
     }
-    else
+    SD_state = HAL_SD_Init(&hsd);
+#ifdef BUS_4BITS
+    if (SD_state == HAL_OK)
     {
-      SD_state = MSD_OK;
+        if (HAL_SD_ConfigWideBusOperation(&hsd, SDIO_BUS_WIDE_4B) != HAL_OK)
+        {
+            SD_state = MSD_ERROR;
+        }
+        else
+        {
+            // Save the wide mode setting for when we call SDIO_Init again
+            // for high speed mode.
+            hsd.Init.BusWide = SDIO_BUS_WIDE_4B;
+            SD_state = MSD_OK;
 
 // Clock bypass mode is broken on STM32F205
-// #ifdef STM32F4xx
-#if 0
-      uint8_t SD_hs[64]  = {0};
-      //uint32_t SD_scr[2] = {0, 0};
-      //uint32_t SD_SPEC   = 0 ;
-      uint32_t count = 0;
-      uint32_t *tempbuff = (uint32_t *)SD_hs;
-
-      // Prepare to read 64 bytes status data
-      SDIO_DataInitTypeDef config;
-      config.DataTimeOut   = SDMMC_DATATIMEOUT;
-      config.DataLength    = 64;
-      config.DataBlockSize = SDIO_DATABLOCK_SIZE_64B;
-      config.TransferDir   = SDIO_TRANSFER_DIR_TO_SDIO;
-      config.TransferMode  = SDIO_TRANSFER_MODE_BLOCK;
-      config.DPSM          = SDIO_DPSM_ENABLE;
-      (void)SDIO_ConfigData(hsd.Instance, &config);
-
-      // High speed switch.
-      // SDR25 (25MB/s) mode 0x80FFFF01
-      // Which is the max without going to 1.8v
-      uint32_t errorstate = SDMMC_CmdSwitch(hsd.Instance, 0x80FFFF01);
-
-      // Now we read some status data
-
-      if (errorstate == HAL_SD_ERROR_NONE)
-      {
-          while(!__HAL_SD_GET_FLAG(&hsd, SDIO_FLAG_RXOVERR | SDIO_FLAG_DCRCFAIL | SDIO_FLAG_DTIMEOUT | SDIO_FLAG_DATAEND/* | SDIO_FLAG_STBITERR*/))
-          {
-              if (__HAL_SD_GET_FLAG(&hsd, SDIO_FLAG_RXFIFOHF))
-              {
-                  for (count = 0; count < 8; count++)
-                  {
-                      *(tempbuff + count) = SDIO_ReadFIFO(hsd.Instance);
-                  }
-
-                  tempbuff += 8;
-              }
-          }
-
-          if (__HAL_SD_GET_FLAG(&hsd, SDIO_FLAG_DTIMEOUT))
-          {
-              __HAL_SD_CLEAR_FLAG(&hsd, SDIO_FLAG_DTIMEOUT);
-              SD_state = MSD_ERROR;
-          }
-          else if (__HAL_SD_GET_FLAG(&hsd, SDIO_FLAG_DCRCFAIL))
-          {
-              __HAL_SD_CLEAR_FLAG(&hsd, SDIO_FLAG_DCRCFAIL);
-              SD_state = MSD_ERROR;
-          }
-          else if (__HAL_SD_GET_FLAG(&hsd, SDIO_FLAG_RXOVERR))
-          {
-              __HAL_SD_CLEAR_FLAG(&hsd, SDIO_FLAG_RXOVERR);
-              SD_state = MSD_ERROR;
-          }
-          /*else if (__HAL_SD_GET_FLAG(&hsd, SDIO_FLAG_STBITERR))
+#ifdef STM32F4xx
+            if (hsd.SdCard.CardType == CARD_SDHC_SDXC)
             {
-            __HAL_SD_CLEAR_FLAG(&hsd, SDIO_FLAG_STBITERR);
-            SD_state = MSD_ERROR;
-            }*/
-          else
-          {
-              count = SD_DATATIMEOUT;
-
-              while ((__HAL_SD_GET_FLAG(&hsd, SDIO_FLAG_RXDAVL)) && (count > 0))
-              {
-                  *tempbuff = SDIO_ReadFIFO(hsd.Instance);
-                  tempbuff++;
-                  count--;
-              }
-
-              /* Clear all the static flags */
-              __HAL_SD_CLEAR_FLAG(&hsd, SDIO_STATIC_FLAGS);
-
-              // After 8 "SD" clocks we can change speed
-              // Low-level init for the bypass. Changes registers only
-              hsd.Init.ClockBypass = SDIO_CLOCK_BYPASS_ENABLE;
-              SDIO_Init(hsd.Instance, hsd.Init); 
-
-          }
-      }
+                HighSpeedSwitch();
+            }
 #endif
+        }
     }
-  }
 #endif
-  return SD_state;
+    return SD_state;
+}
+
+static uint8_t HighSpeedSwitch()
+{
+    uint8_t SD_state = MSD_OK;
+
+    // Prepare to read 64 bytes status data
+    SDIO_DataInitTypeDef config;
+    config.DataTimeOut   = SDMMC_DATATIMEOUT;
+    config.DataLength    = 64;
+    config.DataBlockSize = SDIO_DATABLOCK_SIZE_64B;
+    config.TransferDir   = SDIO_TRANSFER_DIR_TO_SDIO;
+    config.TransferMode  = SDIO_TRANSFER_MODE_BLOCK;
+    config.DPSM          = SDIO_DPSM_ENABLE;
+    (void)SDIO_ConfigData(hsd.Instance, &config);
+
+    // High speed switch.
+    // SDR25 (25MB/s) mode 0x80FFFF01 (NOTE(review): the call below passes 0x80FFFFF1 - confirm which is intended)
+    // Which is the max without going to 1.8v
+    uint32_t errorstate = SDMMC_CmdSwitch(hsd.Instance, 0x80FFFFF1);
+
+    // Now we read some status data
+
+    if (errorstate == HAL_SD_ERROR_NONE)
+    {
+        uint32_t statusByteCount = 0;
+        while(!__HAL_SD_GET_FLAG(&hsd, SDIO_FLAG_RXOVERR | SDIO_FLAG_DCRCFAIL | SDIO_FLAG_DTIMEOUT | SDIO_FLAG_DBCKEND))
+        {
+            if (__HAL_SD_GET_FLAG(&hsd, SDIO_FLAG_RXFIFOHF) && statusByteCount < 64)
+            {
+                for ( uint32_t i = 0; i < 8; i++, statusByteCount += 4)
+                {
+                    SDIO_ReadFIFO(hsd.Instance);
+                }
+            }
+        }
+
+        if (__HAL_SD_GET_FLAG(&hsd, SDIO_FLAG_DTIMEOUT))
+        {
+            __HAL_SD_CLEAR_FLAG(&hsd, SDIO_FLAG_DTIMEOUT);
+            SD_state = MSD_ERROR;
+        }
+        else if (__HAL_SD_GET_FLAG(&hsd, SDIO_FLAG_DCRCFAIL))
+        {
+            __HAL_SD_CLEAR_FLAG(&hsd, SDIO_FLAG_DCRCFAIL);
+            SD_state = MSD_ERROR;
+        }
+        else if (__HAL_SD_GET_FLAG(&hsd, SDIO_FLAG_RXOVERR))
+        {
+            __HAL_SD_CLEAR_FLAG(&hsd, SDIO_FLAG_RXOVERR);
+            SD_state = MSD_ERROR;
+        }
+        else
+        {
+            // Read remaining data, could be the CRC bytes.
+            uint32_t count = SD_DATATIMEOUT;
+            while ((__HAL_SD_GET_FLAG(&hsd, SDIO_FLAG_RXDAVL)) && (count > 0))
+            {
+                SDIO_ReadFIFO(hsd.Instance);
+                count--;
+            }
+
+            /* Clear all the static flags */
+            __HAL_SD_CLEAR_FLAG(&hsd, SDIO_STATIC_FLAGS);
+
+            // After 8 "SD" clocks we can change speed
+            // Low-level init for the bypass. Changes registers only
+            hsd.Init.ClockBypass = SDIO_CLOCK_BYPASS_ENABLE;
+            SDIO_Init(hsd.Instance, hsd.Init); 
+
+            // 8 clocks is 160ns at 50MHz
+            s2s_delay_ns(200);
+        }
+    }
+
+    return SD_state;
 }
 
 /**
index f68cb2427188623938dce231e9858d87475c563e..dac1715bb4e827dc90f14e1bbb04690bac03283d 100755 (executable)
@@ -38,6 +38,8 @@
 #include "time.h"\r
 #include "bsp.h"\r
 \r
+#include "led.h"\r
+\r
 #include <string.h>\r
 \r
 // Global\r
@@ -216,15 +218,6 @@ static void doWrite(uint32_t lba, uint32_t blocks)
         // No need for single-block writes atm.  Overhead of the\r
         // multi-block write is minimal.\r
         transfer.multiBlock = 1;\r
-\r
-\r
-        // TODO uint32_t sdLBA =\r
-// TODO             SCSISector2SD(\r
-    // TODO             scsiDev.target->cfg->sdSectorStart,\r
-        // TODO         bytesPerSector,\r
-            // TODO     lba);\r
-        // TODO uint32_t sdBlocks = blocks * SDSectorsPerSCSISector(bytesPerSector);\r
-        // TODO sdWriteMultiSectorPrep(sdLBA, sdBlocks);\r
     }\r
 }\r
 \r
@@ -303,7 +296,15 @@ static void doSeek(uint32_t lba)
     }\r
     else\r
     {\r
-        s2s_delay_ms(10);\r
+        if (unlikely(scsiDev.target->cfg->deviceType == S2S_CFG_FLOPPY_14MB) ||\r
+            scsiDev.compatMode < COMPAT_SCSI2)\r
+        {\r
+            s2s_delay_ms(10);\r
+        }\r
+        else\r
+        {\r
+            s2s_delay_ms(1);\r
+        }\r
     }\r
 }\r
 \r
@@ -558,411 +559,626 @@ int scsiDiskCommand()
     return commandHandled;\r
 }\r
 \r
-void scsiDiskPoll()\r
+static void diskDataInBuffered(int totalSDSectors, uint32_t sdLBA, int useSlowDataCount, uint32_t* phaseChangeDelayNs)\r
 {\r
     uint32_t bytesPerSector = scsiDev.target->liveCfg.bytesPerSector;\r
 \r
-    if (scsiDev.phase == DATA_IN &&\r
-        transfer.currentBlock != transfer.blocks)\r
-    {\r
-        // Take responsibility for waiting for the phase delays\r
-        uint32_t phaseChangeDelayUs = scsiEnterPhaseImmediate(DATA_IN);\r
-\r
-        int totalSDSectors =\r
-            transfer.blocks * SDSectorsPerSCSISector(bytesPerSector);\r
-        uint32_t sdLBA =\r
-            SCSISector2SD(\r
-                scsiDev.target->cfg->sdSectorStart,\r
-                bytesPerSector,\r
-                transfer.lba);\r
+    const int sdPerScsi = SDSectorsPerSCSISector(bytesPerSector);\r
+    const int buffers = sizeof(scsiDev.data) / SD_SECTOR_SIZE;\r
+    int prep = 0;\r
+    int i = 0;\r
+    int scsiActive __attribute__((unused)) = 0; // unused if DMA disabled\r
+    int sdActive = 0;\r
 \r
-        const int sdPerScsi = SDSectorsPerSCSISector(bytesPerSector);\r
-        const int buffers = sizeof(scsiDev.data) / SD_SECTOR_SIZE;\r
-        int prep = 0;\r
-        int i = 0;\r
-        int scsiActive __attribute__((unused)) = 0; // unused if DMA disabled\r
-        int sdActive = 0;\r
+    int gotHalf = 0;\r
+    int sentHalf = 0;\r
 \r
-        // It's highly unlikely that someone is going to use huge transfers\r
-        // per scsi command, but if they do it'll be slower than usual.\r
-        uint32_t totalScsiBytes = transfer.blocks * bytesPerSector;\r
-        int useSlowDataCount = totalScsiBytes >= SCSI_XFER_MAX;\r
-        if (!useSlowDataCount)\r
+    while ((i < totalSDSectors) &&\r
+        likely(scsiDev.phase == DATA_IN) &&\r
+        likely(!scsiDev.resetFlag))\r
+    {\r
+        int completedDmaSectors;\r
+        if (sdActive && (completedDmaSectors = sdReadDMAPoll(sdActive)))\r
         {\r
-            scsiSetDataCount(totalScsiBytes);\r
+            prep += completedDmaSectors;\r
+            sdActive -= completedDmaSectors;\r
+            gotHalf = 0;\r
         }\r
-\r
-        while ((i < totalSDSectors) &&\r
-            likely(scsiDev.phase == DATA_IN) &&\r
-            likely(!scsiDev.resetFlag))\r
+        else if (sdActive > 1)\r
         {\r
-            int completedDmaSectors;\r
-            if (sdActive && (completedDmaSectors = sdReadDMAPoll(sdActive)))\r
+            if ((scsiDev.data[SD_SECTOR_SIZE * (prep % buffers) + 510] != 0xAA) ||\r
+                (scsiDev.data[SD_SECTOR_SIZE * (prep % buffers) + 511] != 0x33))\r
             {\r
-                prep += completedDmaSectors;\r
-                sdActive -= completedDmaSectors;\r
-            } else if (sdActive > 1)\r
+                prep += 1;\r
+                sdActive -= 1;\r
+                gotHalf = 0;\r
+            }\r
+            else if (scsiDev.data[SD_SECTOR_SIZE * (prep % buffers) + 127] != 0xAA)\r
             {\r
-                if ((scsiDev.data[SD_SECTOR_SIZE * (prep % buffers) + 510] != 0xAA) ||\r
-                    (scsiDev.data[SD_SECTOR_SIZE * (prep % buffers) + 511] != 0x33))\r
-                {\r
-                    prep += 1;\r
-                    sdActive -= 1;\r
-                }\r
+                // Half-block\r
+                gotHalf = 1;\r
             }\r
+        }\r
 \r
-            if (!sdActive &&\r
-                (prep - i < buffers) &&\r
-                (prep < totalSDSectors) &&\r
-                ((totalSDSectors - prep) >= sdPerScsi) &&\r
-                (likely(!useSlowDataCount) || scsiPhyComplete()) &&\r
-                (HAL_SD_GetState(&hsd) != HAL_SD_STATE_BUSY)) // rx complete but IRQ not fired yet.\r
-            {\r
-                // Start an SD transfer if we have space.\r
-                uint32_t startBuffer = prep % buffers;\r
-                uint32_t sectors = totalSDSectors - prep;\r
-                uint32_t freeBuffers = buffers - (prep - i);\r
+        if (!sdActive &&\r
+            (prep - i < buffers) &&\r
+            (prep < totalSDSectors) &&\r
+            ((totalSDSectors - prep) >= sdPerScsi) &&\r
+            (likely(!useSlowDataCount) || scsiPhyComplete()) &&\r
+            (HAL_SD_GetState(&hsd) != HAL_SD_STATE_BUSY)) // rx complete but IRQ not fired yet.\r
+        {\r
+            // Start an SD transfer if we have space.\r
+            uint32_t startBuffer = prep % buffers;\r
+            uint32_t sectors = totalSDSectors - prep;\r
+            uint32_t freeBuffers = buffers - (prep - i);\r
 \r
-                uint32_t contiguousBuffers = buffers - startBuffer;\r
-                freeBuffers = freeBuffers < contiguousBuffers\r
-                    ? freeBuffers : contiguousBuffers;\r
-                sectors = sectors < freeBuffers ? sectors : freeBuffers;\r
+            uint32_t contiguousBuffers = buffers - startBuffer;\r
+            freeBuffers = freeBuffers < contiguousBuffers\r
+                ? freeBuffers : contiguousBuffers;\r
+            sectors = sectors < freeBuffers ? sectors : freeBuffers;\r
 \r
-                if (sectors > 128) sectors = 128; // 65536 DMA limit !!\r
+            if (sectors > 128) sectors = 128; // 65536 DMA limit !!\r
 \r
-                // Round-down when we have odd sector sizes.\r
-                if (sdPerScsi != 1)\r
-                {\r
-                    sectors = (sectors / sdPerScsi) * sdPerScsi;\r
-                }\r
+            // Round-down when we have odd sector sizes.\r
+            if (sdPerScsi != 1)\r
+            {\r
+                sectors = (sectors / sdPerScsi) * sdPerScsi;\r
+            }\r
 \r
-                for (int dodgy = 0; dodgy < sectors; dodgy++)\r
-                {\r
-                    scsiDev.data[SD_SECTOR_SIZE * (startBuffer + dodgy) + 510] = 0xAA;\r
-                    scsiDev.data[SD_SECTOR_SIZE * (startBuffer + dodgy) + 511] = 0x33;\r
-                }\r
+            for (int dodgy = 0; dodgy < sectors; dodgy++)\r
+            {\r
+                scsiDev.data[SD_SECTOR_SIZE * (startBuffer + dodgy) + 127] = 0xAA;\r
 \r
-                sdReadDMA(sdLBA + prep, sectors, &scsiDev.data[SD_SECTOR_SIZE * startBuffer]);\r
+                scsiDev.data[SD_SECTOR_SIZE * (startBuffer + dodgy) + 510] = 0xAA;\r
+                scsiDev.data[SD_SECTOR_SIZE * (startBuffer + dodgy) + 511] = 0x33;\r
+            }\r
 \r
-                sdActive = sectors;\r
+            sdReadDMA(sdLBA + prep, sectors, &scsiDev.data[SD_SECTOR_SIZE * startBuffer]);\r
 \r
-                if (useSlowDataCount)\r
-                {\r
-                    scsiSetDataCount((sectors / sdPerScsi) * bytesPerSector);\r
-                }\r
+            sdActive = sectors;\r
 \r
-                // Wait now that the SD card is busy\r
-                // Chances are we've probably already waited sufficient time,\r
-                // but it's hard to measure microseconds cheaply. So just wait\r
-                // extra just-in-case. Hopefully it's in parallel with dma.\r
-                if (phaseChangeDelayUs > 0)\r
-                {\r
-                    s2s_delay_us(phaseChangeDelayUs);\r
-                    phaseChangeDelayUs = 0;\r
-                }\r
+            if (useSlowDataCount)\r
+            {\r
+                scsiSetDataCount((sectors / sdPerScsi) * bytesPerSector);\r
             }\r
 \r
-            if (((prep - i) > 0) &&\r
-                scsiFifoReady())\r
+            // Wait now that the SD card is busy\r
+            // Chances are we've probably already waited sufficient time,\r
+            // but it's hard to measure microseconds cheaply. So just wait\r
+            // extra just-in-case. Hopefully it's in parallel with dma.\r
+            if (*phaseChangeDelayNs > 0)\r
             {\r
-                int dmaBytes = SD_SECTOR_SIZE;\r
-                if ((i % sdPerScsi) == (sdPerScsi - 1))\r
-                {\r
-                    dmaBytes = bytesPerSector % SD_SECTOR_SIZE;\r
-                    if (dmaBytes == 0) dmaBytes = SD_SECTOR_SIZE;\r
-                }\r
+                s2s_delay_ns(*phaseChangeDelayNs);\r
+                *phaseChangeDelayNs = 0;\r
+            }\r
+        }\r
+\r
+        int fifoReady = scsiFifoReady();\r
+        if (((prep - i) > 0) && fifoReady)\r
+        {\r
+            int dmaBytes = SD_SECTOR_SIZE;\r
+            if ((i % sdPerScsi) == (sdPerScsi - 1))\r
+            {\r
+                dmaBytes = bytesPerSector % SD_SECTOR_SIZE;\r
+                if (dmaBytes == 0) dmaBytes = SD_SECTOR_SIZE;\r
+            }\r
 \r
-                uint8_t* scsiDmaData = &(scsiDev.data[SD_SECTOR_SIZE * (i % buffers)]);\r
-                scsiWritePIO(scsiDmaData, dmaBytes);\r
+            uint8_t* scsiDmaData = &(scsiDev.data[SD_SECTOR_SIZE * (i % buffers)]);\r
 \r
-                ++i;\r
+            if (sentHalf)\r
+            {\r
+                scsiDmaData += SD_SECTOR_SIZE / 2;\r
+                dmaBytes -= (SD_SECTOR_SIZE / 2);\r
             }\r
-        }\r
+            scsiWritePIO(scsiDmaData, dmaBytes);\r
 \r
-        if (phaseChangeDelayUs > 0 && !scsiDev.resetFlag) // zero bytes ?\r
+            ++i;\r
+            sentHalf = 0;\r
+            gotHalf = 0;\r
+        }\r
+        else if (gotHalf && !sentHalf && fifoReady && bytesPerSector == SD_SECTOR_SIZE)\r
         {\r
-            s2s_delay_us(phaseChangeDelayUs);\r
-            phaseChangeDelayUs = 0;\r
+            uint8_t* scsiDmaData = &(scsiDev.data[SD_SECTOR_SIZE * (i % buffers)]);\r
+            scsiWritePIO(scsiDmaData, SD_SECTOR_SIZE / 2);\r
+            sentHalf = 1;\r
         }\r
+    }\r
+}\r
 \r
-        if (scsiDev.resetFlag)\r
+// Transfer from the SD card straight to the SCSI Fifo without storing in memory first for lower latency\r
+// This requires hardware flow control on the SD device (broken on stm32f205)\r
+// Only functional for 512 byte sectors.\r
+static void diskDataInDirect(uint32_t totalSDSectors, uint32_t sdLBA, int useSlowDataCount, uint32_t* phaseChangeDelayNs)\r
+{\r
+    sdReadPIO(sdLBA, totalSDSectors);\r
+\r
+    // Wait while the SD card starts buffering data\r
+    if (*phaseChangeDelayNs > 0)\r
+    {\r
+        s2s_delay_ns(*phaseChangeDelayNs);\r
+        *phaseChangeDelayNs = 0;\r
+    }\r
+\r
+    for (int i = 0; i < totalSDSectors && !scsiDev.resetFlag; ++i)\r
+    {\r
+        // TODO if i %128 == 0, and not in an error state, then do another read.\r
+\r
+        if (useSlowDataCount)\r
         {\r
-            HAL_SD_Abort(&hsd);\r
+            scsiSetDataCount(SD_SECTOR_SIZE);\r
         }\r
-        else\r
+\r
+        // The SCSI fifo is a full sector so we only need to check once.\r
+        while (!scsiFifoReady() && !scsiDev.resetFlag)\r
+        {}\r
+\r
+        int byteCount = 0;\r
+        while(byteCount < SD_SECTOR_SIZE &&\r
+            likely(!scsiDev.resetFlag) &&\r
+            likely(scsiDev.phase == DATA_IN) &&\r
+            !__HAL_SD_GET_FLAG(&hsd, SDIO_FLAG_RXOVERR | SDIO_FLAG_DCRCFAIL | SDIO_FLAG_DTIMEOUT))\r
         {\r
-            // Wait for the SD transfer to complete before we disable IRQs.\r
-            // (Otherwise some cards will cause an error if we don't sent the\r
-            // stop transfer command via the DMA complete handler in time)\r
-            while (HAL_SD_GetState(&hsd) == HAL_SD_STATE_BUSY)\r
+            if(__HAL_SD_GET_FLAG(&hsd, SDIO_FLAG_RXFIFOHF))\r
             {\r
-                // Wait while keeping BSY.\r
+                // The SDIO fifo is 32 x 32bits. As we're using the "half full" flag we must\r
+                // always read half the FIFO.\r
+\r
+                for (int j = 0; j < 4; ++j)\r
+                {\r
+                    uint32_t data[4];\r
+                    data[0] = SDIO_ReadFIFO(hsd.Instance);\r
+                    data[1] = SDIO_ReadFIFO(hsd.Instance);\r
+                    data[2] = SDIO_ReadFIFO(hsd.Instance);\r
+                    data[3] = SDIO_ReadFIFO(hsd.Instance);\r
+\r
+                    *((volatile uint32_t*)SCSI_FIFO_DATA) = data[0];\r
+                    *((volatile uint32_t*)SCSI_FIFO_DATA) = data[1];\r
+                    *((volatile uint32_t*)SCSI_FIFO_DATA) = data[2];\r
+                    *((volatile uint32_t*)SCSI_FIFO_DATA) = data[3];\r
+\r
+                    /*\r
+                    scsiPhyTx32(data[0] & 0xFFFF, data[0] >> 16);\r
+                    scsiPhyTx32(data[1] & 0xFFFF, data[1] >> 16);\r
+                    scsiPhyTx32(data[2] & 0xFFFF, data[2] >> 16);\r
+                    scsiPhyTx32(data[3] & 0xFFFF, data[3] >> 16);\r
+                    */\r
+                }\r
+\r
+                byteCount += 64;\r
             }\r
         }\r
 \r
-        HAL_SD_CardStateTypeDef cardState = HAL_SD_GetCardState(&hsd);\r
-        while (cardState == HAL_SD_CARD_PROGRAMMING || cardState == HAL_SD_CARD_SENDING) \r
+        int error = 0;\r
+        if (__HAL_SD_GET_FLAG(&hsd, SDIO_FLAG_DTIMEOUT))\r
         {\r
-            cardState = HAL_SD_GetCardState(&hsd);\r
-         }\r
+            __HAL_SD_CLEAR_FLAG(&hsd, SDIO_FLAG_DTIMEOUT);\r
+            error = 1;\r
+        }\r
+        else if (__HAL_SD_GET_FLAG(&hsd, SDIO_FLAG_DCRCFAIL))\r
+        {\r
+            __HAL_SD_CLEAR_FLAG(&hsd, SDIO_FLAG_DCRCFAIL);\r
+            error = 1;\r
+        }\r
+        else if (__HAL_SD_GET_FLAG(&hsd, SDIO_FLAG_RXOVERR))\r
+        {\r
+            __HAL_SD_CLEAR_FLAG(&hsd, SDIO_FLAG_RXOVERR);\r
+            error = 1;\r
+        }\r
 \r
-        // We've finished transferring the data to the FPGA, now wait until it's\r
-        // written to he SCSI bus.\r
-        while (!scsiPhyComplete() &&\r
-            likely(scsiDev.phase == DATA_IN) &&\r
+        if (error && scsiDev.phase == DATA_IN)\r
+        {\r
+            __HAL_SD_CLEAR_FLAG(&hsd, SDIO_STATIC_FLAGS);\r
+\r
+            scsiDiskReset();\r
+\r
+            scsiDev.status = CHECK_CONDITION;\r
+            scsiDev.target->sense.code = HARDWARE_ERROR;\r
+            scsiDev.target->sense.asc = LOGICAL_UNIT_COMMUNICATION_FAILURE;\r
+            scsiDev.phase = STATUS;\r
+        }\r
+\r
+        // We need the SCSI FIFO count to complete even after the SD read has failed\r
+        while (byteCount < SD_SECTOR_SIZE &&\r
             likely(!scsiDev.resetFlag))\r
         {\r
-            __disable_irq();\r
-            if (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
-            {\r
-                __WFI();\r
-            }\r
-            __enable_irq();\r
+            scsiPhyTx32(0, 0);\r
+            byteCount += 4;\r
         }\r
 \r
-        if (scsiDev.phase == DATA_IN)\r
+        while (useSlowDataCount && !scsiDev.resetFlag && !scsiPhyComplete())\r
         {\r
-            scsiDev.phase = STATUS;\r
         }\r
-        scsiDiskReset();\r
     }\r
-    else if (scsiDev.phase == DATA_OUT &&\r
-        transfer.currentBlock != transfer.blocks)\r
+\r
+//while(1) { s2s_ledOn(); s2s_delay_ms(1000); s2s_ledOff(); s2s_delay_ms(1000); }\r
+\r
+    /* Send stop transmission command in case of multiblock read */\r
+    if(totalSDSectors > 1U)\r
     {\r
-        scsiEnterPhase(DATA_OUT);\r
+        SDMMC_CmdStopTransfer(hsd.Instance);\r
+    }\r
 \r
-        const int sdPerScsi = SDSectorsPerSCSISector(bytesPerSector);\r
-        int totalSDSectors = transfer.blocks * sdPerScsi;\r
-        uint32_t sdLBA =\r
-            SCSISector2SD(\r
-                scsiDev.target->cfg->sdSectorStart,\r
-                bytesPerSector,\r
-                transfer.lba);\r
-        int i = 0;\r
-        int clearBSY = 0;\r
-        int disconnected = 0;\r
+    // Read remaining data\r
+    uint32_t extraCount = SD_DATATIMEOUT;\r
+    while ((__HAL_SD_GET_FLAG(&hsd, SDIO_FLAG_RXDAVL)) && (extraCount > 0))\r
+    {\r
+        SDIO_ReadFIFO(hsd.Instance);\r
+        extraCount--;\r
+    }\r
 \r
-        int parityError = 0;\r
-        int enableParity = scsiDev.boardCfg.flags & S2S_CFG_ENABLE_PARITY;\r
+    __HAL_SD_CLEAR_FLAG(&hsd, SDIO_STATIC_DATA_FLAGS);\r
+    hsd.State = HAL_SD_STATE_READY;\r
+    \r
+    sdCompleteTransfer(); // Probably overkill\r
+}\r
 \r
-        uint32_t maxSectors = sizeof(scsiDev.data) / SD_SECTOR_SIZE;\r
+static void diskDataIn()\r
+{\r
+    uint32_t bytesPerSector = scsiDev.target->liveCfg.bytesPerSector;\r
+\r
+    // Take responsibility for waiting for the phase delays\r
+    uint32_t phaseChangeDelayNs = scsiEnterPhaseImmediate(DATA_IN);\r
+\r
+    int totalSDSectors =\r
+        transfer.blocks * SDSectorsPerSCSISector(bytesPerSector);\r
+    uint32_t sdLBA =\r
+        SCSISector2SD(\r
+            scsiDev.target->cfg->sdSectorStart,\r
+            bytesPerSector,\r
+            transfer.lba);\r
+\r
+    // It's highly unlikely that someone is going to use huge transfers\r
+    // per scsi command, but if they do it'll be slower than usual.\r
+    uint32_t totalScsiBytes = transfer.blocks * bytesPerSector;\r
+    int useSlowDataCount = totalScsiBytes >= SCSI_XFER_MAX;\r
+    if (!useSlowDataCount)\r
+    {\r
+        scsiSetDataCount(totalScsiBytes);\r
+    }\r
 \r
-        static_assert(SCSI_XFER_MAX >= sizeof(scsiDev.data), "Assumes SCSI_XFER_MAX >= sizeof(scsiDev.data)");\r
+#ifdef STM32F4xx\r
+    // Direct mode requires hardware flow control to be working on the SD peripheral\r
+    if (bytesPerSector == SD_SECTOR_SIZE && totalSDSectors < 128)\r
+    {\r
+        diskDataInDirect(totalSDSectors, sdLBA, useSlowDataCount, &phaseChangeDelayNs);\r
+    }\r
+    else\r
+#endif \r
+    {\r
+        diskDataInBuffered(totalSDSectors, sdLBA, useSlowDataCount, &phaseChangeDelayNs);\r
+    }\r
+\r
+    if (phaseChangeDelayNs > 0 && !scsiDev.resetFlag) // zero bytes ?\r
+    {\r
+        s2s_delay_ns(phaseChangeDelayNs);\r
+        phaseChangeDelayNs = 0;\r
+    }\r
 \r
-        // Start reading and filling fifos as soon as possible.\r
-        // It's highly unlikely that someone is going to use huge transfers\r
-        // per scsi command, but if they do it'll be slower than usual.\r
-        // Note: Happens in Macintosh FWB HDD Toolkit benchmarks which default\r
-        // to 768kb\r
-        uint32_t totalTransferBytes = transfer.blocks * bytesPerSector;\r
-        int useSlowDataCount = totalTransferBytes >= SCSI_XFER_MAX;\r
-        if (!useSlowDataCount)\r
+    if (scsiDev.resetFlag)\r
+    {\r
+        HAL_SD_Abort(&hsd);\r
+    }\r
+    else\r
+    {\r
+        // Wait for the SD transfer to complete before we disable IRQs.\r
+        // (Otherwise some cards will cause an error if we don't send the\r
+        // stop transfer command via the DMA complete handler in time)\r
+        while (HAL_SD_GetState(&hsd) == HAL_SD_STATE_BUSY)\r
         {\r
-            DWT->CYCCNT = 0; // Start counting cycles\r
-            scsiSetDataCount(totalTransferBytes);\r
+            // Wait while keeping BSY.\r
         }\r
+    }\r
 \r
-        int lastWriteSize = 0;\r
+    HAL_SD_CardStateTypeDef cardState = HAL_SD_GetCardState(&hsd);\r
+    while (cardState == HAL_SD_CARD_PROGRAMMING || cardState == HAL_SD_CARD_SENDING) \r
+    {\r
+        cardState = HAL_SD_GetCardState(&hsd);\r
+    }\r
 \r
-        while ((i < totalSDSectors) &&\r
-            likely(scsiDev.phase == DATA_OUT) &&\r
-            likely(!scsiDev.resetFlag))\r
-            // KEEP GOING to ensure FIFOs are in a good state.\r
-            // likely(!parityError || !enableParity))\r
+    // We've finished transferring the data to the FPGA, now wait until it's\r
+    // written to the SCSI bus.\r
+    while (!scsiPhyComplete() &&\r
+        likely(scsiDev.phase == DATA_IN) &&\r
+        likely(!scsiDev.resetFlag))\r
+    {\r
+        __disable_irq();\r
+        if (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
         {\r
-            if (bytesPerSector == SD_SECTOR_SIZE)\r
-            {\r
-                uint32_t maxXferSectors = SCSI_XFER_MAX / SD_SECTOR_SIZE;\r
-                uint32_t rem = totalSDSectors - i;\r
-                uint32_t sectors = rem < maxXferSectors ? rem : maxXferSectors;\r
+            __WFI();\r
+        }\r
+        __enable_irq();\r
+    }\r
 \r
-                uint32_t totalBytes = sectors * SD_SECTOR_SIZE;\r
+    if (scsiDev.phase == DATA_IN)\r
+    {\r
+        scsiDev.phase = STATUS;\r
+    }\r
+    scsiDiskReset();\r
+}\r
 \r
-                if (useSlowDataCount)\r
-                {\r
-                    scsiSetDataCount(totalBytes);\r
-                }\r
+void diskDataOut_512(int totalSDSectors, uint32_t sdLBA, int useSlowDataCount, int* clearBSY, int* parityError)\r
+{\r
+    int i = 0;\r
+    int disconnected = 0;\r
 \r
-                lastWriteSize = sectors;\r
-                HAL_SD_WriteBlocks_DMA(&hsd, i + sdLBA, sectors);\r
-                int j = 0;\r
-                int prep = 0;\r
-                int sdActive = 0;\r
-                uint32_t dmaFinishTime = 0;\r
-                while (j < sectors && !scsiDev.resetFlag)\r
-                {\r
-                    if (sdActive &&\r
-                        HAL_SD_GetState(&hsd) != HAL_SD_STATE_BUSY &&\r
-                        !sdIsBusy())\r
-                    {\r
-                        j += sdActive;\r
-                        sdActive = 0;\r
-                    }\r
-                    if (!sdActive && ((prep - j) > 0))\r
-                    {\r
-                        // Start an SD transfer if we have space.\r
-                        HAL_SD_WriteBlocks_Data(&hsd, &scsiDev.data[SD_SECTOR_SIZE * (j % maxSectors)]);\r
-\r
-                        sdActive = 1;\r
-                    }\r
-\r
-                    if (((prep - j) < maxSectors) &&\r
-                        (prep < sectors) &&\r
-                        scsiFifoReady())\r
-                    {\r
-                        scsiReadPIO(\r
-                            &scsiDev.data[(prep % maxSectors) * SD_SECTOR_SIZE],\r
-                            SD_SECTOR_SIZE,\r
-                            &parityError);\r
-                        prep++;\r
-                        if (prep == sectors)\r
-                        {\r
-                            dmaFinishTime = s2s_getTime_ms();\r
-                        }\r
-                    }\r
-                \r
-                    if (i + prep >= totalSDSectors &&\r
-                        !disconnected &&\r
-                        (!parityError || !enableParity) &&\r
-                        s2s_elapsedTime_ms(dmaFinishTime) >= 180)\r
-                    {\r
-                        // We're transferring over the SCSI bus faster than the SD card\r
-                        // can write.  All data is buffered, and we're just waiting for\r
-                        // the SD card to complete. The host won't let us disconnect.\r
-                        // Some drivers set a 250ms timeout on transfers to complete.\r
-                        // SD card writes are supposed to complete\r
-                        // within 200ms, but sometimes they don't.\r
-                        // Just pretend we're finished.\r
-                        process_Status();\r
-                        clearBSY = process_MessageIn(0); // Will go to BUS_FREE state but keep BSY asserted.\r
-                        disconnected = 1;\r
-                    }\r
-                }\r
+    int enableParity = scsiDev.boardCfg.flags & S2S_CFG_ENABLE_PARITY;\r
 \r
-                if (scsiDev.resetFlag)\r
-                {\r
-                    HAL_SD_Abort(&hsd);\r
-                }\r
-                else\r
-                {\r
-                    while (HAL_SD_GetState(&hsd) == HAL_SD_STATE_BUSY) {} // Waits for DMA to complete\r
-                    if (lastWriteSize > 1)\r
-                    {\r
-                        SDMMC_CmdStopTransfer(hsd.Instance);\r
-                    }\r
-                }\r
+    uint32_t maxSectors = sizeof(scsiDev.data) / SD_SECTOR_SIZE;\r
 \r
-                while (sdIsBusy() &&\r
-                    s2s_elapsedTime_ms(dmaFinishTime) < 180)\r
-                {\r
-                    // Wait while the SD card is writing buffer to flash\r
-                    // The card may remain in the RECEIVING state (even though it's programming) if\r
-                    // it has buffer space to receive more data available.\r
-                }\r
+    int lastWriteSize = 0;\r
 \r
-                if (!disconnected && \r
-                    i + sectors >= totalSDSectors &&\r
-                    (!parityError || !enableParity))\r
-                {\r
-                    // We're transferring over the SCSI bus faster than the SD card\r
-                    // can write.  All data is buffered, and we're just waiting for\r
-                    // the SD card to complete. The host won't let us disconnect.\r
-                    // Some drivers set a 250ms timeout on transfers to complete.\r
-                    // SD card writes are supposed to complete\r
-                    // within 200ms, but sometimes they don't.\r
-                    // Just pretend we're finished.\r
-                    process_Status();\r
-                    clearBSY = process_MessageIn(0); // Will go to BUS_FREE state but keep BSY asserted.\r
-                }\r
+    while ((i < totalSDSectors) &&\r
+        likely(scsiDev.phase == DATA_OUT) &&\r
+        likely(!scsiDev.resetFlag))\r
+        // KEEP GOING to ensure FIFOs are in a good state.\r
+        // likely(!parityError || !enableParity))\r
+    {\r
 \r
-                // Wait while the SD card is writing buffer to flash\r
-                // The card may remain in the RECEIVING state (even though it's programming) if\r
-                // it has buffer space to receive more data available.\r
-                while (sdIsBusy()) {}\r
-                HAL_SD_CardStateTypeDef cardState = HAL_SD_GetCardState(&hsd);\r
-                while (cardState == HAL_SD_CARD_PROGRAMMING || cardState == HAL_SD_CARD_RECEIVING) \r
-                {\r
-                    // Wait while the SD card is writing buffer to flash\r
-                    // The card may remain in the RECEIVING state (even though it's programming) if\r
-                    // it has buffer space to receive more data available.\r
-                    cardState = HAL_SD_GetCardState(&hsd);\r
-                }\r
+        uint32_t maxXferSectors = SCSI_XFER_MAX / SD_SECTOR_SIZE;\r
+        uint32_t rem = totalSDSectors - i;\r
+        uint32_t sectors = rem < maxXferSectors ? rem : maxXferSectors;\r
+\r
+        uint32_t totalBytes = sectors * SD_SECTOR_SIZE;\r
+\r
+        if (useSlowDataCount)\r
+        {\r
+            scsiSetDataCount(totalBytes);\r
+        }\r
 \r
-                i += sectors;\r
+        lastWriteSize = sectors;\r
+        HAL_SD_WriteBlocks_DMA(&hsd, i + sdLBA, sectors);\r
+        int j = 0;\r
+        int prep = 0;\r
+        int sdActive = 0;\r
+        uint32_t dmaFinishTime = 0;\r
+        while (j < sectors && !scsiDev.resetFlag)\r
+        {\r
+            if (sdActive &&\r
+                HAL_SD_GetState(&hsd) != HAL_SD_STATE_BUSY &&\r
+                !sdIsBusy())\r
+            {\r
+                j += sdActive;\r
+                sdActive = 0;\r
             }\r
-            else\r
+            if (!sdActive && ((prep - j) > 0))\r
             {\r
-                // Well, until we have some proper non-blocking SD code, we must\r
-                // do this in a half-duplex fashion. We need to write as much as\r
-                // possible in each SD card transaction.\r
-                // use sg_dd from sg_utils3 tools to test.\r
-\r
-                uint32_t rem = totalSDSectors - i;\r
-                uint32_t sectors;\r
-                if (rem <= maxSectors)\r
-                {\r
-                    sectors = rem;\r
-                }\r
-                else\r
-                {\r
-                    sectors = maxSectors;\r
-                    while (sectors % sdPerScsi) sectors--;\r
-                }\r
-                \r
+                // Start an SD transfer if we have space.\r
+                HAL_SD_WriteBlocks_Data(&hsd, &scsiDev.data[SD_SECTOR_SIZE * (j % maxSectors)]);\r
 \r
-                if (useSlowDataCount)\r
-                {\r
-                    scsiSetDataCount((sectors / sdPerScsi) * bytesPerSector);\r
-                }\r
+                sdActive = 1;\r
+            }\r
 \r
-                for (int scsiSector = i; scsiSector < i + sectors; ++scsiSector)\r
-                {\r
-                    int dmaBytes = SD_SECTOR_SIZE;\r
-                    if ((scsiSector % sdPerScsi) == (sdPerScsi - 1))\r
-                    {\r
-                        dmaBytes = bytesPerSector % SD_SECTOR_SIZE;\r
-                        if (dmaBytes == 0) dmaBytes = SD_SECTOR_SIZE;\r
-                    }\r
-\r
-                    scsiReadPIO(&scsiDev.data[SD_SECTOR_SIZE * (scsiSector - i)], dmaBytes, &parityError);\r
-                }\r
-                if (!parityError || !enableParity)\r
+            if (((prep - j) < maxSectors) &&\r
+                (prep < sectors) &&\r
+                scsiFifoReady())\r
+            {\r
+                scsiReadPIO(\r
+                    &scsiDev.data[(prep % maxSectors) * SD_SECTOR_SIZE],\r
+                    SD_SECTOR_SIZE,\r
+                    parityError);\r
+                prep++;\r
+                if (prep == sectors)\r
                 {\r
-                    BSP_SD_WriteBlocks_DMA(&scsiDev.data[0], i + sdLBA, sectors);\r
+                    dmaFinishTime = s2s_getTime_ms();\r
                 }\r
-                i += sectors;\r
+            }\r
+        \r
+            if (i + prep >= totalSDSectors &&\r
+                !disconnected &&\r
+                (!(*parityError) || !enableParity) &&\r
+                s2s_elapsedTime_ms(dmaFinishTime) >= 180)\r
+            {\r
+                // We're transferring over the SCSI bus faster than the SD card\r
+                // can write.  All data is buffered, and we're just waiting for\r
+                // the SD card to complete. The host won't let us disconnect.\r
+                // Some drivers set a 250ms timeout on transfers to complete.\r
+                // SD card writes are supposed to complete\r
+                // within 200ms, but sometimes they don't.\r
+                // Just pretend we're finished.\r
+                process_Status();\r
+                *clearBSY = process_MessageIn(0); // Will go to BUS_FREE state but keep BSY asserted.\r
+                disconnected = 1;\r
             }\r
         }\r
 \r
-        // Should already be complete here as we've ready the FIFOs\r
-        // by now. Check anyway.\r
-        __disable_irq();\r
-        while (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
+        if (scsiDev.resetFlag)\r
         {\r
-            __WFI();\r
+            HAL_SD_Abort(&hsd);\r
         }\r
-        __enable_irq();\r
+        else\r
+        {\r
+            while (HAL_SD_GetState(&hsd) == HAL_SD_STATE_BUSY) {} // Waits for DMA to complete\r
+            if (lastWriteSize > 1)\r
+            {\r
+                SDMMC_CmdStopTransfer(hsd.Instance);\r
+            }\r
+        }\r
+\r
+        while (sdIsBusy() &&\r
+            s2s_elapsedTime_ms(dmaFinishTime) < 180)\r
+        {\r
+            // Wait while the SD card is writing buffer to flash\r
+            // The card may remain in the RECEIVING state (even though it's programming) if\r
+            // it has buffer space to receive more data available.\r
+        }\r
+\r
+        if (!disconnected && \r
+            i + sectors >= totalSDSectors &&\r
+            (!parityError || !enableParity))\r
+        {\r
+            // We're transferring over the SCSI bus faster than the SD card\r
+            // can write.  All data is buffered, and we're just waiting for\r
+            // the SD card to complete. The host won't let us disconnect.\r
+            // Some drivers set a 250ms timeout on transfers to complete.\r
+            // SD card writes are supposed to complete\r
+            // within 200ms, but sometimes they don't.\r
+            // Just pretend we're finished.\r
+            process_Status();\r
+            *clearBSY = process_MessageIn(0); // Will go to BUS_FREE state but keep BSY asserted.\r
+        }\r
+\r
+        // Wait while the SD card is writing buffer to flash\r
+        // The card may remain in the RECEIVING state (even though it's programming) if\r
+        // it has buffer space to receive more data available.\r
+        while (sdIsBusy()) {}\r
+        HAL_SD_CardStateTypeDef cardState = HAL_SD_GetCardState(&hsd);\r
+        while (cardState == HAL_SD_CARD_PROGRAMMING || cardState == HAL_SD_CARD_RECEIVING) \r
+        {\r
+            // Wait while the SD card is writing buffer to flash\r
+            // The card may remain in the RECEIVING state (even though it's programming) if\r
+            // it has buffer space to receive more data available.\r
+            cardState = HAL_SD_GetCardState(&hsd);\r
+        }\r
+\r
+        i += sectors;\r
+   \r
+    }\r
+}\r
+\r
+// Receives DATA OUT data from the SCSI bus and writes it to the SD card for\r
+// the case where the caller does not use the SD_SECTOR_SIZE fast path.\r
+// Works half-duplex: each pass fills scsiDev.data from the SCSI FIFO, then\r
+// writes that buffer to the card in a single SD transaction.\r
+//\r
+//   sdPerScsi        - number of SD sectors occupied by one SCSI sector.\r
+//   totalSDSectors   - total number of SD sectors to transfer.\r
+//   sdLBA            - SD sector address that the first sector is written to.\r
+//   useSlowDataCount - when non-zero, the SCSI data count is programmed here,\r
+//                      once per chunk.\r
+//   parityError      - passed through to scsiReadPIO(); when it is set and\r
+//                      parity checking is enabled, the SD write is skipped.\r
+void diskDataOut_variableSectorSize(int sdPerScsi, int totalSDSectors, uint32_t sdLBA, int useSlowDataCount, int* parityError)\r
+{\r
+    uint32_t bytesPerSector = scsiDev.target->liveCfg.bytesPerSector;\r
 \r
-        if (clearBSY)\r
+    int i = 0;\r
+\r
+    int enableParity = scsiDev.boardCfg.flags & S2S_CFG_ENABLE_PARITY;\r
+\r
+    // Number of SD sectors that fit in the scsiDev.data buffer.\r
+    uint32_t maxSectors = sizeof(scsiDev.data) / SD_SECTOR_SIZE;\r
+\r
+    while ((i < totalSDSectors) &&\r
+        likely(scsiDev.phase == DATA_OUT) &&\r
+        likely(!scsiDev.resetFlag))\r
+        // KEEP GOING to ensure FIFOs are in a good state.\r
+        // likely(!parityError || !enableParity))\r
+    {\r
+        // Well, until we have some proper non-blocking SD code, we must\r
+        // do this in a half-duplex fashion. We need to write as much as\r
+        // possible in each SD card transaction.\r
+        // use sg_dd from sg_utils3 tools to test.\r
+\r
+        uint32_t rem = totalSDSectors - i;\r
+        uint32_t sectors;\r
+        if (rem <= maxSectors)\r
         {\r
-            enter_BusFree();\r
+            sectors = rem;\r
+        }\r
+        else\r
+        {\r
+            // Fill the buffer, trimmed down to a multiple of sdPerScsi,\r
+            // i.e. a whole number of SCSI sectors.\r
+            sectors = maxSectors;\r
+            while (sectors % sdPerScsi) sectors--;\r
+        }\r
+        \r
+\r
+        if (useSlowDataCount)\r
+        {\r
+            // Data count for this chunk only, in SCSI bytes.\r
+            scsiSetDataCount((sectors / sdPerScsi) * bytesPerSector);\r
         }\r
 \r
-        if (scsiDev.phase == DATA_OUT)\r
+        for (int scsiSector = i; scsiSector < i + sectors; ++scsiSector)\r
         {\r
-            if (parityError &&\r
-                (scsiDev.boardCfg.flags & S2S_CFG_ENABLE_PARITY))\r
+            // Every SD sector receives SD_SECTOR_SIZE bytes, except the last\r
+            // SD sector of each SCSI sector, which receives only the\r
+            // remainder (or a full sector when there is no remainder).\r
+            int dmaBytes = SD_SECTOR_SIZE;\r
+            if ((scsiSector % sdPerScsi) == (sdPerScsi - 1))\r
             {\r
-                scsiDev.target->sense.code = ABORTED_COMMAND;\r
-                scsiDev.target->sense.asc = SCSI_PARITY_ERROR;\r
-                scsiDev.status = CHECK_CONDITION;;\r
+                dmaBytes = bytesPerSector % SD_SECTOR_SIZE;\r
+                if (dmaBytes == 0) dmaBytes = SD_SECTOR_SIZE;\r
             }\r
-            scsiDev.phase = STATUS;\r
+\r
+            scsiReadPIO(&scsiDev.data[SD_SECTOR_SIZE * (scsiSector - i)], dmaBytes, parityError);\r
         }\r
-        scsiDiskReset();\r
+        // Data received with a parity error is not written to the card when\r
+        // parity checking is enabled; the loop still advances so that the\r
+        // rest of the transfer is received.\r
+        if (!(*parityError) || !enableParity)\r
+        {\r
+            BSP_SD_WriteBlocks_DMA(&scsiDev.data[0], i + sdLBA, sectors);\r
+        }\r
+        i += sectors;\r
     }\r
 }\r
 \r
+// Runs the DATA OUT phase of a disk write.\r
+//\r
+// Enters DATA_OUT, works out which SD sectors the global `transfer` request\r
+// covers, and hands the receive-and-write loop to diskDataOut_512() when the\r
+// SCSI sector size equals SD_SECTOR_SIZE, or to\r
+// diskDataOut_variableSectorSize() otherwise. Afterwards it waits until\r
+// scsiPhyComplete() reports true (or a reset is flagged), calls\r
+// enter_BusFree() if the helper set the clearBSY output, and, if the device\r
+// is still in DATA_OUT, moves it to STATUS. A parity error seen while parity\r
+// checking is enabled is reported as ABORTED_COMMAND / SCSI_PARITY_ERROR.\r
+void diskDataOut()\r
+{\r
+    uint32_t sectorBytes = scsiDev.target->liveCfg.bytesPerSector;\r
+\r
+    scsiEnterPhase(DATA_OUT);\r
+\r
+    const int sdPerScsi = SDSectorsPerSCSISector(sectorBytes);\r
+    int sdSectorCount = transfer.blocks * sdPerScsi;\r
+    uint32_t firstSdSector =\r
+        SCSISector2SD(\r
+            scsiDev.target->cfg->sdSectorStart,\r
+            sectorBytes,\r
+            transfer.lba);\r
+\r
+    // Outputs filled in by the transfer helpers.\r
+    int releaseBus = 0;\r
+    int sawParityError = 0;\r
+\r
+    static_assert(SCSI_XFER_MAX >= sizeof(scsiDev.data), "Assumes SCSI_XFER_MAX >= sizeof(scsiDev.data)");\r
+\r
+    // Program the whole transfer length up front so that the fifos start\r
+    // filling as soon as possible. Transfers of SCSI_XFER_MAX bytes or more\r
+    // instead have the count set chunk by chunk inside the helpers, which is\r
+    // slower than usual. Huge transfers per scsi command are unlikely, but\r
+    // they do happen in Macintosh FWB HDD Toolkit benchmarks, which default\r
+    // to 768kb.\r
+    uint32_t transferBytes = transfer.blocks * sectorBytes;\r
+    int chunkedDataCount = transferBytes >= SCSI_XFER_MAX;\r
+    if (chunkedDataCount == 0)\r
+    {\r
+        DWT->CYCCNT = 0; // Start counting cycles\r
+        scsiSetDataCount(transferBytes);\r
+    }\r
+\r
+    if (sectorBytes != SD_SECTOR_SIZE)\r
+    {\r
+        diskDataOut_variableSectorSize(sdPerScsi, sdSectorCount, firstSdSector, chunkedDataCount, &sawParityError);\r
+    }\r
+    else\r
+    {\r
+        diskDataOut_512(sdSectorCount, firstSdSector, chunkedDataCount, &releaseBus, &sawParityError);\r
+    }\r
+\r
+    // Should already be complete here as we've read the FIFOs\r
+    // by now. Check anyway.\r
+    // NOTE(review): interrupts are masked around the wait, presumably so a\r
+    // completion interrupt cannot slip in between the check and __WFI() -\r
+    // confirm.\r
+    __disable_irq();\r
+    while (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
+    {\r
+        __WFI();\r
+    }\r
+    __enable_irq();\r
+\r
+    if (releaseBus)\r
+    {\r
+        enter_BusFree();\r
+    }\r
+\r
+    if (scsiDev.phase == DATA_OUT)\r
+    {\r
+        if (sawParityError)\r
+        {\r
+            if (scsiDev.boardCfg.flags & S2S_CFG_ENABLE_PARITY)\r
+            {\r
+                scsiDev.target->sense.code = ABORTED_COMMAND;\r
+                scsiDev.target->sense.asc = SCSI_PARITY_ERROR;\r
+                scsiDev.status = CHECK_CONDITION;\r
+            }\r
+        }\r
+        scsiDev.phase = STATUS;\r
+    }\r
+    scsiDiskReset();\r
+}\r
+\r
+\r
+// Continues a disk transfer that is still in progress.\r
+//\r
+// While transfer.currentBlock differs from transfer.blocks, a device in the\r
+// DATA_IN phase is serviced by diskDataIn() and one in the DATA_OUT phase by\r
+// diskDataOut(). In every other situation this does nothing.\r
+void scsiDiskPoll()\r
+{\r
+    if (transfer.currentBlock == transfer.blocks)\r
+    {\r
+        // Nothing outstanding.\r
+        return;\r
+    }\r
+\r
+    if (scsiDev.phase == DATA_IN)\r
+    {\r
+        diskDataIn();\r
+    }\r
+    else if (scsiDev.phase == DATA_OUT)\r
+    {\r
+        diskDataOut();\r
+    }\r
+}\r
+\r
+\r
 void scsiDiskReset()\r
 {\r
     scsiDev.dataPtr = 0;\r
index 930e87d93217d63f6a4c80b260c83fa03c547f46..878729317ac445ff1ba89e8c83568fe1b429dcf3 100755 (executable)
@@ -56,21 +56,21 @@ void enter_BusFree()
                s2s_delay_us(2);\r
        }\r
 \r
-//#if 0\r
+#if 0\r
        if (scsiDev.status != GOOD)// && isDebugEnabled())\r
        {\r
                // We want to capture debug information for failure cases.\r
                s2s_delay_ms(80);\r
        }\r
-//#endif\r
+#endif\r
 \r
 \r
        scsiEnterBusFree();\r
 \r
        // Wait for the initiator to cease driving signals\r
        // Bus settle delay + bus clear delay = 1200ns\r
-       s2s_delay_us(2);\r
-\r
+    // Just waiting the clear delay is sufficient.\r
+       s2s_delay_ns(800);\r
 \r
        s2s_ledOff();\r
        scsiDev.phase = BUS_FREE;\r
index 87c7beb7dc550a9925e1942f2a0c28fa9bebbc57..124d0edab27b295867cb56fc69326c46f9846fb1 100755 (executable)
@@ -503,7 +503,7 @@ static inline void busSettleDelay(void)
 {\r
     // Data Release time (switching IO) = 400ns\r
     // + Bus Settle time (switching phase) = 400ns.\r
-    s2s_delay_us(1); // Close enough.\r
+    s2s_delay_ns(800);\r
 }\r
 \r
 void scsiEnterBusFree()\r
@@ -541,7 +541,7 @@ void scsiEnterPhase(int newPhase)
     uint32_t delay = scsiEnterPhaseImmediate(newPhase);\r
     if (delay > 0)\r
     {\r
-        s2s_delay_us(delay);\r
+        s2s_delay_ns(delay);\r
     }\r
 }\r
 \r
@@ -631,16 +631,21 @@ uint32_t scsiEnterPhaseImmediate(int newPhase)
                 asyncTiming[3]);\r
         }\r
 \r
-        uint32_t delayUs = 0;\r
+        uint32_t delayNs = 0;\r
         if (newPhase >= 0)\r
         {\r
             *SCSI_CTRL_PHASE = newPhase;\r
-            delayUs += 1; // busSettleDelay\r
+            delayNs += 400; // busSettleDelay\r
+\r
+            if ((oldPhase & __scsiphase_io) != (newPhase & __scsiphase_io))\r
+            {\r
+                delayNs += 400; // Data release delay\r
+            }\r
 \r
             if (scsiDev.compatMode < COMPAT_SCSI2)\r
             {\r
                 // EMU EMAX needs 100uS ! 10uS is not enough.\r
-                delayUs += 100;\r
+                delayNs += 100000;\r
             }\r
         }\r
         else\r
@@ -648,7 +653,7 @@ uint32_t scsiEnterPhaseImmediate(int newPhase)
             *SCSI_CTRL_PHASE = 0;\r
         }\r
 \r
-        return delayUs;\r
+        return delayNs;\r
     }\r
 \r
     return 0; // No change\r
index e24672111968975ed72ebd93617deccc373c90c7..6158e97ba1ddb103535bb8a360a6b24266221ba8 100755 (executable)
@@ -83,6 +83,78 @@ void sdReadDMA(uint32_t lba, uint32_t sectors, uint8_t* outputBuffer)
        }\r
 }\r
 \r
+// Starts a read of `sectors` 512 byte blocks from the SD card at block\r
+// address `lba`. Only the data path setup and the read command are issued\r
+// here; no data is taken out of the SDIO FIFO by this function.\r
+//\r
+// On success the read command has been accepted and sdCmdActive is set.\r
+// If the card rejects a command, the SDIO static flags are cleared, the\r
+// disk state is reset, and the SCSI device goes to the STATUS phase with\r
+// CHECK_CONDITION and HARDWARE_ERROR / LOGICAL_UNIT_COMMUNICATION_FAILURE\r
+// sense data.\r
+void sdReadPIO(uint32_t lba, uint32_t sectors)\r
+{\r
+       SDIO_DataInitTypeDef dataCfg;\r
+       uint32_t cmdResult;\r
+\r
+       hsd.ErrorCode = HAL_SD_ERROR_NONE;\r
+       hsd.State = HAL_SD_STATE_BUSY;\r
+\r
+       // Reset the data control register before configuring the transfer.\r
+       hsd.Instance->DCTRL = 0U;\r
+\r
+       // Keep the SDIO data interrupts masked: the IRQ handler clears flags\r
+       // which we need to read the fifo data.\r
+#if defined(SDIO_STA_STBITERR)\r
+       __HAL_SD_DISABLE_IT(&hsd, (SDIO_IT_DCRCFAIL | SDIO_IT_DTIMEOUT | SDIO_IT_RXOVERR | SDIO_IT_DATAEND | SDIO_FLAG_RXFIFOHF | SDIO_IT_STBITERR));\r
+#else\r
+       __HAL_SD_DISABLE_IT(&hsd, (SDIO_IT_DCRCFAIL | SDIO_IT_DTIMEOUT | SDIO_IT_RXOVERR | SDIO_IT_DATAEND | SDIO_FLAG_RXFIFOHF));\r
+#endif\r
+\r
+       if (hsd.SdCard.CardType != CARD_SDHC_SDXC)\r
+       {\r
+               // Cards that are not SDHC/SDXC get the address scaled by 512\r
+               // and the block length set explicitly.\r
+               lba *= 512U;\r
+\r
+               cmdResult = SDMMC_CmdBlockLength(hsd.Instance, 512u);\r
+               if (cmdResult != HAL_SD_ERROR_NONE)\r
+               {\r
+                       goto commandFailed;\r
+               }\r
+       }\r
+\r
+       // Card-to-host block transfer covering all requested sectors.\r
+       dataCfg.DataTimeOut   = SDMMC_DATATIMEOUT;\r
+       dataCfg.DataLength    = sectors * 512u;\r
+       dataCfg.DataBlockSize = SDIO_DATABLOCK_SIZE_512B;\r
+       dataCfg.TransferDir   = SDIO_TRANSFER_DIR_TO_SDIO;\r
+       dataCfg.TransferMode  = SDIO_TRANSFER_MODE_BLOCK;\r
+       dataCfg.DPSM          = SDIO_DPSM_ENABLE;\r
+       SDIO_ConfigData(hsd.Instance, &dataCfg);\r
+\r
+       if (sectors <= 1U)\r
+       {\r
+               hsd.Context = SD_CONTEXT_READ_SINGLE_BLOCK;\r
+               cmdResult = SDMMC_CmdReadSingleBlock(hsd.Instance, lba);\r
+       }\r
+       else\r
+       {\r
+               hsd.Context = SD_CONTEXT_READ_MULTIPLE_BLOCK;\r
+               cmdResult = SDMMC_CmdReadMultiBlock(hsd.Instance, lba);\r
+       }\r
+\r
+       if (cmdResult != HAL_SD_ERROR_NONE)\r
+       {\r
+               goto commandFailed;\r
+       }\r
+\r
+       sdCmdActive = 1;\r
+       return;\r
+\r
+commandFailed:\r
+       // Shared failure path for a rejected block-length or read command.\r
+       // NOTE(review): hsd.State is still HAL_SD_STATE_BUSY here - confirm\r
+       // that the reset path restores it.\r
+       __HAL_SD_CLEAR_FLAG(&hsd, SDIO_STATIC_FLAGS);\r
+       scsiDiskReset();\r
+\r
+       scsiDev.status = CHECK_CONDITION;\r
+       scsiDev.target->sense.code = HARDWARE_ERROR;\r
+       scsiDev.target->sense.asc = LOGICAL_UNIT_COMMUNICATION_FAILURE;\r
+       scsiDev.phase = STATUS;\r
+}\r
+\r
+\r
 void sdCompleteTransfer()\r
 {\r
        if (sdCmdActive)\r
index 438c07bb5248d2a7b6f1d23082c599caf41ab67e..67dc48c74beb537e70212b734b2bc4309299fecc 100755 (executable)
@@ -36,6 +36,9 @@ int sdInit(void);
 
 void sdReadDMA(uint32_t lba, uint32_t sectors, uint8_t* outputBuffer);
 int sdReadDMAPoll(uint32_t remainingSectors);
+
+void sdReadPIO(uint32_t lba, uint32_t sectors);
+
 void sdCompleteTransfer();
 void sdKeepAlive();
 
index 9a7dccdea0969b681cd34cedce24933f5a3bddaa..eb47a0fb73ee3222632cc05e14084c8181b43b86 100755 (executable)
@@ -33,6 +33,7 @@ uint32_t s2s_elapsedTime_ms(uint32_t since);
 
 #define s2s_delay_ms(delay) s2s_delay_clocks((delay) * (s2s_cpu_freq / 1000))
 #define s2s_delay_us(delay) s2s_delay_clocks((delay) * (s2s_cpu_freq / 1000000))
+// Delay for approximately `delay` nanoseconds.
+// The nanosecond count is converted to CPU clocks with a clocks-per-nanosecond
+// factor held in 1/64ths and rounded to nearest:
+//   (s2s_cpu_freq * 64 + 500000000) / 1000000000
+// The arithmetic is done in long long, and the resulting clock count is passed
+// to s2s_delay_clocks(). Because the factor is quantised to 1/64 of a clock
+// per nanosecond, the delay is an approximation.
+#define s2s_delay_ns(delay) s2s_delay_clocks(((delay) * ((s2s_cpu_freq * 64LL + 500000000) / 1000000000)) / 64)
 void s2s_delay_clocks(uint32_t delay);
 
 #endif