\r
#include "stm32f2xx.h"\r
\r
+#include <assert.h>\r
+\r
// For SD write direct routines\r
#include "sdio.h"\r
#include "bsp_driver_sd.h"\r
int scsiActive __attribute__((unused)) = 0; // unused if DMA disabled\r
int sdActive = 0;\r
\r
- uint32_t partialScsiChunk = 0;\r
-\r
- // Start reading from the SD card FIRST, because we change state and\r
- // wait for SCSI signals\r
- int dataInStarted = 0;\r
+ // It's highly unlikely that someone is going to use huge transfers\r
+ // per scsi command, but if they do it'll be slower than usual.\r
+ uint32_t totalScsiBytes = transfer.blocks * bytesPerSector;\r
+ int useSlowDataCount = totalScsiBytes >= SCSI_XFER_MAX;\r
+ if (!useSlowDataCount)\r
+ {\r
+ scsiSetDataCount(totalScsiBytes);\r
+ }\r
\r
while ((i < totalSDSectors) &&\r
- (!dataInStarted || likely(scsiDev.phase == DATA_IN)) &&\r
+ likely(scsiDev.phase == DATA_IN) &&\r
likely(!scsiDev.resetFlag))\r
{\r
int completedDmaSectors;\r
\r
if (!sdActive &&\r
(prep - i < buffers) &&\r
- (prep < totalSDSectors))\r
+ (prep < totalSDSectors) &&\r
+ ((totalSDSectors - prep) >= sdPerScsi) &&\r
+ (likely(!useSlowDataCount) || scsiPhyComplete()))\r
{\r
// Start an SD transfer if we have space.\r
uint32_t startBuffer = prep % buffers;\r
uint32_t sectors = totalSDSectors - prep;\r
-\r
uint32_t freeBuffers = buffers - (prep - i);\r
\r
uint32_t contiguousBuffers = buffers - startBuffer;\r
\r
if (sectors > 128) sectors = 128; // 65536 DMA limit !!\r
\r
+ // Round-down when we have odd sector sizes.\r
+ if (sdPerScsi != 1)\r
+ {\r
+ sectors = (sectors / sdPerScsi) * sdPerScsi;\r
+ }\r
+\r
for (int dodgy = 0; dodgy < sectors; dodgy++)\r
{\r
scsiDev.data[SD_SECTOR_SIZE * (startBuffer + dodgy) + 510] = 0xAA;\r
\r
sdActive = sectors;\r
\r
+ if (useSlowDataCount)\r
+ {\r
+ scsiSetDataCount((sectors / sdPerScsi) * bytesPerSector);\r
+ }\r
+\r
// Wait now that the SD card is busy\r
// Chances are we've probably already waited sufficient time,\r
// but it's hard to measure microseconds cheaply. So just wait\r
}\r
}\r
\r
-#ifdef SCSI_FSMC_DMA\r
- #error this code not updated for 256 max bytes in scsi fifo\r
- if (scsiActive && scsiPhyComplete() && scsiWriteDMAPoll())\r
- {\r
- scsiActive = 0;\r
- i++;\r
- scsiPhyFifoFlip();\r
- }\r
- if (!scsiActive && ((prep - i) > 0))\r
- {\r
- int dmaBytes = SD_SECTOR_SIZE;\r
- if ((i % sdPerScsi) == (sdPerScsi - 1))\r
- {\r
- dmaBytes = bytesPerSector % SD_SECTOR_SIZE;\r
- if (dmaBytes == 0) dmaBytes = SD_SECTOR_SIZE;\r
- }\r
- scsiWriteDMA(&scsiDev.data[SD_SECTOR_SIZE * (i % buffers)], dmaBytes);\r
- scsiActive = 1;\r
- }\r
-#else\r
if ((prep - i) > 0)\r
{\r
int dmaBytes = SD_SECTOR_SIZE;\r
if (dmaBytes == 0) dmaBytes = SD_SECTOR_SIZE;\r
}\r
\r
- // Manually unrolled loop for performance.\r
- // -Os won't unroll this for us automatically,\r
- // especially since scsiPhyTx does volatile stuff.\r
- // Reduces bus utilisation by making the fsmc split\r
- // 32bits into 2 16 bit writes.\r
-\r
- uint16_t* scsiDmaData = (uint16_t*) &(scsiDev.data[SD_SECTOR_SIZE * (i % buffers) + partialScsiChunk]);\r
-\r
- uint32_t chunk = ((dmaBytes - partialScsiChunk) > SCSI_FIFO_DEPTH)\r
- ? SCSI_FIFO_DEPTH : (dmaBytes - partialScsiChunk);\r
-\r
- int k = 0;\r
- for (; k + 4 < (chunk + 1) / 2; k += 4)\r
- {\r
- scsiPhyTx32(scsiDmaData[k], scsiDmaData[k+1]);\r
- scsiPhyTx32(scsiDmaData[k+2], scsiDmaData[k+3]);\r
- }\r
- for (; k < (chunk + 1) / 2; ++k)\r
- {\r
- scsiPhyTx(scsiDmaData[k]);\r
- }\r
- while (!scsiPhyComplete() && !scsiDev.resetFlag)\r
- {\r
- __WFE(); // Wait for event\r
- }\r
- scsiPhyFifoFlip();\r
- scsiSetDataCount(chunk);\r
+ uint8_t* scsiDmaData = &(scsiDev.data[SD_SECTOR_SIZE * (i % buffers)]);\r
+ scsiWritePIO(scsiDmaData, dmaBytes);\r
\r
- partialScsiChunk += chunk;\r
- if (partialScsiChunk == dmaBytes)\r
- {\r
- partialScsiChunk = 0;\r
- ++i;\r
- }\r
+ ++i;\r
}\r
-#endif\r
}\r
\r
if (phaseChangeDelayUs > 0 && !scsiDev.resetFlag) // zero bytes ?\r
\r
// We've finished transferring the data to the FPGA, now wait until it's\r
// written to he SCSI bus.\r
+ __disable_irq();\r
while (!scsiPhyComplete() &&\r
likely(scsiDev.phase == DATA_IN) &&\r
likely(!scsiDev.resetFlag))\r
{\r
- __WFE(); // Wait for event\r
+ __WFI();\r
}\r
-\r
+ __enable_irq();\r
\r
if (scsiDev.phase == DATA_IN)\r
{\r
transfer.lba);\r
int i = 0;\r
int clearBSY = 0;\r
+ int extraSectors = 0;\r
\r
int parityError = 0;\r
int enableParity = scsiDev.boardCfg.flags & S2S_CFG_ENABLE_PARITY;\r
\r
+ uint32_t scsiSpeed = s2s_getScsiRateMBs();\r
+\r
+ uint32_t maxSectors = sizeof(scsiDev.data) / SD_SECTOR_SIZE;\r
+\r
+ static_assert(SCSI_XFER_MAX >= sizeof(scsiDev.data), "Assumes SCSI_XFER_MAX >= sizeof(scsiDev.data)");\r
+\r
+ // Start reading and filling fifos as soon as possible.\r
+ scsiSetDataCount(transfer.blocks * bytesPerSector);\r
+\r
while ((i < totalSDSectors) &&\r
likely(scsiDev.phase == DATA_OUT) &&\r
- likely(!scsiDev.resetFlag) &&\r
- likely(!parityError || !enableParity))\r
+ likely(!scsiDev.resetFlag))\r
+ // KEEP GOING to ensure FIFOs are in a good state.\r
+ // likely(!parityError || !enableParity))\r
{\r
- // Well, until we have some proper non-blocking SD code, we must\r
- // do this in a half-duplex fashion. We need to write as much as\r
- // possible in each SD card transaction.\r
- uint32_t maxSectors = sizeof(scsiDev.data) / SD_SECTOR_SIZE;\r
uint32_t rem = totalSDSectors - i;\r
- uint32_t sectors =\r
- rem < maxSectors ? rem : maxSectors;\r
+ uint32_t sectors = rem < maxSectors ? rem : maxSectors;\r
\r
if (bytesPerSector == SD_SECTOR_SIZE)\r
{\r
// no flow control. This can be handled if a) the scsi interface\r
// doesn't block and b) we read enough SCSI sectors first so that\r
// the SD interface cannot catch up.\r
+ int prevExtraSectors = extraSectors;\r
uint32_t totalBytes = sectors * SD_SECTOR_SIZE;\r
- uint32_t readAheadBytes = sectors * SD_SECTOR_SIZE;\r
+ extraSectors = 0;\r
+\r
+ int32_t readAheadBytes = totalBytes;\r
uint32_t sdSpeed = s2s_getSdRateMBs() + (scsiDev.sdUnderrunCount / 2);\r
- uint32_t scsiSpeed = s2s_getScsiRateMBs();\r
// if (have blind writes)\r
if (scsiSpeed > 0 && scsiDev.sdUnderrunCount < 16)\r
{\r
// readAhead = sectors * (sd / scsi - 1 + 0.1);\r
- readAheadBytes = totalBytes * sdSpeed / scsiSpeed - totalBytes + SCSI_FIFO_DEPTH;\r
- if (readAheadBytes < SCSI_FIFO_DEPTH)\r
- {\r
- readAheadBytes = SCSI_FIFO_DEPTH;\r
- }\r
+ readAheadBytes = totalBytes * sdSpeed / scsiSpeed - totalBytes;\r
+\r
+ // Round up to nearest FIFO size.\r
+ readAheadBytes = ((readAheadBytes / SCSI_FIFO_DEPTH) + 1) * SCSI_FIFO_DEPTH;\r
\r
if (readAheadBytes > totalBytes)\r
{\r
}\r
}\r
\r
- uint32_t chunk = (readAheadBytes > SCSI_FIFO_DEPTH) ? SCSI_FIFO_DEPTH : readAheadBytes;\r
- scsiSetDataCount(chunk);\r
+ uint32_t prevExtraBytes = prevExtraSectors * SD_SECTOR_SIZE;\r
+ uint32_t scsiBytesRead = prevExtraBytes;\r
+ readAheadBytes -= prevExtraBytes; // Must be signed!\r
\r
- uint32_t scsiBytesRead = 0;\r
- while (scsiBytesRead < readAheadBytes)\r
+ if (readAheadBytes > 0)\r
{\r
- while (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
- {\r
- __WFE(); // Wait for event\r
- }\r
- parityError |= scsiParityError();\r
- scsiPhyFifoFlip();\r
- uint32_t nextChunk = ((totalBytes - scsiBytesRead - chunk) > SCSI_FIFO_DEPTH)\r
- ? SCSI_FIFO_DEPTH : (totalBytes - scsiBytesRead - chunk);\r
-\r
- if (nextChunk > 0) scsiSetDataCount(nextChunk);\r
- scsiReadPIO(&scsiDev.data[scsiBytesRead], chunk);\r
- scsiBytesRead += chunk;\r
- chunk = nextChunk;\r
+ scsiReadPIO(\r
+ &scsiDev.data[scsiBytesRead],\r
+ readAheadBytes,\r
+ &parityError);\r
+ scsiBytesRead += readAheadBytes;\r
}\r
\r
HAL_SD_WriteBlocks_DMA(&hsd, (uint32_t*) (&scsiDev.data[0]), (i + sdLBA) * 512ll, SD_SECTOR_SIZE, sectors);\r
\r
- while (scsiBytesRead < totalBytes)\r
+ int underrun = 0;\r
+ if (scsiBytesRead < totalBytes && !scsiDev.resetFlag)\r
{\r
- while (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
- {\r
- __WFE(); // Wait for event\r
- }\r
- parityError |= scsiParityError();\r
- scsiPhyFifoFlip();\r
- uint32_t nextChunk = ((totalBytes - scsiBytesRead - chunk) > SCSI_FIFO_DEPTH)\r
- ? SCSI_FIFO_DEPTH : (totalBytes - scsiBytesRead - chunk);\r
-\r
- if (nextChunk > 0) scsiSetDataCount(nextChunk);\r
- scsiReadPIO(&scsiDev.data[scsiBytesRead], chunk);\r
- scsiBytesRead += chunk;\r
- chunk = nextChunk;\r
+ scsiReadPIO(\r
+ &scsiDev.data[scsiBytesRead],\r
+ totalBytes - readAheadBytes,\r
+ &parityError);\r
+\r
+ // Oh dear, SD finished first.\r
+ underrun = hsd.DmaTransferCplt;\r
+\r
+ scsiBytesRead += (totalBytes - readAheadBytes);\r
}\r
\r
- // Oh dear, SD finished first.\r
- int underrun = totalBytes > readAheadBytes && hsd.DmaTransferCplt;\r
+ if (!underrun && rem > sectors)\r
+ {\r
+ // We probably have some time to waste reading more here.\r
+ // While noting this is going to drop us down into\r
+ // half-duplex operation (hence why we read max / 4 only)\r
+\r
+ extraSectors = rem - sectors > (maxSectors / 4)\r
+ ? (maxSectors / 4)\r
+ : rem - sectors;\r
+\r
+ scsiReadPIO(\r
+ &scsiDev.data[0],\r
+ extraSectors * SD_SECTOR_SIZE,\r
+ &parityError);\r
+ }\r
\r
uint32_t dmaFinishTime = s2s_getTime_ms();\r
- while (!hsd.SdTransferCplt &&\r
+ while ((!hsd.SdTransferCplt ||\r
+ __HAL_SD_SDIO_GET_FLAG(&hsd, SDIO_FLAG_TXACT)) &&\r
s2s_elapsedTime_ms(dmaFinishTime) < 180)\r
{\r
// Wait while keeping BSY.\r
}\r
- while((__HAL_SD_SDIO_GET_FLAG(&hsd, SDIO_FLAG_TXACT)) &&\r
- s2s_elapsedTime_ms(dmaFinishTime) < 180)\r
- {\r
- // Wait for SD card while keeping BSY.\r
- }\r
\r
if (i + sectors >= totalSDSectors &&\r
!underrun &&\r
\r
HAL_SD_CheckWriteOperation(&hsd, (uint32_t)SD_DATATIMEOUT);\r
\r
- if (underrun)\r
+ if (underrun && (!parityError || !enableParity))\r
{\r
// Try again. Data is still in memory.\r
sdTmpWrite(&scsiDev.data[0], i + sdLBA, sectors);\r
scsiDev.sdUnderrunCount++;\r
}\r
- i += sectors;\r
\r
+ i += sectors;\r
}\r
else\r
{\r
// do this in a half-duplex fashion. We need to write as much as\r
// possible in each SD card transaction.\r
// use sg_dd from sg_utils3 tools to test.\r
- uint32_t maxSectors = sizeof(scsiDev.data) / SD_SECTOR_SIZE;\r
- uint32_t rem = totalSDSectors - i;\r
- uint32_t sectors = rem < maxSectors ? rem : maxSectors;\r
- int scsiSector;\r
- for (scsiSector = i; scsiSector < i + sectors; ++scsiSector)\r
+ for (int scsiSector = i; scsiSector < i + sectors; ++scsiSector)\r
{\r
int dmaBytes = SD_SECTOR_SIZE;\r
if ((scsiSector % sdPerScsi) == (sdPerScsi - 1))\r
dmaBytes = bytesPerSector % SD_SECTOR_SIZE;\r
if (dmaBytes == 0) dmaBytes = SD_SECTOR_SIZE;\r
}\r
- scsiRead(&scsiDev.data[SD_SECTOR_SIZE * (scsiSector - i)], dmaBytes, &parityError);\r
+\r
+ scsiReadPIO(&scsiDev.data[SD_SECTOR_SIZE * (scsiSector - i)], dmaBytes, &parityError);\r
}\r
- if (!parityError)\r
+ if (!parityError || !enableParity)\r
{\r
sdTmpWrite(&scsiDev.data[0], i + sdLBA, sectors);\r
}\r
}\r
}\r
\r
+ // Should already be complete here as we've read the FIFOs\r
+ // by now. Check anyway.\r
+ __disable_irq();\r
+ while (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
+ {\r
+ __WFI();\r
+ }\r
+ __enable_irq();\r
+\r
if (clearBSY)\r
{\r
enter_BusFree();\r
static uint8_t asyncTimings[][4] =\r
{\r
/* Speed, Assert, Deskew, Hold, Glitch */\r
-{/*1.5MB/s*/ 28, 18, 13, 15},\r
+{/*1.5MB/s*/ 28, 18, 7, 15},\r
+//{/*1.5MB/s*/ 63, 31, 7, 15},\r
{/*3.3MB/s*/ 13, 6, 6, 13},\r
{/*5MB/s*/ 9, 6, 6, 6}, // 80ns\r
{/*safe*/ 3, 6, 6, 6}, // Probably safe\r
volatile uint8_t scsiRxDMAComplete;\r
volatile uint8_t scsiTxDMAComplete;\r
\r
-uint8_t scsiPhyFifoSel = 0; // global\r
-\r
// scsi IRQ handler is initialised by the STM32 HAL. Connected to\r
// PE4\r
// Note: naming is important to ensure this function is listed in the\r
// Clear interrupt flag\r
__HAL_GPIO_EXTI_CLEAR_IT(GPIO_PIN_4);\r
\r
- scsiDev.resetFlag = scsiDev.resetFlag || scsiStatusRST();\r
+ uint8_t statusFlags = *SCSI_STS_SCSI;\r
+\r
+ scsiDev.resetFlag = scsiDev.resetFlag || (statusFlags & 0x04);\r
\r
// selFlag is required for Philips P2000C which releases it after 600ns\r
// without waiting for BSY.\r
// Also required for some early Mac Plus roms\r
- scsiDev.selFlag = *SCSI_STS_SELECTED;\r
+ if (statusFlags & 0x08) // Check SEL flag\r
+ {\r
+ scsiDev.selFlag = *SCSI_STS_SELECTED;\r
+ }\r
}\r
-\r
- __SEV(); // Set event. See corresponding __WFE() calls.\r
}\r
\r
static void assertFail()\r
void\r
scsiSetDataCount(uint32_t count)\r
{\r
- *SCSI_DATA_CNT_HI = count >> 8;\r
+ *SCSI_DATA_CNT_HI = (count >> 16) & 0xff; /* count is now split over three byte-wide registers: bits 23..16 go here */\r
+ *SCSI_DATA_CNT_MID = (count >> 8) & 0xff; /* bits 15..8; bits above 23 of count are not written anywhere */\r
*SCSI_DATA_CNT_LO = count & 0xff;\r
*SCSI_DATA_CNT_SET = 1;\r
}\r
\r
+int scsiFifoReady(void) /* returns 1 when pin FPGA_GPIO3_Pin on GPIOE reads non-zero, else 0 */\r
+{\r
+ __NOP();\r
+ HAL_GPIO_ReadPin(GPIOE, FPGA_GPIO3_Pin); /* result discarded; NOTE(review): presumably a settling delay before the real sample - confirm */\r
+ __NOP();\r
+ return HAL_GPIO_ReadPin(GPIOE, FPGA_GPIO3_Pin) != 0; /* only this second sample is reported */\r
+}\r
+\r
+// Read a single byte from the SCSI bus: program a 1-byte transfer, wait until\r
+// scsiPhyComplete() reports completion (or scsiDev.resetFlag is set), then\r
+// return the value obtained from scsiPhyRx(). Parity is not checked here\r
+// (see the TODO below).\r
uint8_t\r
scsiReadByte(void)\r
{\r
-#if FIFODEBUG\r
- if (!scsiPhyFifoAltEmpty()) {\r
- // Force a lock-up.\r
- assertFail();\r
- }\r
-#endif\r
scsiSetDataCount(1);\r
\r
+ // Ready immediately. setDataCount resets fifos\r
+\r
+ // Mask interrupts before testing for completion, as the other wait loops\r
+ // in this file do. With interrupts masked, an interrupt raised between the\r
+ // scsiPhyComplete() test and __WFI() stays pending and wakes the core,\r
+ // instead of being serviced first and leaving __WFI() with nothing to\r
+ // wake it. This also pairs with the __enable_irq() below.\r
+ // NOTE(review): scsiDev.resetFlag is written by the EXTI handler, which\r
+ // cannot run while interrupts are masked - same as the sibling loops;\r
+ // confirm that is intended.\r
+ __disable_irq();\r
while (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
{\r
- __WFE(); // Wait for event\r
+ __WFI(); // Wait for interrupt\r
}\r
- scsiPhyFifoFlip();\r
+ __enable_irq();\r
+\r
uint8_t val = scsiPhyRx();\r
// TODO scsiDev.parityError = scsiDev.parityError || SCSI_Parity_Error_Read();\r
\r
-#if FIFODEBUG\r
- if (!scsiPhyFifoEmpty()) {\r
- int j = 0;\r
- uint8_t k __attribute((unused));\r
- while (!scsiPhyFifoEmpty()) { k = scsiPhyRx(); ++j; }\r
-\r
- // Force a lock-up.\r
- assertFail();\r
- }\r
-#endif\r
return val;\r
}\r
\r
\r
void\r
-scsiReadPIO(uint8_t* data, uint32_t count)\r
+scsiReadPIO(uint8_t* data, uint32_t count, int* parityError)\r
{\r
uint16_t* fifoData = (uint16_t*)data;\r
+ uint32_t count16 = (count + 1) / 2;\r
\r
- for (int i = 0; i < (count + 1) / 2; ++i)\r
+ int i = 0;\r
+ while ((i < count16) && likely(!scsiDev.resetFlag))\r
{\r
- fifoData[i] = scsiPhyRx(); // TODO ASSUMES LITTLE ENDIAN\r
- }\r
-}\r
-\r
-void\r
-scsiReadDMA(uint8_t* data, uint32_t count)\r
-{\r
- // Prepare DMA transfer\r
- dmaInProgress = 1;\r
-\r
- scsiTxDMAComplete = 1; // TODO not used much\r
- scsiRxDMAComplete = 0; // TODO not used much\r
+ // Wait until FIFO is full (or complete)\r
+ while (!scsiFifoReady() && likely(!scsiDev.resetFlag))\r
+ {\r
+ // spin\r
+ }\r
\r
- HAL_DMA_Start(\r
- &fsmcToMem,\r
- (uint32_t) SCSI_FIFO_DATA,\r
- (uint32_t) data,\r
- (count + 1) / 2);\r
-}\r
+ if (count16 - i >= SCSI_FIFO_DEPTH16)\r
+ {\r
+ uint32_t chunk16 = SCSI_FIFO_DEPTH16;\r
\r
-int\r
-scsiReadDMAPoll()\r
-{\r
- int complete = __HAL_DMA_GET_COUNTER(&fsmcToMem) == 0;\r
- complete = complete && (HAL_DMA_PollForTransfer(&fsmcToMem, HAL_DMA_FULL_TRANSFER, 0xffffffff) == HAL_OK);\r
- if (complete)\r
- {\r
- scsiTxDMAComplete = 1; // TODO MM FIX IRQ\r
- scsiRxDMAComplete = 1;\r
+ // Hand-unrolled: each pass of this loop reads 128 16-bit words.\r
+ for (uint32_t k = 0; k + 128 <= chunk16; k += 128)\r
+ {\r
+ fifoData[i + k] = scsiPhyRx();\r
+ fifoData[i + k + 1] = scsiPhyRx();\r
+ fifoData[i + k + 2] = scsiPhyRx();\r
+ fifoData[i + k + 3] = scsiPhyRx();\r
+ fifoData[i + k + 4] = scsiPhyRx();\r
+ fifoData[i + k + 5] = scsiPhyRx();\r
+ fifoData[i + k + 6] = scsiPhyRx();\r
+ fifoData[i + k + 7] = scsiPhyRx();\r
+ fifoData[i + k + 8] = scsiPhyRx();\r
+ fifoData[i + k + 9] = scsiPhyRx();\r
+ fifoData[i + k + 10] = scsiPhyRx();\r
+ fifoData[i + k + 11] = scsiPhyRx();\r
+ fifoData[i + k + 12] = scsiPhyRx();\r
+ fifoData[i + k + 13] = scsiPhyRx();\r
+ fifoData[i + k + 14] = scsiPhyRx();\r
+ fifoData[i + k + 15] = scsiPhyRx();\r
+ fifoData[i + k + 16] = scsiPhyRx();\r
+ fifoData[i + k + 17] = scsiPhyRx();\r
+ fifoData[i + k + 18] = scsiPhyRx();\r
+ fifoData[i + k + 19] = scsiPhyRx();\r
+ fifoData[i + k + 20] = scsiPhyRx();\r
+ fifoData[i + k + 21] = scsiPhyRx();\r
+ fifoData[i + k + 22] = scsiPhyRx();\r
+ fifoData[i + k + 23] = scsiPhyRx();\r
+ fifoData[i + k + 24] = scsiPhyRx();\r
+ fifoData[i + k + 25] = scsiPhyRx();\r
+ fifoData[i + k + 26] = scsiPhyRx();\r
+ fifoData[i + k + 27] = scsiPhyRx();\r
+ fifoData[i + k + 28] = scsiPhyRx();\r
+ fifoData[i + k + 29] = scsiPhyRx();\r
+ fifoData[i + k + 30] = scsiPhyRx();\r
+ fifoData[i + k + 31] = scsiPhyRx();\r
+ fifoData[i + k + 32] = scsiPhyRx();\r
+ fifoData[i + k + 33] = scsiPhyRx();\r
+ fifoData[i + k + 34] = scsiPhyRx();\r
+ fifoData[i + k + 35] = scsiPhyRx();\r
+ fifoData[i + k + 36] = scsiPhyRx();\r
+ fifoData[i + k + 37] = scsiPhyRx();\r
+ fifoData[i + k + 38] = scsiPhyRx();\r
+ fifoData[i + k + 39] = scsiPhyRx();\r
+ fifoData[i + k + 40] = scsiPhyRx();\r
+ fifoData[i + k + 41] = scsiPhyRx();\r
+ fifoData[i + k + 42] = scsiPhyRx();\r
+ fifoData[i + k + 43] = scsiPhyRx();\r
+ fifoData[i + k + 44] = scsiPhyRx();\r
+ fifoData[i + k + 45] = scsiPhyRx();\r
+ fifoData[i + k + 46] = scsiPhyRx();\r
+ fifoData[i + k + 47] = scsiPhyRx();\r
+ fifoData[i + k + 48] = scsiPhyRx();\r
+ fifoData[i + k + 49] = scsiPhyRx();\r
+ fifoData[i + k + 50] = scsiPhyRx();\r
+ fifoData[i + k + 51] = scsiPhyRx();\r
+ fifoData[i + k + 52] = scsiPhyRx();\r
+ fifoData[i + k + 53] = scsiPhyRx();\r
+ fifoData[i + k + 54] = scsiPhyRx();\r
+ fifoData[i + k + 55] = scsiPhyRx();\r
+ fifoData[i + k + 56] = scsiPhyRx();\r
+ fifoData[i + k + 57] = scsiPhyRx();\r
+ fifoData[i + k + 58] = scsiPhyRx();\r
+ fifoData[i + k + 59] = scsiPhyRx();\r
+ fifoData[i + k + 60] = scsiPhyRx();\r
+ fifoData[i + k + 61] = scsiPhyRx();\r
+ fifoData[i + k + 62] = scsiPhyRx();\r
+ fifoData[i + k + 63] = scsiPhyRx();\r
+ fifoData[i + k + 64] = scsiPhyRx();\r
+ fifoData[i + k + 65] = scsiPhyRx();\r
+ fifoData[i + k + 66] = scsiPhyRx();\r
+ fifoData[i + k + 67] = scsiPhyRx();\r
+ fifoData[i + k + 68] = scsiPhyRx();\r
+ fifoData[i + k + 69] = scsiPhyRx();\r
+ fifoData[i + k + 70] = scsiPhyRx();\r
+ fifoData[i + k + 71] = scsiPhyRx();\r
+ fifoData[i + k + 72] = scsiPhyRx();\r
+ fifoData[i + k + 73] = scsiPhyRx();\r
+ fifoData[i + k + 74] = scsiPhyRx();\r
+ fifoData[i + k + 75] = scsiPhyRx();\r
+ fifoData[i + k + 76] = scsiPhyRx();\r
+ fifoData[i + k + 77] = scsiPhyRx();\r
+ fifoData[i + k + 78] = scsiPhyRx();\r
+ fifoData[i + k + 79] = scsiPhyRx();\r
+ fifoData[i + k + 80] = scsiPhyRx();\r
+ fifoData[i + k + 81] = scsiPhyRx();\r
+ fifoData[i + k + 82] = scsiPhyRx();\r
+ fifoData[i + k + 83] = scsiPhyRx();\r
+ fifoData[i + k + 84] = scsiPhyRx();\r
+ fifoData[i + k + 85] = scsiPhyRx();\r
+ fifoData[i + k + 86] = scsiPhyRx();\r
+ fifoData[i + k + 87] = scsiPhyRx();\r
+ fifoData[i + k + 88] = scsiPhyRx();\r
+ fifoData[i + k + 89] = scsiPhyRx();\r
+ fifoData[i + k + 90] = scsiPhyRx();\r
+ fifoData[i + k + 91] = scsiPhyRx();\r
+ fifoData[i + k + 92] = scsiPhyRx();\r
+ fifoData[i + k + 93] = scsiPhyRx();\r
+ fifoData[i + k + 94] = scsiPhyRx();\r
+ fifoData[i + k + 95] = scsiPhyRx();\r
+ fifoData[i + k + 96] = scsiPhyRx();\r
+ fifoData[i + k + 97] = scsiPhyRx();\r
+ fifoData[i + k + 98] = scsiPhyRx();\r
+ fifoData[i + k + 99] = scsiPhyRx();\r
+ fifoData[i + k + 100] = scsiPhyRx();\r
+ fifoData[i + k + 101] = scsiPhyRx();\r
+ fifoData[i + k + 102] = scsiPhyRx();\r
+ fifoData[i + k + 103] = scsiPhyRx();\r
+ fifoData[i + k + 104] = scsiPhyRx();\r
+ fifoData[i + k + 105] = scsiPhyRx();\r
+ fifoData[i + k + 106] = scsiPhyRx();\r
+ fifoData[i + k + 107] = scsiPhyRx();\r
+ fifoData[i + k + 108] = scsiPhyRx();\r
+ fifoData[i + k + 109] = scsiPhyRx();\r
+ fifoData[i + k + 110] = scsiPhyRx();\r
+ fifoData[i + k + 111] = scsiPhyRx();\r
+ fifoData[i + k + 112] = scsiPhyRx();\r
+ fifoData[i + k + 113] = scsiPhyRx();\r
+ fifoData[i + k + 114] = scsiPhyRx();\r
+ fifoData[i + k + 115] = scsiPhyRx();\r
+ fifoData[i + k + 116] = scsiPhyRx();\r
+ fifoData[i + k + 117] = scsiPhyRx();\r
+ fifoData[i + k + 118] = scsiPhyRx();\r
+ fifoData[i + k + 119] = scsiPhyRx();\r
+ fifoData[i + k + 120] = scsiPhyRx();\r
+ fifoData[i + k + 121] = scsiPhyRx();\r
+ fifoData[i + k + 122] = scsiPhyRx();\r
+ fifoData[i + k + 123] = scsiPhyRx();\r
+ fifoData[i + k + 124] = scsiPhyRx();\r
+ fifoData[i + k + 125] = scsiPhyRx();\r
+ fifoData[i + k + 126] = scsiPhyRx();\r
+ fifoData[i + k + 127] = scsiPhyRx();\r
+ }\r
\r
- dmaInProgress = 0;\r
-#if 0\r
- // TODO MM scsiDev.parityError = scsiDev.parityError || SCSI_Parity_Error_Read();\r
-#endif\r
- return 1;\r
+ i += chunk16;\r
+ }\r
+ else\r
+ {\r
+ uint32_t chunk16 = count16 - i;\r
\r
+ uint32_t k = 0;\r
+ for (; k + 4 <= chunk16; k += 4)\r
+ {\r
+ fifoData[i + k] = scsiPhyRx();\r
+ fifoData[i + 1 + k] = scsiPhyRx();\r
+ fifoData[i + 2 + k] = scsiPhyRx();\r
+ fifoData[i + 3 + k] = scsiPhyRx();\r
+ }\r
+ for (; k < chunk16; ++k)\r
+ {\r
+ fifoData[i + k] = scsiPhyRx();\r
+ }\r
+ i += chunk16;\r
+ }\r
}\r
- else\r
- {\r
- return 0;\r
- }\r
+\r
+ *parityError |= scsiParityError();\r
}\r
\r
void\r
int i = 0;\r
*parityError = 0;\r
\r
-\r
- uint32_t chunk = ((count - i) > SCSI_FIFO_DEPTH)\r
- ? SCSI_FIFO_DEPTH : (count - i);\r
-#ifdef SCSI_FSMC_DMA\r
- if (chunk >= 16)\r
- {\r
- // DMA is doing 32bit transfers.\r
- chunk = chunk & 0xFFFFFFF8;\r
- }\r
-#endif\r
- scsiSetDataCount(chunk);\r
-\r
while (i < count && likely(!scsiDev.resetFlag))\r
{\r
- while (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
- {\r
- __WFE(); // Wait for event\r
- }\r
- *parityError |= scsiParityError();\r
- scsiPhyFifoFlip();\r
+ uint32_t chunk = ((count - i) > SCSI_XFER_MAX)\r
+ ? SCSI_XFER_MAX : (count - i);\r
+ scsiSetDataCount(chunk);\r
\r
- uint32_t nextChunk = ((count - i - chunk) > SCSI_FIFO_DEPTH)\r
- ? SCSI_FIFO_DEPTH : (count - i - chunk);\r
-#ifdef SCSI_FSMC_DMA\r
- if (nextChunk >= 16)\r
- {\r
- nextChunk = nextChunk & 0xFFFFFFF8;\r
- }\r
-#endif\r
- if (nextChunk > 0)\r
- {\r
- scsiSetDataCount(nextChunk);\r
- }\r
+ scsiReadPIO(data + i, chunk, parityError);\r
\r
-#ifdef SCSI_FSMC_DMA\r
- if (chunk < 16)\r
-#endif\r
- {\r
- scsiReadPIO(data + i, chunk);\r
- }\r
-#ifdef SCSI_FSMC_DMA\r
- else\r
+ __disable_irq();\r
+ while (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
{\r
- scsiReadDMA(data + i, chunk);\r
-\r
- while (!scsiReadDMAPoll() && likely(!scsiDev.resetFlag))\r
- {\r
- };\r
+ __WFI();\r
}\r
-#endif\r
-\r
+ __enable_irq();\r
\r
i += chunk;\r
- chunk = nextChunk;\r
}\r
-#if FIFODEBUG\r
- if (!scsiPhyFifoEmpty() || !scsiPhyFifoAltEmpty()) {\r
- int j = 0;\r
- while (!scsiPhyFifoEmpty()) { scsiPhyRx(); ++j; }\r
- scsiPhyFifoFlip();\r
- int k = 0;\r
- while (!scsiPhyFifoEmpty()) { scsiPhyRx(); ++k; }\r
- // Force a lock-up.\r
- assertFail();\r
- }\r
-#endif\r
}\r
\r
void\r
scsiWriteByte(uint8_t value)\r
{\r
-#if FIFODEBUG\r
- if (!scsiPhyFifoEmpty()) {\r
- // Force a lock-up.\r
- assertFail();\r
- }\r
-#endif\r
- scsiPhyTx(value);\r
- scsiPhyFifoFlip();\r
-\r
scsiSetDataCount(1);\r
+ scsiPhyTx(value); /* the byte is now queued after the count is programmed, not before */\r
\r
+ __disable_irq(); /* mask interrupts for the duration of the wait loop below */\r
while (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
{\r
- __WFE(); // Wait for event\r
- }\r
-\r
-#if FIFODEBUG\r
- if (!scsiPhyFifoAltEmpty()) {\r
- // Force a lock-up.\r
- assertFail();\r
+ __WFI(); /* sleep until an interrupt is pending, then re-test the loop condition */\r
}\r
-#endif\r
+ __enable_irq(); /* unmask; pairs with the __disable_irq() above */\r
}\r
\r
-static void\r
+void\r
scsiWritePIO(const uint8_t* data, uint32_t count)\r
{\r
uint16_t* fifoData = (uint16_t*)data;\r
- for (int i = 0; i < (count + 1) / 2; ++i)\r
+ uint32_t count16 = (count + 1) / 2;\r
+\r
+ int i = 0;\r
+ while ((i < count16) && likely(!scsiDev.resetFlag))\r
{\r
- scsiPhyTx(fifoData[i]);\r
- }\r
-}\r
+ while (!scsiFifoReady() && likely(!scsiDev.resetFlag))\r
+ {\r
+ // Spin\r
+ }\r
\r
-void\r
-scsiWriteDMA(const uint8_t* data, uint32_t count)\r
-{\r
- // Prepare DMA transfer\r
- dmaInProgress = 1;\r
+ if (count16 - i >= SCSI_FIFO_DEPTH16)\r
+ {\r
+ uint32_t chunk16 = SCSI_FIFO_DEPTH16;\r
\r
- scsiTxDMAComplete = 0;\r
- scsiRxDMAComplete = 1;\r
+ // Hand-unrolled: each pass issues 64 scsiPhyTx32() calls (128 16-bit words).\r
+ for (uint32_t k = 0; k + 128 <= chunk16; k += 128)\r
+ {\r
+ scsiPhyTx32(fifoData[i + k], fifoData[i + k + 1]);\r
+ scsiPhyTx32(fifoData[i + 2 + k], fifoData[i + k + 3]);\r
+ scsiPhyTx32(fifoData[i + 4 + k], fifoData[i + k + 5]);\r
+ scsiPhyTx32(fifoData[i + 6 + k], fifoData[i + k + 7]);\r
+ scsiPhyTx32(fifoData[i + 8 + k], fifoData[i + k + 9]);\r
+ scsiPhyTx32(fifoData[i + 10 + k], fifoData[i + k + 11]);\r
+ scsiPhyTx32(fifoData[i + 12 + k], fifoData[i + k + 13]);\r
+ scsiPhyTx32(fifoData[i + 14 + k], fifoData[i + k + 15]);\r
+ scsiPhyTx32(fifoData[i + 16 + k], fifoData[i + k + 17]);\r
+ scsiPhyTx32(fifoData[i + 18 + k], fifoData[i + k + 19]);\r
+ scsiPhyTx32(fifoData[i + 20 + k], fifoData[i + k + 21]);\r
+ scsiPhyTx32(fifoData[i + 22 + k], fifoData[i + k + 23]);\r
+ scsiPhyTx32(fifoData[i + 24 + k], fifoData[i + k + 25]);\r
+ scsiPhyTx32(fifoData[i + 26 + k], fifoData[i + k + 27]);\r
+ scsiPhyTx32(fifoData[i + 28 + k], fifoData[i + k + 29]);\r
+ scsiPhyTx32(fifoData[i + 30 + k], fifoData[i + k + 31]);\r
+\r
+ scsiPhyTx32(fifoData[i + 32 + k], fifoData[i + k + 33]);\r
+ scsiPhyTx32(fifoData[i + 34 + k], fifoData[i + k + 35]);\r
+ scsiPhyTx32(fifoData[i + 36 + k], fifoData[i + k + 37]);\r
+ scsiPhyTx32(fifoData[i + 38 + k], fifoData[i + k + 39]);\r
+ scsiPhyTx32(fifoData[i + 40 + k], fifoData[i + k + 41]);\r
+ scsiPhyTx32(fifoData[i + 42 + k], fifoData[i + k + 43]);\r
+ scsiPhyTx32(fifoData[i + 44 + k], fifoData[i + k + 45]);\r
+ scsiPhyTx32(fifoData[i + 46 + k], fifoData[i + k + 47]);\r
+ scsiPhyTx32(fifoData[i + 48 + k], fifoData[i + k + 49]);\r
+ scsiPhyTx32(fifoData[i + 50 + k], fifoData[i + k + 51]);\r
+ scsiPhyTx32(fifoData[i + 52 + k], fifoData[i + k + 53]);\r
+ scsiPhyTx32(fifoData[i + 54 + k], fifoData[i + k + 55]);\r
+ scsiPhyTx32(fifoData[i + 56 + k], fifoData[i + k + 57]);\r
+ scsiPhyTx32(fifoData[i + 58 + k], fifoData[i + k + 59]);\r
+ scsiPhyTx32(fifoData[i + 60 + k], fifoData[i + k + 61]);\r
+ scsiPhyTx32(fifoData[i + 62 + k], fifoData[i + k + 63]);\r
+\r
+ scsiPhyTx32(fifoData[i + 64 + k], fifoData[i + k + 65]);\r
+ scsiPhyTx32(fifoData[i + 66 + k], fifoData[i + k + 67]);\r
+ scsiPhyTx32(fifoData[i + 68 + k], fifoData[i + k + 69]);\r
+ scsiPhyTx32(fifoData[i + 70 + k], fifoData[i + k + 71]);\r
+ scsiPhyTx32(fifoData[i + 72 + k], fifoData[i + k + 73]);\r
+ scsiPhyTx32(fifoData[i + 74 + k], fifoData[i + k + 75]);\r
+ scsiPhyTx32(fifoData[i + 76 + k], fifoData[i + k + 77]);\r
+ scsiPhyTx32(fifoData[i + 78 + k], fifoData[i + k + 79]);\r
+ scsiPhyTx32(fifoData[i + 80 + k], fifoData[i + k + 81]);\r
+ scsiPhyTx32(fifoData[i + 82 + k], fifoData[i + k + 83]);\r
+ scsiPhyTx32(fifoData[i + 84 + k], fifoData[i + k + 85]);\r
+ scsiPhyTx32(fifoData[i + 86 + k], fifoData[i + k + 87]);\r
+ scsiPhyTx32(fifoData[i + 88 + k], fifoData[i + k + 89]);\r
+ scsiPhyTx32(fifoData[i + 90 + k], fifoData[i + k + 91]);\r
+ scsiPhyTx32(fifoData[i + 92 + k], fifoData[i + k + 93]);\r
+ scsiPhyTx32(fifoData[i + 94 + k], fifoData[i + k + 95]);\r
+\r
+ scsiPhyTx32(fifoData[i + 96 + k], fifoData[i + k + 97]);\r
+ scsiPhyTx32(fifoData[i + 98 + k], fifoData[i + k + 99]);\r
+ scsiPhyTx32(fifoData[i + 100 + k], fifoData[i + k + 101]);\r
+ scsiPhyTx32(fifoData[i + 102 + k], fifoData[i + k + 103]);\r
+ scsiPhyTx32(fifoData[i + 104 + k], fifoData[i + k + 105]);\r
+ scsiPhyTx32(fifoData[i + 106 + k], fifoData[i + k + 107]);\r
+ scsiPhyTx32(fifoData[i + 108 + k], fifoData[i + k + 109]);\r
+ scsiPhyTx32(fifoData[i + 110 + k], fifoData[i + k + 111]);\r
+ scsiPhyTx32(fifoData[i + 112 + k], fifoData[i + k + 113]);\r
+ scsiPhyTx32(fifoData[i + 114 + k], fifoData[i + k + 115]);\r
+ scsiPhyTx32(fifoData[i + 116 + k], fifoData[i + k + 117]);\r
+ scsiPhyTx32(fifoData[i + 118 + k], fifoData[i + k + 119]);\r
+ scsiPhyTx32(fifoData[i + 120 + k], fifoData[i + k + 121]);\r
+ scsiPhyTx32(fifoData[i + 122 + k], fifoData[i + k + 123]);\r
+ scsiPhyTx32(fifoData[i + 124 + k], fifoData[i + k + 125]);\r
+ scsiPhyTx32(fifoData[i + 126 + k], fifoData[i + k + 127]);\r
\r
- HAL_DMA_Start(\r
- &memToFSMC,\r
- (uint32_t) data,\r
- (uint32_t) SCSI_FIFO_DATA,\r
- count / 4);\r
-}\r
+ }\r
\r
-int\r
-scsiWriteDMAPoll()\r
-{\r
- int complete = __HAL_DMA_GET_COUNTER(&memToFSMC) == 0;\r
- complete = complete && (HAL_DMA_PollForTransfer(&memToFSMC, HAL_DMA_FULL_TRANSFER, 0xffffffff) == HAL_OK);\r
- if (complete)\r
- {\r
- scsiTxDMAComplete = 1; // TODO MM FIX IRQ\r
- scsiRxDMAComplete = 1;\r
+ i += chunk16;\r
+ }\r
+ else\r
+ {\r
+ uint32_t chunk16 = count16 - i;\r
\r
- dmaInProgress = 0;\r
- return 1;\r
- }\r
- else\r
- {\r
- return 0;\r
+ uint32_t k = 0;\r
+ for (; k + 4 <= chunk16; k += 4)\r
+ {\r
+ scsiPhyTx32(fifoData[i + k], fifoData[i + k + 1]);\r
+ scsiPhyTx32(fifoData[i + k + 2], fifoData[i + k + 3]);\r
+ }\r
+ for (; k < chunk16; ++k)\r
+ {\r
+ scsiPhyTx(fifoData[i + k]);\r
+ }\r
+ i += chunk16;\r
+ }\r
}\r
}\r
\r
+\r
void\r
scsiWrite(const uint8_t* data, uint32_t count)\r
{\r
int i = 0;\r
while (i < count && likely(!scsiDev.resetFlag))\r
{\r
- uint32_t chunk = ((count - i) > SCSI_FIFO_DEPTH)\r
- ? SCSI_FIFO_DEPTH : (count - i);\r
-\r
-#if FIFODEBUG\r
- if (!scsiPhyFifoEmpty()) {\r
- // Force a lock-up.\r
- assertFail();\r
- }\r
-#endif\r
-\r
-#ifdef SCSI_FSMC_DMA\r
- if (chunk < 16)\r
-#endif\r
- {\r
- scsiWritePIO(data + i, chunk);\r
- }\r
-#ifdef SCSI_FSMC_DMA\r
- else\r
- {\r
- // DMA is doing 32bit transfers.\r
- chunk = chunk & 0xFFFFFFF8;\r
- scsiWriteDMA(data + i, chunk);\r
+ uint32_t chunk = ((count - i) > SCSI_XFER_MAX)\r
+ ? SCSI_XFER_MAX : (count - i); /* each pass moves at most SCSI_XFER_MAX bytes */\r
+ scsiSetDataCount(chunk); /* byte count is programmed before the data is pushed */\r
\r
- while (!scsiWriteDMAPoll() && likely(!scsiDev.resetFlag))\r
- {\r
- }\r
- }\r
-#endif\r
+ scsiWritePIO(data + i, chunk); /* the whole chunk now goes through the PIO path; the DMA branch is removed */\r
\r
+ __disable_irq(); /* mask interrupts for the duration of the wait loop below */\r
while (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
{\r
- __WFE(); // Wait for event\r
+ __WFI(); /* sleep until an interrupt is pending, then re-test the loop condition */\r
}\r
+ __enable_irq(); /* unmask; pairs with the __disable_irq() above */\r
\r
-#if FIFODEBUG\r
- if (!scsiPhyFifoAltEmpty()) {\r
- // Force a lock-up.\r
- assertFail();\r
- }\r
-#endif\r
-\r
- scsiPhyFifoFlip();\r
- scsiSetDataCount(chunk);\r
i += chunk;\r
}\r
- while (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
- {\r
- __WFE(); // Wait for event\r
- }\r
-\r
-#if FIFODEBUG\r
- if (!scsiPhyFifoAltEmpty()) {\r
- // Force a lock-up.\r
- assertFail();\r
- }\r
-#endif\r
}\r
\r
static inline void busSettleDelay(void)\r
\r
int oldPhase = *SCSI_CTRL_PHASE;\r
\r
- if (!scsiDev.resetFlag && (!scsiPhyFifoEmpty() || !scsiPhyFifoAltEmpty())) {\r
- // Force a lock-up.\r
- assertFail();\r
- }\r
if (newPhase != oldPhase)\r
{\r
if ((newPhase == DATA_IN || newPhase == DATA_OUT) &&\r
\r
*SCSI_CTRL_PHASE = 0x00;\r
*SCSI_CTRL_BSY = 0x00;\r
- scsiPhyFifoSel = 0;\r
- *SCSI_FIFO_SEL = 0;\r
*SCSI_CTRL_DBX = 0;\r
\r
*SCSI_CTRL_SYNC_OFFSET = 0;\r
}\r
#endif\r
\r
+ // PIO Benchmark code\r
+ // Currently 16.7MB/s.\r
+ //#define PIO_BENCHMARK 1\r
+ #ifdef PIO_BENCHMARK\r
+ while(1)\r
+ {\r
+ s2s_ledOn();\r
+\r
+ scsiEnterPhase(DATA_IN); // Need IO flag set for fifo ready flag\r
+\r
+ // 100MB\r
+ for (int i = 0; i < (100LL * 1024 * 1024 / SCSI_FIFO_DEPTH); ++i)\r
+ {\r
+ scsiSetDataCount(1); // Resets fifos.\r
+\r
+ // Shouldn't block\r
+ scsiDev.resetFlag = 0;\r
+ scsiWritePIO(&scsiDev.data[0], SCSI_FIFO_DEPTH);\r
+ }\r
+ s2s_ledOff();\r
+\r
+ for(int i = 0; i < 10; ++i) s2s_delay_ms(1000);\r
+ }\r
+ #endif\r
+\r
#ifdef SCSI_FREQ_TEST\r
while(1)\r
{\r
*SCSI_CTRL_IDMASK = 0x00; // Reset in scsiPhyConfig\r
*SCSI_CTRL_PHASE = 0x00;\r
*SCSI_CTRL_BSY = 0x00;\r
- scsiPhyFifoSel = 0;\r
- *SCSI_FIFO_SEL = 0;\r
*SCSI_CTRL_DBX = 0;\r
\r
*SCSI_CTRL_SYNC_OFFSET = 0;\r