From: Michael McMaster Date: Mon, 5 Aug 2019 10:07:37 +0000 (+1000) Subject: Slight improvements to data throughput, which may assist SCSI hosts with short timeouts X-Git-Tag: v6.2.7 X-Git-Url: http://git.codesrc.com/gitweb.cgi?a=commitdiff_plain;h=05e02cbfcb1dbc071ecb15c7627edb2e156f66b3;p=SCSI2SD-V6.git Slight improvements to data throughput, which may assist SCSI hosts with short timeouts --- diff --git a/CHANGELOG b/CHANGELOG index f30692b8..a6955fff 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,7 @@ +20191009 6.2.7 + - Slight improvements to data throughput, which may assist SCSI hosts with + short timeouts. + 20190529 6.2.5 - Add scsi mode page 0 support - Fix SD card hotswap bug when the SCSI host is constantly polling diff --git a/STM32CubeMX/SCSI2SD-V6/Inc/stm32f2xx_it.h b/STM32CubeMX/SCSI2SD-V6/Inc/stm32f2xx_it.h index 9f13ad03..4c7075ab 100755 --- a/STM32CubeMX/SCSI2SD-V6/Inc/stm32f2xx_it.h +++ b/STM32CubeMX/SCSI2SD-V6/Inc/stm32f2xx_it.h @@ -46,6 +46,7 @@ /* Exported functions ------------------------------------------------------- */ void SysTick_Handler(void); +void EXTI3_IRQHandler(void); void EXTI4_IRQHandler(void); void SDIO_IRQHandler(void); void DMA2_Stream3_IRQHandler(void); diff --git a/STM32CubeMX/SCSI2SD-V6/Src/gpio.c b/STM32CubeMX/SCSI2SD-V6/Src/gpio.c index ba18a6d7..5682f257 100755 --- a/STM32CubeMX/SCSI2SD-V6/Src/gpio.c +++ b/STM32CubeMX/SCSI2SD-V6/Src/gpio.c @@ -69,11 +69,17 @@ void MX_GPIO_Init(void) __GPIOD_CLK_ENABLE(); /*Configure GPIO pins : PEPin PEPin PEPin PEPin */ - GPIO_InitStruct.Pin = FPGA_GPIO2_Pin|FPGA_GPIO3_Pin|UNUSED_PE5_Pin|UNUSED_PE6_Pin; + GPIO_InitStruct.Pin = FPGA_GPIO2_Pin|UNUSED_PE5_Pin|UNUSED_PE6_Pin; GPIO_InitStruct.Mode = GPIO_MODE_INPUT; GPIO_InitStruct.Pull = GPIO_PULLDOWN; HAL_GPIO_Init(GPIOE, &GPIO_InitStruct); + /*Configure GPIO pin : PE3 */ + GPIO_InitStruct.Pin = FPGA_GPIO3_Pin; + GPIO_InitStruct.Mode = GPIO_MODE_INPUT; + GPIO_InitStruct.Pull = GPIO_NOPULL; + HAL_GPIO_Init(GPIOE, &GPIO_InitStruct); + /*Configure GPIO pin : PE4 */ GPIO_InitStruct.Pin = GPIO_PIN_4; GPIO_InitStruct.Mode = GPIO_MODE_IT_RISING; diff --git a/STM32CubeMX/SCSI2SD-V6/Src/stm32f2xx_it.c b/STM32CubeMX/SCSI2SD-V6/Src/stm32f2xx_it.c index 3ed95bbb..efa2c5a5 100755 --- a/STM32CubeMX/SCSI2SD-V6/Src/stm32f2xx_it.c +++ b/STM32CubeMX/SCSI2SD-V6/Src/stm32f2xx_it.c @@ -72,6 +72,7 @@ void SysTick_Handler(void) /* please refer to the startup file (startup_stm32f2xx.s). */ /******************************************************************************/ + /** * @brief This function handles EXTI line4 interrupt. */ diff --git a/rtl/fpga_bitmap.o b/rtl/fpga_bitmap.o index 857b042a..e877e44b 100644 Binary files a/rtl/fpga_bitmap.o and b/rtl/fpga_bitmap.o differ diff --git a/src/firmware/config.c b/src/firmware/config.c index 0ea55b2b..f9b288c8 100755 --- a/src/firmware/config.c +++ b/src/firmware/config.c @@ -37,7 +37,7 @@ #include -static const uint16_t FIRMWARE_VERSION = 0x0625; +static const uint16_t FIRMWARE_VERSION = 0x0627; // 1 flash row static const uint8_t DEFAULT_CONFIG[128] = diff --git a/src/firmware/disk.c b/src/firmware/disk.c index 961387fa..315db1c4 100755 --- a/src/firmware/disk.c +++ b/src/firmware/disk.c @@ -18,6 +18,8 @@ #include "stm32f2xx.h" +#include + // For SD write direct routines #include "sdio.h" #include "bsp_driver_sd.h" @@ -561,14 +563,17 @@ void scsiDiskPoll() int scsiActive __attribute__((unused)) = 0; // unused if DMA disabled int sdActive = 0; - uint32_t partialScsiChunk = 0; - - // Start reading from the SD card FIRST, because we change state and - // wait for SCSI signals - int dataInStarted = 0; + // It's highly unlikely that someone is going to use huge transfers + // per scsi command, but if they do it'll be slower than usual. + uint32_t totalScsiBytes = transfer.blocks * bytesPerSector; + int useSlowDataCount = totalScsiBytes >= SCSI_XFER_MAX; + if (!useSlowDataCount) + { + scsiSetDataCount(totalScsiBytes); + } while ((i < totalSDSectors) && - (!dataInStarted || likely(scsiDev.phase == DATA_IN)) && + likely(scsiDev.phase == DATA_IN) && likely(!scsiDev.resetFlag)) { int completedDmaSectors; @@ -588,12 +593,13 @@ void scsiDiskPoll() if (!sdActive && (prep - i < buffers) && - (prep < totalSDSectors)) + (prep < totalSDSectors) && + ((totalSDSectors - prep) >= sdPerScsi) && + (likely(!useSlowDataCount) || scsiPhyComplete())) { // Start an SD transfer if we have space. uint32_t startBuffer = prep % buffers; uint32_t sectors = totalSDSectors - prep; - uint32_t freeBuffers = buffers - (prep - i); uint32_t contiguousBuffers = buffers - startBuffer; @@ -603,6 +609,12 @@ void scsiDiskPoll() if (sectors > 128) sectors = 128; // 65536 DMA limit !! + // Round-down when we have odd sector sizes. + if (sdPerScsi != 1) + { + sectors = (sectors / sdPerScsi) * sdPerScsi; + } + for (int dodgy = 0; dodgy < sectors; dodgy++) { scsiDev.data[SD_SECTOR_SIZE * (startBuffer + dodgy) + 510] = 0xAA; @@ -613,6 +625,11 @@ void scsiDiskPoll() sdActive = sectors; + if (useSlowDataCount) + { + scsiSetDataCount((sectors / sdPerScsi) * bytesPerSector); + } + // Wait now that the SD card is busy // Chances are we've probably already waited sufficient time, // but it's hard to measure microseconds cheaply. So just wait @@ -624,26 +641,6 @@ void scsiDiskPoll() } } -#ifdef SCSI_FSMC_DMA - #error this code not updated for 256 max bytes in scsi fifo - if (scsiActive && scsiPhyComplete() && scsiWriteDMAPoll()) - { - scsiActive = 0; - i++; - scsiPhyFifoFlip(); - } - if (!scsiActive && ((prep - i) > 0)) - { - int dmaBytes = SD_SECTOR_SIZE; - if ((i % sdPerScsi) == (sdPerScsi - 1)) - { - dmaBytes = bytesPerSector % SD_SECTOR_SIZE; - if (dmaBytes == 0) dmaBytes = SD_SECTOR_SIZE; - } - scsiWriteDMA(&scsiDev.data[SD_SECTOR_SIZE * (i % buffers)], dmaBytes); - scsiActive = 1; - } -#else if ((prep - i) > 0) { int dmaBytes = SD_SECTOR_SIZE; @@ -653,42 +650,11 @@ void scsiDiskPoll() if (dmaBytes == 0) dmaBytes = SD_SECTOR_SIZE; } - // Manually unrolled loop for performance. - // -Os won't unroll this for us automatically, - // especially since scsiPhyTx does volatile stuff. - // Reduces bus utilisation by making the fsmc split - // 32bits into 2 16 bit writes. - - uint16_t* scsiDmaData = (uint16_t*) &(scsiDev.data[SD_SECTOR_SIZE * (i % buffers) + partialScsiChunk]); - - uint32_t chunk = ((dmaBytes - partialScsiChunk) > SCSI_FIFO_DEPTH) - ? SCSI_FIFO_DEPTH : (dmaBytes - partialScsiChunk); - - int k = 0; - for (; k + 4 < (chunk + 1) / 2; k += 4) - { - scsiPhyTx32(scsiDmaData[k], scsiDmaData[k+1]); - scsiPhyTx32(scsiDmaData[k+2], scsiDmaData[k+3]); - } - for (; k < (chunk + 1) / 2; ++k) - { - scsiPhyTx(scsiDmaData[k]); - } - while (!scsiPhyComplete() && !scsiDev.resetFlag) - { - __WFE(); // Wait for event - } - scsiPhyFifoFlip(); - scsiSetDataCount(chunk); + uint8_t* scsiDmaData = &(scsiDev.data[SD_SECTOR_SIZE * (i % buffers)]); + scsiWritePIO(scsiDmaData, dmaBytes); - partialScsiChunk += chunk; - if (partialScsiChunk == dmaBytes) - { - partialScsiChunk = 0; - ++i; - } + ++i; } -#endif } if (phaseChangeDelayUs > 0 && !scsiDev.resetFlag) // zero bytes ? @@ -699,13 +665,14 @@ void scsiDiskPoll() // We've finished transferring the data to the FPGA, now wait until it's // written to he SCSI bus. + __disable_irq(); while (!scsiPhyComplete() && likely(scsiDev.phase == DATA_IN) && likely(!scsiDev.resetFlag)) { - __WFE(); // Wait for event + __WFI(); } - + __enable_irq(); if (scsiDev.phase == DATA_IN) { @@ -727,22 +694,28 @@ void scsiDiskPoll() transfer.lba); int i = 0; int clearBSY = 0; + int extraSectors = 0; int parityError = 0; int enableParity = scsiDev.boardCfg.flags & S2S_CFG_ENABLE_PARITY; + uint32_t scsiSpeed = s2s_getScsiRateMBs(); + + uint32_t maxSectors = sizeof(scsiDev.data) / SD_SECTOR_SIZE; + + static_assert(SCSI_XFER_MAX >= sizeof(scsiDev.data), "Assumes SCSI_XFER_MAX >= sizeof(scsiDev.data)"); + + // Start reading and filling fifos as soon as possible. + scsiSetDataCount(transfer.blocks * bytesPerSector); + while ((i < totalSDSectors) && likely(scsiDev.phase == DATA_OUT) && - likely(!scsiDev.resetFlag) && - likely(!parityError || !enableParity)) + likely(!scsiDev.resetFlag)) + // KEEP GOING to ensure FIFOs are in a good state. + // likely(!parityError || !enableParity)) { - // Well, until we have some proper non-blocking SD code, we must - // do this in a half-duplex fashion. We need to write as much as - // possible in each SD card transaction. - uint32_t maxSectors = sizeof(scsiDev.data) / SD_SECTOR_SIZE; uint32_t rem = totalSDSectors - i; - uint32_t sectors = - rem < maxSectors ? rem : maxSectors; + uint32_t sectors = rem < maxSectors ? rem : maxSectors; if (bytesPerSector == SD_SECTOR_SIZE) { @@ -750,19 +723,20 @@ void scsiDiskPoll() // no flow control. This can be handled if a) the scsi interface // doesn't block and b) we read enough SCSI sectors first so that // the SD interface cannot catch up. + int prevExtraSectors = extraSectors; uint32_t totalBytes = sectors * SD_SECTOR_SIZE; - uint32_t readAheadBytes = sectors * SD_SECTOR_SIZE; + extraSectors = 0; + + int32_t readAheadBytes = totalBytes; uint32_t sdSpeed = s2s_getSdRateMBs() + (scsiDev.sdUnderrunCount / 2); - uint32_t scsiSpeed = s2s_getScsiRateMBs(); // if (have blind writes) if (scsiSpeed > 0 && scsiDev.sdUnderrunCount < 16) { // readAhead = sectors * (sd / scsi - 1 + 0.1); - readAheadBytes = totalBytes * sdSpeed / scsiSpeed - totalBytes + SCSI_FIFO_DEPTH; - if (readAheadBytes < SCSI_FIFO_DEPTH) - { - readAheadBytes = SCSI_FIFO_DEPTH; - } + readAheadBytes = totalBytes * sdSpeed / scsiSpeed - totalBytes; + + // Round up to nearest FIFO size. + readAheadBytes = ((readAheadBytes / SCSI_FIFO_DEPTH) + 1) * SCSI_FIFO_DEPTH; if (readAheadBytes > totalBytes) { @@ -770,60 +744,58 @@ void scsiDiskPoll() } } - uint32_t chunk = (readAheadBytes > SCSI_FIFO_DEPTH) ? SCSI_FIFO_DEPTH : readAheadBytes; - scsiSetDataCount(chunk); + uint32_t prevExtraBytes = prevExtraSectors * SD_SECTOR_SIZE; + uint32_t scsiBytesRead = prevExtraBytes; + readAheadBytes -= prevExtraBytes; // Must be signed! - uint32_t scsiBytesRead = 0; - while (scsiBytesRead < readAheadBytes) + if (readAheadBytes > 0) { - while (!scsiPhyComplete() && likely(!scsiDev.resetFlag)) - { - __WFE(); // Wait for event - } - parityError |= scsiParityError(); - scsiPhyFifoFlip(); - uint32_t nextChunk = ((totalBytes - scsiBytesRead - chunk) > SCSI_FIFO_DEPTH) - ? SCSI_FIFO_DEPTH : (totalBytes - scsiBytesRead - chunk); - - if (nextChunk > 0) scsiSetDataCount(nextChunk); - scsiReadPIO(&scsiDev.data[scsiBytesRead], chunk); - scsiBytesRead += chunk; - chunk = nextChunk; + scsiReadPIO( + &scsiDev.data[scsiBytesRead], + readAheadBytes, + &parityError); + scsiBytesRead += readAheadBytes; } HAL_SD_WriteBlocks_DMA(&hsd, (uint32_t*) (&scsiDev.data[0]), (i + sdLBA) * 512ll, SD_SECTOR_SIZE, sectors); - while (scsiBytesRead < totalBytes) + int underrun = 0; + if (scsiBytesRead < totalBytes && !scsiDev.resetFlag) { - while (!scsiPhyComplete() && likely(!scsiDev.resetFlag)) - { - __WFE(); // Wait for event - } - parityError |= scsiParityError(); - scsiPhyFifoFlip(); - uint32_t nextChunk = ((totalBytes - scsiBytesRead - chunk) > SCSI_FIFO_DEPTH) - ? SCSI_FIFO_DEPTH : (totalBytes - scsiBytesRead - chunk); - - if (nextChunk > 0) scsiSetDataCount(nextChunk); - scsiReadPIO(&scsiDev.data[scsiBytesRead], chunk); - scsiBytesRead += chunk; - chunk = nextChunk; + scsiReadPIO( + &scsiDev.data[scsiBytesRead], + totalBytes - readAheadBytes, + &parityError); + + // Oh dear, SD finished first. + underrun = hsd.DmaTransferCplt; + + scsiBytesRead += (totalBytes - readAheadBytes); } - // Oh dear, SD finished first. - int underrun = totalBytes > readAheadBytes && hsd.DmaTransferCplt; + if (!underrun && rem > sectors) + { + // We probably have some time to waste reading more here. + // While noting this is going to drop us down into + // half-duplex operation (hence why we read max / 4 only) + + extraSectors = rem - sectors > (maxSectors / 4) + ? (maxSectors / 4) + : rem - sectors; + + scsiReadPIO( + &scsiDev.data[0], + extraSectors * SD_SECTOR_SIZE, + &parityError); + } uint32_t dmaFinishTime = s2s_getTime_ms(); - while (!hsd.SdTransferCplt && + while ((!hsd.SdTransferCplt || + __HAL_SD_SDIO_GET_FLAG(&hsd, SDIO_FLAG_TXACT)) && s2s_elapsedTime_ms(dmaFinishTime) < 180) { // Wait while keeping BSY. } - while((__HAL_SD_SDIO_GET_FLAG(&hsd, SDIO_FLAG_TXACT)) && - s2s_elapsedTime_ms(dmaFinishTime) < 180) - { - // Wait for SD card while keeping BSY. - } if (i + sectors >= totalSDSectors && !underrun && @@ -842,14 +814,14 @@ void scsiDiskPoll() HAL_SD_CheckWriteOperation(&hsd, (uint32_t)SD_DATATIMEOUT); - if (underrun) + if (underrun && (!parityError || !enableParity)) { // Try again. Data is still in memory. sdTmpWrite(&scsiDev.data[0], i + sdLBA, sectors); scsiDev.sdUnderrunCount++; } - i += sectors; + i += sectors; } else { @@ -857,11 +829,7 @@ void scsiDiskPoll() // do this in a half-duplex fashion. We need to write as much as // possible in each SD card transaction. // use sg_dd from sg_utils3 tools to test. - uint32_t maxSectors = sizeof(scsiDev.data) / SD_SECTOR_SIZE; - uint32_t rem = totalSDSectors - i; - uint32_t sectors = rem < maxSectors ? rem : maxSectors; - int scsiSector; - for (scsiSector = i; scsiSector < i + sectors; ++scsiSector) + for (int scsiSector = i; scsiSector < i + sectors; ++scsiSector) { int dmaBytes = SD_SECTOR_SIZE; if ((scsiSector % sdPerScsi) == (sdPerScsi - 1)) @@ -869,9 +837,10 @@ void scsiDiskPoll() dmaBytes = bytesPerSector % SD_SECTOR_SIZE; if (dmaBytes == 0) dmaBytes = SD_SECTOR_SIZE; } - scsiRead(&scsiDev.data[SD_SECTOR_SIZE * (scsiSector - i)], dmaBytes, &parityError); + + scsiReadPIO(&scsiDev.data[SD_SECTOR_SIZE * (scsiSector - i)], dmaBytes, &parityError); } - if (!parityError) + if (!parityError || !enableParity) { sdTmpWrite(&scsiDev.data[0], i + sdLBA, sectors); } @@ -879,6 +848,15 @@ void scsiDiskPoll() } } + // Should already be complete here as we've ready the FIFOs + // by now. Check anyway. + __disable_irq(); + while (!scsiPhyComplete() && likely(!scsiDev.resetFlag)) + { + __WFI(); + } + __enable_irq(); + if (clearBSY) { enter_BusFree(); diff --git a/src/firmware/scsi.c b/src/firmware/scsi.c index 4864426f..25f02839 100755 --- a/src/firmware/scsi.c +++ b/src/firmware/scsi.c @@ -303,7 +303,6 @@ static void process_Command() { scsiRead(scsiDev.cdb + 6, scsiDev.cdbLen - 6, &parityError); } - command = scsiDev.cdb[0]; // Prefer LUN's set by IDENTIFY messages for newer hosts. diff --git a/src/firmware/scsi.h b/src/firmware/scsi.h index 5480a6a5..cbfa9807 100755 --- a/src/firmware/scsi.h +++ b/src/firmware/scsi.h @@ -106,7 +106,9 @@ typedef struct typedef struct { // TODO reduce this buffer size and add a proper cache - uint8_t data[MAX_SECTOR_SIZE * 8]; // Must be aligned for DMA + // Must be aligned for DMA + // 65536 bytes is the DMA limit + uint8_t data[MAX_SECTOR_SIZE * 8]; TargetState targets[S2S_MAX_TARGETS]; TargetState* target; diff --git a/src/firmware/scsiPhy.c b/src/firmware/scsiPhy.c index deb67b25..9337b13c 100755 --- a/src/firmware/scsiPhy.c +++ b/src/firmware/scsiPhy.c @@ -30,7 +30,8 @@ static uint8_t asyncTimings[][4] = { /* Speed, Assert, Deskew, Hold, Glitch */ -{/*1.5MB/s*/ 28, 18, 13, 15}, +{/*1.5MB/s*/ 28, 18, 7, 15}, +//{/*1.5MB/s*/ 63, 31, 7, 15}, {/*3.3MB/s*/ 13, 6, 6, 13}, {/*5MB/s*/ 9, 6, 6, 6}, // 80ns {/*safe*/ 3, 6, 6, 6}, // Probably safe @@ -106,8 +107,6 @@ static DMA_HandleTypeDef fsmcToMem; volatile uint8_t scsiRxDMAComplete; volatile uint8_t scsiTxDMAComplete; -uint8_t scsiPhyFifoSel = 0; // global - // scsi IRQ handler is initialised by the STM32 HAL. Connected to // PE4 // Note: naming is important to ensure this function is listed in the @@ -120,15 +119,18 @@ void EXTI4_IRQHandler() // Clear interrupt flag __HAL_GPIO_EXTI_CLEAR_IT(GPIO_PIN_4); - scsiDev.resetFlag = scsiDev.resetFlag || scsiStatusRST(); + uint8_t statusFlags = *SCSI_STS_SCSI; + + scsiDev.resetFlag = scsiDev.resetFlag || (statusFlags & 0x04); // selFlag is required for Philips P2000C which releases it after 600ns // without waiting for BSY. // Also required for some early Mac Plus roms - scsiDev.selFlag = *SCSI_STS_SELECTED; + if (statusFlags & 0x08) // Check SEL flag + { + scsiDev.selFlag = *SCSI_STS_SELECTED; + } } - - __SEV(); // Set event. See corresponding __WFE() calls. } static void assertFail() @@ -145,92 +147,215 @@ static void assertFail() void scsiSetDataCount(uint32_t count) { - *SCSI_DATA_CNT_HI = count >> 8; + *SCSI_DATA_CNT_HI = (count >> 16) & 0xff; + *SCSI_DATA_CNT_MID = (count >> 8) & 0xff; *SCSI_DATA_CNT_LO = count & 0xff; *SCSI_DATA_CNT_SET = 1; } +int scsiFifoReady(void) +{ + __NOP(); + HAL_GPIO_ReadPin(GPIOE, FPGA_GPIO3_Pin); + __NOP(); + return HAL_GPIO_ReadPin(GPIOE, FPGA_GPIO3_Pin) != 0; +} + uint8_t scsiReadByte(void) { -#if FIFODEBUG - if (!scsiPhyFifoAltEmpty()) { - // Force a lock-up. - assertFail(); - } -#endif scsiSetDataCount(1); + // Ready immediately. setDataCount resets fifos + while (!scsiPhyComplete() && likely(!scsiDev.resetFlag)) { - __WFE(); // Wait for event + __WFI(); // Wait for interrupt } - scsiPhyFifoFlip(); + __enable_irq(); + uint8_t val = scsiPhyRx(); // TODO scsiDev.parityError = scsiDev.parityError || SCSI_Parity_Error_Read(); -#if FIFODEBUG - if (!scsiPhyFifoEmpty()) { - int j = 0; - uint8_t k __attribute((unused)); - while (!scsiPhyFifoEmpty()) { k = scsiPhyRx(); ++j; } - - // Force a lock-up. - assertFail(); - } -#endif return val; } void -scsiReadPIO(uint8_t* data, uint32_t count) +scsiReadPIO(uint8_t* data, uint32_t count, int* parityError) { uint16_t* fifoData = (uint16_t*)data; + uint32_t count16 = (count + 1) / 2; - for (int i = 0; i < (count + 1) / 2; ++i) + int i = 0; + while ((i < count16) && likely(!scsiDev.resetFlag)) { - fifoData[i] = scsiPhyRx(); // TODO ASSUMES LITTLE ENDIAN - } -} - -void -scsiReadDMA(uint8_t* data, uint32_t count) -{ - // Prepare DMA transfer - dmaInProgress = 1; - - scsiTxDMAComplete = 1; // TODO not used much - scsiRxDMAComplete = 0; // TODO not used much + // Wait until FIFO is full (or complete) + while (!scsiFifoReady() && likely(!scsiDev.resetFlag)) + { + // spin + } - HAL_DMA_Start( - &fsmcToMem, - (uint32_t) SCSI_FIFO_DATA, - (uint32_t) data, - (count + 1) / 2); -} + if (count16 - i >= SCSI_FIFO_DEPTH16) + { + uint32_t chunk16 = SCSI_FIFO_DEPTH16; -int -scsiReadDMAPoll() -{ - int complete = __HAL_DMA_GET_COUNTER(&fsmcToMem) == 0; - complete = complete && (HAL_DMA_PollForTransfer(&fsmcToMem, HAL_DMA_FULL_TRANSFER, 0xffffffff) == HAL_OK); - if (complete) - { - scsiTxDMAComplete = 1; // TODO MM FIX IRQ - scsiRxDMAComplete = 1; + // Let gcc unroll the loop as much as possible. + for (uint32_t k = 0; k + 128 <= chunk16; k += 128) + { + fifoData[i + k] = scsiPhyRx(); + fifoData[i + k + 1] = scsiPhyRx(); + fifoData[i + k + 2] = scsiPhyRx(); + fifoData[i + k + 3] = scsiPhyRx(); + fifoData[i + k + 4] = scsiPhyRx(); + fifoData[i + k + 5] = scsiPhyRx(); + fifoData[i + k + 6] = scsiPhyRx(); + fifoData[i + k + 7] = scsiPhyRx(); + fifoData[i + k + 8] = scsiPhyRx(); + fifoData[i + k + 9] = scsiPhyRx(); + fifoData[i + k + 10] = scsiPhyRx(); + fifoData[i + k + 11] = scsiPhyRx(); + fifoData[i + k + 12] = scsiPhyRx(); + fifoData[i + k + 13] = scsiPhyRx(); + fifoData[i + k + 14] = scsiPhyRx(); + fifoData[i + k + 15] = scsiPhyRx(); + fifoData[i + k + 16] = scsiPhyRx(); + fifoData[i + k + 17] = scsiPhyRx(); + fifoData[i + k + 18] = scsiPhyRx(); + fifoData[i + k + 19] = scsiPhyRx(); + fifoData[i + k + 20] = scsiPhyRx(); + fifoData[i + k + 21] = scsiPhyRx(); + fifoData[i + k + 22] = scsiPhyRx(); + fifoData[i + k + 23] = scsiPhyRx(); + fifoData[i + k + 24] = scsiPhyRx(); + fifoData[i + k + 25] = scsiPhyRx(); + fifoData[i + k + 26] = scsiPhyRx(); + fifoData[i + k + 27] = scsiPhyRx(); + fifoData[i + k + 28] = scsiPhyRx(); + fifoData[i + k + 29] = scsiPhyRx(); + fifoData[i + k + 30] = scsiPhyRx(); + fifoData[i + k + 31] = scsiPhyRx(); + fifoData[i + k + 32] = scsiPhyRx(); + fifoData[i + k + 33] = scsiPhyRx(); + fifoData[i + k + 34] = scsiPhyRx(); + fifoData[i + k + 35] = scsiPhyRx(); + fifoData[i + k + 36] = scsiPhyRx(); + fifoData[i + k + 37] = scsiPhyRx(); + fifoData[i + k + 38] = scsiPhyRx(); + fifoData[i + k + 39] = scsiPhyRx(); + fifoData[i + k + 40] = scsiPhyRx(); + fifoData[i + k + 41] = scsiPhyRx(); + fifoData[i + k + 42] = scsiPhyRx(); + fifoData[i + k + 43] = scsiPhyRx(); + fifoData[i + k + 44] = scsiPhyRx(); + fifoData[i + k + 45] = scsiPhyRx(); + fifoData[i + k + 46] = scsiPhyRx(); + fifoData[i + k + 47] = scsiPhyRx(); + fifoData[i + k + 48] = scsiPhyRx(); + fifoData[i + k + 49] = scsiPhyRx(); + fifoData[i + k + 50] = scsiPhyRx(); + fifoData[i + k + 51] = scsiPhyRx(); + fifoData[i + k + 52] = scsiPhyRx(); + fifoData[i + k + 53] = scsiPhyRx(); + fifoData[i + k + 54] = scsiPhyRx(); + fifoData[i + k + 55] = scsiPhyRx(); + fifoData[i + k + 56] = scsiPhyRx(); + fifoData[i + k + 57] = scsiPhyRx(); + fifoData[i + k + 58] = scsiPhyRx(); + fifoData[i + k + 59] = scsiPhyRx(); + fifoData[i + k + 60] = scsiPhyRx(); + fifoData[i + k + 61] = scsiPhyRx(); + fifoData[i + k + 62] = scsiPhyRx(); + fifoData[i + k + 63] = scsiPhyRx(); + fifoData[i + k + 64] = scsiPhyRx(); + fifoData[i + k + 65] = scsiPhyRx(); + fifoData[i + k + 66] = scsiPhyRx(); + fifoData[i + k + 67] = scsiPhyRx(); + fifoData[i + k + 68] = scsiPhyRx(); + fifoData[i + k + 69] = scsiPhyRx(); + fifoData[i + k + 70] = scsiPhyRx(); + fifoData[i + k + 71] = scsiPhyRx(); + fifoData[i + k + 72] = scsiPhyRx(); + fifoData[i + k + 73] = scsiPhyRx(); + fifoData[i + k + 74] = scsiPhyRx(); + fifoData[i + k + 75] = scsiPhyRx(); + fifoData[i + k + 76] = scsiPhyRx(); + fifoData[i + k + 77] = scsiPhyRx(); + fifoData[i + k + 78] = scsiPhyRx(); + fifoData[i + k + 79] = scsiPhyRx(); + fifoData[i + k + 80] = scsiPhyRx(); + fifoData[i + k + 81] = scsiPhyRx(); + fifoData[i + k + 82] = scsiPhyRx(); + fifoData[i + k + 83] = scsiPhyRx(); + fifoData[i + k + 84] = scsiPhyRx(); + fifoData[i + k + 85] = scsiPhyRx(); + fifoData[i + k + 86] = scsiPhyRx(); + fifoData[i + k + 87] = scsiPhyRx(); + fifoData[i + k + 88] = scsiPhyRx(); + fifoData[i + k + 89] = scsiPhyRx(); + fifoData[i + k + 90] = scsiPhyRx(); + fifoData[i + k + 91] = scsiPhyRx(); + fifoData[i + k + 92] = scsiPhyRx(); + fifoData[i + k + 93] = scsiPhyRx(); + fifoData[i + k + 94] = scsiPhyRx(); + fifoData[i + k + 95] = scsiPhyRx(); + fifoData[i + k + 96] = scsiPhyRx(); + fifoData[i + k + 97] = scsiPhyRx(); + fifoData[i + k + 98] = scsiPhyRx(); + fifoData[i + k + 99] = scsiPhyRx(); + fifoData[i + k + 100] = scsiPhyRx(); + fifoData[i + k + 101] = scsiPhyRx(); + fifoData[i + k + 102] = scsiPhyRx(); + fifoData[i + k + 103] = scsiPhyRx(); + fifoData[i + k + 104] = scsiPhyRx(); + fifoData[i + k + 105] = scsiPhyRx(); + fifoData[i + k + 106] = scsiPhyRx(); + fifoData[i + k + 107] = scsiPhyRx(); + fifoData[i + k + 108] = scsiPhyRx(); + fifoData[i + k + 109] = scsiPhyRx(); + fifoData[i + k + 110] = scsiPhyRx(); + fifoData[i + k + 111] = scsiPhyRx(); + fifoData[i + k + 112] = scsiPhyRx(); + fifoData[i + k + 113] = scsiPhyRx(); + fifoData[i + k + 114] = scsiPhyRx(); + fifoData[i + k + 115] = scsiPhyRx(); + fifoData[i + k + 116] = scsiPhyRx(); + fifoData[i + k + 117] = scsiPhyRx(); + fifoData[i + k + 118] = scsiPhyRx(); + fifoData[i + k + 119] = scsiPhyRx(); + fifoData[i + k + 120] = scsiPhyRx(); + fifoData[i + k + 121] = scsiPhyRx(); + fifoData[i + k + 122] = scsiPhyRx(); + fifoData[i + k + 123] = scsiPhyRx(); + fifoData[i + k + 124] = scsiPhyRx(); + fifoData[i + k + 125] = scsiPhyRx(); + fifoData[i + k + 126] = scsiPhyRx(); + fifoData[i + k + 127] = scsiPhyRx(); + } - dmaInProgress = 0; -#if 0 - // TODO MM scsiDev.parityError = scsiDev.parityError || SCSI_Parity_Error_Read(); -#endif - return 1; + i += chunk16; + } + else + { + uint32_t chunk16 = count16 - i; + uint32_t k = 0; + for (; k + 4 <= chunk16; k += 4) + { + fifoData[i + k] = scsiPhyRx(); + fifoData[i + 1 + k] = scsiPhyRx(); + fifoData[i + 2 + k] = scsiPhyRx(); + fifoData[i + 3 + k] = scsiPhyRx(); + } + for (; k < chunk16; ++k) + { + fifoData[i + k] = scsiPhyRx(); + } + i += chunk16; + } } - else - { - return 0; - } + + *parityError |= scsiParityError(); } void @@ -239,208 +364,173 @@ scsiRead(uint8_t* data, uint32_t count, int* parityError) int i = 0; *parityError = 0; - - uint32_t chunk = ((count - i) > SCSI_FIFO_DEPTH) - ? SCSI_FIFO_DEPTH : (count - i); -#ifdef SCSI_FSMC_DMA - if (chunk >= 16) - { - // DMA is doing 32bit transfers. - chunk = chunk & 0xFFFFFFF8; - } -#endif - scsiSetDataCount(chunk); - while (i < count && likely(!scsiDev.resetFlag)) { - while (!scsiPhyComplete() && likely(!scsiDev.resetFlag)) - { - __WFE(); // Wait for event - } - *parityError |= scsiParityError(); - scsiPhyFifoFlip(); + uint32_t chunk = ((count - i) > SCSI_XFER_MAX) + ? SCSI_XFER_MAX : (count - i); + scsiSetDataCount(chunk); - uint32_t nextChunk = ((count - i - chunk) > SCSI_FIFO_DEPTH) - ? SCSI_FIFO_DEPTH : (count - i - chunk); -#ifdef SCSI_FSMC_DMA - if (nextChunk >= 16) - { - nextChunk = nextChunk & 0xFFFFFFF8; - } -#endif - if (nextChunk > 0) - { - scsiSetDataCount(nextChunk); - } + scsiReadPIO(data + i, chunk, parityError); -#ifdef SCSI_FSMC_DMA - if (chunk < 16) -#endif - { - scsiReadPIO(data + i, chunk); - } -#ifdef SCSI_FSMC_DMA - else + __disable_irq(); + while (!scsiPhyComplete() && likely(!scsiDev.resetFlag)) { - scsiReadDMA(data + i, chunk); - - while (!scsiReadDMAPoll() && likely(!scsiDev.resetFlag)) - { - }; + __WFI(); } -#endif - + __enable_irq(); i += chunk; - chunk = nextChunk; } -#if FIFODEBUG - if (!scsiPhyFifoEmpty() || !scsiPhyFifoAltEmpty()) { - int j = 0; - while (!scsiPhyFifoEmpty()) { scsiPhyRx(); ++j; } - scsiPhyFifoFlip(); - int k = 0; - while (!scsiPhyFifoEmpty()) { scsiPhyRx(); ++k; } - // Force a lock-up. - assertFail(); - } -#endif } void scsiWriteByte(uint8_t value) { -#if FIFODEBUG - if (!scsiPhyFifoEmpty()) { - // Force a lock-up. - assertFail(); - } -#endif - scsiPhyTx(value); - scsiPhyFifoFlip(); - scsiSetDataCount(1); + scsiPhyTx(value); + __disable_irq(); while (!scsiPhyComplete() && likely(!scsiDev.resetFlag)) { - __WFE(); // Wait for event - } - -#if FIFODEBUG - if (!scsiPhyFifoAltEmpty()) { - // Force a lock-up. - assertFail(); + __WFI(); } -#endif + __enable_irq(); } -static void +void scsiWritePIO(const uint8_t* data, uint32_t count) { uint16_t* fifoData = (uint16_t*)data; - for (int i = 0; i < (count + 1) / 2; ++i) + uint32_t count16 = (count + 1) / 2; + + int i = 0; + while ((i < count16) && likely(!scsiDev.resetFlag)) { - scsiPhyTx(fifoData[i]); - } -} + while (!scsiFifoReady() && likely(!scsiDev.resetFlag)) + { + // Spin + } -void -scsiWriteDMA(const uint8_t* data, uint32_t count) -{ - // Prepare DMA transfer - dmaInProgress = 1; + if (count16 - i >= SCSI_FIFO_DEPTH16) + { + uint32_t chunk16 = SCSI_FIFO_DEPTH16; - scsiTxDMAComplete = 0; - scsiRxDMAComplete = 1; + // Let gcc unroll the loop as much as possible. + for (uint32_t k = 0; k + 128 <= chunk16; k += 128) + { + scsiPhyTx32(fifoData[i + k], fifoData[i + k + 1]); + scsiPhyTx32(fifoData[i + 2 + k], fifoData[i + k + 3]); + scsiPhyTx32(fifoData[i + 4 + k], fifoData[i + k + 5]); + scsiPhyTx32(fifoData[i + 6 + k], fifoData[i + k + 7]); + scsiPhyTx32(fifoData[i + 8 + k], fifoData[i + k + 9]); + scsiPhyTx32(fifoData[i + 10 + k], fifoData[i + k + 11]); + scsiPhyTx32(fifoData[i + 12 + k], fifoData[i + k + 13]); + scsiPhyTx32(fifoData[i + 14 + k], fifoData[i + k + 15]); + scsiPhyTx32(fifoData[i + 16 + k], fifoData[i + k + 17]); + scsiPhyTx32(fifoData[i + 18 + k], fifoData[i + k + 19]); + scsiPhyTx32(fifoData[i + 20 + k], fifoData[i + k + 21]); + scsiPhyTx32(fifoData[i + 22 + k], fifoData[i + k + 23]); + scsiPhyTx32(fifoData[i + 24 + k], fifoData[i + k + 25]); + scsiPhyTx32(fifoData[i + 26 + k], fifoData[i + k + 27]); + scsiPhyTx32(fifoData[i + 28 + k], fifoData[i + k + 29]); + scsiPhyTx32(fifoData[i + 30 + k], fifoData[i + k + 31]); + + scsiPhyTx32(fifoData[i + 32 + k], fifoData[i + k + 33]); + scsiPhyTx32(fifoData[i + 34 + k], fifoData[i + k + 35]); + scsiPhyTx32(fifoData[i + 36 + k], fifoData[i + k + 37]); + scsiPhyTx32(fifoData[i + 38 + k], fifoData[i + k + 39]); + scsiPhyTx32(fifoData[i + 40 + k], fifoData[i + k + 41]); + scsiPhyTx32(fifoData[i + 42 + k], fifoData[i + k + 43]); + scsiPhyTx32(fifoData[i + 44 + k], fifoData[i + k + 45]); + scsiPhyTx32(fifoData[i + 46 + k], fifoData[i + k + 47]); + scsiPhyTx32(fifoData[i + 48 + k], fifoData[i + k + 49]); + scsiPhyTx32(fifoData[i + 50 + k], fifoData[i + k + 51]); + scsiPhyTx32(fifoData[i + 52 + k], fifoData[i + k + 53]); + scsiPhyTx32(fifoData[i + 54 + k], fifoData[i + k + 55]); + scsiPhyTx32(fifoData[i + 56 + k], fifoData[i + k + 57]); + scsiPhyTx32(fifoData[i + 58 + k], fifoData[i + k + 59]); + scsiPhyTx32(fifoData[i + 60 + k], fifoData[i + k + 61]); + scsiPhyTx32(fifoData[i + 62 + k], fifoData[i + k + 63]); + + scsiPhyTx32(fifoData[i + 64 + k], fifoData[i + k + 65]); + scsiPhyTx32(fifoData[i + 66 + k], fifoData[i + k + 67]); + scsiPhyTx32(fifoData[i + 68 + k], fifoData[i + k + 69]); + scsiPhyTx32(fifoData[i + 70 + k], fifoData[i + k + 71]); + scsiPhyTx32(fifoData[i + 72 + k], fifoData[i + k + 73]); + scsiPhyTx32(fifoData[i + 74 + k], fifoData[i + k + 75]); + scsiPhyTx32(fifoData[i + 76 + k], fifoData[i + k + 77]); + scsiPhyTx32(fifoData[i + 78 + k], fifoData[i + k + 79]); + scsiPhyTx32(fifoData[i + 80 + k], fifoData[i + k + 81]); + scsiPhyTx32(fifoData[i + 82 + k], fifoData[i + k + 83]); + scsiPhyTx32(fifoData[i + 84 + k], fifoData[i + k + 85]); + scsiPhyTx32(fifoData[i + 86 + k], fifoData[i + k + 87]); + scsiPhyTx32(fifoData[i + 88 + k], fifoData[i + k + 89]); + scsiPhyTx32(fifoData[i + 90 + k], fifoData[i + k + 91]); + scsiPhyTx32(fifoData[i + 92 + k], fifoData[i + k + 93]); + scsiPhyTx32(fifoData[i + 94 + k], fifoData[i + k + 95]); + + scsiPhyTx32(fifoData[i + 96 + k], fifoData[i + k + 97]); + scsiPhyTx32(fifoData[i + 98 + k], fifoData[i + k + 99]); + scsiPhyTx32(fifoData[i + 100 + k], fifoData[i + k + 101]); + scsiPhyTx32(fifoData[i + 102 + k], fifoData[i + k + 103]); + scsiPhyTx32(fifoData[i + 104 + k], fifoData[i + k + 105]); + scsiPhyTx32(fifoData[i + 106 + k], fifoData[i + k + 107]); + scsiPhyTx32(fifoData[i + 108 + k], fifoData[i + k + 109]); + scsiPhyTx32(fifoData[i + 110 + k], fifoData[i + k + 111]); + scsiPhyTx32(fifoData[i + 112 + k], fifoData[i + k + 113]); + scsiPhyTx32(fifoData[i + 114 + k], fifoData[i + k + 115]); + scsiPhyTx32(fifoData[i + 116 + k], fifoData[i + k + 117]); + scsiPhyTx32(fifoData[i + 118 + k], fifoData[i + k + 119]); + scsiPhyTx32(fifoData[i + 120 + k], fifoData[i + k + 121]); + scsiPhyTx32(fifoData[i + 122 + k], fifoData[i + k + 123]); + scsiPhyTx32(fifoData[i + 124 + k], fifoData[i + k + 125]); + scsiPhyTx32(fifoData[i + 126 + k], fifoData[i + k + 127]); - HAL_DMA_Start( - &memToFSMC, - (uint32_t) data, - (uint32_t) SCSI_FIFO_DATA, - count / 4); -} + } -int -scsiWriteDMAPoll() -{ - int complete = __HAL_DMA_GET_COUNTER(&memToFSMC) == 0; - complete = complete && (HAL_DMA_PollForTransfer(&memToFSMC, HAL_DMA_FULL_TRANSFER, 0xffffffff) == HAL_OK); - if (complete) - { - scsiTxDMAComplete = 1; // TODO MM FIX IRQ - scsiRxDMAComplete = 1; + i += chunk16; + } + else + { + uint32_t chunk16 = count16 - i; - dmaInProgress = 0; - return 1; - } - else - { - return 0; + uint32_t k = 0; + for (; k + 4 <= chunk16; k += 4) + { + scsiPhyTx32(fifoData[i + k], fifoData[i + k + 1]); + scsiPhyTx32(fifoData[i + k + 2], fifoData[i + k + 3]); + } + for (; k < chunk16; ++k) + { + scsiPhyTx(fifoData[i + k]); + } + i += chunk16; + } } } + void scsiWrite(const uint8_t* data, uint32_t count) { int i = 0; while (i < count && likely(!scsiDev.resetFlag)) { - uint32_t chunk = ((count - i) > SCSI_FIFO_DEPTH) - ? SCSI_FIFO_DEPTH : (count - i); - -#if FIFODEBUG - if (!scsiPhyFifoEmpty()) { - // Force a lock-up. - assertFail(); - } -#endif - -#ifdef SCSI_FSMC_DMA - if (chunk < 16) -#endif - { - scsiWritePIO(data + i, chunk); - } -#ifdef SCSI_FSMC_DMA - else - { - // DMA is doing 32bit transfers. - chunk = chunk & 0xFFFFFFF8; - scsiWriteDMA(data + i, chunk); + uint32_t chunk = ((count - i) > SCSI_XFER_MAX) + ? SCSI_XFER_MAX : (count - i); + scsiSetDataCount(chunk); - while (!scsiWriteDMAPoll() && likely(!scsiDev.resetFlag)) - { - } - } -#endif + scsiWritePIO(data + i, chunk); + __disable_irq(); while (!scsiPhyComplete() && likely(!scsiDev.resetFlag)) { - __WFE(); // Wait for event + __WFI(); } + __enable_irq(); -#if FIFODEBUG - if (!scsiPhyFifoAltEmpty()) { - // Force a lock-up. - assertFail(); - } -#endif - - scsiPhyFifoFlip(); - scsiSetDataCount(chunk); i += chunk; } - while (!scsiPhyComplete() && likely(!scsiDev.resetFlag)) - { - __WFE(); // Wait for event - } - -#if FIFODEBUG - if (!scsiPhyFifoAltEmpty()) { - // Force a lock-up. - assertFail(); - } -#endif } static inline void busSettleDelay(void) @@ -498,10 +588,6 @@ uint32_t scsiEnterPhaseImmediate(int newPhase) int oldPhase = *SCSI_CTRL_PHASE; - if (!scsiDev.resetFlag && (!scsiPhyFifoEmpty() || !scsiPhyFifoAltEmpty())) { - // Force a lock-up. - assertFail(); - } if (newPhase != oldPhase) { if ((newPhase == DATA_IN || newPhase == DATA_OUT) && @@ -639,8 +725,6 @@ void scsiPhyReset() *SCSI_CTRL_PHASE = 0x00; *SCSI_CTRL_BSY = 0x00; - scsiPhyFifoSel = 0; - *SCSI_FIFO_SEL = 0; *SCSI_CTRL_DBX = 0; *SCSI_CTRL_SYNC_OFFSET = 0; @@ -674,6 +758,31 @@ void scsiPhyReset() } #endif + // PIO Benchmark code + // Currently 16.7MB/s. + //#define PIO_BENCHMARK 1 + #ifdef PIO_BENCHMARK + while(1) + { + s2s_ledOn(); + + scsiEnterPhase(DATA_IN); // Need IO flag set for fifo ready flag + + // 100MB + for (int i = 0; i < (100LL * 1024 * 1024 / SCSI_FIFO_DEPTH); ++i) + { + scsiSetDataCount(1); // Resets fifos. + + // Shouldn't block + scsiDev.resetFlag = 0; + scsiWritePIO(&scsiDev.data[0], SCSI_FIFO_DEPTH); + } + s2s_ledOff(); + + for(int i = 0; i < 10; ++i) s2s_delay_ms(1000); + } + #endif + #ifdef SCSI_FREQ_TEST while(1) { @@ -749,8 +858,6 @@ void scsiPhyInit() *SCSI_CTRL_IDMASK = 0x00; // Reset in scsiPhyConfig *SCSI_CTRL_PHASE = 0x00; *SCSI_CTRL_BSY = 0x00; - scsiPhyFifoSel = 0; - *SCSI_FIFO_SEL = 0; *SCSI_CTRL_DBX = 0; *SCSI_CTRL_SYNC_OFFSET = 0; diff --git a/src/firmware/scsiPhy.h b/src/firmware/scsiPhy.h index 19f0aa6f..c2288db7 100755 --- a/src/firmware/scsiPhy.h +++ b/src/firmware/scsiPhy.h @@ -20,8 +20,8 @@ #define SCSI_CTRL_IDMASK ((volatile uint8_t*)0x60000000) #define SCSI_CTRL_PHASE ((volatile uint8_t*)0x60000002) #define SCSI_CTRL_BSY ((volatile uint8_t*)0x60000004) -#define SCSI_FIFO_SEL ((volatile uint8_t*)0x60000006) -#define SCSI_DATA_CNT_HI ((volatile uint8_t*)0x60000008) +#define SCSI_DATA_CNT_HI ((volatile uint8_t*)0x60000006) +#define SCSI_DATA_CNT_MID ((volatile uint8_t*)0x60000008) #define SCSI_DATA_CNT_LO ((volatile uint8_t*)0x6000000A) #define SCSI_DATA_CNT_SET ((volatile uint8_t*)0x6000000C) #define SCSI_CTRL_DBX ((volatile uint8_t*)0x6000000E) @@ -35,7 +35,7 @@ #define SCSI_CTRL_SEL_TIMING ((volatile uint8_t*)0x60000018) #define SCSI_STS_FIFO ((volatile uint8_t*)0x60000020) -#define SCSI_STS_ALTFIFO ((volatile uint8_t*)0x60000022) +// Obsolete #define SCSI_STS_ALTFIFO ((volatile uint8_t*)0x60000022) #define SCSI_STS_FIFO_COMPLETE ((volatile uint8_t*)0x60000024) #define SCSI_STS_SELECTED ((volatile uint8_t*)0x60000026) #define SCSI_STS_SCSI ((volatile uint8_t*)0x60000028) @@ -47,18 +47,17 @@ #define SCSI_STS_PARITY_ERR ((volatile uint8_t*)0x6000002C) #define SCSI_FIFO_DATA ((volatile uint16_t*)0x60000040) -#define SCSI_FIFO_DEPTH 256 +#define SCSI_FIFO_DEPTH 512 +#define SCSI_FIFO_DEPTH16 (SCSI_FIFO_DEPTH / 2) +#define SCSI_XFER_MAX 524288 -#define scsiPhyFifoFull() ((*SCSI_STS_FIFO & 0x01) == 0x01) -#define scsiPhyFifoEmpty() ((*SCSI_STS_FIFO & 0x02) == 0x02) -#define scsiPhyFifoAltEmpty() ((*SCSI_STS_ALTFIFO & 0x02) == 0x02) +// Check if FIFO is empty or full. +// Replaced with method due to delays +// #define scsiFifoReady() (HAL_GPIO_ReadPin(GPIOE, FPGA_GPIO3_Pin) != 0) -#define scsiPhyFifoFlip() \ -{\ - scsiPhyFifoSel ^= 1; \ - *SCSI_FIFO_SEL = scsiPhyFifoSel; \ -} +#define scsiPhyFifoFull() ((*SCSI_STS_FIFO & 0x01) != 0) +#define scsiPhyFifoEmpty() ((*SCSI_STS_FIFO & 0x02) != 0) #define scsiPhyTx(val) *SCSI_FIFO_DATA = (val) @@ -69,24 +68,23 @@ #define scsiPhyRx() *SCSI_FIFO_DATA #define scsiPhyComplete() ((*SCSI_STS_FIFO_COMPLETE & 0x01) == 0x01) -#define scsiStatusATN() ((*SCSI_STS_SCSI & 0x01) == 0x01) -#define scsiStatusBSY() ((*SCSI_STS_SCSI & 0x02) == 0x02) -#define scsiStatusRST() ((*SCSI_STS_SCSI & 0x04) == 0x04) -#define scsiStatusSEL() ((*SCSI_STS_SCSI & 0x08) == 0x08) -#define scsiStatusACK() ((*SCSI_STS_SCSI & 0x10) == 0x10) +#define scsiStatusATN() ((*SCSI_STS_SCSI & 0x01) != 0) +#define scsiStatusBSY() ((*SCSI_STS_SCSI & 0x02) != 0) +#define scsiStatusRST() ((*SCSI_STS_SCSI & 0x04) != 0) +#define scsiStatusSEL() ((*SCSI_STS_SCSI & 0x08) != 0) +#define scsiStatusACK() ((*SCSI_STS_SCSI & 0x10) != 0) -#define scsiParityError() ((*SCSI_STS_PARITY_ERR & 0x1) == 0x1) +#define scsiParityError() ((*SCSI_STS_PARITY_ERR & 0x1) != 0) // Disable DMA due to errate with the STM32F205 DMA2 controller when // concurrently transferring FSMC (with FIFO) and APB (ie. sdio) // peripherals. #undef SCSI_FSMC_DMA -extern uint8_t scsiPhyFifoSel; - void scsiPhyInit(void); void scsiPhyConfig(void); void scsiPhyReset(void); +int scsiFifoReady(void); void scsiEnterPhase(int phase); uint32_t scsiEnterPhaseImmediate(int phase); @@ -111,7 +109,8 @@ void scsiReadDMA(uint8_t* data, uint32_t count); int scsiReadDMAPoll(); // Low-level. -void scsiReadPIO(uint8_t* data, uint32_t count); +void scsiReadPIO(uint8_t* data, uint32_t count, int* parityError); +void scsiWritePIO(const uint8_t* data, uint32_t count); void scsiWriteDMA(const uint8_t* data, uint32_t count); int scsiWriteDMAPoll(); diff --git a/src/firmware/sd.h b/src/firmware/sd.h index 0a9d6328..b82f1192 100755 --- a/src/firmware/sd.h +++ b/src/firmware/sd.h @@ -34,14 +34,9 @@ extern SdDevice sdDev; int sdInit(void); -void sdWriteMultiSectorPrep(uint32_t sdLBA, uint32_t sdSectors); -void sdWriteMultiSectorDMA(uint8_t* outputBuffer); -int sdWriteSectorDMAPoll(); - void sdReadDMA(uint32_t lba, uint32_t sectors, uint8_t* outputBuffer); int sdReadDMAPoll(uint32_t remainingSectors); void sdCompleteTransfer(); -void sdPoll(); #endif