From: Michael McMaster Date: Tue, 24 Dec 2019 06:13:47 +0000 (+1000) Subject: Improve sync transfer stability by measuring host speed before blind writes to SD... X-Git-Tag: v6.2.14~1 X-Git-Url: http://git.codesrc.com/gitweb.cgi?a=commitdiff_plain;h=8b13d49ce01748d934f739b9df2b0ea0d42598c2;p=SCSI2SD-V6.git Improve sync transfer stability by measuring host speed before blind writes to SD card --- diff --git a/Makefile b/Makefile index 9be0d9a6..b81734a6 100644 --- a/Makefile +++ b/Makefile @@ -160,6 +160,9 @@ build/firmware.elf: $(SRC) rtl/fpga_bitmap.o $(STM32OBJS) build/firmware.bin: build/firmware.elf $(OBJCOPY) -O binary $< $@ +# Example to hard-code config within firmware +#sudo arm-none-eabi-objcopy --update-section .fixed_config=config.dat firmware.elf -O binary firmware.bin + build/stm32cubemx/%.o: mkdir -p build/stm32cubemx $(ARMCC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $(STM32CubeMX_INCUDE) $(INCLUDE) $^ diff --git a/src/firmware/bsp.c b/src/firmware/bsp.c index 0c0a4774..a6794398 100644 --- a/src/firmware/bsp.c +++ b/src/firmware/bsp.c @@ -23,15 +23,15 @@ static int usingFastClock = 0; // TODO keep clock routines consistent with those in STM32Cubemx main.c -uint32_t s2s_getSdRateMBs() +uint32_t s2s_getSdRateKBs() { if (usingFastClock) { - return 18; // ((72MHz / 2) / 8bits) * 4bitparallel + return 18000; // ((72MHz / 2) / 8bits) * 4bitparallel } else { - return 12; // ((48MHz / 2) / 8bits) * 4bitparallel + return 12000; // ((48MHz / 2) / 8bits) * 4bitparallel } } diff --git a/src/firmware/bsp.h b/src/firmware/bsp.h index a11c850a..10481294 100644 --- a/src/firmware/bsp.h +++ b/src/firmware/bsp.h @@ -27,7 +27,7 @@ void s2s_setNormalClock(); void s2s_setFastClock(); -uint32_t s2s_getSdRateMBs(); +uint32_t s2s_getSdRateKBs(); #endif diff --git a/src/firmware/config.c b/src/firmware/config.c index 46bebb83..ef09bc06 100755 --- a/src/firmware/config.c +++ b/src/firmware/config.c @@ -37,7 +37,10 @@ #include -static const uint16_t FIRMWARE_VERSION = 0x0629; +static const uint16_t FIRMWARE_VERSION = 0x062A; + +// Optional static config +extern uint8_t* __fixed_config; // 1 flash row static const uint8_t DEFAULT_CONFIG[128] = @@ -87,7 +90,14 @@ void s2s_configInit(S2S_BoardCfg* config) { usbInEpState = USB_IDLE; - if ((blockDev.state & DISK_PRESENT) && sdDev.capacity) + if (memcmp(__fixed_config, "BCFG", 4) == 0) + { + // Use hardcoded config + memcpy(s2s_cfg, __fixed_config, S2S_CFG_SIZE); + memcpy(config, s2s_cfg, sizeof(S2S_BoardCfg)); + } + + else if ((blockDev.state & DISK_PRESENT) && sdDev.capacity) { int cfgSectors = (S2S_CFG_SIZE + 511) / 512; BSP_SD_ReadBlocks_DMA( diff --git a/src/firmware/disk.c b/src/firmware/disk.c index 773ffd34..3c562e30 100755 --- a/src/firmware/disk.c +++ b/src/firmware/disk.c @@ -548,6 +548,29 @@ int scsiDiskCommand() return commandHandled; } +static uint32_t +calcReadahead(uint32_t totalBytes, uint32_t sdSpeedKBs, uint32_t scsiSpeedKBs) +{ + if (scsiSpeedKBs == 0 || scsiDev.hostSpeedMeasured == 0) + { + return totalBytes; + } + + // uint32_t readAheadBytes = totalBytes * (1 - scsiSpeedKBs / sdSpeedKBs); + // Won't overflow with 65536 max bytes, 20000 max scsi speed. + uint32_t readAheadBytes = totalBytes - totalBytes * scsiSpeedKBs / sdSpeedKBs; + + // Round up to nearest FIFO size (* 4 for safety) + readAheadBytes = ((readAheadBytes / SCSI_FIFO_DEPTH) + 4) * SCSI_FIFO_DEPTH; + + if (readAheadBytes > totalBytes) + { + readAheadBytes = totalBytes; + } + + return readAheadBytes; +} + void scsiDiskPoll() { uint32_t bytesPerSector = scsiDev.target->liveCfg.bytesPerSector; @@ -704,18 +727,16 @@ void scsiDiskPoll() transfer.lba); int i = 0; int clearBSY = 0; - int extraSectors = 0; int parityError = 0; int enableParity = scsiDev.boardCfg.flags & S2S_CFG_ENABLE_PARITY; - uint32_t scsiSpeed = s2s_getScsiRateMBs(); - uint32_t maxSectors = sizeof(scsiDev.data) / SD_SECTOR_SIZE; static_assert(SCSI_XFER_MAX >= sizeof(scsiDev.data), "Assumes SCSI_XFER_MAX >= sizeof(scsiDev.data)"); // Start reading and filling fifos as soon as possible. + DWT->CYCCNT = 0; // Start counting cycles scsiSetDataCount(transfer.blocks * bytesPerSector); while ((i < totalSDSectors) && @@ -733,31 +754,15 @@ void scsiDiskPoll() // no flow control. This can be handled if a) the scsi interface // doesn't block and b) we read enough SCSI sectors first so that // the SD interface cannot catch up. - int prevExtraSectors = extraSectors; uint32_t totalBytes = sectors * SD_SECTOR_SIZE; - extraSectors = 0; - int32_t readAheadBytes = totalBytes; - uint32_t sdSpeed = s2s_getSdRateMBs() + (scsiDev.sdUnderrunCount / 2); - // if (have blind writes) - if (scsiSpeed > 0 && scsiDev.sdUnderrunCount < 16) - { - // readAhead = sectors * (sd / scsi - 1 + 0.1); - readAheadBytes = totalBytes * sdSpeed / scsiSpeed - totalBytes; - - // Round up to nearest FIFO size. - readAheadBytes = ((readAheadBytes / SCSI_FIFO_DEPTH) + 1) * SCSI_FIFO_DEPTH; - - if (readAheadBytes > totalBytes) - { - readAheadBytes = totalBytes; - } - } - - uint32_t prevExtraBytes = prevExtraSectors * SD_SECTOR_SIZE; - uint32_t scsiBytesRead = prevExtraBytes; - readAheadBytes -= prevExtraBytes; // Must be signed! + uint32_t sdSpeedKBs = s2s_getSdRateKBs() + (scsiDev.sdUnderrunCount * 256); + uint32_t readAheadBytes = calcReadahead( + totalBytes, + sdSpeedKBs, + scsiDev.hostSpeedKBs); + uint32_t scsiBytesRead = 0; if (readAheadBytes > 0) { scsiReadPIO( @@ -765,6 +770,42 @@ void scsiDiskPoll() readAheadBytes, &parityError); scsiBytesRead += readAheadBytes; + + if (i == 0) + { + uint32_t elapsedCycles = DWT->CYCCNT; + + // uint32_t rateKBs = (readAheadBytes / 1000) / (elapsedCycles / HAL_RCC_GetHCLKFreq()); + // Scaled by 4 to avoid overflow w/ max 65536 at 108MHz. + uint32_t rateKBs = ((readAheadBytes / 4) * (HAL_RCC_GetHCLKFreq() / 1000) / elapsedCycles) * 4; + + scsiDev.hostSpeedKBs = (scsiDev.hostSpeedKBs + rateKBs) / 2; + scsiDev.hostSpeedMeasured = 1; + + if (rateKBs < scsiDev.hostSpeedKBs) + { + // Our readahead was too slow; assume remaining bytes + // will be as well. + if (readAheadBytes < totalBytes) + { + uint32_t properReadahead = calcReadahead( + totalBytes, + sdSpeedKBs, + rateKBs); + + if (properReadahead > readAheadBytes) + { + uint32_t diff = properReadahead - readAheadBytes; + readAheadBytes = properReadahead; + scsiReadPIO( + &scsiDev.data[scsiBytesRead], + diff, + &parityError); + scsiBytesRead += diff; + } + } + } + } } HAL_SD_WriteBlocks_DMA(&hsd, (uint32_t*) (&scsiDev.data[0]), (i + sdLBA) * 512ll, SD_SECTOR_SIZE, sectors); @@ -783,22 +824,6 @@ void scsiDiskPoll() scsiBytesRead += (totalBytes - readAheadBytes); } - if (!underrun && rem > sectors) - { - // We probably have some time to waste reading more here. - // While noting this is going to drop us down into - // half-duplex operation (hence why we read max / 4 only) - - extraSectors = rem - sectors > (maxSectors / 4) - ? (maxSectors / 4) - : rem - sectors; - - scsiReadPIO( - &scsiDev.data[0], - extraSectors * SD_SECTOR_SIZE, - &parityError); - } - uint32_t dmaFinishTime = s2s_getTime_ms(); while ((!hsd.SdTransferCplt || __HAL_SD_SDIO_GET_FLAG(&hsd, SDIO_FLAG_TXACT)) && diff --git a/src/firmware/link.ld b/src/firmware/link.ld index bcd2ed37..ec18c4e8 100755 --- a/src/firmware/link.ld +++ b/src/firmware/link.ld @@ -34,6 +34,16 @@ SECTIONS . = ALIGN(4); } >FLASH_ISR + /* Store config settings into FLASH */ + .fixed_config : + { + . = ALIGN(4); + __fixed_config = .; /* create a global symbol at config start */ + . += 1024; + KEEP(*(.fixed_config)) + . = ALIGN(4); + } >CONFIG + /* The program code and other data goes into FLASH */ .text : { diff --git a/src/firmware/scsi.c b/src/firmware/scsi.c index 25f02839..016ecbb5 100755 --- a/src/firmware/scsi.c +++ b/src/firmware/scsi.c @@ -965,6 +965,11 @@ static void process_MessageOut() scsiWrite(SDTR, sizeof(SDTR)); scsiDev.needSyncNegotiationAck = 1; // Check if this message is rejected. scsiDev.sdUnderrunCount = 0; // reset counter, may work now. + + // Set to the theoretical speed, then adjust if we measure lower + // actual speeds. + scsiDev.hostSpeedKBs = s2s_getScsiRateKBs(); + scsiDev.hostSpeedMeasured = 0; } } else @@ -1125,6 +1130,8 @@ void scsiInit() scsiDev.phase = BUS_FREE; scsiDev.target = NULL; scsiDev.compatMode = COMPAT_UNKNOWN; + scsiDev.hostSpeedKBs = 0; + scsiDev.hostSpeedMeasured = 0; int i; for (i = 0; i < S2S_MAX_TARGETS; ++i) diff --git a/src/firmware/scsi.h b/src/firmware/scsi.h index cbfa9807..64353ada 100755 --- a/src/firmware/scsi.h +++ b/src/firmware/scsi.h @@ -165,6 +165,10 @@ typedef struct int needSyncNegotiationAck; int sdUnderrunCount; + + // Estimate of the SCSI host actual speed + uint32_t hostSpeedKBs; + int hostSpeedMeasured; } ScsiDevice; extern ScsiDevice scsiDev; diff --git a/src/firmware/scsiPhy.c b/src/firmware/scsiPhy.c index a3801273..2f27b1fd 100755 --- a/src/firmware/scsiPhy.c +++ b/src/firmware/scsiPhy.c @@ -677,21 +677,25 @@ uint32_t scsiEnterPhaseImmediate(int newPhase) return 0; // No change } -uint32_t s2s_getScsiRateMBs() +// Returns a "safe" estimate of the host SCSI speed of +// theoretical speed / 2 +uint32_t s2s_getScsiRateKBs() { if (scsiDev.target->syncOffset) { if (scsiDev.target->syncPeriod < 23) { - return 20; + return 20 / 2; } else if (scsiDev.target->syncPeriod <= 25) { - return 10; + return 10 / 2; } else { - return 1000 / (scsiDev.target->syncPeriod * 4); + // 1000000000 / (scsiDev.target->syncPeriod * 4) bytes per second + // (1000000000 / (scsiDev.target->syncPeriod * 4)) / 1000 kB/s + return (1000000 / (scsiDev.target->syncPeriod * 4)) / 2; } } else diff --git a/src/firmware/scsiPhy.h b/src/firmware/scsiPhy.h index c2288db7..360d594c 100755 --- a/src/firmware/scsiPhy.h +++ b/src/firmware/scsiPhy.h @@ -117,6 +117,6 @@ int scsiWriteDMAPoll(); int scsiSelfTest(void); -uint32_t s2s_getScsiRateMBs(); +uint32_t s2s_getScsiRateKBs(); #endif diff --git a/src/scsi2sd-util6/Makefile b/src/scsi2sd-util6/Makefile index 45796205..088f0195 100755 --- a/src/scsi2sd-util6/Makefile +++ b/src/scsi2sd-util6/Makefile @@ -96,7 +96,7 @@ ifeq ($(TARGET),Linux) BUILD := $(PWD)/build/linux LIBUSB_CONFIG+=--disable-shared LDFLAGS_LIBUSB+= -ludev -lpthread -all: $(BUILD)/scsi2sd-test +#all: $(BUILD)/scsi2sd-test endif ifeq ($(TARGET),Darwin)