@@ -853,32 +853,77 @@ static bool _transfer(rp2pio_statemachine_obj_t *self,
853853 // This implementation is based on SPI but varies because the tx and rx buffers
854854 // may be different lengths and occur at different times or speeds.
855855
856- // Use DMA for large transfers if channels are available.
857- // Don't exceed FIFO size.
858- const size_t dma_min_size_threshold = self -> fifo_depth ;
859856 int chan_tx = -1 ;
860857 int chan_rx = -1 ;
861858 size_t len = MAX (out_len , in_len );
862859 bool tx = data_out != NULL ;
863860 bool rx = data_in != NULL ;
864- bool use_dma = len >= dma_min_size_threshold || swap_out || swap_in ;
861+ bool free_data_out = false;
862+ bool free_data_in = false;
863+ uint8_t * sram_data_out = (uint8_t * )data_out ;
864+ uint8_t * sram_data_in = data_in ;
865+ bool tx_fits_in_fifo = (out_len / out_stride_in_bytes ) <= self -> fifo_depth ;
866+ bool rx_fits_in_fifo = (in_len / in_stride_in_bytes ) <= self -> fifo_depth ;
867+ bool use_dma = !(tx_fits_in_fifo && rx_fits_in_fifo ) || swap_out || swap_in ;
868+
865869 if (use_dma ) {
866- // Use DMA channels to service the two FIFOs
870+ // We can only reliably use DMA for SRAM buffers. So, if we're given PSRAM buffers,
871+ // then copy them to SRAM first. If we can't, then fail.
872+ // Use DMA channels to service the two FIFOs. Fail if we can't allocate DMA channels.
867873 if (tx ) {
874+ if (data_out < (uint8_t * )SRAM_BASE ) {
875+ // Try to allocate a temporary buffer for DMA transfer
876+ uint8_t * temp_buffer = (uint8_t * )port_malloc (len , true);
877+ if (temp_buffer == NULL ) {
878+ mp_printf (& mp_plat_print , "Failed to allocate temporary buffer for DMA tx\n" );
879+ return false;
880+ }
881+ memcpy (temp_buffer , data_out , len );
882+ sram_data_out = temp_buffer ;
883+ free_data_out = true;
884+ }
868885 chan_tx = dma_claim_unused_channel (false);
869886 // DMA allocation failed...
870887 if (chan_tx < 0 ) {
888+ if (free_data_out ) {
889+ port_free (sram_data_out );
890+ }
891+ if (free_data_in ) {
892+ port_free (sram_data_in );
893+ }
871894 return false;
872895 }
873896 }
874897 if (rx ) {
898+ if (data_in < (uint8_t * )SRAM_BASE ) {
899+ // Try to allocate a temporary buffer for DMA transfer
900+ uint8_t * temp_buffer = (uint8_t * )port_malloc (len , true);
901+ if (temp_buffer == NULL ) {
902+ mp_printf (& mp_plat_print , "Failed to allocate temporary buffer for DMA rx\n" );
903+ if (chan_tx >= 0 ) {
904+ dma_channel_unclaim (chan_tx );
905+ }
906+ if (free_data_out ) {
907+ port_free (sram_data_out );
908+ }
909+ return false;
910+ }
911+ sram_data_in = temp_buffer ;
912+ free_data_in = true;
913+ }
875914 chan_rx = dma_claim_unused_channel (false);
876915 // DMA allocation failed...
877916 if (chan_rx < 0 ) {
878917 // may need to free tx channel
879918 if (chan_tx >= 0 ) {
880919 dma_channel_unclaim (chan_tx );
881920 }
921+ if (free_data_out ) {
922+ port_free (sram_data_out );
923+ }
924+ if (free_data_in ) {
925+ port_free (sram_data_in );
926+ }
882927 return false;
883928 }
884929 }
@@ -910,7 +955,7 @@ static bool _transfer(rp2pio_statemachine_obj_t *self,
910955 channel_config_set_bswap (& c , swap_out );
911956 dma_channel_configure (chan_tx , & c ,
912957 tx_destination ,
913- data_out ,
958+ sram_data_out ,
914959 out_len / out_stride_in_bytes ,
915960 false);
916961 channel_mask |= 1u << chan_tx ;
@@ -923,7 +968,7 @@ static bool _transfer(rp2pio_statemachine_obj_t *self,
923968 channel_config_set_write_increment (& c , true);
924969 channel_config_set_bswap (& c , swap_in );
925970 dma_channel_configure (chan_rx , & c ,
926- data_in ,
971+ sram_data_in ,
927972 rx_source ,
928973 in_len / in_stride_in_bytes ,
929974 false);
@@ -950,8 +995,7 @@ static bool _transfer(rp2pio_statemachine_obj_t *self,
950995 self -> pio -> fdebug = stall_mask ;
951996 }
952997
953- // If we have claimed only one channel successfully, we should release immediately. This also
954- // releases the DMA after use_dma has been done.
998+ // Release the DMA channels once we are done using them.
955999 if (chan_rx >= 0 ) {
9561000 dma_channel_unclaim (chan_rx );
9571001 }
@@ -960,31 +1004,31 @@ static bool _transfer(rp2pio_statemachine_obj_t *self,
9601004 }
9611005
9621006 if (!use_dma && !(self -> user_interruptible && mp_hal_is_interrupted ())) {
963- // Use software for small transfers, or if couldn't claim two DMA channels
1007+ // Use software for small transfers
9641008 size_t rx_remaining = in_len / in_stride_in_bytes ;
9651009 size_t tx_remaining = out_len / out_stride_in_bytes ;
9661010
9671011 while (rx_remaining || tx_remaining ) {
9681012 while (tx_remaining && !pio_sm_is_tx_fifo_full (self -> pio , self -> state_machine )) {
9691013 if (out_stride_in_bytes == 1 ) {
970- * tx_destination = * data_out ;
1014+ * tx_destination = * sram_data_out ;
9711015 } else if (out_stride_in_bytes == 2 ) {
972- * ((uint16_t * )tx_destination ) = * ((uint16_t * )data_out );
1016+ * ((uint16_t * )tx_destination ) = * ((uint16_t * )sram_data_out );
9731017 } else if (out_stride_in_bytes == 4 ) {
974- * ((uint32_t * )tx_destination ) = * ((uint32_t * )data_out );
1018+ * ((uint32_t * )tx_destination ) = * ((uint32_t * )sram_data_out );
9751019 }
976- data_out += out_stride_in_bytes ;
1020+ sram_data_out += out_stride_in_bytes ;
9771021 -- tx_remaining ;
9781022 }
9791023 while (rx_remaining && !pio_sm_is_rx_fifo_empty (self -> pio , self -> state_machine )) {
9801024 if (in_stride_in_bytes == 1 ) {
981- * data_in = (uint8_t )* rx_source ;
1025+ * sram_data_in = (uint8_t )* rx_source ;
9821026 } else if (in_stride_in_bytes == 2 ) {
983- * ((uint16_t * )data_in ) = * ((uint16_t * )rx_source );
1027+ * ((uint16_t * )sram_data_in ) = * ((uint16_t * )rx_source );
9841028 } else if (in_stride_in_bytes == 4 ) {
985- * ((uint32_t * )data_in ) = * ((uint32_t * )rx_source );
1029+ * ((uint32_t * )sram_data_in ) = * ((uint32_t * )rx_source );
9861030 }
987- data_in += in_stride_in_bytes ;
1031+ sram_data_in += in_stride_in_bytes ;
9881032 -- rx_remaining ;
9891033 }
9901034 RUN_BACKGROUND_TASKS ;
@@ -996,7 +1040,7 @@ static bool _transfer(rp2pio_statemachine_obj_t *self,
9961040 self -> pio -> fdebug = stall_mask ;
9971041 }
9981042 // Wait for the state machine to finish transmitting the data we've queued
999- // up.
1043+ // up (either from the CPU or via DMA).
10001044 if (tx ) {
10011045 while (!pio_sm_is_tx_fifo_empty (self -> pio , self -> state_machine ) ||
10021046 (self -> wait_for_txstall && (self -> pio -> fdebug & stall_mask ) == 0 )) {
@@ -1006,6 +1050,14 @@ static bool _transfer(rp2pio_statemachine_obj_t *self,
10061050 }
10071051 }
10081052 }
1053+ if (free_data_out ) {
1054+ port_free (sram_data_out );
1055+ }
1056+ if (free_data_in ) {
1057+ // Copy the data from the SRAM buffer to the user PSRAM buffer.
1058+ memcpy (data_in , sram_data_in , len );
1059+ port_free (sram_data_in );
1060+ }
10091061 return true;
10101062}
10111063
0 commit comments