diff options
-rw-r--r-- | host/lib/convert/convert_with_sse2.cpp | 204 | ||||
-rw-r--r-- | host/utils/usrp1p_gpif_loopback.cpp | 111 | ||||
-rw-r--r-- | host/utils/usrp1p_poketest.cpp | 134 |
3 files changed, 126 insertions, 323 deletions
diff --git a/host/lib/convert/convert_with_sse2.cpp b/host/lib/convert/convert_with_sse2.cpp index 52beea24a..9772028dc 100644 --- a/host/lib/convert/convert_with_sse2.cpp +++ b/host/lib/convert/convert_with_sse2.cpp @@ -25,25 +25,37 @@ DECLARE_CONVERTER(convert_fc32_1_to_item32_1_nswap, PRIORITY_CUSTOM){ const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]); item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); - __m128 scalar = _mm_set_ps1(float(scale_factor)); - - //convert blocks of samples with intrinsics - size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ - //load from input - __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0)); - __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2)); - - //convert and scale - __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); - __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); - - //pack + swap 16-bit pairs - __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); - tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); - tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); - - //store to output - _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); + const __m128 scalar = _mm_set_ps1(float(scale_factor)); + + #define convert_fc32_1_to_item32_1_nswap_guts(_al_) \ + for (; i < (nsamps & ~0x3); i+=4){ \ + /* load from input */ \ + __m128 tmplo = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+0)); \ + __m128 tmphi = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+2)); \ + \ + /* convert and scale */ \ + __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); \ + __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); \ + \ + /* pack + swap 16-bit pairs */ \ + __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); \ + tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ + tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ + \ + /* store to output */ \ + _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); \ + } \ + + size_t i = 0; + + //dispatch according to alignment + switch (size_t(input) & 0xf){ + case 0x8: + output[i] = fc32_to_item32(input[i], float(scale_factor)); i++; + case 0x0: + convert_fc32_1_to_item32_1_nswap_guts() + break; + default: convert_fc32_1_to_item32_1_nswap_guts(u) } //convert remainder @@ -56,24 +68,36 @@ DECLARE_CONVERTER(convert_fc32_1_to_item32_1_bswap, PRIORITY_CUSTOM){ const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]); item32_t *output = reinterpret_cast<item32_t *>(outputs[0]); - __m128 scalar = _mm_set_ps1(float(scale_factor)); - - //convert blocks of samples with intrinsics - size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ - //load from input - __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0)); - __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2)); - - //convert and scale - __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); - __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); - - //pack + byteswap -> byteswap 16 bit words - __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); - tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); - - //store to output - _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); + const __m128 scalar = _mm_set_ps1(float(scale_factor)); + + #define convert_fc32_1_to_item32_1_bswap_guts(_al_) \ + for (; i < (nsamps & ~0x3); i+=4){ \ + /* load from input */ \ + __m128 tmplo = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+0)); \ + __m128 tmphi = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+2)); \ + \ + /* convert and scale */ \ + __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); \ + __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); \ + \ + /* pack + byteswap -> byteswap 16 bit words */ \ + __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); \ + tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \ + \ + /* store to output */ \ + _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); \ + } \ + + size_t i = 0; + + //dispatch according to alignment + switch (size_t(input) & 0xf){ + case 0x8: + output[i] = uhd::byteswap(fc32_to_item32(input[i], float(scale_factor))); i++; + case 0x0: + convert_fc32_1_to_item32_1_bswap_guts() + break; + default: convert_fc32_1_to_item32_1_bswap_guts(u) } //convert remainder @@ -86,27 +110,39 @@ DECLARE_CONVERTER(convert_item32_1_to_fc32_1_nswap, PRIORITY_CUSTOM){ const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]); - __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16)); - __m128i zeroi = _mm_setzero_si128(); - - //convert blocks of samples with intrinsics - size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ - //load from input - __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); - - //unpack + swap 16-bit pairs - tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); - tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); - __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits - __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); - - //convert and scale - __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); - __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); - - //store to output - _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo); - _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi); + const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16)); + const __m128i zeroi = _mm_setzero_si128(); + + #define convert_item32_1_to_fc32_1_nswap_guts(_al_) \ + for (; i < (nsamps & ~0x3); i+=4){ \ + /* load from input */ \ + __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \ + \ + /* unpack + swap 16-bit pairs */ \ + tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ + tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \ + __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \ + __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); \ + \ + /* convert and scale */ \ + __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); \ + __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); \ + \ + /* store to output */ \ + _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+0), tmplo); \ + _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+2), tmphi); \ + } \ + + size_t i = 0; + + //dispatch according to alignment + switch (size_t(output) & 0xf){ + case 0x8: + output[i] = item32_to_fc32(input[i], float(scale_factor)); i++; + case 0x0: + convert_item32_1_to_fc32_1_nswap_guts() + break; + default: convert_item32_1_to_fc32_1_nswap_guts(u) } //convert remainder @@ -119,26 +155,38 @@ DECLARE_CONVERTER(convert_item32_1_to_fc32_1_bswap, PRIORITY_CUSTOM){ const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]); fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]); - __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16)); - __m128i zeroi = _mm_setzero_si128(); - - //convert blocks of samples with intrinsics - size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){ - //load from input - __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); - - //byteswap + unpack -> byteswap 16 bit words - tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); - __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits - __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); - - //convert and scale - __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); - __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); - - //store to output - _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo); - _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi); + const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16)); + const __m128i zeroi = _mm_setzero_si128(); + + #define convert_item32_1_to_fc32_1_bswap_guts(_al_) \ + for (; i < (nsamps & ~0x3); i+=4){ \ + /* load from input */ \ + __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \ + \ + /* byteswap + unpack -> byteswap 16 bit words */ \ + tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \ + __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \ + __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); \ + \ + /* convert and scale */ \ + __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); \ + __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); \ + \ + /* store to output */ \ + _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+0), tmplo); \ + _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+2), tmphi); \ + } \ + + size_t i = 0; + + //dispatch according to alignment + switch (size_t(output) & 0xf){ + case 0x8: + output[i] = item32_to_fc32(uhd::byteswap(input[i]), float(scale_factor)); i++; + case 0x0: + convert_item32_1_to_fc32_1_bswap_guts() + break; + default: convert_item32_1_to_fc32_1_bswap_guts(u) } //convert remainder diff --git a/host/utils/usrp1p_gpif_loopback.cpp b/host/utils/usrp1p_gpif_loopback.cpp deleted file mode 100644 index 3b9da4304..000000000 --- a/host/utils/usrp1p_gpif_loopback.cpp +++ /dev/null @@ -1,111 +0,0 @@ -//USB->GPIF->FPGA loopback test for USRP1P -//uses UHD libusb transport - -#include <uhd/device.hpp> -#include <uhd/transport/usb_zero_copy.hpp> -#include <uhd/transport/bounded_buffer.hpp> -#include <uhd/transport/usb_control.hpp> -#include <uhd/utils/assert.hpp> -#include <boost/shared_array.hpp> -#include <boost/foreach.hpp> -#include <boost/thread.hpp> -#include <boost/format.hpp> -#include <vector> -#include <iostream> -#include <iomanip> - -//so the goal is to open a USB device to endpoints (2,6), submit a buffer, receive a reply, and compare them. -//use usb_zero_copy::make() to get a usb_zero_copy object and then start submitting. -//need to get a usb dev handle to pass to make -//use static std::vector<usb_device_handle::sptr> get_device_list(boost::uint16_t vid, boost::uint16_t pid) to get a device handle -//then get_send_buffer, send, etc. -using namespace uhd; -using namespace uhd::transport; - -const boost::uint16_t data_xfer_size = 32; -const boost::uint16_t ctrl_xfer_size = 32; - -int main(int argc, char *argv[]) { - std::cout << "USRP1+ GPIF loopback test" << std::endl; - //step 1: get a handle on it - std::vector<usb_device_handle::sptr> handles = usb_device_handle::get_device_list(0xfffe, 0x0003); - if(handles.size() == 0) { - std::cout << "No USRP1+ found." << std::endl; - return ~0; - } - - bool verbose = false; - if(argc > 1) if(std::string(argv[1]) == "-v") verbose = true; - - usb_device_handle::sptr handle = handles.front(); - - usb_zero_copy::sptr data_transport; - usb_control::sptr ctrl_transport = usb_control::make(handle); //just in case - - data_transport = usb_zero_copy::make( - handle, // identifier - 8, // IN endpoint - 4, // OUT endpoint - uhd::device_addr_t("recv_frame_size=32, num_recv_frames=1, send_frame_size=32, num_send_frames=1") //args - ); - - if(verbose) std::cout << "Made." << std::endl; - - //ok now we're made. time to get a buffer and start sending data. - - boost::uint8_t localbuf[data_xfer_size]; - - managed_send_buffer::sptr sbuf; - managed_recv_buffer::sptr rbuf; - size_t xfercount = 0; - - srand(time(0)); - while(1) { - - if(verbose) std::cout << "Getting send buffer." << std::endl; - sbuf = data_transport->get_send_buff(); - if(sbuf == 0) { - std::cout << "Failed to get a send buffer." << std::endl; - return ~0; - } - for(int i = 0; i < data_xfer_size; i++) { - boost::uint8_t x = rand(); - sbuf->cast<boost::uint8_t *>()[i] = x; - localbuf[i] = x; - } - - if(verbose) std::cout << "Buffer loaded" << std::endl; - - sbuf->commit(data_xfer_size); - if(verbose) std::cout << "Committed." << std::endl; - - rbuf = data_transport->get_recv_buff(0.3); //timeout - - if(rbuf == 0) { - std::cout << "Failed to get receive buffer (timeout?)" << std::endl; - return ~0; - } - - if(verbose) std::cout << "# " << xfercount << std::endl; - - if(!memcmp(rbuf->cast<const boost::uint8_t *>(), localbuf, data_xfer_size)) { - std::cout << "."; - } else { - if(verbose) { - int i = 0; - for(int j = 0; j < 32; j++) { - std::cout << boost::format("%02X ") % int(rbuf->cast<const boost::uint8_t *>()[i*32+j]); - } - std::cout << std::endl; - } - else std::cout << "x"; - - } - sbuf.reset(); - rbuf.reset(); - xfercount++; - //if(verbose) std::cout << "sptrs reset" << std::endl; - } - - return 0; -} diff --git a/host/utils/usrp1p_poketest.cpp b/host/utils/usrp1p_poketest.cpp deleted file mode 100644 index ca7628e01..000000000 --- a/host/utils/usrp1p_poketest.cpp +++ /dev/null @@ -1,134 +0,0 @@ -//FPGA register poke test for USRP1P -//uses UHD libusb transport - -#include <uhd/device.hpp> -#include <uhd/transport/usb_zero_copy.hpp> -#include <uhd/transport/bounded_buffer.hpp> -#include <uhd/transport/usb_control.hpp> -#include <uhd/utils/assert.hpp> -#include <boost/shared_array.hpp> -#include <boost/foreach.hpp> -#include <boost/thread.hpp> -#include <boost/format.hpp> -#include <vector> -#include <iostream> -#include <iomanip> -#include "../lib/usrp/usrp1p/ctrl_packet.hpp" - -//so the goal is to open a USB device to endpoints (2,6), submit a buffer, receive a reply, and compare them. -//use usb_zero_copy::make() to get a usb_zero_copy object and then start submitting. -//need to get a usb dev handle to pass to make -//use static std::vector<usb_device_handle::sptr> get_device_list(boost::uint16_t vid, boost::uint16_t pid) to get a device handle -//then get_send_buffer, send, etc. -using namespace uhd; -using namespace uhd::transport; - -const boost::uint16_t ctrl_xfer_size = 32; - -int main(int argc, char *argv[]) { - std::cout << "USRP1+ GPIF poke test" << std::endl; - //step 1: get a handle on it - std::vector<usb_device_handle::sptr> handles = usb_device_handle::get_device_list(0xfffe, 0x0003); - if(handles.size() == 0) { - std::cout << "No USRP1+ found." << std::endl; - return ~0; - } - - bool verbose = false; - if(argc > 1) if(std::string(argv[1]) == "-v") verbose = true; - - usb_device_handle::sptr handle = handles.front(); - - usb_zero_copy::sptr data_transport; - usb_control::sptr ctrl_transport = usb_control::make(handle); //just in case - - data_transport = usb_zero_copy::make( - handle, // identifier - 8, // IN endpoint - 4, // OUT endpoint - uhd::device_addr_t("recv_frame_size=32, num_recv_frames=1, send_frame_size=32, num_send_frames=1") //args - ); - - if(verbose) std::cout << "Made." << std::endl; - - //ok now we're made. time to get a buffer and start sending data. - - managed_send_buffer::sptr sbuf; - managed_recv_buffer::sptr rbuf; - size_t xfercount = 0; - - static uint8_t sequence = 0; - //uhd::usrp::ctrl_packet_out_t outpkt; - //memset(outpkt.data, 0x00, sizeof(outpkt.data)); -// outpkt.op = uhd::usrp::CTRL_PACKET_WRITE; -// outpkt.callbacks = 0; -// outpkt.seq = sequence++; -// outpkt.len = 4; -// outpkt.addr = 0x00000000; -// outpkt.data[0] = 0xff; -// outpkt.data[1] = 0xfe; -// outpkt.data[2] = 0xfd; -// outpkt.data[3] = 0xfc; - - boost::uint16_t outpkt[16]; - /* Packet format: - * Command: 2 bits - * Callbacks: 6 bits - * Seq num: 8 bits - * Length: 16 bits - * Addr LSW: 16 bits - * Addr MSW: 16 bits - * Data: 24 bytes/12 words - * Lengths are in lines - * - * readback: - * AA00 LEN(16) SEQ(16) ADDR(32) DATA(16bx12B) - */ - memset(outpkt, 0x00, sizeof(outpkt)); - outpkt[0] = 0x8000; //read cmd + callbacks (0) + seq - outpkt[1] = 0x0001; //len - outpkt[2] = 0x0000; //addr LSW - outpkt[3] = 0x0000; //addr MSW - outpkt[4] = 0x0A0A; //data - outpkt[5] = 0xFFFF; - - - srand(time(0)); -// while(1) { - - if(verbose) std::cout << "Getting send buffer." << std::endl; - sbuf = data_transport->get_send_buff(); - if(sbuf == 0) { - std::cout << "Failed to get a send buffer." << std::endl; - return ~0; - } - - for(int i = 0; i < ctrl_xfer_size; i++) { - sbuf->cast<boost::uint8_t *>()[i] = ((boost::uint8_t *)&outpkt)[i]; - } - - if(verbose) std::cout << "Buffer loaded" << std::endl; - - sbuf->commit(ctrl_xfer_size); - if(verbose) std::cout << "Committed." << std::endl; - - rbuf = data_transport->get_recv_buff(0.3); //timeout - - if(rbuf == 0) { - std::cout << "Failed to get receive buffer (timeout?)" << std::endl; - return ~0; - } - - for(int j = 0; j < 32; j++) { - std::cout << boost::format("%02X ") % int(rbuf->cast<const boost::uint8_t *>()[j]); - } - std::cout << std::endl; - - sbuf.reset(); - rbuf.reset(); - xfercount++; - //if(verbose) std::cout << "sptrs reset" << std::endl; -// } - - return 0; -} |