aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--host/lib/convert/convert_with_sse2.cpp204
-rw-r--r--host/utils/usrp1p_gpif_loopback.cpp111
-rw-r--r--host/utils/usrp1p_poketest.cpp134
3 files changed, 126 insertions, 323 deletions
diff --git a/host/lib/convert/convert_with_sse2.cpp b/host/lib/convert/convert_with_sse2.cpp
index 52beea24a..9772028dc 100644
--- a/host/lib/convert/convert_with_sse2.cpp
+++ b/host/lib/convert/convert_with_sse2.cpp
@@ -25,25 +25,37 @@ DECLARE_CONVERTER(convert_fc32_1_to_item32_1_nswap, PRIORITY_CUSTOM){
const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
- __m128 scalar = _mm_set_ps1(float(scale_factor));
-
- //convert blocks of samples with intrinsics
- size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){
- //load from input
- __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0));
- __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2));
-
- //convert and scale
- __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));
- __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));
-
- //pack + swap 16-bit pairs
- __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);
- tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));
- tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));
-
- //store to output
- _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);
+    const __m128 scalar = _mm_set_ps1(float(scale_factor));
+    //SIMD kernel, 4 samples per pass; _al_ is empty for aligned loads (_mm_load_ps) or u for unaligned (_mm_loadu_ps)
+    #define convert_fc32_1_to_item32_1_nswap_guts(_al_) \
+    for (; i+3 < nsamps; i+=4){ /* needs 4 whole samples left: i may start at 1 after the peel below */ \
+        /* load 4 complex floats from input */ \
+        __m128 tmplo = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+0)); \
+        __m128 tmphi = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+2)); \
+        \
+        /* scale, then convert to 32-bit integers */ \
+        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); \
+        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); \
+        \
+        /* saturating pack to 16 bits, then swap the two 16-bit halves of every 32-bit item */ \
+        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); \
+        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \
+        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \
+        \
+        /* store to output (no alignment assumed for the destination) */ \
+        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); \
+    } \
+
+    size_t i = 0;
+    //0x8 case: one scalar sample should bring input+i to a 16-byte boundary (assumes sizeof(fc32_t) == 8 - confirm)
+    //dispatch according to the alignment of the input pointer
+    switch (size_t(input) & 0xf){
+    case 0x8:
+        if (nsamps != 0){output[i] = fc32_to_item32(input[i], float(scale_factor)); i++;} //guarded peel, then fall through
+    case 0x0:
+        convert_fc32_1_to_item32_1_nswap_guts()
+        break;
+    default: convert_fc32_1_to_item32_1_nswap_guts(u)
+    }
//convert remainder
@@ -56,24 +68,36 @@ DECLARE_CONVERTER(convert_fc32_1_to_item32_1_bswap, PRIORITY_CUSTOM){
const fc32_t *input = reinterpret_cast<const fc32_t *>(inputs[0]);
item32_t *output = reinterpret_cast<item32_t *>(outputs[0]);
- __m128 scalar = _mm_set_ps1(float(scale_factor));
-
- //convert blocks of samples with intrinsics
- size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){
- //load from input
- __m128 tmplo = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+0));
- __m128 tmphi = _mm_loadu_ps(reinterpret_cast<const float *>(input+i+2));
-
- //convert and scale
- __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar));
- __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar));
-
- //pack + byteswap -> byteswap 16 bit words
- __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi);
- tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8));
-
- //store to output
- _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi);
+    const __m128 scalar = _mm_set_ps1(float(scale_factor));
+    //SIMD kernel, 4 samples per pass; _al_ is empty for aligned loads (_mm_load_ps) or u for unaligned (_mm_loadu_ps)
+    #define convert_fc32_1_to_item32_1_bswap_guts(_al_) \
+    for (; i+3 < nsamps; i+=4){ /* needs 4 whole samples left: i may start at 1 after the peel below */ \
+        /* load 4 complex floats from input */ \
+        __m128 tmplo = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+0)); \
+        __m128 tmphi = _mm_load ## _al_ ## _ps(reinterpret_cast<const float *>(input+i+2)); \
+        \
+        /* scale, then convert to 32-bit integers */ \
+        __m128i tmpilo = _mm_cvtps_epi32(_mm_mul_ps(tmplo, scalar)); \
+        __m128i tmpihi = _mm_cvtps_epi32(_mm_mul_ps(tmphi, scalar)); \
+        \
+        /* saturating pack to 16 bits, then swap the two bytes inside every 16-bit word */ \
+        __m128i tmpi = _mm_packs_epi32(tmpilo, tmpihi); \
+        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \
+        \
+        /* store to output (no alignment assumed for the destination) */ \
+        _mm_storeu_si128(reinterpret_cast<__m128i *>(output+i), tmpi); \
+    } \
+
+    size_t i = 0;
+    //0x8 case: one scalar sample should bring input+i to a 16-byte boundary (assumes sizeof(fc32_t) == 8 - confirm)
+    //dispatch according to the alignment of the input pointer
+    switch (size_t(input) & 0xf){
+    case 0x8:
+        if (nsamps != 0){output[i] = uhd::byteswap(fc32_to_item32(input[i], float(scale_factor))); i++;} //guarded peel, then fall through
+    case 0x0:
+        convert_fc32_1_to_item32_1_bswap_guts()
+        break;
+    default: convert_fc32_1_to_item32_1_bswap_guts(u)
+    }
//convert remainder
@@ -86,27 +110,39 @@ DECLARE_CONVERTER(convert_item32_1_to_fc32_1_nswap, PRIORITY_CUSTOM){
const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]);
- __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16));
- __m128i zeroi = _mm_setzero_si128();
-
- //convert blocks of samples with intrinsics
- size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){
- //load from input
- __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i));
-
- //unpack + swap 16-bit pairs
- tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));
- tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1));
- __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits
- __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);
-
- //convert and scale
- __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar);
- __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar);
-
- //store to output
- _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo);
- _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi);
+    const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16)); //the 1/65536 undoes the upper-half placement done by the unpack below
+    const __m128i zeroi = _mm_setzero_si128();
+    //SIMD kernel, 4 samples per pass; _al_ is empty for aligned stores (_mm_store_ps) or u for unaligned (_mm_storeu_ps)
+    #define convert_item32_1_to_fc32_1_nswap_guts(_al_) \
+    for (; i+3 < nsamps; i+=4){ /* needs 4 whole samples left: i may start at 1 after the peel below */ \
+        /* load 4 items from input (no alignment assumed for the source) */ \
+        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \
+        \
+        /* swap the two 16-bit halves of every 32-bit item, then widen to 32 bits */ \
+        tmpi = _mm_shufflelo_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \
+        tmpi = _mm_shufflehi_epi16(tmpi, _MM_SHUFFLE(2, 3, 0, 1)); \
+        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \
+        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); \
+        \
+        /* convert to float and scale */ \
+        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); \
+        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); \
+        \
+        /* store 4 complex floats to output */ \
+        _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+0), tmplo); \
+        _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+2), tmphi); \
+    } \
+
+    size_t i = 0;
+    //0x8 case: one scalar sample should bring output+i to a 16-byte boundary (assumes sizeof(fc32_t) == 8 - confirm)
+    //dispatch according to the alignment of the output pointer
+    switch (size_t(output) & 0xf){
+    case 0x8:
+        if (nsamps != 0){output[i] = item32_to_fc32(input[i], float(scale_factor)); i++;} //guarded peel, then fall through
+    case 0x0:
+        convert_item32_1_to_fc32_1_nswap_guts()
+        break;
+    default: convert_item32_1_to_fc32_1_nswap_guts(u)
+    }
//convert remainder
@@ -119,26 +155,38 @@ DECLARE_CONVERTER(convert_item32_1_to_fc32_1_bswap, PRIORITY_CUSTOM){
const item32_t *input = reinterpret_cast<const item32_t *>(inputs[0]);
fc32_t *output = reinterpret_cast<fc32_t *>(outputs[0]);
- __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16));
- __m128i zeroi = _mm_setzero_si128();
-
- //convert blocks of samples with intrinsics
- size_t i = 0; for (; i < (nsamps & ~0x3); i+=4){
- //load from input
- __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i));
-
- //byteswap + unpack -> byteswap 16 bit words
- tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8));
- __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); //value in upper 16 bits
- __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi);
-
- //convert and scale
- __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar);
- __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar);
-
- //store to output
- _mm_storeu_ps(reinterpret_cast<float *>(output+i+0), tmplo);
- _mm_storeu_ps(reinterpret_cast<float *>(output+i+2), tmphi);
+    const __m128 scalar = _mm_set_ps1(float(scale_factor)/(1 << 16)); //the 1/65536 undoes the upper-half placement done by the unpack below
+    const __m128i zeroi = _mm_setzero_si128();
+    //SIMD kernel, 4 samples per pass; _al_ is empty for aligned stores (_mm_store_ps) or u for unaligned (_mm_storeu_ps)
+    #define convert_item32_1_to_fc32_1_bswap_guts(_al_) \
+    for (; i+3 < nsamps; i+=4){ /* needs 4 whole samples left: i may start at 1 after the peel below */ \
+        /* load 4 items from input (no alignment assumed for the source) */ \
+        __m128i tmpi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input+i)); \
+        \
+        /* swap the two bytes inside every 16-bit word, then widen to 32 bits */ \
+        tmpi = _mm_or_si128(_mm_srli_epi16(tmpi, 8), _mm_slli_epi16(tmpi, 8)); \
+        __m128i tmpilo = _mm_unpacklo_epi16(zeroi, tmpi); /* value in upper 16 bits */ \
+        __m128i tmpihi = _mm_unpackhi_epi16(zeroi, tmpi); \
+        \
+        /* convert to float and scale */ \
+        __m128 tmplo = _mm_mul_ps(_mm_cvtepi32_ps(tmpilo), scalar); \
+        __m128 tmphi = _mm_mul_ps(_mm_cvtepi32_ps(tmpihi), scalar); \
+        \
+        /* store 4 complex floats to output */ \
+        _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+0), tmplo); \
+        _mm_store ## _al_ ## _ps(reinterpret_cast<float *>(output+i+2), tmphi); \
+    } \
+
+    size_t i = 0;
+    //0x8 case: one scalar sample should bring output+i to a 16-byte boundary (assumes sizeof(fc32_t) == 8 - confirm)
+    //dispatch according to the alignment of the output pointer
+    switch (size_t(output) & 0xf){
+    case 0x8:
+        if (nsamps != 0){output[i] = item32_to_fc32(uhd::byteswap(input[i]), float(scale_factor)); i++;} //guarded peel, then fall through
+    case 0x0:
+        convert_item32_1_to_fc32_1_bswap_guts()
+        break;
+    default: convert_item32_1_to_fc32_1_bswap_guts(u)
+    }
//convert remainder
diff --git a/host/utils/usrp1p_gpif_loopback.cpp b/host/utils/usrp1p_gpif_loopback.cpp
deleted file mode 100644
index 3b9da4304..000000000
--- a/host/utils/usrp1p_gpif_loopback.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-//USB->GPIF->FPGA loopback test for USRP1P
-//uses UHD libusb transport
-
-#include <uhd/device.hpp>
-#include <uhd/transport/usb_zero_copy.hpp>
-#include <uhd/transport/bounded_buffer.hpp>
-#include <uhd/transport/usb_control.hpp>
-#include <uhd/utils/assert.hpp>
-#include <boost/shared_array.hpp>
-#include <boost/foreach.hpp>
-#include <boost/thread.hpp>
-#include <boost/format.hpp>
-#include <vector>
-#include <iostream>
-#include <iomanip>
-
-//so the goal is to open a USB device to endpoints (2,6), submit a buffer, receive a reply, and compare them.
-//use usb_zero_copy::make() to get a usb_zero_copy object and then start submitting.
-//need to get a usb dev handle to pass to make
-//use static std::vector<usb_device_handle::sptr> get_device_list(boost::uint16_t vid, boost::uint16_t pid) to get a device handle
-//then get_send_buffer, send, etc.
-using namespace uhd;
-using namespace uhd::transport;
-
-const boost::uint16_t data_xfer_size = 32;
-const boost::uint16_t ctrl_xfer_size = 32;
-
-int main(int argc, char *argv[]) {
- std::cout << "USRP1+ GPIF loopback test" << std::endl;
- //step 1: get a handle on it
- std::vector<usb_device_handle::sptr> handles = usb_device_handle::get_device_list(0xfffe, 0x0003);
- if(handles.size() == 0) {
- std::cout << "No USRP1+ found." << std::endl;
- return ~0;
- }
-
- bool verbose = false;
- if(argc > 1) if(std::string(argv[1]) == "-v") verbose = true;
-
- usb_device_handle::sptr handle = handles.front();
-
- usb_zero_copy::sptr data_transport;
- usb_control::sptr ctrl_transport = usb_control::make(handle); //just in case
-
- data_transport = usb_zero_copy::make(
- handle, // identifier
- 8, // IN endpoint
- 4, // OUT endpoint
- uhd::device_addr_t("recv_frame_size=32, num_recv_frames=1, send_frame_size=32, num_send_frames=1") //args
- );
-
- if(verbose) std::cout << "Made." << std::endl;
-
- //ok now we're made. time to get a buffer and start sending data.
-
- boost::uint8_t localbuf[data_xfer_size];
-
- managed_send_buffer::sptr sbuf;
- managed_recv_buffer::sptr rbuf;
- size_t xfercount = 0;
-
- srand(time(0));
- while(1) {
-
- if(verbose) std::cout << "Getting send buffer." << std::endl;
- sbuf = data_transport->get_send_buff();
- if(sbuf == 0) {
- std::cout << "Failed to get a send buffer." << std::endl;
- return ~0;
- }
- for(int i = 0; i < data_xfer_size; i++) {
- boost::uint8_t x = rand();
- sbuf->cast<boost::uint8_t *>()[i] = x;
- localbuf[i] = x;
- }
-
- if(verbose) std::cout << "Buffer loaded" << std::endl;
-
- sbuf->commit(data_xfer_size);
- if(verbose) std::cout << "Committed." << std::endl;
-
- rbuf = data_transport->get_recv_buff(0.3); //timeout
-
- if(rbuf == 0) {
- std::cout << "Failed to get receive buffer (timeout?)" << std::endl;
- return ~0;
- }
-
- if(verbose) std::cout << "# " << xfercount << std::endl;
-
- if(!memcmp(rbuf->cast<const boost::uint8_t *>(), localbuf, data_xfer_size)) {
- std::cout << ".";
- } else {
- if(verbose) {
- int i = 0;
- for(int j = 0; j < 32; j++) {
- std::cout << boost::format("%02X ") % int(rbuf->cast<const boost::uint8_t *>()[i*32+j]);
- }
- std::cout << std::endl;
- }
- else std::cout << "x";
-
- }
- sbuf.reset();
- rbuf.reset();
- xfercount++;
- //if(verbose) std::cout << "sptrs reset" << std::endl;
- }
-
- return 0;
-}
diff --git a/host/utils/usrp1p_poketest.cpp b/host/utils/usrp1p_poketest.cpp
deleted file mode 100644
index ca7628e01..000000000
--- a/host/utils/usrp1p_poketest.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-//FPGA register poke test for USRP1P
-//uses UHD libusb transport
-
-#include <uhd/device.hpp>
-#include <uhd/transport/usb_zero_copy.hpp>
-#include <uhd/transport/bounded_buffer.hpp>
-#include <uhd/transport/usb_control.hpp>
-#include <uhd/utils/assert.hpp>
-#include <boost/shared_array.hpp>
-#include <boost/foreach.hpp>
-#include <boost/thread.hpp>
-#include <boost/format.hpp>
-#include <vector>
-#include <iostream>
-#include <iomanip>
-#include "../lib/usrp/usrp1p/ctrl_packet.hpp"
-
-//so the goal is to open a USB device to endpoints (2,6), submit a buffer, receive a reply, and compare them.
-//use usb_zero_copy::make() to get a usb_zero_copy object and then start submitting.
-//need to get a usb dev handle to pass to make
-//use static std::vector<usb_device_handle::sptr> get_device_list(boost::uint16_t vid, boost::uint16_t pid) to get a device handle
-//then get_send_buffer, send, etc.
-using namespace uhd;
-using namespace uhd::transport;
-
-const boost::uint16_t ctrl_xfer_size = 32;
-
-int main(int argc, char *argv[]) {
- std::cout << "USRP1+ GPIF poke test" << std::endl;
- //step 1: get a handle on it
- std::vector<usb_device_handle::sptr> handles = usb_device_handle::get_device_list(0xfffe, 0x0003);
- if(handles.size() == 0) {
- std::cout << "No USRP1+ found." << std::endl;
- return ~0;
- }
-
- bool verbose = false;
- if(argc > 1) if(std::string(argv[1]) == "-v") verbose = true;
-
- usb_device_handle::sptr handle = handles.front();
-
- usb_zero_copy::sptr data_transport;
- usb_control::sptr ctrl_transport = usb_control::make(handle); //just in case
-
- data_transport = usb_zero_copy::make(
- handle, // identifier
- 8, // IN endpoint
- 4, // OUT endpoint
- uhd::device_addr_t("recv_frame_size=32, num_recv_frames=1, send_frame_size=32, num_send_frames=1") //args
- );
-
- if(verbose) std::cout << "Made." << std::endl;
-
- //ok now we're made. time to get a buffer and start sending data.
-
- managed_send_buffer::sptr sbuf;
- managed_recv_buffer::sptr rbuf;
- size_t xfercount = 0;
-
- static uint8_t sequence = 0;
- //uhd::usrp::ctrl_packet_out_t outpkt;
- //memset(outpkt.data, 0x00, sizeof(outpkt.data));
-// outpkt.op = uhd::usrp::CTRL_PACKET_WRITE;
-// outpkt.callbacks = 0;
-// outpkt.seq = sequence++;
-// outpkt.len = 4;
-// outpkt.addr = 0x00000000;
-// outpkt.data[0] = 0xff;
-// outpkt.data[1] = 0xfe;
-// outpkt.data[2] = 0xfd;
-// outpkt.data[3] = 0xfc;
-
- boost::uint16_t outpkt[16];
- /* Packet format:
- * Command: 2 bits
- * Callbacks: 6 bits
- * Seq num: 8 bits
- * Length: 16 bits
- * Addr LSW: 16 bits
- * Addr MSW: 16 bits
- * Data: 24 bytes/12 words
- * Lengths are in lines
- *
- * readback:
- * AA00 LEN(16) SEQ(16) ADDR(32) DATA(16bx12B)
- */
- memset(outpkt, 0x00, sizeof(outpkt));
- outpkt[0] = 0x8000; //read cmd + callbacks (0) + seq
- outpkt[1] = 0x0001; //len
- outpkt[2] = 0x0000; //addr LSW
- outpkt[3] = 0x0000; //addr MSW
- outpkt[4] = 0x0A0A; //data
- outpkt[5] = 0xFFFF;
-
-
- srand(time(0));
-// while(1) {
-
- if(verbose) std::cout << "Getting send buffer." << std::endl;
- sbuf = data_transport->get_send_buff();
- if(sbuf == 0) {
- std::cout << "Failed to get a send buffer." << std::endl;
- return ~0;
- }
-
- for(int i = 0; i < ctrl_xfer_size; i++) {
- sbuf->cast<boost::uint8_t *>()[i] = ((boost::uint8_t *)&outpkt)[i];
- }
-
- if(verbose) std::cout << "Buffer loaded" << std::endl;
-
- sbuf->commit(ctrl_xfer_size);
- if(verbose) std::cout << "Committed." << std::endl;
-
- rbuf = data_transport->get_recv_buff(0.3); //timeout
-
- if(rbuf == 0) {
- std::cout << "Failed to get receive buffer (timeout?)" << std::endl;
- return ~0;
- }
-
- for(int j = 0; j < 32; j++) {
- std::cout << boost::format("%02X ") % int(rbuf->cast<const boost::uint8_t *>()[j]);
- }
- std::cout << std::endl;
-
- sbuf.reset();
- rbuf.reset();
- xfercount++;
- //if(verbose) std::cout << "sptrs reset" << std::endl;
-// }
-
- return 0;
-}