#!/usr/bin/env python3 # # Copyright 2011-2012 Ettus Research LLC # Copyright 2018 Ettus Research, a National Instruments Company # # SPDX-License-Identifier: GPL-3.0-or-later # """ Auto-Generator for generic converters """ TMPL_HEADER = """ <% import time %> /*********************************************************************** * This file was generated by ${file} on ${time.strftime("%c")} **********************************************************************/ #include "convert_common.hpp" #include #include #include using namespace uhd::convert; // item32 -> item32: Just a memcpy. No scaling possible. DECLARE_CONVERTER(item32, 1, item32, 1, PRIORITY_GENERAL) { const item32_t *input = reinterpret_cast(inputs[0]); item32_t *output = reinterpret_cast(outputs[0]); memcpy(output, input, nsamps * sizeof(item32_t)); } """ TMPL_CONV_CHDR_SC16_TO_FP = """ DECLARE_CONVERTER({fctype}, 1, sc16_chdr, 1, PRIORITY_GENERAL) {{ // Note: We convert I and Q separately, because there's no optimized // constructor to create a complex<{fptype}> from a complex. This // means we need to multiply nsamps by 2 const {fptype}* input = reinterpret_cast(inputs[0]); int16_t* output = reinterpret_cast(outputs[0]); for (size_t i = 0; i < nsamps * 2; i += 2) {{ output[i] = static_cast(input[i] * {fptype}(scale_factor)); output[i+1] = static_cast(input[i+1] * {fptype}(scale_factor)); }} }} DECLARE_CONVERTER(sc16_chdr, 1, {fctype}, 1, PRIORITY_GENERAL) {{ // Note: We convert I and Q separately, because there's no optimized // constructor to create a complex<{fptype}> from a complex. This // means we need to multiply nsamps by 2 const int16_t* input = reinterpret_cast(inputs[0]); {fptype}* output = reinterpret_cast<{fptype}*>(outputs[0]); for (size_t i = 0; i < nsamps * 2; i += 2) {{ output[i] = static_cast<{fptype}>(input[i]) * {fptype}(scale_factor); output[i+1] = static_cast<{fptype}>(input[i+1]) * {fptype}(scale_factor); }} }} """ # For CHDR converters, all converters where the input and output type are the # same can be done by a memcpy, because we can do the endianness adaptation in # the FPGA. TMPL_CONV_CHDR_MEMCPY = """ DECLARE_CONVERTER({in_type}, 1, {out_type}, 1, PRIORITY_GENERAL) {{ const {ptr_type} *input = reinterpret_cast(inputs[0]); {ptr_type}* output = reinterpret_cast<{ptr_type}*>(outputs[0]); // Benchmark shows that copy_n can be significantly slower in some cases //std::copy_n(input, nsamps, output); memcpy(output, input, sizeof({ptr_type}) * nsamps); }} """ # Some 32-bit types converters are also defined in convert_item32.cpp to # take care of quirks such as I/Q ordering on the wire etc. TMPL_CONV_ITEM32 = """ DECLARE_CONVERTER({in_type}, 1, {out_type}, 1, PRIORITY_GENERAL) {{ const item32_t *input = reinterpret_cast(inputs[0]); item32_t *output = reinterpret_cast(outputs[0]); for (size_t i = 0; i < nsamps; i++) {{ output[i] = {to_wire_or_host}(input[i]); }} }} """ # 64-bit data types are two consecutive item32 items TMPL_CONV_ITEM64 = """ DECLARE_CONVERTER({in_type}, 1, {out_type}, 1, PRIORITY_GENERAL) {{ const item32_t *input = reinterpret_cast(inputs[0]); item32_t *output = reinterpret_cast(outputs[0]); // An item64 is two item32_t's for (size_t i = 0; i < nsamps * 2; i++) {{ output[i] = {to_wire_or_host}(input[i]); }} }} """ TMPL_CONV_U8S8 = """ DECLARE_CONVERTER({us8}, 1, {us8}_item32_{end}, 1, PRIORITY_GENERAL) {{ const item32_t *input = reinterpret_cast(inputs[0]); item32_t *output = reinterpret_cast(outputs[0]); // 1) Copy all the 4-byte tuples size_t n_words = nsamps / 4; for (size_t i = 0; i < n_words; i++) {{ output[i] = {to_wire}(input[i]); }} // 2) If nsamps was not a multiple of 4, copy the rest by hand size_t bytes_left = nsamps % 4; if (bytes_left) {{ const {us8}_t *last_input_word = reinterpret_cast(&input[n_words]); {us8}_t *last_output_word = reinterpret_cast<{us8}_t *>(&output[n_words]); for (size_t k = 0; k < bytes_left; k++) {{ last_output_word[k] = last_input_word[k]; }} output[n_words] = {to_wire}(output[n_words]); }} }} DECLARE_CONVERTER({us8}_item32_{end}, 1, {us8}, 1, PRIORITY_GENERAL) {{ const item32_t *input = reinterpret_cast(inputs[0]); item32_t *output = reinterpret_cast(outputs[0]); // 1) Copy all the 4-byte tuples size_t n_words = nsamps / 4; for (size_t i = 0; i < n_words; i++) {{ output[i] = {to_host}(input[i]); }} // 2) If nsamps was not a multiple of 4, copy the rest by hand size_t bytes_left = nsamps % 4; if (bytes_left) {{ item32_t last_input_word = {to_host}(input[n_words]); const {us8}_t *last_input_word_ptr = reinterpret_cast(&last_input_word); {us8}_t *last_output_word = reinterpret_cast<{us8}_t *>(&output[n_words]); for (size_t k = 0; k < bytes_left; k++) {{ last_output_word[k] = last_input_word_ptr[k]; }} }} }} """ TMPL_CONV_S16 = """ DECLARE_CONVERTER(s16, 1, s16_item32_{end}, 1, PRIORITY_GENERAL) {{ const uint16_t *input = reinterpret_cast(inputs[0]); uint16_t *output = reinterpret_cast(outputs[0]); for (size_t i = 0; i < nsamps; i++) {{ output[i] = {to_wire}(input[i]); }} }} DECLARE_CONVERTER(s16_item32_{end}, 1, s16, 1, PRIORITY_GENERAL) {{ const uint16_t *input = reinterpret_cast(inputs[0]); uint16_t *output = reinterpret_cast(outputs[0]); for (size_t i = 0; i < nsamps; i++) {{ output[i] = {to_host}(input[i]); }} }} """ TMPL_CONV_USRP1_COMPLEX = """ DECLARE_CONVERTER(${cpu_type}, ${width}, sc16_item16_usrp1, 1, PRIORITY_GENERAL){ % for w in range(width): const ${cpu_type}_t *input${w} = reinterpret_cast(inputs[${w}]); % endfor uint16_t *output = reinterpret_cast(outputs[0]); for (size_t i = 0, j = 0; i < nsamps; i++){ % for w in range(width): output[j++] = ${to_wire}(uint16_t(int16_t(input${w}[i].real()${do_scale}))); output[j++] = ${to_wire}(uint16_t(int16_t(input${w}[i].imag()${do_scale}))); % endfor } } DECLARE_CONVERTER(sc16_item16_usrp1, 1, ${cpu_type}, ${width}, PRIORITY_GENERAL){ const uint16_t *input = reinterpret_cast(inputs[0]); % for w in range(width): ${cpu_type}_t *output${w} = reinterpret_cast<${cpu_type}_t *>(outputs[${w}]); % endfor for (size_t i = 0, j = 0; i < nsamps; i++){ % for w in range(width): output${w}[i] = ${cpu_type}_t( int16_t(${to_host}(input[j+0]))${do_scale}, int16_t(${to_host}(input[j+1]))${do_scale} ); j += 2; % endfor } } DECLARE_CONVERTER(sc8_item16_usrp1, 1, ${cpu_type}, ${width}, PRIORITY_GENERAL){ const uint16_t *input = reinterpret_cast(inputs[0]); % for w in range(width): ${cpu_type}_t *output${w} = reinterpret_cast<${cpu_type}_t *>(outputs[${w}]); % endfor for (size_t i = 0, j = 0; i < nsamps; i++){ % for w in range(width): { const uint16_t num = ${to_host}(input[j++]); output${w}[i] = ${cpu_type}_t( int8_t(num)${do_scale}, int8_t(num >> 8)${do_scale} ); } % endfor } } """ def parse_tmpl(_tmpl_text, **kwargs): from mako.template import Template return Template(_tmpl_text).render(**kwargs) if __name__ == '__main__': import sys import os file = os.path.basename(__file__) output = parse_tmpl(TMPL_HEADER, file=file) for fctype, fptype in ( ('fc32', 'float'), ('fc64', 'double'), ): output += TMPL_CONV_CHDR_SC16_TO_FP.format( fctype=fctype, fptype=fptype) ## Generate CHDR converters # These guys don't have to worry about endianness for uhd_type, ptr_type in ( ('u8', 'uint8_t'), ('s8', 'int8_t'), ('s16', 'int16_t'), ('f32', 'float'), ('sc8', 'std::complex'), ('sc16', 'std::complex'), ('fc32', 'std::complex'), ('fc64', 'std::complex'), ): for in_type, out_type in ((uhd_type + '_chdr', uhd_type), (uhd_type, uhd_type + '_chdr')): output += TMPL_CONV_CHDR_MEMCPY.format( in_type=in_type, out_type=out_type, ptr_type=ptr_type) ## Generate all data types that are exactly ## item32 or multiples thereof: for end in ('be', 'le'): host_to_wire = {'be': 'uhd::htonx', 'le': 'uhd::htowx'}[end] wire_to_host = {'be': 'uhd::ntohx', 'le': 'uhd::wtohx'}[end] # item32 types (sc16->sc16 is a special case because it defaults # to Q/I order on the wire: for in_type, out_type, to_wire_or_host in ( ('item32', 'sc16_item32_{end}', host_to_wire), ('sc16_item32_{end}', 'item32', wire_to_host), ('f32', 'f32_item32_{end}', host_to_wire), ('f32_item32_{end}', 'f32', wire_to_host), ): output += TMPL_CONV_ITEM32.format( end=end, to_wire_or_host=to_wire_or_host, in_type=in_type.format(end=end), out_type=out_type.format(end=end) ) # 2xitem32 types: for in_type, out_type in ( ('fc32', 'fc32_item32_{end}'), ('fc32_item32_{end}', 'fc32'), ): output += TMPL_CONV_ITEM64.format( end=end, to_wire_or_host=to_wire_or_host, in_type=in_type.format(end=end), out_type=out_type.format(end=end) ) ## Real 16-Bit: for end, to_host, to_wire in ( ('be', 'uhd::ntohx', 'uhd::htonx'), ('le', 'uhd::wtohx', 'uhd::htowx'), ): output += TMPL_CONV_S16.format( end=end, to_host=to_host, to_wire=to_wire ) ## Real 8-Bit Types: for us8 in ('u8', 's8'): for end, to_host, to_wire in ( ('be', 'uhd::ntohx', 'uhd::htonx'), ('le', 'uhd::wtohx', 'uhd::htowx'), ): output += TMPL_CONV_U8S8.format( us8=us8, end=end, to_host=to_host, to_wire=to_wire ) #generate complex converters for usrp1 format (requires Cheetah) for width in 1, 2, 4: for cpu_type, do_scale in ( ('fc64', '*scale_factor'), ('fc32', '*float(scale_factor)'), ('sc16', ''), ): output += parse_tmpl( TMPL_CONV_USRP1_COMPLEX, width=width, to_host='uhd::wtohx', to_wire='uhd::htowx', cpu_type=cpu_type, do_scale=do_scale ) open(sys.argv[1], 'w').write(output)