/* ------------------------------------------------------------------
 * Copyright (C) 2011 Martin Storsjo
 * Copyright (C) 2022 Matthias P. Braendli
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 * -------------------------------------------------------------------
 */

/*! \mainpage Introduction
 * The ODR-mmbTools ODR-AudioEnc Audio encoder can encode audio for
 * ODR-DabMux, both DAB and DAB+. The DAB encoder is based on toolame. The
 * DAB+ encoder requires the Fraunhofer FDK AAC library, with the
 * necessary patches for 960-transform to do DAB+ broadcast encoding.
 *
 * This document describes some internals of the encoder, and is intended
 * to help developers understand and improve the software package.
 *
 * User documentation is available in the README and in the ODR-mmbTools
 * Guide, available on the www.opendigitalradio.org website.
* * The readme for the whole package is \ref md_README * * Interesting starting points for the encoder * - \ref odr-audioenc.cpp Main encoder file * - \ref VLCInput.h VLC Input * - \ref GSTInput.h GST Input * - \ref AlsaInput.h Alsa Input * - \ref JackInput.h JACK Input * - \ref Outputs.h ZeroMQ, file and EDI outputs * - \ref SampleQueue.h * - \ref charset.h Charset conversion * - \ref toolame.h libtolame API * - \ref AudioLevel * - \ref DataInput * - \ref SilenceDetection * * \file odr-audioenc.cpp * \brief The main file for the audio encoder */ #include "config.h" #include "PadInterface.h" #include "AlsaInput.h" #include "FileInput.h" #include "JackInput.h" #include "VLCInput.h" #include "GSTInput.h" #include "SampleQueue.h" #include "AACDecoder.h" #include "StatsPublish.h" #include "Outputs.h" #include "common.h" #include "wavfile.h" #include "utils.h" extern "C" { #include "encryption.h" } #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "aacenc_lib.h" extern "C" { #include "fec/fec.h" #include "libtoolame-dab/toolame.h" } /* Due to memory leaks in the VLC input, * we don't want to restart it endlessly. */ constexpr int MAX_FAULTS_ALLOWED = 5; using vec_u8 = std::vector; using namespace std; static void usage(const char* name) { fprintf(stderr, "ODR-AudioEnc %s is an audio encoder for both DAB and DAB+.\n" "The encoder can read from JACK, ALSA or\n" "a file source and encode to a ZeroMQ output for ODR-DabMux.\n" "It can also use libvlc and GStreamer as input.\n" "\n" "The -D option enables sound card clock drift compensation.\n" "A consumer sound card has a clock that is always a bit imprecise, and\n" "would drift off slowly. ODR-DabMux cannot handle such drift\n" "because it would have to throw away or insert complete encoded audio frames,\n" "which would create audible artifacts. 
This drift compensation can\n" "make sure that the encoding rate is correct by inserting or deleting\n" "audio samples. It can be used for both ALSA and VLC inputs and requires\n" "a system clock synchronised using NTP.\n" "\n" "When this option is enabled, you will see U and O printed in the\n" "console. These correspond to audio underruns and overruns caused\n" "by sound card clock drift. When sparse, they should not create audible\n" "artifacts.\n" "\n" "This encoder is able to insert PAD (DLS and MOT Slideshow)\n" "generated by ODR-PadEnc, and communicates using a UNIX socket.\n" "\nUsage:\n" "%s [INPUT SELECTION] [OPTION...]\n", #if defined(GITVERSION) GITVERSION #else PACKAGE_VERSION #endif , name); fprintf(stderr, " For the alsa input:\n" #if HAVE_ALSA " -d, --device=alsa_device Set ALSA input device.\n" #else " The Alsa input was disabled at compile time\n" #endif " For the file input:\n" " -i, --input=FILENAME Input filename (use -i - for stdin).\n" " -f, --format={ wav, raw } Set input file format (default: wav).\n" " --fifo-silence Input file is fifo and encoder generates silence when fifo is empty. 
Ignore EOF.\n" " For the JACK input:\n" #if HAVE_JACK " -j, --jack=name Enable JACK input, and define our name\n" #else " The JACK input was disabled at compile-time\n" #endif " For the VLC input:\n" #if HAVE_VLC " -v, --vlc-uri=uri Enable VLC input and use the URI given as source\n" " -C, --vlc-cache=ms Specify VLC network cache length.\n" " -V Increase the VLC verbosity by one (can be given \n" " multiple times)\n" " -L OPTION Give an additional options to VLC (can be given\n" " multiple times)\n" #else " The VLC input was disabled at compile-time\n" #endif " For the GStreamer input:\n" #if HAVE_GST " -G, --gst-uri=uri Enable GStreamer input and use the URI given as source\n" " --gst-pipeline=pipeline Specify a GStreamer pipeline that receives your source.\n" " The last pipeline element is connected to a caps filter that specifies\n" " the audio format and sample rate.\n" #else " The GStreamer input was disabled at compile-time\n" #endif " -w, --write-icy-text=filename Write the ICY Text into the file, so that ODR-PadEnc can read it.\n" " -W, --write-icy-text-dl-plus When writing the ICY Text into the file, add DL Plus information.\n" " Drift compensation\n" " -D, --drift-comp Enable ALSA/VLC sound card drift compensation.\n" " Encoder parameters:\n" " -b, --bitrate={ 8, 16, ..., 192 } Output bitrate in kbps. 
Must be a multiple of 8.\n" " -c, --channels={ 1, 2 } Nb of input channels (default: 2).\n" " -r, --rate={ 24000, 32000, 48000 } Input sample rate (default: 48000).\n" " -g, --audio-gain=dB Apply audio gain correction in dB to source, negative values allowed.\n" " Use this as a workaround to correct the gain for streams that are\n" " much too loud.\n" " DAB specific options\n" " -a, --dab Encode in DAB and not in DAB+.\n" " --dabmode=MODE Channel mode: s/d/j/m\n" " (default: j if stereo, m if mono).\n" " --dabpsy=PSY Psychoacoustic model 0/1/2/3\n" " (default: 1).\n" " DAB+ specific options\n" " -A, --no-afterburner Disable AAC encoder quality increaser.\n" " --aaclc Force the usage of AAC-LC (no SBR, no PS)\n" " --sbr Force the usage of SBR (HE-AAC)\n" " --ps Force the usage of SBR and PS (HE-AACv2)\n" " -B, --bandwidth=VALUE Set the AAC encoder bandwidth to VALUE [Hz].\n" " --decode=FILE Decode the AAC back to a wav file (loopback test).\n" " Output and PAD parameters:\n" " --identifier=ID An identifier string that is sent in the ODRv EDI TAG. Max 32 characters length.\n" " -o, --output=URI Output ZMQ uri. (e.g. 'tcp://localhost:9000')\n" " -or- Output file uri. (e.g. 'file.dabp')\n" " -or- a single dash '-' to denote stdout\n" " If more than one ZMQ output is given, the socket\n" " will be connected to all listed endpoints.\n" " -e, --edi=URI EDI output uri, (e.g. 
'tcp://localhost:7000')\n" " --fec=FEC Set EDI output FEC\n" " -T, --timestamp-delay=DELAY_MS Enabled timestamps in EDI (requires TAI clock bulletin download) and\n" " add a delay (in milliseconds) to the timestamps carried in EDI\n" " --startup-check=SCRIPT_PATH Before starting, run the given script, and only start if it returns 0.\n" " -k, --secret-key=FILE Enable ZMQ encryption with the given secret key.\n" " -p, --pad=BYTES Enable PAD insertion and set PAD size in bytes.\n" " -P, --pad-socket=IDENTIFIER Use the given identifier to communicate with ODR-PadEnc.\n" " -l, --level Show peak audio level indication.\n" " -S, --stats=SOCKET_NAME Connect to the specified UNIX Datagram socket and send statistics.\n" " This allows external tools to collect audio and drift compensation stats.\n" " -s, --silence=TIMEOUT Abort encoding after TIMEOUT seconds of silence.\n" " --version Show version and quit.\n" "\n" ); } /*! Setup the FDK AAC encoder * * \return 0 on success */ static int prepare_aac_encoder( HANDLE_AACENCODER *encoder, int subchannel_index, int channels, int sample_rate, int afterburner, uint32_t bandwidth, int *aot) { CHANNEL_MODE mode; switch (channels) { case 1: mode = MODE_1; break; case 2: mode = MODE_2; break; default: fprintf(stderr, "Unsupported channels number %d\n", channels); return 1; } if (aacEncOpen(encoder, 0x01|0x02|0x04, channels) != AACENC_OK) { fprintf(stderr, "Unable to open encoder\n"); return 1; } if (*aot == AOT_NONE) { if(channels == 2 && subchannel_index <= 6) { *aot = AOT_DABPLUS_PS; } else if((channels == 1 && subchannel_index <= 8) || (channels == 2 && subchannel_index <= 10)) { *aot = AOT_DABPLUS_SBR; } else { *aot = AOT_DABPLUS_AAC_LC; } } fprintf(stderr, "Using %d subchannels. AAC type: %s%s%s. channels=%d, sample_rate=%d\n", subchannel_index, *aot == AOT_DABPLUS_PS ? "HE-AAC v2" : "", *aot == AOT_DABPLUS_SBR ? "HE-AAC" : "", *aot == AOT_DABPLUS_AAC_LC ? 
"AAC-LC" : "", channels, sample_rate); if (aacEncoder_SetParam(*encoder, AACENC_AOT, *aot) != AACENC_OK) { fprintf(stderr, "Unable to set the AOT\n"); return 1; } if (aacEncoder_SetParam(*encoder, AACENC_SAMPLERATE, sample_rate) != AACENC_OK) { fprintf(stderr, "Unable to set the sample rate\n"); return 1; } if (aacEncoder_SetParam(*encoder, AACENC_CHANNELMODE, mode) != AACENC_OK) { fprintf(stderr, "Unable to set the channel mode\n"); return 1; } if (aacEncoder_SetParam(*encoder, AACENC_CHANNELORDER, 1) != AACENC_OK) { fprintf(stderr, "Unable to set the wav channel order\n"); return 1; } if (aacEncoder_SetParam(*encoder, AACENC_GRANULE_LENGTH, 960) != AACENC_OK) { fprintf(stderr, "Unable to set the granule length\n"); return 1; } if (aacEncoder_SetParam(*encoder, AACENC_TRANSMUX, TT_DABPLUS) != AACENC_OK) { fprintf(stderr, "Unable to set the RAW transmux\n"); return 1; } /*if (aacEncoder_SetParam(*encoder, AACENC_BITRATEMODE, AACENC_BR_MODE_SFR) * != AACENC_OK) { fprintf(stderr, "Unable to set the bitrate mode\n"); return 1; }*/ fprintf(stderr, "AAC bitrate set to: %d\n", subchannel_index*8000); if (aacEncoder_SetParam(*encoder, AACENC_BITRATE, subchannel_index*8000) != AACENC_OK) { fprintf(stderr, "Unable to set the bitrate\n"); return 1; } if (aacEncoder_SetParam(*encoder, AACENC_AFTERBURNER, afterburner) != AACENC_OK) { fprintf(stderr, "Unable to set the afterburner mode\n"); return 1; } if (!afterburner) { fprintf(stderr, "Warning: Afterburned disabled!\n"); } if (bandwidth > 0) { fprintf(stderr, "Setting bandwidth is %d\n", bandwidth); if (aacEncoder_SetParam(*encoder, AACENC_BANDWIDTH, bandwidth) != AACENC_OK) { fprintf(stderr, "Unable to set bandwidth mode\n"); return 1; } } if (aacEncEncode(*encoder, nullptr, nullptr, nullptr, nullptr) != AACENC_OK) { fprintf(stderr, "Unable to initialize the encoder\n"); return 1; } const uint32_t bw = aacEncoder_GetParam(*encoder, AACENC_BANDWIDTH); fprintf(stderr, "Bandwidth is %d\n", bw); return 0; } 
chrono::steady_clock::time_point timepoint_last_compensation; /*! Do drift compensation by distributing the missing samples over * the whole input buffer instead of having a bunch of missing samples * at the end only. * * This expands (in time) the received samples over the whole duration * of the buffer. */ static void expand_missing_samples(vec_u8& buf, int channels, size_t valid_bytes) { const size_t bytes_per_sample = BYTES_PER_SAMPLE * channels; assert(buf.size() % bytes_per_sample == 0); assert(buf.size() > valid_bytes); const size_t valid_samples = valid_bytes / bytes_per_sample; const size_t missing_samples = (buf.size() / bytes_per_sample) - valid_samples; // We only fix up to 10% missing samples if (missing_samples * bytes_per_sample > buf.size() / 10) { for (size_t i = valid_samples * bytes_per_sample; i < buf.size(); i++) { buf[i] = 0; } } else { const vec_u8 source_buf(buf); size_t source_ix = 0; for (size_t i = 0; i < buf.size() / bytes_per_sample; i++) { for (size_t j = 0; j < bytes_per_sample; j++) { buf.at(bytes_per_sample*i + j) = source_buf.at(source_ix + j); } // Do not advance the source index if the sample index is // at the spots where we want to duplicate the source sample if (not (i > 0 and (i % (valid_samples / missing_samples) == 0))) { source_ix += bytes_per_sample; } } } } /*! Wait the proper amount of time to throttle down to nominal encoding * rate, if drift compensation is enabled. 
*/ static void drift_compensation_delay(int sample_rate, int channels, size_t bytes) { const size_t bytes_per_second = sample_rate * BYTES_PER_SAMPLE * channels; size_t bytes_compensate = bytes; const auto wait_time = std::chrono::milliseconds(1000ul * bytes_compensate / bytes_per_second); assert(1000ul * bytes_compensate % bytes_per_second == 0); const auto curTime = std::chrono::steady_clock::now(); const auto diff = curTime - timepoint_last_compensation; if (diff < wait_time) { auto waiting = wait_time - diff; std::this_thread::sleep_for(waiting); } timepoint_last_compensation += wait_time; } #define no_argument 0 #define required_argument 1 #define optional_argument 2 #define STATUS_PAD_INSERTED 0x1 #define STATUS_OVERRUN 0x2 #define STATUS_UNDERRUN 0x4 struct AudioEnc { public: int sample_rate=48000; int channels=2; double gain_dB = 0.0; string icytext_file; bool icytext_dlplus = false; ICY_TEXT_t previous_text; // For the ALSA input string alsa_device; // For the file input string infile; bool continue_after_eof = false; int raw_input = 0; // For the VLC input string vlc_uri; string vlc_cache; vector vlc_additional_opts; unsigned verbosity = 0; // For the GST input string gst_uri; string gst_pipeline; string jack_name; bool drift_compensation = false; encoder_selection_t selected_encoder = encoder_selection_t::fdk_dabplus; bool afterburner = true; uint32_t bandwidth = 0; int bitrate = 0; // 0 means default bitrate int dab_psy_model = 1; bool restart_on_fault = false; int fault_counter = 0; std::deque toolame_buffer; shared_ptr file_output; shared_ptr zmq_output; Output::EDI edi_output; string identifier; bool tist_enabled = false; uint32_t tist_delay_ms = 0; vector output_uris; vector edi_output_uris; void *rs_handler = nullptr; AACENC_InfoStruct info = { 0 }; int aot = AOT_NONE; string decode_wavfilename; string dab_channel_mode; /* On silence, die after the silence_timeout expires */ bool die_on_silence = false; int silence_timeout = 0; int 
measured_silence_ms = 0; /* For MOT Slideshow and DLS insertion */ string pad_ident = ""; PadInterface pad_intf; int padlen = 6; /* Encoder status, see the above STATUS macros */ int status = 0; /* Whether to show the 'sox'-like measurement */ int show_level = 0; /* If not empty, send stats over UNIX DGRAM socket */ string send_stats_to = ""; /* Data for ZMQ CURVE authentication */ char* keyfile = nullptr; char secretkey[CURVE_KEYLEN+1]; SampleQueue queue; HANDLE_AACENCODER encoder = nullptr; unique_ptr decoder; unique_ptr stats_publisher; AudioEnc() : queue(BYTES_PER_SAMPLE) { } AudioEnc(const AudioEnc&) = delete; AudioEnc& operator=(const AudioEnc&) = delete; ~AudioEnc(); int run(); bool send_frame(const uint8_t *buf, size_t len, int16_t peak_left, int16_t peak_right); shared_ptr initialise_input(); }; int AudioEnc::run() { int num_inputs = 0; #if HAVE_ALSA if (not alsa_device.empty()) num_inputs++; #endif if (not infile.empty()) num_inputs++; #if HAVE_JACK if (not jack_name.empty()) num_inputs++; #endif #if HAVE_VLC if (not vlc_uri.empty()) num_inputs++; #endif #if HAVE_GST if (not gst_uri.empty()) num_inputs++; if (not gst_pipeline.empty()) num_inputs++; #endif if (num_inputs == 0) { fprintf(stderr, "No input defined!\n"); return 1; } else if (num_inputs > 1) { fprintf(stderr, "You must define only one possible input, not several!\n"); return 1; } if (selected_encoder == encoder_selection_t::fdk_dabplus) { if (bitrate == 0) { bitrate = 64; } int subchannel_index = bitrate / 8; if (subchannel_index < 1 || subchannel_index > 24) { fprintf(stderr, "Bad subchannel index: %d, must be between 1 and 24. Try other bitrate.\n", subchannel_index); return 1; } if ( ! (sample_rate == 32000 || sample_rate == 48000)) { fprintf(stderr, "Invalid sample rate. Possible values are: 32000, 48000.\n"); return 1; } } else if (selected_encoder == encoder_selection_t::toolame_dab) { if (bitrate == 0) { bitrate = 192; } if ( ! 
(sample_rate == 24000 || sample_rate == 48000)) { fprintf(stderr, "Invalid sample rate. Possible values are: 24000, 48000.\n"); return 1; } } if (padlen < 0 or padlen > 255) { fprintf(stderr, "Invalid PAD length specified\n"); return 1; } if (output_uris.empty() and edi_output_uris.empty()) { fprintf(stderr, "No output defined\n"); return 1; } for (const auto& uri : output_uris) { if (uri == "-") { if (file_output) { fprintf(stderr, "You can't write to more than one file!\n"); return 1; } file_output = make_shared(stdout); } else if ((uri.compare(0, 6, "tcp://") == 0) || (uri.compare(0, 6, "pgm://") == 0) || (uri.compare(0, 7, "epgm://") == 0) || (uri.compare(0, 6, "ipc://") == 0)) { if (not zmq_output) { zmq_output = make_shared(); } zmq_output->connect(uri.c_str(), keyfile); } else { // We assume it's a file name if (file_output) { fprintf(stderr, "You can't write to more than one file!\n"); return 1; } file_output = make_shared(uri.c_str()); } } for (const auto& uri : edi_output_uris) { if (uri.compare(0, 6, "tcp://") == 0 or uri.compare(0, 6, "udp://") == 0) { auto host_port_sep_ix = uri.find(':', 6); if (host_port_sep_ix != string::npos) { auto host = uri.substr(6, host_port_sep_ix - 6); auto port = std::stoi(uri.substr(host_port_sep_ix + 1)); auto proto = uri.substr(0, 3); if (proto == "tcp") { edi_output.add_tcp_destination(host, port); } else if (proto == "udp") { edi_output.add_udp_destination(host, port); } else { throw logic_error("unhandled proto"); } } else { fprintf(stderr, "Invalid EDI URL host!\n"); } } else { fprintf(stderr, "Invalid EDI protocol!\n"); } } if (not edi_output_uris.empty()) { edi_output.set_tist(tist_enabled, tist_delay_ms); stringstream ss; ss << PACKAGE_NAME << " " << #if defined(GITVERSION) GITVERSION << #else PACKAGE_VERSION << #endif " " << identifier; edi_output.set_odr_version_tag(ss.str()); } if (pad_ident.empty()) { // Override both default value and user-configured value if no ident given padlen = 0; } if (padlen != 0 and 
not pad_ident.empty()) { pad_intf.open(pad_ident); fprintf(stderr, "PAD socket opened\n"); } else { fprintf(stderr, "PAD disabled because neither PAD length nor PAD identifier given\n"); } vec_u8 input_buf; if (selected_encoder == encoder_selection_t::fdk_dabplus) { int subchannel_index = bitrate / 8; if (prepare_aac_encoder(&encoder, subchannel_index, channels, sample_rate, afterburner, bandwidth, &aot) != 0) { fprintf(stderr, "Encoder preparation failed\n"); return 1; } if (aacEncInfo(encoder, &info) != AACENC_OK) { fprintf(stderr, "Unable to get the encoder info\n"); return 1; } // Each DAB+ frame will need input_size audio bytes const int input_size = channels * BYTES_PER_SAMPLE * info.frameLength; fprintf(stderr, "DAB+ Encoding: framelen=%d (%dB)\n", info.frameLength, input_size); input_buf.resize(input_size); if (not decode_wavfilename.empty()) { decoder.reset(new AACDecoder(decode_wavfilename.c_str())); } } else if (selected_encoder == encoder_selection_t::toolame_dab) { int err = toolame_init(); if (err == 0) { err = toolame_set_samplerate(sample_rate); } if (err == 0) { err = toolame_set_psy_model(dab_psy_model); } if (dab_channel_mode.empty()) { if (channels == 2) { dab_channel_mode = 'j'; // Default to joint-stereo } else if (channels == 1) { dab_channel_mode = 'm'; // Default to mono } else { fprintf(stderr, "Unsupported channels number %d\n", channels); return 1; } } if (err == 0) { err = toolame_set_channel_mode(dab_channel_mode.c_str()[0]); } // setting the ScF-CRC len here depends on set sample rate/channel mode if (err == 0) { err = toolame_set_bitrate(bitrate); } if (err == 0) { err = toolame_set_pad(padlen); } if (err) { fprintf(stderr, "libtoolame-dab init failed: %d\n", err); return err; } input_buf.resize(channels * 1152 * BYTES_PER_SAMPLE); if (not decode_wavfilename.empty()) { fprintf(stderr, "--decode not supported for DAB\n"); return 1; } } if (not send_stats_to.empty()) { StatsPublisher *s = nullptr; try { s = new 
StatsPublisher(send_stats_to); stats_publisher.reset(s); } catch (const runtime_error& e) { fprintf(stderr, "Failed to initialise Stats Publisher: %s", e.what()); if (s != nullptr) { delete s; } return 1; } } /* We assume that we need to call the encoder * enc_calls_per_output before it gives us one encoded audio * frame. This information is used when the alsa drift compensation * is active. This is only valid for FDK-AAC. */ const int enc_calls_per_output = (aot == AOT_DABPLUS_AAC_LC) ? sample_rate / 8000 : sample_rate / 16000; int max_size = 32*input_buf.size() + NUM_SAMPLES_PER_CALL; /*! The SampleQueue \c queue is given to the inputs, so that they * can fill it. */ queue.configure(max_size, not drift_compensation, channels); /* symsize=8, gfpoly=0x11d, fcr=0, prim=1, nroots=10, pad=135 */ rs_handler = init_rs_char(8, 0x11d, 0, 1, 10, 135); if (rs_handler == nullptr) { perror("init_rs_char failed"); return 1; } shared_ptr input; try { input = initialise_input(); } catch (const runtime_error& e) { fprintf(stderr, "Initialising input triggered exception: %s\n", e.what()); return 1; } if (zmq_output) { zmq_output->set_encoder_type(selected_encoder, bitrate); } int outbuf_size = 0; vec_u8 outbuf; switch (selected_encoder) { case encoder_selection_t::fdk_dabplus: outbuf_size = bitrate/8*120; outbuf.resize(24*120); break; case encoder_selection_t::toolame_dab: outbuf_size = 4092; outbuf.resize(outbuf_size); fprintf(stderr, "Setting outbuf size to %zu\n", outbuf.size()); break; } vector pad_buf(padlen + 1); if (restart_on_fault) { fprintf(stderr, "Autorestart has been deprecated and will be removed in the future!\n"); this_thread::sleep_for(chrono::seconds(2)); } fprintf(stderr, "Starting encoding\n"); int retval = 0; int send_error_count = 0; timepoint_last_compensation = chrono::steady_clock::now(); int calls = 0; // for checking ssize_t read_bytes = 0; do { // --------------- Read data from the PAD socket int calculated_padlen = 0; if (padlen != 0) { vector pad_data 
= pad_intf.request(padlen); if (pad_data.empty()) { /* no PAD available */ } else if (pad_data.size() == pad_buf.size()) { calculated_padlen = pad_data[padlen]; if (calculated_padlen < 2) { throw runtime_error("Invalid X-PAD length " + to_string(calculated_padlen)); } /* AAC: skip PAD if only zero F-PAD (saves four bytes) * See ยง5.4.3 in ETSI TS 102 563 */ if ( selected_encoder == encoder_selection_t::fdk_dabplus && calculated_padlen == 2 && pad_data[padlen - 2] == 0x00 && pad_data[padlen - 1] == 0x00 ) { calculated_padlen = 0; } copy(pad_data.begin(), pad_data.end(), pad_buf.begin()); } else { fprintf(stderr, "Incorrect PAD length received: %zu expected %d\n", pad_data.size(), padlen + 1); break; } } if (calculated_padlen > 0) { status |= STATUS_PAD_INSERTED; } // -------------- Read Data memset(outbuf.data(), 0x00, outbuf_size); memset(input_buf.data(), 0x00, input_buf.size()); /*! \section DataInput * We read data input either in a blocking way (file input, VLC or ALSA * without drift compensation) or in a non-blocking way (VLC or ALSA * with drift compensation, JACK). * * All inputs write samples into the queue, and either use \c pop() or * \c pop_wait() depending on if it's blocking or not * * In non-blocking, the \c queue makes the data available without delay, and the * \c drift_compensation_delay() function handles rate throttling. 
*/ if (input->fault_detected()) { fprintf(stderr, "Detected fault in input!\n"); if (restart_on_fault) { fault_counter++; if (fault_counter >= MAX_FAULTS_ALLOWED) { fprintf(stderr, "Maximum number of input faults reached, aborting"); retval = 5; break; } try { input = initialise_input(); } catch (const runtime_error& e) { fprintf(stderr, "Initialising input triggered exception: %s\n", e.what()); retval = 5; break; } continue; } else { retval = 5; break; } } if (not input->read_source(input_buf.size())) { fprintf(stderr, "End of input reached\n"); retval = 0; break; } if (drift_compensation) { size_t overruns = 0; size_t bytes_from_queue = queue.pop(input_buf.data(), input_buf.size(), &overruns); // returns bytes if (bytes_from_queue != input_buf.size()) { expand_missing_samples(input_buf, channels, bytes_from_queue); } read_bytes = input_buf.size(); drift_compensation_delay(sample_rate, channels, read_bytes); if (bytes_from_queue != input_buf.size()) { status |= STATUS_UNDERRUN; if (stats_publisher) { stats_publisher->notify_underrun(); } } if (overruns) { status |= STATUS_OVERRUN; if (stats_publisher) { stats_publisher->notify_overrun(); } } } else { const int timeout_ms = 10000; read_bytes = input_buf.size(); size_t overruns = 0; /*! pop_wait() must return after a timeout, otherwise the silence detector cannot do * its job. */ ssize_t bytes_from_queue = queue.pop_wait(input_buf.data(), read_bytes, timeout_ms, &overruns); // returns bytes if (overruns) { throw logic_error("Queue overrun in non-drift compensation!"); } if (bytes_from_queue < read_bytes) { // queue timeout occurred fprintf(stderr, "Detected fault in input! 
No data in time.\n"); if (restart_on_fault) { fault_counter++; if (fault_counter >= MAX_FAULTS_ALLOWED) { fprintf(stderr, "Maximum number of input faults reached, aborting"); retval = 5; break; } try { input = initialise_input(); } catch (const runtime_error& e) { fprintf(stderr, "Initialising input triggered exception: %s\n", e.what()); return 1; } continue; } else { retval = 5; break; } } } /*! \section MetadataFromSource * The VLC input is the only input that can also give us metadata, which * we can hand over to ODR-PadEnc. */ if (not icytext_file.empty()) { ICY_TEXT_t text; if (false) {} #if HAVE_VLC // Using std::dynamic_pointer_cast would be safer, but is C++17 else if (not vlc_uri.empty()) { VLCInput *vlc_input = (VLCInput*)(input.get()); text = vlc_input->get_icy_text(); } #endif #if HAVE_GST else if ((not gst_uri.empty()) or (not gst_pipeline.empty())) { GSTInput *gst_input = (GSTInput*)(input.get()); text = gst_input->get_icy_text(); } #endif if (previous_text != text) { bool success = write_icy_to_file(text, icytext_file, icytext_dlplus); if (not success) { fprintf(stderr, "Failed to write ICY Text\n"); } } previous_text = text; } /*! \section AudioLevel * Audio level measurement is always done assuming we have two * channels, and is formally wrong in mono, but still gives * numbers one can use. * * At the same time, we apply gain correction. 
* * \todo fix level measurement in mono */ int16_t peak_left = 0; int16_t peak_right = 0; const double linear_gain_correction = pow(10.0, gain_dB / 20.0); for (int i = 0; i < read_bytes; i+=4) { int16_t l = input_buf[i] | (input_buf[i+1] << 8); int16_t r = input_buf[i+2] | (input_buf[i+3] << 8); if (linear_gain_correction != 1.0) { l *= linear_gain_correction; r *= linear_gain_correction; input_buf[i] = l & 0x00FF; input_buf[i+1] = (l & 0xFF00) >> 8; input_buf[i+2] = r & 0x00FF; input_buf[i+3] = (r & 0xFF00) >> 8; } peak_left = std::max(peak_left, l); peak_right = std::max(peak_right, r); } if (stats_publisher) { stats_publisher->update_audio_levels(peak_left, peak_right); } /*! \section SilenceDetection * Silence detection looks at the audio level and is * only useful if the connection dropped, or if no data is available. It is not * useful if the source is nearly silent (some noise present), because the * threshold is 0, and not configurable. The rationale is that we want to * guard against connection issues, not source level issues. 
*/ if (die_on_silence && std::max(peak_left, peak_right) == 0) { const unsigned int frame_time_msec = 1000ul * read_bytes / (BYTES_PER_SAMPLE * channels * sample_rate); measured_silence_ms += frame_time_msec; if (measured_silence_ms > 1000*silence_timeout) { fprintf(stderr, "Silence detected for %d seconds, aborting.\n", silence_timeout); retval = 2; break; } } else { measured_silence_ms = 0; } int numOutBytes = 0; if (read_bytes and selected_encoder == encoder_selection_t::fdk_dabplus) { AACENC_BufDesc in_buf = { 0 }, out_buf = { 0 }; AACENC_InArgs in_args = { 0 }; AACENC_OutArgs out_args = { 0 }; // -------------- AAC Encoding // int in_identifier[] = {IN_AUDIO_DATA, IN_ANCILLRY_DATA}; int out_identifier = OUT_BITSTREAM_DATA; void *in_ptr[2], *out_ptr; int in_size[2], in_elem_size[2]; int out_size, out_elem_size; in_ptr[0] = input_buf.data(); in_ptr[1] = pad_buf.data() + (padlen - calculated_padlen); // offset due to unused PAD bytes in_size[0] = read_bytes; in_size[1] = calculated_padlen; in_elem_size[0] = BYTES_PER_SAMPLE; in_elem_size[1] = sizeof(uint8_t); in_args.numInSamples = input_buf.size()/BYTES_PER_SAMPLE; in_args.numAncBytes = calculated_padlen; in_buf.numBufs = calculated_padlen ? 2 : 1; // Samples + Data / Samples in_buf.bufs = (void**)&in_ptr; in_buf.bufferIdentifiers = in_identifier; in_buf.bufSizes = in_size; in_buf.bufElSizes = in_elem_size; out_ptr = outbuf.data(); out_size = outbuf.size(); out_elem_size = 1; out_buf.numBufs = 1; out_buf.bufs = &out_ptr; out_buf.bufferIdentifiers = &out_identifier; out_buf.bufSizes = &out_size; out_buf.bufElSizes = &out_elem_size; AACENC_ERROR err; if ((err = aacEncEncode(encoder, &in_buf, &out_buf, &in_args, &out_args)) != AACENC_OK) { if (err == AACENC_ENCODE_EOF) { fprintf(stderr, "encoder error: EOF reached\n"); break; } fprintf(stderr, "Encoding failed (%d)\n", err); retval = 3; break; } calls++; numOutBytes = out_args.numOutBytes; } else if (selected_encoder == encoder_selection_t::toolame_dab) { /*! 
\note toolame expects the audio to be in another shape as * we have in input_buf, and we need to convert first */ short input_buffers[2][1152]; if (channels == 1) { memcpy(input_buffers[0], input_buf.data(), 1152 * BYTES_PER_SAMPLE); } else if (channels == 2) { for (int i = 0; i < 1152; i++) { int16_t l = input_buf[4*i] | (input_buf[4*i+1] << 8); int16_t r = input_buf[4*i+2] | (input_buf[4*i+3] << 8); input_buffers[0][i] = l; input_buffers[1][i] = r; } } else { fprintf(stderr, "INTERNAL ERROR! invalid number of channels\n"); } if (read_bytes) { numOutBytes = toolame_encode_frame(input_buffers, pad_buf.data(), calculated_padlen, outbuf.data(), outbuf.size()); } else { numOutBytes = toolame_finish(outbuf.data(), outbuf.size()); } } if (numOutBytes != 0 and decoder) { try { decoder->decode_frame(outbuf.data(), numOutBytes); } catch (runtime_error &e) { fprintf(stderr, "Decoding failed with: %s\n", e.what()); return 1; } } /* Check if the encoder has generated output data. * DAB+ requires RS encoding, which is not done in ODR-DabMux and not necessary * for DAB. */ if (numOutBytes != 0 and selected_encoder == encoder_selection_t::fdk_dabplus) { // Our timing code depends on this if (calls != enc_calls_per_output) { fprintf(stderr, "INTERNAL ERROR! 
calls=%d, expected %d\n", calls, enc_calls_per_output); } calls = 0; int row, col; unsigned char buf_to_rs_enc[110]; unsigned char rs_enc[10]; const int subchannel_index = bitrate / 8; for(row=0; row < subchannel_index; row++) { for(col=0;col < 110; col++) { buf_to_rs_enc[col] = outbuf[subchannel_index * col + row]; } encode_rs_char(rs_handler, buf_to_rs_enc, rs_enc); for(col=110; col<120; col++) { outbuf[subchannel_index * col + row] = rs_enc[col-110]; assert(subchannel_index * col + row < outbuf_size); } } numOutBytes = outbuf_size; } if (numOutBytes > 0 and selected_encoder == encoder_selection_t::toolame_dab) { toolame_buffer.insert(toolame_buffer.end(), outbuf.begin(), outbuf.begin() + numOutBytes); // ODR-DabMux expects frames of length 3*bitrate const size_t frame_len = 3 * bitrate; while (toolame_buffer.size() > frame_len) { vec_u8 frame(frame_len); // this is probably not very efficient std::copy(toolame_buffer.begin(), toolame_buffer.begin() + frame_len, frame.begin()); toolame_buffer.erase(toolame_buffer.begin(), toolame_buffer.begin() + frame_len); bool success = send_frame(frame.data(), frame.size(), peak_left, peak_right); if (not success) { fprintf(stderr, "Send error !\n"); send_error_count ++; } } } else if (numOutBytes > 0 and selected_encoder == encoder_selection_t::fdk_dabplus) { bool success = send_frame(outbuf.data(), numOutBytes, peak_left, peak_right); if (not success) { fprintf(stderr, "Send error !\n"); send_error_count ++; } } if (send_error_count > 10) { fprintf(stderr, "Send failed ten times, aborting!\n"); retval = 4; break; } if (numOutBytes != 0) { if (show_level) { if (channels == 1) { fprintf(stderr, "\rIn: [%-6s] %1s %1s %1s", level(1, std::max(peak_right, peak_left)), status & STATUS_PAD_INSERTED ? "P" : " ", status & STATUS_UNDERRUN ? "U" : " ", status & STATUS_OVERRUN ? 
"O" : " "); } else if (channels == 2) { fprintf(stderr, "\rIn: [%6s|%-6s] %1s %1s %1s", level(0, peak_left), level(1, peak_right), status & STATUS_PAD_INSERTED ? "P" : " ", status & STATUS_UNDERRUN ? "U" : " ", status & STATUS_OVERRUN ? "O" : " "); } } else { if (status & STATUS_OVERRUN) { fprintf(stderr, "O"); } if (status & STATUS_UNDERRUN) { fprintf(stderr, "U"); } } if (stats_publisher) { stats_publisher->send_stats(); } status = 0; } fflush(stdout); } while (read_bytes > 0); fprintf(stderr, "\n"); return retval; } bool AudioEnc::send_frame(const uint8_t *buf, size_t len, int16_t peak_left, int16_t peak_right) { // The file output is mutually exclusive to the other outputs if (file_output) { file_output->update_audio_levels(peak_left, peak_right); return file_output->write_frame(buf, len); } bool success = true; if (zmq_output) { zmq_output->update_audio_levels(peak_left, peak_right); success &= zmq_output->write_frame(buf, len); } if (edi_output.enabled()) { edi_output.update_audio_levels(peak_left, peak_right); switch (selected_encoder) { case encoder_selection_t::fdk_dabplus: { // STI/EDI specifies that one AF packet must contain 24ms worth of data, // therefore we must split the superframe into five parts if (len % 5 != 0) { throw logic_error("Superframe size not multiple of 5"); } const size_t blocksize = len/5; for (size_t i = 0; i < 5; i++) { success &= edi_output.write_frame(buf + i * blocksize, blocksize); if (not success) { break; } } } break; case encoder_selection_t::toolame_dab: success &= edi_output.write_frame(buf, len); break; } } return success; } AudioEnc::~AudioEnc() { file_output.reset(); zmq_output.reset(); if (rs_handler != nullptr) { free_rs_char(rs_handler); } if (encoder != nullptr and selected_encoder == encoder_selection_t::fdk_dabplus) { aacEncClose(&encoder); } } shared_ptr AudioEnc::initialise_input() { shared_ptr input; if (not infile.empty()) { input = make_shared(infile, raw_input, sample_rate, continue_after_eof, queue); } #if 
HAVE_JACK else if (not jack_name.empty()) { input = make_shared(jack_name, channels, sample_rate, queue); } #endif #if HAVE_VLC else if (not vlc_uri.empty()) { input = make_shared(vlc_uri, sample_rate, channels, verbosity, vlc_cache, vlc_additional_opts, queue); } #endif #if HAVE_GST else if ((not gst_uri.empty()) or (not gst_pipeline.empty())) { input = make_shared(gst_uri, gst_pipeline, sample_rate, channels, queue); } #endif #if HAVE_ALSA else if (drift_compensation) { input = make_shared(alsa_device, channels, sample_rate, queue); } else { input = make_shared(alsa_device, channels, sample_rate, queue); } #endif if (not input) { throw logic_error("Initialising input incomplete!"); } input->prepare(); return input; } int main(int argc, char *argv[]) { const struct option longopts[] = { {"bitrate", required_argument, 0, 'b'}, {"bandwidth", required_argument, 0, 'B'}, {"audio-gain", required_argument, 0, 'g'}, {"vlc-gain", required_argument, 0, 10 }, // backward-compatibility to v3 {"channels", required_argument, 0, 'c'}, {"dabmode", required_argument, 0, 4 }, {"dabpsy", required_argument, 0, 5 }, {"device", required_argument, 0, 'd'}, {"edi", required_argument, 0, 'e'}, {"fec", required_argument, 0, 8 }, {"timestamp-delay", required_argument, 0, 'T'}, {"decode", required_argument, 0, 6 }, {"format", required_argument, 0, 'f'}, {"gst-uri", required_argument, 0, 'G'}, {"gst-pipeline", required_argument, 0, 11 }, {"identifier", required_argument, 0, 7 }, {"input", required_argument, 0, 'i'}, {"jack", required_argument, 0, 'j'}, {"output", required_argument, 0, 'o'}, {"pad", required_argument, 0, 'p'}, {"pad-socket", required_argument, 0, 'P'}, {"rate", required_argument, 0, 'r'}, {"secret-key", required_argument, 0, 'k'}, {"silence", required_argument, 0, 's'}, {"startup-check", required_argument, 0, 9 }, {"stats", required_argument, 0, 'S'}, {"vlc-cache", required_argument, 0, 'C'}, {"vlc-uri", required_argument, 0, 'v'}, {"vlc-opt", required_argument, 0, 'L'}, 
{"write-icy-text", required_argument, 0, 'w'}, {"write-icy-text-dl-plus", no_argument, 0, 'W'}, {"aaclc", no_argument, 0, 0 }, {"dab", no_argument, 0, 'a'}, {"drift-comp", no_argument, 0, 'D'}, {"fifo-silence", no_argument, 0, 3 }, {"help", no_argument, 0, 'h'}, {"level", no_argument, 0, 'l'}, {"no-afterburner", no_argument, 0, 'A'}, {"ps", no_argument, 0, 2 }, {"restart", no_argument, 0, 'R'}, {"sbr", no_argument, 0, 1 }, {"verbosity", no_argument, 0, 'V'}, {0, 0, 0, 0}, }; if (argc == 2 and strcmp(argv[1], "--version") == 0) { fprintf(stdout, "%s\n", #if defined(GITVERSION) GITVERSION #else PACKAGE_VERSION #endif ); return 0; } fprintf(stderr, "Welcome to %s %s, compiled at %s, %s", PACKAGE_NAME, #if defined(GITVERSION) GITVERSION, #else PACKAGE_VERSION, #endif __DATE__, __TIME__); fprintf(stderr, "\n"); fprintf(stderr, " http://opendigitalradio.org\n\n"); if (argc < 2) { usage(argv[0]); return 1; } AudioEnc audio_enc; std::string startupcheck; int ch=0; int index; while(ch != -1) { ch = getopt_long(argc, argv, "aAhDlRVb:B:c:e:f:G:i:j:k:L:o:r:d:p:P:s:S:T:v:w:Wg:C:", longopts, &index); switch (ch) { case 0: // AAC-LC audio_enc.aot = AOT_DABPLUS_AAC_LC; break; case 1: // SBR audio_enc.aot = AOT_DABPLUS_SBR; break; case 2: // PS audio_enc.aot = AOT_DABPLUS_PS; break; case 3: // FIFO Silence audio_enc.continue_after_eof = true; // Enable drift compensation, otherwise we would block instead of inserting silence. 
audio_enc.drift_compensation = true; break; case 4: // DAB channel mode audio_enc.dab_channel_mode = optarg; if (not( audio_enc.dab_channel_mode == "s" or audio_enc.dab_channel_mode == "d" or audio_enc.dab_channel_mode == "j" or audio_enc.dab_channel_mode == "m")) { fprintf(stderr, "Invalid DAB channel mode\n"); usage(argv[0]); return 1; } break; case 5: // DAB psy model audio_enc.dab_psy_model = std::stoi(optarg); break; case 6: // Enable loopback decoder for AAC audio_enc.decode_wavfilename = optarg; break; case 7: // Identifier for in-band version information audio_enc.identifier = optarg; /* The 32 character length restriction is arbitrary, but guarantees * that the EDI packet will not grow too large */ if (audio_enc.identifier.size() > 32) { fprintf(stderr, "Output Identifier too long!\n"); usage(argv[0]); return 1; } break; case 8: // EDI output FEC audio_enc.edi_output.set_fec(std::stoi(optarg)); break; case 9: // --startup-check startupcheck = optarg; break; case 'a': audio_enc.selected_encoder = encoder_selection_t::toolame_dab; break; case 'A': audio_enc.afterburner = false; break; case 'b': audio_enc.bitrate = std::stoi(optarg); break; case 'B': audio_enc.bandwidth = std::stoi(optarg); break; case 'c': audio_enc.channels = std::stoi(optarg); break; case 'd': audio_enc.alsa_device = optarg; break; case 'D': audio_enc.drift_compensation = true; break; case 'e': audio_enc.edi_output_uris.push_back(optarg); break; case 'T': audio_enc.tist_enabled = true; audio_enc.tist_delay_ms = std::stoi(optarg); break; case 'f': if (strcmp(optarg, "raw") == 0) { audio_enc.raw_input = 1; } else if (strcmp(optarg, "wav") != 0) { usage(argv[0]); return 1; } break; case 10: fprintf(stderr, "WARNING: the --vlc-gain option has been deprecated in favour of --audio-gain\n"); // fallthrough case 'g': audio_enc.gain_dB = std::stod(optarg); break; #ifdef HAVE_GST case 'G': audio_enc.gst_uri = optarg; break; case 11: // --gst-pipeline audio_enc.gst_pipeline = optarg; break; #endif 
case 'i': audio_enc.infile = optarg; break; case 'j': #if HAVE_JACK audio_enc.jack_name = optarg; #else fprintf(stderr, "JACK disabled at compile time!\n"); return 1; #endif break; case 'k': audio_enc.keyfile = optarg; break; case 'l': audio_enc.show_level = 1; break; case 'o': audio_enc.output_uris.push_back(optarg); break; case 'p': audio_enc.padlen = std::stoi(optarg); break; case 'P': audio_enc.pad_ident = optarg; break; case 'r': audio_enc.sample_rate = std::stoi(optarg); break; case 'R': audio_enc.restart_on_fault = true; break; case 's': audio_enc.silence_timeout = std::stoi(optarg); if (audio_enc.silence_timeout > 0 && audio_enc.silence_timeout < 3600*24*30) { audio_enc.die_on_silence = true; } else { fprintf(stderr, "Invalid silence timeout (%d) given!\n", audio_enc.silence_timeout); return 1; } break; case 'S': audio_enc.send_stats_to = optarg; break; case 'w': audio_enc.icytext_file = optarg; break; case 'W': audio_enc.icytext_dlplus = true; break; #ifdef HAVE_VLC case 'v': audio_enc.vlc_uri = optarg; break; case 'C': audio_enc.vlc_cache = optarg; break; case 'L': audio_enc.vlc_additional_opts.push_back(optarg); break; #else case 'v': fprintf(stderr, "VLC input not enabled at compile time!\n"); return 1; #endif case 'V': audio_enc.verbosity++; break; case '?': case 'h': usage(argv[0]); return 1; } } if (not startupcheck.empty()) { etiLog.level(info) << "Running startup check '" << startupcheck << "'"; int wstatus = system(startupcheck.c_str()); if (WIFEXITED(wstatus)) { if (WEXITSTATUS(wstatus) == 0) { etiLog.level(info) << "Startup check ok"; } else { etiLog.level(error) << "Startup check failed, returned " << WEXITSTATUS(wstatus); return 1; } } else { etiLog.level(error) << "Startup check failed, child didn't terminate normally"; return 1; } } try { return audio_enc.run(); } catch (const std::runtime_error& e) { fprintf(stderr, "ODR-AudioEnc failed to start: %s\n", e.what()); return 1; } }