/* ------------------------------------------------------------------ * Copyright (C) 2011 Martin Storsjo * Copyright (C) 2020 Matthias P. Braendli * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. * See the License for the specific language governing permissions * and limitations under the License. * ------------------------------------------------------------------- */ /*! \mainpage Introduction * The ODR-mmbTools ODR-AudioEnc Audio encoder can encode audio for * ODR-DabMux, both DAB and DAB+. The DAB encoder is based on toolame. The * DAB+ encoder requires the Fraunhofer FDK AAC library, with the * necessary patches for 960-transform to do DAB+ broadcast encoding. * * This document describes some internals of the encoder, and is intended * to help developers understand and improve the software package. * * User documentation is available in the README and in the ODR-mmbTools * Guide, available on the www.opendigitalradio.org website.
* * The readme for the whole package is \ref md_README * * Interesting starting points for the encoder * - \ref odr-audioenc.cpp Main encoder file * - \ref VLCInput.h VLC Input * - \ref GSTInput.h GST Input * - \ref AlsaInput.h Alsa Input * - \ref JackInput.h JACK Input * - \ref Outputs.h ZeroMQ, file and EDI outputs * - \ref SampleQueue.h * - \ref charset.h Charset conversion * - \ref toolame.h libtoolame API * - \ref AudioLevel * - \ref DataInput * - \ref SilenceDetection * * \file odr-audioenc.cpp * \brief The main file for the audio encoder */ #include "config.h" #include "AlsaInput.h" #include "FileInput.h" #include "JackInput.h" #include "VLCInput.h" #include "GSTInput.h" #include "SampleQueue.h" #include "AACDecoder.h" #include "StatsPublish.h" #include "Outputs.h" #include "common.h" #include "wavfile.h" extern "C" { #include "encryption.h" #include "utils.h" } #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "aacenc_lib.h" extern "C" { #include "fec/fec.h" #include "libtoolame-dab/toolame.h" } /* Due to memory leaks in the VLC input, * we don't want to restart it endlessly. */ constexpr int MAX_FAULTS_ALLOWED = 5; using vec_u8 = std::vector; using namespace std; /*! Print the program description and the command line help to stderr. * * The text is emitted by two fprintf() calls: the first one carries the * version (GITVERSION when defined, PACKAGE_VERSION otherwise) and the * usage line, the second one lists all options. * * The ALSA, JACK, VLC and GStreamer option lists are only included when * HAVE_ALSA, HAVE_JACK, HAVE_VLC or HAVE_GST is set at compile time; * otherwise a notice that the input was disabled is printed instead. * * \param name Program name, inserted into the usage line. */ void usage(const char* name) { fprintf(stderr, "ODR-AudioEnc %s is an audio encoder for both DAB and DAB+.\n" "The encoder can read from JACK, ALSA or\n" "a file source and encode to a ZeroMQ output for ODR-DabMux.\n" "It can also use libvlc and GStreamer as input.\n" "\n" "The -D option enables sound card clock drift compensation.\n" "A consumer sound card has a clock that is always a bit imprecise, and\n" "would drift off slowly. ODR-DabMux cannot handle such drift\n" "because it would have to throw away or insert complete encoded audio frames,\n" "which would create audible artifacts. This drift compensation can\n" "make sure that the encoding rate is correct by inserting or deleting\n" "audio samples. 
It can be used for both ALSA and VLC inputs and requires\n" "a system clock synchronised using NTP.\n" "\n" "When this option is enabled, you will see U and O printed in the\n" "console. These correspond to audio underruns and overruns caused\n" "by sound card clock drift. When sparse, they should not create audible\n" "artifacts.\n" "\n" "This encoder is able to insert PAD (DLS and MOT Slideshow)\n" "generated by ODR-PadEnc.\n" "\nUsage:\n" "%s [INPUT SELECTION] [OPTION...]\n", #if defined(GITVERSION) GITVERSION #else PACKAGE_VERSION #endif , name); fprintf(stderr, " For the alsa input:\n" #if HAVE_ALSA " -d, --device=alsa_device Set ALSA input device.\n" #else " The Alsa input was disabled at compile time\n" #endif " For the file input:\n" " -i, --input=FILENAME Input filename (use -i - for stdin).\n" " -f, --format={ wav, raw } Set input file format (default: wav).\n" " --fifo-silence Input file is fifo and encoder generates silence when fifo is empty. Ignore EOF.\n" " For the JACK input:\n" #if HAVE_JACK " -j, --jack=name Enable JACK input, and define our name\n" #else " The JACK input was disabled at compile-time\n" #endif " For the VLC input:\n" #if HAVE_VLC " -v, --vlc-uri=uri Enable VLC input and use the URI given as source\n" " -C, --vlc-cache=ms Specify VLC network cache length.\n" " -g, --vlc-gain=db Enable VLC audio compressor, with given compressor-makeup value.\n" " Use this as a workaround to correct the gain for streams that are\n" " much too loud.\n" " -V Increase the VLC verbosity by one (can be given \n" " multiple times)\n" " -L OPTION Give an additional options to VLC (can be given\n" " multiple times)\n" " -w, --write-icy-text=filename Write the ICY Text into the file, so that ODR-PadEnc can read it.\n" " -W, --write-icy-text-dl-plus When writing the ICY Text into the file, add DL Plus information.\n" #else " The VLC input was disabled at compile-time\n" #endif " For the GStreamer input:\n" #if HAVE_GST " -G, --gst-uri=uri Enable GStreamer 
input and use the URI given as source\n" #else " The GStreamer input was disabled at compile-time\n" #endif " Drift compensation\n" " -D, --drift-comp Enable ALSA/VLC sound card drift compensation.\n" " Encoder parameters:\n" " -b, --bitrate={ 8, 16, ..., 192 } Output bitrate in kbps. Must be a multiple of 8.\n" " -c, --channels={ 1, 2 } Nb of input channels (default: 2).\n" " -r, --rate={ 24000, 32000, 48000 } Input sample rate (default: 48000).\n" " DAB specific options\n" " -a, --dab Encode in DAB and not in DAB+.\n" " --dabmode=MODE Channel mode: s/d/j/m\n" " (default: j if stereo, m if mono).\n" " --dabpsy=PSY Psychoacoustic model 0/1/2/3\n" " (default: 1).\n" " DAB+ specific options\n" " -A, --no-afterburner Disable AAC encoder quality increaser.\n" " --aaclc Force the usage of AAC-LC (no SBR, no PS)\n" " --sbr Force the usage of SBR (HE-AAC)\n" " --ps Force the usage of SBR and PS (HE-AACv2)\n" " -B, --bandwidth=VALUE Set the AAC encoder bandwidth to VALUE [Hz].\n" " --decode=FILE Decode the AAC back to a wav file (loopback test).\n" " Output and PAD parameters:\n" " --identifier=ID An identifier string that is sent in the ODRv EDI TAG. Max 32 characters length.\n" " -o, --output=URI Output ZMQ uri. (e.g. 'tcp://localhost:9000')\n" " -or- Output file uri. (e.g. 'file.dabp')\n" " -or- a single dash '-' to denote stdout\n" " If more than one ZMQ output is given, the socket\n" " will be connected to all listed endpoints.\n" " -e, --edi=URI EDI output uri, (e.g. 
'tcp://localhost:7000')\n" " -T, --timestamp-delay=DELAY_MS Enabled timestamps in EDI (requires TAI clock bulletin download) and\n" " add a delay (in milliseconds) to the timestamps carried in EDI\n" " -k, --secret-key=FILE Enable ZMQ encryption with the given secret key.\n" " -p, --pad=BYTES Enable PAD insertion and set PAD size in bytes.\n" " -P, --pad-fifo=FILENAME Set PAD data input fifo name" " (default: /tmp/pad.fifo).\n" " -l, --level Show peak audio level indication.\n" " -S, --stats=SOCKET_NAME Connect to the specified UNIX Datagram socket and send statistics.\n" " This allows external tools to collect audio and drift compensation stats.\n" " -s, --silence=TIMEOUT Abort encoding after TIMEOUT seconds of silence.\n" "\n" "Only the tcp:// zeromq transport has been tested until now,\n" " but epgm://, pgm:// and ipc:// are also accepted\n" ); } /*! Setup the FDK AAC encoder * * \return 0 on success */ static int prepare_aac_encoder( HANDLE_AACENCODER *encoder, int subchannel_index, int channels, int sample_rate, int afterburner, uint32_t bandwidth, int *aot) { CHANNEL_MODE mode; switch (channels) { case 1: mode = MODE_1; break; case 2: mode = MODE_2; break; default: fprintf(stderr, "Unsupported channels number %d\n", channels); return 1; } if (aacEncOpen(encoder, 0x01|0x02|0x04, channels) != AACENC_OK) { fprintf(stderr, "Unable to open encoder\n"); return 1; } if (*aot == AOT_NONE) { if(channels == 2 && subchannel_index <= 6) { *aot = AOT_DABPLUS_PS; } else if((channels == 1 && subchannel_index <= 8) || (channels == 2 && subchannel_index <= 10)) { *aot = AOT_DABPLUS_SBR; } else { *aot = AOT_DABPLUS_AAC_LC; } } fprintf(stderr, "Using %d subchannels. AAC type: %s%s%s. channels=%d, sample_rate=%d\n", subchannel_index, *aot == AOT_DABPLUS_PS ? "HE-AAC v2" : "", *aot == AOT_DABPLUS_SBR ? "HE-AAC" : "", *aot == AOT_DABPLUS_AAC_LC ? 
"AAC-LC" : "", channels, sample_rate); if (aacEncoder_SetParam(*encoder, AACENC_AOT, *aot) != AACENC_OK) { fprintf(stderr, "Unable to set the AOT\n"); return 1; } if (aacEncoder_SetParam(*encoder, AACENC_SAMPLERATE, sample_rate) != AACENC_OK) { fprintf(stderr, "Unable to set the sample rate\n"); return 1; } if (aacEncoder_SetParam(*encoder, AACENC_CHANNELMODE, mode) != AACENC_OK) { fprintf(stderr, "Unable to set the channel mode\n"); return 1; } if (aacEncoder_SetParam(*encoder, AACENC_CHANNELORDER, 1) != AACENC_OK) { fprintf(stderr, "Unable to set the wav channel order\n"); return 1; } if (aacEncoder_SetParam(*encoder, AACENC_GRANULE_LENGTH, 960) != AACENC_OK) { fprintf(stderr, "Unable to set the granule length\n"); return 1; } if (aacEncoder_SetParam(*encoder, AACENC_TRANSMUX, TT_DABPLUS) != AACENC_OK) { fprintf(stderr, "Unable to set the RAW transmux\n"); return 1; } /*if (aacEncoder_SetParam(*encoder, AACENC_BITRATEMODE, AACENC_BR_MODE_SFR) * != AACENC_OK) { fprintf(stderr, "Unable to set the bitrate mode\n"); return 1; }*/ fprintf(stderr, "AAC bitrate set to: %d\n", subchannel_index*8000); if (aacEncoder_SetParam(*encoder, AACENC_BITRATE, subchannel_index*8000) != AACENC_OK) { fprintf(stderr, "Unable to set the bitrate\n"); return 1; } if (aacEncoder_SetParam(*encoder, AACENC_AFTERBURNER, afterburner) != AACENC_OK) { fprintf(stderr, "Unable to set the afterburner mode\n"); return 1; } if (!afterburner) { fprintf(stderr, "Warning: Afterburned disabled!\n"); } if (bandwidth > 0) { fprintf(stderr, "Setting bandwidth is %d\n", bandwidth); if (aacEncoder_SetParam(*encoder, AACENC_BANDWIDTH, bandwidth) != AACENC_OK) { fprintf(stderr, "Unable to set bandwidth mode\n"); return 1; } } if (aacEncEncode(*encoder, nullptr, nullptr, nullptr, nullptr) != AACENC_OK) { fprintf(stderr, "Unable to initialize the encoder\n"); return 1; } const uint32_t bw = aacEncoder_GetParam(*encoder, AACENC_BANDWIDTH); fprintf(stderr, "Bandwidth is %d\n", bw); return 0; } 
chrono::steady_clock::time_point timepoint_last_compensation; /*! Do drift compensation by distributing the missing samples over * the whole input buffer instead of having a bunch of missing samples * at the end only. * * This expands (in time) the received samples over the whole duration * of the buffer. */ static void expand_missing_samples(vec_u8& buf, int channels, size_t valid_bytes) { const size_t bytes_per_sample = BYTES_PER_SAMPLE * channels; assert(buf.size() % bytes_per_sample == 0); assert(buf.size() > valid_bytes); const size_t valid_samples = valid_bytes / bytes_per_sample; const size_t missing_samples = (buf.size() / bytes_per_sample) - valid_samples; // We only fix up to 10% missing samples if (missing_samples * bytes_per_sample > buf.size() / 10) { for (size_t i = valid_samples * bytes_per_sample; i < buf.size(); i++) { buf[i] = 0; } } else { const vec_u8 source_buf(buf); size_t source_ix = 0; for (size_t i = 0; i < buf.size() / bytes_per_sample; i++) { for (size_t j = 0; j < bytes_per_sample; j++) { buf.at(bytes_per_sample*i + j) = source_buf.at(source_ix + j); } // Do not advance the source index if the sample index is // at the spots where we want to duplicate the source sample if (not (i > 0 and (i % (valid_samples / missing_samples) == 0))) { source_ix += bytes_per_sample; } } } } /*! Wait the proper amount of time to throttle down to nominal encoding * rate, if drift compensation is enabled. 
*/ static void drift_compensation_delay(int sample_rate, int channels, size_t bytes) { const size_t bytes_per_second = sample_rate * BYTES_PER_SAMPLE * channels; size_t bytes_compensate = bytes; const auto wait_time = std::chrono::milliseconds(1000ul * bytes_compensate / bytes_per_second); assert(1000ul * bytes_compensate % bytes_per_second == 0); const auto curTime = std::chrono::steady_clock::now(); const auto diff = curTime - timepoint_last_compensation; if (diff < wait_time) { auto waiting = wait_time - diff; std::this_thread::sleep_for(waiting); } timepoint_last_compensation += wait_time; } #define no_argument 0 #define required_argument 1 #define optional_argument 2 #define STATUS_PAD_INSERTED 0x1 #define STATUS_OVERRUN 0x2 #define STATUS_UNDERRUN 0x4 struct AudioEnc { public: int sample_rate=48000; int channels=2; // For the ALSA input string alsa_device; // For the file input string infile; bool continue_after_eof = false; int raw_input = 0; // For the VLC input string vlc_uri; string vlc_icytext_file; bool vlc_icytext_dlplus = false; string vlc_gain; string vlc_cache; vector vlc_additional_opts; unsigned verbosity = 0; string gst_uri; string jack_name; bool drift_compensation = false; encoder_selection_t selected_encoder = encoder_selection_t::fdk_dabplus; bool afterburner = true; uint32_t bandwidth = 0; int bitrate = 0; // 0 means default bitrate int dab_psy_model = 1; bool restart_on_fault = false; int fault_counter = 0; std::deque toolame_buffer; shared_ptr file_output; shared_ptr zmq_output; Output::EDI edi_output; string identifier; bool tist_enabled = false; uint32_t tist_delay_ms = 0; vector output_uris; vector edi_output_uris; void *rs_handler = nullptr; AACENC_InfoStruct info = { 0 }; int aot = AOT_NONE; string decode_wavfilename; string dab_channel_mode; /* Keep track of peaks */ int16_t peak_left = 0; int16_t peak_right = 0; /* On silence, die after the silence_timeout expires */ bool die_on_silence = false; int silence_timeout = 0; int 
measured_silence_ms = 0; /* For MOT Slideshow and DLS insertion */ const char* pad_fifo = "/tmp/pad.fifo"; int pad_fd = -1; int padlen = 0; /* Encoder status, see the above STATUS macros */ int status = 0; /* Whether to show the 'sox'-like measurement */ int show_level = 0; /* If not empty, send stats over UNIX DGRAM socket */ string send_stats_to = ""; /* Data for ZMQ CURVE authentication */ char* keyfile = nullptr; char secretkey[CURVE_KEYLEN+1]; SampleQueue queue; HANDLE_AACENCODER encoder = nullptr; unique_ptr decoder; unique_ptr stats_publisher; AudioEnc() : queue(BYTES_PER_SAMPLE, channels, 0, drift_compensation) { } AudioEnc(const AudioEnc&) = delete; AudioEnc& operator=(const AudioEnc&) = delete; ~AudioEnc(); int run(); bool send_frame(const uint8_t *buf, size_t len, int16_t peak_left, int16_t peak_right); shared_ptr initialise_input(); }; int AudioEnc::run() { int num_inputs = 0; #if HAVE_ALSA if (not alsa_device.empty()) num_inputs++; #endif if (not infile.empty()) num_inputs++; #if HAVE_JACK if (not jack_name.empty()) num_inputs++; #endif #if HAVE_VLC if (not vlc_uri.empty()) num_inputs++; #endif #if HAVE_GST if (not gst_uri.empty()) num_inputs++; #endif if (num_inputs == 0) { fprintf(stderr, "No input defined!\n"); return 1; } else if (num_inputs > 1) { fprintf(stderr, "You must define only one possible input, not several!\n"); return 1; } if (selected_encoder == encoder_selection_t::fdk_dabplus) { if (bitrate == 0) { bitrate = 64; } int subchannel_index = bitrate / 8; if (subchannel_index < 1 || subchannel_index > 24) { fprintf(stderr, "Bad subchannel index: %d, must be between 1 and 24. Try other bitrate.\n", subchannel_index); return 1; } if ( ! (sample_rate == 32000 || sample_rate == 48000)) { fprintf(stderr, "Invalid sample rate. Possible values are: 32000, 48000.\n"); return 1; } } else if (selected_encoder == encoder_selection_t::toolame_dab) { if (bitrate == 0) { bitrate = 192; } if ( ! 
(sample_rate == 24000 || sample_rate == 48000)) { fprintf(stderr, "Invalid sample rate. Possible values are: 24000, 48000.\n"); return 1; } } if (padlen < 0) { fprintf(stderr, "Invalid PAD length specified\n"); return 1; } if (output_uris.empty() and edi_output_uris.empty()) { fprintf(stderr, "No output defined\n"); return 1; } for (const auto& uri : output_uris) { if (uri == "-") { if (file_output) { fprintf(stderr, "You can't write to more than one file!\n"); return 1; } file_output = make_shared(stdout); } else if ((uri.compare(0, 6, "tcp://") == 0) || (uri.compare(0, 6, "pgm://") == 0) || (uri.compare(0, 7, "epgm://") == 0) || (uri.compare(0, 6, "ipc://") == 0)) { if (not zmq_output) { zmq_output = make_shared(); } zmq_output->connect(uri.c_str(), keyfile); } else { // We assume it's a file name if (file_output) { fprintf(stderr, "You can't write to more than one file!\n"); return 1; } file_output = make_shared(uri.c_str()); } } for (const auto& uri : edi_output_uris) { if (uri.compare(0, 6, "tcp://") == 0 or uri.compare(0, 6, "udp://") == 0) { auto host_port_sep_ix = uri.find(':', 6); if (host_port_sep_ix != string::npos) { auto host = uri.substr(6, host_port_sep_ix - 6); auto port = std::stoi(uri.substr(host_port_sep_ix + 1)); auto proto = uri.substr(0, 3); if (proto == "tcp") { edi_output.add_tcp_destination(host, port); } else if (proto == "udp") { edi_output.add_udp_destination(host, port); } else { throw logic_error("unhandled proto"); } } else { fprintf(stderr, "Invalid EDI URL host!\n"); } } else { fprintf(stderr, "Invalid EDI protocol!\n"); } } if (not edi_output_uris.empty()) { edi_output.set_tist(tist_enabled, tist_delay_ms); stringstream ss; ss << PACKAGE_NAME << " " << #if defined(GITVERSION) GITVERSION << #else PACKAGE_VERSION << #endif " " << identifier; edi_output.set_odr_version_tag(ss.str()); } if (padlen != 0) { int flags; if (mkfifo(pad_fifo, S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH) != 0) { if (errno != EEXIST) { fprintf(stderr, "Can't create 
pad file: %d!\n", errno); return 1; } } pad_fd = open(pad_fifo, O_RDONLY | O_NONBLOCK); if (pad_fd == -1) { fprintf(stderr, "Can't open pad file!\n"); return 1; } flags = fcntl(pad_fd, F_GETFL, 0); if (fcntl(pad_fd, F_SETFL, flags | O_NONBLOCK)) { fprintf(stderr, "Can't set non-blocking mode in pad file!\n"); return 1; } } vec_u8 input_buf; if (selected_encoder == encoder_selection_t::fdk_dabplus) { int subchannel_index = bitrate / 8; if (prepare_aac_encoder(&encoder, subchannel_index, channels, sample_rate, afterburner, bandwidth, &aot) != 0) { fprintf(stderr, "Encoder preparation failed\n"); return 1; } if (aacEncInfo(encoder, &info) != AACENC_OK) { fprintf(stderr, "Unable to get the encoder info\n"); return 1; } // Each DAB+ frame will need input_size audio bytes const int input_size = channels * BYTES_PER_SAMPLE * info.frameLength; fprintf(stderr, "DAB+ Encoding: framelen=%d (%dB)\n", info.frameLength, input_size); input_buf.resize(input_size); if (not decode_wavfilename.empty()) { decoder.reset(new AACDecoder(decode_wavfilename.c_str())); } } else if (selected_encoder == encoder_selection_t::toolame_dab) { int err = toolame_init(); if (err == 0) { err = toolame_set_samplerate(sample_rate); } if (err == 0) { err = toolame_set_psy_model(dab_psy_model); } if (dab_channel_mode.empty()) { if (channels == 2) { dab_channel_mode = 'j'; // Default to joint-stereo } else if (channels == 1) { dab_channel_mode = 'm'; // Default to mono } else { fprintf(stderr, "Unsupported channels number %d\n", channels); return 1; } } if (err == 0) { err = toolame_set_channel_mode(dab_channel_mode.c_str()[0]); } // setting the ScF-CRC len here depends on set sample rate/channel mode if (err == 0) { err = toolame_set_bitrate(bitrate); } if (err == 0) { err = toolame_set_pad(padlen); } if (err) { fprintf(stderr, "libtoolame-dab init failed: %d\n", err); return err; } input_buf.resize(channels * 1152 * BYTES_PER_SAMPLE); if (not decode_wavfilename.empty()) { fprintf(stderr, "--decode not 
supported for DAB\n"); return 1; } } if (not send_stats_to.empty()) { StatsPublisher *s = nullptr; try { s = new StatsPublisher(send_stats_to); stats_publisher.reset(s); } catch (const runtime_error& e) { fprintf(stderr, "Failed to initialise Stats Publisher: %s", e.what()); if (s != nullptr) { delete s; } return 1; } } /* We assume that we need to call the encoder * enc_calls_per_output before it gives us one encoded audio * frame. This information is used when the alsa drift compensation * is active. This is only valid for FDK-AAC. */ const int enc_calls_per_output = (aot == AOT_DABPLUS_AAC_LC) ? sample_rate / 8000 : sample_rate / 16000; int max_size = 32*input_buf.size() + NUM_SAMPLES_PER_CALL; /*! The SampleQueue \c queue is given to the inputs, so that they * can fill it. */ queue.set_max_size(max_size); /* symsize=8, gfpoly=0x11d, fcr=0, prim=1, nroots=10, pad=135 */ rs_handler = init_rs_char(8, 0x11d, 0, 1, 10, 135); if (rs_handler == nullptr) { perror("init_rs_char failed"); return 1; } shared_ptr input; try { input = initialise_input(); } catch (const runtime_error& e) { fprintf(stderr, "Initialising input triggered exception: %s\n", e.what()); return 1; } if (zmq_output) { zmq_output->set_encoder_type(selected_encoder, bitrate); } int outbuf_size = 0; vec_u8 outbuf; switch (selected_encoder) { case encoder_selection_t::fdk_dabplus: outbuf_size = bitrate/8*120; outbuf.resize(24*120); if(outbuf_size % 5 != 0) { fprintf(stderr, "Warning: (outbuf_size mod 5) = %d\n", outbuf_size % 5); } break; case encoder_selection_t::toolame_dab: outbuf_size = 4092; outbuf.resize(outbuf_size); fprintf(stderr, "Setting outbuf size to %zu\n", outbuf.size()); break; } unsigned char pad_buf[padlen + 1]; if (restart_on_fault) { fprintf(stderr, "Autorestart has been deprecated and will be removed in the future!\n"); this_thread::sleep_for(chrono::seconds(2)); } fprintf(stderr, "Starting encoding\n"); int retval = 0; int send_error_count = 0; timepoint_last_compensation = 
chrono::steady_clock::now(); int calls = 0; // for checking ssize_t read_bytes = 0; do { // --------------- Read data from the PAD fifo int calculated_padlen = 0; if (padlen != 0) { ssize_t pad_ret = read(pad_fd, pad_buf, padlen + 1); if ((pad_ret < 0 && errno == EAGAIN) || pad_ret == 0) { // If this condition passes, there is no data to be read } else if (pad_ret == padlen + 1) { // Otherwise, you're good to go and buffer should contain "count" bytes. calculated_padlen = pad_buf[padlen]; if (calculated_padlen < 2) { throw runtime_error("Invalid X-PAD length " + to_string(calculated_padlen)); } /* AAC: skip PAD if only zero F-PAD (saves four bytes) * See ยง5.4.3 in ETSI TS 102 563 */ if ( selected_encoder == encoder_selection_t::fdk_dabplus && calculated_padlen == 2 && pad_buf[padlen - 2] == 0x00 && pad_buf[padlen - 1] == 0x00 ) { calculated_padlen = 0; } } else { // Some other error occurred during read. fprintf(stderr, "Unable to read from PAD!\n"); break; } } if (calculated_padlen > 0) { status |= STATUS_PAD_INSERTED; } // -------------- Read Data memset(outbuf.data(), 0x00, outbuf_size); memset(input_buf.data(), 0x00, input_buf.size()); /*! \section DataInput * We read data input either in a blocking way (file input, VLC or ALSA * without drift compensation) or in a non-blocking way (VLC or ALSA * with drift compensation, JACK). * * All inputs write samples into the queue, and either use \c pop() or * \c pop_wait() depending on if it's blocking or not * * In non-blocking, the \c queue makes the data available without delay, and the * \c drift_compensation_delay() function handles rate throttling. 
*/ if (input->fault_detected()) { fprintf(stderr, "Detected fault in input!\n"); if (restart_on_fault) { fault_counter++; if (fault_counter >= MAX_FAULTS_ALLOWED) { fprintf(stderr, "Maximum number of input faults reached, aborting"); retval = 5; break; } try { input = initialise_input(); } catch (const runtime_error& e) { fprintf(stderr, "Initialising input triggered exception: %s\n", e.what()); retval = 5; break; } continue; } else { retval = 5; break; } } if (not input->read_source(input_buf.size())) { fprintf(stderr, "End of input reached\n"); retval = 0; break; } if (drift_compensation) { size_t overruns = 0; size_t bytes_from_queue = queue.pop(input_buf.data(), input_buf.size(), &overruns); // returns bytes if (bytes_from_queue != input_buf.size()) { expand_missing_samples(input_buf, channels, bytes_from_queue); } read_bytes = input_buf.size(); drift_compensation_delay(sample_rate, channels, read_bytes); if (bytes_from_queue != input_buf.size()) { status |= STATUS_UNDERRUN; if (stats_publisher) { stats_publisher->notify_underrun(); } } if (overruns) { status |= STATUS_OVERRUN; if (stats_publisher) { stats_publisher->notify_overrun(); } } } else { const int timeout_ms = 10000; read_bytes = input_buf.size(); size_t overruns = 0; /*! pop_wait() must return after a timeout, otherwise the silence detector cannot do * its job. */ ssize_t bytes_from_queue = queue.pop_wait(input_buf.data(), read_bytes, timeout_ms, &overruns); // returns bytes if (overruns) { throw logic_error("Queue overrun in non-drift compensation!"); } if (bytes_from_queue < read_bytes) { // queue timeout occurred fprintf(stderr, "Detected fault in input! 
No data in time.\n"); if (restart_on_fault) { fault_counter++; if (fault_counter >= MAX_FAULTS_ALLOWED) { fprintf(stderr, "Maximum number of input faults reached, aborting"); retval = 5; break; } try { input = initialise_input(); } catch (const runtime_error& e) { fprintf(stderr, "Initialising input triggered exception: %s\n", e.what()); return 1; } continue; } else { retval = 5; break; } } } /*! \section MetadataFromSource * The VLC input is the only input that can also give us metadata, which * we can hand over to ODR-PadEnc. */ #if HAVE_VLC if (not vlc_uri.empty() and not vlc_icytext_file.empty()) { // Using std::dynamic_pointer_cast would be safer, but is C++17 VLCInput *vlc_input = (VLCInput*)(input.get()); vlc_input->write_icy_text(vlc_icytext_file, vlc_icytext_dlplus); } #endif /*! \section AudioLevel * Audio level measurement is always done assuming we have two * channels, and is formally wrong in mono, but still gives * numbers one can use. * * \todo fix level measurement in mono */ for (int i = 0; i < read_bytes; i+=4) { int16_t l = input_buf[i] | (input_buf[i+1] << 8); int16_t r = input_buf[i+2] | (input_buf[i+3] << 8); peak_left = std::max(peak_left, l); peak_right = std::max(peak_right, r); } if (stats_publisher) { stats_publisher->update_audio_levels(peak_left, peak_right); } /*! \section SilenceDetection * Silence detection looks at the audio level and is * only useful if the connection dropped, or if no data is available. It is not * useful if the source is nearly silent (some noise present), because the * threshold is 0, and not configurable. The rationale is that we want to * guard against connection issues, not source level issues. 
*/ if (die_on_silence && std::max(peak_left, peak_right) == 0) { const unsigned int frame_time_msec = 1000ul * read_bytes / (BYTES_PER_SAMPLE * channels * sample_rate); measured_silence_ms += frame_time_msec; if (measured_silence_ms > 1000*silence_timeout) { fprintf(stderr, "Silence detected for %d seconds, aborting.\n", silence_timeout); retval = 2; break; } } else { measured_silence_ms = 0; } int numOutBytes = 0; if (read_bytes and selected_encoder == encoder_selection_t::fdk_dabplus) { AACENC_BufDesc in_buf = { 0 }, out_buf = { 0 }; AACENC_InArgs in_args = { 0 }; AACENC_OutArgs out_args = { 0 }; // -------------- AAC Encoding // int in_identifier[] = {IN_AUDIO_DATA, IN_ANCILLRY_DATA}; int out_identifier = OUT_BITSTREAM_DATA; void *in_ptr[2], *out_ptr; int in_size[2], in_elem_size[2]; int out_size, out_elem_size; in_ptr[0] = input_buf.data(); in_ptr[1] = pad_buf + (padlen - calculated_padlen); // offset due to unused PAD bytes in_size[0] = read_bytes; in_size[1] = calculated_padlen; in_elem_size[0] = BYTES_PER_SAMPLE; in_elem_size[1] = sizeof(uint8_t); in_args.numInSamples = input_buf.size()/BYTES_PER_SAMPLE; in_args.numAncBytes = calculated_padlen; in_buf.numBufs = calculated_padlen ? 2 : 1; // Samples + Data / Samples in_buf.bufs = (void**)&in_ptr; in_buf.bufferIdentifiers = in_identifier; in_buf.bufSizes = in_size; in_buf.bufElSizes = in_elem_size; out_ptr = outbuf.data(); out_size = outbuf.size(); out_elem_size = 1; out_buf.numBufs = 1; out_buf.bufs = &out_ptr; out_buf.bufferIdentifiers = &out_identifier; out_buf.bufSizes = &out_size; out_buf.bufElSizes = &out_elem_size; AACENC_ERROR err; if ((err = aacEncEncode(encoder, &in_buf, &out_buf, &in_args, &out_args)) != AACENC_OK) { if (err == AACENC_ENCODE_EOF) { fprintf(stderr, "encoder error: EOF reached\n"); break; } fprintf(stderr, "Encoding failed (%d)\n", err); retval = 3; break; } calls++; numOutBytes = out_args.numOutBytes; } else if (selected_encoder == encoder_selection_t::toolame_dab) { /*! 
\note toolame expects the audio to be in another shape as * we have in input_buf, and we need to convert first */ short input_buffers[2][1152]; if (channels == 1) { memcpy(input_buffers[0], input_buf.data(), 1152 * BYTES_PER_SAMPLE); } else if (channels == 2) { for (int i = 0; i < 1152; i++) { int16_t l = input_buf[4*i] | (input_buf[4*i+1] << 8); int16_t r = input_buf[4*i+2] | (input_buf[4*i+3] << 8); input_buffers[0][i] = l; input_buffers[1][i] = r; } } else { fprintf(stderr, "INTERNAL ERROR! invalid number of channels\n"); } if (read_bytes) { numOutBytes = toolame_encode_frame(input_buffers, pad_buf, calculated_padlen, outbuf.data(), outbuf.size()); } else { numOutBytes = toolame_finish(outbuf.data(), outbuf.size()); } } if (numOutBytes != 0 and decoder) { try { decoder->decode_frame(outbuf.data(), numOutBytes); } catch (runtime_error &e) { fprintf(stderr, "Decoding failed with: %s\n", e.what()); return 1; } } /* Check if the encoder has generated output data. * DAB+ requires RS encoding, which is not done in ODR-DabMux and not necessary * for DAB. */ if (numOutBytes != 0 and selected_encoder == encoder_selection_t::fdk_dabplus) { // Our timing code depends on this if (calls != enc_calls_per_output) { fprintf(stderr, "INTERNAL ERROR! 
calls=%d" ", expected %d\n", calls, enc_calls_per_output); } calls = 0; int row, col; unsigned char buf_to_rs_enc[110]; unsigned char rs_enc[10]; const int subchannel_index = bitrate / 8; for(row=0; row < subchannel_index; row++) { for(col=0;col < 110; col++) { buf_to_rs_enc[col] = outbuf[subchannel_index * col + row]; } encode_rs_char(rs_handler, buf_to_rs_enc, rs_enc); for(col=110; col<120; col++) { outbuf[subchannel_index * col + row] = rs_enc[col-110]; assert(subchannel_index * col + row < outbuf_size); } } numOutBytes = outbuf_size; } if (numOutBytes > 0 and selected_encoder == encoder_selection_t::toolame_dab) { toolame_buffer.insert(toolame_buffer.end(), outbuf.begin(), outbuf.begin() + numOutBytes); // ODR-DabMux expects frames of length 3*bitrate const size_t frame_len = 3 * bitrate; while (toolame_buffer.size() > frame_len) { vec_u8 frame(frame_len); // this is probably not very efficient std::copy(toolame_buffer.begin(), toolame_buffer.begin() + frame_len, frame.begin()); toolame_buffer.erase(toolame_buffer.begin(), toolame_buffer.begin() + frame_len); bool success = send_frame(frame.data(), frame.size(), peak_left, peak_right); if (not success) { fprintf(stderr, "Send error !\n"); send_error_count ++; } } } else if (numOutBytes > 0 and selected_encoder == encoder_selection_t::fdk_dabplus) { bool success = send_frame(outbuf.data(), numOutBytes, peak_left, peak_right); if (not success) { fprintf(stderr, "Send error !\n"); send_error_count ++; } } if (send_error_count > 10) { fprintf(stderr, "Send failed ten times, aborting!\n"); retval = 4; break; } if (numOutBytes != 0) { if (show_level) { if (channels == 1) { fprintf(stderr, "\rIn: [%-6s] %1s %1s %1s", level(1, std::max(peak_right, peak_left)), status & STATUS_PAD_INSERTED ? "P" : " ", status & STATUS_UNDERRUN ? "U" : " ", status & STATUS_OVERRUN ? 
"O" : " "); } else if (channels == 2) { fprintf(stderr, "\rIn: [%6s|%-6s] %1s %1s %1s", level(0, peak_left), level(1, peak_right), status & STATUS_PAD_INSERTED ? "P" : " ", status & STATUS_UNDERRUN ? "U" : " ", status & STATUS_OVERRUN ? "O" : " "); } } else { if (status & STATUS_OVERRUN) { fprintf(stderr, "O"); } if (status & STATUS_UNDERRUN) { fprintf(stderr, "U"); } } if (stats_publisher) { stats_publisher->send_stats(); } peak_right = 0; peak_left = 0; status = 0; } fflush(stdout); } while (read_bytes > 0); fprintf(stderr, "\n"); return retval; } bool AudioEnc::send_frame(const uint8_t *buf, size_t len, int16_t peak_left, int16_t peak_right) { if (file_output) { file_output->update_audio_levels(peak_left, peak_right); return file_output->write_frame(buf, len); } else if (zmq_output) { zmq_output->update_audio_levels(peak_left, peak_right); return zmq_output->write_frame(buf, len); } else if (edi_output.enabled()) { edi_output.update_audio_levels(peak_left, peak_right); switch (selected_encoder) { case encoder_selection_t::fdk_dabplus: { // STI/EDI specifies that one AF packet must contain 24ms worth of data, // therefore we must split the superframe into five parts if (len % 5 != 0) { throw logic_error("Superframe size not multiple of 5"); } const size_t blocksize = len/5; for (size_t i = 0; i < 5; i++) { bool success = edi_output.write_frame(buf + i * blocksize, blocksize); if (not success) { return false; } } return true; } case encoder_selection_t::toolame_dab: return edi_output.write_frame(buf, len); } } return false; } AudioEnc::~AudioEnc() { file_output.reset(); zmq_output.reset(); if (rs_handler != nullptr) { free_rs_char(rs_handler); } if (encoder != nullptr and selected_encoder == encoder_selection_t::fdk_dabplus) { aacEncClose(&encoder); } } shared_ptr AudioEnc::initialise_input() { shared_ptr input; if (not infile.empty()) { input = make_shared(infile, raw_input, sample_rate, continue_after_eof, queue); } #if HAVE_JACK else if (not jack_name.empty()) 
{ input = make_shared(jack_name, channels, sample_rate, queue); } #endif #if HAVE_VLC else if (not vlc_uri.empty()) { input = make_shared(vlc_uri, sample_rate, channels, verbosity, vlc_gain, vlc_cache, vlc_additional_opts, queue); } #endif #if HAVE_GST else if (not gst_uri.empty()) { input = make_shared(gst_uri, sample_rate, channels, queue); } #endif #if HAVE_ALSA else if (drift_compensation) { input = make_shared(alsa_device, channels, sample_rate, queue); } else { input = make_shared(alsa_device, channels, sample_rate, queue); } #endif if (not input) { throw logic_error("Initialising input incomplete!"); } input->prepare(); return input; } int main(int argc, char *argv[]) { AudioEnc audio_enc; const struct option longopts[] = { {"bitrate", required_argument, 0, 'b'}, {"bandwidth", required_argument, 0, 'B'}, {"channels", required_argument, 0, 'c'}, {"dabmode", required_argument, 0, 4 }, {"dabpsy", required_argument, 0, 5 }, {"device", required_argument, 0, 'd'}, {"edi", required_argument, 0, 'e'}, {"timestamp-delay", required_argument, 0, 'T'}, {"decode", required_argument, 0, 6 }, {"format", required_argument, 0, 'f'}, {"gst-uri", required_argument, 0, 'G'}, {"identifier", required_argument, 0, 7 }, {"input", required_argument, 0, 'i'}, {"jack", required_argument, 0, 'j'}, {"output", required_argument, 0, 'o'}, {"pad", required_argument, 0, 'p'}, {"pad-fifo", required_argument, 0, 'P'}, {"rate", required_argument, 0, 'r'}, {"secret-key", required_argument, 0, 'k'}, {"silence", required_argument, 0, 's'}, {"stats", required_argument, 0, 'S'}, {"vlc-cache", required_argument, 0, 'C'}, {"vlc-gain", required_argument, 0, 'g'}, {"vlc-uri", required_argument, 0, 'v'}, {"vlc-opt", required_argument, 0, 'L'}, {"write-icy-text", required_argument, 0, 'w'}, {"write-icy-text-dl-plus", no_argument, 0, 'W'}, {"aaclc", no_argument, 0, 0 }, {"dab", no_argument, 0, 'a'}, {"drift-comp", no_argument, 0, 'D'}, {"fifo-silence", no_argument, 0, 3 }, {"help", no_argument, 0, 'h'}, 
{"level", no_argument, 0, 'l'}, {"no-afterburner", no_argument, 0, 'A'}, {"ps", no_argument, 0, 2 }, {"restart", no_argument, 0, 'R'}, {"sbr", no_argument, 0, 1 }, {"verbosity", no_argument, 0, 'V'}, {0, 0, 0, 0}, }; fprintf(stderr, "Welcome to %s %s, compiled at %s, %s", PACKAGE_NAME, #if defined(GITVERSION) GITVERSION, #else PACKAGE_VERSION, #endif __DATE__, __TIME__); fprintf(stderr, "\n"); fprintf(stderr, " http://opendigitalradio.org\n\n"); if (argc < 2) { usage(argv[0]); return 1; } int ch=0; int index; while(ch != -1) { ch = getopt_long(argc, argv, "aAhDlRVb:B:c:e:f:G:i:j:k:L:o:r:d:p:P:s:S:T:v:w:Wg:C:", longopts, &index); switch (ch) { case 0: // AAC-LC audio_enc.aot = AOT_DABPLUS_AAC_LC; break; case 1: // SBR audio_enc.aot = AOT_DABPLUS_SBR; break; case 2: // PS audio_enc.aot = AOT_DABPLUS_PS; break; case 3: // FIFO Silence audio_enc.continue_after_eof = true; // Enable drift compensation, otherwise we would block instead of inserting silence. audio_enc.drift_compensation = true; break; case 4: // DAB channel mode audio_enc.dab_channel_mode = optarg; if (not( audio_enc.dab_channel_mode == "s" or audio_enc.dab_channel_mode == "d" or audio_enc.dab_channel_mode == "j" or audio_enc.dab_channel_mode == "m")) { fprintf(stderr, "Invalid DAB channel mode\n"); usage(argv[0]); return 1; } break; case 5: // DAB psy model audio_enc.dab_psy_model = std::stoi(optarg); break; case 6: // Enable loopback decoder for AAC audio_enc.decode_wavfilename = optarg; break; case 7: // Identifier for in-band version information audio_enc.identifier = optarg; /* The 32 character length restriction is arbitrary, but guarantees * that the EDI packet will not grow too large */ if (audio_enc.identifier.size() > 32) { fprintf(stderr, "Output Identifier too long!\n"); usage(argv[0]); return 1; } break; case 'a': audio_enc.selected_encoder = encoder_selection_t::toolame_dab; break; case 'A': audio_enc.afterburner = false; break; case 'b': audio_enc.bitrate = std::stoi(optarg); break; case 
'B': audio_enc.bandwidth = std::stoi(optarg); break; case 'c': audio_enc.channels = std::stoi(optarg); break; case 'd': audio_enc.alsa_device = optarg; break; case 'D': audio_enc.drift_compensation = true; break; case 'e': audio_enc.edi_output_uris.push_back(optarg); break; case 'T': audio_enc.tist_enabled = true; audio_enc.tist_delay_ms = std::stoi(optarg); break; case 'f': if (strcmp(optarg, "raw") == 0) { audio_enc.raw_input = 1; } else if (strcmp(optarg, "wav") != 0) { usage(argv[0]); return 1; } break; #ifdef HAVE_GST case 'G': audio_enc.gst_uri = optarg; break; #endif case 'i': audio_enc.infile = optarg; break; case 'j': #if HAVE_JACK audio_enc.jack_name = optarg; #else fprintf(stderr, "JACK disabled at compile time!\n"); return 1; #endif break; case 'k': audio_enc.keyfile = optarg; break; case 'l': audio_enc.show_level = 1; break; case 'o': audio_enc.output_uris.push_back(optarg); break; case 'p': audio_enc.padlen = std::stoi(optarg); break; case 'P': audio_enc.pad_fifo = optarg; break; case 'r': audio_enc.sample_rate = std::stoi(optarg); break; case 'R': audio_enc.restart_on_fault = true; break; case 's': audio_enc.silence_timeout = std::stoi(optarg); if (audio_enc.silence_timeout > 0 && audio_enc.silence_timeout < 3600*24*30) { audio_enc.die_on_silence = true; } else { fprintf(stderr, "Invalid silence timeout (%d) given!\n", audio_enc.silence_timeout); return 1; } break; case 'S': audio_enc.send_stats_to = optarg; break; #ifdef HAVE_VLC case 'v': audio_enc.vlc_uri = optarg; break; case 'w': audio_enc.vlc_icytext_file = optarg; break; case 'W': audio_enc.vlc_icytext_dlplus = true; break; case 'g': audio_enc.vlc_gain = optarg; break; case 'C': audio_enc.vlc_cache = optarg; break; case 'L': audio_enc.vlc_additional_opts.push_back(optarg); break; #else case 'v': case 'w': fprintf(stderr, "VLC input not enabled at compile time!\n"); return 1; #endif case 'V': audio_enc.verbosity++; break; case '?': case 'h': usage(argv[0]); return 1; } } try { return 
audio_enc.run(); } catch (const std::runtime_error& e) { fprintf(stderr, "ODR-AudioEnc failed to start: %s\n", e.what()); return 1; } }