From ad279f82072b0dffa376e95abb67b655fbf902ed Mon Sep 17 00:00:00 2001 From: Stefan Pöschel Date: Thu, 23 Apr 2015 18:04:45 +0200 Subject: Make DLS conversion from (ATM only) UTF-8 to EBU Latin optional This (re-)introduces the ability to use DLS texts already having EBU Latin based charset (e.g. DAB retransmission of an FM station having RDS). The conversion from (ATM only) UTF-8 to EBU Latin based must now be enabled via parameter. If used, the charset parameter only affects the DLS text input. --- src/mot-encoder.cpp | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/src/mot-encoder.cpp b/src/mot-encoder.cpp index 0fd8859..60109f8 100644 --- a/src/mot-encoder.cpp +++ b/src/mot-encoder.cpp @@ -235,7 +235,7 @@ void writeMotPAD(int output_fd, unsigned short int padlen); void create_dls_pads(const std::string& text, const int padlen, const uint8_t charset); -void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t charset); +void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t charset, bool dls_to_ebu); int get_xpadlengthmask(int padlen); @@ -292,13 +292,15 @@ void usage(char* name) " -p, --pad=LENGTH Set the pad length.\n" " Possible values: " ALLOWED_PADLEN "\n" " Default: 58\n" - " -c, --charset=ID Signal the character set encoding defined by ID\n" - " ID = 0: Complete EBU Latin based repertoire\n" - " ID = 1: Latin based common core, Cyrillic, Greek\n" - " ID = 2: EBU Latin based core, Arabic, Hebrew, Cyrillic and Greek\n" - " ID = 3: ISO Latin Alphabet No 2\n" + " -c, --charset=ID ID of the character set encoding used for DLS text input.\n" + " ID = 0: Complete EBU Latin based repertoire\n" + " ID = 1: Latin based common core, Cyrillic, Greek\n" + " ID = 2: EBU Latin based core, Arabic, Hebrew, Cyrillic and Greek\n" + " ID = 3: ISO Latin Alphabet No 2\n" " ID = 15: ISO/IEC 10646 using UTF-8\n" " Default: 0\n" + " -C, --dls-to-ebu Convert each DLS text to Complete EBU Latin based repertoire\n" + " character set encoding (currently only from UTF-8).\n" " -R, --raw-slides Do not process slides. Integrity checks and resizing\n" " slides is skipped. Use this if you know what you are doing !\n" " It is useful only when -d is used\n" @@ -319,6 +321,7 @@ int main(int argc, char *argv[]) int sleepdelay = SLEEPDELAY_DEFAULT; bool raw_slides = false; int charset = CHARSET_COMPLETE_EBU_LATIN; + bool dls_to_ebu = false; const char* dir = NULL; const char* output = "/tmp/pad.fifo"; @@ -326,6 +329,7 @@ int main(int argc, char *argv[]) const struct option longopts[] = { {"charset", required_argument, 0, 'c'}, + {"dls-to-ebu", no_argument, 0, 'C'}, {"dir", required_argument, 0, 'd'}, {"erase", no_argument, 0, 'e'}, {"output", required_argument, 0, 'o'}, @@ -341,11 +345,14 @@ int main(int argc, char *argv[]) int ch=0; int index; while(ch != -1) { - ch = getopt_long(argc, argv, "ehRc:d:o:p:s:t:v", longopts, &index); + ch = getopt_long(argc, argv, "eChRc:d:o:p:s:t:v", longopts, &index); switch (ch) { case 'c': charset = atoi(optarg); break; + case 'C': + dls_to_ebu = true; + break; case 'd': dir = optarg; break; @@ -439,6 +446,14 @@ int main(int argc, char *argv[]) user_charset, charset); } + if (dls_to_ebu) { + if (charset != CHARSET_UTF8) { + fprintf(stderr, "mot-encoder Error: DLS conversion to EBU is currently only supported for UTF-8 input!\n"); + return 1; + } + fprintf(stderr, "mot-encoder converting DLS texts to Complete EBU Latin\n"); + } + int output_fd = open(output, O_WRONLY); if (output_fd == -1) { perror("mot-encoder Error: failed to open output"); @@ -486,7 +501,7 @@ int main(int argc, char *argv[]) if (not dls_file.empty()) { // Maybe we have no slides, always update DLS - writeDLS(output_fd, dls_file, padlen, charset); + writeDLS(output_fd, dls_file, padlen, charset, dls_to_ebu); sleep(sleepdelay); } @@ -510,7 +525,7 @@ int main(int argc, char *argv[]) // Always retransmit DLS after each slide, we want it to be updated frequently if (not dls_file.empty()) { - writeDLS(output_fd, dls_file, padlen, charset); + writeDLS(output_fd, dls_file, padlen, charset, dls_to_ebu); } sleep(sleepdelay); @@ -524,7 +539,7 @@ int main(int argc, char *argv[]) } else if (not dls_file.empty()) { // only DLS // Always retransmit DLS, we want it to be updated frequently - writeDLS(output_fd, dls_file, padlen, charset); + writeDLS(output_fd, dls_file, padlen, charset, dls_to_ebu); sleep(sleepdelay); } @@ -920,7 +935,7 @@ void packMscDG(unsigned char* b, MSCDG* msc, unsigned short int* bsize) } -void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t charset) +void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t charset, bool dls_to_ebu) { std::ifstream dls_fstream(dls_file.c_str()); if (!dls_fstream.is_open()) { @@ -935,7 +950,7 @@ void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t ch // line endings while (std::getline(dls_fstream, line)) { if (not line.empty()) { - if (charset == CHARSET_COMPLETE_EBU_LATIN) { + if (dls_to_ebu) { dls_lines.push_back(charset_converter.convert(line)); } else { @@ -944,6 +959,8 @@ void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t ch // TODO handle the other charsets accordingly } } + if (dls_to_ebu) + charset = CHARSET_COMPLETE_EBU_LATIN; std::stringstream ss; for (size_t i = 0; i < dls_lines.size(); i++) { -- cgit v1.2.3 From 6f5a4f559efe30f32f89c21b7bc382e3bba79c1f Mon Sep 17 00:00:00 2001 From: Stefan Pöschel Date: Thu, 23 Apr 2015 20:22:09 +0200 Subject: Change default DLS text charset to UTF-8 --- src/mot-encoder.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mot-encoder.cpp b/src/mot-encoder.cpp index 60109f8..2e477f6 100644 --- a/src/mot-encoder.cpp +++ b/src/mot-encoder.cpp @@ -298,7 +298,7 @@ void usage(char* name) " ID = 2: EBU Latin based core, Arabic, Hebrew, Cyrillic and Greek\n" " ID = 3: ISO Latin Alphabet No 2\n" " ID = 15: ISO/IEC 10646 using UTF-8\n" - " Default: 0\n" + " Default: 15\n" " -C, --dls-to-ebu Convert each DLS text to Complete EBU Latin based repertoire\n" " character set encoding (currently only from UTF-8).\n" " -R, --raw-slides Do not process slides. Integrity checks and resizing\n" @@ -320,7 +320,7 @@ int main(int argc, char *argv[]) bool erase_after_tx = false; int sleepdelay = SLEEPDELAY_DEFAULT; bool raw_slides = false; - int charset = CHARSET_COMPLETE_EBU_LATIN; + int charset = CHARSET_UTF8; bool dls_to_ebu = false; const char* dir = NULL; -- cgit v1.2.3 From dce47c5c697dc1290d1a1cdfda773e991f90ce15 Mon Sep 17 00:00:00 2001 From: Stefan Pöschel Date: Thu, 23 Apr 2015 21:14:34 +0200 Subject: Add support for DLS text raw UCS-2 BE input --- src/mot-encoder.cpp | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/src/mot-encoder.cpp b/src/mot-encoder.cpp index 2e477f6..3dd604e 100644 --- a/src/mot-encoder.cpp +++ b/src/mot-encoder.cpp @@ -86,6 +86,7 @@ extern "C" { #define CHARSET_EBU_LATIN_CY_GR 1 // EBU Latin based common core, Cyrillic, Greek #define CHARSET_EBU_LATIN_AR_HE_CY_GR 2 // EBU Latin based core, Arabic, Hebrew, Cyrillic and Greek #define CHARSET_ISO_LATIN_ALPHABET_2 3 // ISO Latin Alphabet No 2 +#define CHARSET_UCS2_BE 6 // ISO/IEC 10646 using UCS-2 transformation format, big endian byte order #define CHARSET_UTF8 15 // ISO Latin Alphabet No 2 struct MSCDG { @@ -297,6 +298,7 @@ void usage(char* name) " ID = 1: Latin based common core, Cyrillic, Greek\n" " ID = 2: EBU Latin based core, Arabic, Hebrew, Cyrillic and Greek\n" " ID = 3: ISO Latin Alphabet No 2\n" + " ID = 6: ISO/IEC 10646 using UCS-2 BE\n" " ID = 15: ISO/IEC 10646 using UTF-8\n" " Default: 15\n" " -C, --dls-to-ebu Convert each DLS text to Complete EBU Latin based repertoire\n" @@ -427,6 +429,9 @@ int main(int argc, char *argv[]) case CHARSET_ISO_LATIN_ALPHABET_2: user_charset = "ISO Latin Alphabet 2"; break; + case CHARSET_UCS2_BE: + user_charset = "UCS-2 BE"; + break; case CHARSET_UTF8: user_charset = "UTF-8"; break; @@ -959,19 +964,29 @@ void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t ch // TODO handle the other charsets accordingly } } - if (dls_to_ebu) - charset = CHARSET_COMPLETE_EBU_LATIN; std::stringstream ss; for (size_t i = 0; i < dls_lines.size(); i++) { if (i != 0) { - ss << "\n"; + if (charset == CHARSET_UCS2_BE) + ss << '\0' << '\n'; + else + ss << '\n'; + } + + // UCS-2 BE: if from file the first byte of \0\n remains, remove it + if (charset == CHARSET_UCS2_BE && dls_lines[i].size() % 2) { + dls_lines[i].resize(dls_lines[i].size() - 1); } + ss << dls_lines[i]; } std::string dlstext = ss.str(); using namespace std; + if (dls_to_ebu) + charset = CHARSET_COMPLETE_EBU_LATIN; + // (Re)Create data groups (and thereby toggle the toggle bit) only on (first call or) new text bool dlstext_is_new = dls_pads.empty() || (dlstext != dlstext_prev); @@ -1003,8 +1018,9 @@ size_t dls_get(const std::string& text, const uint8_t charset, const unsigned in bool first_seg = seg_index == 0; bool last_seg = seg_index == seg_count - 1; - const char *seg_text_start = text.c_str() + seg_index * DLS_SEG_LEN_CHAR_MAX; - size_t seg_text_len = strnlen(seg_text_start, DLS_SEG_LEN_CHAR_MAX); + int seg_text_offset = seg_index * DLS_SEG_LEN_CHAR_MAX; + const char *seg_text_start = text.c_str() + seg_text_offset; + size_t seg_text_len = MIN(text.size() - seg_text_offset, DLS_SEG_LEN_CHAR_MAX); size_t seg_len = DLS_SEG_LEN_PREFIX + seg_text_len + DLS_SEG_LEN_CRC; -- cgit v1.2.3 From 676db4cad279e2498ac0aa286fcbc3ad4a07caca Mon Sep 17 00:00:00 2001 From: Stefan Pöschel Date: Thu, 23 Apr 2015 21:41:06 +0200 Subject: Fix MAXDLS regression --- src/mot-encoder.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mot-encoder.cpp b/src/mot-encoder.cpp index 3dd604e..5763f61 100644 --- a/src/mot-encoder.cpp +++ b/src/mot-encoder.cpp @@ -262,7 +262,7 @@ CharsetConverter charset_converter; typedef std::vector pad_t; static std::deque dls_pads; static bool dls_toggle = false; -std::string dlstext_prev(MAXDLS + 1, ' '); +std::string dlstext_prev = ""; static int verbose = 0; @@ -982,7 +982,8 @@ void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t ch ss << dls_lines[i]; } std::string dlstext = ss.str(); - using namespace std; + if (dlstext.size() > MAXDLS) + dlstext.resize(MAXDLS); if (dls_to_ebu) charset = CHARSET_COMPLETE_EBU_LATIN; -- cgit v1.2.3 From 08ca74724207d5dc3471d4a07d62c1a85784a546 Mon Sep 17 00:00:00 2001 From: Stefan Pöschel Date: Tue, 28 Apr 2015 23:24:43 +0200 Subject: Re-enable default conversion to EBU Latin based --- README.md | 12 +++++++----- src/mot-encoder.cpp | 36 +++++++++++++++++++++--------------- 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 7fbab37..ed604f2 100644 --- a/README.md +++ b/README.md @@ -211,17 +211,19 @@ can also read *mot-encoder* data. This is an ongoing development. Make sure you use the same pad length option for *mot-encoder* and the audio encoder. Only some pad lengths are supported, -please see *mot-encoder*'s help. Only pad lengths 34, 42 and 58 seem to be -working with some receivers, 23 and 26 appear to be broken. +please see *mot-encoder*'s help. Character Sets -------------- When *mot-encoder* is launched with the default character set encoding, it assumes that the DLS text in the file is encoded in UTF-8, and will convert it according to -the DAB standard. +the DAB standard to the EBU Latin based character set encoding. -If you set the character set encoding to anything else, *mot-encoder* will not perform -any conversion, and it is your responsibility to ensure the encoding is valid. +If you set the character set encoding to anything else (except: EBU Latin based, +which needs no conversion), *mot-encoder* will abort, as it does not support +any other conversion than from UTF-8 to EBU Latin based. +You can also use the -C option to transmit the untouched DLS text. In this case, +it is your responsibility to ensure the encoding is valid. Known Limitations ----------------- diff --git a/src/mot-encoder.cpp b/src/mot-encoder.cpp index 5763f61..afcbd86 100644 --- a/src/mot-encoder.cpp +++ b/src/mot-encoder.cpp @@ -236,7 +236,7 @@ void writeMotPAD(int output_fd, unsigned short int padlen); void create_dls_pads(const std::string& text, const int padlen, const uint8_t charset); -void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t charset, bool dls_to_ebu); +void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t charset, bool raw_dls); int get_xpadlengthmask(int padlen); @@ -301,8 +301,8 @@ void usage(char* name) " ID = 6: ISO/IEC 10646 using UCS-2 BE\n" " ID = 15: ISO/IEC 10646 using UTF-8\n" " Default: 15\n" - " -C, --dls-to-ebu Convert each DLS text to Complete EBU Latin based repertoire\n" - " character set encoding (currently only from UTF-8).\n" + " -C, --raw-dls Do not convert DLS texts to Complete EBU Latin based repertoire\n" + " character set encoding.\n" " -R, --raw-slides Do not process slides. Integrity checks and resizing\n" " slides is skipped. Use this if you know what you are doing !\n" " It is useful only when -d is used\n" @@ -323,7 +323,7 @@ int main(int argc, char *argv[]) int sleepdelay = SLEEPDELAY_DEFAULT; bool raw_slides = false; int charset = CHARSET_UTF8; - bool dls_to_ebu = false; + bool raw_dls = false; const char* dir = NULL; const char* output = "/tmp/pad.fifo"; @@ -331,7 +331,7 @@ int main(int argc, char *argv[]) const struct option longopts[] = { {"charset", required_argument, 0, 'c'}, - {"dls-to-ebu", no_argument, 0, 'C'}, + {"raw-dls", no_argument, 0, 'C'}, {"dir", required_argument, 0, 'd'}, {"erase", no_argument, 0, 'e'}, {"output", required_argument, 0, 'o'}, @@ -353,7 +353,7 @@ int main(int argc, char *argv[]) charset = atoi(optarg); break; case 'C': - dls_to_ebu = true; + raw_dls = true; break; case 'd': dir = optarg; @@ -451,12 +451,18 @@ int main(int argc, char *argv[]) user_charset, charset); } - if (dls_to_ebu) { - if (charset != CHARSET_UTF8) { + if (not raw_dls) { + switch (charset) { + case CHARSET_COMPLETE_EBU_LATIN: + // no conversion needed + break; + case CHARSET_UTF8: + fprintf(stderr, "mot-encoder converting DLS texts to Complete EBU Latin\n"); + break; + default: fprintf(stderr, "mot-encoder Error: DLS conversion to EBU is currently only supported for UTF-8 input!\n"); return 1; } - fprintf(stderr, "mot-encoder converting DLS texts to Complete EBU Latin\n"); } int output_fd = open(output, O_WRONLY); @@ -506,7 +512,7 @@ int main(int argc, char *argv[]) if (not dls_file.empty()) { // Maybe we have no slides, always update DLS - writeDLS(output_fd, dls_file, padlen, charset, dls_to_ebu); + writeDLS(output_fd, dls_file, padlen, charset, raw_dls); sleep(sleepdelay); } @@ -530,7 +536,7 @@ int main(int argc, char *argv[]) // Always retransmit DLS after each slide, we want it to be updated frequently if (not dls_file.empty()) { - writeDLS(output_fd, dls_file, padlen, charset, dls_to_ebu); + writeDLS(output_fd, dls_file, padlen, charset, raw_dls); } sleep(sleepdelay); @@ -544,7 +550,7 @@ int main(int argc, char *argv[]) } else if (not dls_file.empty()) { // only DLS // Always retransmit DLS, we want it to be updated frequently - writeDLS(output_fd, dls_file, padlen, charset, dls_to_ebu); + writeDLS(output_fd, dls_file, padlen, charset, raw_dls); sleep(sleepdelay); } @@ -940,7 +946,7 @@ void packMscDG(unsigned char* b, MSCDG* msc, unsigned short int* bsize) } -void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t charset, bool dls_to_ebu) +void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t charset, bool raw_dls) { std::ifstream dls_fstream(dls_file.c_str()); if (!dls_fstream.is_open()) { @@ -955,7 +961,7 @@ void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t ch // line endings while (std::getline(dls_fstream, line)) { if (not line.empty()) { - if (dls_to_ebu) { + if (not raw_dls && charset == CHARSET_UTF8) { dls_lines.push_back(charset_converter.convert(line)); } else { @@ -985,7 +991,7 @@ void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t ch if (dlstext.size() > MAXDLS) dlstext.resize(MAXDLS); - if (dls_to_ebu) + if (not raw_dls) charset = CHARSET_COMPLETE_EBU_LATIN; -- cgit v1.2.3