From ad279f82072b0dffa376e95abb67b655fbf902ed Mon Sep 17 00:00:00 2001
From: Stefan Pöschel <github@basicmaster.de>
Date: Thu, 23 Apr 2015 18:04:45 +0200
Subject: Make DLS conversion from (ATM only) UTF-8 to EBU Latin optional

This (re-)introduces the ability to use DLS texts that are already encoded in
the EBU Latin based charset (e.g. DAB retransmission of an FM station having RDS).
The conversion to EBU Latin based (at the moment only supported from UTF-8) must
now be enabled via parameter. If it is enabled, the charset parameter only applies to the DLS text input.
---
 src/mot-encoder.cpp | 41 +++++++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 12 deletions(-)

diff --git a/src/mot-encoder.cpp b/src/mot-encoder.cpp
index 0fd8859..60109f8 100644
--- a/src/mot-encoder.cpp
+++ b/src/mot-encoder.cpp
@@ -235,7 +235,7 @@ void writeMotPAD(int output_fd,
         unsigned short int padlen);
 
 void create_dls_pads(const std::string& text, const int padlen, const uint8_t charset);
-void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t charset);
+void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t charset, bool dls_to_ebu);
 
 
 int get_xpadlengthmask(int padlen);
@@ -292,13 +292,15 @@ void usage(char* name)
                     " -p, --pad=LENGTH       Set the pad length.\n"
                     "                          Possible values: " ALLOWED_PADLEN "\n"
                     "                          Default: 58\n"
-                    " -c, --charset=ID       Signal the character set encoding defined by ID\n"
-                    "                          ID = 0: Complete EBU Latin based repertoire\n"
-                    "                          ID = 1: Latin based common core, Cyrillic, Greek\n"
-                    "                          ID = 2: EBU Latin based core, Arabic, Hebrew, Cyrillic and Greek\n"
-                    "                          ID = 3: ISO Latin Alphabet No 2\n"
+                    " -c, --charset=ID       ID of the character set encoding used for DLS text input.\n"
+                    "                          ID =  0: Complete EBU Latin based repertoire\n"
+                    "                          ID =  1: Latin based common core, Cyrillic, Greek\n"
+                    "                          ID =  2: EBU Latin based core, Arabic, Hebrew, Cyrillic and Greek\n"
+                    "                          ID =  3: ISO Latin Alphabet No 2\n"
                     "                          ID = 15: ISO/IEC 10646 using UTF-8\n"
                     "                          Default: 0\n"
+                    " -C, --dls-to-ebu       Convert each DLS text to Complete EBU Latin based repertoire\n"
+                    "                          character set encoding (currently only from UTF-8).\n"
                     " -R, --raw-slides       Do not process slides. Integrity checks and resizing\n"
                     "                          slides is skipped. Use this if you know what you are doing !\n"
                     "                          It is useful only when -d is used\n"
@@ -319,6 +321,7 @@ int main(int argc, char *argv[])
     int  sleepdelay = SLEEPDELAY_DEFAULT;
     bool raw_slides = false;
     int  charset = CHARSET_COMPLETE_EBU_LATIN;
+    bool dls_to_ebu = false;
 
     const char* dir = NULL;
     const char* output = "/tmp/pad.fifo";
@@ -326,6 +329,7 @@ int main(int argc, char *argv[])
 
     const struct option longopts[] = {
         {"charset",    required_argument,  0, 'c'},
+        {"dls-to-ebu", no_argument,        0, 'C'},
         {"dir",        required_argument,  0, 'd'},
         {"erase",      no_argument,        0, 'e'},
         {"output",     required_argument,  0, 'o'},
@@ -341,11 +345,14 @@ int main(int argc, char *argv[])
     int ch=0;
     int index;
     while(ch != -1) {
-        ch = getopt_long(argc, argv, "ehRc:d:o:p:s:t:v", longopts, &index);
+        ch = getopt_long(argc, argv, "eChRc:d:o:p:s:t:v", longopts, &index);
         switch (ch) {
             case 'c':
                 charset = atoi(optarg);
                 break;
+            case 'C':
+                dls_to_ebu = true;
+                break;
             case 'd':
                 dir = optarg;
                 break;
@@ -439,6 +446,14 @@ int main(int argc, char *argv[])
                user_charset, charset);
     }
 
+    if (dls_to_ebu) {
+        if (charset != CHARSET_UTF8) {
+            fprintf(stderr, "mot-encoder Error: DLS conversion to EBU is currently only supported for UTF-8 input!\n");
+            return 1;
+        }
+        fprintf(stderr, "mot-encoder converting DLS texts to Complete EBU Latin\n");
+    }
+
     int output_fd = open(output, O_WRONLY);
     if (output_fd == -1) {
         perror("mot-encoder Error: failed to open output");
@@ -486,7 +501,7 @@ int main(int argc, char *argv[])
 
             if (not dls_file.empty()) {
                 // Maybe we have no slides, always update DLS
-                writeDLS(output_fd, dls_file, padlen, charset);
+                writeDLS(output_fd, dls_file, padlen, charset, dls_to_ebu);
                 sleep(sleepdelay);
             }
 
@@ -510,7 +525,7 @@ int main(int argc, char *argv[])
 
                 // Always retransmit DLS after each slide, we want it to be updated frequently
                 if (not dls_file.empty()) {
-                    writeDLS(output_fd, dls_file, padlen, charset);
+                    writeDLS(output_fd, dls_file, padlen, charset, dls_to_ebu);
                 }
 
                 sleep(sleepdelay);
@@ -524,7 +539,7 @@ int main(int argc, char *argv[])
         }
         else if (not dls_file.empty()) { // only DLS
             // Always retransmit DLS, we want it to be updated frequently
-            writeDLS(output_fd, dls_file, padlen, charset);
+            writeDLS(output_fd, dls_file, padlen, charset, dls_to_ebu);
 
             sleep(sleepdelay);
         }
@@ -920,7 +935,7 @@ void packMscDG(unsigned char* b, MSCDG* msc, unsigned short int* bsize)
 }
 
 
-void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t charset)
+void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t charset, bool dls_to_ebu)
 {
     std::ifstream dls_fstream(dls_file.c_str());
     if (!dls_fstream.is_open()) {
@@ -935,7 +950,7 @@ void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t ch
     // line endings
     while (std::getline(dls_fstream, line)) {
         if (not line.empty()) {
-            if (charset == CHARSET_COMPLETE_EBU_LATIN) {
+            if (dls_to_ebu) {
                 dls_lines.push_back(charset_converter.convert(line));
             }
             else {
@@ -944,6 +959,8 @@ void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t ch
             // TODO handle the other charsets accordingly
         }
     }
+    if (dls_to_ebu)
+        charset = CHARSET_COMPLETE_EBU_LATIN;
 
     std::stringstream ss;
     for (size_t i = 0; i < dls_lines.size(); i++) {
-- 
cgit v1.2.3


From 6f5a4f559efe30f32f89c21b7bc382e3bba79c1f Mon Sep 17 00:00:00 2001
From: Stefan Pöschel <github@basicmaster.de>
Date: Thu, 23 Apr 2015 20:22:09 +0200
Subject: Change default DLS text charset to UTF-8

---
 src/mot-encoder.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mot-encoder.cpp b/src/mot-encoder.cpp
index 60109f8..2e477f6 100644
--- a/src/mot-encoder.cpp
+++ b/src/mot-encoder.cpp
@@ -298,7 +298,7 @@ void usage(char* name)
                     "                          ID =  2: EBU Latin based core, Arabic, Hebrew, Cyrillic and Greek\n"
                     "                          ID =  3: ISO Latin Alphabet No 2\n"
                     "                          ID = 15: ISO/IEC 10646 using UTF-8\n"
-                    "                          Default: 0\n"
+                    "                          Default: 15\n"
                     " -C, --dls-to-ebu       Convert each DLS text to Complete EBU Latin based repertoire\n"
                     "                          character set encoding (currently only from UTF-8).\n"
                     " -R, --raw-slides       Do not process slides. Integrity checks and resizing\n"
@@ -320,7 +320,7 @@ int main(int argc, char *argv[])
     bool erase_after_tx = false;
     int  sleepdelay = SLEEPDELAY_DEFAULT;
     bool raw_slides = false;
-    int  charset = CHARSET_COMPLETE_EBU_LATIN;
+    int  charset = CHARSET_UTF8;
     bool dls_to_ebu = false;
 
     const char* dir = NULL;
-- 
cgit v1.2.3


From dce47c5c697dc1290d1a1cdfda773e991f90ce15 Mon Sep 17 00:00:00 2001
From: Stefan Pöschel <github@basicmaster.de>
Date: Thu, 23 Apr 2015 21:14:34 +0200
Subject: Add support for DLS text raw UCS-2 BE input

---
 src/mot-encoder.cpp | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/src/mot-encoder.cpp b/src/mot-encoder.cpp
index 2e477f6..3dd604e 100644
--- a/src/mot-encoder.cpp
+++ b/src/mot-encoder.cpp
@@ -86,6 +86,7 @@ extern "C" {
 #define CHARSET_EBU_LATIN_CY_GR 1 // EBU Latin based common core, Cyrillic, Greek
 #define CHARSET_EBU_LATIN_AR_HE_CY_GR 2 // EBU Latin based core, Arabic, Hebrew, Cyrillic and Greek
 #define CHARSET_ISO_LATIN_ALPHABET_2 3 // ISO Latin Alphabet No 2
+#define CHARSET_UCS2_BE 6 // ISO/IEC 10646 using UCS-2 transformation format, big endian byte order
 #define CHARSET_UTF8 15 // ISO Latin Alphabet No 2
 
 struct MSCDG {
@@ -297,6 +298,7 @@ void usage(char* name)
                     "                          ID =  1: Latin based common core, Cyrillic, Greek\n"
                     "                          ID =  2: EBU Latin based core, Arabic, Hebrew, Cyrillic and Greek\n"
                     "                          ID =  3: ISO Latin Alphabet No 2\n"
+                    "                          ID =  6: ISO/IEC 10646 using UCS-2 BE\n"
                     "                          ID = 15: ISO/IEC 10646 using UTF-8\n"
                     "                          Default: 15\n"
                     " -C, --dls-to-ebu       Convert each DLS text to Complete EBU Latin based repertoire\n"
@@ -427,6 +429,9 @@ int main(int argc, char *argv[])
         case CHARSET_ISO_LATIN_ALPHABET_2:
             user_charset = "ISO Latin Alphabet 2";
             break;
+        case CHARSET_UCS2_BE:
+            user_charset = "UCS-2 BE";
+            break;
         case CHARSET_UTF8:
             user_charset = "UTF-8";
             break;
@@ -959,19 +964,29 @@ void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t ch
             // TODO handle the other charsets accordingly
         }
     }
-    if (dls_to_ebu)
-        charset = CHARSET_COMPLETE_EBU_LATIN;
 
     std::stringstream ss;
     for (size_t i = 0; i < dls_lines.size(); i++) {
         if (i != 0) {
-            ss << "\n";
+            if (charset == CHARSET_UCS2_BE)
+                ss << '\0' << '\n';
+            else
+                ss << '\n';
+        }
+
+        // UCS-2 BE: if from file the first byte of \0\n remains, remove it
+        if (charset == CHARSET_UCS2_BE && dls_lines[i].size() % 2) {
+            dls_lines[i].resize(dls_lines[i].size() - 1);
         }
+
         ss << dls_lines[i];
     }
     std::string dlstext = ss.str();
     using namespace std;
 
+    if (dls_to_ebu)
+        charset = CHARSET_COMPLETE_EBU_LATIN;
+
 
     // (Re)Create data groups (and thereby toggle the toggle bit) only on (first call or) new text
     bool dlstext_is_new = dls_pads.empty() || (dlstext != dlstext_prev);
@@ -1003,8 +1018,9 @@ size_t dls_get(const std::string& text, const uint8_t charset, const unsigned in
     bool first_seg = seg_index == 0;
     bool last_seg  = seg_index == seg_count - 1;
 
-    const char *seg_text_start = text.c_str() + seg_index * DLS_SEG_LEN_CHAR_MAX;
-    size_t seg_text_len = strnlen(seg_text_start, DLS_SEG_LEN_CHAR_MAX);
+    int seg_text_offset = seg_index * DLS_SEG_LEN_CHAR_MAX;
+    const char *seg_text_start = text.c_str() + seg_text_offset;
+    size_t seg_text_len = MIN(text.size() - seg_text_offset, DLS_SEG_LEN_CHAR_MAX);
     size_t seg_len = DLS_SEG_LEN_PREFIX + seg_text_len + DLS_SEG_LEN_CRC;
 
 
-- 
cgit v1.2.3


From 676db4cad279e2498ac0aa286fcbc3ad4a07caca Mon Sep 17 00:00:00 2001
From: Stefan Pöschel <github@basicmaster.de>
Date: Thu, 23 Apr 2015 21:41:06 +0200
Subject: Fix MAXDLS regression

---
 src/mot-encoder.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/mot-encoder.cpp b/src/mot-encoder.cpp
index 3dd604e..5763f61 100644
--- a/src/mot-encoder.cpp
+++ b/src/mot-encoder.cpp
@@ -262,7 +262,7 @@ CharsetConverter charset_converter;
 typedef std::vector<uint8_t> pad_t;
 static std::deque<pad_t> dls_pads;
 static bool dls_toggle = false;
-std::string dlstext_prev(MAXDLS + 1, ' ');
+std::string dlstext_prev = "";
 
 
 static int verbose = 0;
@@ -982,7 +982,8 @@ void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t ch
         ss << dls_lines[i];
     }
     std::string dlstext = ss.str();
-    using namespace std;
+    if (dlstext.size() > MAXDLS)
+        dlstext.resize(MAXDLS);
 
     if (dls_to_ebu)
         charset = CHARSET_COMPLETE_EBU_LATIN;
-- 
cgit v1.2.3


From 08ca74724207d5dc3471d4a07d62c1a85784a546 Mon Sep 17 00:00:00 2001
From: Stefan Pöschel <github@basicmaster.de>
Date: Tue, 28 Apr 2015 23:24:43 +0200
Subject: Re-enable default conversion to EBU Latin based

---
 README.md           | 12 +++++++-----
 src/mot-encoder.cpp | 36 +++++++++++++++++++++---------------
 2 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/README.md b/README.md
index 7fbab37..ed604f2 100644
--- a/README.md
+++ b/README.md
@@ -211,17 +211,19 @@ can also read *mot-encoder* data.
 
 This is an ongoing development. Make sure you use the same pad length option
 for *mot-encoder* and the audio encoder. Only some pad lengths are supported,
-please see *mot-encoder*'s help. Only pad lengths 34, 42 and 58 seem to be
-working with some receivers, 23 and 26 appear to be broken.
+please see *mot-encoder*'s help.
 
 Character Sets
 --------------
 When *mot-encoder* is launched with the default character set encoding, it assumes
 that the DLS text in the file is encoded in UTF-8, and will convert it according to
-the DAB standard.
+the DAB standard to the EBU Latin based character set encoding.
 
-If you set the character set encoding to anything else, *mot-encoder* will not perform
-any conversion, and it is your responsibility to ensure the encoding is valid.
+If you set the character set encoding to anything else (except: EBU Latin based,
+which needs no conversion), *mot-encoder* will abort, as it does not support
+any other conversion than from UTF-8 to EBU Latin based.
+You can also use the -C option to transmit the untouched DLS text. In this case,
+it is your responsibility to ensure the encoding is valid.
 
 Known Limitations
 -----------------
diff --git a/src/mot-encoder.cpp b/src/mot-encoder.cpp
index 5763f61..afcbd86 100644
--- a/src/mot-encoder.cpp
+++ b/src/mot-encoder.cpp
@@ -236,7 +236,7 @@ void writeMotPAD(int output_fd,
         unsigned short int padlen);
 
 void create_dls_pads(const std::string& text, const int padlen, const uint8_t charset);
-void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t charset, bool dls_to_ebu);
+void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t charset, bool raw_dls);
 
 
 int get_xpadlengthmask(int padlen);
@@ -301,8 +301,8 @@ void usage(char* name)
                     "                          ID =  6: ISO/IEC 10646 using UCS-2 BE\n"
                     "                          ID = 15: ISO/IEC 10646 using UTF-8\n"
                     "                          Default: 15\n"
-                    " -C, --dls-to-ebu       Convert each DLS text to Complete EBU Latin based repertoire\n"
-                    "                          character set encoding (currently only from UTF-8).\n"
+                    " -C, --raw-dls          Do not convert DLS texts to Complete EBU Latin based repertoire\n"
+                    "                          character set encoding.\n"
                     " -R, --raw-slides       Do not process slides. Integrity checks and resizing\n"
                     "                          slides is skipped. Use this if you know what you are doing !\n"
                     "                          It is useful only when -d is used\n"
@@ -323,7 +323,7 @@ int main(int argc, char *argv[])
     int  sleepdelay = SLEEPDELAY_DEFAULT;
     bool raw_slides = false;
     int  charset = CHARSET_UTF8;
-    bool dls_to_ebu = false;
+    bool raw_dls = false;
 
     const char* dir = NULL;
     const char* output = "/tmp/pad.fifo";
@@ -331,7 +331,7 @@ int main(int argc, char *argv[])
 
     const struct option longopts[] = {
         {"charset",    required_argument,  0, 'c'},
-        {"dls-to-ebu", no_argument,        0, 'C'},
+        {"raw-dls",    no_argument,        0, 'C'},
         {"dir",        required_argument,  0, 'd'},
         {"erase",      no_argument,        0, 'e'},
         {"output",     required_argument,  0, 'o'},
@@ -353,7 +353,7 @@ int main(int argc, char *argv[])
                 charset = atoi(optarg);
                 break;
             case 'C':
-                dls_to_ebu = true;
+                raw_dls = true;
                 break;
             case 'd':
                 dir = optarg;
@@ -451,12 +451,18 @@ int main(int argc, char *argv[])
                user_charset, charset);
     }
 
-    if (dls_to_ebu) {
-        if (charset != CHARSET_UTF8) {
+    if (not raw_dls) {
+        switch (charset) {
+        case CHARSET_COMPLETE_EBU_LATIN:
+            // no conversion needed
+            break;
+        case CHARSET_UTF8:
+            fprintf(stderr, "mot-encoder converting DLS texts to Complete EBU Latin\n");
+            break;
+        default:
             fprintf(stderr, "mot-encoder Error: DLS conversion to EBU is currently only supported for UTF-8 input!\n");
             return 1;
         }
-        fprintf(stderr, "mot-encoder converting DLS texts to Complete EBU Latin\n");
     }
 
     int output_fd = open(output, O_WRONLY);
@@ -506,7 +512,7 @@ int main(int argc, char *argv[])
 
             if (not dls_file.empty()) {
                 // Maybe we have no slides, always update DLS
-                writeDLS(output_fd, dls_file, padlen, charset, dls_to_ebu);
+                writeDLS(output_fd, dls_file, padlen, charset, raw_dls);
                 sleep(sleepdelay);
             }
 
@@ -530,7 +536,7 @@ int main(int argc, char *argv[])
 
                 // Always retransmit DLS after each slide, we want it to be updated frequently
                 if (not dls_file.empty()) {
-                    writeDLS(output_fd, dls_file, padlen, charset, dls_to_ebu);
+                    writeDLS(output_fd, dls_file, padlen, charset, raw_dls);
                 }
 
                 sleep(sleepdelay);
@@ -544,7 +550,7 @@ int main(int argc, char *argv[])
         }
         else if (not dls_file.empty()) { // only DLS
             // Always retransmit DLS, we want it to be updated frequently
-            writeDLS(output_fd, dls_file, padlen, charset, dls_to_ebu);
+            writeDLS(output_fd, dls_file, padlen, charset, raw_dls);
 
             sleep(sleepdelay);
         }
@@ -940,7 +946,7 @@ void packMscDG(unsigned char* b, MSCDG* msc, unsigned short int* bsize)
 }
 
 
-void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t charset, bool dls_to_ebu)
+void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t charset, bool raw_dls)
 {
     std::ifstream dls_fstream(dls_file.c_str());
     if (!dls_fstream.is_open()) {
@@ -955,7 +961,7 @@ void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t ch
     // line endings
     while (std::getline(dls_fstream, line)) {
         if (not line.empty()) {
-            if (dls_to_ebu) {
+            if (not raw_dls && charset == CHARSET_UTF8) {
                 dls_lines.push_back(charset_converter.convert(line));
             }
             else {
@@ -985,7 +991,7 @@ void writeDLS(int output_fd, const std::string& dls_file, int padlen, uint8_t ch
     if (dlstext.size() > MAXDLS)
         dlstext.resize(MAXDLS);
 
-    if (dls_to_ebu)
+    if (not raw_dls)
         charset = CHARSET_COMPLETE_EBU_LATIN;
 
 
-- 
cgit v1.2.3