From 6902285dbc1c816f9c16d1d63a151b2022c094dc Mon Sep 17 00:00:00 2001 From: "Matthias P. Braendli" Date: Fri, 9 Feb 2018 11:43:10 +0100 Subject: Convert labels from utf-8 to EBU --- doc/example.mux | 11 ++++++--- lib/charset/charset.cpp | 20 +++++++++++++++++ lib/charset/charset.h | 7 +++++- src/MuxElements.cpp | 60 +++++++++++++++++++++++++++++++++++++++---------- src/MuxElements.h | 10 ++++++--- 5 files changed, 89 insertions(+), 19 deletions(-) diff --git a/doc/example.mux b/doc/example.mux index 556a4c9..632772d 100644 --- a/doc/example.mux +++ b/doc/example.mux @@ -81,8 +81,13 @@ ensemble { ; automatic calculation of the local time offset, set the environment variable TZ ; to your timezone (e.g. TZ=Europe/Rome) before you launch ODR-DabMux - ; all labels are maximum 16 characters in length + ; All labels are maximum 16 characters in length. + ; Labels that are valid utf-8 will be converted to EBU Latin Character set + ; as defined in ETSI TS 101 756, in Annex C. If it's not valid utf-8, the + ; label is taken as-is, byte per byte. Characters that cannot be + ; represented in EBU Latin will be replaced by a space character. label "OpenDigitalRadio" + ; The short label is built from the label by erasing letters, and cannot ; be longer than 8 characters. If omitted, it will be truncated from the ; label @@ -95,8 +100,8 @@ services { ; used throughout the configuration file and for the RC. srv-fu { id 0x8daa - label "Funk" - ; you can define a shortlabel too. + label "Fünk" + ; You can define a shortlabel too. 
} srv-ri { id 0x8dab diff --git a/lib/charset/charset.cpp b/lib/charset/charset.cpp index 2ee14f8..5cc773d 100644 --- a/lib/charset/charset.cpp +++ b/lib/charset/charset.cpp @@ -52,3 +52,23 @@ const char* utf8_encoded_EBU_Latin[CHARSET_TABLE_ENTRIES] = { "Â", "Ä", "Ê", "Ë", "Î", "Ï", "Ô", "Ö", "Û", "Ü", "ř", "č", "š", "ž", "đ", "ŀ", "Ã", "Å", "Æ", "Œ", "ŷ", "Ý", "Õ", "Ø", "Þ", "Ŋ", "Ŕ", "Ć", "Ś", "Ź", "Ť", "ð", "ã", "å", "æ", "œ", "ŵ", "ý", "õ", "ø", "þ", "ŋ", "ŕ", "ć", "ś", "ź", "ť", "ħ"}; + +std::string CharsetConverter::convert_ebu_to_utf8(const std::string& str) +{ + using namespace std; + + string utf8_str; + for (const uint8_t c : str) { + // Table offset because NUL is not represented + if (c >= CHARSET_TABLE_OFFSET) { + string utf8_char(utf8_encoded_EBU_Latin[c - CHARSET_TABLE_OFFSET]); + utf8_str += utf8_char; + } + else { + utf8_str += "⁇"; + } + } + + return utf8_str; +} + diff --git a/lib/charset/charset.h b/lib/charset/charset.h index 6ff19bc..78dc94e 100644 --- a/lib/charset/charset.h +++ b/lib/charset/charset.h @@ -81,9 +81,14 @@ class CharsetConverter return encoded_line; } - private: + /* Convert an EBU Latin byte stream to a UTF-8 encoded string. + * Invalid input characters are converted to ⁇ (unicode U+2047). 
+ */ + std::string convert_ebu_to_utf8(const std::string& str); + private: std::vector m_conversion_table; }; + #endif diff --git a/src/MuxElements.cpp b/src/MuxElements.cpp index cdfcc42..a31791a 100644 --- a/src/MuxElements.cpp +++ b/src/MuxElements.cpp @@ -30,6 +30,7 @@ #include #include "MuxElements.h" +#include "lib/charset/charset.h" #include #include @@ -44,7 +45,7 @@ const unsigned short Sub_Channel_SizeTable[64] = { 232, 280, 160, 208, 280, 192, 280, 416 }; - +static CharsetConverter charset_converter; using namespace std; @@ -177,7 +178,14 @@ int DabLabel::setLabel(const std::string& label) m_flag = 0xFF00; // truncate the label to the eight first characters - m_label = label; + try { + m_label = charset_converter.convert(label); + } + catch (const utf8::exception& e) { + etiLog.level(warn) << "Failed to convert label '" << label << + "' to EBU Charset"; + m_label = label; + } return 0; } @@ -185,20 +193,43 @@ int DabLabel::setLabel(const std::string& label) int DabLabel::setLabel(const std::string& label, const std::string& short_label) { DabLabel newlabel; + newlabel.m_flag = 0xFF00; + + try { + newlabel.m_label = charset_converter.convert(label); + + int flag = newlabel.setShortLabel(charset_converter.convert(short_label)); + if (flag < 0) { + return flag; + } + + m_flag = flag & 0xFFFF; + } + catch (const utf8::exception& e) { + etiLog.level(warn) << "Failed to convert label '" << label << + " or short label '" << short_label << "' to EBU Charset"; + + // Use label as-is - int result = newlabel.setLabel(label); - if (result < 0) - return result; + newlabel.m_label = label; + newlabel.m_flag = 0xFF00; - /* First check if we can actually create the short label */ - int flag = newlabel.setShortLabel(short_label); - if (flag < 0) - return flag; + int result = newlabel.setLabel(label); + if (result < 0) { + return result; + } + + /* First check if we can actually create the short label */ + int flag = newlabel.setShortLabel(short_label); + if (flag < 0) { 
+ return flag; + } + + m_flag = flag & 0xFFFF; + } // short label is valid. - m_flag = flag & 0xFFFF; m_label = newlabel.m_label; - return 0; } @@ -258,6 +289,11 @@ int DabLabel::setShortLabel(const std::string& slabel) return flag; } +const string DabLabel::long_label() const +{ + return charset_converter.convert_ebu_to_utf8(m_label); +} + const string DabLabel::short_label() const { stringstream shortlabel; @@ -267,7 +303,7 @@ const string DabLabel::short_label() const } } - return shortlabel.str(); + return charset_converter.convert_ebu_to_utf8(shortlabel.str()); } void DabLabel::writeLabel(uint8_t* buf) const diff --git a/src/MuxElements.h b/src/MuxElements.h index ba9941f..1f3f548 100644 --- a/src/MuxElements.h +++ b/src/MuxElements.h @@ -147,7 +147,11 @@ struct dabOutput { class DabLabel { public: - /* Set a new label and short label. + /* Set a new label and short label. If the label parses as valid UTF-8, it + * will be converted to EBU Latin. If utf-8 decoding fails, the label + * will be used as is. Characters that cannot be converted are replaced + * by a space. + * * returns: 0 on success * -1 if the short_label is not a representable * -2 if the short_label is too long @@ -170,7 +174,7 @@ class DabLabel void writeLabel(uint8_t* buf) const; uint16_t flag() const { return m_flag; } - const std::string long_label() const { return m_label; } + const std::string long_label() const; const std::string short_label() const; private: @@ -182,7 +186,7 @@ class DabLabel /* The m_label is not padded in any way */ std::string m_label; - /* Checks and calculates the flag */ + /* Checks and calculates the flag. slabel must be EBU Latin Charset */ int setShortLabel(const std::string& slabel); }; -- cgit v1.2.3