aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatthias P. Braendli <matthias.braendli@mpb.li>2018-02-09 11:43:10 +0100
committerMatthias P. Braendli <matthias.braendli@mpb.li>2018-02-09 11:43:10 +0100
commit6902285dbc1c816f9c16d1d63a151b2022c094dc (patch)
tree4ea42146ffbcb54ad27526f4659099f77d435610
parentf4ef1284d754ecd907dd4ed3072f8be65b26f2de (diff)
downloaddabmux-6902285dbc1c816f9c16d1d63a151b2022c094dc.tar.gz
dabmux-6902285dbc1c816f9c16d1d63a151b2022c094dc.tar.bz2
dabmux-6902285dbc1c816f9c16d1d63a151b2022c094dc.zip
Convert labels from utf-8 to EBU
-rw-r--r--doc/example.mux11
-rw-r--r--lib/charset/charset.cpp20
-rw-r--r--lib/charset/charset.h7
-rw-r--r--src/MuxElements.cpp60
-rw-r--r--src/MuxElements.h10
5 files changed, 89 insertions, 19 deletions
diff --git a/doc/example.mux b/doc/example.mux
index 556a4c9..632772d 100644
--- a/doc/example.mux
+++ b/doc/example.mux
@@ -81,8 +81,13 @@ ensemble {
; automatic calculation of the local time offset, set the environment variable TZ
; to your timezone (e.g. TZ=Europe/Rome) before you launch ODR-DabMux
- ; all labels are maximum 16 characters in length
+ ; All labels are maximum 16 characters in length.
+ ; Labels that are valid UTF-8 will be converted to the EBU Latin character set
+ ; as defined in ETSI TS 101 756, Annex C. If a label is not valid UTF-8, it
+ ; is taken as-is, byte for byte. Characters that cannot be
+ ; represented in EBU Latin will be replaced by a space character.
label "OpenDigitalRadio"
+
; The short label is built from the label by erasing letters, and cannot
; be longer than 8 characters. If omitted, it will be truncated from the
; label
@@ -95,8 +100,8 @@ services {
; used throughout the configuration file and for the RC.
srv-fu {
id 0x8daa
- label "Funk"
- ; you can define a shortlabel too.
+ label "Fünk"
+ ; You can define a shortlabel too.
}
srv-ri {
id 0x8dab
diff --git a/lib/charset/charset.cpp b/lib/charset/charset.cpp
index 2ee14f8..5cc773d 100644
--- a/lib/charset/charset.cpp
+++ b/lib/charset/charset.cpp
@@ -52,3 +52,23 @@ const char* utf8_encoded_EBU_Latin[CHARSET_TABLE_ENTRIES] = {
"Â", "Ä", "Ê", "Ë", "Î", "Ï", "Ô", "Ö", "Û", "Ü", "ř", "č", "š", "ž", "đ", "ŀ",
"Ã", "Å", "Æ", "Œ", "ŷ", "Ý", "Õ", "Ø", "Þ", "Ŋ", "Ŕ", "Ć", "Ś", "Ź", "Ť", "ð",
"ã", "å", "æ", "œ", "ŵ", "ý", "õ", "ø", "þ", "ŋ", "ŕ", "ć", "ś", "ź", "ť", "ħ"};
+
+std::string CharsetConverter::convert_ebu_to_utf8(const std::string& str)
+{
+ using namespace std;
+
+ string utf8_str;
+ for (const uint8_t c : str) {
+ // Table offset because NUL is not represented
+ if (c >= CHARSET_TABLE_OFFSET) {
+ string utf8_char(utf8_encoded_EBU_Latin[c - CHARSET_TABLE_OFFSET]);
+ utf8_str += utf8_char;
+ }
+ else {
+ utf8_str += "⁇";
+ }
+ }
+
+ return utf8_str;
+}
+
diff --git a/lib/charset/charset.h b/lib/charset/charset.h
index 6ff19bc..78dc94e 100644
--- a/lib/charset/charset.h
+++ b/lib/charset/charset.h
@@ -81,9 +81,14 @@ class CharsetConverter
return encoded_line;
}
- private:
+ /* Convert an EBU Latin byte stream to a UTF-8 encoded string.
+ * Invalid input characters are converted to ⁇ (Unicode U+2047).
+ */
+ std::string convert_ebu_to_utf8(const std::string& str);
+ private:
std::vector<uint32_t> m_conversion_table;
};
+
#endif
diff --git a/src/MuxElements.cpp b/src/MuxElements.cpp
index cdfcc42..a31791a 100644
--- a/src/MuxElements.cpp
+++ b/src/MuxElements.cpp
@@ -30,6 +30,7 @@
#include <algorithm>
#include "MuxElements.h"
+#include "lib/charset/charset.h"
#include <boost/algorithm/string.hpp>
#include <boost/format.hpp>
@@ -44,7 +45,7 @@ const unsigned short Sub_Channel_SizeTable[64] = {
232, 280, 160, 208, 280, 192, 280, 416
};
-
+static CharsetConverter charset_converter;
using namespace std;
@@ -177,7 +178,14 @@ int DabLabel::setLabel(const std::string& label)
m_flag = 0xFF00; // truncate the label to the eight first characters
- m_label = label;
+ try {
+ m_label = charset_converter.convert(label);
+ }
+ catch (const utf8::exception& e) {
+ etiLog.level(warn) << "Failed to convert label '" << label <<
+ "' to EBU Charset";
+ m_label = label;
+ }
return 0;
}
@@ -185,20 +193,43 @@ int DabLabel::setLabel(const std::string& label)
int DabLabel::setLabel(const std::string& label, const std::string& short_label)
{
DabLabel newlabel;
+ newlabel.m_flag = 0xFF00;
+
+ try {
+ newlabel.m_label = charset_converter.convert(label);
+
+ int flag = newlabel.setShortLabel(charset_converter.convert(short_label));
+ if (flag < 0) {
+ return flag;
+ }
+
+ m_flag = flag & 0xFFFF;
+ }
+ catch (const utf8::exception& e) {
+ etiLog.level(warn) << "Failed to convert label '" << label <<
+ " or short label '" << short_label << "' to EBU Charset";
+
+ // Use label as-is
- int result = newlabel.setLabel(label);
- if (result < 0)
- return result;
+ newlabel.m_label = label;
+ newlabel.m_flag = 0xFF00;
- /* First check if we can actually create the short label */
- int flag = newlabel.setShortLabel(short_label);
- if (flag < 0)
- return flag;
+ int result = newlabel.setLabel(label);
+ if (result < 0) {
+ return result;
+ }
+
+ /* First check if we can actually create the short label */
+ int flag = newlabel.setShortLabel(short_label);
+ if (flag < 0) {
+ return flag;
+ }
+
+ m_flag = flag & 0xFFFF;
+ }
// short label is valid.
- m_flag = flag & 0xFFFF;
m_label = newlabel.m_label;
-
return 0;
}
@@ -258,6 +289,11 @@ int DabLabel::setShortLabel(const std::string& slabel)
return flag;
}
+const string DabLabel::long_label() const
+{
+ return charset_converter.convert_ebu_to_utf8(m_label);
+}
+
const string DabLabel::short_label() const
{
stringstream shortlabel;
@@ -267,7 +303,7 @@ const string DabLabel::short_label() const
}
}
- return shortlabel.str();
+ return charset_converter.convert_ebu_to_utf8(shortlabel.str());
}
void DabLabel::writeLabel(uint8_t* buf) const
diff --git a/src/MuxElements.h b/src/MuxElements.h
index ba9941f..1f3f548 100644
--- a/src/MuxElements.h
+++ b/src/MuxElements.h
@@ -147,7 +147,11 @@ struct dabOutput {
class DabLabel
{
public:
- /* Set a new label and short label.
+ /* Set a new label and short label. If the label parses as valid UTF-8, it
+ * will be converted to EBU Latin. If UTF-8 decoding fails, the label
+ * will be used as-is. Characters that cannot be converted are replaced
+ * by a space.
+ *
* returns: 0 on success
* -1 if the short_label is not a representable
* -2 if the short_label is too long
@@ -170,7 +174,7 @@ class DabLabel
void writeLabel(uint8_t* buf) const;
uint16_t flag() const { return m_flag; }
- const std::string long_label() const { return m_label; }
+ const std::string long_label() const;
const std::string short_label() const;
private:
@@ -182,7 +186,7 @@ class DabLabel
/* The m_label is not padded in any way */
std::string m_label;
- /* Checks and calculates the flag */
+ /* Checks and calculates the flag. slabel must be encoded in the EBU Latin charset. */
int setShortLabel(const std::string& slabel);
};