aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/charset/charset.cpp20
-rw-r--r--lib/charset/charset.h7
2 files changed, 26 insertions, 1 deletions
diff --git a/lib/charset/charset.cpp b/lib/charset/charset.cpp
index 2ee14f8..5cc773d 100644
--- a/lib/charset/charset.cpp
+++ b/lib/charset/charset.cpp
@@ -52,3 +52,23 @@ const char* utf8_encoded_EBU_Latin[CHARSET_TABLE_ENTRIES] = {
"Â", "Ä", "Ê", "Ë", "Î", "Ï", "Ô", "Ö", "Û", "Ü", "ř", "č", "š", "ž", "đ", "ŀ",
"Ã", "Å", "Æ", "Œ", "ŷ", "Ý", "Õ", "Ø", "Þ", "Ŋ", "Ŕ", "Ć", "Ś", "Ź", "Ť", "ð",
"ã", "å", "æ", "œ", "ŵ", "ý", "õ", "ø", "þ", "ŋ", "ŕ", "ć", "ś", "ź", "ť", "ħ"};
+
+std::string CharsetConverter::convert_ebu_to_utf8(const std::string& str)
+{
+ using namespace std;
+
+ string utf8_str; // UTF-8 output, built up one input byte at a time
+ for (const uint8_t c : str) { // uint8_t keeps bytes >= 0x80 from being seen as negative where char is signed
+ // The table has no entry for NUL, so byte value c lives at index c - CHARSET_TABLE_OFFSET
+ if (c >= CHARSET_TABLE_OFFSET) {
+ string utf8_char(utf8_encoded_EBU_Latin[c - CHARSET_TABLE_OFFSET]);
+ utf8_str += utf8_char;
+ }
+ else {
+ utf8_str += "⁇"; // U+2047 stands in for bytes below the table offset
+ }
+ }
+
+ return utf8_str;
+}
+
diff --git a/lib/charset/charset.h b/lib/charset/charset.h
index 6ff19bc..78dc94e 100644
--- a/lib/charset/charset.h
+++ b/lib/charset/charset.h
@@ -81,9 +81,14 @@ class CharsetConverter
return encoded_line;
}
- private:
+ /* Convert an EBU Latin byte stream to a UTF-8 encoded string.
+ * Invalid input characters are converted to ⁇ (Unicode U+2047).
+ */
+ std::string convert_ebu_to_utf8(const std::string& str);
+ private:
std::vector<uint32_t> m_conversion_table;
};
+
#endif