diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/charset/charset.cpp | 20 | ||||
-rw-r--r-- | lib/charset/charset.h | 7 |
2 files changed, 26 insertions, 1 deletion
diff --git a/lib/charset/charset.cpp b/lib/charset/charset.cpp
index 2ee14f8..5cc773d 100644
--- a/lib/charset/charset.cpp
+++ b/lib/charset/charset.cpp
@@ -52,3 +52,23 @@ const char* utf8_encoded_EBU_Latin[CHARSET_TABLE_ENTRIES] = {
 "Â", "Ä", "Ê", "Ë", "Î", "Ï", "Ô", "Ö", "Û", "Ü", "ř", "č", "š", "ž", "đ", "ŀ",
 "Ã", "Å", "Æ", "Œ", "ŷ", "Ý", "Õ", "Ø", "Þ", "Ŋ", "Ŕ", "Ć", "Ś", "Ź", "Ť", "ð",
 "ã", "å", "æ", "œ", "ŵ", "ý", "õ", "ø", "þ", "ŋ", "ŕ", "ć", "ś", "ź", "ť", "ħ"};
+
+std::string CharsetConverter::convert_ebu_to_utf8(const std::string& str)
+{
+    using namespace std;
+
+    string utf8_str;
+    for (const uint8_t c : str) {
+        // Table offset because NUL is not represented
+        if (c >= CHARSET_TABLE_OFFSET) {
+            string utf8_char(utf8_encoded_EBU_Latin[c - CHARSET_TABLE_OFFSET]);
+            utf8_str += utf8_char;
+        }
+        else {
+            utf8_str += "⁇";
+        }
+    }
+
+    return utf8_str;
+}
+
diff --git a/lib/charset/charset.h b/lib/charset/charset.h
index 6ff19bc..78dc94e 100644
--- a/lib/charset/charset.h
+++ b/lib/charset/charset.h
@@ -81,9 +81,14 @@ class CharsetConverter
             return encoded_line;
         }
-    private:
+        /* Convert a EBU Latin byte stream to a UTF-8 encoded string.
+         * Invalid input characters are converted to ⁇ (unicode U+2047).
+         */
+        std::string convert_ebu_to_utf8(const std::string& str);
+    private:
         std::vector<uint32_t> m_conversion_table;
 };
+
 #endif