diff options
author | Matthias P. Braendli <matthias.braendli@mpb.li> | 2019-01-16 15:44:45 +0100 |
---|---|---|
committer | Matthias P. Braendli <matthias.braendli@mpb.li> | 2019-01-16 15:44:45 +0100 |
commit | 9d3d404a46dd79ed2b7a6c39719c76839787127e (patch) | |
tree | 9f67996d2f72b0f93302d695299b173e50698244 /src/ensembledatabase.cpp | |
parent | 1df947bae7346948e08edb75616d34fcf8802dae (diff) | |
download | etisnoop-9d3d404a46dd79ed2b7a6c39719c76839787127e.tar.gz etisnoop-9d3d404a46dd79ed2b7a6c39719c76839787127e.tar.bz2 etisnoop-9d3d404a46dd79ed2b7a6c39719c76839787127e.zip |
Add charset decoder for FIG1 labels
Diffstat (limited to 'src/ensembledatabase.cpp')
-rw-r--r-- | src/ensembledatabase.cpp | 62 |
1 files changed, 57 insertions, 5 deletions
```diff
diff --git a/src/ensembledatabase.cpp b/src/ensembledatabase.cpp
index 1353c24..e5066a6 100644
--- a/src/ensembledatabase.cpp
+++ b/src/ensembledatabase.cpp
@@ -31,6 +31,7 @@
 #include <codecvt>
 #include <sstream>
 #include "ensembledatabase.hpp"
+#include "charset.hpp"
 
 namespace ensemble_database {
 
@@ -50,6 +51,37 @@ static string ucs2toutf8(const uint8_t *ucs2, size_t len_bytes)
 
     return ucsconv.to_bytes(ucs2label);
 }
+std::string label_t::label() const
+{
+    switch (charset) {
+        case charset_e::COMPLETE_EBU_LATIN:
+            return convert_ebu_to_utf8(string(label_bytes.begin(), label_bytes.end()));
+        case charset_e::UTF8:
+            return string(label_bytes.begin(), label_bytes.end());
+        case charset_e::UCS2:
+            try {
+                return ucs2toutf8(label_bytes.data(), label_bytes.size());
+            }
+            catch (const range_error&) {
+                return "";
+            }
+        case charset_e::UNDEFINED:
+            throw logic_error("charset undefined");
+    }
+    throw logic_error("invalid charset " + to_string((int)charset));
+}
+
+std::string label_t::shortlabel() const
+{
+    string shortlabel;
+    for (size_t i = 0; i < label_bytes.size(); ++i) {
+        if (shortlabel_flag & 0x8000 >> i) {
+            shortlabel += static_cast<char>(label_bytes[i]);
+        }
+    }
+
+    return shortlabel;
+}
 
 string label_t::assemble() const
 {
@@ -64,18 +96,23 @@ string label_t::assemble() const
         }
     }
 
-    switch (charset) {
-        case extended_label_charset::UTF8:
+    switch (extended_label_charset) {
+        case charset_e::COMPLETE_EBU_LATIN:
+            // FIG2 doesn't allow EBU, use FIG1 for those
+            return "";
+        case charset_e::UTF8:
             return string(segments_cat.begin(), segments_cat.end());
-        case extended_label_charset::UCS2:
+        case charset_e::UCS2:
             try {
                 return ucs2toutf8(segments_cat.data(), segments_cat.size());
             }
             catch (const range_error&) {
                 return "";
             }
+        case charset_e::UNDEFINED:
+            return "";
     }
-    throw logic_error("invalid charset");
+    throw logic_error("invalid extended label charset " + to_string((int)extended_label_charset));
 }
 
 string label_t::assembly_state() const
@@ -86,7 +123,22 @@ string label_t::assembly_state() const
         ss << s.first << ",";
     }
 
-    ss << "count=" << segment_count << "]";
+    ss << "count=" << segment_count << ",";
+    ss << "charset=";
+    switch (extended_label_charset) {
+        case charset_e::COMPLETE_EBU_LATIN:
+            throw logic_error("invalid extended label LATIN charset");
+        case charset_e::UTF8:
+            ss << "UTF8";
+            break;
+        case charset_e::UCS2:
+            ss << "UCS2";
+            break;
+        case charset_e::UNDEFINED:
+            ss << "UNDEFINED";
+            break;
+    }
+    ss << "]";
 
     return ss.str();
 }
```