/*
Copyright (C) 2015 Matthias P. Braendli (http://opendigitalradio.org)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*!
\file charset.h
\brief Define the EBU charset according to ETSI TS 101 756v1.8.1 for DLS encoding
\author Matthias P. Braendli
\author Lindsay Cornell
*/
#ifndef __CHARSET_H_
#define __CHARSET_H_
#include "utf8.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <string>
#include <vector>
/*! Offset between an index into the table and the EBU Latin byte value it
 * stands for: table index i corresponds to byte value i + CHARSET_TABLE_OFFSET.
 */
#define CHARSET_TABLE_OFFSET 1 // NUL at index 0 cannot be represented
/*! Number of table entries: the 256 possible byte values minus the ones
 * skipped by CHARSET_TABLE_OFFSET.
 */
#define CHARSET_TABLE_ENTRIES (256 - CHARSET_TABLE_OFFSET)
/*! Table of UTF-8 encoded C strings, one per representable EBU Latin byte
 * value. Only declared here; the definition is not part of this header.
 */
extern const char* utf8_encoded_EBU_Latin[CHARSET_TABLE_ENTRIES];
class CharsetConverter
{
public:
CharsetConverter() {
/*! Build the converstion table that contains the known code points,
* at the indices corresponding to the EBU Latin table
*/
using namespace std;
for (size_t i = 0; i < CHARSET_TABLE_ENTRIES; i++) {
string table_entry(utf8_encoded_EBU_Latin[i]);
string::iterator it = table_entry.begin();
uint32_t code_point = utf8::next(it, table_entry.end());
m_conversion_table.push_back(code_point);
}
}
/*! Convert a UTF-8 encoded text line into an EBU Latin encoded byte
* stream. If up_to_first_error is set, convert as much text as possible.
* If false, raise an exception in case of conversion errors.
*/
std::string convert(std::string line_utf8, bool up_to_first_error = true) {
using namespace std;
// check for invalid utf-8, we only convert up to the first error
string::iterator end_it;
if (up_to_first_error) {
end_it = utf8::find_invalid(line_utf8.begin(), line_utf8.end());
}
else {
end_it = line_utf8.end();
}
// Convert it to utf-32
vector utf32line;
utf8::utf8to32(line_utf8.begin(), end_it, back_inserter(utf32line));
string encoded_line(utf32line.size(), '0');
// Try to convert each codepoint
for (size_t i = 0; i < utf32line.size(); i++) {
vector::iterator iter = find(m_conversion_table.begin(),
m_conversion_table.end(), utf32line[i]);
if (iter != m_conversion_table.end()) {
size_t index = std::distance(m_conversion_table.begin(), iter);
encoded_line[i] = (char)(index + CHARSET_TABLE_OFFSET);
}
else {
encoded_line[i] = ' ';
}
}
return encoded_line;
}
/* Convert a EBU Latin byte stream to a UTF-8 encoded string.
* Invalid input characters are converted to ⁇ (unicode U+2047).
*/
std::string convert_ebu_to_utf8(const std::string& str);
private:
std::vector m_conversion_table;
};
#endif