You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

cet_util.cc 11KB


  1. /*
  2. Character encoding transformation - utilities
  3. Copyright (C) 2005-2008 Olaf Klein, o.b.klein@gpsbabel.org
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111 USA
  15. */
  16. #include "defs.h"
  17. #include "cet.h"
  18. #include "cet_util.h"
  19. #include <stdlib.h> // qsort
  20. #include "src/core/logging.h"
  21. #include <QtCore/QDebug>
  22. #include <QtCore/QTextCodec>
  23. #define MYNAME "cet_util"
  /* Head of the singly linked list (chained through ->next) of registered character sets. */
  static cet_cs_vec_t* cet_cs_vec_root = NULL;
  /* One lookup entry: a character set name or alias and the vector it resolves to. */
  typedef struct cet_cs_alias_s {
    char* name;        /* heap copy made with xstrdup in cet_register, released in cet_deregister */
    cet_cs_vec_t* vec; /* vector this name maps to */
  } cet_cs_alias_t;
  /* Name/alias table built and sorted by cet_register; searched by cet_find_cs_by_name. */
  static cet_cs_alias_t* cet_cs_alias;
  /* Number of entries in cet_cs_alias. */
  static int cet_cs_alias_ct = 0;
  /* Number of vectors linked into cet_cs_vec_root (incremented by cet_register_cs). */
  static int cet_cs_vec_ct = 0;
  /* Set to 1 by cet_convert_strings while converting from UTF-8 to a target set, 0 otherwise. */
  static int cet_output = 0;
  33. /* %%% fixed inbuild character sets %%% */
  34. #include "cet/ansi_x3_4_1968.h"
  35. #include "cet/cp1252.h"
  36. #include "cet/iso_8859_8.h"
  37. /* %%% short hand strings transmission for main character sets %%% */
  38. short*
  39. cet_str_utf8_to_uni(const char* src, int* length)
  40. {
  41. return cet_str_any_to_uni(src, &cet_cs_vec_utf8, length);
  42. }
  43. /* %%% cet_str_any_to_any %%%
  44. *
  45. * -->> for use in mkshort */
  46. char*
  47. cet_str_any_to_any(const char* src, const cet_cs_vec_t* src_vec, const cet_cs_vec_t* dest_vec)
  48. {
  49. char* c0, *c1;
  50. const cet_cs_vec_t* v_in = (src_vec != NULL) ? src_vec : &cet_cs_vec_ansi_x3_4_1968;
  51. const cet_cs_vec_t* v_out = (dest_vec != NULL) ? dest_vec : &cet_cs_vec_ansi_x3_4_1968;
  52. if (src == NULL) {
  53. return NULL;
  54. } else if ((*src == '\0') || (v_in == v_out)) {
  55. return xstrdup(src);
  56. }
  57. c0 = (v_in == &cet_cs_vec_utf8) ? xstrdup(src) : cet_str_any_to_utf8(src, v_in);
  58. c1 = (v_out == &cet_cs_vec_utf8) ? xstrdup(c0) : cet_str_utf8_to_any(c0, v_out);
  59. xfree(c0);
  60. return c1;
  61. }
  62. static signed int
  63. cet_cs_alias_qsort_cb(const void* a, const void* b)
  64. {
  65. const cet_cs_alias_t* va = (const cet_cs_alias_t*) a;
  66. const cet_cs_alias_t* vb = (const cet_cs_alias_t*) b;
  67. return case_ignore_strcmp(va->name, vb->name);
  68. }
  69. void
  70. cet_register_cs(cet_cs_vec_t* vec)
  71. {
  72. if (vec->next == NULL) {
  73. vec->next = cet_cs_vec_root;
  74. cet_cs_vec_root = vec;
  75. cet_cs_vec_ct++;
  76. #ifdef DEBUG_MEM
  77. cet_check_cs(vec);
  78. #endif
  79. }
  80. }
  /* Dummy vector for our native character set */
  /* NULL-terminated list of extra names; cet_register adds each one to the lookup table. */
  const char* cet_cs_utf8_alias[] = {
    "utf8", NULL
  };
  /*
   * Positional initializer: the member order comes from the cet_cs_vec_t
   * declaration, which is not visible in this file.
   * NOTE(review): the first two members look like `name` and `alias` (they
   * match the p->name / p->alias accesses in cet_register); confirm the
   * meaning of the remaining NULL/0 members against the struct declaration.
   */
  cet_cs_vec_t cet_cs_vec_utf8 = {
    CET_CHARSET_UTF8,
    cet_cs_utf8_alias,
    NULL,
    NULL,
    NULL,
    0,
    0,
    NULL,
    0,
    NULL,
    0,
    NULL,
  };
  99. void
  100. cet_register(void)
  101. {
  102. int i, c;
  103. if (cet_cs_vec_root != NULL) {
  104. return;
  105. }
  106. cet_cs_vec_ct = 0;
  107. cet_register_cs(&cet_cs_vec_utf8); /* internal place holder */
  108. // Alias for "US-ASCII".
  109. #ifdef cet_cs_name_ansi_x3_4_1968
  110. cet_register_cs(&cet_cs_vec_ansi_x3_4_1968);
  111. #endif
  112. #ifdef cet_cs_name_cp1252
  113. cet_register_cs(&cet_cs_vec_cp1252);
  114. #endif
  115. #ifdef cet_cs_name_iso_8859_8
  116. cet_register_cs(&cet_cs_vec_iso_8859_8);
  117. #endif
  118. if (cet_cs_vec_ct > 0) {
  119. cet_cs_vec_t* p;
  120. cet_cs_alias_t* list;
  121. c = 0;
  122. /* enumerate count of all names and aliases */
  123. for (p = cet_cs_vec_root; p != NULL; p = p->next) {
  124. c++;
  125. if (p->alias != NULL) {
  126. char** a = (char**)p->alias;
  127. while ((*a) != NULL) {
  128. a++;
  129. c++;
  130. }
  131. }
  132. }
  133. /* create name to vec table */
  134. list = (cet_cs_alias_t*) xcalloc(c, sizeof(*list));
  135. i = 0;
  136. for (p = cet_cs_vec_root; p != NULL; p = p->next) {
  137. if (p->alias != NULL) {
  138. char** a = (char**)p->alias;
  139. list[i].name = xstrdup(p->name);
  140. list[i].vec = p;
  141. i++;
  142. while (*a != NULL) {
  143. list[i].name = xstrdup(*a);
  144. list[i].vec = p;
  145. i++;
  146. a++;
  147. }
  148. }
  149. }
  150. qsort(list, c, sizeof(*list), cet_cs_alias_qsort_cb);
  151. cet_cs_alias = list;
  152. cet_cs_alias_ct = c;
  153. /* install fallback for ascii-like (first 128 ch.) character sets */
  154. for (i = 1250; i <= 1258; i++) {
  155. char name[16];
  156. cet_cs_vec_t* vec;
  157. snprintf(name, sizeof(name), "WIN-CP%d", i);
  158. if ((vec = cet_find_cs_by_name(name))) {
  159. vec->fallback = &cet_cs_vec_ansi_x3_4_1968;
  160. }
  161. }
  162. for (i = 1; i <= 15; i++) {
  163. char name[16];
  164. cet_cs_vec_t* vec;
  165. snprintf(name, sizeof(name), "ISO-8859-%d", i);
  166. if ((vec = cet_find_cs_by_name(name))) {
  167. vec->fallback = &cet_cs_vec_ansi_x3_4_1968;
  168. }
  169. }
  170. }
  171. #ifdef CET_DEBUG
  172. printf("We have registered %d character sets with %d aliases\n", cet_cs_vec_ct, cet_cs_alias_ct);
  173. #endif
  174. }
  175. cet_cs_vec_t*
  176. cet_find_cs_by_name(const QString& name)
  177. {
  178. int i, j;
  179. cet_register();
  180. if (cet_cs_alias == NULL) {
  181. return NULL;
  182. }
  183. i = 0;
  184. j = cet_cs_alias_ct - 1;
  185. while (i <= j) {
  186. int a, x;
  187. cet_cs_alias_t* n;
  188. a = (i + j) >> 1;
  189. n = &cet_cs_alias[a];
  190. x = case_ignore_strcmp(name, n->name);
  191. if (x == 0) {
  192. return n->vec;
  193. } else if (x < 0) {
  194. j = a - 1;
  195. } else {
  196. i = a + 1;
  197. }
  198. }
  199. return NULL;
  200. }
  201. void
  202. cet_deregister(void)
  203. {
  204. int i;
  205. int j = cet_cs_alias_ct;
  206. cet_cs_alias_t* p = cet_cs_alias;
  207. if (p == NULL) {
  208. return;
  209. }
  210. cet_cs_alias_ct = 0;
  211. cet_cs_alias = NULL;
  212. for (i = 0; i < j; i++) {
  213. xfree(p[i].name);
  214. }
  215. xfree(p);
  216. }
  217. /* gpsbabel additions */
  218. int
  219. cet_validate_cs(const QString& cs, cet_cs_vec_t** vec, QString* cs_name)
  220. {
  221. cet_cs_vec_t* v;
  222. if (cs.isEmpty()) { /* set default us-ascii */
  223. *vec = &cet_cs_vec_ansi_x3_4_1968;
  224. *cs_name = CET_CHARSET_ASCII;
  225. return 1;
  226. }
  227. v = cet_find_cs_by_name(cs);
  228. if (v != NULL) {
  229. // TODO: make v->name into q QString and replace this...
  230. char* tmp = xstrdup(v->name);
  231. *cs_name = strupper(tmp);
  232. xfree(tmp);
  233. *vec = v;
  234. return 1;
  235. } else {
  236. cs_name->clear();
  237. *vec = NULL;
  238. return 0;
  239. }
  240. }
  241. void
  242. cet_convert_deinit(void)
  243. {
  244. global_opts.charset = NULL;
  245. global_opts.codec = NULL;
  246. }
  247. void
  248. cet_convert_init(const QString& cs_name, const int force)
  249. {
  250. if ((force != 0) || (global_opts.charset == NULL)) {
  251. cet_convert_deinit();
  252. if (0 == cet_validate_cs(cs_name, &global_opts.charset, &global_opts.charset_name)) {
  253. Fatal() << "Unsupported character set \"" << cs_name << ".";
  254. }
  255. if (cs_name.isEmpty()) { /* set default us-ascii */
  256. global_opts.codec = QTextCodec::codecForName(CET_CHARSET_ASCII);
  257. } else {
  258. QByteArray ba = CSTR(cs_name);
  259. global_opts.codec = QTextCodec::codecForName(ba);
  260. }
  261. if (!global_opts.codec) {
  262. Fatal() << "Unsupported character set \"" << cs_name << ".";
  263. }
  264. }
  265. }
  266. /* -------------------------------------------------------------------- */
  267. static void
  268. cet_flag_waypt(const Waypoint* wpt)
  269. {
  270. ((Waypoint*)(wpt))->wpt_flags.cet_converted = 1;
  271. }
  272. static void
  273. cet_flag_route(const route_head* rte)
  274. {
  275. ((route_head*)(rte))->cet_converted = 1;
  276. }
  277. static void
  278. cet_flag_all(void)
  279. {
  280. waypt_disp_all(cet_flag_waypt);
  281. route_disp_all(cet_flag_route, NULL, cet_flag_waypt);
  282. track_disp_all(cet_flag_route, NULL, cet_flag_waypt);
  283. }
  /* -------------------------------------------------------------------- */
  /* %%% complete data strings transformation %%% */
  /* -------------------------------------------------------------------- */
  /*
   * Conversion routine currently in effect. cet_convert_strings resets it to
   * NULL and then selects cet_convert_from_utf8 or cet_convert_to_utf8;
   * cet_convert_string calls through it.
   */
  static char* (*converter)(const char*) = NULL;
  288. /* two converters */
  289. static char*
  290. cet_convert_to_utf8(const char* str)
  291. {
  292. return cet_str_any_to_utf8(str, global_opts.charset);
  293. }
  294. static char*
  295. cet_convert_from_utf8(const char* str)
  296. {
  297. return cet_str_utf8_to_any(str, global_opts.charset);
  298. }
  299. /* cet_convert_string: internal used within cet_convert_strings process */
  300. char*
  301. cet_convert_string(char* str)
  302. {
  303. char* res;
  304. if (str == NULL) {
  305. return NULL; /* return origin if empty or NULL */
  306. } else if (*str == '\0') {
  307. return str;
  308. }
  309. res = converter(str);
  310. xfree(str);
  311. return res;
  312. }
  313. const char*
  314. cet_convert_string(const QString& str)
  315. {
  316. // FIXME: this is really weird. Since cet_convert_string wants to free
  317. // its argument (!) we make a duplicate just to satisfy that kind of goofy
  318. // requirement.
  319. return cet_convert_string(xstrdup(str));
  320. }
  321. /* cet_convert_waypt: internal used within cet_convert_strings process */
  322. static void
  323. cet_convert_waypt(const Waypoint* wpt)
  324. {
  325. Waypoint* w = (Waypoint*)wpt;
  326. format_specific_data* fs;
  327. if ((cet_output == 0) && (w->wpt_flags.cet_converted != 0)) {
  328. return;
  329. }
  330. w->wpt_flags.cet_converted = 1;
  331. fs = wpt->fs;
  332. while (fs != NULL) {
  333. if (fs->convert != NULL) {
  334. fs->convert(fs);
  335. }
  336. fs = fs->next;
  337. }
  338. }
  339. /* cet_convert_route_hdr: internal used within cet_convert_strings process */
  340. static void
  341. cet_convert_route_hdr(const route_head* route)
  342. {
  343. route_head* rte = (route_head*)route;
  344. if ((cet_output == 0) && (rte->cet_converted != 0)) {
  345. return;
  346. }
  347. rte->cet_converted = 1;
  348. }
  /*
   * cet_convert_route_tlr: internal used within cet_convert_strings process
   *
   * Trailer callback passed to route_disp_all/track_disp_all; it has nothing
   * to do for a route.
   */
  static void
  cet_convert_route_tlr(const route_head* route)
  {
    (void)route; /* intentionally unused */
  }
  355. /* %%% cet_convert_strings (public) %%%
  356. *
  357. * - Convert all well known strings of GPS data from or to UTF-8 -
  358. *
  359. * !!! One of "source" or "target" must be internal cet_cs_vec_utf8 or NULL !!! */
  360. void
  361. cet_convert_strings(const cet_cs_vec_t* source, const cet_cs_vec_t* target, const char* format)
  362. {
  363. char* cs_name_from, *cs_name_to;
  364. (void)format;
  365. converter = NULL;
  366. if ((source == NULL) || (source == &cet_cs_vec_utf8)) {
  367. if ((target == NULL) || (target == &cet_cs_vec_utf8)) {
  368. cet_flag_all();
  369. return;
  370. }
  371. cet_output = 1;
  372. converter = cet_convert_from_utf8;
  373. cs_name_from = (char*)cet_cs_vec_utf8.name;
  374. cs_name_to = (char*)target->name;
  375. } else {
  376. if ((target != NULL) && (target != &cet_cs_vec_utf8)) {
  377. fatal(MYNAME ": Internal error!\n");
  378. }
  379. cet_output = 0;
  380. converter = cet_convert_to_utf8;
  381. cs_name_to = (char*)cet_cs_vec_utf8.name;
  382. cs_name_from = (char*)source->name;
  383. }
  384. if (global_opts.debug_level > 0) {
  385. printf(MYNAME ": Converting from \"%s\" to \"%s\"", cs_name_from, cs_name_to);
  386. }
  387. waypt_disp_all(cet_convert_waypt);
  388. route_disp_all(cet_convert_route_hdr, cet_convert_route_tlr, cet_convert_waypt);
  389. track_disp_all(cet_convert_route_hdr, cet_convert_route_tlr, cet_convert_waypt);
  390. cet_output = 0;
  391. if (global_opts.debug_level > 0) {
  392. printf(", done.\n");
  393. }
  394. }