00001
00002
00003
00004
00005
00006
00007
00008
00009
00012 #include "../stdafx.h"
00013 #include "../core/alloc_func.hpp"
00014 #include "../core/endian_func.hpp"
00015 #include "../string_func.h"
00016 #include "../strings_type.h"
00017 #include "strgen.h"
00018 #include "../table/control_codes.h"
00019
00020 #include <stdarg.h>
00021
00022 #if (!defined(WIN32) && !defined(WIN64)) || defined(__CYGWIN__)
00023 #include <unistd.h>
00024 #include <sys/stat.h>
00025 #endif
00026
00027 #if defined WIN32 || defined __WATCOMC__
00028 #include <direct.h>
00029 #endif
00030
00031 #ifdef __MORPHOS__
00032 #ifdef stderr
00033 #undef stderr
00034 #endif
00035 #define stderr stdout
00036 #endif
00037
00038 #include "../table/strgen_tables.h"
00039
00040
00041
/** One case-specific variant of a string; the variants of a string form a singly linked list. */
struct Case {
	int caseidx;  ///< Case number as returned by ResolveCaseName().
	char *string; ///< Text of this variant (a strdup() copy of the parsed line).
	Case *next;   ///< Next variant of the same string, or NULL at the end of the list.
};
00047
static bool _masterlang;                     ///< Set by main(): true when only english.txt is compiled.
static bool _translated;                     ///< Set by WriteLangfile() per string: false when the english text is used as fallback.
static bool _translation;                    ///< Set by ParseFile(): true when the file name differs from the previously parsed file.
static const char *_file = "(unknown file)"; ///< File name printed in front of diagnostics.
static int _cur_line;                        ///< Line number printed in front of diagnostics.
static int _errors;                          ///< Number of calls to strgen_error().
static int _warnings;                        ///< Number of calls to strgen_warning().
static int _show_todo;                       ///< Bit 0 (-t): emit "<TODO> " prefixes; bit 1 (-w): warn about untranslated strings.
00054
00055 struct LangString {
00056 char *name;
00057 char *english;
00058 char *translated;
00059 uint16 hash_next;
00060 uint16 index;
00061 int line;
00062 Case *english_case;
00063 Case *translated_case;
00064 };
00065
00066 static LangString *_strings[65536];
00067
00068
00069 #define HASH_SIZE 32767
00070 static uint16 _hash_head[HASH_SIZE];
00071
00072 static byte _put_buf[4096];
00073 static int _put_pos;
00074 static int _next_string_id;
00075
00076 static uint32 _hash;
00077 static char _lang_name[32], _lang_ownname[32], _lang_isocode[16];
00078 static char _lang_digit_group_separator[8];
00079 static char _lang_digit_group_separator_currency[8];
00080 static char _lang_digit_decimal_separator[8];
00081 static byte _lang_pluralform;
00082 static byte _lang_textdir;
00083 static uint16 _lang_winlangid;
00084 static uint8 _lang_newgrflangid;
00085 #define MAX_NUM_GENDER 8
00086 static char _genders[MAX_NUM_GENDER][16];
00087 static uint _numgenders;
00088
00089
00090 #define MAX_NUM_CASES 50
00091 static char _cases[MAX_NUM_CASES][16];
00092 static uint _numcases;
00093
00094 static const char *_cur_ident;
00095
00096 struct CmdPair {
00097 const CmdStruct *a;
00098 const char *v;
00099 };
00100
00101 struct ParsedCommandStruct {
00102 uint np;
00103 CmdPair pairs[32];
00104 const CmdStruct *cmd[32];
00105 };
00106
00107
00108 static ParsedCommandStruct _cur_pcs;
00109 static int _cur_argidx;
00110
00111 static uint HashStr(const char *s)
00112 {
00113 uint hash = 0;
00114 for (; *s != '\0'; s++) hash = ROL(hash, 3) ^ *s;
00115 return hash % HASH_SIZE;
00116 }
00117
00118 static void HashAdd(const char *s, LangString *ls)
00119 {
00120 uint hash = HashStr(s);
00121 ls->hash_next = _hash_head[hash];
00122 _hash_head[hash] = ls->index + 1;
00123 }
00124
00125 static LangString *HashFind(const char *s)
00126 {
00127 int idx = _hash_head[HashStr(s)];
00128
00129 while (--idx >= 0) {
00130 LangString *ls = _strings[idx];
00131
00132 if (strcmp(ls->name, s) == 0) return ls;
00133 idx = ls->hash_next;
00134 }
00135 return NULL;
00136 }
00137
/* printf() fragment that follows the file name in diagnostics:
 * "file(line)" when building with MSVC, "file:line" otherwise. */
#if defined(_MSC_VER)
#	define LINE_NUM_FMT "(%d)"
#else
#	define LINE_NUM_FMT ":%d"
#endif
00143
00144 static void CDECL strgen_warning(const char *s, ...) WARN_FORMAT(1, 2);
00145
00146 static void CDECL strgen_warning(const char *s, ...)
00147 {
00148 char buf[1024];
00149 va_list va;
00150 va_start(va, s);
00151 vsnprintf(buf, lengthof(buf), s, va);
00152 va_end(va);
00153 fprintf(stderr, "%s" LINE_NUM_FMT ": warning: %s\n", _file, _cur_line, buf);
00154 _warnings++;
00155 }
00156
00157 static void CDECL strgen_error(const char *s, ...) WARN_FORMAT(1, 2);
00158
00159 static void CDECL strgen_error(const char *s, ...)
00160 {
00161 char buf[1024];
00162 va_list va;
00163 va_start(va, s);
00164 vsnprintf(buf, lengthof(buf), s, va);
00165 va_end(va);
00166 fprintf(stderr, "%s" LINE_NUM_FMT ": error: %s\n", _file, _cur_line, buf);
00167 _errors++;
00168 }
00169
00170 void NORETURN CDECL error(const char *s, ...)
00171 {
00172 char buf[1024];
00173 va_list va;
00174 va_start(va, s);
00175 vsnprintf(buf, lengthof(buf), s, va);
00176 va_end(va);
00177 fprintf(stderr, "%s" LINE_NUM_FMT ": FATAL: %s\n", _file, _cur_line, buf);
00178 exit(1);
00179 }
00180
00181 static void PutByte(byte c)
00182 {
00183 if (_put_pos == lengthof(_put_buf)) error("Put buffer too small");
00184 _put_buf[_put_pos++] = c;
00185 }
00186
00187
00188 static void PutUtf8(uint32 value)
00189 {
00190 if (value < 0x80) {
00191 PutByte(value);
00192 } else if (value < 0x800) {
00193 PutByte(0xC0 + GB(value, 6, 5));
00194 PutByte(0x80 + GB(value, 0, 6));
00195 } else if (value < 0x10000) {
00196 PutByte(0xE0 + GB(value, 12, 4));
00197 PutByte(0x80 + GB(value, 6, 6));
00198 PutByte(0x80 + GB(value, 0, 6));
00199 } else if (value < 0x110000) {
00200 PutByte(0xF0 + GB(value, 18, 3));
00201 PutByte(0x80 + GB(value, 12, 6));
00202 PutByte(0x80 + GB(value, 6, 6));
00203 PutByte(0x80 + GB(value, 0, 6));
00204 } else {
00205 strgen_warning("Invalid unicode value U+0x%X", value);
00206 }
00207 }
00208
00209
00210 size_t Utf8Validate(const char *s)
00211 {
00212 uint32 c;
00213
00214 if (!HasBit(s[0], 7)) {
00215
00216 return 1;
00217 } else if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
00218
00219 c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
00220 if (c >= 0x80) return 2;
00221 } else if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
00222
00223 c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
00224 if (c >= 0x800) return 3;
00225 } else if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
00226
00227 c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
00228 if (c >= 0x10000 && c <= 0x10FFFF) return 4;
00229 }
00230
00231 return 0;
00232 }
00233
00234
00235 static void EmitSingleChar(char *buf, int value)
00236 {
00237 if (*buf != '\0') strgen_warning("Ignoring trailing letters in command");
00238 PutUtf8(value);
00239 }
00240
00241
00242
00243
00244
00245
00246
00247
/**
 * Parse an argument number of the form "[+]<number>[:<offset>]".
 * A number with a leading '+' or a negative number is added to *value,
 * any other number replaces it.
 * @param buf    In: text to parse; out: first character behind the number (only updated on success).
 * @param value  In: current value; out: the new value.
 * @param offset Receives the number behind ':' when present; pass NULL to not parse an offset.
 * @return true when a number was parsed, false when the text holds no number
 *         or the ':' is not followed by one.
 */
bool ParseRelNum(char **buf, int *value, int *offset)
{
	const char *cursor = *buf;
	char *stop;

	/* Skip leading blanks. */
	for (; *cursor == ' ' || *cursor == '\t'; cursor++) {}

	bool relative = (*cursor == '+');
	if (relative) cursor++;

	int number = strtol(cursor, &stop, 0);
	if (stop == cursor) return false;

	*value = (relative || number < 0) ? *value + number : number;

	if (offset != NULL && *stop == ':') {
		/* An offset into the argument follows. */
		cursor = stop + 1;
		*offset = strtol(cursor, &stop, 0);
		if (stop == cursor) return false;
	}

	*buf = stop;
	return true;
}
00275
00276
/**
 * Split the next word off a string. A word is either enclosed in double
 * quotes or ends at the next space or tab. The input is modified: the
 * character ending the word is overwritten with a terminator.
 * @param buf In: text to parse; out: position behind the word.
 * @return The word, or NULL when only blanks were left (then *buf is not changed).
 */
char *ParseWord(char **buf)
{
	char *cursor = *buf;

	while (*cursor == ' ' || *cursor == '\t') cursor++;
	if (*cursor == '\0') return NULL;

	bool quoted = (*cursor == '"');
	if (quoted) cursor++;

	char *word = cursor;
	for (; *cursor != '\0'; cursor++) {
		bool at_end = quoted ? (*cursor == '"') : (*cursor == ' ' || *cursor == '\t');
		if (at_end) {
			/* Terminate the word and continue behind the delimiter. */
			*cursor++ = '\0';
			break;
		}
	}

	*buf = cursor;
	return word;
}
00310
00311
00312 static int TranslateArgumentIdx(int arg, int offset = 0);
00313
00314 static void EmitWordList(const char * const *words, uint nw)
00315 {
00316 PutByte(nw);
00317 for (uint i = 0; i < nw; i++) PutByte(strlen(words[i]) + 1);
00318 for (uint i = 0; i < nw; i++) {
00319 for (uint j = 0; words[i][j] != '\0'; j++) PutByte(words[i][j]);
00320 PutByte(0);
00321 }
00322 }
00323
00324 static void EmitPlural(char *buf, int value)
00325 {
00326 int argidx = _cur_argidx;
00327 int offset = 0;
00328 const char *words[5];
00329 int nw = 0;
00330
00331
00332 if (!ParseRelNum(&buf, &argidx, &offset)) argidx--;
00333
00334
00335 for (nw = 0; nw < 5; nw++) {
00336 words[nw] = ParseWord(&buf);
00337 if (words[nw] == NULL) break;
00338 }
00339
00340 if (nw == 0) {
00341 error("%s: No plural words", _cur_ident);
00342 }
00343
00344 if (_plural_forms[_lang_pluralform].plural_count != nw) {
00345 if (_translated) {
00346 error("%s: Invalid number of plural forms. Expecting %d, found %d.", _cur_ident,
00347 _plural_forms[_lang_pluralform].plural_count, nw);
00348 } else {
00349 if ((_show_todo & 2) != 0) strgen_warning("'%s' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
00350 if (nw > _plural_forms[_lang_pluralform].plural_count) {
00351 nw = _plural_forms[_lang_pluralform].plural_count;
00352 } else {
00353 for (; nw < _plural_forms[_lang_pluralform].plural_count; nw++) {
00354 words[nw] = words[nw - 1];
00355 }
00356 }
00357 }
00358 }
00359
00360 PutUtf8(SCC_PLURAL_LIST);
00361 PutByte(TranslateArgumentIdx(argidx, offset));
00362 EmitWordList(words, nw);
00363 }
00364
00365
00366 static void EmitGender(char *buf, int value)
00367 {
00368 int argidx = _cur_argidx;
00369 int offset = 0;
00370 uint nw;
00371
00372 if (buf[0] == '=') {
00373 buf++;
00374
00375
00376 for (nw = 0; ; nw++) {
00377 if (nw >= MAX_NUM_GENDER) error("G argument '%s' invalid", buf);
00378 if (strcmp(buf, _genders[nw]) == 0) break;
00379 }
00380
00381 PutUtf8(SCC_GENDER_INDEX);
00382 PutByte(nw);
00383 } else {
00384 const char *words[MAX_NUM_GENDER];
00385
00386
00387
00388 if (!ParseRelNum(&buf, &argidx, &offset)) {}
00389
00390 for (nw = 0; nw < MAX_NUM_GENDER; nw++) {
00391 words[nw] = ParseWord(&buf);
00392 if (words[nw] == NULL) break;
00393 }
00394 if (nw != _numgenders) error("Bad # of arguments for gender command");
00395 PutUtf8(SCC_GENDER_LIST);
00396 PutByte(TranslateArgumentIdx(argidx, offset));
00397 EmitWordList(words, nw);
00398 }
00399 }
00400
00401 static const CmdStruct *FindCmd(const char *s, int len)
00402 {
00403 for (const CmdStruct *cs = _cmd_structs; cs != endof(_cmd_structs); cs++) {
00404 if (strncmp(cs->cmd, s, len) == 0 && cs->cmd[len] == '\0') return cs;
00405 }
00406 return NULL;
00407 }
00408
00409 static uint ResolveCaseName(const char *str, uint len)
00410 {
00411 for (uint i = 0; i < MAX_NUM_CASES; i++) {
00412 if (memcmp(_cases[i], str, len) == 0 && _cases[i][len] == 0) return i + 1;
00413 }
00414 error("Invalid case-name '%s'", str);
00415 }
00416
00417
00418
00419
00420 static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei)
00421 {
00422 const char *s = *str, *start;
00423 char c;
00424
00425 *argno = -1;
00426 *casei = -1;
00427
00428
00429 for (; *s != '{'; s++) {
00430 if (*s == '\0') return NULL;
00431 }
00432 s++;
00433
00434 if (*s >= '0' && *s <= '9') {
00435 char *end;
00436
00437 *argno = strtoul(s, &end, 0);
00438 if (*end != ':') error("missing arg #");
00439 s = end + 1;
00440 }
00441
00442
00443 start = s;
00444 do {
00445 c = *s++;
00446 } while (c != '}' && c != ' ' && c != '=' && c != '.' && c != 0);
00447
00448 const CmdStruct *cmd = FindCmd(start, s - start - 1);
00449 if (cmd == NULL) {
00450 strgen_error("Undefined command '%.*s'", (int)(s - start - 1), start);
00451 return NULL;
00452 }
00453
00454 if (c == '.') {
00455 const char *casep = s;
00456
00457 if (!(cmd->flags & C_CASE)) {
00458 error("Command '%s' can't have a case", cmd->cmd);
00459 }
00460
00461 do {
00462 c = *s++;
00463 } while (c != '}' && c != ' ' && c != '\0');
00464 *casei = ResolveCaseName(casep, s - casep - 1);
00465 }
00466
00467 if (c == '\0') {
00468 strgen_error("Missing } from command '%s'", start);
00469 return NULL;
00470 }
00471
00472
00473 if (c != '}') {
00474 if (c == '=') s--;
00475
00476 start = s;
00477 for (;;) {
00478 c = *s++;
00479 if (c == '}') break;
00480 if (c == '\0') {
00481 strgen_error("Missing } from command '%s'", start);
00482 return NULL;
00483 }
00484 if (s - start == 250) error("param command too long");
00485 *param++ = c;
00486 }
00487 }
00488 *param = '\0';
00489
00490 *str = s;
00491
00492 return cmd;
00493 }
00494
00495
00496 static void HandlePragma(char *str)
00497 {
00498 if (!memcmp(str, "id ", 3)) {
00499 _next_string_id = strtoul(str + 3, NULL, 0);
00500 } else if (!memcmp(str, "name ", 5)) {
00501 strecpy(_lang_name, str + 5, lastof(_lang_name));
00502 } else if (!memcmp(str, "ownname ", 8)) {
00503 strecpy(_lang_ownname, str + 8, lastof(_lang_ownname));
00504 } else if (!memcmp(str, "isocode ", 8)) {
00505 strecpy(_lang_isocode, str + 8, lastof(_lang_isocode));
00506 } else if (!memcmp(str, "plural ", 7)) {
00507 _lang_pluralform = atoi(str + 7);
00508 if (_lang_pluralform >= lengthof(_plural_forms))
00509 error("Invalid pluralform %d", _lang_pluralform);
00510 } else if (!memcmp(str, "textdir ", 8)) {
00511 if (!memcmp(str + 8, "ltr", 3)) {
00512 _lang_textdir = TD_LTR;
00513 } else if (!memcmp(str + 8, "rtl", 3)) {
00514 _lang_textdir = TD_RTL;
00515 } else {
00516 error("Invalid textdir %s", str + 8);
00517 }
00518 } else if (!memcmp(str, "digitsep ", 9)) {
00519 str += 9;
00520 strecpy(_lang_digit_group_separator, strcmp(str, "{NBSP}") == 0 ? "\xC2\xA0" : str, lastof(_lang_digit_group_separator));
00521 } else if (!memcmp(str, "digitsepcur ", 12)) {
00522 str += 12;
00523 strecpy(_lang_digit_group_separator_currency, strcmp(str, "{NBSP}") == 0 ? "\xC2\xA0" : str, lastof(_lang_digit_group_separator_currency));
00524 } else if (!memcmp(str, "decimalsep ", 11)) {
00525 str += 11;
00526 strecpy(_lang_digit_decimal_separator, strcmp(str, "{NBSP}") == 0 ? "\xC2\xA0" : str, lastof(_lang_digit_decimal_separator));
00527 } else if (!memcmp(str, "winlangid ", 10)) {
00528 const char *buf = str + 10;
00529 long langid = strtol(buf, NULL, 16);
00530 if (langid > (long)UINT16_MAX || langid < 0) {
00531 error("Invalid winlangid %s", buf);
00532 }
00533 _lang_winlangid = (uint16)langid;
00534 } else if (!memcmp(str, "grflangid ", 10)) {
00535 const char *buf = str + 10;
00536 long langid = strtol(buf, NULL, 16);
00537 if (langid >= 0x7F || langid < 0) {
00538 error("Invalid grflangid %s", buf);
00539 }
00540 _lang_newgrflangid = (uint8)langid;
00541 } else if (!memcmp(str, "gender ", 7)) {
00542 char *buf = str + 7;
00543
00544 for (;;) {
00545 const char *s = ParseWord(&buf);
00546
00547 if (s == NULL) break;
00548 if (_numgenders >= MAX_NUM_GENDER) error("Too many genders, max %d", MAX_NUM_GENDER);
00549 strecpy(_genders[_numgenders], s, lastof(_genders[_numgenders]));
00550 _numgenders++;
00551 }
00552 } else if (!memcmp(str, "case ", 5)) {
00553 char *buf = str + 5;
00554
00555 for (;;) {
00556 const char *s = ParseWord(&buf);
00557
00558 if (s == NULL) break;
00559 if (_numcases >= MAX_NUM_CASES) error("Too many cases, max %d", MAX_NUM_CASES);
00560 strecpy(_cases[_numcases], s, lastof(_cases[_numcases]));
00561 _numcases++;
00562 }
00563 } else {
00564 error("unknown pragma '%s'", str);
00565 }
00566 }
00567
00568 static void ExtractCommandString(ParsedCommandStruct *p, const char *s, bool warnings)
00569 {
00570 char param[100];
00571 int argno;
00572 int argidx = 0;
00573 int casei;
00574
00575 memset(p, 0, sizeof(*p));
00576
00577 for (;;) {
00578
00579 const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
00580
00581 if (ar == NULL) break;
00582
00583
00584 if (argno != -1 && ar->consumes == 0) error("Non consumer param can't have a paramindex");
00585
00586 if (ar->consumes) {
00587 if (argno != -1) argidx = argno;
00588 if (argidx < 0 || (uint)argidx >= lengthof(p->cmd)) error("invalid param idx %d", argidx);
00589 if (p->cmd[argidx] != NULL && p->cmd[argidx] != ar) error("duplicate param idx %d", argidx);
00590
00591 p->cmd[argidx++] = ar;
00592 } else if (!(ar->flags & C_DONTCOUNT)) {
00593 if (p->np >= lengthof(p->pairs)) error("too many commands in string, max " PRINTF_SIZE, lengthof(p->pairs));
00594 p->pairs[p->np].a = ar;
00595 p->pairs[p->np].v = param[0] != '\0' ? strdup(param) : "";
00596 p->np++;
00597 }
00598 }
00599 }
00600
00601
00602 static const CmdStruct *TranslateCmdForCompare(const CmdStruct *a)
00603 {
00604 if (a == NULL) return NULL;
00605
00606 if (strcmp(a->cmd, "STRING1") == 0 ||
00607 strcmp(a->cmd, "STRING2") == 0 ||
00608 strcmp(a->cmd, "STRING3") == 0 ||
00609 strcmp(a->cmd, "STRING4") == 0 ||
00610 strcmp(a->cmd, "STRING5") == 0 ||
00611 strcmp(a->cmd, "RAW_STRING") == 0) {
00612 return FindCmd("STRING", 6);
00613 }
00614
00615 return a;
00616 }
00617
00618
00619 static bool CheckCommandsMatch(char *a, char *b, const char *name)
00620 {
00621
00622
00623
00624
00625 if (!_translation) return true;
00626
00627 ParsedCommandStruct templ;
00628 ParsedCommandStruct lang;
00629 bool result = true;
00630
00631 ExtractCommandString(&templ, b, true);
00632 ExtractCommandString(&lang, a, true);
00633
00634
00635 if (templ.np != lang.np) {
00636 strgen_warning("%s: template string and language string have a different # of commands", name);
00637 result = false;
00638 }
00639
00640 for (uint i = 0; i < templ.np; i++) {
00641
00642 bool found = false;
00643 for (uint j = 0; j < lang.np; j++) {
00644 if (templ.pairs[i].a == lang.pairs[j].a &&
00645 strcmp(templ.pairs[i].v, lang.pairs[j].v) == 0) {
00646
00647 lang.pairs[j].a = NULL;
00648 found = true;
00649 break;
00650 }
00651 }
00652
00653 if (!found) {
00654 strgen_warning("%s: command '%s' exists in template file but not in language file", name, templ.pairs[i].a->cmd);
00655 result = false;
00656 }
00657 }
00658
00659
00660
00661 for (uint i = 0; i < lengthof(templ.cmd); i++) {
00662 if (TranslateCmdForCompare(templ.cmd[i]) != lang.cmd[i]) {
00663 strgen_warning("%s: Param idx #%d '%s' doesn't match with template command '%s'", name, i,
00664 lang.cmd[i] == NULL ? "<empty>" : TranslateCmdForCompare(lang.cmd[i])->cmd,
00665 templ.cmd[i] == NULL ? "<empty>" : templ.cmd[i]->cmd);
00666 result = false;
00667 }
00668 }
00669
00670 return result;
00671 }
00672
00673 static void HandleString(char *str, bool master)
00674 {
00675 if (*str == '#') {
00676 if (str[1] == '#' && str[2] != '#') HandlePragma(str + 2);
00677 return;
00678 }
00679
00680
00681 if (*str == ';' || *str == ' ' || *str == '\0') return;
00682
00683 char *s = strchr(str, ':');
00684 if (s == NULL) {
00685 strgen_error("Line has no ':' delimiter");
00686 return;
00687 }
00688
00689 char *t;
00690
00691
00692 for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--) {}
00693 *t = 0;
00694 s++;
00695
00696
00697 const char *tmp;
00698 for (tmp = s; *tmp != '\0';) {
00699 size_t len = Utf8Validate(tmp);
00700 if (len == 0) error("Invalid UTF-8 sequence in '%s'", s);
00701 tmp += len;
00702 }
00703
00704
00705
00706 char *casep = strchr(str, '.');
00707 if (casep) *casep++ = '\0';
00708
00709
00710 LangString *ent = HashFind(str);
00711
00712 if (master) {
00713 if (ent != NULL && casep == NULL) {
00714 strgen_error("String name '%s' is used multiple times", str);
00715 return;
00716 }
00717
00718 if (ent == NULL && casep != NULL) {
00719 strgen_error("Base string name '%s' doesn't exist yet. Define it before defining a case.", str);
00720 return;
00721 }
00722
00723 if (ent == NULL) {
00724 if (_strings[_next_string_id]) {
00725 strgen_error("String ID 0x%X for '%s' already in use by '%s'", _next_string_id, str, _strings[_next_string_id]->name);
00726 return;
00727 }
00728
00729
00730 ent = CallocT<LangString>(1);
00731 _strings[_next_string_id] = ent;
00732 ent->index = _next_string_id++;
00733 ent->name = strdup(str);
00734 ent->line = _cur_line;
00735
00736 HashAdd(str, ent);
00737 }
00738
00739 if (casep != NULL) {
00740 Case *c = MallocT<Case>(1);
00741
00742 c->caseidx = ResolveCaseName(casep, strlen(casep));
00743 c->string = strdup(s);
00744 c->next = ent->english_case;
00745 ent->english_case = c;
00746 } else {
00747 ent->english = strdup(s);
00748 }
00749
00750 } else {
00751 if (ent == NULL) {
00752 strgen_warning("String name '%s' does not exist in master file", str);
00753 return;
00754 }
00755
00756 if (ent->translated && casep == NULL) {
00757 strgen_error("String name '%s' is used multiple times", str);
00758 return;
00759 }
00760
00761 if (s[0] == ':' && s[1] == '\0' && casep == NULL) {
00762
00763 ent->translated = strdup(ent->english);
00764 } else {
00765
00766 if (!CheckCommandsMatch(s, ent->english, str)) return;
00767
00768 if (casep != NULL) {
00769 Case *c = MallocT<Case>(1);
00770
00771 c->caseidx = ResolveCaseName(casep, strlen(casep));
00772 c->string = strdup(s);
00773 c->next = ent->translated_case;
00774 ent->translated_case = c;
00775 } else {
00776 ent->translated = strdup(s);
00777
00778
00779
00780 ent->line = _cur_line;
00781 }
00782 }
00783 }
00784 }
00785
00786
/**
 * Remove trailing carriage returns, line feeds and spaces from a string, in place.
 * @param buf The string to trim.
 */
static void rstrip(char *buf)
{
	char *end = buf + strlen(buf);

	while (end > buf && (end[-1] == '\r' || end[-1] == '\n' || end[-1] == ' ')) end--;
	*end = '\0';
}
00793
00794
00795 static void ParseFile(const char *file, bool english)
00796 {
00797 FILE *in;
00798 char buf[2048];
00799
00800
00801 const char *cur_file = strrchr(_file, PATHSEPCHAR);
00802 const char *next_file = strrchr(file, PATHSEPCHAR);
00803 _translation = next_file != NULL && cur_file != NULL && strcmp(cur_file, next_file) != 0;
00804 _file = file;
00805
00806
00807 _numgenders = 0;
00808 _lang_name[0] = _lang_ownname[0] = _lang_isocode[0] = '\0';
00809 strecpy(_lang_digit_group_separator, ",", lastof(_lang_digit_group_separator));
00810 strecpy(_lang_digit_group_separator_currency, ",", lastof(_lang_digit_group_separator_currency));
00811 strecpy(_lang_digit_decimal_separator, ".", lastof(_lang_digit_decimal_separator));
00812 _lang_textdir = TD_LTR;
00813 _lang_winlangid = 0x0000;
00814 _lang_newgrflangid = 0;
00815
00816
00817
00818 in = fopen(file, "r");
00819 if (in == NULL) error("Cannot open file");
00820 _cur_line = 1;
00821 while (fgets(buf, sizeof(buf), in) != NULL) {
00822 rstrip(buf);
00823 HandleString(buf, english);
00824 _cur_line++;
00825 }
00826 fclose(in);
00827
00828 if (StrEmpty(_lang_name) || StrEmpty(_lang_ownname) || StrEmpty(_lang_isocode)) {
00829 error("Language must include ##name, ##ownname and ##isocode");
00830 }
00831 }
00832
00833
00834 static uint32 MyHashStr(uint32 hash, const char *s)
00835 {
00836 for (; *s != '\0'; s++) {
00837 hash = ROL(hash, 3) ^ *s;
00838 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
00839 }
00840 return hash;
00841 }
00842
00843
00844
00845 static void MakeHashOfStrings()
00846 {
00847 uint32 hash = 0;
00848 uint i;
00849
00850 for (i = 0; i != lengthof(_strings); i++) {
00851 const LangString *ls = _strings[i];
00852
00853 if (ls != NULL) {
00854 const CmdStruct *cs;
00855 const char *s;
00856 char buf[256];
00857 int argno;
00858 int casei;
00859
00860 s = ls->name;
00861 hash ^= i * 0x717239;
00862 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
00863 hash = MyHashStr(hash, s + 1);
00864
00865 s = ls->english;
00866 while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != NULL) {
00867 if (cs->flags & C_DONTCOUNT) continue;
00868
00869 hash ^= (cs - _cmd_structs) * 0x1234567;
00870 hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
00871 }
00872 }
00873 }
00874 _hash = hash;
00875 }
00876
00877
00878 static uint CountInUse(uint grp)
00879 {
00880 int i;
00881
00882 for (i = 0x800; --i >= 0;) if (_strings[(grp << 11) + i] != NULL) break;
00883 return i + 1;
00884 }
00885
00886
00887 bool CompareFiles(const char *n1, const char *n2)
00888 {
00889 FILE *f2 = fopen(n2, "rb");
00890 if (f2 == NULL) return false;
00891
00892 FILE *f1 = fopen(n1, "rb");
00893 if (f1 == NULL) error("can't open %s", n1);
00894
00895 size_t l1, l2;
00896 do {
00897 char b1[4096];
00898 char b2[4096];
00899 l1 = fread(b1, 1, sizeof(b1), f1);
00900 l2 = fread(b2, 1, sizeof(b2), f2);
00901
00902 if (l1 != l2 || memcmp(b1, b2, l1)) {
00903 fclose(f2);
00904 fclose(f1);
00905 return false;
00906 }
00907 } while (l1);
00908
00909 fclose(f2);
00910 fclose(f1);
00911 return true;
00912 }
00913
00914
00915 static void WriteStringsH(const char *filename)
00916 {
00917 int next = -1;
00918
00919 FILE *out = fopen("tmp.xxx", "w");
00920 if (out == NULL) error("can't open tmp.xxx");
00921
00922 fprintf(out, "/* This file is automatically generated. Do not modify */\n\n");
00923 fprintf(out, "#ifndef TABLE_STRINGS_H\n");
00924 fprintf(out, "#define TABLE_STRINGS_H\n");
00925
00926 for (int i = 0; i != lengthof(_strings); i++) {
00927 if (_strings[i] != NULL) {
00928 if (next != i) fprintf(out, "\n");
00929 fprintf(out, "static const StringID %s = 0x%X;\n", _strings[i]->name, i);
00930 next = i + 1;
00931 }
00932 }
00933
00934 fprintf(out, "\nstatic const StringID STR_LAST_STRINGID = 0x%X;\n", next - 1);
00935
00936 fprintf(out,
00937 "\nenum {\n"
00938 "\tLANGUAGE_PACK_IDENT = 0x474E414C, // Big Endian value for 'LANG' (LE is 0x 4C 41 4E 47)\n"
00939 "\tLANGUAGE_PACK_VERSION = 0x%X,\n"
00940 "};\n", (uint)_hash
00941 );
00942
00943 fprintf(out, "\n#endif /* TABLE_STRINGS_H */\n");
00944
00945 fclose(out);
00946
00947 if (CompareFiles("tmp.xxx", filename)) {
00948
00949 unlink("tmp.xxx");
00950 } else {
00951
00952 #if defined(WIN32) || defined(WIN64)
00953 unlink(filename);
00954 #endif
00955 if (rename("tmp.xxx", filename) == -1) error("rename() failed");
00956 }
00957 }
00958
00959 static int TranslateArgumentIdx(int argidx, int offset)
00960 {
00961 int sum;
00962
00963 if (argidx < 0 || (uint)argidx >= lengthof(_cur_pcs.cmd)) {
00964 error("invalid argidx %d", argidx);
00965 }
00966 const CmdStruct *cs = _cur_pcs.cmd[argidx];
00967 if (cs != NULL && cs->consumes <= offset) {
00968 error("invalid argidx offset %d:%d\n", argidx, offset);
00969 }
00970
00971 for (int i = sum = 0; i < argidx; i++) {
00972 const CmdStruct *cs = _cur_pcs.cmd[i];
00973
00974 sum += (cs != NULL) ? cs->consumes : 1;
00975 }
00976
00977 return sum + offset;
00978 }
00979
00980 static void PutArgidxCommand()
00981 {
00982 PutUtf8(SCC_ARG_INDEX);
00983 PutByte(TranslateArgumentIdx(_cur_argidx));
00984 }
00985
00986
00987 static void PutCommandString(const char *str)
00988 {
00989 _cur_argidx = 0;
00990
00991 while (*str != '\0') {
00992
00993 if (*str != '{') {
00994 PutByte(*str++);
00995 continue;
00996 }
00997
00998 char param[256];
00999 int argno;
01000 int casei;
01001 const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei);
01002 if (cs == NULL) break;
01003
01004 if (casei != -1) {
01005 PutUtf8(SCC_SETCASE);
01006 PutByte(casei);
01007 }
01008
01009
01010 if (cs->consumes > 0) {
01011
01012 if (argno != -1 && argno != _cur_argidx) {
01013 _cur_argidx = argno;
01014 PutArgidxCommand();
01015 }
01016
01017
01018 cs = _cur_pcs.cmd[_cur_argidx++];
01019 if (cs == NULL) {
01020 error("%s: No argument exists at position %d", _cur_ident, _cur_argidx - 1);
01021 }
01022 }
01023
01024 cs->proc(param, cs->value);
01025 }
01026 }
01027
01028 static void WriteLength(FILE *f, uint length)
01029 {
01030 if (length < 0xC0) {
01031 fputc(length, f);
01032 } else if (length < 0x4000) {
01033 fputc((length >> 8) | 0xC0, f);
01034 fputc(length & 0xFF, f);
01035 } else {
01036 error("string too long");
01037 }
01038 }
01039
01040
01041 static void WriteLangfile(const char *filename)
01042 {
01043 uint in_use[32];
01044 LanguagePackHeader hdr;
01045
01046 FILE *f = fopen(filename, "wb");
01047 if (f == NULL) error("can't open %s", filename);
01048
01049 memset(&hdr, 0, sizeof(hdr));
01050 for (int i = 0; i != 32; i++) {
01051 uint n = CountInUse(i);
01052
01053 in_use[i] = n;
01054 hdr.offsets[i] = TO_LE16(n);
01055 }
01056
01057
01058 hdr.ident = TO_LE32(0x474E414C);
01059 hdr.version = TO_LE32(_hash);
01060 hdr.plural_form = _lang_pluralform;
01061 hdr.text_dir = _lang_textdir;
01062 hdr.winlangid = TO_LE16(_lang_winlangid);
01063 hdr.newgrflangid = _lang_newgrflangid;
01064 strecpy(hdr.name, _lang_name, lastof(hdr.name));
01065 strecpy(hdr.own_name, _lang_ownname, lastof(hdr.own_name));
01066 strecpy(hdr.isocode, _lang_isocode, lastof(hdr.isocode));
01067 strecpy(hdr.digit_group_separator, _lang_digit_group_separator, lastof(hdr.digit_group_separator));
01068 strecpy(hdr.digit_group_separator_currency, _lang_digit_group_separator_currency, lastof(hdr.digit_group_separator_currency));
01069 strecpy(hdr.digit_decimal_separator, _lang_digit_decimal_separator, lastof(hdr.digit_decimal_separator));
01070
01071 fwrite(&hdr, sizeof(hdr), 1, f);
01072
01073 for (int i = 0; i != 32; i++) {
01074 for (uint j = 0; j != in_use[i]; j++) {
01075 const LangString *ls = _strings[(i << 11) + j];
01076 const Case *casep;
01077 const char *cmdp;
01078
01079
01080 if (ls == NULL) {
01081 WriteLength(f, 0);
01082 continue;
01083 }
01084
01085 _cur_ident = ls->name;
01086 _cur_line = ls->line;
01087
01088
01089 if (_show_todo > 0 && ls->translated == NULL) {
01090 if ((_show_todo & 2) != 0) {
01091 strgen_warning("'%s' is untranslated", ls->name);
01092 }
01093 if ((_show_todo & 1) != 0) {
01094 const char *s = "<TODO> ";
01095 while (*s != '\0') PutByte(*s++);
01096 }
01097 }
01098
01099
01100 ExtractCommandString(&_cur_pcs, ls->english, false);
01101
01102 if (ls->translated_case != NULL || ls->translated != NULL) {
01103 casep = ls->translated_case;
01104 cmdp = ls->translated;
01105 } else {
01106 casep = ls->english_case;
01107 cmdp = ls->english;
01108 }
01109
01110 _translated = _masterlang || (cmdp != ls->english);
01111
01112 if (casep != NULL) {
01113 const Case *c;
01114 uint num;
01115
01116
01117
01118
01119
01120 PutUtf8(SCC_SWITCH_CASE);
01121
01122 for (num = 0, c = casep; c; c = c->next) num++;
01123 PutByte(num);
01124
01125
01126 for (c = casep; c != NULL; c = c->next) {
01127 int pos;
01128
01129 PutByte(c->caseidx);
01130
01131 pos = _put_pos;
01132 PutByte(0);
01133 PutByte(0);
01134
01135 PutCommandString(c->string);
01136 PutByte(0);
01137
01138 _put_buf[pos + 0] = GB(_put_pos - (pos + 2), 8, 8);
01139 _put_buf[pos + 1] = GB(_put_pos - (pos + 2), 0, 8);
01140 }
01141 }
01142
01143 if (cmdp != NULL) PutCommandString(cmdp);
01144
01145 WriteLength(f, _put_pos);
01146 fwrite(_put_buf, 1, _put_pos, f);
01147 _put_pos = 0;
01148 }
01149 }
01150
01151 fputc(0, f);
01152 fclose(f);
01153 }
01154
01156 static inline void ottd_mkdir(const char *directory)
01157 {
01158 #if defined(WIN32) || defined(__WATCOMC__)
01159 mkdir(directory);
01160 #else
01161 mkdir(directory, 0755);
01162 #endif
01163 }
01164
01168 static inline char *mkpath(char *buf, size_t buflen, const char *path, const char *file)
01169 {
01170 ttd_strlcpy(buf, path, buflen);
01171
01172 char *p = strchr(buf, '\0');
01173 if (p[-1] != PATHSEPCHAR && (size_t)(p - buf) + 1 < buflen) *p++ = PATHSEPCHAR;
01174 ttd_strlcpy(p, file, buflen - (size_t)(p - buf));
01175 return buf;
01176 }
01177
#if !defined(__MINGW32__)
/** Path separators need no conversion on this platform; the string is returned as is. */
static inline char *replace_pathsep(char *s) { return s; }
#else
/**
 * Replace every '/' in a path by '\\', in place.
 * @param s The path to convert.
 * @return s.
 */
static inline char *replace_pathsep(char *s)
{
	char *c = s;

	while (*c != '\0') {
		if (*c == '/') *c = '\\';
		c++;
	}
	return s;
}
#endif
01192
01193 int CDECL main(int argc, char *argv[])
01194 {
01195 char pathbuf[MAX_PATH];
01196 const char *src_dir = ".";
01197 const char *dest_dir = NULL;
01198
01199 while (argc > 1 && *argv[1] == '-') {
01200 if (strcmp(argv[1], "-v") == 0 || strcmp(argv[1], "--version") == 0) {
01201 puts("$Revision$");
01202 return 0;
01203 }
01204
01205 if (strcmp(argv[1], "-export-commands") == 0) {
01206 printf("args\tflags\tcommand\treplacement\n");
01207 for (const CmdStruct *cs = _cmd_structs; cs < endof(_cmd_structs); cs++) {
01208 char flags;
01209 switch (cs->value) {
01210 case 0x200E: case 0x200F:
01211 case 0x202A: case 0x202B: case 0x202C: case 0x202D: case 0x202E:
01212 case 0xA0:
01213 case '\n':
01214 case '{':
01215
01216 flags = 'i';
01217 break;
01218
01219 default:
01220 if (cs->proc == EmitGender) {
01221 flags = 'g';
01222 } else if (cs->proc == EmitPlural) {
01223 flags = 'p';
01224 } else {
01225 flags = '0';
01226 }
01227 }
01228 printf("%i\t%c\t\"%s\"\t\"%s\"\n", cs->consumes, flags, cs->cmd, strstr(cs->cmd, "STRING") ? "STRING" : cs->cmd);
01229 }
01230 return 0;
01231 }
01232
01233 if (strcmp(argv[1], "-export-plurals") == 0) {
01234 printf("count\tdescription\n");
01235 for (const PluralForm *pf = _plural_forms; pf < endof(_plural_forms); pf++) {
01236 printf("%i\t\"%s\"\n", pf->plural_count, pf->description);
01237 }
01238 return 0;
01239 }
01240
01241 if (strcmp(argv[1], "-export-pragmas") == 0) {
01242 printf("name\tflags\tdefault\tdescription\n");
01243 for (size_t i = 0; i < lengthof(_pragmas); i++) {
01244 printf("\"%s\"\t%s\t\"%s\"\t\"%s\"\n",
01245 _pragmas[i][0], _pragmas[i][1], _pragmas[i][2], _pragmas[i][3]);
01246 }
01247 return 0;
01248 }
01249
01250 if (strcmp(argv[1], "-t") == 0 || strcmp(argv[1], "--todo") == 0) {
01251 _show_todo |= 1;
01252 argc--, argv++;
01253 continue;
01254 }
01255
01256 if (strcmp(argv[1], "-w") == 0 || strcmp(argv[1], "--warning") == 0) {
01257 _show_todo |= 2;
01258 argc--, argv++;
01259 continue;
01260 }
01261
01262 if (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) {
01263 puts(
01264 "strgen - $Revision$\n"
01265 " -v | --version print version information and exit\n"
01266 " -t | --todo replace any untranslated strings with '<TODO>'\n"
01267 " -w | --warning print a warning for any untranslated strings\n"
01268 " -h | -? | --help print this help message and exit\n"
01269 " -s | --source_dir search for english.txt in the specified directory\n"
01270 " -d | --dest_dir put output file in the specified directory, create if needed\n"
01271 " -export-commands export all commands and exit\n"
01272 " -export-plurals export all plural forms and exit\n"
01273 " -export-pragmas export all pragmas and exit\n"
01274 " Run without parameters and strgen will search for english.txt and parse it,\n"
01275 " creating strings.h. Passing an argument, strgen will translate that language\n"
01276 " file using english.txt as a reference and output <language>.lng."
01277 );
01278 return 0;
01279 }
01280
01281 if (argc > 2 && (strcmp(argv[1], "-s") == 0 || strcmp(argv[1], "--source_dir") == 0)) {
01282 src_dir = replace_pathsep(argv[2]);
01283 argc -= 2, argv += 2;
01284 continue;
01285 }
01286
01287 if (argc > 2 && (strcmp(argv[1], "-d") == 0 || strcmp(argv[1], "--dest_dir") == 0)) {
01288 dest_dir = replace_pathsep(argv[2]);
01289 argc -= 2, argv += 2;
01290 continue;
01291 }
01292
01293 fprintf(stderr, "Invalid arguments\n");
01294 return 0;
01295 }
01296
01297 if (dest_dir == NULL) dest_dir = src_dir;
01298
01299
01300
01301
01302
01303 if (argc == 1) {
01304 mkpath(pathbuf, lengthof(pathbuf), src_dir, "english.txt");
01305
01306
01307 _masterlang = true;
01308 ParseFile(pathbuf, true);
01309 MakeHashOfStrings();
01310 if (_errors) return 1;
01311
01312
01313 ottd_mkdir(dest_dir);
01314 mkpath(pathbuf, lengthof(pathbuf), dest_dir, "strings.h");
01315 WriteStringsH(pathbuf);
01316 } else if (argc == 2) {
01317 char *r;
01318
01319 mkpath(pathbuf, lengthof(pathbuf), src_dir, "english.txt");
01320
01321
01322 _masterlang = false;
01323 ParseFile(pathbuf, true);
01324 MakeHashOfStrings();
01325 ParseFile(replace_pathsep(argv[1]), false);
01326 if (_errors) return 1;
01327
01328
01329 r = strrchr(argv[1], PATHSEPCHAR);
01330 mkpath(pathbuf, lengthof(pathbuf), dest_dir, (r != NULL) ? &r[1] : argv[1]);
01331
01332
01333 r = strrchr(pathbuf, '.');
01334 if (r == NULL || strcmp(r, ".txt") != 0) r = strchr(pathbuf, '\0');
01335 ttd_strlcpy(r, ".lng", (size_t)(r - pathbuf));
01336 WriteLangfile(pathbuf);
01337
01338
01339 if ((_show_todo & 2) != 0) {
01340 fprintf(stdout, "%d warnings and %d errors for %s\n", _warnings, _errors, pathbuf);
01341 }
01342 } else {
01343 fprintf(stderr, "Invalid arguments\n");
01344 }
01345
01346 return 0;
01347 }