00001
00002
00003
00004
00005
00006
00007
00008
00009
00012 #include "../stdafx.h"
00013 #include "../core/endian_func.hpp"
00014 #include "../string_func.h"
00015 #include "../strings_type.h"
00016 #include "../language.h"
00017 #include "../misc/getoptdata.h"
00018 #include "../table/control_codes.h"
00019
00020 #include <stdarg.h>
00021
00022 #if (!defined(WIN32) && !defined(WIN64)) || defined(__CYGWIN__)
00023 #include <unistd.h>
00024 #include <sys/stat.h>
00025 #endif
00026
00027 #if defined WIN32 || defined __WATCOMC__
00028 #include <direct.h>
00029 #endif
00030
00031 #ifdef __MORPHOS__
00032 #ifdef stderr
00033 #undef stderr
00034 #endif
00035 #define stderr stdout
00036 #endif
00037
00038 #include "../table/strgen_tables.h"
00039
00040
00041
/** One case-specific variant of a translated string; variants form a singly linked list. */
struct Case {
	int caseidx;   ///< Case number as returned by ResolveCaseName().
	char *string;  ///< Text to use for this case.
	Case *next;    ///< Next variant of the same string, or NULL at the end of the list.
};
00047
static bool _masterlang;                      ///< Set by main(): true when only the master file is processed.
static bool _translated;                      ///< Set by WriteLangfile(): whether the string being written counts as translated.
static bool _translation;                     ///< Set by ParseFile(): whether the parsed file differs in name from the previous one.
static const char *_file = "(unknown file)";  ///< File name shown in diagnostics.
static FILE *_output_file = NULL;             ///< Output file currently being written, NULL when none is open.
static const char *_output_filename = NULL;   ///< Name belonging to _output_file; error() removes this file.
static int _cur_line;                         ///< Line number shown in diagnostics.
static int _errors;                           ///< Number of calls to strgen_error().
static int _warnings;                         ///< Number of calls to strgen_warning().
static int _show_todo;                        ///< Bit 0: prefix untranslated strings with "<TODO> "; bit 1: warn about them.

/** Size of the buffer that receives the parameter text of a single command. */
static const ptrdiff_t MAX_COMMAND_PARAM_SIZE = 100;
00058
00059 struct LangString {
00060 char *name;
00061 char *english;
00062 char *translated;
00063 uint16 hash_next;
00064 uint16 index;
00065 int line;
00066 Case *translated_case;
00067 };
00068
00069 static LangString *_strings[65536];
00070 static LanguagePackHeader _lang;
00071
00072
00073 #define HASH_SIZE 32767
00074 static uint16 _hash_head[HASH_SIZE];
00075
00076 static byte _put_buf[4096];
00077 static uint _put_pos;
00078 static int _next_string_id;
00079
00080 static uint32 _hash;
00081
00082 static const char *_cur_ident;
00083
00084 struct CmdPair {
00085 const CmdStruct *a;
00086 const char *v;
00087 };
00088
00089 struct ParsedCommandStruct {
00090 uint np;
00091 CmdPair pairs[32];
00092 const CmdStruct *cmd[32];
00093 };
00094
00095
00096 static ParsedCommandStruct _cur_pcs;
00097 static int _cur_argidx;
00098
00099 static uint HashStr(const char *s)
00100 {
00101 uint hash = 0;
00102 for (; *s != '\0'; s++) hash = ROL(hash, 3) ^ *s;
00103 return hash % HASH_SIZE;
00104 }
00105
00106 static void HashAdd(const char *s, LangString *ls)
00107 {
00108 uint hash = HashStr(s);
00109 ls->hash_next = _hash_head[hash];
00110 _hash_head[hash] = ls->index + 1;
00111 }
00112
00113 static LangString *HashFind(const char *s)
00114 {
00115 int idx = _hash_head[HashStr(s)];
00116
00117 while (--idx >= 0) {
00118 LangString *ls = _strings[idx];
00119
00120 if (strcmp(ls->name, s) == 0) return ls;
00121 idx = ls->hash_next;
00122 }
00123 return NULL;
00124 }
00125
/*
 * printf format of a diagnostic line. The arguments to pass are the file
 * name, the line number and the message; 's' is the severity text.
 * A different layout is used when _MSC_VER is defined.
 */
#ifdef _MSC_VER
#	define LINE_NUM_FMT(s) "%s (%d): warning: %s (" s ")\n"
#else
#	define LINE_NUM_FMT(s) "%s:%d: " s ": %s\n"
#endif
00131
00132 static void CDECL strgen_warning(const char *s, ...) WARN_FORMAT(1, 2);
00133
00134 static void CDECL strgen_warning(const char *s, ...)
00135 {
00136 char buf[1024];
00137 va_list va;
00138 va_start(va, s);
00139 vsnprintf(buf, lengthof(buf), s, va);
00140 va_end(va);
00141 fprintf(stderr, LINE_NUM_FMT("warning"), _file, _cur_line, buf);
00142 _warnings++;
00143 }
00144
00145 static void CDECL strgen_error(const char *s, ...) WARN_FORMAT(1, 2);
00146
00147 static void CDECL strgen_error(const char *s, ...)
00148 {
00149 char buf[1024];
00150 va_list va;
00151 va_start(va, s);
00152 vsnprintf(buf, lengthof(buf), s, va);
00153 va_end(va);
00154 fprintf(stderr, LINE_NUM_FMT("error"), _file, _cur_line, buf);
00155 _errors++;
00156 }
00157
00158 void NORETURN CDECL error(const char *s, ...)
00159 {
00160 char buf[1024];
00161 va_list va;
00162 va_start(va, s);
00163 vsnprintf(buf, lengthof(buf), s, va);
00164 va_end(va);
00165 fprintf(stderr, LINE_NUM_FMT("FATAL"), _file, _cur_line, buf);
00166 #ifdef _MSC_VER
00167 fprintf(stderr, LINE_NUM_FMT("warning"), _file, _cur_line, "language is not compiled");
00168 #endif
00169
00170 if (_output_file != NULL) {
00171 fclose(_output_file);
00172 unlink(_output_filename);
00173 }
00174 exit(1);
00175 }
00176
00177 static void PutByte(byte c)
00178 {
00179 if (_put_pos >= lengthof(_put_buf)) error("Put buffer too small");
00180 _put_buf[_put_pos++] = c;
00181 }
00182
00183
00184 static void PutUtf8(uint32 value)
00185 {
00186 if (value < 0x80) {
00187 PutByte(value);
00188 } else if (value < 0x800) {
00189 PutByte(0xC0 + GB(value, 6, 5));
00190 PutByte(0x80 + GB(value, 0, 6));
00191 } else if (value < 0x10000) {
00192 PutByte(0xE0 + GB(value, 12, 4));
00193 PutByte(0x80 + GB(value, 6, 6));
00194 PutByte(0x80 + GB(value, 0, 6));
00195 } else if (value < 0x110000) {
00196 PutByte(0xF0 + GB(value, 18, 3));
00197 PutByte(0x80 + GB(value, 12, 6));
00198 PutByte(0x80 + GB(value, 6, 6));
00199 PutByte(0x80 + GB(value, 0, 6));
00200 } else {
00201 strgen_warning("Invalid unicode value U+0x%X", value);
00202 }
00203 }
00204
00205
00206 size_t Utf8Validate(const char *s)
00207 {
00208 uint32 c;
00209
00210 if (!HasBit(s[0], 7)) {
00211
00212 return 1;
00213 } else if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
00214
00215 c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
00216 if (c >= 0x80) return 2;
00217 } else if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
00218
00219 c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
00220 if (c >= 0x800) return 3;
00221 } else if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
00222
00223 c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
00224 if (c >= 0x10000 && c <= 0x10FFFF) return 4;
00225 }
00226
00227 return 0;
00228 }
00229
00230
00231 static void EmitSingleChar(char *buf, int value)
00232 {
00233 if (*buf != '\0') strgen_warning("Ignoring trailing letters in command");
00234 PutUtf8(value);
00235 }
00236
00237
00238
00239
00240
00241
00242
00243
/**
 * Read a number, optionally followed by ":<offset>", from the start of a text.
 *
 * A number with a leading '+' and any negative number is added to *value,
 * every other number replaces *value.
 *
 * @param buf    Text to read from; advanced past what was read on success only.
 * @param value  Value to replace or adjust.
 * @param offset Receives the number after ':'; pass NULL to not look for an offset.
 * @return true when a number (and, if present, its offset) could be read.
 *         Note that *value may already have changed when reading the offset fails.
 */
bool ParseRelNum(char **buf, int *value, int *offset)
{
	const char *cur = *buf;
	char *stop;

	/* Skip leading blanks. */
	for (; *cur == ' ' || *cur == '\t'; cur++) {}

	bool relative = (*cur == '+');
	if (relative) cur++;

	int number = strtol(cur, &stop, 0);
	if (stop == cur) return false;

	*value = (relative || number < 0) ? *value + number : number;

	if (offset != NULL && *stop == ':') {
		/* Read the offset that follows the colon. */
		cur = stop + 1;
		*offset = strtol(cur, &stop, 0);
		if (stop == cur) return false;
	}

	*buf = stop;
	return true;
}
00271
00272
/**
 * Cut the next word out of a text.
 *
 * A word is either enclosed in double quotes or runs up to the next space or
 * tab. The character ending the word is overwritten with '\0'.
 *
 * @param buf Text to read from; advanced past the word. Left untouched when no word is found.
 * @return Start of the word inside the text, or NULL when only blanks are left.
 */
char *ParseWord(char **buf)
{
	char *cur = *buf;
	char *word;

	while (*cur == ' ' || *cur == '\t') cur++;
	if (*cur == '\0') return NULL;

	if (*cur == '"') {
		/* Quoted word: everything up to the closing quote. */
		word = ++cur;
		while (*cur != '\0' && *cur != '"') cur++;
	} else {
		/* Plain word: everything up to the next blank. */
		word = cur;
		while (*cur != '\0' && *cur != ' ' && *cur != '\t') cur++;
	}

	/* Terminate the word, unless it ran up to the end of the text. */
	if (*cur != '\0') *cur++ = '\0';

	*buf = cur;
	return word;
}
00306
00307
00308 static int TranslateArgumentIdx(int arg, int offset = 0);
00309
00310 static void EmitWordList(const char * const *words, uint nw)
00311 {
00312 PutByte(nw);
00313 for (uint i = 0; i < nw; i++) PutByte(strlen(words[i]) + 1);
00314 for (uint i = 0; i < nw; i++) {
00315 for (uint j = 0; words[i][j] != '\0'; j++) PutByte(words[i][j]);
00316 PutByte(0);
00317 }
00318 }
00319
00320 static void EmitPlural(char *buf, int value)
00321 {
00322 int argidx = _cur_argidx;
00323 int offset = 0;
00324 const char *words[5];
00325 int nw = 0;
00326
00327
00328 if (!ParseRelNum(&buf, &argidx, &offset)) argidx--;
00329
00330
00331 for (nw = 0; nw < 5; nw++) {
00332 words[nw] = ParseWord(&buf);
00333 if (words[nw] == NULL) break;
00334 }
00335
00336 if (nw == 0) {
00337 error("%s: No plural words", _cur_ident);
00338 }
00339
00340 if (_plural_forms[_lang.plural_form].plural_count != nw) {
00341 if (_translated) {
00342 error("%s: Invalid number of plural forms. Expecting %d, found %d.", _cur_ident,
00343 _plural_forms[_lang.plural_form].plural_count, nw);
00344 } else {
00345 if ((_show_todo & 2) != 0) strgen_warning("'%s' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
00346 if (nw > _plural_forms[_lang.plural_form].plural_count) {
00347 nw = _plural_forms[_lang.plural_form].plural_count;
00348 } else {
00349 for (; nw < _plural_forms[_lang.plural_form].plural_count; nw++) {
00350 words[nw] = words[nw - 1];
00351 }
00352 }
00353 }
00354 }
00355
00356 PutUtf8(SCC_PLURAL_LIST);
00357 PutByte(_lang.plural_form);
00358 PutByte(TranslateArgumentIdx(argidx, offset));
00359 EmitWordList(words, nw);
00360 }
00361
00362
00363 static void EmitGender(char *buf, int value)
00364 {
00365 int argidx = _cur_argidx;
00366 int offset = 0;
00367 uint nw;
00368
00369 if (buf[0] == '=') {
00370 buf++;
00371
00372
00373 nw = _lang.GetGenderIndex(buf);
00374 if (nw >= MAX_NUM_GENDERS) error("G argument '%s' invalid", buf);
00375
00376
00377 PutUtf8(SCC_GENDER_INDEX);
00378 PutByte(nw);
00379 } else {
00380 const char *words[MAX_NUM_GENDERS];
00381
00382
00383
00384 if (!ParseRelNum(&buf, &argidx, &offset)) {}
00385
00386 const CmdStruct *cmd = _cur_pcs.cmd[argidx];
00387 if (cmd == NULL || (cmd->flags & C_GENDER) == 0) {
00388 error("Command '%s' can't have a gender", cmd == NULL ? "<empty>" : cmd->cmd);
00389 }
00390
00391 for (nw = 0; nw < MAX_NUM_GENDERS; nw++) {
00392 words[nw] = ParseWord(&buf);
00393 if (words[nw] == NULL) break;
00394 }
00395 if (nw != _lang.num_genders) error("Bad # of arguments for gender command");
00396
00397 assert(IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
00398 PutUtf8(SCC_GENDER_LIST);
00399 PutByte(TranslateArgumentIdx(argidx, offset));
00400 EmitWordList(words, nw);
00401 }
00402 }
00403
00404 static const CmdStruct *FindCmd(const char *s, int len)
00405 {
00406 for (const CmdStruct *cs = _cmd_structs; cs != endof(_cmd_structs); cs++) {
00407 if (strncmp(cs->cmd, s, len) == 0 && cs->cmd[len] == '\0') return cs;
00408 }
00409 return NULL;
00410 }
00411
00412 static uint ResolveCaseName(const char *str, uint len)
00413 {
00414
00415 char case_str[CASE_GENDER_LEN];
00416 len = min(lengthof(case_str) - 1, len);
00417 memcpy(case_str, str, len);
00418 case_str[len] = '\0';
00419
00420 uint8 case_idx = _lang.GetCaseIndex(case_str);
00421 if (case_idx >= MAX_NUM_CASES) error("Invalid case-name '%s'", case_str);
00422 return case_idx + 1;
00423 }
00424
00425
00426
00427
00428 static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei)
00429 {
00430 const char *s = *str, *start;
00431 char c;
00432
00433 *argno = -1;
00434 *casei = -1;
00435
00436
00437 for (; *s != '{'; s++) {
00438 if (*s == '\0') return NULL;
00439 }
00440 s++;
00441
00442 if (*s >= '0' && *s <= '9') {
00443 char *end;
00444
00445 *argno = strtoul(s, &end, 0);
00446 if (*end != ':') error("missing arg #");
00447 s = end + 1;
00448 }
00449
00450
00451 start = s;
00452 do {
00453 c = *s++;
00454 } while (c != '}' && c != ' ' && c != '=' && c != '.' && c != 0);
00455
00456 const CmdStruct *cmd = FindCmd(start, s - start - 1);
00457 if (cmd == NULL) {
00458 strgen_error("Undefined command '%.*s'", (int)(s - start - 1), start);
00459 return NULL;
00460 }
00461
00462 if (c == '.') {
00463 const char *casep = s;
00464
00465 if (!(cmd->flags & C_CASE)) {
00466 error("Command '%s' can't have a case", cmd->cmd);
00467 }
00468
00469 do {
00470 c = *s++;
00471 } while (c != '}' && c != ' ' && c != '\0');
00472 *casei = ResolveCaseName(casep, s - casep - 1);
00473 }
00474
00475 if (c == '\0') {
00476 strgen_error("Missing } from command '%s'", start);
00477 return NULL;
00478 }
00479
00480
00481 if (c != '}') {
00482 if (c == '=') s--;
00483
00484 start = s;
00485 for (;;) {
00486 c = *s++;
00487 if (c == '}') break;
00488 if (c == '\0') {
00489 strgen_error("Missing } from command '%s'", start);
00490 return NULL;
00491 }
00492 if (s - start == MAX_COMMAND_PARAM_SIZE) error("param command too long");
00493 *param++ = c;
00494 }
00495 }
00496 *param = '\0';
00497
00498 *str = s;
00499
00500 return cmd;
00501 }
00502
00503
00504 static void HandlePragma(char *str, bool master)
00505 {
00506 if (!memcmp(str, "id ", 3)) {
00507 _next_string_id = strtoul(str + 3, NULL, 0);
00508 } else if (!memcmp(str, "name ", 5)) {
00509 strecpy(_lang.name, str + 5, lastof(_lang.name));
00510 } else if (!memcmp(str, "ownname ", 8)) {
00511 strecpy(_lang.own_name, str + 8, lastof(_lang.own_name));
00512 } else if (!memcmp(str, "isocode ", 8)) {
00513 strecpy(_lang.isocode, str + 8, lastof(_lang.isocode));
00514 } else if (!memcmp(str, "plural ", 7)) {
00515 _lang.plural_form = atoi(str + 7);
00516 if (_lang.plural_form >= lengthof(_plural_forms)) {
00517 error("Invalid pluralform %d", _lang.plural_form);
00518 }
00519 } else if (!memcmp(str, "textdir ", 8)) {
00520 if (!memcmp(str + 8, "ltr", 3)) {
00521 _lang.text_dir = TD_LTR;
00522 } else if (!memcmp(str + 8, "rtl", 3)) {
00523 _lang.text_dir = TD_RTL;
00524 } else {
00525 error("Invalid textdir %s", str + 8);
00526 }
00527 } else if (!memcmp(str, "digitsep ", 9)) {
00528 str += 9;
00529 strecpy(_lang.digit_group_separator, strcmp(str, "{NBSP}") == 0 ? NBSP : str, lastof(_lang.digit_group_separator));
00530 } else if (!memcmp(str, "digitsepcur ", 12)) {
00531 str += 12;
00532 strecpy(_lang.digit_group_separator_currency, strcmp(str, "{NBSP}") == 0 ? NBSP : str, lastof(_lang.digit_group_separator_currency));
00533 } else if (!memcmp(str, "decimalsep ", 11)) {
00534 str += 11;
00535 strecpy(_lang.digit_decimal_separator, strcmp(str, "{NBSP}") == 0 ? NBSP : str, lastof(_lang.digit_decimal_separator));
00536 } else if (!memcmp(str, "winlangid ", 10)) {
00537 const char *buf = str + 10;
00538 long langid = strtol(buf, NULL, 16);
00539 if (langid > (long)UINT16_MAX || langid < 0) {
00540 error("Invalid winlangid %s", buf);
00541 }
00542 _lang.winlangid = (uint16)langid;
00543 } else if (!memcmp(str, "grflangid ", 10)) {
00544 const char *buf = str + 10;
00545 long langid = strtol(buf, NULL, 16);
00546 if (langid >= 0x7F || langid < 0) {
00547 error("Invalid grflangid %s", buf);
00548 }
00549 _lang.newgrflangid = (uint8)langid;
00550 } else if (!memcmp(str, "gender ", 7)) {
00551 if (master) error("Genders are not allowed in the base translation.");
00552 char *buf = str + 7;
00553
00554 for (;;) {
00555 const char *s = ParseWord(&buf);
00556
00557 if (s == NULL) break;
00558 if (_lang.num_genders >= MAX_NUM_GENDERS) error("Too many genders, max %d", MAX_NUM_GENDERS);
00559 strecpy(_lang.genders[_lang.num_genders], s, lastof(_lang.genders[_lang.num_genders]));
00560 _lang.num_genders++;
00561 }
00562 } else if (!memcmp(str, "case ", 5)) {
00563 if (master) error("Cases are not allowed in the base translation.");
00564 char *buf = str + 5;
00565
00566 for (;;) {
00567 const char *s = ParseWord(&buf);
00568
00569 if (s == NULL) break;
00570 if (_lang.num_cases >= MAX_NUM_CASES) error("Too many cases, max %d", MAX_NUM_CASES);
00571 strecpy(_lang.cases[_lang.num_cases], s, lastof(_lang.cases[_lang.num_cases]));
00572 _lang.num_cases++;
00573 }
00574 } else {
00575 error("unknown pragma '%s'", str);
00576 }
00577 }
00578
00579 static void ExtractCommandString(ParsedCommandStruct *p, const char *s, bool warnings)
00580 {
00581 char param[MAX_COMMAND_PARAM_SIZE];
00582 int argno;
00583 int argidx = 0;
00584 int casei;
00585
00586 memset(p, 0, sizeof(*p));
00587
00588 for (;;) {
00589
00590 const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
00591
00592 if (ar == NULL) break;
00593
00594
00595 if (argno != -1 && ar->consumes == 0) error("Non consumer param can't have a paramindex");
00596
00597 if (ar->consumes) {
00598 if (argno != -1) argidx = argno;
00599 if (argidx < 0 || (uint)argidx >= lengthof(p->cmd)) error("invalid param idx %d", argidx);
00600 if (p->cmd[argidx] != NULL && p->cmd[argidx] != ar) error("duplicate param idx %d", argidx);
00601
00602 p->cmd[argidx++] = ar;
00603 } else if (!(ar->flags & C_DONTCOUNT)) {
00604 if (p->np >= lengthof(p->pairs)) error("too many commands in string, max " PRINTF_SIZE, lengthof(p->pairs));
00605 p->pairs[p->np].a = ar;
00606 p->pairs[p->np].v = param[0] != '\0' ? strdup(param) : "";
00607 p->np++;
00608 }
00609 }
00610 }
00611
00612
00613 static const CmdStruct *TranslateCmdForCompare(const CmdStruct *a)
00614 {
00615 if (a == NULL) return NULL;
00616
00617 if (strcmp(a->cmd, "STRING1") == 0 ||
00618 strcmp(a->cmd, "STRING2") == 0 ||
00619 strcmp(a->cmd, "STRING3") == 0 ||
00620 strcmp(a->cmd, "STRING4") == 0 ||
00621 strcmp(a->cmd, "STRING5") == 0 ||
00622 strcmp(a->cmd, "RAW_STRING") == 0) {
00623 return FindCmd("STRING", 6);
00624 }
00625
00626 return a;
00627 }
00628
00629
00630 static bool CheckCommandsMatch(char *a, char *b, const char *name)
00631 {
00632
00633
00634
00635
00636 if (!_translation) return true;
00637
00638 ParsedCommandStruct templ;
00639 ParsedCommandStruct lang;
00640 bool result = true;
00641
00642 ExtractCommandString(&templ, b, true);
00643 ExtractCommandString(&lang, a, true);
00644
00645
00646 if (templ.np != lang.np) {
00647 strgen_warning("%s: template string and language string have a different # of commands", name);
00648 result = false;
00649 }
00650
00651 for (uint i = 0; i < templ.np; i++) {
00652
00653 bool found = false;
00654 for (uint j = 0; j < lang.np; j++) {
00655 if (templ.pairs[i].a == lang.pairs[j].a &&
00656 strcmp(templ.pairs[i].v, lang.pairs[j].v) == 0) {
00657
00658 lang.pairs[j].a = NULL;
00659 found = true;
00660 break;
00661 }
00662 }
00663
00664 if (!found) {
00665 strgen_warning("%s: command '%s' exists in template file but not in language file", name, templ.pairs[i].a->cmd);
00666 result = false;
00667 }
00668 }
00669
00670
00671
00672 for (uint i = 0; i < lengthof(templ.cmd); i++) {
00673 if (TranslateCmdForCompare(templ.cmd[i]) != lang.cmd[i]) {
00674 strgen_warning("%s: Param idx #%d '%s' doesn't match with template command '%s'", name, i,
00675 lang.cmd[i] == NULL ? "<empty>" : TranslateCmdForCompare(lang.cmd[i])->cmd,
00676 templ.cmd[i] == NULL ? "<empty>" : templ.cmd[i]->cmd);
00677 result = false;
00678 }
00679 }
00680
00681 return result;
00682 }
00683
00684 static void HandleString(char *str, bool master)
00685 {
00686 if (*str == '#') {
00687 if (str[1] == '#' && str[2] != '#') HandlePragma(str + 2, master);
00688 return;
00689 }
00690
00691
00692 if (*str == ';' || *str == ' ' || *str == '\0') return;
00693
00694 char *s = strchr(str, ':');
00695 if (s == NULL) {
00696 strgen_error("Line has no ':' delimiter");
00697 return;
00698 }
00699
00700 char *t;
00701
00702
00703 for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--) {}
00704 *t = 0;
00705 s++;
00706
00707
00708 const char *tmp;
00709 for (tmp = s; *tmp != '\0';) {
00710 size_t len = Utf8Validate(tmp);
00711 if (len == 0) error("Invalid UTF-8 sequence in '%s'", s);
00712
00713 WChar c;
00714 Utf8Decode(&c, tmp);
00715 if (c <= 0x001F ||
00716 (c >= 0xE000 && c <= 0xF8FF) ||
00717 (c >= 0xFFF0 && c <= 0xFFFF)) {
00718 error("Unwanted UTF-8 character U+%04X in sequence '%s'", c, s);
00719 }
00720
00721 tmp += len;
00722 }
00723
00724
00725
00726 char *casep = strchr(str, '.');
00727 if (casep != NULL) *casep++ = '\0';
00728
00729
00730 LangString *ent = HashFind(str);
00731
00732 if (master) {
00733 if (casep != NULL) {
00734 strgen_error("Cases in the base translation are not supported.");
00735 return;
00736 }
00737
00738 if (ent != NULL) {
00739 strgen_error("String name '%s' is used multiple times", str);
00740 return;
00741 }
00742
00743 if (_strings[_next_string_id]) {
00744 strgen_error("String ID 0x%X for '%s' already in use by '%s'", _next_string_id, str, _strings[_next_string_id]->name);
00745 return;
00746 }
00747
00748
00749 ent = CallocT<LangString>(1);
00750 _strings[_next_string_id] = ent;
00751 ent->index = _next_string_id++;
00752 ent->name = strdup(str);
00753 ent->line = _cur_line;
00754
00755 HashAdd(str, ent);
00756
00757 ent->english = strdup(s);
00758 } else {
00759 if (ent == NULL) {
00760 strgen_warning("String name '%s' does not exist in master file", str);
00761 return;
00762 }
00763
00764 if (ent->translated && casep == NULL) {
00765 strgen_error("String name '%s' is used multiple times", str);
00766 return;
00767 }
00768
00769
00770 if (!CheckCommandsMatch(s, ent->english, str)) return;
00771
00772 if (casep != NULL) {
00773 Case *c = MallocT<Case>(1);
00774
00775 c->caseidx = ResolveCaseName(casep, strlen(casep));
00776 c->string = strdup(s);
00777 c->next = ent->translated_case;
00778 ent->translated_case = c;
00779 } else {
00780 ent->translated = strdup(s);
00781
00782
00783
00784 ent->line = _cur_line;
00785 }
00786 }
00787 }
00788
00789
/**
 * Remove carriage returns, newlines and spaces from the end of a string.
 * @param buf String to shorten in place.
 */
static void rstrip(char *buf)
{
	char *end = buf + strlen(buf);

	while (end > buf) {
		char last = end[-1];
		if (last != '\r' && last != '\n' && last != ' ') break;
		end--;
	}
	*end = '\0';
}
00796
00797
00798 static void ParseFile(const char *file, bool english)
00799 {
00800 FILE *in;
00801 char buf[2048];
00802
00803
00804 const char *cur_file = strrchr(_file, PATHSEPCHAR);
00805 const char *next_file = strrchr(file, PATHSEPCHAR);
00806 _translation = next_file != NULL && cur_file != NULL && strcmp(cur_file, next_file) != 0;
00807 _file = file;
00808
00809
00810 MemSetT(&_lang, 0);
00811 strecpy(_lang.digit_group_separator, ",", lastof(_lang.digit_group_separator));
00812 strecpy(_lang.digit_group_separator_currency, ",", lastof(_lang.digit_group_separator_currency));
00813 strecpy(_lang.digit_decimal_separator, ".", lastof(_lang.digit_decimal_separator));
00814
00815 in = fopen(file, "r");
00816 if (in == NULL) error("Cannot open file");
00817 _cur_line = 1;
00818 while (fgets(buf, sizeof(buf), in) != NULL) {
00819 rstrip(buf);
00820 HandleString(buf, english);
00821 _cur_line++;
00822 }
00823 fclose(in);
00824
00825 if (StrEmpty(_lang.name) || StrEmpty(_lang.own_name) || StrEmpty(_lang.isocode)) {
00826 error("Language must include ##name, ##ownname and ##isocode");
00827 }
00828 }
00829
00830
00831 static uint32 MyHashStr(uint32 hash, const char *s)
00832 {
00833 for (; *s != '\0'; s++) {
00834 hash = ROL(hash, 3) ^ *s;
00835 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
00836 }
00837 return hash;
00838 }
00839
00840
00841
00842 static void MakeHashOfStrings()
00843 {
00844 uint32 hash = 0;
00845 uint i;
00846
00847 for (i = 0; i != lengthof(_strings); i++) {
00848 const LangString *ls = _strings[i];
00849
00850 if (ls != NULL) {
00851 const CmdStruct *cs;
00852 const char *s;
00853 char buf[MAX_COMMAND_PARAM_SIZE];
00854 int argno;
00855 int casei;
00856
00857 s = ls->name;
00858 hash ^= i * 0x717239;
00859 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
00860 hash = MyHashStr(hash, s + 1);
00861
00862 s = ls->english;
00863 while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != NULL) {
00864 if (cs->flags & C_DONTCOUNT) continue;
00865
00866 hash ^= (cs - _cmd_structs) * 0x1234567;
00867 hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
00868 }
00869 }
00870 }
00871 _hash = hash;
00872 }
00873
00874
00875 static uint CountInUse(uint grp)
00876 {
00877 int i;
00878
00879 for (i = 0x800; --i >= 0;) if (_strings[(grp << 11) + i] != NULL) break;
00880 return i + 1;
00881 }
00882
00883
00884 bool CompareFiles(const char *n1, const char *n2)
00885 {
00886 FILE *f2 = fopen(n2, "rb");
00887 if (f2 == NULL) return false;
00888
00889 FILE *f1 = fopen(n1, "rb");
00890 if (f1 == NULL) error("can't open %s", n1);
00891
00892 size_t l1, l2;
00893 do {
00894 char b1[4096];
00895 char b2[4096];
00896 l1 = fread(b1, 1, sizeof(b1), f1);
00897 l2 = fread(b2, 1, sizeof(b2), f2);
00898
00899 if (l1 != l2 || memcmp(b1, b2, l1)) {
00900 fclose(f2);
00901 fclose(f1);
00902 return false;
00903 }
00904 } while (l1 != 0);
00905
00906 fclose(f2);
00907 fclose(f1);
00908 return true;
00909 }
00910
00911
00912 static void WriteStringsH(const char *filename)
00913 {
00914 int next = -1;
00915
00916 _output_filename = "tmp.xxx";
00917 _output_file = fopen(_output_filename, "w");
00918 if (_output_file == NULL) error("can't open tmp.xxx");
00919
00920 fprintf(_output_file, "/* This file is automatically generated. Do not modify */\n\n");
00921 fprintf(_output_file, "#ifndef TABLE_STRINGS_H\n");
00922 fprintf(_output_file, "#define TABLE_STRINGS_H\n");
00923
00924 for (int i = 0; i != lengthof(_strings); i++) {
00925 if (_strings[i] != NULL) {
00926 if (next != i) fprintf(_output_file, "\n");
00927 fprintf(_output_file, "static const StringID %s = 0x%X;\n", _strings[i]->name, i);
00928 next = i + 1;
00929 }
00930 }
00931
00932 fprintf(_output_file, "\nstatic const StringID STR_LAST_STRINGID = 0x%X;\n\n", next - 1);
00933
00934
00935 int max_plural_forms = 0;
00936 for (uint i = 0; i < lengthof(_plural_forms); i++) {
00937 max_plural_forms = max(max_plural_forms, _plural_forms[i].plural_count);
00938 }
00939
00940 fprintf(_output_file,
00941 "static const uint LANGUAGE_PACK_VERSION = 0x%X;\n"
00942 "static const uint LANGUAGE_MAX_PLURAL = %d;\n"
00943 "static const uint LANGUAGE_MAX_PLURAL_FORMS = %d;\n\n",
00944 (uint)_hash, (uint)lengthof(_plural_forms), max_plural_forms
00945 );
00946
00947 fprintf(_output_file, "#endif /* TABLE_STRINGS_H */\n");
00948
00949 fclose(_output_file);
00950 _output_file = NULL;
00951
00952 if (CompareFiles(_output_filename, filename)) {
00953
00954 unlink(_output_filename);
00955 } else {
00956
00957 #if defined(WIN32) || defined(WIN64)
00958 unlink(filename);
00959 #endif
00960 if (rename(_output_filename, filename) == -1) error("rename() failed");
00961 }
00962 _output_filename = NULL;
00963 }
00964
00965 static int TranslateArgumentIdx(int argidx, int offset)
00966 {
00967 int sum;
00968
00969 if (argidx < 0 || (uint)argidx >= lengthof(_cur_pcs.cmd)) {
00970 error("invalid argidx %d", argidx);
00971 }
00972 const CmdStruct *cs = _cur_pcs.cmd[argidx];
00973 if (cs != NULL && cs->consumes <= offset) {
00974 error("invalid argidx offset %d:%d", argidx, offset);
00975 }
00976
00977 if (_cur_pcs.cmd[argidx] == NULL) {
00978 error("no command for this argidx %d", argidx);
00979 }
00980
00981 for (int i = sum = 0; i < argidx; i++) {
00982 const CmdStruct *cs = _cur_pcs.cmd[i];
00983
00984 sum += (cs != NULL) ? cs->consumes : 1;
00985 }
00986
00987 return sum + offset;
00988 }
00989
00990 static void PutArgidxCommand()
00991 {
00992 PutUtf8(SCC_ARG_INDEX);
00993 PutByte(TranslateArgumentIdx(_cur_argidx));
00994 }
00995
00996
00997 static void PutCommandString(const char *str)
00998 {
00999 _cur_argidx = 0;
01000
01001 while (*str != '\0') {
01002
01003 if (*str != '{') {
01004 PutByte(*str++);
01005 continue;
01006 }
01007
01008 char param[MAX_COMMAND_PARAM_SIZE];
01009 int argno;
01010 int casei;
01011 const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei);
01012 if (cs == NULL) break;
01013
01014 if (casei != -1) {
01015 PutUtf8(SCC_SETCASE);
01016 PutByte(casei);
01017 }
01018
01019
01020 if (cs->consumes > 0) {
01021
01022 if (argno != -1 && argno != _cur_argidx) {
01023 _cur_argidx = argno;
01024 PutArgidxCommand();
01025 }
01026
01027
01028 cs = _cur_pcs.cmd[_cur_argidx++];
01029 if (cs == NULL) {
01030 error("%s: No argument exists at position %d", _cur_ident, _cur_argidx - 1);
01031 }
01032 }
01033
01034 cs->proc(param, cs->value);
01035 }
01036 }
01037
01038 static void WriteLength(FILE *f, uint length)
01039 {
01040 if (length < 0xC0) {
01041 fputc(length, f);
01042 } else if (length < 0x4000) {
01043 fputc((length >> 8) | 0xC0, f);
01044 fputc(length & 0xFF, f);
01045 } else {
01046 error("string too long");
01047 }
01048 }
01049
01050
01051 static void WriteLangfile(const char *filename)
01052 {
01053 uint in_use[32];
01054
01055 _output_filename = filename;
01056 _output_file = fopen(filename, "wb");
01057 if (_output_file == NULL) error("can't open %s", filename);
01058
01059 for (int i = 0; i != 32; i++) {
01060 uint n = CountInUse(i);
01061
01062 in_use[i] = n;
01063 _lang.offsets[i] = TO_LE16(n);
01064 }
01065
01066 _lang.ident = TO_LE32(LanguagePackHeader::IDENT);
01067 _lang.version = TO_LE32(_hash);
01068 _lang.winlangid = TO_LE16(_lang.winlangid);
01069
01070 fwrite(&_lang, sizeof(_lang), 1, _output_file);
01071
01072 for (int i = 0; i != 32; i++) {
01073 for (uint j = 0; j != in_use[i]; j++) {
01074 const LangString *ls = _strings[(i << 11) + j];
01075 const Case *casep;
01076 const char *cmdp;
01077
01078
01079 if (ls == NULL) {
01080 WriteLength(_output_file, 0);
01081 continue;
01082 }
01083
01084 _cur_ident = ls->name;
01085 _cur_line = ls->line;
01086
01087
01088 if (_show_todo > 0 && ls->translated == NULL) {
01089 if ((_show_todo & 2) != 0) {
01090 strgen_warning("'%s' is untranslated", ls->name);
01091 }
01092 if ((_show_todo & 1) != 0) {
01093 const char *s = "<TODO> ";
01094 while (*s != '\0') PutByte(*s++);
01095 }
01096 }
01097
01098
01099 ExtractCommandString(&_cur_pcs, ls->english, false);
01100
01101 if (ls->translated_case != NULL || ls->translated != NULL) {
01102 casep = ls->translated_case;
01103 cmdp = ls->translated;
01104 } else {
01105 casep = NULL;
01106 cmdp = ls->english;
01107 }
01108
01109 _translated = _masterlang || (cmdp != ls->english);
01110
01111 if (casep != NULL) {
01112 const Case *c;
01113 uint num;
01114
01115
01116
01117
01118
01119 PutUtf8(SCC_SWITCH_CASE);
01120
01121 for (num = 0, c = casep; c; c = c->next) num++;
01122 PutByte(num);
01123
01124
01125 for (c = casep; c != NULL; c = c->next) {
01126 uint pos;
01127
01128 PutByte(c->caseidx);
01129
01130 pos = _put_pos;
01131 PutByte(0);
01132 PutByte(0);
01133
01134 PutCommandString(c->string);
01135 PutByte(0);
01136
01137 _put_buf[pos + 0] = GB(_put_pos - (pos + 2), 8, 8);
01138 _put_buf[pos + 1] = GB(_put_pos - (pos + 2), 0, 8);
01139 }
01140 }
01141
01142 if (cmdp != NULL) PutCommandString(cmdp);
01143
01144 WriteLength(_output_file, _put_pos);
01145 fwrite(_put_buf, 1, _put_pos, _output_file);
01146 _put_pos = 0;
01147 }
01148 }
01149
01150 fputc(0, _output_file);
01151 fclose(_output_file);
01152
01153 _output_file = NULL;
01154 _output_filename = NULL;
01155 }
01156
01158 static inline void ottd_mkdir(const char *directory)
01159 {
01160 #if defined(WIN32) || defined(__WATCOMC__)
01161 mkdir(directory);
01162 #else
01163 mkdir(directory, 0755);
01164 #endif
01165 }
01166
01172 static inline char *mkpath(char *buf, size_t buflen, const char *path, const char *file)
01173 {
01174 ttd_strlcpy(buf, path, buflen);
01175
01176 char *p = strchr(buf, '\0');
01177 if (p[-1] != PATHSEPCHAR && (size_t)(p - buf) + 1 < buflen) *p++ = PATHSEPCHAR;
01178 ttd_strlcpy(p, file, buflen - (size_t)(p - buf));
01179 return buf;
01180 }
01181
#if defined(__MINGW32__)

/**
 * Turn every '/' of a string into '\\'.
 * @param s String to change in place.
 * @return The string.
 */
static inline char *replace_pathsep(char *s)
{
	char *c = s;
	while (*c != '\0') {
		if (*c == '/') *c = '\\';
		c++;
	}
	return s;
}
#else
/** Nothing to replace in this configuration; the string is returned as is. */
static inline char *replace_pathsep(char *s)
{
	return s;
}
#endif
01196
01198 static const OptionData _opts[] = {
01199 GETOPT_NOVAL( 'v', "--version"),
01200 GETOPT_GENERAL('C', '\0', "-export-commands", ODF_NO_VALUE),
01201 GETOPT_GENERAL('L', '\0', "-export-plurals", ODF_NO_VALUE),
01202 GETOPT_GENERAL('P', '\0', "-export-pragmas", ODF_NO_VALUE),
01203 GETOPT_NOVAL( 't', "--todo"),
01204 GETOPT_NOVAL( 'w', "--warning"),
01205 GETOPT_NOVAL( 'h', "--help"),
01206 GETOPT_GENERAL('h', '?', NULL, ODF_NO_VALUE),
01207 GETOPT_VALUE( 's', "--source_dir"),
01208 GETOPT_VALUE( 'd', "--dest_dir"),
01209 GETOPT_END(),
01210 };
01211
01212 int CDECL main(int argc, char *argv[])
01213 {
01214 char pathbuf[MAX_PATH];
01215 const char *src_dir = ".";
01216 const char *dest_dir = NULL;
01217
01218 GetOptData mgo(argc - 1, argv + 1, _opts);
01219 for (;;) {
01220 int i = mgo.GetOpt();
01221 if (i == -1) break;
01222
01223 switch (i) {
01224 case 'v':
01225 puts("$Revision$");
01226 return 0;
01227
01228 case 'C':
01229 printf("args\tflags\tcommand\treplacement\n");
01230 for (const CmdStruct *cs = _cmd_structs; cs < endof(_cmd_structs); cs++) {
01231 char flags;
01232 switch (cs->value) {
01233 case 0x200E: case 0x200F:
01234 case 0x202A: case 0x202B: case 0x202C: case 0x202D: case 0x202E:
01235 case 0xA0:
01236 case '\n':
01237 case '{':
01238
01239 flags = 'i';
01240 break;
01241
01242 default:
01243 if (cs->proc == EmitGender) {
01244 flags = 'g';
01245 } else if (cs->proc == EmitPlural) {
01246 flags = 'p';
01247 } else {
01248 flags = '0';
01249 }
01250 }
01251 printf("%i\t%c\t\"%s\"\t\"%s\"\n", cs->consumes, flags, cs->cmd, strstr(cs->cmd, "STRING") ? "STRING" : cs->cmd);
01252 }
01253 return 0;
01254
01255 case 'L':
01256 printf("count\tdescription\n");
01257 for (const PluralForm *pf = _plural_forms; pf < endof(_plural_forms); pf++) {
01258 printf("%i\t\"%s\"\n", pf->plural_count, pf->description);
01259 }
01260 return 0;
01261
01262 case 'P':
01263 printf("name\tflags\tdefault\tdescription\n");
01264 for (size_t i = 0; i < lengthof(_pragmas); i++) {
01265 printf("\"%s\"\t%s\t\"%s\"\t\"%s\"\n",
01266 _pragmas[i][0], _pragmas[i][1], _pragmas[i][2], _pragmas[i][3]);
01267 }
01268 return 0;
01269
01270 case 't':
01271 _show_todo |= 1;
01272 break;
01273
01274 case 'w':
01275 _show_todo |= 2;
01276 break;
01277
01278 case 'h':
01279 puts(
01280 "strgen - $Revision$\n"
01281 " -v | --version print version information and exit\n"
01282 " -t | --todo replace any untranslated strings with '<TODO>'\n"
01283 " -w | --warning print a warning for any untranslated strings\n"
01284 " -h | -? | --help print this help message and exit\n"
01285 " -s | --source_dir search for english.txt in the specified directory\n"
01286 " -d | --dest_dir put output file in the specified directory, create if needed\n"
01287 " -export-commands export all commands and exit\n"
01288 " -export-plurals export all plural forms and exit\n"
01289 " -export-pragmas export all pragmas and exit\n"
01290 " Run without parameters and strgen will search for english.txt and parse it,\n"
01291 " creating strings.h. Passing an argument, strgen will translate that language\n"
01292 " file using english.txt as a reference and output <language>.lng."
01293 );
01294 return 0;
01295
01296 case 's':
01297 src_dir = replace_pathsep(mgo.opt);
01298 break;
01299
01300 case 'd':
01301 dest_dir = replace_pathsep(mgo.opt);
01302 break;
01303
01304 case -2:
01305 fprintf(stderr, "Invalid arguments\n");
01306 return 0;
01307 }
01308 }
01309
01310 if (dest_dir == NULL) dest_dir = src_dir;
01311
01312
01313
01314
01315
01316 if (mgo.numleft == 0) {
01317 mkpath(pathbuf, lengthof(pathbuf), src_dir, "english.txt");
01318
01319
01320 _masterlang = true;
01321 ParseFile(pathbuf, true);
01322 MakeHashOfStrings();
01323 if (_errors != 0) return 1;
01324
01325
01326 ottd_mkdir(dest_dir);
01327 mkpath(pathbuf, lengthof(pathbuf), dest_dir, "strings.h");
01328 WriteStringsH(pathbuf);
01329 } else if (mgo.numleft == 1) {
01330 char *r;
01331
01332 mkpath(pathbuf, lengthof(pathbuf), src_dir, "english.txt");
01333
01334
01335 _masterlang = false;
01336 ParseFile(pathbuf, true);
01337 MakeHashOfStrings();
01338 ParseFile(replace_pathsep(mgo.argv[0]), false);
01339 if (_errors != 0) return 1;
01340
01341
01342 r = strrchr(mgo.argv[0], PATHSEPCHAR);
01343 mkpath(pathbuf, lengthof(pathbuf), dest_dir, (r != NULL) ? &r[1] : mgo.argv[0]);
01344
01345
01346 r = strrchr(pathbuf, '.');
01347 if (r == NULL || strcmp(r, ".txt") != 0) r = strchr(pathbuf, '\0');
01348 ttd_strlcpy(r, ".lng", (size_t)(r - pathbuf));
01349 WriteLangfile(pathbuf);
01350
01351
01352 if ((_show_todo & 2) != 0) {
01353 fprintf(stdout, "%d warnings and %d errors for %s\n", _warnings, _errors, pathbuf);
01354 }
01355 } else {
01356 fprintf(stderr, "Invalid arguments\n");
01357 }
01358
01359 return 0;
01360 }