strgen_base.cpp

00001 /* $Id$ */
00002 
00003 /*
00004  * This file is part of OpenTTD.
00005  * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
00006  * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
00007  * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
00008  */
00009 
00012 #include "../stdafx.h"
00013 #include "../core/endian_func.hpp"
00014 #include "../string_func.h"
00015 #include "../strings_type.h"
00016 #include "../language.h"
00017 #include "../table/control_codes.h"
00018 
00019 #include "strgen.h"
00020 
00021 #include <stdarg.h>
00022 #include <exception>
00023 
00024 #include "../table/strgen_tables.h"
00025 
00026 /* Compiles a list of strings into a compiled string list */
00027 
00028 static bool _translated;              ///< Set by LanguageWriter::WriteLang: true when the text being emitted is not the English base text.
00029 static bool _translation;             ///< Set by StringReader::ParseFile; when false CheckCommandsMatch skips all its checks.
00030 const char *_file = "(unknown file)"; ///< Name of the file being parsed; StringReader::ParseFile points it at the reader's file name.
00031 int _cur_line;                        ///< Line currently being processed; counted from 1 while parsing, taken from the string while writing.
00032 int _errors, _warnings, _show_todo;   ///< Error and warning counters (reset by ParseFile) and the bit flags for reporting untranslated strings (1: emit "<TODO> ", 2: warn).
00033 LanguagePackHeader _lang;             ///< Header of the language being processed; cleared at the start of every ParseFile.
00034 
00035 static const ptrdiff_t MAX_COMMAND_PARAM_SIZE = 100; ///< Size of the buffers receiving the parameter text of one string command.
/* Forward declaration; the parser is needed by StringData::Version before its definition. */
00036 static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei);
00037 
00044 Case::Case(int caseidx, const char *string, Case *next) :
00045     caseidx(caseidx), string(strdup(string)), next(next)
00046 {
00047 }
00048 
00050 Case::~Case()
00051 {
00052   free(this->string);
00053   delete this->next;
00054 }
00055 
00063 LangString::LangString(const char *name, const char *english, int index, int line) :
00064     name(strdup(name)), english(strdup(english)), translated(NULL),
00065     hash_next(0), index(index), line(line), translated_case(NULL)
00066 {
00067 }
00068 
00070 LangString::~LangString()
00071 {
00072   free(this->name);
00073   free(this->english);
00074   free(this->translated);
00075   delete this->translated_case;
00076 }
00077 
00079 void LangString::FreeTranslation()
00080 {
00081   free(this->translated);
00082   this->translated = NULL;
00083 
00084   delete this->translated_case;
00085   this->translated_case = NULL;
00086 }
00087 
00092 StringData::StringData(size_t tabs) : tabs(tabs), max_strings(tabs * TAB_SIZE)
00093 {
00094   this->strings = CallocT<LangString *>(max_strings);
00095   this->hash_heads = CallocT<uint16>(max_strings);
00096   this->next_string_id = 0;
00097 }
00098 
00100 StringData::~StringData()
00101 {
00102   for (size_t i = 0; i < this->max_strings; i++) delete this->strings[i];
00103   free(this->strings);
00104   free(this->hash_heads);
00105 }
00106 
00108 void StringData::FreeTranslation()
00109 {
00110   for (size_t i = 0; i < this->max_strings; i++) {
00111     LangString *ls = this->strings[i];
00112     if (ls != NULL) ls->FreeTranslation();
00113   }
00114 }
00115 
00121 uint StringData::HashStr(const char *s) const
00122 {
00123   uint hash = 0;
00124   for (; *s != '\0'; s++) hash = ROL(hash, 3) ^ *s;
00125   return hash % this->max_strings;
00126 }
00127 
00133 void StringData::Add(const char *s, LangString *ls)
00134 {
00135   uint hash = this->HashStr(s);
00136   ls->hash_next = this->hash_heads[hash];
00137   /* Off-by-one for hash find. */
00138   this->hash_heads[hash] = ls->index + 1;
00139   this->strings[ls->index] = ls;
00140 }
00141 
00147 LangString *StringData::Find(const char *s)
00148 {
00149   int idx = this->hash_heads[this->HashStr(s)];
00150 
00151   while (--idx >= 0) {
00152     LangString *ls = this->strings[idx];
00153 
00154     if (strcmp(ls->name, s) == 0) return ls;
00155     idx = ls->hash_next;
00156   }
00157   return NULL;
00158 }
00159 
00166 uint StringData::VersionHashStr(uint hash, const char *s) const
00167 {
00168   for (; *s != '\0'; s++) {
00169     hash = ROL(hash, 3) ^ *s;
00170     hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
00171   }
00172   return hash;
00173 }
00174 
00179 uint StringData::Version() const
00180 {
00181   uint hash = 0;
00182 
00183   for (size_t i = 0; i < this->max_strings; i++) {
00184     const LangString *ls = this->strings[i];
00185 
00186     if (ls != NULL) {
00187       const CmdStruct *cs;
00188       const char *s;
00189       char buf[MAX_COMMAND_PARAM_SIZE];
00190       int argno;
00191       int casei;
00192 
00193       s = ls->name;
00194       hash ^= i * 0x717239;
00195       hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
00196       hash = this->VersionHashStr(hash, s + 1);
00197 
00198       s = ls->english;
00199       while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != NULL) {
00200         if (cs->flags & C_DONTCOUNT) continue;
00201 
00202         hash ^= (cs - _cmd_structs) * 0x1234567;
00203         hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
00204       }
00205     }
00206   }
00207 
00208   return hash;
00209 }
00210 
00215 uint StringData::CountInUse(uint tab) const
00216 {
00217   int i;
00218   for (i = TAB_SIZE; --i >= 0;) if (this->strings[(tab * TAB_SIZE) + i] != NULL) break;
00219   return i + 1;
00220 }
00221 
/** Name of the string being written by LanguageWriter::WriteLang; used to prefix messages of the emit helpers. */
00222 static const char *_cur_ident;
00223 
/** A command that consumes no arguments, together with its parameter text (filled in by ExtractCommandString). */
00224 struct CmdPair {
00225   const CmdStruct *a; ///< The command; CheckCommandsMatch sets it to NULL once the pair has been matched.
00226   const char *v;      ///< The parameter text of the command; "" when there was none.
00227 };
00228 
/** All commands found in one string, as collected by ExtractCommandString. */
00229 struct ParsedCommandStruct {
00230   uint np;           ///< Number of entries of pairs that are in use.
00231   CmdPair pairs[32]; ///< The commands that consume no arguments and are not flagged C_DONTCOUNT.
00232   const CmdStruct *cmd[32]; // ordered by param #; the argument consuming commands, NULL for unused positions
00233 };
00234 
00235 /* Used when generating some advanced commands. */
00236 static ParsedCommandStruct _cur_pcs; ///< Commands of the English text of the string being written (filled in by WriteLang).
00237 static int _cur_argidx;              ///< Position of the next argument while a string is emitted (reset by PutCommandString).
00238 
00240 struct Buffer : SmallVector<byte, 256> {
00245   void AppendByte(byte value)
00246   {
00247     *this->Append() = value;
00248   }
00249 
00254   void AppendUtf8(uint32 value)
00255   {
00256     if (value < 0x80) {
00257       *this->Append() = value;
00258     } else if (value < 0x800) {
00259       *this->Append() = 0xC0 + GB(value,  6, 5);
00260       *this->Append() = 0x80 + GB(value,  0, 6);
00261     } else if (value < 0x10000) {
00262       *this->Append() = 0xE0 + GB(value, 12, 4);
00263       *this->Append() = 0x80 + GB(value,  6, 6);
00264       *this->Append() = 0x80 + GB(value,  0, 6);
00265     } else if (value < 0x110000) {
00266       *this->Append() = 0xF0 + GB(value, 18, 3);
00267       *this->Append() = 0x80 + GB(value, 12, 6);
00268       *this->Append() = 0x80 + GB(value,  6, 6);
00269       *this->Append() = 0x80 + GB(value,  0, 6);
00270     } else {
00271       strgen_warning("Invalid unicode value U+0x%X", value);
00272     }
00273   }
00274 };
00275 
00276 size_t Utf8Validate(const char *s)
00277 {
00278   uint32 c;
00279 
00280   if (!HasBit(s[0], 7)) {
00281     /* 1 byte */
00282     return 1;
00283   } else if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
00284     /* 2 bytes */
00285     c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
00286     if (c >= 0x80) return 2;
00287   } else if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
00288     /* 3 bytes */
00289     c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
00290     if (c >= 0x800) return 3;
00291   } else if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
00292     /* 4 bytes */
00293     c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
00294     if (c >= 0x10000 && c <= 0x10FFFF) return 4;
00295   }
00296 
00297   return 0;
00298 }
00299 
00300 
00301 void EmitSingleChar(Buffer *buffer, char *buf, int value)
00302 {
00303   if (*buf != '\0') strgen_warning("Ignoring trailing letters in command");
00304   buffer->AppendUtf8(value);
00305 }
00306 
00307 
00308 /* The plural specifier looks like
00309  * {NUM} {PLURAL -1 passenger passengers} then it picks either passenger/passengers depending on the count in NUM */
00310 
00311 /* This is encoded like
00312  *  CommandByte <ARG#> <NUM> {Length of each string} {each string} */
00313 
/**
 * Parse an argument reference of the form "N", "+N", "-N" or "N:M".
 *
 * A number with an explicit sign is relative to the incoming *value, a bare
 * number replaces it. The optional ":M" part is only recognised when
 * offset is not NULL. Numbers are parsed with strtol() in base 0, so
 * hexadecimal and octal prefixes are honoured.
 *
 * Nothing is written to buf, value or offset unless the whole reference
 * parsed successfully, so callers can rely on their defaults after a failure.
 *
 * @param buf    In: the text to parse. Out: the position just past the reference.
 * @param value  In: the base for relative numbers. Out: the resulting number.
 * @param offset Out: the number after the ':', untouched when there is none; may be NULL.
 * @return True when a reference was parsed.
 */
bool ParseRelNum(char **buf, int *value, int *offset)
{
  const char *s = *buf;
  char *end;

  while (*s == ' ' || *s == '\t') s++;

  const bool explicit_plus = (*s == '+');
  if (explicit_plus) s++;

  const int v = (int)strtol(s, &end, 0);
  if (end == s) return false;

  /* Parse the optional ":M" before committing anything to the outputs. */
  bool has_offset = false;
  int parsed_offset = 0;
  if (offset != NULL && *end == ':') {
    /* Take the Nth within */
    s = end + 1;
    parsed_offset = (int)strtol(s, &end, 0);
    if (end == s) return false;
    has_offset = true;
  }

  if (explicit_plus || v < 0) {
    *value += v;
  } else {
    *value = v;
  }
  if (has_offset) *offset = parsed_offset;

  *buf = end;
  return true;
}
00341 
/**
 * Split the next word off a parameter string, modifying the string in place.
 *
 * A word is either a run of characters up to the next space or tab, or,
 * when it starts with a double quote, everything up to the closing quote
 * (or the end of the string when the quote is never closed). The delimiter
 * that ends the word is overwritten with a NUL.
 *
 * @param buf In: the text to parse. Out: the position after the word and its
 *            delimiter; not updated when no word is left.
 * @return The NUL-terminated word, or NULL when only blanks remain.
 */
char *ParseWord(char **buf)
{
  char *cursor = *buf;

  /* Blanks in front of a word never belong to it. */
  while (*cursor == ' ' || *cursor == '\t') cursor++;
  if (*cursor == '\0') return NULL;

  const bool quoted = (*cursor == '"');
  if (quoted) cursor++;

  char *word = cursor;
  while (*cursor != '\0') {
    const bool at_delimiter = quoted ? (*cursor == '"') : (*cursor == ' ' || *cursor == '\t');
    if (at_delimiter) {
      /* Terminate the word and continue after the delimiter next time. */
      *cursor++ = '\0';
      break;
    }
    cursor++;
  }

  *buf = cursor;
  return word;
}
00376 
00377 /* Forward declaration; the definition further down adds offset to the translated index, so the default of 0 leaves it unchanged. */
00378 static int TranslateArgumentIdx(int arg, int offset = 0);
00379 
00380 static void EmitWordList(Buffer *buffer, const char * const *words, uint nw)
00381 {
00382   buffer->AppendByte(nw);
00383   for (uint i = 0; i < nw; i++) buffer->AppendByte((uint)strlen(words[i]) + 1);
00384   for (uint i = 0; i < nw; i++) {
00385     for (uint j = 0; words[i][j] != '\0'; j++) buffer->AppendByte(words[i][j]);
00386     buffer->AppendByte(0);
00387   }
00388 }
00389 
00390 void EmitPlural(Buffer *buffer, char *buf, int value)
00391 {
00392   int argidx = _cur_argidx;
00393   int offset = 0;
00394   const char *words[5];
00395   int nw = 0;
00396 
00397   /* Parse out the number, if one exists. Otherwise default to prev arg. */
00398   if (!ParseRelNum(&buf, &argidx, &offset)) argidx--;
00399 
00400   /* Parse each string */
00401   for (nw = 0; nw < 5; nw++) {
00402     words[nw] = ParseWord(&buf);
00403     if (words[nw] == NULL) break;
00404   }
00405 
00406   if (nw == 0) {
00407     strgen_fatal("%s: No plural words", _cur_ident);
00408   }
00409 
00410   if (_plural_forms[_lang.plural_form].plural_count != nw) {
00411     if (_translated) {
00412       strgen_fatal("%s: Invalid number of plural forms. Expecting %d, found %d.", _cur_ident,
00413         _plural_forms[_lang.plural_form].plural_count, nw);
00414     } else {
00415       if ((_show_todo & 2) != 0) strgen_warning("'%s' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
00416       if (nw > _plural_forms[_lang.plural_form].plural_count) {
00417         nw = _plural_forms[_lang.plural_form].plural_count;
00418       } else {
00419         for (; nw < _plural_forms[_lang.plural_form].plural_count; nw++) {
00420           words[nw] = words[nw - 1];
00421         }
00422       }
00423     }
00424   }
00425 
00426   buffer->AppendUtf8(SCC_PLURAL_LIST);
00427   buffer->AppendByte(_lang.plural_form);
00428   buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
00429   EmitWordList(buffer, words, nw);
00430 }
00431 
00432 
00433 void EmitGender(Buffer *buffer, char *buf, int value)
00434 {
00435   int argidx = _cur_argidx;
00436   int offset = 0;
00437   uint nw;
00438 
00439   if (buf[0] == '=') {
00440     buf++;
00441 
00442     /* This is a {G=DER} command */
00443     nw = _lang.GetGenderIndex(buf);
00444     if (nw >= MAX_NUM_GENDERS) strgen_fatal("G argument '%s' invalid", buf);
00445 
00446     /* now nw contains the gender index */
00447     buffer->AppendUtf8(SCC_GENDER_INDEX);
00448     buffer->AppendByte(nw);
00449   } else {
00450     const char *words[MAX_NUM_GENDERS];
00451 
00452     /* This is a {G 0 foo bar two} command.
00453      * If no relative number exists, default to +0 */
00454     if (!ParseRelNum(&buf, &argidx, &offset)) {}
00455 
00456     const CmdStruct *cmd = _cur_pcs.cmd[argidx];
00457     if (cmd == NULL || (cmd->flags & C_GENDER) == 0) {
00458       strgen_fatal("Command '%s' can't have a gender", cmd == NULL ? "<empty>" : cmd->cmd);
00459     }
00460 
00461     for (nw = 0; nw < MAX_NUM_GENDERS; nw++) {
00462       words[nw] = ParseWord(&buf);
00463       if (words[nw] == NULL) break;
00464     }
00465     if (nw != _lang.num_genders) strgen_fatal("Bad # of arguments for gender command");
00466 
00467     assert(IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
00468     buffer->AppendUtf8(SCC_GENDER_LIST);
00469     buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
00470     EmitWordList(buffer, words, nw);
00471   }
00472 }
00473 
00474 static const CmdStruct *FindCmd(const char *s, int len)
00475 {
00476   for (const CmdStruct *cs = _cmd_structs; cs != endof(_cmd_structs); cs++) {
00477     if (strncmp(cs->cmd, s, len) == 0 && cs->cmd[len] == '\0') return cs;
00478   }
00479   return NULL;
00480 }
00481 
00482 static uint ResolveCaseName(const char *str, size_t len)
00483 {
00484   /* First get a clean copy of only the case name, then resolve it. */
00485   char case_str[CASE_GENDER_LEN];
00486   len = min(lengthof(case_str) - 1, len);
00487   memcpy(case_str, str, len);
00488   case_str[len] = '\0';
00489 
00490   uint8 case_idx = _lang.GetCaseIndex(case_str);
00491   if (case_idx >= MAX_NUM_CASES) strgen_fatal("Invalid case-name '%s'", case_str);
00492   return case_idx + 1;
00493 }
00494 
00495 
/**
 * Find and parse the next "{...}" command in a string.
 *
 * The accepted shape is "{[N:]NAME[.case][ params | =params]}".
 *
 * @param str   In: where to start scanning. Out: the position just past the
 *              closing '}'; only updated when a command is returned.
 * @param param Receives the parameter text of the command; only terminated
 *              when a command is returned. For the '=' form the text
 *              starts with the '='.
 * @param argno Receives the explicit argument number N, or -1 when there is none.
 * @param casei Receives the result of ResolveCaseName() for the ".case"
 *              part, or -1 when there is none.
 * @return The command, or NULL when the string holds no further command or
 *         when an error has been reported.
 */
00496 /* returns NULL on eof
00497  * else returns command struct */
00498 static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei)
00499 {
00500   const char *s = *str, *start;
00501   char c;
00502 
00503   *argno = -1;
00504   *casei = -1;
00505 
00506   /* Scan to the next command, exit if there's no next command. */
00507   for (; *s != '{'; s++) {
00508     if (*s == '\0') return NULL;
00509   }
00510   s++; // Skip past the {
00511 
  /* A leading digit starts an explicit argument number, which has to be followed by ':'. */
00512   if (*s >= '0' && *s <= '9') {
00513     char *end;
00514 
00515     *argno = strtoul(s, &end, 0);
00516     if (*end != ':') strgen_fatal("missing arg #");
00517     s = end + 1;
00518   }
00519 
00520   /* parse command name */
00521   start = s;
  /* After this loop c is the character that ended the name and s points just past it. */
00522   do {
00523     c = *s++;
00524   } while (c != '}' && c != ' ' && c != '=' && c != '.' && c != 0);
00525 
00526   const CmdStruct *cmd = FindCmd(start, s - start - 1);
00527   if (cmd == NULL) {
00528     strgen_error("Undefined command '%.*s'", (int)(s - start - 1), start);
00529     return NULL;
00530   }
00531 
  /* A '.' introduces a case name, running up to the next '}', ' ' or the end of the string. */
00532   if (c == '.') {
00533     const char *casep = s;
00534 
00535     if (!(cmd->flags & C_CASE)) {
00536       strgen_fatal("Command '%s' can't have a case", cmd->cmd);
00537     }
00538 
00539     do {
00540       c = *s++;
00541     } while (c != '}' && c != ' ' && c != '\0');
00542     *casei = ResolveCaseName(casep, s - casep - 1);
00543   }
00544 
00545   if (c == '\0') {
00546     strgen_error("Missing } from command '%s'", start);
00547     return NULL;
00548   }
00549 
00550 
00551   if (c != '}') {
    /* Step back so that the '=' becomes the first character of the parameter text. */
00552     if (c == '=') s--;
00553     /* copy params */
00554     start = s;
00555     for (;;) {
00556       c = *s++;
00557       if (c == '}') break;
00558       if (c == '\0') {
00559         strgen_error("Missing } from command '%s'", start);
00560         return NULL;
00561       }
      /* Stop before the character that would leave no room for the terminator. */
00562       if (s - start == MAX_COMMAND_PARAM_SIZE) error("param command too long");
00563       *param++ = c;
00564     }
00565   }
00566   *param = '\0';
00567 
00568   *str = s;
00569 
00570   return cmd;
00571 }
00572 
00580 StringReader::StringReader(StringData &data, const char *file, bool master, bool translation) :
00581     data(data), file(strdup(file)), master(master), translation(translation)
00582 {
00583 }
00584 
00586 StringReader::~StringReader()
00587 {
00588   free(file);
00589 }
00590 
00591 static void ExtractCommandString(ParsedCommandStruct *p, const char *s, bool warnings)
00592 {
00593   char param[MAX_COMMAND_PARAM_SIZE];
00594   int argno;
00595   int argidx = 0;
00596   int casei;
00597 
00598   memset(p, 0, sizeof(*p));
00599 
00600   for (;;) {
00601     /* read until next command from a. */
00602     const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
00603 
00604     if (ar == NULL) break;
00605 
00606     /* Sanity checking */
00607     if (argno != -1 && ar->consumes == 0) strgen_fatal("Non consumer param can't have a paramindex");
00608 
00609     if (ar->consumes) {
00610       if (argno != -1) argidx = argno;
00611       if (argidx < 0 || (uint)argidx >= lengthof(p->cmd)) strgen_fatal("invalid param idx %d", argidx);
00612       if (p->cmd[argidx] != NULL && p->cmd[argidx] != ar) strgen_fatal("duplicate param idx %d", argidx);
00613 
00614       p->cmd[argidx++] = ar;
00615     } else if (!(ar->flags & C_DONTCOUNT)) { // Ignore some of them
00616       if (p->np >= lengthof(p->pairs)) strgen_fatal("too many commands in string, max " PRINTF_SIZE, lengthof(p->pairs));
00617       p->pairs[p->np].a = ar;
00618       p->pairs[p->np].v = param[0] != '\0' ? strdup(param) : "";
00619       p->np++;
00620     }
00621   }
00622 }
00623 
00624 
00625 static const CmdStruct *TranslateCmdForCompare(const CmdStruct *a)
00626 {
00627   if (a == NULL) return NULL;
00628 
00629   if (strcmp(a->cmd, "STRING1") == 0 ||
00630       strcmp(a->cmd, "STRING2") == 0 ||
00631       strcmp(a->cmd, "STRING3") == 0 ||
00632       strcmp(a->cmd, "STRING4") == 0 ||
00633       strcmp(a->cmd, "STRING5") == 0 ||
00634       strcmp(a->cmd, "RAW_STRING") == 0) {
00635     return FindCmd("STRING", 6);
00636   }
00637 
00638   return a;
00639 }
00640 
00641 
00642 static bool CheckCommandsMatch(char *a, char *b, const char *name)
00643 {
00644   /* If we're not translating, i.e. we're compiling the base language,
00645    * it is pointless to do all these checks as it'll always be correct.
00646    * After all, all checks are based on the base language.
00647    */
00648   if (!_translation) return true;
00649 
00650   ParsedCommandStruct templ;
00651   ParsedCommandStruct lang;
00652   bool result = true;
00653 
00654   ExtractCommandString(&templ, b, true);
00655   ExtractCommandString(&lang, a, true);
00656 
00657   /* For each string in templ, see if we find it in lang */
00658   if (templ.np != lang.np) {
00659     strgen_warning("%s: template string and language string have a different # of commands", name);
00660     result = false;
00661   }
00662 
00663   for (uint i = 0; i < templ.np; i++) {
00664     /* see if we find it in lang, and zero it out */
00665     bool found = false;
00666     for (uint j = 0; j < lang.np; j++) {
00667       if (templ.pairs[i].a == lang.pairs[j].a &&
00668           strcmp(templ.pairs[i].v, lang.pairs[j].v) == 0) {
00669         /* it was found in both. zero it out from lang so we don't find it again */
00670         lang.pairs[j].a = NULL;
00671         found = true;
00672         break;
00673       }
00674     }
00675 
00676     if (!found) {
00677       strgen_warning("%s: command '%s' exists in template file but not in language file", name, templ.pairs[i].a->cmd);
00678       result = false;
00679     }
00680   }
00681 
00682   /* if we reach here, all non consumer commands match up.
00683    * Check if the non consumer commands match up also. */
00684   for (uint i = 0; i < lengthof(templ.cmd); i++) {
00685     if (TranslateCmdForCompare(templ.cmd[i]) != lang.cmd[i]) {
00686       strgen_warning("%s: Param idx #%d '%s' doesn't match with template command '%s'", name, i,
00687         lang.cmd[i]  == NULL ? "<empty>" : TranslateCmdForCompare(lang.cmd[i])->cmd,
00688         templ.cmd[i] == NULL ? "<empty>" : templ.cmd[i]->cmd);
00689       result = false;
00690     }
00691   }
00692 
00693   return result;
00694 }
00695 
00696 void StringReader::HandleString(char *str)
00697 {
00698   if (*str == '#') {
00699     if (str[1] == '#' && str[2] != '#') this->HandlePragma(str + 2);
00700     return;
00701   }
00702 
00703   /* Ignore comments & blank lines */
00704   if (*str == ';' || *str == ' ' || *str == '\0') return;
00705 
00706   char *s = strchr(str, ':');
00707   if (s == NULL) {
00708     strgen_error("Line has no ':' delimiter");
00709     return;
00710   }
00711 
00712   char *t;
00713   /* Trim spaces.
00714    * After this str points to the command name, and s points to the command contents */
00715   for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--) {}
00716   *t = 0;
00717   s++;
00718 
00719   /* Check string is valid UTF-8 */
00720   const char *tmp;
00721   for (tmp = s; *tmp != '\0';) {
00722     size_t len = Utf8Validate(tmp);
00723     if (len == 0) strgen_fatal("Invalid UTF-8 sequence in '%s'", s);
00724 
00725     WChar c;
00726     Utf8Decode(&c, tmp);
00727     if (c <= 0x001F || // ASCII control character range
00728         (c >= 0xE000 && c <= 0xF8FF) || // Private range
00729         (c >= 0xFFF0 && c <= 0xFFFF)) { // Specials range
00730       strgen_fatal("Unwanted UTF-8 character U+%04X in sequence '%s'", c, s);
00731     }
00732 
00733     tmp += len;
00734   }
00735 
00736   /* Check if the string has a case..
00737    * The syntax for cases is IDENTNAME.case */
00738   char *casep = strchr(str, '.');
00739   if (casep != NULL) *casep++ = '\0';
00740 
00741   /* Check if this string already exists.. */
00742   LangString *ent = this->data.Find(str);
00743 
00744   if (this->master) {
00745     if (casep != NULL) {
00746       strgen_error("Cases in the base translation are not supported.");
00747       return;
00748     }
00749 
00750     if (ent != NULL) {
00751       strgen_error("String name '%s' is used multiple times", str);
00752       return;
00753     }
00754 
00755     if (this->data.strings[this->data.next_string_id] != NULL) {
00756       strgen_error("String ID 0x%X for '%s' already in use by '%s'", this->data.next_string_id, str, this->data.strings[this->data.next_string_id]->name);
00757       return;
00758     }
00759 
00760     /* Allocate a new LangString */
00761     this->data.Add(str, new LangString(str, s, this->data.next_string_id++, _cur_line));
00762   } else {
00763     if (ent == NULL) {
00764       strgen_warning("String name '%s' does not exist in master file", str);
00765       return;
00766     }
00767 
00768     if (ent->translated && casep == NULL) {
00769       strgen_error("String name '%s' is used multiple times", str);
00770       return;
00771     }
00772 
00773     /* make sure that the commands match */
00774     if (!CheckCommandsMatch(s, ent->english, str)) return;
00775 
00776     if (casep != NULL) {
00777       ent->translated_case = new Case(ResolveCaseName(casep, strlen(casep)), s, ent->translated_case);
00778     } else {
00779       ent->translated = strdup(s);
00780       /* If the string was translated, use the line from the
00781        * translated language so errors in the translated file
00782        * are properly referenced to. */
00783       ent->line = _cur_line;
00784     }
00785   }
00786 }
00787 
00788 
/**
 * Remove carriage returns, line feeds and spaces from the end of a string.
 * @param buf The string to strip, in place.
 */
static void rstrip(char *buf)
{
  char *end = buf + strlen(buf);

  while (end != buf) {
    const char last = end[-1];
    if (last != '\r' && last != '\n' && last != ' ') break;
    end--;
  }

  *end = '\0';
}
00795 
00796 void StringReader::ParseFile()
00797 {
00798   char buf[2048];
00799   _warnings = _errors = 0;
00800 
00801   _translation = this->master || this->translation;
00802   _file = this->file;
00803 
00804   /* For each new file we parse, reset the genders, and language codes. */
00805   MemSetT(&_lang, 0);
00806   strecpy(_lang.digit_group_separator, ",", lastof(_lang.digit_group_separator));
00807   strecpy(_lang.digit_group_separator_currency, ",", lastof(_lang.digit_group_separator_currency));
00808   strecpy(_lang.digit_decimal_separator, ".", lastof(_lang.digit_decimal_separator));
00809 
00810   _cur_line = 1;
00811   while (this->ReadLine(buf, sizeof(buf)) != NULL) {
00812     rstrip(buf);
00813     this->HandleString(buf);
00814     _cur_line++;
00815   }
00816 }
00817 
00822 void HeaderWriter::WriteHeader(const StringData &data)
00823 {
00824   int last = 0;
00825   for (size_t i = 0; i < data.max_strings; i++) {
00826     if (data.strings[i] != NULL) {
00827       this->WriteStringID(data.strings[i]->name, (int)i);
00828       last = (int)i;
00829     }
00830   }
00831 
00832   this->WriteStringID("STR_LAST_STRINGID", last);
00833 }
00834 
00835 static int TranslateArgumentIdx(int argidx, int offset)
00836 {
00837   int sum;
00838 
00839   if (argidx < 0 || (uint)argidx >= lengthof(_cur_pcs.cmd)) {
00840     strgen_fatal("invalid argidx %d", argidx);
00841   }
00842   const CmdStruct *cs = _cur_pcs.cmd[argidx];
00843   if (cs != NULL && cs->consumes <= offset) {
00844     strgen_fatal("invalid argidx offset %d:%d", argidx, offset);
00845   }
00846 
00847   if (_cur_pcs.cmd[argidx] == NULL) {
00848     strgen_fatal("no command for this argidx %d", argidx);
00849   }
00850 
00851   for (int i = sum = 0; i < argidx; i++) {
00852     const CmdStruct *cs = _cur_pcs.cmd[i];
00853 
00854     sum += (cs != NULL) ? cs->consumes : 1;
00855   }
00856 
00857   return sum + offset;
00858 }
00859 
00860 static void PutArgidxCommand(Buffer *buffer)
00861 {
00862   buffer->AppendUtf8(SCC_ARG_INDEX);
00863   buffer->AppendByte(TranslateArgumentIdx(_cur_argidx));
00864 }
00865 
00866 
00867 static void PutCommandString(Buffer *buffer, const char *str)
00868 {
00869   _cur_argidx = 0;
00870 
00871   while (*str != '\0') {
00872     /* Process characters as they are until we encounter a { */
00873     if (*str != '{') {
00874       buffer->AppendByte(*str++);
00875       continue;
00876     }
00877 
00878     char param[MAX_COMMAND_PARAM_SIZE];
00879     int argno;
00880     int casei;
00881     const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei);
00882     if (cs == NULL) break;
00883 
00884     if (casei != -1) {
00885       buffer->AppendUtf8(SCC_SET_CASE); // {SET_CASE}
00886       buffer->AppendByte(casei);
00887     }
00888 
00889     /* For params that consume values, we need to handle the argindex properly */
00890     if (cs->consumes > 0) {
00891       /* Check if we need to output a move-param command */
00892       if (argno != -1 && argno != _cur_argidx) {
00893         _cur_argidx = argno;
00894         PutArgidxCommand(buffer);
00895       }
00896 
00897       /* Output the one from the master string... it's always accurate. */
00898       cs = _cur_pcs.cmd[_cur_argidx++];
00899       if (cs == NULL) {
00900         strgen_fatal("%s: No argument exists at position %d", _cur_ident, _cur_argidx - 1);
00901       }
00902     }
00903 
00904     cs->proc(buffer, param, cs->value);
00905   }
00906 }
00907 
00912 void LanguageWriter::WriteLength(uint length)
00913 {
00914   char buffer[2];
00915   int offs = 0;
00916   if (length >= 0x4000) {
00917     strgen_fatal("string too long");
00918   }
00919 
00920   if (length >= 0xC0) {
00921     buffer[offs++] = (length >> 8) | 0xC0;
00922   }
00923   buffer[offs++] = length & 0xFF;
00924   this->Write((byte*)buffer, offs);
00925 }
00926 
/**
 * Write a complete language: the header followed by every string.
 *
 * For every tab the strings up to the last one in use are written, each as a
 * length (see WriteLength) followed by the encoded text. Slots without a
 * string are written as length 0. Strings without any translation are
 * written using their English text.
 *
 * The function fills in the offsets, ident, version and missing fields of
 * _lang and updates _cur_ident, _cur_line, _cur_pcs and _translated for
 * every string.
 *
 * @param data The strings to write.
 */
00931 void LanguageWriter::WriteLang(const StringData &data)
00932 {
00933   uint *in_use = AllocaM(uint, data.tabs);
  /* First pass: determine the size of every tab and count the strings without translation. */
00934   for (size_t tab = 0; tab < data.tabs; tab++) {
00935     uint n = data.CountInUse((uint)tab);
00936 
00937     in_use[tab] = n;
00938     _lang.offsets[tab] = TO_LE16(n);
00939 
00940     for (uint j = 0; j != in_use[tab]; j++) {
00941       const LangString *ls = data.strings[(tab * TAB_SIZE) + j];
00942       if (ls != NULL && ls->translated == NULL) _lang.missing++;
00943     }
00944   }
00945 
  /* Complete the header; multi-byte fields pass through TO_LE16/TO_LE32 before being written. */
00946   _lang.ident = TO_LE32(LanguagePackHeader::IDENT);
00947   _lang.version = TO_LE32(data.Version());
00948   _lang.missing = TO_LE16(_lang.missing);
00949   _lang.winlangid = TO_LE16(_lang.winlangid);
00950 
00951   this->WriteHeader(&_lang);
  /* A single buffer is reused for all strings; it is cleared after every string. */
00952   Buffer buffer;
00953 
00954   for (size_t tab = 0; tab < data.tabs; tab++) {
00955     for (uint j = 0; j != in_use[tab]; j++) {
00956       const LangString *ls = data.strings[(tab * TAB_SIZE) + j];
00957       const Case *casep;
00958       const char *cmdp;
00959 
00960       /* For undefined strings, just set that it's an empty string */
00961       if (ls == NULL) {
00962         this->WriteLength(0);
00963         continue;
00964       }
00965 
      /* Make messages issued while encoding refer to this string. */
00966       _cur_ident = ls->name;
00967       _cur_line = ls->line;
00968 
00969       /* Produce a message if a string doesn't have a translation. */
00970       if (_show_todo > 0 && ls->translated == NULL) {
00971         if ((_show_todo & 2) != 0) {
00972           strgen_warning("'%s' is untranslated", ls->name);
00973         }
00974         if ((_show_todo & 1) != 0) {
00975           const char *s = "<TODO> ";
00976           while (*s != '\0') buffer.AppendByte(*s++);
00977         }
00978       }
00979 
00980       /* Extract the strings and stuff from the english command string */
00981       ExtractCommandString(&_cur_pcs, ls->english, false);
00982 
      /* Fall back to the English text only when there is neither a translation nor a case.
       * With cases but no plain translation cmdp is NULL and no default text is written. */
00983       if (ls->translated_case != NULL || ls->translated != NULL) {
00984         casep = ls->translated_case;
00985         cmdp = ls->translated;
00986       } else {
00987         casep = NULL;
00988         cmdp = ls->english;
00989       }
00990 
00991       _translated = cmdp != ls->english;
00992 
00993       if (casep != NULL) {
00994         const Case *c;
00995         uint num;
00996 
00997         /* Need to output a case-switch.
00998          * It has this format
00999          * <0x9E> <NUM CASES> <CASE1> <LEN1> <STRING1> <CASE2> <LEN2> <STRING2> <CASE3> <LEN3> <STRING3> <STRINGDEFAULT>
01000          * Each LEN is printed using 2 bytes in big endian order. */
01001         buffer.AppendUtf8(SCC_SWITCH_CASE);
01002         /* Count the number of cases */
01003         for (num = 0, c = casep; c; c = c->next) num++;
01004         buffer.AppendByte(num);
01005 
01006         /* Write each case */
01007         for (c = casep; c != NULL; c = c->next) {
01008           buffer.AppendByte(c->caseidx);
01009           /* Make some space for the 16-bit length */
01010           uint pos = buffer.Length();
01011           buffer.AppendByte(0);
01012           buffer.AppendByte(0);
01013           /* Write string */
01014           PutCommandString(&buffer, c->string);
01015           buffer.AppendByte(0); // terminate with a zero
01016           /* Fill in the length */
          /* The length covers the encoded text and its terminator, not the two length bytes. */
01017           uint size = buffer.Length() - (pos + 2);
01018           buffer[pos + 0] = GB(size, 8, 8);
01019           buffer[pos + 1] = GB(size, 0, 8);
01020         }
01021       }
01022 
01023       if (cmdp != NULL) PutCommandString(&buffer, cmdp);
01024 
01025       this->WriteLength(buffer.Length());
01026       this->Write(buffer.Begin(), buffer.Length());
01027       buffer.Clear();
01028     }
01029   }
01030 }
01030 }