strgen.cpp

Go to the documentation of this file.
00001 /* $Id$ */
00002 
00003 /*
00004  * This file is part of OpenTTD.
00005  * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
00006  * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
00007  * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
00008  */
00009 
00012 #include "../stdafx.h"
00013 #include "../core/endian_func.hpp"
00014 #include "../string_func.h"
00015 #include "../strings_type.h"
00016 #include "../misc/getoptdata.h"
00017 #include "../table/control_codes.h"
00018 
00019 #include "strgen.h"
00020 
00021 #include <stdarg.h>
00022 #include <exception>
00023 
00024 #if (!defined(WIN32) && !defined(WIN64)) || defined(__CYGWIN__)
00025 #include <unistd.h>
00026 #include <sys/stat.h>
00027 #endif
00028 
00029 #if defined WIN32 || defined __WATCOMC__
00030 #include <direct.h>
00031 #endif /* WIN32 || __WATCOMC__ */
00032 
00033 #ifdef __MORPHOS__
00034 #ifdef stderr
00035 #undef stderr
00036 #endif
00037 #define stderr stdout
00038 #endif /* __MORPHOS__ */
00039 
00040 #include "../table/strgen_tables.h"
00041 
00042 
00043 #ifdef _MSC_VER
00044 # define LINE_NUM_FMT(s) "%s (%d): warning: %s (" s ")\n"
00045 #else
00046 # define LINE_NUM_FMT(s) "%s:%d: " s ": %s\n"
00047 #endif
00048 
00049 void CDECL strgen_warning(const char *s, ...)
00050 {
00051   char buf[1024];
00052   va_list va;
00053   va_start(va, s);
00054   vsnprintf(buf, lengthof(buf), s, va);
00055   va_end(va);
00056   fprintf(stderr, LINE_NUM_FMT("warning"), _file, _cur_line, buf);
00057   _warnings++;
00058 }
00059 
00060 void CDECL strgen_error(const char *s, ...)
00061 {
00062   char buf[1024];
00063   va_list va;
00064   va_start(va, s);
00065   vsnprintf(buf, lengthof(buf), s, va);
00066   va_end(va);
00067   fprintf(stderr, LINE_NUM_FMT("error"), _file, _cur_line, buf);
00068   _errors++;
00069 }
00070 
00071 void NORETURN CDECL strgen_fatal(const char *s, ...)
00072 {
00073   char buf[1024];
00074   va_list va;
00075   va_start(va, s);
00076   vsnprintf(buf, lengthof(buf), s, va);
00077   va_end(va);
00078   fprintf(stderr, LINE_NUM_FMT("FATAL"), _file, _cur_line, buf);
00079 #ifdef _MSC_VER
00080   fprintf(stderr, LINE_NUM_FMT("warning"), _file, _cur_line, "language is not compiled");
00081 #endif
00082   throw std::exception();
00083 }
00084 
00085 void NORETURN CDECL error(const char *s, ...)
00086 {
00087   char buf[1024];
00088   va_list va;
00089   va_start(va, s);
00090   vsnprintf(buf, lengthof(buf), s, va);
00091   va_end(va);
00092   fprintf(stderr, LINE_NUM_FMT("FATAL"), _file, _cur_line, buf);
00093 #ifdef _MSC_VER
00094   fprintf(stderr, LINE_NUM_FMT("warning"), _file, _cur_line, "language is not compiled");
00095 #endif
00096   exit(2);
00097 }
00098 
00100 struct FileStringReader : StringReader {
00101   FILE *fh; 
00102 
00110   FileStringReader(StringData &data, const char *file, bool master, bool translation) :
00111       StringReader(data, file, master, translation)
00112   {
00113     this->fh = fopen(file, "rb");
00114     if (this->fh == NULL) error("Could not open %s", file);
00115   }
00116 
00118   virtual ~FileStringReader()
00119   {
00120     fclose(this->fh);
00121   }
00122 
00123   /* virtual */ char *ReadLine(char *buffer, size_t size)
00124   {
00125     return fgets(buffer, size, this->fh);
00126   }
00127 
00128   /* virtual */ void HandlePragma(char *str);
00129 
00130   /* virtual */ void ParseFile()
00131   {
00132     this->StringReader::ParseFile();
00133 
00134     if (StrEmpty(_lang.name) || StrEmpty(_lang.own_name) || StrEmpty(_lang.isocode)) {
00135       error("Language must include ##name, ##ownname and ##isocode");
00136     }
00137   }
00138 };
00139 
00140 void FileStringReader::HandlePragma(char *str)
00141 {
00142   if (!memcmp(str, "id ", 3)) {
00143     this->data.next_string_id = strtoul(str + 3, NULL, 0);
00144   } else if (!memcmp(str, "name ", 5)) {
00145     strecpy(_lang.name, str + 5, lastof(_lang.name));
00146   } else if (!memcmp(str, "ownname ", 8)) {
00147     strecpy(_lang.own_name, str + 8, lastof(_lang.own_name));
00148   } else if (!memcmp(str, "isocode ", 8)) {
00149     strecpy(_lang.isocode, str + 8, lastof(_lang.isocode));
00150   } else if (!memcmp(str, "plural ", 7)) {
00151     _lang.plural_form = atoi(str + 7);
00152     if (_lang.plural_form >= lengthof(_plural_forms)) {
00153       error("Invalid pluralform %d", _lang.plural_form);
00154     }
00155   } else if (!memcmp(str, "textdir ", 8)) {
00156     if (!memcmp(str + 8, "ltr", 3)) {
00157       _lang.text_dir = TD_LTR;
00158     } else if (!memcmp(str + 8, "rtl", 3)) {
00159       _lang.text_dir = TD_RTL;
00160     } else {
00161       error("Invalid textdir %s", str + 8);
00162     }
00163   } else if (!memcmp(str, "digitsep ", 9)) {
00164     str += 9;
00165     strecpy(_lang.digit_group_separator, strcmp(str, "{NBSP}") == 0 ? NBSP : str, lastof(_lang.digit_group_separator));
00166   } else if (!memcmp(str, "digitsepcur ", 12)) {
00167     str += 12;
00168     strecpy(_lang.digit_group_separator_currency, strcmp(str, "{NBSP}") == 0 ? NBSP : str, lastof(_lang.digit_group_separator_currency));
00169   } else if (!memcmp(str, "decimalsep ", 11)) {
00170     str += 11;
00171     strecpy(_lang.digit_decimal_separator, strcmp(str, "{NBSP}") == 0 ? NBSP : str, lastof(_lang.digit_decimal_separator));
00172   } else if (!memcmp(str, "winlangid ", 10)) {
00173     const char *buf = str + 10;
00174     long langid = strtol(buf, NULL, 16);
00175     if (langid > (long)UINT16_MAX || langid < 0) {
00176       error("Invalid winlangid %s", buf);
00177     }
00178     _lang.winlangid = (uint16)langid;
00179   } else if (!memcmp(str, "grflangid ", 10)) {
00180     const char *buf = str + 10;
00181     long langid = strtol(buf, NULL, 16);
00182     if (langid >= 0x7F || langid < 0) {
00183       error("Invalid grflangid %s", buf);
00184     }
00185     _lang.newgrflangid = (uint8)langid;
00186   } else if (!memcmp(str, "gender ", 7)) {
00187     if (this->master) error("Genders are not allowed in the base translation.");
00188     char *buf = str + 7;
00189 
00190     for (;;) {
00191       const char *s = ParseWord(&buf);
00192 
00193       if (s == NULL) break;
00194       if (_lang.num_genders >= MAX_NUM_GENDERS) error("Too many genders, max %d", MAX_NUM_GENDERS);
00195       strecpy(_lang.genders[_lang.num_genders], s, lastof(_lang.genders[_lang.num_genders]));
00196       _lang.num_genders++;
00197     }
00198   } else if (!memcmp(str, "case ", 5)) {
00199     if (this->master) error("Cases are not allowed in the base translation.");
00200     char *buf = str + 5;
00201 
00202     for (;;) {
00203       const char *s = ParseWord(&buf);
00204 
00205       if (s == NULL) break;
00206       if (_lang.num_cases >= MAX_NUM_CASES) error("Too many cases, max %d", MAX_NUM_CASES);
00207       strecpy(_lang.cases[_lang.num_cases], s, lastof(_lang.cases[_lang.num_cases]));
00208       _lang.num_cases++;
00209     }
00210   } else {
00211     error("unknown pragma '%s'", str);
00212   }
00213 }
00214 
00215 bool CompareFiles(const char *n1, const char *n2)
00216 {
00217   FILE *f2 = fopen(n2, "rb");
00218   if (f2 == NULL) return false;
00219 
00220   FILE *f1 = fopen(n1, "rb");
00221   if (f1 == NULL) error("can't open %s", n1);
00222 
00223   size_t l1, l2;
00224   do {
00225     char b1[4096];
00226     char b2[4096];
00227     l1 = fread(b1, 1, sizeof(b1), f1);
00228     l2 = fread(b2, 1, sizeof(b2), f2);
00229 
00230     if (l1 != l2 || memcmp(b1, b2, l1)) {
00231       fclose(f2);
00232       fclose(f1);
00233       return false;
00234     }
00235   } while (l1 != 0);
00236 
00237   fclose(f2);
00238   fclose(f1);
00239   return true;
00240 }
00241 
00243 struct FileWriter {
00244   FILE *fh;             
00245   const char *filename; 
00246 
00251   FileWriter(const char *filename)
00252   {
00253     this->filename = strdup(filename);
00254     this->fh = fopen(this->filename, "wb");
00255 
00256     if (this->fh == NULL) {
00257       error("Could not open %s", this->filename);
00258     }
00259   }
00260 
00262   void Finalise()
00263   {
00264     fclose(this->fh);
00265     this->fh = NULL;
00266   }
00267 
00269   virtual ~FileWriter()
00270   {
00271     /* If we weren't closed an exception was thrown, so remove the termporary file. */
00272     if (fh != NULL) {
00273       fclose(this->fh);
00274       unlink(this->filename);
00275     }
00276     free(this->filename);
00277   }
00278 };
00279 
00280 struct HeaderFileWriter : HeaderWriter, FileWriter {
00282   const char *real_filename;
00284   int prev;
00285 
00290   HeaderFileWriter(const char *filename) : FileWriter("tmp.xxx"),
00291     real_filename(strdup(filename)), prev(0)
00292   {
00293     fprintf(this->fh, "/* This file is automatically generated. Do not modify */\n\n");
00294     fprintf(this->fh, "#ifndef TABLE_STRINGS_H\n");
00295     fprintf(this->fh, "#define TABLE_STRINGS_H\n");
00296   }
00297 
00298   void WriteStringID(const char *name, int stringid)
00299   {
00300     if (prev + 1 != stringid) fprintf(this->fh, "\n");
00301     fprintf(this->fh, "static const StringID %s = 0x%X;\n", name, stringid);
00302     prev = stringid;
00303   }
00304 
00305   void Finalise(const StringData &data)
00306   {
00307     /* Find the plural form with the most amount of cases. */
00308     int max_plural_forms = 0;
00309     for (uint i = 0; i < lengthof(_plural_forms); i++) {
00310       max_plural_forms = max(max_plural_forms, _plural_forms[i].plural_count);
00311     }
00312 
00313     fprintf(this->fh,
00314       "\n"
00315       "static const uint LANGUAGE_PACK_VERSION     = 0x%X;\n"
00316       "static const uint LANGUAGE_MAX_PLURAL       = %d;\n"
00317       "static const uint LANGUAGE_MAX_PLURAL_FORMS = %d;\n\n",
00318       (uint)data.Version(), (uint)lengthof(_plural_forms), max_plural_forms
00319     );
00320 
00321     fprintf(this->fh, "#endif /* TABLE_STRINGS_H */\n");
00322 
00323     this->FileWriter::Finalise();
00324 
00325     if (CompareFiles(this->filename, this->real_filename)) {
00326       /* files are equal. tmp.xxx is not needed */
00327       unlink(this->filename);
00328     } else {
00329       /* else rename tmp.xxx into filename */
00330   #if defined(WIN32) || defined(WIN64)
00331       unlink(this->real_filename);
00332   #endif
00333       if (rename(this->filename, this->real_filename) == -1) error("rename() failed");
00334     }
00335   }
00336 };
00337 
00339 struct LanguageFileWriter : LanguageWriter, FileWriter {
00344   LanguageFileWriter(const char *filename) : FileWriter(filename)
00345   {
00346   }
00347 
00348   void WriteHeader(const LanguagePackHeader *header)
00349   {
00350     this->Write((const byte *)header, sizeof(*header));
00351   }
00352 
00353   void Finalise()
00354   {
00355     fputc(0, this->fh);
00356     this->FileWriter::Finalise();
00357   }
00358 
00359   void Write(const byte *buffer, size_t length)
00360   {
00361     if (fwrite(buffer, sizeof(*buffer), length, this->fh) != length) {
00362       error("Could not write to %s", this->filename);
00363     }
00364   }
00365 };
00366 
/**
 * Create a directory, using the mkdir() variant the platform offers.
 * The result of mkdir() is ignored.
 * @param directory Name of the directory to create.
 */
static inline void ottd_mkdir(const char *directory)
{
#if !defined(WIN32) && !defined(__WATCOMC__)
  /* Variant that takes the permissions of the new directory. */
  mkdir(directory, 0755);
#else
  mkdir(directory);
#endif
}
00376 
00382 static inline char *mkpath(char *buf, size_t buflen, const char *path, const char *file)
00383 {
00384   ttd_strlcpy(buf, path, buflen); // copy directory into buffer
00385 
00386   char *p = strchr(buf, '\0'); // add path seperator if necessary
00387   if (p[-1] != PATHSEPCHAR && (size_t)(p - buf) + 1 < buflen) *p++ = PATHSEPCHAR;
00388   ttd_strlcpy(p, file, buflen - (size_t)(p - buf)); // catenate filename at end of buffer
00389   return buf;
00390 }
00391 
#if defined(__MINGW32__)

/**
 * Turn every forward slash of a path into a backslash, in place.
 * Only MinGW builds do the replacement.
 * @param s The path to modify.
 * @return \a s.
 */
static inline char *replace_pathsep(char *s)
{
  char *cursor = s;
  while (*cursor != '\0') {
    if (*cursor == '/') *cursor = '\\';
    cursor++;
  }
  return s;
}
#else
/**
 * Path separators need no replacement in this build; the path is returned untouched.
 * @param s The path.
 * @return \a s.
 */
static inline char *replace_pathsep(char *s)
{
  return s;
}
#endif
00406 
00408 static const OptionData _opts[] = {
00409     GETOPT_NOVAL(     'v',  "--version"),
00410   GETOPT_GENERAL('C', '\0', "-export-commands", ODF_NO_VALUE),
00411   GETOPT_GENERAL('L', '\0', "-export-plurals",  ODF_NO_VALUE),
00412   GETOPT_GENERAL('P', '\0', "-export-pragmas",  ODF_NO_VALUE),
00413     GETOPT_NOVAL(     't',  "--todo"),
00414     GETOPT_NOVAL(     'w',  "--warning"),
00415     GETOPT_NOVAL(     'h',  "--help"),
00416   GETOPT_GENERAL('h', '?',  NULL,               ODF_NO_VALUE),
00417     GETOPT_VALUE(     's',  "--source_dir"),
00418     GETOPT_VALUE(     'd',  "--dest_dir"),
00419   GETOPT_END(),
00420 };
00421 
00422 int CDECL main(int argc, char *argv[])
00423 {
00424   char pathbuf[MAX_PATH];
00425   const char *src_dir = ".";
00426   const char *dest_dir = NULL;
00427 
00428   GetOptData mgo(argc - 1, argv + 1, _opts);
00429   for (;;) {
00430     int i = mgo.GetOpt();
00431     if (i == -1) break;
00432 
00433     switch (i) {
00434       case 'v':
00435         puts("$Revision$");
00436         return 0;
00437 
00438       case 'C':
00439         printf("args\tflags\tcommand\treplacement\n");
00440         for (const CmdStruct *cs = _cmd_structs; cs < endof(_cmd_structs); cs++) {
00441           char flags;
00442           switch (cs->value) {
00443             case 0x200E: case 0x200F: // Implicit BIDI controls
00444             case 0x202A: case 0x202B: case 0x202C: case 0x202D: case 0x202E: // Explicit BIDI controls
00445             case 0xA0: // Non breaking space
00446             case '\n': // Newlines may be added too
00447             case '{':  // This special
00448               /* This command may be in the translation when it is not in base */
00449               flags = 'i';
00450               break;
00451 
00452             default:
00453               if (cs->proc == EmitGender) {
00454                 flags = 'g'; // Command needs number of parameters defined by number of genders
00455               } else if (cs->proc == EmitPlural) {
00456                 flags = 'p'; // Command needs number of parameters defined by plural value
00457               } else {
00458                 flags = '0'; // Command needs no parameters
00459               }
00460           }
00461           printf("%i\t%c\t\"%s\"\t\"%s\"\n", cs->consumes, flags, cs->cmd, strstr(cs->cmd, "STRING") ? "STRING" : cs->cmd);
00462         }
00463         return 0;
00464 
00465       case 'L':
00466         printf("count\tdescription\tnames\n");
00467         for (const PluralForm *pf = _plural_forms; pf < endof(_plural_forms); pf++) {
00468           printf("%i\t\"%s\"\t%s\n", pf->plural_count, pf->description, pf->names);
00469         }
00470         return 0;
00471 
00472       case 'P':
00473         printf("name\tflags\tdefault\tdescription\n");
00474         for (size_t i = 0; i < lengthof(_pragmas); i++) {
00475           printf("\"%s\"\t%s\t\"%s\"\t\"%s\"\n",
00476               _pragmas[i][0], _pragmas[i][1], _pragmas[i][2], _pragmas[i][3]);
00477         }
00478         return 0;
00479 
00480       case 't':
00481         _show_todo |= 1;
00482         break;
00483 
00484       case 'w':
00485         _show_todo |= 2;
00486         break;
00487 
00488       case 'h':
00489         puts(
00490           "strgen - $Revision$\n"
00491           " -v | --version    print version information and exit\n"
00492           " -t | --todo       replace any untranslated strings with '<TODO>'\n"
00493           " -w | --warning    print a warning for any untranslated strings\n"
00494           " -h | -? | --help  print this help message and exit\n"
00495           " -s | --source_dir search for english.txt in the specified directory\n"
00496           " -d | --dest_dir   put output file in the specified directory, create if needed\n"
00497           " -export-commands  export all commands and exit\n"
00498           " -export-plurals   export all plural forms and exit\n"
00499           " -export-pragmas   export all pragmas and exit\n"
00500           " Run without parameters and strgen will search for english.txt and parse it,\n"
00501           " creating strings.h. Passing an argument, strgen will translate that language\n"
00502           " file using english.txt as a reference and output <language>.lng."
00503         );
00504         return 0;
00505 
00506       case 's':
00507         src_dir = replace_pathsep(mgo.opt);
00508         break;
00509 
00510       case 'd':
00511         dest_dir = replace_pathsep(mgo.opt);
00512         break;
00513 
00514       case -2:
00515         fprintf(stderr, "Invalid arguments\n");
00516         return 0;
00517     }
00518   }
00519 
00520   if (dest_dir == NULL) dest_dir = src_dir; // if dest_dir is not specified, it equals src_dir
00521 
00522   try {
00523     /* strgen has two modes of operation. If no (free) arguments are passed
00524      * strgen generates strings.h to the destination directory. If it is supplied
00525      * with a (free) parameter the program will translate that language to destination
00526      * directory. As input english.txt is parsed from the source directory */
00527     if (mgo.numleft == 0) {
00528       mkpath(pathbuf, lengthof(pathbuf), src_dir, "english.txt");
00529 
00530       /* parse master file */
00531       StringData data(TAB_COUNT);
00532       FileStringReader master_reader(data, pathbuf, true, false);
00533       master_reader.ParseFile();
00534       if (_errors != 0) return 1;
00535 
00536       /* write strings.h */
00537       ottd_mkdir(dest_dir);
00538       mkpath(pathbuf, lengthof(pathbuf), dest_dir, "strings.h");
00539 
00540       HeaderFileWriter writer(pathbuf);
00541       writer.WriteHeader(data);
00542       writer.Finalise(data);
00543     } else if (mgo.numleft >= 1) {
00544       char *r;
00545 
00546       mkpath(pathbuf, lengthof(pathbuf), src_dir, "english.txt");
00547 
00548       StringData data(TAB_COUNT);
00549       /* parse master file and check if target file is correct */
00550       FileStringReader master_reader(data, pathbuf, true, false);
00551       master_reader.ParseFile();
00552 
00553       for (int i = 0; i < mgo.numleft; i++) {
00554         data.FreeTranslation();
00555 
00556         const char *translation = replace_pathsep(mgo.argv[i]);
00557         const char *file = strrchr(translation, PATHSEPCHAR);
00558         FileStringReader translation_reader(data, translation, false, file == NULL || strcmp(file + 1, "english.txt") != 0);
00559         translation_reader.ParseFile(); // target file
00560         if (_errors != 0) return 1;
00561 
00562         /* get the targetfile, strip any directories and append to destination path */
00563         r = strrchr(mgo.argv[i], PATHSEPCHAR);
00564         mkpath(pathbuf, lengthof(pathbuf), dest_dir, (r != NULL) ? &r[1] : mgo.argv[i]);
00565 
00566         /* rename the .txt (input-extension) to .lng */
00567         r = strrchr(pathbuf, '.');
00568         if (r == NULL || strcmp(r, ".txt") != 0) r = strchr(pathbuf, '\0');
00569         ttd_strlcpy(r, ".lng", (size_t)(r - pathbuf));
00570 
00571         LanguageFileWriter writer(pathbuf);
00572         writer.WriteLang(data);
00573         writer.Finalise();
00574 
00575         /* if showing warnings, print a summary of the language */
00576         if ((_show_todo & 2) != 0) {
00577           fprintf(stdout, "%d warnings and %d errors for %s\n", _warnings, _errors, pathbuf);
00578         }
00579       }
00580     }
00581   } catch (...) {
00582     return 2;
00583   }
00584 
00585   return 0;
00586 }