blob: c0d84ccce5417bbc73f79cd8b031f5fa9f62135d [file] [log] [blame]
The Android Open Source Project9066cfe2009-03-03 19:31:44 -08001#include "SourcePos.h"
2#include "ValuesFile.h"
3#include "XLIFFFile.h"
4#include "Perforce.h"
5#include "merge_res_and_xliff.h"
6#include "localize.h"
7#include "file_utils.h"
8#include "res_check.h"
9#include "xmb.h"
10
11#include <host/pseudolocalize.h>
12
13#include <stdlib.h>
14#include <stdarg.h>
15#include <sstream>
16#include <stdio.h>
17#include <string.h>
18
19using namespace std;
20
21FILE* g_logFile = NULL;
22
23int test();
24
25int
26read_settings(const string& filename, map<string,Settings>* result, const string& rootDir)
27{
28 XMLNode* root = NodeHandler::ParseFile(filename, XMLNode::PRETTY);
29 if (root == NULL) {
30 SourcePos(filename, -1).Error("Error reading file.");
31 return 1;
32 }
33
34 // <configuration>
35 vector<XMLNode*> configNodes = root->GetElementsByName("", "configuration");
36 const size_t I = configNodes.size();
37 for (size_t i=0; i<I; i++) {
38 const XMLNode* configNode = configNodes[i];
39
40 Settings settings;
41 settings.id = configNode->GetAttribute("", "id", "");
42 if (settings.id == "") {
43 configNode->Position().Error("<configuration> needs an id attribute.");
44 delete root;
45 return 1;
46 }
47
48 settings.oldVersion = configNode->GetAttribute("", "old-cl", "");
49
50 settings.currentVersion = configNode->GetAttribute("", "new-cl", "");
51 if (settings.currentVersion == "") {
52 configNode->Position().Error("<configuration> needs a new-cl attribute.");
53 delete root;
54 return 1;
55 }
56
57 // <app>
58 vector<XMLNode*> appNodes = configNode->GetElementsByName("", "app");
59
60 const size_t J = appNodes.size();
61 for (size_t j=0; j<J; j++) {
62 const XMLNode* appNode = appNodes[j];
63
64 string dir = appNode->GetAttribute("", "dir", "");
65 if (dir == "") {
66 appNode->Position().Error("<app> needs a dir attribute.");
67 delete root;
68 return 1;
69 }
70
71 settings.apps.push_back(dir);
72 }
73
74 // <reject>
75 vector<XMLNode*> rejectNodes = configNode->GetElementsByName("", "reject");
76
77 const size_t K = rejectNodes.size();
78 for (size_t k=0; k<K; k++) {
79 const XMLNode* rejectNode = rejectNodes[k];
80
81 Reject reject;
82
83 reject.file = rejectNode->GetAttribute("", "file", "");
84 if (reject.file == "") {
85 rejectNode->Position().Error("<reject> needs a file attribute.");
86 delete root;
87 return 1;
88 }
89 string f = reject.file;
90 reject.file = rootDir;
91 reject.file += '/';
92 reject.file += f;
93
94 reject.name = rejectNode->GetAttribute("", "name", "");
95 if (reject.name == "") {
96 rejectNode->Position().Error("<reject> needs a name attribute.");
97 delete root;
98 return 1;
99 }
100
101 reject.comment = trim_string(rejectNode->CollapseTextContents());
102
103 settings.reject.push_back(reject);
104 }
105
106 (*result)[settings.id] = settings;
107 }
108
109 delete root;
110 return 0;
111}
112
113
114static void
115ValuesFile_to_XLIFFFile(const ValuesFile* values, XLIFFFile* xliff, const string& englishFilename)
116{
117 const set<StringResource>& strings = values->GetStrings();
118 for (set<StringResource>::const_iterator it=strings.begin(); it!=strings.end(); it++) {
119 StringResource res = *it;
120 res.file = englishFilename;
121 xliff->AddStringResource(res);
122 }
123}
124
125static bool
126contains_reject(const Settings& settings, const string& file, const TransUnit& tu)
127{
128 const string name = tu.id;
129 const vector<Reject>& reject = settings.reject;
130 const size_t I = reject.size();
131 for (size_t i=0; i<I; i++) {
132 const Reject& r = reject[i];
133 if (r.file == file && r.name == name) {
134 return true;
135 }
136 }
137 return false;
138}
139
140/**
141 * If it's been rejected, then we keep whatever info we have.
142 *
143 * Implements this truth table:
144 *
145 * S AT AS Keep
146 * -----------------------
147 * 0 0 0 0 (this case can't happen)
148 * 0 0 1 0 (it was there, never translated, and removed)
149 * 0 1 0 0 (somehow it got translated, but it was removed)
150 * 0 1 1 0 (it was removed after having been translated)
151 *
152 * 1 0 0 1 (it was just added)
153 * 1 0 1 1 (it was added, has been changed, but it never got translated)
154 * 1 1 0 1 (somehow it got translated, but we don't know based on what)
155 * 1 1 1 0/1 (it's in both. 0 if S=AS b/c there's no need to retranslate if they're
156 * the same. 1 if S!=AS because S changed, so it should be retranslated)
157 *
158 * The first four are cases where, whatever happened in the past, the string isn't there
159 * now, so it shouldn't be in the XLIFF file.
160 *
161 * For cases 4 and 5, the string has never been translated, so get it translated.
162 *
163 * For case 6, it's unclear where the translated version came from, so we're conservative
164 * and send it back for them to have another shot at.
165 *
166 * For case 7, we have some data. We have two choices. We could rely on the translator's
167 * translation memory or tools to notice that the strings haven't changed, and populate the
168 * <target> field themselves. Or if the string hasn't changed since last time, we can just
169 * not even tell them about it. As the project nears the end, it will be convenient to see
170 * the xliff files reducing in size, so we pick the latter. Obviously, if the string has
171 * changed, then we need to get it retranslated.
172 */
173bool
174keep_this_trans_unit(const string& file, const TransUnit& unit, void* cookie)
175{
176 const Settings* settings = reinterpret_cast<const Settings*>(cookie);
177
178 if (contains_reject(*settings, file, unit)) {
179 return true;
180 }
181
182 if (unit.source.id == "") {
183 return false;
184 }
185 if (unit.altTarget.id == "" || unit.altSource.id == "") {
186 return true;
187 }
188 return unit.source.value->ContentsToString(XLIFF_NAMESPACES)
189 != unit.altSource.value->ContentsToString(XLIFF_NAMESPACES);
190}
191
192int
193validate_config(const string& settingsFile, const map<string,Settings>& settings,
194 const string& config)
195{
196 if (settings.find(config) == settings.end()) {
197 SourcePos(settingsFile, -1).Error("settings file does not contain setting: %s\n",
198 config.c_str());
199 return 1;
200 }
201 return 0;
202}
203
204int
205validate_configs(const string& settingsFile, const map<string,Settings>& settings,
206 const vector<string>& configs)
207{
208 int err = 0;
209 for (size_t i=0; i<configs.size(); i++) {
210 string config = configs[i];
211 err |= validate_config(settingsFile, settings, config);
212 }
213 return err;
214}
215
216int
217select_files(vector<string> *resFiles, const string& config,
218 const map<string,Settings>& settings, const string& rootDir)
219{
220 int err;
221 vector<vector<string> > allResFiles;
222 vector<string> configs;
223 configs.push_back(config);
224 err = select_files(&allResFiles, configs, settings, rootDir);
225 if (err == 0) {
226 *resFiles = allResFiles[0];
227 }
228 return err;
229}
230
231int
232select_files(vector<vector<string> > *allResFiles, const vector<string>& configs,
233 const map<string,Settings>& settings, const string& rootDir)
234{
235 int err;
236 printf("Selecting files...");
237 fflush(stdout);
238
239 for (size_t i=0; i<configs.size(); i++) {
240 const string& config = configs[i];
241 const Settings& setting = settings.find(config)->second;
242
243 vector<string> resFiles;
244 err = Perforce::GetResourceFileNames(setting.currentVersion, rootDir,
245 setting.apps, &resFiles, true);
246 if (err != 0) {
247 fprintf(stderr, "error with perforce. bailing\n");
248 return err;
249 }
250
251 allResFiles->push_back(resFiles);
252 }
253 return 0;
254}
255
256static int
257do_export(const string& settingsFile, const string& rootDir, const string& outDir,
258 const string& targetLocale, const vector<string>& configs)
259{
260 bool success = true;
261 int err;
262
263 if (false) {
264 printf("settingsFile=%s\n", settingsFile.c_str());
265 printf("rootDir=%s\n", rootDir.c_str());
266 printf("outDir=%s\n", outDir.c_str());
267 for (size_t i=0; i<configs.size(); i++) {
268 printf("config[%zd]=%s\n", i, configs[i].c_str());
269 }
270 }
271
272 map<string,Settings> settings;
273 err = read_settings(settingsFile, &settings, rootDir);
274 if (err != 0) {
275 return err;
276 }
277
278 err = validate_configs(settingsFile, settings, configs);
279 if (err != 0) {
280 return err;
281 }
282
283 vector<vector<string> > allResFiles;
284 err = select_files(&allResFiles, configs, settings, rootDir);
285 if (err != 0) {
286 return err;
287 }
288
289 size_t totalFileCount = 0;
290 for (size_t i=0; i<allResFiles.size(); i++) {
291 totalFileCount += allResFiles[i].size();
292 }
293 totalFileCount *= 3; // we try all 3 versions of the file
294
295 size_t fileProgress = 0;
296 vector<Stats> stats;
297 vector<pair<string,XLIFFFile*> > xliffs;
298
299 for (size_t i=0; i<configs.size(); i++) {
300 const string& config = configs[i];
301 const Settings& setting = settings[config];
302
303 if (false) {
304 fprintf(stderr, "Configuration: %s (%zd of %zd)\n", config.c_str(), i+1,
305 configs.size());
306 fprintf(stderr, " Old CL: %s\n", setting.oldVersion.c_str());
307 fprintf(stderr, " Current CL: %s\n", setting.currentVersion.c_str());
308 }
309
310 Configuration english;
311 english.locale = "en_US";
312 Configuration translated;
313 translated.locale = targetLocale;
314 XLIFFFile* xliff = XLIFFFile::Create(english, translated, setting.currentVersion);
315
316 const vector<string>& resFiles = allResFiles[i];
317 const size_t J = resFiles.size();
318 for (size_t j=0; j<J; j++) {
319 string resFile = resFiles[j];
320
321 // parse the files into a ValuesFile
322 // pull out the strings and add them to the XLIFFFile
323
324 // current file
325 print_file_status(++fileProgress, totalFileCount);
326 ValuesFile* currentFile = get_values_file(resFile, english, CURRENT_VERSION,
327 setting.currentVersion, true);
328 if (currentFile != NULL) {
329 ValuesFile_to_XLIFFFile(currentFile, xliff, resFile);
330 //printf("currentFile=[%s]\n", currentFile->ToString().c_str());
331 } else {
332 fprintf(stderr, "error reading file %s@%s\n", resFile.c_str(),
333 setting.currentVersion.c_str());
334 success = false;
335 }
336
337 // old file
338 print_file_status(++fileProgress, totalFileCount);
339 ValuesFile* oldFile = get_values_file(resFile, english, OLD_VERSION,
340 setting.oldVersion, false);
341 if (oldFile != NULL) {
342 ValuesFile_to_XLIFFFile(oldFile, xliff, resFile);
343 //printf("oldFile=[%s]\n", oldFile->ToString().c_str());
344 }
345
346 // translated version
347 // (get the head of the tree for the most recent translation, but it's considered
348 // the old one because the "current" one hasn't been made yet, and this goes into
349 // the <alt-trans> tag if necessary
350 print_file_status(++fileProgress, totalFileCount);
351 string transFilename = translated_file_name(resFile, targetLocale);
352 ValuesFile* transFile = get_values_file(transFilename, translated, OLD_VERSION,
353 setting.currentVersion, false);
354 if (transFile != NULL) {
355 ValuesFile_to_XLIFFFile(transFile, xliff, resFile);
356 }
357
358 delete currentFile;
359 delete oldFile;
360 delete transFile;
361 }
362
363 Stats beforeFilterStats = xliff->GetStats(config);
364
365 // run through the XLIFFFile and strip out TransUnits that have identical
366 // old and current source values and are not in the reject list, or just
367 // old values and no source values
368 xliff->Filter(keep_this_trans_unit, (void*)&setting);
369
370 Stats afterFilterStats = xliff->GetStats(config);
371 afterFilterStats.totalStrings = beforeFilterStats.totalStrings;
372
373 // add the reject comments
374 for (vector<Reject>::const_iterator reject = setting.reject.begin();
375 reject != setting.reject.end(); reject++) {
376 TransUnit* tu = xliff->EditTransUnit(reject->file, reject->name);
377 tu->rejectComment = reject->comment;
378 }
379
380 // config-locale-current_cl.xliff
381 stringstream filename;
382 if (outDir != "") {
383 filename << outDir << '/';
384 }
385 filename << config << '-' << targetLocale << '-' << setting.currentVersion << ".xliff";
386 xliffs.push_back(pair<string,XLIFFFile*>(filename.str(), xliff));
387
388 stats.push_back(afterFilterStats);
389 }
390
391 // today is a good day to die
392 if (!success || SourcePos::HasErrors()) {
393 return 1;
394 }
395
396 // write the XLIFF files
397 printf("\nWriting %zd file%s...\n", xliffs.size(), xliffs.size() == 1 ? "" : "s");
398 for (vector<pair<string,XLIFFFile*> >::iterator it = xliffs.begin(); it != xliffs.end(); it++) {
399 const string& filename = it->first;
400 XLIFFFile* xliff = it->second;
401 string text = xliff->ToString();
402 write_to_file(filename, text);
403 }
404
405 // the stats
406 printf("\n"
407 " to without total\n"
408 " config files translate comments strings\n"
409 "-----------------------------------------------------------------------\n");
410 Stats totals;
411 totals.config = "total";
412 totals.files = 0;
413 totals.toBeTranslated = 0;
414 totals.noComments = 0;
415 totals.totalStrings = 0;
416 for (vector<Stats>::iterator it=stats.begin(); it!=stats.end(); it++) {
417 string cfg = it->config;
418 if (cfg.length() > 20) {
419 cfg.resize(20);
420 }
421 printf(" %-20s %-9zd %-9zd %-9zd %-19zd\n", cfg.c_str(), it->files,
422 it->toBeTranslated, it->noComments, it->totalStrings);
423 totals.files += it->files;
424 totals.toBeTranslated += it->toBeTranslated;
425 totals.noComments += it->noComments;
426 totals.totalStrings += it->totalStrings;
427 }
428 if (stats.size() > 1) {
429 printf("-----------------------------------------------------------------------\n"
430 " %-20s %-9zd %-9zd %-9zd %-19zd\n", totals.config.c_str(), totals.files,
431 totals.toBeTranslated, totals.noComments, totals.totalStrings);
432 }
433 printf("\n");
434 return 0;
435}
436
437struct PseudolocalizeSettings {
438 XLIFFFile* xliff;
439 bool expand;
440};
441
442
443string
444pseudolocalize_string(const string& source, const PseudolocalizeSettings* settings)
445{
446 return pseudolocalize_string(source);
447}
448
449static XMLNode*
450pseudolocalize_xml_node(const XMLNode* source, const PseudolocalizeSettings* settings)
451{
452 if (source->Type() == XMLNode::TEXT) {
453 return XMLNode::NewText(source->Position(), pseudolocalize_string(source->Text(), settings),
454 source->Pretty());
455 } else {
456 XMLNode* target;
457 if (source->Namespace() == XLIFF_XMLNS && source->Name() == "g") {
458 // XXX don't translate these
459 target = XMLNode::NewElement(source->Position(), source->Namespace(),
460 source->Name(), source->Attributes(), source->Pretty());
461 } else {
462 target = XMLNode::NewElement(source->Position(), source->Namespace(),
463 source->Name(), source->Attributes(), source->Pretty());
464 }
465
466 const vector<XMLNode*>& children = source->Children();
467 const size_t I = children.size();
468 for (size_t i=0; i<I; i++) {
469 target->EditChildren().push_back(pseudolocalize_xml_node(children[i], settings));
470 }
471
472 return target;
473 }
474}
475
476void
477pseudolocalize_trans_unit(const string&file, TransUnit* unit, void* cookie)
478{
479 const PseudolocalizeSettings* settings = (PseudolocalizeSettings*)cookie;
480
481 const StringResource& source = unit->source;
482 StringResource* target = &unit->target;
483 *target = source;
484
485 target->config = settings->xliff->TargetConfig();
486
487 delete target->value;
488 target->value = pseudolocalize_xml_node(source.value, settings);
489}
490
491int
492pseudolocalize_xliff(XLIFFFile* xliff, bool expand)
493{
494 PseudolocalizeSettings settings;
495
496 settings.xliff = xliff;
497 settings.expand = expand;
498 xliff->Map(pseudolocalize_trans_unit, &settings);
499 return 0;
500}
501
502static int
503do_pseudo(const string& infile, const string& outfile, bool expand)
504{
505 int err;
506
507 XLIFFFile* xliff = XLIFFFile::Parse(infile);
508 if (xliff == NULL) {
509 return 1;
510 }
511
512 pseudolocalize_xliff(xliff, expand);
513
514 err = write_to_file(outfile, xliff->ToString());
515
516 delete xliff;
517
518 return err;
519}
520
521void
522log_printf(const char *fmt, ...)
523{
524 int ret;
525 va_list ap;
526
527 if (g_logFile != NULL) {
528 va_start(ap, fmt);
529 ret = vfprintf(g_logFile, fmt, ap);
530 va_end(ap);
531 fflush(g_logFile);
532 }
533}
534
535void
536close_log_file()
537{
538 if (g_logFile != NULL) {
539 fclose(g_logFile);
540 }
541}
542
543void
544open_log_file(const char* file)
545{
546 g_logFile = fopen(file, "w");
547 printf("log file: %s -- %p\n", file, g_logFile);
548 atexit(close_log_file);
549}
550
/**
 * Writes the command-line help for every subcommand to stderr.
 * Always returns 1 so callers can write `return usage();`.
 */
static int
usage()
{
    static const char kUsageText[] =
        "usage: localize export OPTIONS CONFIGS...\n"
        " REQUIRED OPTIONS\n"
        " --settings SETTINGS The settings file to use. See CONFIGS below.\n"
        " --root TREE_ROOT The location in Perforce of the files. e.g. //device\n"
        " --target LOCALE The target locale. See LOCALES below.\n"
        "\n"
        " OPTIONAL OPTIONS\n"
        " --out DIR Directory to put the output files. Defaults to the\n"
        " current directory if not supplied. Files are\n"
        " named as follows:\n"
        " CONFIG-LOCALE-CURRENT_CL.xliff\n"
        "\n"
        "\n"
        "usage: localize import XLIFF_FILE...\n"
        "\n"
        "Import a translated XLIFF file back into the tree.\n"
        "\n"
        "\n"
        "usage: localize xlb XMB_FILE VALUES_FILES...\n"
        "\n"
        "Read resource files from the tree file and write the corresponding XLB file\n"
        "\n"
        "Supply all of the android resource files (values files) to export after that.\n"
        "\n"
        "\n"
        "\n"
        "CONFIGS\n"
        "\n"
        "LOCALES\n"
        "Locales are specified in the form en_US They will be processed correctly\n"
        "to locate the resouce files in the tree.\n"
        "\n"
        "\n"
        "usage: localize pseudo OPTIONS INFILE [OUTFILE]\n"
        " OPTIONAL OPTIONS\n"
        " --big Pad strings so they get longer.\n"
        "\n"
        "Read INFILE, an XLIFF file, and output a pseudotranslated version of that file. If\n"
        "OUTFILE is specified, the results are written there; otherwise, the results are\n"
        "written back to INFILE.\n"
        "\n"
        "\n"
        "usage: localize rescheck FILES...\n"
        "\n"
        "Reads the base strings and prints warnings about bad resources from the given files.\n"
        "\n";

    // The text contains no conversion specifiers, so it can be written verbatim.
    fputs(kUsageText, stderr);
    return 1;
}
603
604int
605main(int argc, const char** argv)
606{
607 //open_log_file("log.txt");
608 //g_logFile = stdout;
609
610 if (argc == 2 && 0 == strcmp(argv[1], "--test")) {
611 return test();
612 }
613
614 if (argc < 2) {
615 return usage();
616 }
617
618 int index = 1;
619
620 if (0 == strcmp("export", argv[index])) {
621 string settingsFile;
622 string rootDir;
623 string outDir;
624 string baseLocale = "en";
625 string targetLocale;
626 string language, region;
627 vector<string> configs;
628
629 index++;
630 while (index < argc) {
631 if (0 == strcmp("--settings", argv[index])) {
632 settingsFile = argv[index+1];
633 index += 2;
634 }
635 else if (0 == strcmp("--root", argv[index])) {
636 rootDir = argv[index+1];
637 index += 2;
638 }
639 else if (0 == strcmp("--out", argv[index])) {
640 outDir = argv[index+1];
641 index += 2;
642 }
643 else if (0 == strcmp("--target", argv[index])) {
644 targetLocale = argv[index+1];
645 index += 2;
646 }
647 else if (argv[index][0] == '-') {
648 fprintf(stderr, "unknown argument %s\n", argv[index]);
649 return usage();
650 }
651 else {
652 break;
653 }
654 }
655 for (; index<argc; index++) {
656 configs.push_back(argv[index]);
657 }
658
659 if (settingsFile == "" || rootDir == "" || configs.size() == 0 || targetLocale == "") {
660 return usage();
661 }
662 if (!split_locale(targetLocale, &language, &region)) {
663 fprintf(stderr, "illegal --target locale: '%s'\n", targetLocale.c_str());
664 return usage();
665 }
666
667
668 return do_export(settingsFile, rootDir, outDir, targetLocale, configs);
669 }
670 else if (0 == strcmp("import", argv[index])) {
671 vector<string> xliffFilenames;
672
673 index++;
674 for (; index<argc; index++) {
675 xliffFilenames.push_back(argv[index]);
676 }
677
678 return do_merge(xliffFilenames);
679 }
680 else if (0 == strcmp("xlb", argv[index])) {
681 string outfile;
682 vector<string> resFiles;
683
684 index++;
685 if (argc < index+1) {
686 return usage();
687 }
688
689 outfile = argv[index];
690
691 index++;
692 for (; index<argc; index++) {
693 resFiles.push_back(argv[index]);
694 }
695
696 return do_xlb_export(outfile, resFiles);
697 }
698 else if (0 == strcmp("pseudo", argv[index])) {
699 string infile;
700 string outfile;
701 bool big = false;
702
703 index++;
704 while (index < argc) {
705 if (0 == strcmp("--big", argv[index])) {
706 big = true;
707 index += 1;
708 }
709 else if (argv[index][0] == '-') {
710 fprintf(stderr, "unknown argument %s\n", argv[index]);
711 return usage();
712 }
713 else {
714 break;
715 }
716 }
717
718 if (index == argc-1) {
719 infile = argv[index];
720 outfile = argv[index];
721 }
722 else if (index == argc-2) {
723 infile = argv[index];
724 outfile = argv[index+1];
725 }
726 else {
727 fprintf(stderr, "unknown argument %s\n", argv[index]);
728 return usage();
729 }
730
731 return do_pseudo(infile, outfile, big);
732 }
733 else if (0 == strcmp("rescheck", argv[index])) {
734 vector<string> files;
735
736 index++;
737 while (index < argc) {
738 if (argv[index][0] == '-') {
739 fprintf(stderr, "unknown argument %s\n", argv[index]);
740 return usage();
741 }
742 else {
743 break;
744 }
745 }
746 for (; index<argc; index++) {
747 files.push_back(argv[index]);
748 }
749
750 if (files.size() == 0) {
751 return usage();
752 }
753
754 return do_rescheck(files);
755 }
756 else {
757 return usage();
758 }
759
760 if (SourcePos::HasErrors()) {
761 SourcePos::PrintErrors(stderr);
762 return 1;
763 }
764
765 return 0;
766}
767