summaryrefslogtreecommitdiff
path: root/tools/localize/localize.cpp
diff options
context:
space:
mode:
author The Android Open Source Project <initial-contribution@android.com> 2008-10-21 07:00:00 -0700
committer The Android Open Source Project <initial-contribution@android.com> 2008-10-21 07:00:00 -0700
commit54b6cfa9a9e5b861a9930af873580d6dc20f773c (patch)
tree35051494d2af230dce54d6b31c6af8fc24091316 /tools/localize/localize.cpp
Initial Contribution
Diffstat (limited to 'tools/localize/localize.cpp')
-rw-r--r--tools/localize/localize.cpp766
1 files changed, 766 insertions, 0 deletions
diff --git a/tools/localize/localize.cpp b/tools/localize/localize.cpp
new file mode 100644
index 000000000000..d03c811b29b7
--- /dev/null
+++ b/tools/localize/localize.cpp
@@ -0,0 +1,766 @@
+#include "SourcePos.h"
+#include "ValuesFile.h"
+#include "XLIFFFile.h"
+#include "Perforce.h"
+#include "merge_res_and_xliff.h"
+#include "localize.h"
+#include "file_utils.h"
+#include "res_check.h"
+#include "xmb.h"
+
+#include <host/pseudolocalize.h>
+
+#include <stdarg.h>
+#include <sstream>
+#include <stdio.h>
+#include <string.h>
+
+using namespace std;
+
+FILE* g_logFile = NULL;
+
+int test();
+
+int
+read_settings(const string& filename, map<string,Settings>* result, const string& rootDir)
+{
+ XMLNode* root = NodeHandler::ParseFile(filename, XMLNode::PRETTY);
+ if (root == NULL) {
+ SourcePos(filename, -1).Error("Error reading file.");
+ return 1;
+ }
+
+ // <configuration>
+ vector<XMLNode*> configNodes = root->GetElementsByName("", "configuration");
+ const size_t I = configNodes.size();
+ for (size_t i=0; i<I; i++) {
+ const XMLNode* configNode = configNodes[i];
+
+ Settings settings;
+ settings.id = configNode->GetAttribute("", "id", "");
+ if (settings.id == "") {
+ configNode->Position().Error("<configuration> needs an id attribute.");
+ delete root;
+ return 1;
+ }
+
+ settings.oldVersion = configNode->GetAttribute("", "old-cl", "");
+
+ settings.currentVersion = configNode->GetAttribute("", "new-cl", "");
+ if (settings.currentVersion == "") {
+ configNode->Position().Error("<configuration> needs a new-cl attribute.");
+ delete root;
+ return 1;
+ }
+
+ // <app>
+ vector<XMLNode*> appNodes = configNode->GetElementsByName("", "app");
+
+ const size_t J = appNodes.size();
+ for (size_t j=0; j<J; j++) {
+ const XMLNode* appNode = appNodes[j];
+
+ string dir = appNode->GetAttribute("", "dir", "");
+ if (dir == "") {
+ appNode->Position().Error("<app> needs a dir attribute.");
+ delete root;
+ return 1;
+ }
+
+ settings.apps.push_back(dir);
+ }
+
+ // <reject>
+ vector<XMLNode*> rejectNodes = configNode->GetElementsByName("", "reject");
+
+ const size_t K = rejectNodes.size();
+ for (size_t k=0; k<K; k++) {
+ const XMLNode* rejectNode = rejectNodes[k];
+
+ Reject reject;
+
+ reject.file = rejectNode->GetAttribute("", "file", "");
+ if (reject.file == "") {
+ rejectNode->Position().Error("<reject> needs a file attribute.");
+ delete root;
+ return 1;
+ }
+ string f = reject.file;
+ reject.file = rootDir;
+ reject.file += '/';
+ reject.file += f;
+
+ reject.name = rejectNode->GetAttribute("", "name", "");
+ if (reject.name == "") {
+ rejectNode->Position().Error("<reject> needs a name attribute.");
+ delete root;
+ return 1;
+ }
+
+ reject.comment = trim_string(rejectNode->CollapseTextContents());
+
+ settings.reject.push_back(reject);
+ }
+
+ (*result)[settings.id] = settings;
+ }
+
+ delete root;
+ return 0;
+}
+
+
+static void
+ValuesFile_to_XLIFFFile(const ValuesFile* values, XLIFFFile* xliff, const string& englishFilename)
+{
+ const set<StringResource>& strings = values->GetStrings();
+ for (set<StringResource>::const_iterator it=strings.begin(); it!=strings.end(); it++) {
+ StringResource res = *it;
+ res.file = englishFilename;
+ xliff->AddStringResource(res);
+ }
+}
+
+static bool
+contains_reject(const Settings& settings, const string& file, const TransUnit& tu)
+{
+ const string name = tu.id;
+ const vector<Reject>& reject = settings.reject;
+ const size_t I = reject.size();
+ for (size_t i=0; i<I; i++) {
+ const Reject& r = reject[i];
+ if (r.file == file && r.name == name) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/**
+ * If it's been rejected, then we keep whatever info we have.
+ *
+ * Implements this truth table:
+ *
+ * S AT AS Keep
+ * -----------------------
+ * 0 0 0 0 (this case can't happen)
+ * 0 0 1 0 (it was there, never translated, and removed)
+ * 0 1 0 0 (somehow it got translated, but it was removed)
+ * 0 1 1 0 (it was removed after having been translated)
+ *
+ * 1 0 0 1 (it was just added)
+ * 1 0 1 1 (it was added, has been changed, but it never got translated)
+ * 1 1 0 1 (somehow it got translated, but we don't know based on what)
+ * 1 1 1 0/1 (it's in both. 0 if S=AS b/c there's no need to retranslate if they're
+ * the same. 1 if S!=AS because S changed, so it should be retranslated)
+ *
+ * The first four are cases where, whatever happened in the past, the string isn't there
+ * now, so it shouldn't be in the XLIFF file.
+ *
+ * For cases 4 and 5, the string has never been translated, so get it translated.
+ *
+ * For case 6, it's unclear where the translated version came from, so we're conservative
+ * and send it back for them to have another shot at.
+ *
+ * For case 7, we have some data. We have two choices. We could rely on the translator's
+ * translation memory or tools to notice that the strings haven't changed, and populate the
+ * <target> field themselves. Or if the string hasn't changed since last time, we can just
+ * not even tell them about it. As the project nears the end, it will be convenient to see
+ * the xliff files reducing in size, so we pick the latter. Obviously, if the string has
+ * changed, then we need to get it retranslated.
+ */
+bool
+keep_this_trans_unit(const string& file, const TransUnit& unit, void* cookie)
+{
+ const Settings* settings = reinterpret_cast<const Settings*>(cookie);
+
+ if (contains_reject(*settings, file, unit)) {
+ return true;
+ }
+
+ if (unit.source.id == "") {
+ return false;
+ }
+ if (unit.altTarget.id == "" || unit.altSource.id == "") {
+ return true;
+ }
+ return unit.source.value->ContentsToString(XLIFF_NAMESPACES)
+ != unit.altSource.value->ContentsToString(XLIFF_NAMESPACES);
+}
+
+int
+validate_config(const string& settingsFile, const map<string,Settings>& settings,
+ const string& config)
+{
+ if (settings.find(config) == settings.end()) {
+ SourcePos(settingsFile, -1).Error("settings file does not contain setting: %s\n",
+ config.c_str());
+ return 1;
+ }
+ return 0;
+}
+
+int
+validate_configs(const string& settingsFile, const map<string,Settings>& settings,
+ const vector<string>& configs)
+{
+ int err = 0;
+ for (size_t i=0; i<configs.size(); i++) {
+ string config = configs[i];
+ err |= validate_config(settingsFile, settings, config);
+ }
+ return err;
+}
+
+int
+select_files(vector<string> *resFiles, const string& config,
+ const map<string,Settings>& settings, const string& rootDir)
+{
+ int err;
+ vector<vector<string> > allResFiles;
+ vector<string> configs;
+ configs.push_back(config);
+ err = select_files(&allResFiles, configs, settings, rootDir);
+ if (err == 0) {
+ *resFiles = allResFiles[0];
+ }
+ return err;
+}
+
+int
+select_files(vector<vector<string> > *allResFiles, const vector<string>& configs,
+ const map<string,Settings>& settings, const string& rootDir)
+{
+ int err;
+ printf("Selecting files...");
+ fflush(stdout);
+
+ for (size_t i=0; i<configs.size(); i++) {
+ const string& config = configs[i];
+ const Settings& setting = settings.find(config)->second;
+
+ vector<string> resFiles;
+ err = Perforce::GetResourceFileNames(setting.currentVersion, rootDir,
+ setting.apps, &resFiles, true);
+ if (err != 0) {
+ fprintf(stderr, "error with perforce. bailing\n");
+ return err;
+ }
+
+ allResFiles->push_back(resFiles);
+ }
+ return 0;
+}
+
+static int
+do_export(const string& settingsFile, const string& rootDir, const string& outDir,
+ const string& targetLocale, const vector<string>& configs)
+{
+ bool success = true;
+ int err;
+
+ if (false) {
+ printf("settingsFile=%s\n", settingsFile.c_str());
+ printf("rootDir=%s\n", rootDir.c_str());
+ printf("outDir=%s\n", outDir.c_str());
+ for (size_t i=0; i<configs.size(); i++) {
+ printf("config[%zd]=%s\n", i, configs[i].c_str());
+ }
+ }
+
+ map<string,Settings> settings;
+ err = read_settings(settingsFile, &settings, rootDir);
+ if (err != 0) {
+ return err;
+ }
+
+ err = validate_configs(settingsFile, settings, configs);
+ if (err != 0) {
+ return err;
+ }
+
+ vector<vector<string> > allResFiles;
+ err = select_files(&allResFiles, configs, settings, rootDir);
+ if (err != 0) {
+ return err;
+ }
+
+ size_t totalFileCount = 0;
+ for (size_t i=0; i<allResFiles.size(); i++) {
+ totalFileCount += allResFiles[i].size();
+ }
+ totalFileCount *= 3; // we try all 3 versions of the file
+
+ size_t fileProgress = 0;
+ vector<Stats> stats;
+ vector<pair<string,XLIFFFile*> > xliffs;
+
+ for (size_t i=0; i<configs.size(); i++) {
+ const string& config = configs[i];
+ const Settings& setting = settings[config];
+
+ if (false) {
+ fprintf(stderr, "Configuration: %s (%zd of %zd)\n", config.c_str(), i+1,
+ configs.size());
+ fprintf(stderr, " Old CL: %s\n", setting.oldVersion.c_str());
+ fprintf(stderr, " Current CL: %s\n", setting.currentVersion.c_str());
+ }
+
+ Configuration english;
+ english.locale = "en_US";
+ Configuration translated;
+ translated.locale = targetLocale;
+ XLIFFFile* xliff = XLIFFFile::Create(english, translated, setting.currentVersion);
+
+ const vector<string>& resFiles = allResFiles[i];
+ const size_t J = resFiles.size();
+ for (size_t j=0; j<J; j++) {
+ string resFile = resFiles[j];
+
+ // parse the files into a ValuesFile
+ // pull out the strings and add them to the XLIFFFile
+
+ // current file
+ print_file_status(++fileProgress, totalFileCount);
+ ValuesFile* currentFile = get_values_file(resFile, english, CURRENT_VERSION,
+ setting.currentVersion, true);
+ if (currentFile != NULL) {
+ ValuesFile_to_XLIFFFile(currentFile, xliff, resFile);
+ //printf("currentFile=[%s]\n", currentFile->ToString().c_str());
+ } else {
+ fprintf(stderr, "error reading file %s@%s\n", resFile.c_str(),
+ setting.currentVersion.c_str());
+ success = false;
+ }
+
+ // old file
+ print_file_status(++fileProgress, totalFileCount);
+ ValuesFile* oldFile = get_values_file(resFile, english, OLD_VERSION,
+ setting.oldVersion, false);
+ if (oldFile != NULL) {
+ ValuesFile_to_XLIFFFile(oldFile, xliff, resFile);
+ //printf("oldFile=[%s]\n", oldFile->ToString().c_str());
+ }
+
+ // translated version
+ // (get the head of the tree for the most recent translation, but it's considered
+ // the old one because the "current" one hasn't been made yet, and this goes into
+ // the <alt-trans> tag if necessary
+ print_file_status(++fileProgress, totalFileCount);
+ string transFilename = translated_file_name(resFile, targetLocale);
+ ValuesFile* transFile = get_values_file(transFilename, translated, OLD_VERSION,
+ setting.currentVersion, false);
+ if (transFile != NULL) {
+ ValuesFile_to_XLIFFFile(transFile, xliff, resFile);
+ }
+
+ delete currentFile;
+ delete oldFile;
+ delete transFile;
+ }
+
+ Stats beforeFilterStats = xliff->GetStats(config);
+
+ // run through the XLIFFFile and strip out TransUnits that have identical
+ // old and current source values and are not in the reject list, or just
+ // old values and no source values
+ xliff->Filter(keep_this_trans_unit, (void*)&setting);
+
+ Stats afterFilterStats = xliff->GetStats(config);
+ afterFilterStats.totalStrings = beforeFilterStats.totalStrings;
+
+ // add the reject comments
+ for (vector<Reject>::const_iterator reject = setting.reject.begin();
+ reject != setting.reject.end(); reject++) {
+ TransUnit* tu = xliff->EditTransUnit(reject->file, reject->name);
+ tu->rejectComment = reject->comment;
+ }
+
+ // config-locale-current_cl.xliff
+ stringstream filename;
+ if (outDir != "") {
+ filename << outDir << '/';
+ }
+ filename << config << '-' << targetLocale << '-' << setting.currentVersion << ".xliff";
+ xliffs.push_back(pair<string,XLIFFFile*>(filename.str(), xliff));
+
+ stats.push_back(afterFilterStats);
+ }
+
+ // today is a good day to die
+ if (!success || SourcePos::HasErrors()) {
+ return 1;
+ }
+
+ // write the XLIFF files
+ printf("\nWriting %zd file%s...\n", xliffs.size(), xliffs.size() == 1 ? "" : "s");
+ for (vector<pair<string,XLIFFFile*> >::iterator it = xliffs.begin(); it != xliffs.end(); it++) {
+ const string& filename = it->first;
+ XLIFFFile* xliff = it->second;
+ string text = xliff->ToString();
+ write_to_file(filename, text);
+ }
+
+ // the stats
+ printf("\n"
+ " to without total\n"
+ " config files translate comments strings\n"
+ "-----------------------------------------------------------------------\n");
+ Stats totals;
+ totals.config = "total";
+ totals.files = 0;
+ totals.toBeTranslated = 0;
+ totals.noComments = 0;
+ totals.totalStrings = 0;
+ for (vector<Stats>::iterator it=stats.begin(); it!=stats.end(); it++) {
+ string cfg = it->config;
+ if (cfg.length() > 20) {
+ cfg.resize(20);
+ }
+ printf(" %-20s %-9zd %-9zd %-9zd %-19zd\n", cfg.c_str(), it->files,
+ it->toBeTranslated, it->noComments, it->totalStrings);
+ totals.files += it->files;
+ totals.toBeTranslated += it->toBeTranslated;
+ totals.noComments += it->noComments;
+ totals.totalStrings += it->totalStrings;
+ }
+ if (stats.size() > 1) {
+ printf("-----------------------------------------------------------------------\n"
+ " %-20s %-9zd %-9zd %-9zd %-19zd\n", totals.config.c_str(), totals.files,
+ totals.toBeTranslated, totals.noComments, totals.totalStrings);
+ }
+ printf("\n");
+ return 0;
+}
+
+struct PseudolocalizeSettings {
+ XLIFFFile* xliff;
+ bool expand;
+};
+
+
+string
+pseudolocalize_string(const string& source, const PseudolocalizeSettings* settings)
+{
+ return pseudolocalize_string(source);
+}
+
+static XMLNode*
+pseudolocalize_xml_node(const XMLNode* source, const PseudolocalizeSettings* settings)
+{
+ if (source->Type() == XMLNode::TEXT) {
+ return XMLNode::NewText(source->Position(), pseudolocalize_string(source->Text(), settings),
+ source->Pretty());
+ } else {
+ XMLNode* target;
+ if (source->Namespace() == XLIFF_XMLNS && source->Name() == "g") {
+ // XXX don't translate these
+ target = XMLNode::NewElement(source->Position(), source->Namespace(),
+ source->Name(), source->Attributes(), source->Pretty());
+ } else {
+ target = XMLNode::NewElement(source->Position(), source->Namespace(),
+ source->Name(), source->Attributes(), source->Pretty());
+ }
+
+ const vector<XMLNode*>& children = source->Children();
+ const size_t I = children.size();
+ for (size_t i=0; i<I; i++) {
+ target->EditChildren().push_back(pseudolocalize_xml_node(children[i], settings));
+ }
+
+ return target;
+ }
+}
+
+void
+pseudolocalize_trans_unit(const string&file, TransUnit* unit, void* cookie)
+{
+ const PseudolocalizeSettings* settings = (PseudolocalizeSettings*)cookie;
+
+ const StringResource& source = unit->source;
+ StringResource* target = &unit->target;
+ *target = source;
+
+ target->config = settings->xliff->TargetConfig();
+
+ delete target->value;
+ target->value = pseudolocalize_xml_node(source.value, settings);
+}
+
+int
+pseudolocalize_xliff(XLIFFFile* xliff, bool expand)
+{
+ PseudolocalizeSettings settings;
+
+ settings.xliff = xliff;
+ settings.expand = expand;
+ xliff->Map(pseudolocalize_trans_unit, &settings);
+ return 0;
+}
+
+static int
+do_pseudo(const string& infile, const string& outfile, bool expand)
+{
+ int err;
+
+ XLIFFFile* xliff = XLIFFFile::Parse(infile);
+ if (xliff == NULL) {
+ return 1;
+ }
+
+ pseudolocalize_xliff(xliff, expand);
+
+ err = write_to_file(outfile, xliff->ToString());
+
+ delete xliff;
+
+ return err;
+}
+
+void
+log_printf(const char *fmt, ...)
+{
+ int ret;
+ va_list ap;
+
+ if (g_logFile != NULL) {
+ va_start(ap, fmt);
+ ret = vfprintf(g_logFile, fmt, ap);
+ va_end(ap);
+ fflush(g_logFile);
+ }
+}
+
+void
+close_log_file()
+{
+ if (g_logFile != NULL) {
+ fclose(g_logFile);
+ }
+}
+
+void
+open_log_file(const char* file)
+{
+ g_logFile = fopen(file, "w");
+ printf("log file: %s -- %p\n", file, g_logFile);
+ atexit(close_log_file);
+}
+
+static int
+usage()
+{
+ fprintf(stderr,
+ "usage: localize export OPTIONS CONFIGS...\n"
+ " REQUIRED OPTIONS\n"
+ " --settings SETTINGS The settings file to use. See CONFIGS below.\n"
+ " --root TREE_ROOT The location in Perforce of the files. e.g. //device\n"
+ " --target LOCALE The target locale. See LOCALES below.\n"
+ "\n"
+ " OPTIONAL OPTIONS\n"
+ " --out DIR Directory to put the output files. Defaults to the\n"
+ " current directory if not supplied. Files are\n"
+ " named as follows:\n"
+ " CONFIG-LOCALE-CURRENT_CL.xliff\n"
+ "\n"
+ "\n"
+ "usage: localize import XLIFF_FILE...\n"
+ "\n"
+ "Import a translated XLIFF file back into the tree.\n"
+ "\n"
+ "\n"
+ "usage: localize xlb XMB_FILE VALUES_FILES...\n"
+ "\n"
+ "Read resource files from the tree file and write the corresponding XLB file\n"
+ "\n"
+ "Supply all of the android resource files (values files) to export after that.\n"
+ "\n"
+ "\n"
+ "\n"
+ "CONFIGS\n"
+ "\n"
+ "LOCALES\n"
+ "Locales are specified in the form en_US They will be processed correctly\n"
+ "to locate the resouce files in the tree.\n"
+ "\n"
+ "\n"
+ "usage: localize pseudo OPTIONS INFILE [OUTFILE]\n"
+ " OPTIONAL OPTIONS\n"
+ " --big Pad strings so they get longer.\n"
+ "\n"
+ "Read INFILE, an XLIFF file, and output a pseudotranslated version of that file. If\n"
+ "OUTFILE is specified, the results are written there; otherwise, the results are\n"
+ "written back to INFILE.\n"
+ "\n"
+ "\n"
+ "usage: localize rescheck FILES...\n"
+ "\n"
+ "Reads the base strings and prints warnings about bad resources from the given files.\n"
+ "\n");
+ return 1;
+}
+
+int
+main(int argc, const char** argv)
+{
+ //open_log_file("log.txt");
+ //g_logFile = stdout;
+
+ if (argc == 2 && 0 == strcmp(argv[1], "--test")) {
+ return test();
+ }
+
+ if (argc < 2) {
+ return usage();
+ }
+
+ int index = 1;
+
+ if (0 == strcmp("export", argv[index])) {
+ string settingsFile;
+ string rootDir;
+ string outDir;
+ string baseLocale = "en";
+ string targetLocale;
+ string language, region;
+ vector<string> configs;
+
+ index++;
+ while (index < argc) {
+ if (0 == strcmp("--settings", argv[index])) {
+ settingsFile = argv[index+1];
+ index += 2;
+ }
+ else if (0 == strcmp("--root", argv[index])) {
+ rootDir = argv[index+1];
+ index += 2;
+ }
+ else if (0 == strcmp("--out", argv[index])) {
+ outDir = argv[index+1];
+ index += 2;
+ }
+ else if (0 == strcmp("--target", argv[index])) {
+ targetLocale = argv[index+1];
+ index += 2;
+ }
+ else if (argv[index][0] == '-') {
+ fprintf(stderr, "unknown argument %s\n", argv[index]);
+ return usage();
+ }
+ else {
+ break;
+ }
+ }
+ for (; index<argc; index++) {
+ configs.push_back(argv[index]);
+ }
+
+ if (settingsFile == "" || rootDir == "" || configs.size() == 0 || targetLocale == "") {
+ return usage();
+ }
+ if (!split_locale(targetLocale, &language, &region)) {
+ fprintf(stderr, "illegal --target locale: '%s'\n", targetLocale.c_str());
+ return usage();
+ }
+
+
+ return do_export(settingsFile, rootDir, outDir, targetLocale, configs);
+ }
+ else if (0 == strcmp("import", argv[index])) {
+ vector<string> xliffFilenames;
+
+ index++;
+ for (; index<argc; index++) {
+ xliffFilenames.push_back(argv[index]);
+ }
+
+ return do_merge(xliffFilenames);
+ }
+ else if (0 == strcmp("xlb", argv[index])) {
+ string outfile;
+ vector<string> resFiles;
+
+ index++;
+ if (argc < index+1) {
+ return usage();
+ }
+
+ outfile = argv[index];
+
+ index++;
+ for (; index<argc; index++) {
+ resFiles.push_back(argv[index]);
+ }
+
+ return do_xlb_export(outfile, resFiles);
+ }
+ else if (0 == strcmp("pseudo", argv[index])) {
+ string infile;
+ string outfile;
+ bool big = false;
+
+ index++;
+ while (index < argc) {
+ if (0 == strcmp("--big", argv[index])) {
+ big = true;
+ index += 1;
+ }
+ else if (argv[index][0] == '-') {
+ fprintf(stderr, "unknown argument %s\n", argv[index]);
+ return usage();
+ }
+ else {
+ break;
+ }
+ }
+
+ if (index == argc-1) {
+ infile = argv[index];
+ outfile = argv[index];
+ }
+ else if (index == argc-2) {
+ infile = argv[index];
+ outfile = argv[index+1];
+ }
+ else {
+ fprintf(stderr, "unknown argument %s\n", argv[index]);
+ return usage();
+ }
+
+ return do_pseudo(infile, outfile, big);
+ }
+ else if (0 == strcmp("rescheck", argv[index])) {
+ vector<string> files;
+
+ index++;
+ while (index < argc) {
+ if (argv[index][0] == '-') {
+ fprintf(stderr, "unknown argument %s\n", argv[index]);
+ return usage();
+ }
+ else {
+ break;
+ }
+ }
+ for (; index<argc; index++) {
+ files.push_back(argv[index]);
+ }
+
+ if (files.size() == 0) {
+ return usage();
+ }
+
+ return do_rescheck(files);
+ }
+ else {
+ return usage();
+ }
+
+ if (SourcePos::HasErrors()) {
+ SourcePos::PrintErrors(stderr);
+ return 1;
+ }
+
+ return 0;
+}
+