# See: http://comments.gmane.org/gmane.comp.nlp.moses.user/4194

### Tensorflow Build
# Clean the tree, build the pip-package builder with optimisations (-c opt),
# then run the builder with /home/onetipp/deeptest/tf/ as its argument.
bazel clean && bazel build -c opt //tensorflow/tools/pip_package:build_pip_package
bazel-bin/tensorflow/tools/pip_package/build_pip_package /home/onetipp/deeptest/tf/

### Moses Build Instructions
# Clean, then rebuild Moses against the local SRILM, IRSTLM and GIZA++
# installations using 20 parallel jobs.
./bjam clean && ./bjam \
  --with-srilm=/home/onetipp/deeptest/moses/tools/srilm \
  --with-irstlm=/home/onetipp/deeptest/moses/tools/irstlm-5.80.08 \
  --with-giza=/home/onetipp/deeptest/moses/tools/giza-pp \
  -j20

### Hierarchical Model Training with Moses: Part 2 Translation
# All relative paths in the preprocessing steps below are resolved from the
# corpus directory.
cd /home/onetipp/deeptest/moses/work/corpus

# tokenizer
# Tokenise each side of the DGT de-en corpus (40 threads); the input is fed
# via redirection instead of a separate `cat` process.
../../moses/scripts/tokenizer/tokenizer.perl -l en -threads 40 \
  < DGT.de-en.en > text.tok.en
../../moses/scripts/tokenizer/tokenizer.perl -l de -threads 40 \
  < DGT.de-en.de > text.tok.de

# cleaner
# Reads text.tok.{de,en} and writes text.tok.clean.{de,en}; the trailing
# "1 350" are the minimum / maximum sentence-length arguments.
perl ../../moses/scripts/training/clean-corpus-n.perl text.tok de en text.tok.clean 1 350

# LowerCase
perl ../../moses/scripts/tokenizer/lowercase.perl < text.tok.clean.de > text.lower.de
perl ../../moses/scripts/tokenizer/lowercase.perl < text.tok.clean.en > text.lower.en

# We will use SRILM to build a tri-gram language model.
# Order-3 language models with interpolated Kneser-Ney discounting and an
# explicit unknown-word token (-unk), one per language. The ../.. paths are
# relative to the corpus directory.
../../tools/srilm/bin/i686-m64/ngram-count -order 3 -interpolate -kndiscount -unk \
  -text text.lower.de -lm ../lm/text.de.lm
../../tools/srilm/bin/i686-m64/ngram-count -order 3 -interpolate -kndiscount -unk \
  -text text.lower.en -lm ../lm/text.en.lm

# Hierarchical Training
# NOTE(review): the target side is English (-e en) but --lm points at
# text.de.lm; a Moses language model is normally built on the target
# language - confirm whether text.en.lm was intended.
perl /home/onetipp/software/mosesdecoder/scripts/training/train-model.perl \
  --parallel --glue-grammar --hierarchical \
  --root-dir /home/onetipp/deeptest/moses/work \
  -f de -e en \
  --corpus /home/onetipp/deeptest/moses/work/corpus/text.lower \
  -max-phrase-length 150 \
  -external-bin-dir /home/onetipp/software/mosesdecoder/tools/ \
  --lm 0:3:/home/onetipp/deeptest/moses/work/lm/text.de.lm:9 \
  -mgiza -mgiza-cpus 20 -cores 40 -parallel \
  -sort-buffer-size 10G -sort-batch-size 253 -sort-compress gzip -sort-parallel 10 \
  --alignment grow-diag-final-and \
  --score-options "--GoodTuring" \
  --extract-options "--MaxSpan 1000 --MinHoleSource 1 --MinWords 0 --NonTermConsecSource --AllowOnlyUnalignedWords"

# IRSTLM Binary Language Model
# NOTE(review): the relative paths from here on (tools/bin, work/..., bin/)
# do not resolve from work/corpus; presumably they are meant to be run from
# /home/onetipp/deeptest/moses - confirm.
tools/bin/compile-lm --text=yes work/lm/text.de.lm work/lm/text.de.blm
tools/bin/compile-lm --text=yes work/lm/text.en.lm work/lm/text.en.blm

# NOTE(review): presumably a scratch directory for the tools in this
# section - confirm which tool reads TMP.
export TMP=/home/cache

# Building a KenLM binary file
# file.arpa / file.binlm are placeholders for the real model paths.
bin/build_binary -a 64 trie file.arpa file.binlm

# read binary language model into memory
# `file` must be set to the binary LM path first; it is quoted so an unset
# or empty value fails immediately instead of leaving cat reading stdin.
cat "$file" >/dev/null

# Tuning
mkdir -p work/tuning.dev
# The previous filtered directory is removed before the model is filtered
# again for the given input text.
rm -rf /home/onetipp/deeptest/moses/work/tuning.dev/filtered &&
  perl /home/onetipp/software/mosesdecoder/scripts/training/filter-model-given-input.pl \
    work/tuning.dev/filtered work/model/moses.ini \
    /home/onetipp/deeptest/moses/work/corpus/text.lower.de -Hierarchical

#Start using it:
# NOTE(review): this reads work2/model/moses.ini whereas training and tuning
# above use work/ - confirm which directory is intended.
/home/onetipp/deeptest/moses/moses/bin/moses_chart \
  -config /home/onetipp/deeptest/moses/work2/model/moses.ini \
  -max-chart-span 1000 -threads 4 -inputtype 3