' + ' '.join(['' + sentence + '' for sentence in sentences]) + '
'
body = body.encode('utf8')
tf_example = example_pb2.Example()
tf_example.features.feature['article'].bytes_list.value.extend([body])
tf_example.features.feature['abstract'].bytes_list.value.extend([title])
tf_example_str = tf_example.SerializeToString()
str_len = len(tf_example_str)
writer.write(struct.pack('q', str_len))
writer.write(struct.pack('%ds' % str_len, tf_example_str))
def main(unused_argv):
    """Dispatch to the conversion routine selected by ``FLAGS.command``.

    Reads ``FLAGS.command``, ``FLAGS.in_directories`` and ``FLAGS.out_files``;
    the latter two are split on commas.  For ``text_to_binary``,
    ``FLAGS.split`` must also be set to a comma-separated list of floats, one
    per output file.  For ``text_to_vocabulary`` exactly one output file must
    be given.

    Args:
        unused_argv: Positional command-line arguments; ignored.

    Raises:
        ValueError: If a required flag is empty, the number of output files
            does not match what the command expects, a split fraction is not
            a valid float, or the command is not recognised.
    """
    # Validate with explicit raises rather than `assert`: assert statements
    # are removed when Python runs with -O, which would let bad input through.
    if not (FLAGS.command and FLAGS.in_directories and FLAGS.out_files):
        raise ValueError(
            'command, in_directories and out_files must all be specified.')

    output_filenames = FLAGS.out_files.split(',')
    input_directories = FLAGS.in_directories.split(',')

    if FLAGS.command == 'text_to_binary':
        if not FLAGS.split:
            raise ValueError('split must be specified for text_to_binary.')
        split_fractions = [float(s) for s in FLAGS.split.split(',')]
        # Each output file receives the share given by its split fraction,
        # so the two lists have to line up one-to-one.
        if len(output_filenames) != len(split_fractions):
            raise ValueError(
                'Got %d output files but %d split fractions; they must match.'
                % (len(output_filenames), len(split_fractions)))
        _text_to_binary(input_directories, output_filenames, split_fractions)
    elif FLAGS.command == 'text_to_vocabulary':
        if len(output_filenames) != 1:
            raise ValueError(
                'text_to_vocabulary expects exactly one output file, got %d.'
                % len(output_filenames))
        _text_to_vocabulary(input_directories, output_filenames[0])
    else:
        # Previously an unrecognised command fell through and the script
        # exited successfully without doing anything.
        raise ValueError(
            "Unknown command %r; expected 'text_to_binary' or "
            "'text_to_vocabulary'." % FLAGS.command)
if __name__ == '__main__':
    # Only start the app when executed as a script, not when imported.
    # tf.app.run() is TensorFlow's entry-point helper; it is expected to
    # parse the command-line flags and then call this module's main().
    tf.app.run()