From 6c636291adce428b46da7f85b30e301982a0b650 Mon Sep 17 00:00:00 2001 From: Tilps Date: Sat, 7 Dec 2019 11:26:59 +1100 Subject: [PATCH 01/39] Silence some warnings in 1.14 at the expense of making 1.14 the minimum version --- tf/chunkparser.py | 10 +-- tf/mixprec.py | 4 +- tf/net_to_model.py | 8 +-- tf/tfprocess.py | 172 ++++++++++++++++++++++----------------------- tf/train.py | 4 +- tf/update_steps.py | 8 +-- tf/upgrade.py | 20 +++--- 7 files changed, 113 insertions(+), 113 deletions(-) diff --git a/tf/chunkparser.py b/tf/chunkparser.py index 4a5d169d..9f654a23 100644 --- a/tf/chunkparser.py +++ b/tf/chunkparser.py @@ -141,10 +141,10 @@ def parse_function(planes, probs, winner, q): """ Convert unpacked record batches to tensors for tensorflow training """ - planes = tf.decode_raw(planes, tf.float32) - probs = tf.decode_raw(probs, tf.float32) - winner = tf.decode_raw(winner, tf.float32) - q = tf.decode_raw(q, tf.float32) + planes = tf.io.decode_raw(planes, tf.float32) + probs = tf.io.decode_raw(probs, tf.float32) + winner = tf.io.decode_raw(winner, tf.float32) + q = tf.io.decode_raw(q, tf.float32) planes = tf.reshape(planes, (ChunkParser.BATCH_SIZE, 112, 8*8)) probs = tf.reshape(probs, (ChunkParser.BATCH_SIZE, 1858)) @@ -426,7 +426,7 @@ def test_tensorflow_parsing(self): best_q = best_q.reshape(batch_size, 3) # Pass it through tensorflow - with tf.Session() as sess: + with tf.compat.v1.Session() as sess: graph = ChunkParser.parse_function(data[0], data[1], data[2], data[3]) tf_planes, tf_probs, tf_winner, tf_q = sess.run(graph) diff --git a/tf/mixprec.py b/tf/mixprec.py index 889fb6ab..cf161ac0 100644 --- a/tf/mixprec.py +++ b/tf/mixprec.py @@ -16,7 +16,7 @@ def float32_variable_storage_getter(getter, name, shape=None, dtype=None, if trainable and dtype != tf.float32: cast_name = name + '/fp16_cast' try: - cast_variable = tf.get_default_graph().get_tensor_by_name( + cast_variable = tf.compat.v1.get_default_graph().get_tensor_by_name( cast_name + ':0') except KeyError: cast_variable = tf.cast(variable, dtype, name=cast_name) @@ -25,7 +25,7 @@ def float32_variable_storage_getter(getter, name, shape=None, dtype=None, return variable -class LossScalingOptimizer(tf.train.Optimizer): +class LossScalingOptimizer(tf.compat.v1.train.Optimizer): """An optimizer that scales loss and un-scales gradients.""" def __init__(self, optimizer, diff --git a/tf/net_to_model.py b/tf/net_to_model.py index 7e6f52f0..8d3190d6 100755 --- a/tf/net_to_model.py +++ b/tf/net_to_model.py @@ -28,10 +28,10 @@ weights = net.get_weights() x = [ - tf.placeholder(tf.float32, [None, 112, 8*8]), - tf.placeholder(tf.float32, [None, 1858]), - tf.placeholder(tf.float32, [None, 3]), - tf.placeholder(tf.float32, [None, 3]), + tf.compat.v1.placeholder(tf.float32, [None, 112, 8*8]), + tf.compat.v1.placeholder(tf.float32, [None, 1858]), + tf.compat.v1.placeholder(tf.float32, [None, 3]), + tf.compat.v1.placeholder(tf.float32, [None, 3]), ] tfp = tfprocess.TFProcess(cfg) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index 8719e402..c8268050 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -43,10 +43,10 @@ def weight_variable(shape, name=None, dtype=tf.float32): stddev = trunc_correction * np.sqrt(2.0 / (fan_in + fan_out)) # Do not use a constant as the initializer, that will cause the # variable to be stored in wrong dtype. 
- weights = tf.get_variable( + weights = tf.compat.v1.get_variable( name, shape, dtype=dtype, - initializer=tf.truncated_normal_initializer(stddev=stddev, dtype=dtype)) - tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, weights) + initializer=tf.compat.v1.truncated_normal_initializer(stddev=stddev)) + tf.compat.v1.add_to_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES, weights) return weights # Bias weights for layers not followed by BatchNorm @@ -55,11 +55,11 @@ def weight_variable(shape, name=None, dtype=tf.float32): def bias_variable(shape, name=None, dtype=tf.float32): - return tf.get_variable(name, shape, dtype=dtype, - initializer=tf.zeros_initializer()) + return tf.compat.v1.get_variable(name, shape, dtype=dtype, + initializer=tf.compat.v1.zeros_initializer()) def conv2d(x, W): - return tf.nn.conv2d(x, W, data_format='NCHW', + return tf.nn.conv2d(input=x, filters=W, data_format='NCHW', strides=[1, 1, 1, 1], padding='SAME') class TFProcess: @@ -128,25 +128,25 @@ def __init__(self, cfg): self.renorm_max_d = self.cfg['training'].get('renorm_max_d', 0) self.renorm_momentum = self.cfg['training'].get('renorm_momentum', 0.99) - gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.90, + gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.90, allow_growth=True, visible_device_list="{}".format(self.cfg['gpu'])) - config = tf.ConfigProto(gpu_options=gpu_options) - self.session = tf.Session(config=config) + config = tf.compat.v1.ConfigProto(gpu_options=gpu_options) + self.session = tf.compat.v1.Session(config=config) - self.training = tf.placeholder(tf.bool) + self.training = tf.compat.v1.placeholder(tf.bool) self.global_step = tf.Variable(0, name='global_step', trainable=False) - self.learning_rate = tf.placeholder(tf.float32) + self.learning_rate = tf.compat.v1.placeholder(tf.float32) def init(self, dataset, train_iterator, test_iterator): # TF variables - self.handle = tf.placeholder(tf.string, shape=[]) - iterator = tf.data.Iterator.from_string_handle( - self.handle, dataset.output_types, dataset.output_shapes) + self.handle = tf.compat.v1.placeholder(tf.string, shape=[]) + iterator = tf.compat.v1.data.Iterator.from_string_handle( + self.handle, tf.compat.v1.data.get_output_types(dataset), tf.compat.v1.data.get_output_shapes(dataset)) self.next_batch = iterator.get_next() self.train_handle = self.session.run(train_iterator.string_handle()) self.test_handle = self.session.run(test_iterator.string_handle()) # This forces trainable variables to be stored as fp32 - with tf.variable_scope("fp32_storage", + with tf.compat.v1.variable_scope("fp32_storage", custom_getter=float32_variable_storage_getter): self.init_net(self.next_batch) @@ -168,14 +168,14 @@ def init_net(self, next_batch): move_is_legal = tf.greater_equal(self.y_, 0) # replace logits of illegal moves with large negative value (so that it doesn't affect policy of legal moves) without gradient illegal_filler = tf.zeros_like(self.y_conv) - 1.0e10 - self.y_conv = tf.where(move_is_legal, self.y_conv, illegal_filler) + self.y_conv = tf.compat.v1.where_v2(move_is_legal, self.y_conv, illegal_filler) # y_ still has -1 on illegal moves, flush them to 0 self.y_ = tf.nn.relu(self.y_) policy_cross_entropy = \ - tf.nn.softmax_cross_entropy_with_logits(labels=self.y_, + tf.compat.v1.nn.softmax_cross_entropy_with_logits_v2(labels=tf.stop_gradient(self.y_), logits=self.y_conv) - self.policy_loss = tf.reduce_mean(policy_cross_entropy) + self.policy_loss = tf.reduce_mean(input_tensor=policy_cross_entropy) 
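# Note: the _v2 cross-entropy op, unlike its v1 predecessor, also
# backpropagates into `labels`; wrapping the labels in tf.stop_gradient,
# as above, keeps the old one-sided behaviour. A minimal sketch of the
# effect under eager semantics, with illustrative values:

labels = tf.Variable([[0.0, 1.0]])
logits = tf.Variable([[2.0, -1.0]])
with tf.GradientTape() as tape:
    loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.stop_gradient(labels),  # detach the targets
        logits=logits)
# Only the logits receive a gradient; without stop_gradient the op would
# also differentiate the loss with respect to the soft labels.
grads = tape.gradient(loss, [logits, labels])  # -> [tensor, None]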
q_ratio = self.cfg['training'].get('q_ratio', 0) assert 0 <= q_ratio <= 1 @@ -188,22 +188,22 @@ def init_net(self, next_batch): # Loss on value head if self.wdl: value_cross_entropy = \ - tf.nn.softmax_cross_entropy_with_logits(labels=target, + tf.compat.v1.nn.softmax_cross_entropy_with_logits_v2(labels=tf.stop_gradient(target), logits=self.z_conv) - self.value_loss = tf.reduce_mean(value_cross_entropy) + self.value_loss = tf.reduce_mean(input_tensor=value_cross_entropy) scalar_z_conv = tf.matmul(tf.nn.softmax(self.z_conv), wdl) self.mse_loss = \ - tf.reduce_mean(tf.squared_difference(scalar_target, scalar_z_conv)) + tf.reduce_mean(input_tensor=tf.math.squared_difference(scalar_target, scalar_z_conv)) else: self.value_loss = tf.constant(0) self.mse_loss = \ - tf.reduce_mean(tf.squared_difference(scalar_target, self.z_conv)) + tf.reduce_mean(input_tensor=tf.math.squared_difference(scalar_target, self.z_conv)) # Regularizer - regularizer = tf.contrib.layers.l2_regularizer(scale=0.0001) - reg_variables = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) - self.reg_term = \ - tf.contrib.layers.apply_regularization(regularizer, reg_variables) + regularizer = tf.keras.regularizers.l2(l=0.5 * (0.0001)) + reg_variables = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES) + penalties = [regularizer(w) for w in reg_variables] + self.reg_term = tf.math.add_n(penalties) if self.model_dtype != tf.float32: self.reg_term = tf.cast(self.reg_term, tf.float32) @@ -226,7 +226,7 @@ def init_net(self, next_batch): # You need to change the learning rate here if you are training # from a self-play training set, for example start with 0.005 instead. - opt_op = tf.train.MomentumOptimizer( + opt_op = tf.compat.v1.train.MomentumOptimizer( learning_rate=self.learning_rate, momentum=0.9, use_nesterov=True) opt_op = LossScalingOptimizer(opt_op, scale=self.loss_scale) @@ -244,19 +244,19 @@ def init_net(self, next_batch): var = tf.Variable( tf.zeros(shape=w.shape), name='swa/'+name, trainable=False) accum.append( - tf.assign(var, var * (n / (n + 1.)) + tf.stop_gradient(w) * (1. / (n + 1.)))) - load.append(tf.assign(w, var)) + tf.compat.v1.assign(var, var * (n / (n + 1.)) + tf.stop_gradient(w) * (1. / (n + 1.)))) + load.append(tf.compat.v1.assign(w, var)) with tf.control_dependencies(accum): - self.swa_accum_op = tf.assign_add(n, 1.) + self.swa_accum_op = tf.compat.v1.assign_add(n, 1.) self.swa_load_op = tf.group(*load) # Accumulate (possibly multiple) gradient updates to simulate larger batch sizes than can be held in GPU memory. 
gradient_accum = [tf.Variable(tf.zeros_like( - var.initialized_value()), trainable=False) for var in tf.trainable_variables()] + var.initialized_value()), trainable=False) for var in tf.compat.v1.trainable_variables()] self.zero_op = [var.assign(tf.zeros_like(var)) for var in gradient_accum] - self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) + self.update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS) with tf.control_dependencies(self.update_ops): gradients = opt_op.compute_gradients(loss) self.accum_op = [accum.assign_add( @@ -270,13 +270,13 @@ def init_net(self, next_batch): [(accum, gradient[1]) for accum, gradient in zip(gradient_accum, gradients)], global_step=self.global_step) correct_policy_prediction = \ - tf.equal(tf.argmax(self.y_conv, 1), tf.argmax(self.y_, 1)) + tf.equal(tf.argmax(input=self.y_conv, axis=1), tf.argmax(input=self.y_, axis=1)) correct_policy_prediction = tf.cast(correct_policy_prediction, tf.float32) - self.policy_accuracy = tf.reduce_mean(correct_policy_prediction) + self.policy_accuracy = tf.reduce_mean(input_tensor=correct_policy_prediction) correct_value_prediction = \ - tf.equal(tf.argmax(self.z_conv, 1), tf.argmax(self.z_, 1)) + tf.equal(tf.argmax(input=self.z_conv, axis=1), tf.argmax(input=self.z_, axis=1)) correct_value_prediction = tf.cast(correct_value_prediction, tf.float32) - self.value_accuracy = tf.reduce_mean(correct_value_prediction) + self.value_accuracy = tf.reduce_mean(input_tensor=correct_value_prediction) self.avg_policy_loss = [] self.avg_value_loss = [] @@ -286,18 +286,18 @@ def init_net(self, next_batch): self.last_steps = None # Summary part - self.test_writer = tf.summary.FileWriter( + self.test_writer = tf.compat.v1.summary.FileWriter( os.path.join(os.getcwd(), "leelalogs/{}-test".format(self.cfg['name']))) - self.train_writer = tf.summary.FileWriter( + self.train_writer = tf.compat.v1.summary.FileWriter( os.path.join(os.getcwd(), "leelalogs/{}-train".format(self.cfg['name']))) if self.swa_enabled: - self.swa_writer = tf.summary.FileWriter( + self.swa_writer = tf.compat.v1.summary.FileWriter( os.path.join(os.getcwd(), "leelalogs/{}-swa-test".format(self.cfg['name']))) - self.histograms = [tf.summary.histogram( + self.histograms = [tf.compat.v1.summary.histogram( weight.name, weight) for weight in self.weights] - self.init = tf.global_variables_initializer() - self.saver = tf.train.Saver() + self.init = tf.compat.v1.global_variables_initializer() + self.saver = tf.compat.v1.train.Saver() self.session.run(self.init) @@ -325,7 +325,7 @@ def replace_weights(self, new_weights): shape = [s[i] for i in [3, 2, 0, 1]] new_weight = tf.constant(new_weights[e], shape=shape) all_evals.append(weights.assign( - tf.transpose(new_weight, [2, 3, 1, 0]))) + tf.transpose(a=new_weight, perm=[2, 3, 1, 0]))) elif weights.shape.ndims == 2: # Fully connected layers are [in, out] in TF # @@ -335,11 +335,11 @@ def replace_weights(self, new_weights): shape = [s[i] for i in [1, 0]] new_weight = tf.constant(new_weights[e], shape=shape) all_evals.append(weights.assign( - tf.transpose(new_weight, [1, 0]))) + tf.transpose(a=new_weight, perm=[1, 0]))) else: # Biases, batchnorm etc new_weight = tf.constant(new_weights[e], shape=weights.shape) - all_evals.append(tf.assign(weights, new_weight)) + all_evals.append(tf.compat.v1.assign(weights, new_weight)) self.session.run(all_evals) # This should result in identical file to the starting one # self.save_leelaz_weights('restored.txt') @@ -351,7 +351,7 @@ def restore(self, file): def 
process_loop(self, batch_size, test_batches, batch_splits=1): # Get the initial steps value in case this is a resume from a step count # which is not a multiple of total_steps. - steps = tf.train.global_step(self.session, self.global_step) + steps = tf.compat.v1.train.global_step(self.session, self.global_step) total_steps = self.cfg['training']['total_steps'] for _ in range(steps % total_steps, total_steps): self.process(batch_size, test_batches, batch_splits=batch_splits) @@ -361,7 +361,7 @@ def process(self, batch_size, test_batches, batch_splits=1): self.time_start = time.time() # Get the initial steps value before we do a training step. - steps = tf.train.global_step(self.session, self.global_step) + steps = tf.compat.v1.train.global_step(self.session, self.global_step) if not self.last_steps: self.last_steps = steps @@ -419,7 +419,7 @@ def process(self, batch_size, test_batches, batch_splits=1): feed_dict={self.learning_rate: corrected_lr, self.training: True, self.handle: self.train_handle}) # Update steps since training should have incremented it. - steps = tf.train.global_step(self.session, self.global_step) + steps = tf.compat.v1.train.global_step(self.session, self.global_step) if steps % self.cfg['training']['train_avg_report_steps'] == 0 or steps % self.cfg['training']['total_steps'] == 0: pol_loss_w = self.cfg['training']['policy_loss_weight'] @@ -443,14 +443,14 @@ def process(self, batch_size, test_batches, batch_splits=1): update_ratio_summaries = self.compute_update_ratio( before_weights, after_weights) - train_summaries = tf.Summary(value=[ - tf.Summary.Value(tag="Policy Loss", simple_value=avg_policy_loss), - tf.Summary.Value(tag="Value Loss", simple_value=avg_value_loss), - tf.Summary.Value(tag="Reg term", simple_value=avg_reg_term), - tf.Summary.Value(tag="LR", simple_value=self.lr), - tf.Summary.Value(tag="Gradient norm", + train_summaries = tf.compat.v1.Summary(value=[ + tf.compat.v1.Summary.Value(tag="Policy Loss", simple_value=avg_policy_loss), + tf.compat.v1.Summary.Value(tag="Value Loss", simple_value=avg_value_loss), + tf.compat.v1.Summary.Value(tag="Reg term", simple_value=avg_reg_term), + tf.compat.v1.Summary.Value(tag="LR", simple_value=self.lr), + tf.compat.v1.Summary.Value(tag="Gradient norm", simple_value=grad_norm / batch_splits), - tf.Summary.Value(tag="MSE Loss", simple_value=avg_mse_loss)]) + tf.compat.v1.Summary.Value(tag="MSE Loss", simple_value=avg_mse_loss)]) self.train_writer.add_summary(train_summaries, steps) self.train_writer.add_summary(update_ratio_summaries, steps) self.time_start = time_end @@ -524,18 +524,18 @@ def calculate_test_summaries(self, test_batches, steps): # TODO store value and value accuracy in pb self.net.pb.training_params.accuracy = sum_policy_accuracy if self.wdl: - test_summaries = tf.Summary(value=[ - tf.Summary.Value(tag="Policy Accuracy", simple_value=sum_policy_accuracy), - tf.Summary.Value(tag="Value Accuracy", simple_value=sum_value_accuracy), - tf.Summary.Value(tag="Policy Loss", simple_value=sum_policy), - tf.Summary.Value(tag="Value Loss", simple_value=sum_value), - tf.Summary.Value(tag="MSE Loss", simple_value=sum_mse)]).SerializeToString() + test_summaries = tf.compat.v1.Summary(value=[ + tf.compat.v1.Summary.Value(tag="Policy Accuracy", simple_value=sum_policy_accuracy), + tf.compat.v1.Summary.Value(tag="Value Accuracy", simple_value=sum_value_accuracy), + tf.compat.v1.Summary.Value(tag="Policy Loss", simple_value=sum_policy), + tf.compat.v1.Summary.Value(tag="Value Loss", simple_value=sum_value), + 
tf.compat.v1.Summary.Value(tag="MSE Loss", simple_value=sum_mse)]).SerializeToString() else: - test_summaries = tf.Summary(value=[ - tf.Summary.Value(tag="Policy Accuracy", simple_value=sum_policy_accuracy), - tf.Summary.Value(tag="Policy Loss", simple_value=sum_policy), - tf.Summary.Value(tag="MSE Loss", simple_value=sum_mse)]).SerializeToString() - test_summaries = tf.summary.merge( + test_summaries = tf.compat.v1.Summary(value=[ + tf.compat.v1.Summary.Value(tag="Policy Accuracy", simple_value=sum_policy_accuracy), + tf.compat.v1.Summary.Value(tag="Policy Loss", simple_value=sum_policy), + tf.compat.v1.Summary.Value(tag="MSE Loss", simple_value=sum_mse)]).SerializeToString() + test_summaries = tf.compat.v1.summary.merge( [test_summaries] + self.histograms).eval(session=self.session) self.test_writer.add_summary(test_summaries, steps) print("step {}, policy={:g} value={:g} policy accuracy={:g}% value accuracy={:g}% mse={:g}".\ @@ -552,12 +552,12 @@ def compute_update_ratio(self, before_weights, after_weights): weight_norms = [np.linalg.norm(w.ravel()) for w in before_weights] ratios = [(tensor.name, d / w) for d, w, tensor in zip(delta_norms, weight_norms, self.weights) if not 'moving' in tensor.name] all_summaries = [ - tf.Summary.Value(tag='update_ratios/' + + tf.compat.v1.Summary.Value(tag='update_ratios/' + name, simple_value=ratio) for name, ratio in ratios] ratios = np.log10([r for (_, r) in ratios if 0 < r < np.inf]) all_summaries.append(self.log_histogram('update_ratios_log10', ratios)) - return tf.Summary(value=all_summaries) + return tf.compat.v1.Summary(value=all_summaries) def log_histogram(self, tag, values, bins=1000): """Logs the histogram of a list/vector of values. @@ -571,7 +571,7 @@ def log_histogram(self, tag, values, bins=1000): counts, bin_edges = np.histogram(values, bins=bins) # Fill fields of histogram proto - hist = tf.HistogramProto() + hist = tf.compat.v1.HistogramProto() hist.min = float(np.min(values)) hist.max = float(np.max(values)) hist.num = int(np.prod(values.shape)) @@ -589,7 +589,7 @@ def log_histogram(self, tag, values, bins=1000): for c in counts: hist.bucket.append(c) - return tf.Summary.Value(tag=tag, histo=hist) + return tf.compat.v1.Summary.Value(tag=tag, histo=hist) def update_swa(self): # Add the current weight vars to the running average. 
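The HistogramProto built by hand above is the TF1 way of logging a histogram
of the update ratios; under the TF2 summary API the same record takes a
couple of lines. A sketch, assuming a hypothetical log directory:

    import numpy as np
    import tensorflow as tf

    # Hypothetical path; tf.summary.histogram replaces the manual
    # HistogramProto construction when running with TF2 summaries.
    writer = tf.summary.create_file_writer("leelalogs/example")
    ratios = np.log10(np.random.uniform(1e-4, 1e-2, size=1000))
    with writer.as_default():
        tf.summary.histogram("update_ratios_log10", ratios, step=0)
    writer.flush()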
@@ -604,11 +604,11 @@ def snap_save(self): rest_ops = [] for var in self.weights: if isinstance(var, str): - var = tf.get_default_graph().get_tensor_by_name(var) + var = tf.compat.v1.get_default_graph().get_tensor_by_name(var) name = var.name.split(':')[0] v = tf.Variable(var, name='save/'+name, trainable=False) - save_ops.append(tf.assign(v, var)) - rest_ops.append(tf.assign(var, v)) + save_ops.append(tf.compat.v1.assign(v, var)) + rest_ops.append(tf.compat.v1.assign(var, v)) self.snap_save_op = tf.group(*save_ops) self.snap_restore_op = tf.group(*rest_ops) self.session.run(self.snap_save_op) @@ -637,13 +637,13 @@ def save_leelaz_weights(self, filename): # # Leela/cuDNN/Caffe (kOutputInputYX) # [output, input, filter_size, filter_size] - work_weights = tf.transpose(weights, [3, 2, 0, 1]) + work_weights = tf.transpose(a=weights, perm=[3, 2, 0, 1]) elif weights.shape.ndims == 2: # Fully connected layers are [in, out] in TF # # [out, in] in Leela # - work_weights = tf.transpose(weights, [1, 0]) + work_weights = tf.transpose(a=weights, perm=[1, 0]) else: # Biases, batchnorm etc work_weights = weights @@ -677,7 +677,7 @@ def get_batchnorm_key(self): def add_weights(self, var): if var.name[-11:] == "fp16_cast:0": name = var.name[:-12] + ":0" - var = tf.get_default_graph().get_tensor_by_name(name) + var = tf.compat.v1.get_default_graph().get_tensor_by_name(name) # All trainable variables should be stored as fp32 assert var.dtype.base_dtype == tf.float32 self.weights.append(var) @@ -687,7 +687,7 @@ def batch_norm(self, net, scope, scale=False): # a unique scope that we can store, and use to look them back up # later on. - with tf.variable_scope(scope, custom_getter=float32_variable_storage_getter): + with tf.compat.v1.variable_scope(scope, custom_getter=float32_variable_storage_getter): if self.renorm_enabled: clipping = { "rmin": 1.0/self.renorm_max_r, @@ -695,7 +695,7 @@ def batch_norm(self, net, scope, scale=False): "dmax": self.renorm_max_d } # Renorm has issues with fp16, cast to fp32. 
- net = tf.layers.batch_normalization( + net = tf.compat.v1.layers.batch_normalization( tf.cast(net, tf.float32), epsilon=1e-5, axis=1, fused=True, center=True, scale=scale, renorm=True, renorm_clipping=clipping, @@ -705,7 +705,7 @@ def batch_norm(self, net, scope, scale=False): else: # Virtual batch doesn't work with fp16 virtual_batch = 64 if self.model_dtype == tf.float32 else None - net = tf.layers.batch_normalization( + net = tf.compat.v1.layers.batch_normalization( net, epsilon=1e-5, axis=1, fused=True, center=True, scale=scale, virtual_batch_size=virtual_batch, @@ -718,7 +718,7 @@ def batch_norm(self, net, scope, scale=False): dtype=tf.float32) else: name = "fp32_storage/" + scope + '/batch_normalization/' + v + ':0' - var = tf.get_default_graph().get_tensor_by_name(name) + var = tf.compat.v1.get_default_graph().get_tensor_by_name(name) self.add_weights(var) return net @@ -727,7 +727,7 @@ def squeeze_excitation(self, x, channels, ratio): assert channels % ratio == 0 # NCHW format reduced to NC - net = tf.reduce_mean(x, axis=[2, 3]) + net = tf.reduce_mean(input_tensor=x, axis=[2, 3]) W_fc1 = weight_variable([channels, channels // ratio], name='se_fc1_w', dtype=self.model_dtype) @@ -793,7 +793,7 @@ def residual_block(self, inputs, channels): self.add_weights(W_conv_2) h_bn2 = self.batch_norm(conv2d(h_out_1, W_conv_2), weight_key_2, scale=True) - with tf.variable_scope(weight_key_2): + with tf.compat.v1.variable_scope(weight_key_2): h_se = self.squeeze_excitation(h_bn2, channels, self.SE_ratio) h_out_2 = tf.nn.relu(tf.add(h_se, orig)) @@ -826,7 +826,7 @@ def construct_net(self, planes): dtype=self.model_dtype) self.add_weights(W_pol_conv) - tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, b_pol_conv) + tf.compat.v1.add_to_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES, b_pol_conv) self.add_weights(b_pol_conv) conv_pol2 = tf.nn.bias_add( @@ -834,7 +834,7 @@ def construct_net(self, planes): h_conv_pol_flat = tf.reshape(conv_pol2, [-1, 80*8*8]) fc1_init = tf.constant(lc0_az_policy_map.make_map(), dtype=self.model_dtype) - W_fc1 = tf.get_variable("policy_map", + W_fc1 = tf.compat.v1.get_variable("policy_map", initializer=fc1_init, trainable=False, dtype=self.model_dtype) @@ -852,7 +852,7 @@ def construct_net(self, planes): b_fc1 = bias_variable([1858], name='fc1/bias', dtype=self.model_dtype) self.add_weights(W_fc1) - tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, b_fc1) + tf.compat.v1.add_to_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES, b_fc1) self.add_weights(b_fc1) h_fc1 = tf.add(tf.matmul(h_conv_pol_flat, W_fc1), b_fc1, name='policy_head') @@ -882,7 +882,7 @@ def construct_net(self, planes): if not self.wdl: h_fc3 = tf.nn.tanh(h_fc3) else: - tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, b_fc3) + tf.compat.v1.add_to_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES, b_fc3) return h_fc1, h_fc3 diff --git a/tf/train.py b/tf/train.py index 7e9189cf..9384fb66 100755 --- a/tf/train.py +++ b/tf/train.py @@ -123,7 +123,7 @@ def main(cmd): train_parser.parse, output_types=(tf.string, tf.string, tf.string, tf.string)) dataset = dataset.map(ChunkParser.parse_function) dataset = dataset.prefetch(4) - train_iterator = dataset.make_one_shot_iterator() + train_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) shuffle_size = int(shuffle_size*(1.0-train_ratio)) test_parser = ChunkParser(FileDataSrc(test_chunks), @@ -132,7 +132,7 @@ def main(cmd): test_parser.parse, output_types=(tf.string, tf.string, tf.string, tf.string)) dataset = 
dataset.map(ChunkParser.parse_function) dataset = dataset.prefetch(4) - test_iterator = dataset.make_one_shot_iterator() + test_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) tfprocess = TFProcess(cfg) tfprocess.init(dataset, train_iterator, test_iterator) diff --git a/tf/update_steps.py b/tf/update_steps.py index 0d7795ad..49f357f2 100644 --- a/tf/update_steps.py +++ b/tf/update_steps.py @@ -17,10 +17,10 @@ def main(cmd): os.makedirs(root_dir) x = [ - tf.placeholder(tf.float32, [None, 112, 8*8]), - tf.placeholder(tf.float32, [None, 1858]), - tf.placeholder(tf.float32, [None, 3]), - tf.placeholder(tf.float32, [None, 3]), + tf.compat.v1.placeholder(tf.float32, [None, 112, 8*8]), + tf.compat.v1.placeholder(tf.float32, [None, 1858]), + tf.compat.v1.placeholder(tf.float32, [None, 3]), + tf.compat.v1.placeholder(tf.float32, [None, 3]), ] tfprocess = TFProcess(cfg) diff --git a/tf/upgrade.py b/tf/upgrade.py index d18f7648..49e3c11d 100644 --- a/tf/upgrade.py +++ b/tf/upgrade.py @@ -17,10 +17,10 @@ def main(cmd): os.makedirs(root_dir) x = [ - tf.placeholder(tf.float32, [None, 112, 8*8]), - tf.placeholder(tf.float32, [None, 1858]), - tf.placeholder(tf.float32, [None, 3]), - tf.placeholder(tf.float32, [None, 3]), + tf.compat.v1.placeholder(tf.float32, [None, 112, 8*8]), + tf.compat.v1.placeholder(tf.float32, [None, 1858]), + tf.compat.v1.placeholder(tf.float32, [None, 3]), + tf.compat.v1.placeholder(tf.float32, [None, 3]), ] tfprocess = TFProcess(cfg) @@ -28,20 +28,20 @@ def main(cmd): if os.path.exists(os.path.join(root_dir, 'checkpoint')): cp = tf.train.latest_checkpoint(root_dir) - reader = tf.train.NewCheckpointReader(cp) + reader = tf.compat.v1.train.NewCheckpointReader(cp) saved_shapes = reader.get_variable_to_shape_map() new_names = sorted( - [var.name.split(':')[0] for var in tf.global_variables() + [var.name.split(':')[0] for var in tf.compat.v1.global_variables() if var.name.split(':')[0] not in saved_shapes]) for saved_var_name in new_names: print("New name {} will use default value".format(saved_var_name)) var_names = sorted( - [(var.name, var.name.split(':')[0]) for var in tf.global_variables() + [(var.name, var.name.split(':')[0]) for var in tf.compat.v1.global_variables() if var.name.split(':')[0] in saved_shapes]) restore_vars = [] restore_names = [] for var_name, saved_var_name in var_names: - curr_var = tf.get_default_graph().get_tensor_by_name(var_name) + curr_var = tf.compat.v1.get_default_graph().get_tensor_by_name(var_name) var_shape = curr_var.get_shape().as_list() if var_shape == saved_shapes[saved_var_name]: restore_vars.append(curr_var) @@ -53,13 +53,13 @@ def main(cmd): if name not in restore_names]) for saved_var_name in legacy_names: print("Dropping {} as no longer used".format(saved_var_name)) - opt_saver = tf.train.Saver(restore_vars) + opt_saver = tf.compat.v1.train.Saver(restore_vars) opt_saver.restore(tfprocess.session, cp) else: print("No checkpoint to upgrade!") exit(1) - steps = tf.train.global_step(tfprocess.session, tfprocess.global_step) + steps = tf.compat.v1.train.global_step(tfprocess.session, tfprocess.global_step) path = os.path.join(root_dir, cfg['name']) save_path = tfprocess.saver.save(tfprocess.session, path, global_step=steps) tfprocess.session.close() From a9f7129d1b344eca0599aa53f9f0994c5ffa1986 Mon Sep 17 00:00:00 2001 From: Tilps Date: Sat, 7 Dec 2019 11:29:10 +1100 Subject: [PATCH 02/39] Update requirements. 
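
Some of the tf.compat.v1 spellings adopted in the previous commit only
resolve on 1.14 and later, so the pin moves up with them. A hypothetical
runtime guard making the same requirement explicit (not part of the
scripts themselves):

    import tensorflow as tf

    # Illustrative check: parse "major.minor" out of the version string.
    major, minor = (int(p) for p in tf.__version__.split(".")[:2])
    assert (major, minor) >= (1, 14), (
        "the tf.compat.v1 APIs used by the training scripts require "
        "TensorFlow >= 1.14")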
---
 tf/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tf/requirements.txt b/tf/requirements.txt
index e395c72f..4184551a 100644
--- a/tf/requirements.txt
+++ b/tf/requirements.txt
@@ -1,3 +1,3 @@
 numpy==1.13.3
-tensorflow==1.12.2
+tensorflow==1.14.0
 tensorflow-tensorboard==0.4.0rc2

From 7ad15ced37ce7ca9a22efb3bf647dfb6c81c85c4 Mon Sep 17 00:00:00 2001
From: Tilps
Date: Sat, 7 Dec 2019 12:13:28 +1100
Subject: [PATCH 03/39] Minimal changes to get training running on 2.0

---
 tf/tfprocess.py | 4 +++-
 tf/train.py     | 3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/tf/tfprocess.py b/tf/tfprocess.py
index c8268050..3af5f9c0 100644
--- a/tf/tfprocess.py
+++ b/tf/tfprocess.py
@@ -132,7 +132,9 @@ def __init__(self, cfg):
                                      allow_growth=True,
                                      visible_device_list="{}".format(self.cfg['gpu']))
         config = tf.compat.v1.ConfigProto(gpu_options=gpu_options)
         self.session = tf.compat.v1.Session(config=config)
-
+        gpus = tf.config.experimental.list_physical_devices('GPU')
+        tf.config.experimental.set_visible_devices(gpus[self.cfg['gpu']], 'GPU')
+
         self.training = tf.compat.v1.placeholder(tf.bool)
         self.global_step = tf.Variable(0, name='global_step', trainable=False)
         self.learning_rate = tf.compat.v1.placeholder(tf.float32)
diff --git a/tf/train.py b/tf/train.py
index 9384fb66..f632e52d 100755
--- a/tf/train.py
+++ b/tf/train.py
@@ -25,6 +25,7 @@
 import random
 import multiprocessing as mp
 import tensorflow as tf
+tf.compat.v1.disable_v2_behavior()
 from tfprocess import TFProcess
 from chunkparser import ChunkParser
@@ -116,6 +117,7 @@ def main(cmd):
     root_dir = os.path.join(cfg['training']['path'], cfg['name'])
     if not os.path.exists(root_dir):
         os.makedirs(root_dir)
+    tfprocess = TFProcess(cfg)
     train_parser = ChunkParser(FileDataSrc(train_chunks),
                                shuffle_size=shuffle_size, sample=SKIP,
                                batch_size=ChunkParser.BATCH_SIZE)
@@ -134,7 +136,6 @@ def main(cmd):
     dataset = dataset.prefetch(4)
     test_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)

-    tfprocess = TFProcess(cfg)
     tfprocess.init(dataset, train_iterator, test_iterator)

     if os.path.exists(os.path.join(root_dir, 'checkpoint')):

From d170e2048a7214aca71f6a6dccdabff9053113bd Mon Sep 17 00:00:00 2001
From: Tilps
Date: Sat, 7 Dec 2019 12:14:53 +1100
Subject: [PATCH 04/39] Update requirements.

---
 tf/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tf/requirements.txt b/tf/requirements.txt
index 4184551a..2be066a3 100644
--- a/tf/requirements.txt
+++ b/tf/requirements.txt
@@ -1,3 +1,3 @@
 numpy==1.13.3
-tensorflow==1.14.0
+tensorflow==2.0.0
 tensorflow-tensorboard==0.4.0rc2

From 3fcbe2b5f56fc0518cf74e8f5f27d5f68a4a22b0 Mon Sep 17 00:00:00 2001
From: Tilps
Date: Sat, 7 Dec 2019 13:34:11 +1100
Subject: [PATCH 05/39] Remove uses of get_variable since its initializer
 doesn't seem to be called anymore...

---
 tf/tfprocess.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/tf/tfprocess.py b/tf/tfprocess.py
index 3af5f9c0..5bb2e687 100644
--- a/tf/tfprocess.py
+++ b/tf/tfprocess.py
@@ -43,9 +43,7 @@ def weight_variable(shape, name=None, dtype=tf.float32):
     stddev = trunc_correction * np.sqrt(2.0 / (fan_in + fan_out))
     # Do not use a constant as the initializer, that will cause the
     # variable to be stored in wrong dtype.
-    weights = tf.compat.v1.get_variable(
-        name, shape, dtype=dtype,
-        initializer=tf.compat.v1.truncated_normal_initializer(stddev=stddev))
+    weights = tf.Variable(tf.compat.v1.truncated_normal_initializer(stddev=stddev)(shape, dtype), name=name)
     tf.compat.v1.add_to_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES, weights)
     return weights
@@ -55,8 +53,7 @@ def weight_variable(shape, name=None, dtype=tf.float32):


 def bias_variable(shape, name=None, dtype=tf.float32):
-    return tf.compat.v1.get_variable(name, shape, dtype=dtype,
-                                     initializer=tf.compat.v1.zeros_initializer())
+    return tf.Variable(tf.compat.v1.zeros_initializer()(shape, dtype), name=name)

 def conv2d(x, W):
     return tf.nn.conv2d(input=x, filters=W, data_format='NCHW',
@@ -836,10 +833,7 @@ def construct_net(self, planes):
             h_conv_pol_flat = tf.reshape(conv_pol2, [-1, 80*8*8])
             fc1_init = tf.constant(lc0_az_policy_map.make_map(),
                                    dtype=self.model_dtype)
-            W_fc1 = tf.compat.v1.get_variable("policy_map",
-                                    initializer=fc1_init,
-                                    trainable=False,
-                                    dtype=self.model_dtype)
+            W_fc1 = tf.Variable(fcl_init, trainable=False, name="policy_map")

             h_fc1 = tf.matmul(h_conv_pol_flat, W_fc1, name='policy_head')
         elif self.POLICY_HEAD == pb.NetworkFormat.POLICY_CLASSICAL:

From 162fa408f94c0d4d25fab57ac3e11c649db3cda9 Mon Sep 17 00:00:00 2001
From: Tilps
Date: Sat, 7 Dec 2019 14:02:45 +1100
Subject: [PATCH 06/39] Fix typo.

---
 tf/tfprocess.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tf/tfprocess.py b/tf/tfprocess.py
index 5bb2e687..defeedc7 100644
--- a/tf/tfprocess.py
+++ b/tf/tfprocess.py
@@ -833,7 +833,7 @@ def construct_net(self, planes):
             h_conv_pol_flat = tf.reshape(conv_pol2, [-1, 80*8*8])
             fc1_init = tf.constant(lc0_az_policy_map.make_map(),
                                    dtype=self.model_dtype)
-            W_fc1 = tf.Variable(fcl_init, trainable=False, name="policy_map")
+            W_fc1 = tf.Variable(fc1_init, trainable=False, name="policy_map")

             h_fc1 = tf.matmul(h_conv_pol_flat, W_fc1, name='policy_head')
         elif self.POLICY_HEAD == pb.NetworkFormat.POLICY_CLASSICAL:

From ff2e2565bcf75823139080a593e040bdc330249d Mon Sep 17 00:00:00 2001
From: Tilps
Date: Sun, 8 Dec 2019 12:11:51 +1100
Subject: [PATCH 07/39] Migration in progress some more.

At this point 'basic' training seems to be functional, maybe.
No test data processing, no grad norm clipping, no swa, no metrics, no
saving to pb.gz - lots still to do.

---
 tf/tfprocess.py | 349 ++++++++++++++++++++++++++++++++++++++++++++----
 tf/train.py     |  43 +++---
 2 files changed, 349 insertions(+), 43 deletions(-)

diff --git a/tf/tfprocess.py b/tf/tfprocess.py
index defeedc7..f26b5c4b 100644
--- a/tf/tfprocess.py
+++ b/tf/tfprocess.py
@@ -28,29 +28,34 @@
 from net import Net

-
-def weight_variable(shape, name=None, dtype=tf.float32):
-    """Xavier initialization"""
-    if len(shape) == 4:
-        receptive_field = shape[0] * shape[1]
-        fan_in = shape[2] * receptive_field
-        fan_out = shape[3] * receptive_field
-    else:
-        fan_in = shape[0]
-        fan_out = shape[1]
-    # truncated normal has lower stddev than a regular normal distribution, so need to correct for that
-    trunc_correction = np.sqrt(1.3)
-    stddev = trunc_correction * np.sqrt(2.0 / (fan_in + fan_out))
-    # Do not use a constant as the initializer, that will cause the
-    # variable to be stored in wrong dtype.
-    weights = tf.Variable(tf.compat.v1.truncated_normal_initializer(stddev=stddev)(shape, dtype), name=name)
-    tf.compat.v1.add_to_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES, weights)
-    return weights
-
 # Bias weights for layers not followed by BatchNorm
 # We do not regularize biases, so they are not
 # added to the regularizer collection
+class ApplySqueezeExcitation(tf.keras.layers.Layer):
+    def __init__(self, **kwargs):
+        super(ApplySqueezeExcitation, self).__init__(**kwargs)
+
+    def build(self, input_dimens):
+        self.reshape_size = input_dimens[1][1]
+
+    def call(self, inputs):
+        x = inputs[0]
+        excited = inputs[1]
+        gammas, betas = tf.split(tf.reshape(excited, [-1, self.reshape_size, 1, 1]), 2, axis=1)
+        return tf.nn.sigmoid(gammas) * x + betas
+
+
+class ApplyPolicyMap(tf.keras.layers.Layer):
+    def __init__(self, **kwargs):
+        super(ApplyPolicyMap, self).__init__(**kwargs)
+        fc1_init = tf.constant(lc0_az_policy_map.make_map())
+        self.fc1 = tf.Variable(fc1_init, trainable=False)
+
+    def call(self, inputs):
+        h_conv_pol_flat = tf.reshape(inputs, [-1, 80*8*8])
+        return tf.matmul(h_conv_pol_flat, self.fc1)

 def bias_variable(shape, name=None, dtype=tf.float32):
     return tf.Variable(tf.compat.v1.zeros_initializer()(shape, dtype), name=name)
@@ -132,11 +137,22 @@ def __init__(self, cfg):
         gpus = tf.config.experimental.list_physical_devices('GPU')
         tf.config.experimental.set_visible_devices(gpus[self.cfg['gpu']], 'GPU')

-        self.training = tf.compat.v1.placeholder(tf.bool)
         self.global_step = tf.Variable(0, name='global_step', trainable=False)
-        self.learning_rate = tf.compat.v1.placeholder(tf.float32)
+
+    def init_v2(self, train_dataset, test_dataset):
+        self.l2reg = tf.keras.regularizers.l2(l=0.5 * (0.0001))
+        self.train_dataset = train_dataset
+        self.train_iter = iter(train_dataset)
+        self.test_dataset = test_dataset
+        self.test_iter = iter(test_dataset)
+        self.init_net_v2()
+        self.checkpoint = tf.train.Checkpoint(optimizer=self.optimizer, model=self.model, global_step=self.global_step)
+        self.manager = tf.train.CheckpointManager(
+            self.checkpoint, directory=self.root_dir, max_to_keep=50, keep_checkpoint_every_n_hours=24)

     def init(self, dataset, train_iterator, test_iterator):
+        self.training = tf.compat.v1.placeholder(tf.bool)
+        self.learning_rate = tf.compat.v1.placeholder(tf.float32)
         # TF variables
         self.handle = tf.compat.v1.placeholder(tf.string, shape=[])
         iterator = tf.compat.v1.data.Iterator.from_string_handle(
@@ -144,11 +160,82 @@ def init(self, dataset, train_iterator, test_iterator):
         self.next_batch = iterator.get_next()
         self.train_handle = self.session.run(train_iterator.string_handle())
         self.test_handle = self.session.run(test_iterator.string_handle())
+        self.l2reg = tf.keras.regularizers.l2(l=0.5 * (0.0001))
+
         # This forces trainable variables to be stored as fp32
         with tf.compat.v1.variable_scope("fp32_storage",
                          custom_getter=float32_variable_storage_getter):
             self.init_net(self.next_batch)

+    def init_net_v2(self):
+        input_var = tf.keras.Input(shape=(112, 8*8))
+        x_planes = tf.keras.layers.Reshape([112, 8, 8])(input_var)
+        self.model = tf.keras.Model(inputs=input_var, outputs=self.construct_net_v2(x_planes))
+        self.active_lr = 0.01
+        # TODO set up optimizers and loss functions.
+ self.optimizer = tf.keras.optimizers.SGD(learning_rate=lambda: self.active_lr, momentum=0.9, nesterov=True) + def policy_loss(target, output): + # Calculate loss on policy head + if self.cfg['training'].get('mask_legal_moves'): + # extract mask for legal moves from target policy + move_is_legal = tf.greater_equal(target, 0) + # replace logits of illegal moves with large negative value (so that it doesn't affect policy of legal moves) without gradient + illegal_filler = tf.zeros_like(output) - 1.0e10 + output = tf.where(move_is_legal, output, illegal_filler) + # y_ still has -1 on illegal moves, flush them to 0 + target = tf.nn.relu(target) + + policy_cross_entropy = \ + tf.nn.softmax_cross_entropy_with_logits(labels=tf.stop_gradient(target), + logits=output) + return tf.reduce_mean(input_tensor=policy_cross_entropy) + self.policy_loss_fn = policy_loss + + + q_ratio = self.cfg['training'].get('q_ratio', 0) + assert 0 <= q_ratio <= 1 + + # Linear conversion to scalar to compute MSE with, for comparison to old values + wdl = tf.expand_dims(tf.constant([1.0, 0.0, -1.0]), 1) + + self.qMix = lambda z, q: q * q_ratio + z *(1 - q_ratio) + # Loss on value head + if self.wdl: + def value_loss(target, output): + value_cross_entropy = \ + tf.nn.softmax_cross_entropy_with_logits(labels=tf.stop_gradient(target), + logits=output) + return tf.reduce_mean(input_tensor=value_cross_entropy) + self.value_loss_fn = value_loss + def mse_loss(target, output): + scalar_z_conv = tf.matmul(tf.nn.softmax(output), wdl) + scalar_target = tf.matmul(target, wdl) + return tf.reduce_mean(input_tensor=tf.math.squared_difference(scalar_target, scalar_z_conv)) + self.mse_loss_fn = mse_loss + else: + def value_loss(target, output): + return tf.constant(0) + self.value_loss_fn = value_loss + def mse_loss(target, output): + scalar_target = tf.matmul(target, wdl) + return tf.reduce_mean(input_tensor=tf.math.squared_difference(scalar_target, output)) + self.mse_loss_fn = mse_loss + + pol_loss_w = self.cfg['training']['policy_loss_weight'] + val_loss_w = self.cfg['training']['value_loss_weight'] + self.lossMix = lambda policy, value: pol_loss_w * policy + val_loss_w * value + + self.avg_policy_loss = [] + self.avg_value_loss = [] + self.avg_mse_loss = [] + self.avg_reg_term = [] + self.time_start = None + self.last_steps = None + # Set adaptive learning rate during training + self.cfg['training']['lr_boundaries'].sort() + self.warmup_steps = self.cfg['training'].get('warmup_steps', 0) + self.lr = self.cfg['training']['lr_values'][0] + def init_net(self, next_batch): self.x = next_batch[0] # tf.placeholder(tf.float32, [None, 112, 8*8]) self.y_ = next_batch[1] # tf.placeholder(tf.float32, [None, 1858]) @@ -199,9 +286,8 @@ def init_net(self, next_batch): tf.reduce_mean(input_tensor=tf.math.squared_difference(scalar_target, self.z_conv)) # Regularizer - regularizer = tf.keras.regularizers.l2(l=0.5 * (0.0001)) reg_variables = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES) - penalties = [regularizer(w) for w in reg_variables] + penalties = [self.l2reg(w) for w in reg_variables] self.reg_term = tf.math.add_n(penalties) if self.model_dtype != tf.float32: @@ -343,10 +429,24 @@ def replace_weights(self, new_weights): # This should result in identical file to the starting one # self.save_leelaz_weights('restored.txt') + def restore_v2(self): + if self.manager.latest_checkpoint is not None: + print("Restoring from {0}".format(self.manager.latest_checkpoint)) + 
self.checkpoint.restore(self.manager.latest_checkpoint) + + def restore(self, file): print("Restoring from {0}".format(file)) self.saver.restore(self.session, file) + def process_loop_v2(self, batch_size, test_batches, batch_splits=1): + # Get the initial steps value in case this is a resume from a step count + # which is not a multiple of total_steps. + steps = self.global_step.read_value() + total_steps = self.cfg['training']['total_steps'] + for _ in range(steps % total_steps, total_steps): + self.process_v2(batch_size, test_batches, batch_splits=batch_splits) + def process_loop(self, batch_size, test_batches, batch_splits=1): # Get the initial steps value in case this is a resume from a step count # which is not a multiple of total_steps. @@ -355,6 +455,145 @@ def process_loop(self, batch_size, test_batches, batch_splits=1): for _ in range(steps % total_steps, total_steps): self.process(batch_size, test_batches, batch_splits=batch_splits) + def process_v2(self, batch_size, test_batches, batch_splits=1): + if not self.time_start: + self.time_start = time.time() + + # Get the initial steps value before we do a training step. + steps = self.global_step.read_value() + if not self.last_steps: + self.last_steps = steps + + if self.swa_enabled: + # split half of test_batches between testing regular weights and SWA weights + test_batches //= 2 + + # Run test before first step to see delta since end of last run. + #if steps % self.cfg['training']['total_steps'] == 0: + # Steps is given as one higher than current in order to avoid it + # being equal to the value the end of a run is stored against. + # self.calculate_test_summaries_v2(test_batches, steps + 1) + # if self.swa_enabled: + # self.calculate_swa_summaries_v2(test_batches, steps + 1) + + # Make sure that ghost batch norm can be applied + if batch_size % 64 != 0: + # Adjust required batch size for batch splitting. 
+ required_factor = 64 * \ + self.cfg['training'].get('num_batch_splits', 1) + raise ValueError( + 'batch_size must be a multiple of {}'.format(required_factor)) + + # Determine learning rate + lr_values = self.cfg['training']['lr_values'] + lr_boundaries = self.cfg['training']['lr_boundaries'] + steps_total = steps % self.cfg['training']['total_steps'] + self.lr = lr_values[bisect.bisect_right(lr_boundaries, steps_total)] + if self.warmup_steps > 0 and steps < self.warmup_steps: + self.lr = self.lr * tf.cast(steps + 1, tf.float32) / self.warmup_steps + + # need to add 1 to steps because steps will be incremented after gradient update + #if (steps + 1) % self.cfg['training']['train_avg_report_steps'] == 0 or (steps + 1) % self.cfg['training']['total_steps'] == 0: + # before_weights = self.session.run(self.weights) + + # Run training for this batch + grads = None + for _ in range(batch_splits): + x, y, z, q = next(self.train_iter) + with tf.GradientTape() as tape: + policy, value = self.model(x) + policy_loss = self.policy_loss_fn(y, policy) + reg_term = sum(self.model.losses) + if self.wdl: + value_loss = self.value_loss_fn(self.qMix(z, q), value) + total_loss = self.lossMix(policy_loss, value_loss) + reg_term + else: + mse_loss = self.mse_loss_fn(self.qMix(z, q), value) + total_loss = self.lossMix(policy_loss, mse_loss) + reg_term + if self.wdl: + mse_loss = self.mse_loss_fn(self.qMix(z, q), value) + if not grads: + grads = tape.gradient(total_loss, self.model.trainable_weights) + else: + grads += tape.gradient(total_loss, self.model.trainable_weights) + # Keep running averages + # Google's paper scales MSE by 1/4 to a [0, 1] range, so do the same to + # get comparable values. + mse_loss /= 4.0 + self.avg_policy_loss.append(policy_loss) + if self.wdl: + self.avg_value_loss.append(value_loss) + self.avg_mse_loss.append(mse_loss) + self.avg_reg_term.append(reg_term) + # Gradients of batch splits are summed, not averaged like usual, so need to scale lr accordingly to correct for this. + self.active_lr = self.lr / batch_splits + self.optimizer.apply_gradients(zip(grads, self.model.trainable_weights)) + #grad_norm = compute_norm(self.model.trainable_weights) + + # Update steps. 
self.global_step.assign_add(1)
+        steps = self.global_step.read_value()
+
+        if steps % self.cfg['training']['train_avg_report_steps'] == 0 or steps % self.cfg['training']['total_steps'] == 0:
+            pol_loss_w = self.cfg['training']['policy_loss_weight']
+            val_loss_w = self.cfg['training']['value_loss_weight']
+            time_end = time.time()
+            speed = 0
+            if self.time_start:
+                elapsed = time_end - self.time_start
+                steps_elapsed = steps - self.last_steps
+                speed = batch_size * (tf.cast(steps_elapsed, tf.float32) / elapsed)
+            avg_policy_loss = np.mean(self.avg_policy_loss or [0])
+            avg_value_loss = np.mean(self.avg_value_loss or [0])
+            avg_mse_loss = np.mean(self.avg_mse_loss or [0])
+            avg_reg_term = np.mean(self.avg_reg_term or [0])
+            print("step {}, lr={:g} policy={:g} value={:g} mse={:g} reg={:g} total={:g} ({:g} pos/s)".format(
+                steps, self.lr, avg_policy_loss, avg_value_loss, avg_mse_loss, avg_reg_term,
+                pol_loss_w * avg_policy_loss + val_loss_w * avg_value_loss + avg_reg_term,
+                speed))
+
+            #after_weights = self.session.run(self.weights)
+            #update_ratio_summaries = self.compute_update_ratio(
+            #    before_weights, after_weights)
+
+            #train_summaries = tf.compat.v1.Summary(value=[
+            #    tf.compat.v1.Summary.Value(tag="Policy Loss", simple_value=avg_policy_loss),
+            #    tf.compat.v1.Summary.Value(tag="Value Loss", simple_value=avg_value_loss),
+            #    tf.compat.v1.Summary.Value(tag="Reg term", simple_value=avg_reg_term),
+            #    tf.compat.v1.Summary.Value(tag="LR", simple_value=self.lr),
+            #    tf.compat.v1.Summary.Value(tag="Gradient norm",
+            #                     simple_value=grad_norm / batch_splits),
+            #    tf.compat.v1.Summary.Value(tag="MSE Loss", simple_value=avg_mse_loss)])
+            #self.train_writer.add_summary(train_summaries, steps)
+            #self.train_writer.add_summary(update_ratio_summaries, steps)
+            self.time_start = time_end
+            self.last_steps = steps
+            self.avg_policy_loss, self.avg_value_loss, self.avg_mse_loss, self.avg_reg_term = [], [], [], []
+
+        #if self.swa_enabled and steps % self.cfg['training']['swa_steps'] == 0:
+        #    self.update_swa_v2()
+
+        # Calculate test values every 'test_steps', but also ensure there is
+        # one at the final step so the delta to the first step can be calculated.
+        #if steps % self.cfg['training']['test_steps'] == 0 or steps % self.cfg['training']['total_steps'] == 0:
+        #    self.calculate_test_summaries_v2(test_batches, steps)
+        #    if self.swa_enabled:
+        #        self.calculate_swa_summaries_v2(test_batches, steps)
+
+        # Save session and weights at end, and also optionally every 'checkpoint_steps'.
+ if steps % self.cfg['training']['total_steps'] == 0 or ( + 'checkpoint_steps' in self.cfg['training'] and steps % self.cfg['training']['checkpoint_steps'] == 0): + self.manager.save() + #print("Model saved in file: {}".format(save_path)) + #leela_path = path + "-" + str(steps) + #swa_path = path + "-swa-" + str(steps) + self.net.pb.training_params.training_steps = steps + #self.save_leelaz_weights(leela_path) + #print("Weights saved in file: {}".format(leela_path)) + #if self.swa_enabled: + # self.save_swa_weights(swa_path) + # print("SWA Weights saved in file: {}".format(swa_path)) + def process(self, batch_size, test_batches, batch_splits=1): if not self.time_start: self.time_start = time.time() @@ -681,6 +920,23 @@ def add_weights(self, var): assert var.dtype.base_dtype == tf.float32 self.weights.append(var) + def batch_norm_v2(self, input, scale=False): + if self.renorm_enabled: + clipping = { + "rmin": 1.0/self.renorm_max_r, + "rmax": self.renorm_max_r, + "dmax": self.renorm_max_d + } + return tf.keras.layers.BatchNormalization( + epsilon=1e-5, axis=1, fused=True, center=True, + scale=scale, renorm=True, renorm_clipping=clipping, + renorm_momentum=self.renorm_momentum)(input) + else: + return tf.keras.layers.BatchNormalization( + epsilon=1e-5, axis=1, fused=False, center=True, + scale=scale, virtual_batch_size=64)(input) + + def batch_norm(self, net, scope, scale=False): # The weights are internal to the batchnorm layer, so apply # a unique scope that we can store, and use to look them back up @@ -720,6 +976,13 @@ def batch_norm(self, net, scope, scale=False): var = tf.compat.v1.get_default_graph().get_tensor_by_name(name) self.add_weights(var) return net + + + def squeeze_excitation_v2(self, inputs, channels): + pooled = tf.keras.layers.GlobalAveragePooling2D()(inputs) + squeezed = tf.keras.layers.Activation('relu')(tf.keras.layers.Dense(channels // self.SE_ratio, kernel_initializer='glorot_normal', kernel_regularizer=self.l2reg)(pooled)) + excited = tf.keras.layers.Dense(2 * channels, kernel_initializer='glorot_normal', kernel_regularizer=self.l2reg)(squeezed) + return ApplySqueezeExcitation()([inputs, excited]) def squeeze_excitation(self, x, channels, ratio): @@ -755,6 +1018,10 @@ def squeeze_excitation(self, x, channels, ratio): return out + def conv_block_v2(self, inputs, filter_size, output_channels, bn_scale=False): + conv = tf.keras.layers.Conv2D(output_channels, filter_size, use_bias=False, padding='same', kernel_initializer='glorot_normal', kernel_regularizer=self.l2reg, data_format='channels_first')(inputs) + return tf.keras.layers.Activation('relu')(self.batch_norm_v2(conv, scale=bn_scale)) + def conv_block(self, inputs, filter_size, input_channels, output_channels, bn_scale=False): # The weights are internal to the batchnorm layer, so apply # a unique scope that we can store, and use to look them back up @@ -771,6 +1038,14 @@ def conv_block(self, inputs, filter_size, input_channels, output_channels, bn_sc return h_conv + def residual_block_v2(self, inputs, channels): + conv1 = tf.keras.layers.Conv2D(channels, 3, use_bias=False, padding='same', kernel_initializer='glorot_normal', kernel_regularizer=self.l2reg, data_format='channels_first')(inputs) + out1 = tf.keras.layers.Activation('relu')(self.batch_norm_v2(conv1, scale=False)) + conv2 = tf.keras.layers.Conv2D(channels, 3, use_bias=False, padding='same', kernel_initializer='glorot_normal', kernel_regularizer=self.l2reg, data_format='channels_first')(out1) + out2 = self.squeeze_excitation_v2(self.batch_norm_v2(conv1, 
scale=True), channels) + return tf.keras.layers.Activation('relu')(tf.keras.layers.add([inputs, out2])) + + def residual_block(self, inputs, channels): # First convnet orig = tf.identity(inputs) @@ -798,6 +1073,34 @@ def residual_block(self, inputs, channels): return h_out_2 + def construct_net_v2(self, inputs): + flow = self.conv_block_v2(inputs, filter_size=3, output_channels=self.RESIDUAL_FILTERS, bn_scale=True) + for _ in range(0, self.RESIDUAL_BLOCKS): + flow = self.residual_block_v2(flow, self.RESIDUAL_FILTERS) + # Policy head + if self.POLICY_HEAD == pb.NetworkFormat.POLICY_CONVOLUTION: + conv_pol = self.conv_block_v2(flow, filter_size=3, output_channels=self.RESIDUAL_FILTERS) + conv_pol2 = tf.keras.layers.Conv2D(80, 3, use_bias=True, padding='same', kernel_initializer='glorot_normal', kernel_regularizer=self.l2reg, bias_regularizer=self.l2reg, data_format='channels_first')(conv_pol) + h_fc1 = ApplyPolicyMap()(conv_pol2) + elif self.POLICY_HEAD == pb.NetworkFormat.POLICY_CLASSICAL: + conv_pol = self.conv_block_v2(flow, filter_size=1, output_channels=self.policy_channels) + h_conv_pol_flat = tf.keras.layers.Flatten()(conv_pol) + h_fc1 = tf.keras.layers.Dense(1858, kernel_initializer='glorot_normal', kernel_regularizer=self.l2reg, bias_regularizer=self.l2reg)(h_conv_pol_flat) + else: + raise ValueError( + "Unknown policy head type {}".format(self.POLICY_HEAD)) + + # Value head + conv_val = self.conv_block_v2(flow, filter_size=1, output_channels=32) + h_conv_val_flat = tf.keras.layers.Flatten()(conv_val) + h_fc2 = tf.keras.layers.Dense(128, kernel_initializer='glorot_normal', kernel_regularizer=self.l2reg, activation='relu')(h_conv_val_flat) + if self.wdl: + h_fc3 = tf.keras.layers.Dense(3, kernel_initializer='glorot_normal', kernel_regularizer=self.l2reg, bias_regularizer=self.l2reg)(h_fc2) + else: + h_fc3 = tf.keras.layers.Dense(1, kernel_initializer='glorot_normal', kernel_regularizer=self.l2reg, activation='tanh')(h_fc2) + return h_fc1, h_fc3 + + def construct_net(self, planes): # NCHW format # batch, 112 input channels, 8 x 8 diff --git a/tf/train.py b/tf/train.py index f632e52d..b9a7db0c 100755 --- a/tf/train.py +++ b/tf/train.py @@ -25,7 +25,7 @@ import random import multiprocessing as mp import tensorflow as tf -tf.compat.v1.disable_v2_behavior() +#tf.compat.v1.disable_v2_behavior() from tfprocess import TFProcess from chunkparser import ChunkParser @@ -121,26 +121,28 @@ def main(cmd): train_parser = ChunkParser(FileDataSrc(train_chunks), shuffle_size=shuffle_size, sample=SKIP, batch_size=ChunkParser.BATCH_SIZE) - dataset = tf.data.Dataset.from_generator( + train_dataset = tf.data.Dataset.from_generator( train_parser.parse, output_types=(tf.string, tf.string, tf.string, tf.string)) - dataset = dataset.map(ChunkParser.parse_function) - dataset = dataset.prefetch(4) - train_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + train_dataset = train_dataset.map(ChunkParser.parse_function) + train_dataset = train_dataset.prefetch(4) + #train_iterator = tf.compat.v1.data.make_one_shot_iterator(train_dataset) shuffle_size = int(shuffle_size*(1.0-train_ratio)) test_parser = ChunkParser(FileDataSrc(test_chunks), shuffle_size=shuffle_size, sample=SKIP, batch_size=ChunkParser.BATCH_SIZE) - dataset = tf.data.Dataset.from_generator( + test_dataset = tf.data.Dataset.from_generator( test_parser.parse, output_types=(tf.string, tf.string, tf.string, tf.string)) - dataset = dataset.map(ChunkParser.parse_function) - dataset = dataset.prefetch(4) - test_iterator = 
tf.compat.v1.data.make_one_shot_iterator(dataset) + test_dataset = test_dataset.map(ChunkParser.parse_function) + test_dataset = test_dataset.prefetch(4) + #test_iterator = tf.compat.v1.data.make_one_shot_iterator(test_dataset) - tfprocess.init(dataset, train_iterator, test_iterator) + #tfprocess.init(test_dataset, train_iterator, test_iterator) + tfprocess.init_v2(train_dataset, test_dataset) - if os.path.exists(os.path.join(root_dir, 'checkpoint')): - cp = tf.train.latest_checkpoint(root_dir) - tfprocess.restore(cp) + #if os.path.exists(os.path.join(root_dir, 'checkpoint')): + # cp = tf.train.latest_checkpoint(root_dir) + # tfprocess.restore(cp) + tfprocess.restore_v2() # If number of test positions is not given # sweeps through all test chunks statistically @@ -151,15 +153,16 @@ def main(cmd): num_evals = max(1, num_evals // ChunkParser.BATCH_SIZE) print("Using {} evaluation batches".format(num_evals)) - tfprocess.process_loop(total_batch_size, num_evals, batch_splits=batch_splits) + #tfprocess.process_loop(total_batch_size, num_evals, batch_splits=batch_splits) + tfprocess.process_loop_v2(total_batch_size, num_evals, batch_splits=batch_splits) - if cmd.output is not None: - if cfg['training'].get('swa_output', False): - tfprocess.save_swa_weights(cmd.output) - else: - tfprocess.save_leelaz_weights(cmd.output) + #if cmd.output is not None: + # if cfg['training'].get('swa_output', False): + # tfprocess.save_swa_weights(cmd.output) + # else: + # tfprocess.save_leelaz_weights(cmd.output) - tfprocess.session.close() + #tfprocess.session.close() train_parser.shutdown() test_parser.shutdown() From f202c7884a6e7a56832a61031945e6db922a0d4d Mon Sep 17 00:00:00 2001 From: Tilps Date: Sun, 8 Dec 2019 13:08:32 +1100 Subject: [PATCH 08/39] Add grad norm clipping back in. --- tf/tfprocess.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index f26b5c4b..8119fa0c 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -527,8 +527,9 @@ def process_v2(self, batch_size, test_batches, batch_splits=1): self.avg_reg_term.append(reg_term) # Gradients of batch splits are summed, not averaged like usual, so need to scale lr accordingly to correct for this. self.active_lr = self.lr / batch_splits + max_grad_norm = self.cfg['training'].get('max_grad_norm', 10000.0) * batch_splits + grads, grad_norm = tf.clip_by_global_norm(grads, max_grad_norm) self.optimizer.apply_gradients(zip(grads, self.model.trainable_weights)) - #grad_norm = compute_norm(self.model.trainable_weights) # Update steps. self.global_step.assign_add(1) From 1398d2a52ddd242fdb93e7b64e1b85ab45679036 Mon Sep 17 00:00:00 2001 From: Tilps Date: Sun, 8 Dec 2019 21:42:05 +1100 Subject: [PATCH 09/39] Add swa support and test result reporting. 
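
SWA keeps a running average of the model weights alongside the live ones.
With swa_count holding the number of accumulated networks n, each
accumulation performs w_swa <- w_swa * n/(n+1) + w * 1/(n+1), the same
update the v1 graph built with assign ops earlier in this series. A
minimal eager-mode sketch of that update (names mirror the diff below,
weight values are illustrative):

    import tensorflow as tf

    swa_count = tf.Variable(0., trainable=False)
    model_weights = [tf.Variable([1.0, 2.0])]
    swa_weights = [tf.Variable(w, trainable=False) for w in model_weights]

    def update_swa():
        n = swa_count.read_value()
        for swa, w in zip(swa_weights, model_weights):
            # running mean: new = old * n/(n+1) + sample * 1/(n+1)
            swa.assign(swa * (n / (n + 1.)) + w * (1. / (n + 1.)))
        swa_count.assign_add(1.)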
--- tf/tfprocess.py | 106 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 95 insertions(+), 11 deletions(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index 8119fa0c..dc84be67 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -146,7 +146,8 @@ def init_v2(self, train_dataset, test_dataset): self.test_dataset = test_dataset self.test_iter = iter(test_dataset) self.init_net_v2() - self.checkpoint = tf.train.Checkpoint(optimizer=self.optimizer, model=self.model, global_step=self.global_step) + self.checkpoint = tf.train.Checkpoint(optimizer=self.optimizer, model=self.model, global_step=self.global_step, swa_count=self.swa_count) + self.checkpoint.listed = self.swa_weights self.manager = tf.train.CheckpointManager( self.checkpoint, directory=self.root_dir, max_to_keep=50, keep_checkpoint_every_n_hours=24) @@ -171,6 +172,13 @@ def init_net_v2(self): input_var = tf.keras.Input(shape=(112, 8*8)) x_planes = tf.keras.layers.Reshape([112, 8, 8])(input_var) self.model = tf.keras.Model(inputs=input_var, outputs=self.construct_net_v2(x_planes)) + self.swa_count = None + self.swa_weights = None + if self.swa_enabled: + # Count of networks accumulated into SWA + self.swa_count = tf.Variable(0., name='swa_count', trainable=False) + self.swa_weights = [tf.Variable(w, trainable=False) for w in self.model.weights] + self.active_lr = 0.01 # TODO set up optimizers and loss functions. self.optimizer = tf.keras.optimizers.SGD(learning_rate=lambda: self.active_lr, momentum=0.9, nesterov=True) @@ -469,12 +477,12 @@ def process_v2(self, batch_size, test_batches, batch_splits=1): test_batches //= 2 # Run test before first step to see delta since end of last run. - #if steps % self.cfg['training']['total_steps'] == 0: + if steps % self.cfg['training']['total_steps'] == 0: # Steps is given as one higher than current in order to avoid it # being equal to the value the end of a run is stored against. - # self.calculate_test_summaries_v2(test_batches, steps + 1) - # if self.swa_enabled: - # self.calculate_swa_summaries_v2(test_batches, steps + 1) + self.calculate_test_summaries_v2(test_batches, steps + 1) + if self.swa_enabled: + self.calculate_swa_summaries_v2(test_batches, steps + 1) # Make sure that ghost batch norm can be applied if batch_size % 64 != 0: @@ -512,6 +520,8 @@ def process_v2(self, batch_size, test_batches, batch_splits=1): total_loss = self.lossMix(policy_loss, mse_loss) + reg_term if self.wdl: mse_loss = self.mse_loss_fn(self.qMix(z, q), value) + else: + value_loss = self.value_loss_fn(self.qMix(z, q), value) if not grads: grads = tape.gradient(total_loss, self.model.trainable_weights) else: @@ -571,15 +581,15 @@ def process_v2(self, batch_size, test_batches, batch_splits=1): self.last_steps = steps self.avg_policy_loss, self.avg_value_loss, self.avg_mse_loss, self.avg_reg_term = [], [], [], [] - #if self.swa_enabled and steps % self.cfg['training']['swa_steps'] == 0: - # self.update_swa_v2() + if self.swa_enabled and steps % self.cfg['training']['swa_steps'] == 0: + self.update_swa_v2() # Calculate test values every 'test_steps', but also ensure there is # one at the final step so the delta to the first step can be calculted. 
- #if steps % self.cfg['training']['test_steps'] == 0 or steps % self.cfg['training']['total_steps'] == 0: - # self.calculate_test_summaries_v2(test_batches, steps) - # if self.swa_enabled: - # self.calculate_swa_summaries_v2(test_batches, steps) + if steps % self.cfg['training']['test_steps'] == 0 or steps % self.cfg['training']['total_steps'] == 0: + self.calculate_test_summaries_v2(test_batches, steps) + if self.swa_enabled: + self.calculate_swa_summaries_v2(test_batches, steps) # Save session and weights at end, and also optionally every 'checkpoint_steps'. if steps % self.cfg['training']['total_steps'] == 0 or ( @@ -721,6 +731,17 @@ def process(self, batch_size, test_batches, batch_splits=1): self.save_swa_weights(swa_path) print("SWA Weights saved in file: {}".format(swa_path)) + def calculate_swa_summaries_v2(self, test_batches, steps): + backup = [w.read_value() for w in self.model.weights] + for (swa, w) in zip(self.swa_weights, self.model.weights): + w.assign(swa.read_value()) + #true_test_writer, self.test_writer = self.test_writer, self.swa_writer + print('swa', end=' ') + self.calculate_test_summaries_v2(test_batches, steps) + #self.test_writer = true_test_writer + for (old, w) in zip(backup, self.model.weights): + w.assign(old) + def calculate_swa_summaries(self, test_batches, steps): self.snap_save() self.session.run(self.swa_load_op) @@ -730,6 +751,63 @@ def calculate_swa_summaries(self, test_batches, steps): self.test_writer = true_test_writer self.snap_restore() + def calculate_test_summaries_v2(self, test_batches, steps): + sum_policy_accuracy = 0 + sum_value_accuracy = 0 + sum_mse = 0 + sum_policy = 0 + sum_value = 0 + for _ in range(0, test_batches): + x, y, z, q = next(self.test_iter) + policy, value = self.model(x) + policy_loss = self.policy_loss_fn(y, policy) + reg_term = sum(self.model.losses) + if self.wdl: + value_loss = self.value_loss_fn(self.qMix(z, q), value) + mse_loss = self.mse_loss_fn(self.qMix(z, q), value) + total_loss = self.lossMix(policy_loss, value_loss) + reg_term + else: + value_loss = self.value_loss_fn(self.qMix(z, q), value) + mse_loss = self.mse_loss_fn(self.qMix(z, q), value) + total_loss = self.lossMix(policy_loss, mse_loss) + reg_term + #sum_policy_accuracy += test_policy_accuracy + sum_mse += mse_loss + sum_policy += policy_loss + if self.wdl: + #sum_value_accuracy += test_value_accuracy + sum_value += value_loss + #sum_policy_accuracy /= test_batches + #sum_policy_accuracy *= 100 + sum_policy /= test_batches + sum_value /= test_batches + #if self.wdl: + #sum_value_accuracy /= test_batches + #sum_value_accuracy *= 100 + # Additionally rescale to [0, 1] so divide by 4 + sum_mse /= (4.0 * test_batches) + self.net.pb.training_params.learning_rate = self.lr + self.net.pb.training_params.mse_loss = sum_mse + self.net.pb.training_params.policy_loss = sum_policy + # TODO store value and value accuracy in pb + #self.net.pb.training_params.accuracy = sum_policy_accuracy + #if self.wdl: + # test_summaries = tf.compat.v1.Summary(value=[ + # tf.compat.v1.Summary.Value(tag="Policy Accuracy", simple_value=sum_policy_accuracy), + # tf.compat.v1.Summary.Value(tag="Value Accuracy", simple_value=sum_value_accuracy), + # tf.compat.v1.Summary.Value(tag="Policy Loss", simple_value=sum_policy), + # tf.compat.v1.Summary.Value(tag="Value Loss", simple_value=sum_value), + # tf.compat.v1.Summary.Value(tag="MSE Loss", simple_value=sum_mse)]).SerializeToString() + #else: + # test_summaries = tf.compat.v1.Summary(value=[ + # tf.compat.v1.Summary.Value(tag="Policy 
Accuracy", simple_value=sum_policy_accuracy), + # tf.compat.v1.Summary.Value(tag="Policy Loss", simple_value=sum_policy), + # tf.compat.v1.Summary.Value(tag="MSE Loss", simple_value=sum_mse)]).SerializeToString() + #test_summaries = tf.compat.v1.summary.merge( + # [test_summaries] + self.histograms).eval(session=self.session) + #self.test_writer.add_summary(test_summaries, steps) + print("step {}, policy={:g} value={:g} policy accuracy={:g}% value accuracy={:g}% mse={:g}".\ + format(steps, sum_policy, sum_value, sum_policy_accuracy, sum_value_accuracy, sum_mse)) + def calculate_test_summaries(self, test_batches, steps): sum_policy_accuracy = 0 sum_value_accuracy = 0 @@ -830,6 +908,12 @@ def log_histogram(self, tag, values, bins=1000): return tf.compat.v1.Summary.Value(tag=tag, histo=hist) + def update_swa_v2(self): + num = self.swa_count.read_value() + for (w, swa) in zip(self.model.weights, self.swa_weights): + swa.assign(swa.read_value() * (num / (num + 1.)) + w.read_value() * (1. / (num + 1.))) + self.swa_count.assign(min(num + 1., self.swa_max_n)) + def update_swa(self): # Add the current weight vars to the running average. num = self.session.run(self.swa_accum_op) From 2d97b80bc7ddad3ee9e4aa76aa4b79efc1eef95d Mon Sep 17 00:00:00 2001 From: Tilps Date: Sun, 8 Dec 2019 22:57:56 +1100 Subject: [PATCH 10/39] Actually sum the gradients... --- tf/tfprocess.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index dc84be67..2e9a59f4 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -525,7 +525,8 @@ def process_v2(self, batch_size, test_batches, batch_splits=1): if not grads: grads = tape.gradient(total_loss, self.model.trainable_weights) else: - grads += tape.gradient(total_loss, self.model.trainable_weights) + new_grads = tape.gradient(total_loss, self.model.trainable_weights) + grads = [tf.math.add(a, b) for (a, b) in zip(grads, new_grads)] # Keep running averages # Google's paper scales MSE by 1/4 to a [0, 1] range, so do the same to # get comparable values. From cd3c69dab0f7361d9c600e7b31961d511d68340c Mon Sep 17 00:00:00 2001 From: Tilps Date: Sun, 8 Dec 2019 23:31:39 +1100 Subject: [PATCH 11/39] Fix bad bug with residual blocks, also include the stub for the incomplete save code. 
--- tf/tfprocess.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index 2e9a59f4..13f5a27e 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -596,11 +596,11 @@ def process_v2(self, batch_size, test_batches, batch_splits=1): if steps % self.cfg['training']['total_steps'] == 0 or ( 'checkpoint_steps' in self.cfg['training'] and steps % self.cfg['training']['checkpoint_steps'] == 0): self.manager.save() - #print("Model saved in file: {}".format(save_path)) - #leela_path = path + "-" + str(steps) + print("Model saved in file: {}".format(self.manager.latest_checkpoint)) + leela_path = self.manager.latest_checkpoint + "-" + str(steps) #swa_path = path + "-swa-" + str(steps) self.net.pb.training_params.training_steps = steps - #self.save_leelaz_weights(leela_path) + self.save_leelaz_weights_v2(leela_path) #print("Weights saved in file: {}".format(leela_path)) #if self.swa_enabled: # self.save_swa_weights(swa_path) @@ -947,6 +947,10 @@ def save_swa_weights(self, filename): self.save_leelaz_weights(filename) self.snap_restore() + def save_leelaz_weights_v2(self, filename): + for w in self.model.weights: + tf.print(w.name) + def save_leelaz_weights(self, filename): all_weights = [] if not hasattr(self, 'pb_save_op'): @@ -1128,7 +1132,7 @@ def residual_block_v2(self, inputs, channels): conv1 = tf.keras.layers.Conv2D(channels, 3, use_bias=False, padding='same', kernel_initializer='glorot_normal', kernel_regularizer=self.l2reg, data_format='channels_first')(inputs) out1 = tf.keras.layers.Activation('relu')(self.batch_norm_v2(conv1, scale=False)) conv2 = tf.keras.layers.Conv2D(channels, 3, use_bias=False, padding='same', kernel_initializer='glorot_normal', kernel_regularizer=self.l2reg, data_format='channels_first')(out1) - out2 = self.squeeze_excitation_v2(self.batch_norm_v2(conv1, scale=True), channels) + out2 = self.squeeze_excitation_v2(self.batch_norm_v2(conv2, scale=True), channels) return tf.keras.layers.Activation('relu')(tf.keras.layers.add([inputs, out2])) From d3f95eb8a20b5fc262d3c3767de172a8bc60c531 Mon Sep 17 00:00:00 2001 From: Tilps Date: Mon, 9 Dec 2019 00:00:03 +1100 Subject: [PATCH 12/39] Constant policy map shouldn't be a variable, or it gets saved. --- tf/tfprocess.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index 13f5a27e..36a8b268 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -49,8 +49,7 @@ def call(self, inputs): class ApplyPolicyMap(tf.keras.layers.Layer): def __init__(self, **kwargs): super(ApplyPolicyMap, self).__init__(**kwargs) - fc1_init = tf.constant(lc0_az_policy_map.make_map()) - self.fc1 = tf.Variable(fc1_init, trainable=False) + self.fc1 = tf.constant(lc0_az_policy_map.make_map()) def call(self, inputs): h_conv_pol_flat = tf.reshape(inputs, [-1, 80*8*8]) From c90a9c49fb1aed38f92bf68f69a257d3e554b316 Mon Sep 17 00:00:00 2001 From: Tilps Date: Mon, 9 Dec 2019 20:22:41 +1100 Subject: [PATCH 13/39] Add untested network saving no idea if its output is the right format yet. 
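The core of the exporter added here is a layout conversion: TF stores
convolution kernels as [filter_h, filter_w, in_ch, out_ch] (HWIO) while the
Leela/cuDNN format expects [out, in, h, w] (OIHW), and dense kernels go from
[in, out] to [out, in]. A standalone sketch of that reordering (helper name
invented for illustration):

    import tensorflow as tf

    def to_leela_layout(w):
        if w.shape.ndims == 4:
            # Convolution kernel: HWIO -> OIHW.
            return tf.transpose(w, perm=[3, 2, 0, 1])
        if w.shape.ndims == 2:
            # Dense kernel: [in, out] -> [out, in].
            return tf.transpose(w, perm=[1, 0])
        # Biases and batch-norm vectors keep their shape.
        return w
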
--- tf/tfprocess.py | 82 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 77 insertions(+), 5 deletions(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index 36a8b268..dca5a453 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -441,7 +441,6 @@ def restore_v2(self): print("Restoring from {0}".format(self.manager.latest_checkpoint)) self.checkpoint.restore(self.manager.latest_checkpoint) - def restore(self, file): print("Restoring from {0}".format(file)) self.saver.restore(self.session, file) @@ -596,9 +595,10 @@ def process_v2(self, batch_size, test_batches, batch_splits=1): 'checkpoint_steps' in self.cfg['training'] and steps % self.cfg['training']['checkpoint_steps'] == 0): self.manager.save() print("Model saved in file: {}".format(self.manager.latest_checkpoint)) - leela_path = self.manager.latest_checkpoint + "-" + str(steps) + evaled_steps = steps.numpy() + leela_path = self.manager.latest_checkpoint + "-" + str(evaled_steps) #swa_path = path + "-swa-" + str(steps) - self.net.pb.training_params.training_steps = steps + self.net.pb.training_params.training_steps = evaled_steps self.save_leelaz_weights_v2(leela_path) #print("Weights saved in file: {}".format(leela_path)) #if self.swa_enabled: @@ -940,6 +940,14 @@ def snap_restore(self): # Restore variables in the current graph from the snapshot. self.session.run(self.snap_restore_op) + def save_swa_weights_v2(self, filename): + backup = [w.read_value() for w in self.model.weights] + for (swa, w) in zip(self.swa_weights, self.model.weights): + w.assign(swa.read_value()) + self.save_leelaz_weights_v2(self, filename) + for (old, w) in zip(backup, self.model.weights): + w.assign(old) + def save_swa_weights(self, filename): self.snap_save() self.session.run(self.swa_load_op) @@ -947,8 +955,72 @@ def save_swa_weights(self, filename): self.snap_restore() def save_leelaz_weights_v2(self, filename): - for w in self.model.weights: - tf.print(w.name) + all_tensors = [] + all_weights = [] + last_was_gamma = False + for weights in self.model.weights: + work_weights = None + if weights.shape.ndims == 4: + # Convolution weights need a transpose + # + # TF (kYXInputOutput) + # [filter_height, filter_width, in_channels, out_channels] + # + # Leela/cuDNN/Caffe (kOutputInputYX) + # [output, input, filter_size, filter_size] + work_weights = tf.transpose(a=weights, perm=[3, 2, 0, 1]) + elif weights.shape.ndims == 2: + # Fully connected layers are [in, out] in TF + # + # [out, in] in Leela + # + work_weights = tf.transpose(a=weights, perm=[1, 0]) + else: + # Biases, batchnorm etc + # pb expects every batch norm to have gammas, but not all of our + # batch norms have gammas, so manually add pretend gammas. + if 'beta:' in weights.name and not last_was_gamma: + all_tensors.append(tf.ones_like(weights)) + work_weights = weights.read_value() + all_tensors.append(work_weights) + last_was_gamma = 'gamma:' in weights.name + + # HACK: model weights ordering is some kind of breadth first traversal, + # but pb expects a specific ordering which BFT is not a match for once + # we get to the heads. Apply manual permutation. + # This is fragile and at minimum should have some checks to ensure it isn't breaking things. + #TODO: also support classic policy head as it has a different set of layers and hence changes the permutation. 
+ permuted_tensors = [w for w in all_tensors] + permuted_tensors[-5] = all_tensors[-10] + permuted_tensors[-6] = all_tensors[-11] + permuted_tensors[-7] = all_tensors[-12] + permuted_tensors[-8] = all_tensors[-14] + permuted_tensors[-9] = all_tensors[-5] + permuted_tensors[-10] = all_tensors[-6] + permuted_tensors[-11] = all_tensors[-7] + permuted_tensors[-12] = all_tensors[-8] + permuted_tensors[-13] = all_tensors[-9] + permuted_tensors[-14] = all_tensors[-13] + all_tensors = permuted_tensors + + for e, nparray in enumerate(all_tensors): + # Rescale rule50 related weights as clients do not normalize the input. + if e == 0: + num_inputs = 112 + # 50 move rule is the 110th input, or 109 starting from 0. + rule50_input = 109 + wt_flt = [] + for i, weight in enumerate(np.ravel(nparray)): + if (i % (num_inputs*9))//9 == rule50_input: + wt_flt.append(weight/99) + else: + wt_flt.append(weight) + else: + wt_flt = [wt for wt in np.ravel(nparray)] + all_weights.append(wt_flt) + + self.net.fill_net(all_weights) + self.net.save_proto(filename) def save_leelaz_weights(self, filename): all_weights = [] From 61158e22459be00646a200d5a4402924a925a823 Mon Sep 17 00:00:00 2001 From: Tilps Date: Mon, 9 Dec 2019 21:37:24 +1100 Subject: [PATCH 14/39] Fix permutation which didn't take into account the inserted gammas. --- tf/tfprocess.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index dca5a453..633080e7 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -597,7 +597,7 @@ def process_v2(self, batch_size, test_batches, batch_splits=1): print("Model saved in file: {}".format(self.manager.latest_checkpoint)) evaled_steps = steps.numpy() leela_path = self.manager.latest_checkpoint + "-" + str(evaled_steps) - #swa_path = path + "-swa-" + str(steps) + #swa_path = path + "-swa-" + str(evaled_steps) self.net.pb.training_params.training_steps = evaled_steps self.save_leelaz_weights_v2(leela_path) #print("Weights saved in file: {}".format(leela_path)) @@ -991,16 +991,18 @@ def save_leelaz_weights_v2(self, filename): # This is fragile and at minimum should have some checks to ensure it isn't breaking things. #TODO: also support classic policy head as it has a different set of layers and hence changes the permutation. permuted_tensors = [w for w in all_tensors] - permuted_tensors[-5] = all_tensors[-10] - permuted_tensors[-6] = all_tensors[-11] - permuted_tensors[-7] = all_tensors[-12] + permuted_tensors[-5] = all_tensors[-11] + permuted_tensors[-6] = all_tensors[-12] + permuted_tensors[-7] = all_tensors[-13] permuted_tensors[-8] = all_tensors[-14] - permuted_tensors[-9] = all_tensors[-5] - permuted_tensors[-10] = all_tensors[-6] - permuted_tensors[-11] = all_tensors[-7] - permuted_tensors[-12] = all_tensors[-8] - permuted_tensors[-13] = all_tensors[-9] - permuted_tensors[-14] = all_tensors[-13] + permuted_tensors[-9] = all_tensors[-16] + permuted_tensors[-10] = all_tensors[-5] + permuted_tensors[-11] = all_tensors[-6] + permuted_tensors[-12] = all_tensors[-7] + permuted_tensors[-13] = all_tensors[-8] + permuted_tensors[-14] = all_tensors[-9] + permuted_tensors[-15] = all_tensors[-10] + permuted_tensors[-16] = all_tensors[-15] all_tensors = permuted_tensors for e, nparray in enumerate(all_tensors): From 8681df6c93e9640dea47c6c7af52e23fa9ead25c Mon Sep 17 00:00:00 2001 From: Tilps Date: Mon, 9 Dec 2019 22:02:58 +1100 Subject: [PATCH 15/39] GlobalAveragePooling2D needs to know the data format. 
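Why the one-line change matters: the network runs NCHW throughout, but
GlobalAveragePooling2D defaults to channels_last, so without an explicit
data_format the squeeze-excitation pooling averages over the wrong axes. A
quick shape check illustrating the failure mode:

    import tensorflow as tf

    x = tf.zeros([1, 64, 8, 8])  # NCHW: 64 channels on an 8x8 board
    ok = tf.keras.layers.GlobalAveragePooling2D(data_format='channels_first')(x)
    print(ok.shape)   # (1, 64): one mean per channel, as intended
    bad = tf.keras.layers.GlobalAveragePooling2D()(x)  # channels_last default
    print(bad.shape)  # (1, 8): the last board dimension is mistaken for channels
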
--- tf/tfprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index 633080e7..2735e8db 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -1142,7 +1142,7 @@ def batch_norm(self, net, scope, scale=False): def squeeze_excitation_v2(self, inputs, channels): - pooled = tf.keras.layers.GlobalAveragePooling2D()(inputs) + pooled = tf.keras.layers.GlobalAveragePooling2D(data_format='channels_first')(inputs) squeezed = tf.keras.layers.Activation('relu')(tf.keras.layers.Dense(channels // self.SE_ratio, kernel_initializer='glorot_normal', kernel_regularizer=self.l2reg)(pooled)) excited = tf.keras.layers.Dense(2 * channels, kernel_initializer='glorot_normal', kernel_regularizer=self.l2reg)(squeezed) return ApplySqueezeExcitation()([inputs, excited]) From 215445d41e10a3790741040ea3db68a58e2bd99a Mon Sep 17 00:00:00 2001 From: Tilps Date: Tue, 10 Dec 2019 10:17:06 +1100 Subject: [PATCH 16/39] Use tf.function on the inner loop of training for massive speed up --- tf/tfprocess.py | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index 2735e8db..02ded6ad 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -461,6 +461,26 @@ def process_loop(self, batch_size, test_batches, batch_splits=1): for _ in range(steps % total_steps, total_steps): self.process(batch_size, test_batches, batch_splits=batch_splits) + @tf.function() + def process_inner_loop(self): + print('tracing inner loop!') + x, y, z, q = next(self.train_iter) + with tf.GradientTape() as tape: + policy, value = self.model(x) + policy_loss = self.policy_loss_fn(y, policy) + reg_term = sum(self.model.losses) + if self.wdl: + value_loss = self.value_loss_fn(self.qMix(z, q), value) + total_loss = self.lossMix(policy_loss, value_loss) + reg_term + else: + mse_loss = self.mse_loss_fn(self.qMix(z, q), value) + total_loss = self.lossMix(policy_loss, mse_loss) + reg_term + if self.wdl: + mse_loss = self.mse_loss_fn(self.qMix(z, q), value) + else: + value_loss = self.value_loss_fn(self.qMix(z, q), value) + return policy_loss, value_loss, mse_loss, reg_term, tape.gradient(total_loss, self.model.trainable_weights) + def process_v2(self, batch_size, test_batches, batch_splits=1): if not self.time_start: self.time_start = time.time() @@ -505,25 +525,10 @@ def process_v2(self, batch_size, test_batches, batch_splits=1): # Run training for this batch grads = None for _ in range(batch_splits): - x, y, z, q = next(self.train_iter) - with tf.GradientTape() as tape: - policy, value = self.model(x) - policy_loss = self.policy_loss_fn(y, policy) - reg_term = sum(self.model.losses) - if self.wdl: - value_loss = self.value_loss_fn(self.qMix(z, q), value) - total_loss = self.lossMix(policy_loss, value_loss) + reg_term - else: - mse_loss = self.mse_loss_fn(self.qMix(z, q), value) - total_loss = self.lossMix(policy_loss, mse_loss) + reg_term - if self.wdl: - mse_loss = self.mse_loss_fn(self.qMix(z, q), value) - else: - value_loss = self.value_loss_fn(self.qMix(z, q), value) + policy_loss, value_loss, mse_loss, reg_term, new_grads = self.process_inner_loop() if not grads: - grads = tape.gradient(total_loss, self.model.trainable_weights) + grads = new_grads else: - new_grads = tape.gradient(total_loss, self.model.trainable_weights) grads = [tf.math.add(a, b) for (a, b) in zip(grads, new_grads)] # Keep running averages # Google's paper scales MSE by 1/4 to a [0, 1] range, so do the same to From 
bdec90303ededf7c613db79a2b3f3f76087b10f7 Mon Sep 17 00:00:00 2001 From: Tilps Date: Tue, 10 Dec 2019 10:56:26 +1100 Subject: [PATCH 17/39] Extract test summary inner loop to tf.function for a bit more performance --- tf/tfprocess.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index 02ded6ad..54d7fc58 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -756,6 +756,20 @@ def calculate_swa_summaries(self, test_batches, steps): self.test_writer = true_test_writer self.snap_restore() + @tf.function() + def calculate_test_summaries_inner_loop(self): + print('tracing summaries inner loop!') + x, y, z, q = next(self.test_iter) + policy, value = self.model(x) + policy_loss = self.policy_loss_fn(y, policy) + if self.wdl: + value_loss = self.value_loss_fn(self.qMix(z, q), value) + mse_loss = self.mse_loss_fn(self.qMix(z, q), value) + else: + value_loss = self.value_loss_fn(self.qMix(z, q), value) + mse_loss = self.mse_loss_fn(self.qMix(z, q), value) + return policy_loss, value_loss, mse_loss + def calculate_test_summaries_v2(self, test_batches, steps): sum_policy_accuracy = 0 sum_value_accuracy = 0 @@ -763,18 +777,7 @@ def calculate_test_summaries_v2(self, test_batches, steps): sum_policy = 0 sum_value = 0 for _ in range(0, test_batches): - x, y, z, q = next(self.test_iter) - policy, value = self.model(x) - policy_loss = self.policy_loss_fn(y, policy) - reg_term = sum(self.model.losses) - if self.wdl: - value_loss = self.value_loss_fn(self.qMix(z, q), value) - mse_loss = self.mse_loss_fn(self.qMix(z, q), value) - total_loss = self.lossMix(policy_loss, value_loss) + reg_term - else: - value_loss = self.value_loss_fn(self.qMix(z, q), value) - mse_loss = self.mse_loss_fn(self.qMix(z, q), value) - total_loss = self.lossMix(policy_loss, mse_loss) + reg_term + policy_loss, value_loss, mse_loss = self.calculate_test_summaries_inner_loop() #sum_policy_accuracy += test_policy_accuracy sum_mse += mse_loss sum_policy += policy_loss From 0b173560d4877e8251426576bd721555d60c8118 Mon Sep 17 00:00:00 2001 From: Tilps Date: Wed, 11 Dec 2019 11:08:47 +1100 Subject: [PATCH 18/39] Basic tensorboard summary data writting and some other cleanup related to saving. 
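For orientation, the TF2 summary API adopted below replaces the v1 FileWriter
and Summary protos: create a writer once, log scalars under its as_default()
scope with an explicit step, then flush. A minimal sketch of the pattern (log
path invented for the example):

    import tensorflow as tf

    writer = tf.summary.create_file_writer('leelalogs/example-train')
    with writer.as_default():
        tf.summary.scalar('Policy Loss', 0.123, step=100)
        tf.summary.scalar('LR', 0.01, step=100)
    writer.flush()
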
--- tf/tfprocess.py | 47 +++++++++++++++++++++++++++-------------------- tf/train.py | 10 +++++----- 2 files changed, 32 insertions(+), 25 deletions(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index 54d7fc58..359c9be5 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -136,7 +136,7 @@ def __init__(self, cfg): gpus = tf.config.experimental.list_physical_devices('GPU') tf.config.experimental.set_visible_devices(gpus[self.cfg['gpu']], 'GPU') - self.global_step = tf.Variable(0, name='global_step', trainable=False) + self.global_step = tf.Variable(0, name='global_step', trainable=False, dtype=tf.int64) def init_v2(self, train_dataset, test_dataset): self.l2reg = tf.keras.regularizers.l2(l=0.5 * (0.0001)) @@ -242,6 +242,13 @@ def mse_loss(target, output): self.cfg['training']['lr_boundaries'].sort() self.warmup_steps = self.cfg['training'].get('warmup_steps', 0) self.lr = self.cfg['training']['lr_values'][0] + self.test_writer = tf.summary.create_file_writer( + os.path.join(os.getcwd(), "leelalogs/{}-test".format(self.cfg['name']))) + self.train_writer = tf.summary.create_file_writer( + os.path.join(os.getcwd(), "leelalogs/{}-train".format(self.cfg['name']))) + if self.swa_enabled: + self.swa_writer = tf.summary.create_file_writer( + os.path.join(os.getcwd(), "leelalogs/{}-swa-test".format(self.cfg['name']))) def init_net(self, next_batch): self.x = next_batch[0] # tf.placeholder(tf.float32, [None, 112, 8*8]) @@ -570,16 +577,14 @@ def process_v2(self, batch_size, test_batches, batch_splits=1): #after_weights = self.session.run(self.weights) #update_ratio_summaries = self.compute_update_ratio( # before_weights, after_weights) - - #train_summaries = tf.compat.v1.Summary(value=[ - # tf.compat.v1.Summary.Value(tag="Policy Loss", simple_value=avg_policy_loss), - # tf.compat.v1.Summary.Value(tag="Value Loss", simple_value=avg_value_loss), - # tf.compat.v1.Summary.Value(tag="Reg term", simple_value=avg_reg_term), - # tf.compat.v1.Summary.Value(tag="LR", simple_value=self.lr), - # tf.compat.v1.Summary.Value(tag="Gradient norm", - # simple_value=grad_norm / batch_splits), - # tf.compat.v1.Summary.Value(tag="MSE Loss", simple_value=avg_mse_loss)]) - #self.train_writer.add_summary(train_summaries, steps) + with self.train_writer.as_default(): + tf.summary.scalar("Policy Loss", avg_policy_loss, step=steps) + tf.summary.scalar("Value Loss", avg_value_loss, step=steps) + tf.summary.scalar("Reg term", avg_reg_term, step=steps) + tf.summary.scalar("LR", self.lr, step=steps) + tf.summary.scalar("Gradient norm", grad_norm / batch_splits, step=steps) + tf.summary.scalar("MSE Loss", avg_mse_loss, step=steps) + self.train_writer.flush() #self.train_writer.add_summary(update_ratio_summaries, steps) self.time_start = time_end self.last_steps = steps @@ -602,13 +607,13 @@ def process_v2(self, batch_size, test_batches, batch_splits=1): print("Model saved in file: {}".format(self.manager.latest_checkpoint)) evaled_steps = steps.numpy() leela_path = self.manager.latest_checkpoint + "-" + str(evaled_steps) - #swa_path = path + "-swa-" + str(evaled_steps) + swa_path = self.manager.latest_checkpoint + "-swa-" + str(evaled_steps) self.net.pb.training_params.training_steps = evaled_steps self.save_leelaz_weights_v2(leela_path) - #print("Weights saved in file: {}".format(leela_path)) - #if self.swa_enabled: - # self.save_swa_weights(swa_path) - # print("SWA Weights saved in file: {}".format(swa_path)) + print("Weights saved in file: {}".format(leela_path)) + if self.swa_enabled: + self.save_swa_weights(swa_path) + 
print("SWA Weights saved in file: {}".format(swa_path)) def process(self, batch_size, test_batches, batch_splits=1): if not self.time_start: @@ -798,18 +803,20 @@ def calculate_test_summaries_v2(self, test_batches, steps): self.net.pb.training_params.policy_loss = sum_policy # TODO store value and value accuracy in pb #self.net.pb.training_params.accuracy = sum_policy_accuracy + with self.test_writer.as_default(): + tf.summary.scalar("Policy Loss", sum_policy, step=steps) + tf.summary.scalar("Value Loss", sum_value, step=steps) + tf.summary.scalar("MSE Loss", sum_mse, step=steps) + self.test_writer.flush() + #if self.wdl: # test_summaries = tf.compat.v1.Summary(value=[ # tf.compat.v1.Summary.Value(tag="Policy Accuracy", simple_value=sum_policy_accuracy), # tf.compat.v1.Summary.Value(tag="Value Accuracy", simple_value=sum_value_accuracy), - # tf.compat.v1.Summary.Value(tag="Policy Loss", simple_value=sum_policy), - # tf.compat.v1.Summary.Value(tag="Value Loss", simple_value=sum_value), - # tf.compat.v1.Summary.Value(tag="MSE Loss", simple_value=sum_mse)]).SerializeToString() #else: # test_summaries = tf.compat.v1.Summary(value=[ # tf.compat.v1.Summary.Value(tag="Policy Accuracy", simple_value=sum_policy_accuracy), # tf.compat.v1.Summary.Value(tag="Policy Loss", simple_value=sum_policy), - # tf.compat.v1.Summary.Value(tag="MSE Loss", simple_value=sum_mse)]).SerializeToString() #test_summaries = tf.compat.v1.summary.merge( # [test_summaries] + self.histograms).eval(session=self.session) #self.test_writer.add_summary(test_summaries, steps) diff --git a/tf/train.py b/tf/train.py index b9a7db0c..a4c98462 100755 --- a/tf/train.py +++ b/tf/train.py @@ -156,11 +156,11 @@ def main(cmd): #tfprocess.process_loop(total_batch_size, num_evals, batch_splits=batch_splits) tfprocess.process_loop_v2(total_batch_size, num_evals, batch_splits=batch_splits) - #if cmd.output is not None: - # if cfg['training'].get('swa_output', False): - # tfprocess.save_swa_weights(cmd.output) - # else: - # tfprocess.save_leelaz_weights(cmd.output) + if cmd.output is not None: + if cfg['training'].get('swa_output', False): + tfprocess.save_swa_weights_v2(cmd.output) + else: + tfprocess.save_leelaz_weights_v2(cmd.output) #tfprocess.session.close() train_parser.shutdown() From 43cdaf24f17a9cd705f8565ac63a0ee44fda6621 Mon Sep 17 00:00:00 2001 From: Tilps Date: Wed, 11 Dec 2019 11:32:23 +1100 Subject: [PATCH 19/39] Remove some v1 code which I've done a second pass checking conversion. 
--- tf/tfprocess.py | 220 +----------------------------------------------- 1 file changed, 2 insertions(+), 218 deletions(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index 359c9be5..25f32877 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -1085,19 +1085,6 @@ def save_leelaz_weights(self, filename): self.net.fill_net(all_weights) self.net.save_proto(filename) - def get_batchnorm_key(self): - result = "bn" + str(self.batch_norm_count) - self.batch_norm_count += 1 - return result - - def add_weights(self, var): - if var.name[-11:] == "fp16_cast:0": - name = var.name[:-12] + ":0" - var = tf.compat.v1.get_default_graph().get_tensor_by_name(name) - # All trainable variables should be stored as fp32 - assert var.dtype.base_dtype == tf.float32 - self.weights.append(var) - def batch_norm_v2(self, input, scale=False): if self.renorm_enabled: clipping = { @@ -1114,108 +1101,18 @@ def batch_norm_v2(self, input, scale=False): epsilon=1e-5, axis=1, fused=False, center=True, scale=scale, virtual_batch_size=64)(input) - - def batch_norm(self, net, scope, scale=False): - # The weights are internal to the batchnorm layer, so apply - # a unique scope that we can store, and use to look them back up - # later on. - - with tf.compat.v1.variable_scope(scope, custom_getter=float32_variable_storage_getter): - if self.renorm_enabled: - clipping = { - "rmin": 1.0/self.renorm_max_r, - "rmax": self.renorm_max_r, - "dmax": self.renorm_max_d - } - # Renorm has issues with fp16, cast to fp32. - net = tf.compat.v1.layers.batch_normalization( - tf.cast(net, tf.float32), epsilon=1e-5, axis=1, fused=True, - center=True, scale=scale, - renorm=True, renorm_clipping=clipping, - renorm_momentum=self.renorm_momentum, - training=self.training) - net = tf.cast(net, self.model_dtype) - else: - # Virtual batch doesn't work with fp16 - virtual_batch = 64 if self.model_dtype == tf.float32 else None - net = tf.compat.v1.layers.batch_normalization( - net, epsilon=1e-5, axis=1, fused=True, - center=True, scale=scale, - virtual_batch_size=virtual_batch, - training=self.training) - - for v in ['gamma', 'beta', 'moving_mean', 'moving_variance' ]: - if v == 'gamma' and not scale: - var = tf.Variable(tf.ones(shape=[net.shape[1]]), - name=scope + '/fixed_gamma', trainable=False, - dtype=tf.float32) - else: - name = "fp32_storage/" + scope + '/batch_normalization/' + v + ':0' - var = tf.compat.v1.get_default_graph().get_tensor_by_name(name) - self.add_weights(var) - return net - - def squeeze_excitation_v2(self, inputs, channels): + assert channels % self.SE_ratio == 0 + pooled = tf.keras.layers.GlobalAveragePooling2D(data_format='channels_first')(inputs) squeezed = tf.keras.layers.Activation('relu')(tf.keras.layers.Dense(channels // self.SE_ratio, kernel_initializer='glorot_normal', kernel_regularizer=self.l2reg)(pooled)) excited = tf.keras.layers.Dense(2 * channels, kernel_initializer='glorot_normal', kernel_regularizer=self.l2reg)(squeezed) return ApplySqueezeExcitation()([inputs, excited]) - def squeeze_excitation(self, x, channels, ratio): - - assert channels % ratio == 0 - - # NCHW format reduced to NC - net = tf.reduce_mean(input_tensor=x, axis=[2, 3]) - - W_fc1 = weight_variable([channels, channels // ratio], name='se_fc1_w', - dtype=self.model_dtype) - b_fc1 = bias_variable([channels // ratio], name='se_fc1_b', - dtype=self.model_dtype) - self.add_weights(W_fc1) - self.add_weights(b_fc1) - - net = tf.nn.relu(tf.add(tf.matmul(net, W_fc1), b_fc1)) - - W_fc2 = weight_variable( - [channels // ratio, 2 * channels], 
name='se_fc2_w', - dtype=self.model_dtype) - b_fc2 = bias_variable([2 * channels], name='se_fc2_b', - dtype=self.model_dtype) - self.add_weights(W_fc2) - self.add_weights(b_fc2) - - net = tf.add(tf.matmul(net, W_fc2), b_fc2) - net = tf.reshape(net, [-1, 2 * channels, 1, 1]) - - # Split to scale and bias - gammas, betas = tf.split(net, 2, axis=1) - - out = tf.nn.sigmoid(gammas) * x + betas - - return out - def conv_block_v2(self, inputs, filter_size, output_channels, bn_scale=False): conv = tf.keras.layers.Conv2D(output_channels, filter_size, use_bias=False, padding='same', kernel_initializer='glorot_normal', kernel_regularizer=self.l2reg, data_format='channels_first')(inputs) return tf.keras.layers.Activation('relu')(self.batch_norm_v2(conv, scale=bn_scale)) - def conv_block(self, inputs, filter_size, input_channels, output_channels, bn_scale=False): - # The weights are internal to the batchnorm layer, so apply - # a unique scope that we can store, and use to look them back up - # later on. - weight_key = self.get_batchnorm_key() - conv_key = weight_key + "/conv_weight" - W_conv = weight_variable([filter_size, filter_size, - input_channels, output_channels], name=conv_key, - dtype=self.model_dtype) - - self.add_weights(W_conv) - h_bn = self.batch_norm(conv2d(inputs, W_conv), weight_key, scale=bn_scale) - h_conv = tf.nn.relu(h_bn) - - return h_conv - def residual_block_v2(self, inputs, channels): conv1 = tf.keras.layers.Conv2D(channels, 3, use_bias=False, padding='same', kernel_initializer='glorot_normal', kernel_regularizer=self.l2reg, data_format='channels_first')(inputs) out1 = tf.keras.layers.Activation('relu')(self.batch_norm_v2(conv1, scale=False)) @@ -1223,34 +1120,6 @@ def residual_block_v2(self, inputs, channels): out2 = self.squeeze_excitation_v2(self.batch_norm_v2(conv2, scale=True), channels) return tf.keras.layers.Activation('relu')(tf.keras.layers.add([inputs, out2])) - - def residual_block(self, inputs, channels): - # First convnet - orig = tf.identity(inputs) - weight_key_1 = self.get_batchnorm_key() - conv_key_1 = weight_key_1 + "/conv_weight" - W_conv_1 = weight_variable([3, 3, channels, channels], name=conv_key_1, - dtype=self.model_dtype) - - # Second convnet - weight_key_2 = self.get_batchnorm_key() - conv_key_2 = weight_key_2 + "/conv_weight" - W_conv_2 = weight_variable([3, 3, channels, channels], name=conv_key_2, - dtype=self.model_dtype) - - self.add_weights(W_conv_1) - h_bn1 = self.batch_norm(conv2d(inputs, W_conv_1), weight_key_1, scale=False) - h_out_1 = tf.nn.relu(h_bn1) - - self.add_weights(W_conv_2) - h_bn2 = self.batch_norm(conv2d(h_out_1, W_conv_2), weight_key_2, scale=True) - - with tf.compat.v1.variable_scope(weight_key_2): - h_se = self.squeeze_excitation(h_bn2, channels, self.SE_ratio) - h_out_2 = tf.nn.relu(tf.add(h_se, orig)) - - return h_out_2 - def construct_net_v2(self, inputs): flow = self.conv_block_v2(inputs, filter_size=3, output_channels=self.RESIDUAL_FILTERS, bn_scale=True) for _ in range(0, self.RESIDUAL_BLOCKS): @@ -1278,88 +1147,3 @@ def construct_net_v2(self, inputs): h_fc3 = tf.keras.layers.Dense(1, kernel_initializer='glorot_normal', kernel_regularizer=self.l2reg, activation='tanh')(h_fc2) return h_fc1, h_fc3 - - def construct_net(self, planes): - # NCHW format - # batch, 112 input channels, 8 x 8 - x_planes = tf.reshape(planes, [-1, 112, 8, 8]) - x_planes = tf.cast(x_planes, dtype=self.model_dtype) - - # Input convolution - flow = self.conv_block(x_planes, filter_size=3, - input_channels=112, - output_channels=self.RESIDUAL_FILTERS, - 
bn_scale=True) - # Residual tower - for _ in range(0, self.RESIDUAL_BLOCKS): - flow = self.residual_block(flow, self.RESIDUAL_FILTERS) - - # Policy head - if self.POLICY_HEAD == pb.NetworkFormat.POLICY_CONVOLUTION: - conv_pol = self.conv_block(flow, filter_size=3, - input_channels=self.RESIDUAL_FILTERS, - output_channels=self.RESIDUAL_FILTERS) - W_pol_conv = weight_variable([3, 3, - self.RESIDUAL_FILTERS, 80], name='W_pol_conv2', - dtype=self.model_dtype) - b_pol_conv = bias_variable([80], name='b_pol_conv2', - dtype=self.model_dtype) - - self.add_weights(W_pol_conv) - tf.compat.v1.add_to_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES, b_pol_conv) - self.add_weights(b_pol_conv) - - conv_pol2 = tf.nn.bias_add( - conv2d(conv_pol, W_pol_conv), b_pol_conv, data_format='NCHW') - - h_conv_pol_flat = tf.reshape(conv_pol2, [-1, 80*8*8]) - fc1_init = tf.constant(lc0_az_policy_map.make_map(), dtype=self.model_dtype) - W_fc1 = tf.Variable(fc1_init, trainable=False, name="policy_map") - - h_fc1 = tf.matmul(h_conv_pol_flat, W_fc1, name='policy_head') - elif self.POLICY_HEAD == pb.NetworkFormat.POLICY_CLASSICAL: - conv_pol = self.conv_block(flow, filter_size=1, - input_channels=self.RESIDUAL_FILTERS, - output_channels=self.policy_channels) - h_conv_pol_flat = tf.reshape( - conv_pol, [-1, self.policy_channels*8*8]) - W_fc1 = weight_variable( - [self.policy_channels*8*8, 1858], name='fc1/weight', - dtype=self.model_dtype) - b_fc1 = bias_variable([1858], name='fc1/bias', - dtype=self.model_dtype) - self.add_weights(W_fc1) - tf.compat.v1.add_to_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES, b_fc1) - self.add_weights(b_fc1) - h_fc1 = tf.add(tf.matmul(h_conv_pol_flat, W_fc1), - b_fc1, name='policy_head') - else: - raise ValueError( - "Unknown policy head type {}".format(self.POLICY_HEAD)) - - # Value head - conv_val = self.conv_block(flow, filter_size=1, - input_channels=self.RESIDUAL_FILTERS, - output_channels=32) - h_conv_val_flat = tf.reshape(conv_val, [-1, 32*8*8]) - W_fc2 = weight_variable([32 * 8 * 8, 128], name='fc2/weight', - dtype=self.model_dtype) - b_fc2 = bias_variable([128], name='fc2/bias', dtype=self.model_dtype) - self.add_weights(W_fc2) - self.add_weights(b_fc2) - h_fc2 = tf.nn.relu(tf.add(tf.matmul(h_conv_val_flat, W_fc2), b_fc2)) - value_outputs = 3 if self.wdl else 1 - W_fc3 = weight_variable([128, value_outputs], name='fc3/weight', - dtype=self.model_dtype) - b_fc3 = bias_variable([value_outputs], name='fc3/bias', - dtype=self.model_dtype) - self.add_weights(W_fc3) - self.add_weights(b_fc3) - h_fc3 = tf.add(tf.matmul(h_fc2, W_fc3), b_fc3, name='value_head') - if not self.wdl: - h_fc3 = tf.nn.tanh(h_fc3) - else: - tf.compat.v1.add_to_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES, b_fc3) - - - return h_fc1, h_fc3 From 53c60575507cc13fd84746733e4c3810af4cd898 Mon Sep 17 00:00:00 2001 From: Tilps Date: Wed, 11 Dec 2019 12:04:43 +1100 Subject: [PATCH 20/39] Add a missing _v2. 
--- tf/tfprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index 25f32877..0e760e53 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -612,7 +612,7 @@ def process_v2(self, batch_size, test_batches, batch_splits=1): self.save_leelaz_weights_v2(leela_path) print("Weights saved in file: {}".format(leela_path)) if self.swa_enabled: - self.save_swa_weights(swa_path) + self.save_swa_weights_v2(swa_path) print("SWA Weights saved in file: {}".format(swa_path)) def process(self, batch_size, test_batches, batch_splits=1): From 6892b1c8f0bca60e103c69465f6cbf717e0628d1 Mon Sep 17 00:00:00 2001 From: Tilps Date: Wed, 11 Dec 2019 12:16:41 +1100 Subject: [PATCH 21/39] Fix bug in saving swa_weights that I hadn't tested before committing... --- tf/tfprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index 0e760e53..0267790e 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -959,7 +959,7 @@ def save_swa_weights_v2(self, filename): backup = [w.read_value() for w in self.model.weights] for (swa, w) in zip(self.swa_weights, self.model.weights): w.assign(swa.read_value()) - self.save_leelaz_weights_v2(self, filename) + self.save_leelaz_weights_v2(filename) for (old, w) in zip(backup, self.model.weights): w.assign(old) From c6109361f50b7fb73a90536c8a7a1c223fa4a3c9 Mon Sep 17 00:00:00 2001 From: Tilps Date: Wed, 11 Dec 2019 14:28:50 +1100 Subject: [PATCH 22/39] Add mixed precision support to tf2 version. --- tf/tfprocess.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index 0267790e..ecc8c325 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -53,7 +53,7 @@ def __init__(self, **kwargs): def call(self, inputs): h_conv_pol_flat = tf.reshape(inputs, [-1, 80*8*8]) - return tf.matmul(h_conv_pol_flat, self.fc1) + return tf.matmul(h_conv_pol_flat, tf.cast(self.fc1, h_conv_pol_flat.dtype)) def bias_variable(shape, name=None, dtype=tf.float32): @@ -135,6 +135,9 @@ def __init__(self, cfg): self.session = tf.compat.v1.Session(config=config) gpus = tf.config.experimental.list_physical_devices('GPU') tf.config.experimental.set_visible_devices(gpus[self.cfg['gpu']], 'GPU') + if self.model_dtype == tf.float16: + tf.keras.mixed_precision.experimental.set_policy('mixed_float16') + self.global_step = tf.Variable(0, name='global_step', trainable=False, dtype=tf.int64) @@ -145,7 +148,7 @@ def init_v2(self, train_dataset, test_dataset): self.test_dataset = test_dataset self.test_iter = iter(test_dataset) self.init_net_v2() - self.checkpoint = tf.train.Checkpoint(optimizer=self.optimizer, model=self.model, global_step=self.global_step, swa_count=self.swa_count) + self.checkpoint = tf.train.Checkpoint(optimizer=self.orig_optimizer, model=self.model, global_step=self.global_step, swa_count=self.swa_count) self.checkpoint.listed = self.swa_weights self.manager = tf.train.CheckpointManager( self.checkpoint, directory=self.root_dir, max_to_keep=50, keep_checkpoint_every_n_hours=24) @@ -181,7 +184,11 @@ def init_net_v2(self): self.active_lr = 0.01 # TODO set up optimizers and loss functions. 
self.optimizer = tf.keras.optimizers.SGD(learning_rate=lambda: self.active_lr, momentum=0.9, nesterov=True) - def policy_loss(target, output): + self.orig_optimizer = self.optimizer + if self.loss_scale != 1: + self.optimizer = tf.keras.mixed_precision.experimental.LossScaleOptimizer(self.optimizer, self.loss_scale) + def policy_loss(target, output): + output = tf.cast(output, tf.float32) # Calculate loss on policy head if self.cfg['training'].get('mask_legal_moves'): # extract mask for legal moves from target policy @@ -209,12 +216,14 @@ def policy_loss(target, output): # Loss on value head if self.wdl: def value_loss(target, output): + output = tf.cast(output, tf.float32) value_cross_entropy = \ tf.nn.softmax_cross_entropy_with_logits(labels=tf.stop_gradient(target), logits=output) return tf.reduce_mean(input_tensor=value_cross_entropy) self.value_loss_fn = value_loss def mse_loss(target, output): + output = tf.cast(output, tf.float32) scalar_z_conv = tf.matmul(tf.nn.softmax(output), wdl) scalar_target = tf.matmul(target, wdl) return tf.reduce_mean(input_tensor=tf.math.squared_difference(scalar_target, scalar_z_conv)) @@ -224,6 +233,7 @@ def value_loss(target, output): return tf.constant(0) self.value_loss_fn = value_loss def mse_loss(target, output): + output = tf.cast(output, tf.float32) scalar_target = tf.matmul(target, wdl) return tf.reduce_mean(input_tensor=tf.math.squared_difference(scalar_target, output)) self.mse_loss_fn = mse_loss @@ -482,6 +492,8 @@ def process_inner_loop(self): else: mse_loss = self.mse_loss_fn(self.qMix(z, q), value) total_loss = self.lossMix(policy_loss, mse_loss) + reg_term + if self.loss_scale != 1: + total_loss = self.optimizer.get_scaled_loss(total_loss) if self.wdl: mse_loss = self.mse_loss_fn(self.qMix(z, q), value) else: @@ -548,6 +560,8 @@ def process_v2(self, batch_size, test_batches, batch_splits=1): self.avg_reg_term.append(reg_term) # Gradients of batch splits are summed, not averaged like usual, so need to scale lr accordingly to correct for this. self.active_lr = self.lr / batch_splits + if self.loss_scale != 1: + grads = self.optimizer.get_unscaled_gradients(grads) max_grad_norm = self.cfg['training'].get('max_grad_norm', 10000.0) * batch_splits grads, grad_norm = tf.clip_by_global_norm(grads, max_grad_norm) self.optimizer.apply_gradients(zip(grads, self.model.trainable_weights)) From 5a2a95b2a8ef37b815ee7b0a0d666fe48e43b122 Mon Sep 17 00:00:00 2001 From: Tilps Date: Wed, 11 Dec 2019 15:00:05 +1100 Subject: [PATCH 23/39] Renorm also doesn't actually support fused. --- tf/tfprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index ecc8c325..b7dc3509 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -1107,7 +1107,7 @@ def batch_norm_v2(self, input, scale=False): "dmax": self.renorm_max_d } return tf.keras.layers.BatchNormalization( - epsilon=1e-5, axis=1, fused=True, center=True, + epsilon=1e-5, axis=1, fused=False, center=True, scale=scale, renorm=True, renorm_clipping=clipping, renorm_momentum=self.renorm_momentum)(input) else: From 6a37e9f941fc249abe45bc6f154b19e830f3776a Mon Sep 17 00:00:00 2001 From: Tilps Date: Wed, 11 Dec 2019 19:42:38 +1100 Subject: [PATCH 24/39] Re-add accuracy reporting. 
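Accuracy here is top-1 agreement: the argmax of the network head matches the
argmax of the training target, averaged over the batch (the caller scales it
to a percentage). The metric as a standalone sketch:

    import tensorflow as tf

    def accuracy(target, output):
        output = tf.cast(output, tf.float32)
        match = tf.equal(tf.argmax(target, axis=1), tf.argmax(output, axis=1))
        return tf.reduce_mean(tf.cast(match, tf.float32))
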
--- tf/tfprocess.py | 44 +++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index b7dc3509..6a976843 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -182,7 +182,6 @@ def init_net_v2(self): self.swa_weights = [tf.Variable(w, trainable=False) for w in self.model.weights] self.active_lr = 0.01 - # TODO set up optimizers and loss functions. self.optimizer = tf.keras.optimizers.SGD(learning_rate=lambda: self.active_lr, momentum=0.9, nesterov=True) self.orig_optimizer = self.optimizer if self.loss_scale != 1: @@ -241,7 +240,12 @@ def mse_loss(target, output): pol_loss_w = self.cfg['training']['policy_loss_weight'] val_loss_w = self.cfg['training']['value_loss_weight'] self.lossMix = lambda policy, value: pol_loss_w * policy + val_loss_w * value - + + def accuracy(target, output): + output = tf.cast(output, tf.float32) + return tf.reduce_mean(tf.cast(tf.equal(tf.argmax(input=target, axis=1), tf.argmax(input=output, axis=1)), tf.float32)) + self.accuracy_fn = accuracy + self.avg_policy_loss = [] self.avg_value_loss = [] self.avg_mse_loss = [] @@ -780,14 +784,17 @@ def calculate_test_summaries_inner_loop(self): print('tracing summaries inner loop!') x, y, z, q = next(self.test_iter) policy, value = self.model(x) - policy_loss = self.policy_loss_fn(y, policy) + policy_loss = self.policy_loss_fn(y, policy) + policy_accuracy = self.accuracy_fn(y, policy) if self.wdl: value_loss = self.value_loss_fn(self.qMix(z, q), value) mse_loss = self.mse_loss_fn(self.qMix(z, q), value) + value_accuracy = self.accuracy_fn(self.qMix(z,q), value) else: value_loss = self.value_loss_fn(self.qMix(z, q), value) mse_loss = self.mse_loss_fn(self.qMix(z, q), value) - return policy_loss, value_loss, mse_loss + value_accuracy = tf.constant(0.) 
+ return policy_loss, value_loss, mse_loss, policy_accuracy, value_accuracy def calculate_test_summaries_v2(self, test_batches, steps): sum_policy_accuracy = 0 @@ -796,41 +803,36 @@ def calculate_test_summaries_v2(self, test_batches, steps): sum_policy = 0 sum_value = 0 for _ in range(0, test_batches): - policy_loss, value_loss, mse_loss = self.calculate_test_summaries_inner_loop() - #sum_policy_accuracy += test_policy_accuracy + policy_loss, value_loss, mse_loss, policy_accuracy, value_accuracy = self.calculate_test_summaries_inner_loop() + sum_policy_accuracy += policy_accuracy sum_mse += mse_loss sum_policy += policy_loss if self.wdl: - #sum_value_accuracy += test_value_accuracy + sum_value_accuracy += value_accuracy sum_value += value_loss - #sum_policy_accuracy /= test_batches - #sum_policy_accuracy *= 100 + sum_policy_accuracy /= test_batches + sum_policy_accuracy *= 100 sum_policy /= test_batches sum_value /= test_batches - #if self.wdl: - #sum_value_accuracy /= test_batches - #sum_value_accuracy *= 100 + if self.wdl: + sum_value_accuracy /= test_batches + sum_value_accuracy *= 100 # Additionally rescale to [0, 1] so divide by 4 sum_mse /= (4.0 * test_batches) self.net.pb.training_params.learning_rate = self.lr self.net.pb.training_params.mse_loss = sum_mse self.net.pb.training_params.policy_loss = sum_policy # TODO store value and value accuracy in pb - #self.net.pb.training_params.accuracy = sum_policy_accuracy + self.net.pb.training_params.accuracy = sum_policy_accuracy with self.test_writer.as_default(): tf.summary.scalar("Policy Loss", sum_policy, step=steps) tf.summary.scalar("Value Loss", sum_value, step=steps) tf.summary.scalar("MSE Loss", sum_mse, step=steps) + tf.summary.scalar("Policy Accuracy", sum_policy_accuracy, step=steps) + if self.wdl: + tf.summary.scalar("Value Accuracy", sum_value_accuracy, step=steps) self.test_writer.flush() - #if self.wdl: - # test_summaries = tf.compat.v1.Summary(value=[ - # tf.compat.v1.Summary.Value(tag="Policy Accuracy", simple_value=sum_policy_accuracy), - # tf.compat.v1.Summary.Value(tag="Value Accuracy", simple_value=sum_value_accuracy), - #else: - # test_summaries = tf.compat.v1.Summary(value=[ - # tf.compat.v1.Summary.Value(tag="Policy Accuracy", simple_value=sum_policy_accuracy), - # tf.compat.v1.Summary.Value(tag="Policy Loss", simple_value=sum_policy), #test_summaries = tf.compat.v1.summary.merge( # [test_summaries] + self.histograms).eval(session=self.session) #self.test_writer.add_summary(test_summaries, steps) From f2f32a2fb4cebd0ae09706d4938d4a6c7bae5de7 Mon Sep 17 00:00:00 2001 From: Tilps Date: Wed, 11 Dec 2019 19:52:44 +1100 Subject: [PATCH 25/39] Some more cleanup of pre-v2 code that is converted or close enough to no longer be useful, and a fix for swa summaries being written to the wrong file. 
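The "wrong file" part of this fix: calculate_test_summaries_v2 always logs to
self.test_writer, so evaluating the SWA weights has to temporarily swap in the
dedicated SWA writer (the swap existed earlier only as a commented-out line).
A sketch of the pattern inside calculate_swa_summaries_v2, assuming the writer
attributes created in init_net_v2:

    # Route SWA evaluation summaries to the SWA log, then restore.
    true_test_writer, self.test_writer = self.test_writer, self.swa_writer
    self.calculate_test_summaries_v2(test_batches, steps)
    self.test_writer = true_test_writer
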
--- tf/tfprocess.py | 454 +----------------------------------------------- 1 file changed, 2 insertions(+), 452 deletions(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index 6a976843..6b5173a2 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -116,9 +116,6 @@ def __init__(self, cfg): self.net.set_valueformat(self.VALUE_HEAD) - # For exporting - self.weights = [] - self.swa_enabled = self.cfg['training'].get('swa', False) # Limit momentum of SWA exponential average to 1 - 1/(swa_max_n + 1) @@ -129,16 +126,11 @@ def __init__(self, cfg): self.renorm_max_d = self.cfg['training'].get('renorm_max_d', 0) self.renorm_momentum = self.cfg['training'].get('renorm_momentum', 0.99) - gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.90, - allow_growth=True, visible_device_list="{}".format(self.cfg['gpu'])) - config = tf.compat.v1.ConfigProto(gpu_options=gpu_options) - self.session = tf.compat.v1.Session(config=config) gpus = tf.config.experimental.list_physical_devices('GPU') tf.config.experimental.set_visible_devices(gpus[self.cfg['gpu']], 'GPU') if self.model_dtype == tf.float16: tf.keras.mixed_precision.experimental.set_policy('mixed_float16') - self.global_step = tf.Variable(0, name='global_step', trainable=False, dtype=tf.int64) def init_v2(self, train_dataset, test_dataset): @@ -153,23 +145,6 @@ def init_v2(self, train_dataset, test_dataset): self.manager = tf.train.CheckpointManager( self.checkpoint, directory=self.root_dir, max_to_keep=50, keep_checkpoint_every_n_hours=24) - def init(self, dataset, train_iterator, test_iterator): - self.training = tf.compat.v1.placeholder(tf.bool) - self.learning_rate = tf.compat.v1.placeholder(tf.float32) - # TF variables - self.handle = tf.compat.v1.placeholder(tf.string, shape=[]) - iterator = tf.compat.v1.data.Iterator.from_string_handle( - self.handle, tf.compat.v1.data.get_output_types(dataset), tf.compat.v1.data.get_output_shapes(dataset)) - self.next_batch = iterator.get_next() - self.train_handle = self.session.run(train_iterator.string_handle()) - self.test_handle = self.session.run(test_iterator.string_handle()) - self.l2reg = tf.keras.regularizers.l2(l=0.5 * (0.0001)) - - # This forces trainable variables to be stored as fp32 - with tf.compat.v1.variable_scope("fp32_storage", - custom_getter=float32_variable_storage_getter): - self.init_net(self.next_batch) - def init_net_v2(self): input_var = tf.keras.Input(shape=(112, 8*8)) x_planes = tf.keras.layers.Reshape([112, 8, 8])(input_var) @@ -264,156 +239,6 @@ def accuracy(target, output): self.swa_writer = tf.summary.create_file_writer( os.path.join(os.getcwd(), "leelalogs/{}-swa-test".format(self.cfg['name']))) - def init_net(self, next_batch): - self.x = next_batch[0] # tf.placeholder(tf.float32, [None, 112, 8*8]) - self.y_ = next_batch[1] # tf.placeholder(tf.float32, [None, 1858]) - self.z_ = next_batch[2] # tf.placeholder(tf.float32, [None, 3]) - self.q_ = next_batch[3] # tf.placeholder(tf.float32, [None, 3]) - self.batch_norm_count = 0 - self.y_conv, self.z_conv = self.construct_net(self.x) - - if self.model_dtype != tf.float32: - self.y_conv = tf.cast(self.y_conv, tf.float32) - self.z_conv = tf.cast(self.z_conv, tf.float32) - - # Calculate loss on policy head - if self.cfg['training'].get('mask_legal_moves'): - # extract mask for legal moves from target policy - move_is_legal = tf.greater_equal(self.y_, 0) - # replace logits of illegal moves with large negative value (so that it doesn't affect policy of legal moves) without gradient - illegal_filler = 
tf.zeros_like(self.y_conv) - 1.0e10 - self.y_conv = tf.compat.v1.where_v2(move_is_legal, self.y_conv, illegal_filler) - # y_ still has -1 on illegal moves, flush them to 0 - self.y_ = tf.nn.relu(self.y_) - - policy_cross_entropy = \ - tf.compat.v1.nn.softmax_cross_entropy_with_logits_v2(labels=tf.stop_gradient(self.y_), - logits=self.y_conv) - self.policy_loss = tf.reduce_mean(input_tensor=policy_cross_entropy) - - q_ratio = self.cfg['training'].get('q_ratio', 0) - assert 0 <= q_ratio <= 1 - target = self.q_ * q_ratio + self.z_ * (1 - q_ratio) - - # Linear conversion to scalar to compute MSE with, for comparison to old values - wdl = tf.expand_dims(tf.constant([1.0, 0.0, -1.0]), 1) - scalar_target = tf.matmul(target, wdl) - - # Loss on value head - if self.wdl: - value_cross_entropy = \ - tf.compat.v1.nn.softmax_cross_entropy_with_logits_v2(labels=tf.stop_gradient(target), - logits=self.z_conv) - self.value_loss = tf.reduce_mean(input_tensor=value_cross_entropy) - scalar_z_conv = tf.matmul(tf.nn.softmax(self.z_conv), wdl) - self.mse_loss = \ - tf.reduce_mean(input_tensor=tf.math.squared_difference(scalar_target, scalar_z_conv)) - else: - self.value_loss = tf.constant(0) - self.mse_loss = \ - tf.reduce_mean(input_tensor=tf.math.squared_difference(scalar_target, self.z_conv)) - - # Regularizer - reg_variables = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES) - penalties = [self.l2reg(w) for w in reg_variables] - self.reg_term = tf.math.add_n(penalties) - - if self.model_dtype != tf.float32: - self.reg_term = tf.cast(self.reg_term, tf.float32) - - # For training from a (smaller) dataset of strong players, you will - # want to reduce the factor in front of self.mse_loss here. - pol_loss_w = self.cfg['training']['policy_loss_weight'] - val_loss_w = self.cfg['training']['value_loss_weight'] - if self.wdl: - value_loss = self.value_loss - else: - value_loss = self.mse_loss - loss = pol_loss_w * self.policy_loss + \ - val_loss_w * value_loss + self.reg_term - - # Set adaptive learning rate during training - self.cfg['training']['lr_boundaries'].sort() - self.warmup_steps = self.cfg['training'].get('warmup_steps', 0) - self.lr = self.cfg['training']['lr_values'][0] - - # You need to change the learning rate here if you are training - # from a self-play training set, for example start with 0.005 instead. - opt_op = tf.compat.v1.train.MomentumOptimizer( - learning_rate=self.learning_rate, momentum=0.9, use_nesterov=True) - - opt_op = LossScalingOptimizer(opt_op, scale=self.loss_scale) - - # Do swa after we contruct the net - if self.swa_enabled: - # Count of networks accumulated into SWA - self.swa_count = tf.Variable(0., name='swa_count', trainable=False) - # Build the SWA variables and accumulators - accum = [] - load = [] - n = self.swa_count - for w in self.weights: - name = w.name.split(':')[0] - var = tf.Variable( - tf.zeros(shape=w.shape), name='swa/'+name, trainable=False) - accum.append( - tf.compat.v1.assign(var, var * (n / (n + 1.)) + tf.stop_gradient(w) * (1. / (n + 1.)))) - load.append(tf.compat.v1.assign(w, var)) - with tf.control_dependencies(accum): - self.swa_accum_op = tf.compat.v1.assign_add(n, 1.) - self.swa_load_op = tf.group(*load) - - # Accumulate (possibly multiple) gradient updates to simulate larger batch sizes than can be held in GPU memory. 
- gradient_accum = [tf.Variable(tf.zeros_like( - var.initialized_value()), trainable=False) for var in tf.compat.v1.trainable_variables()] - self.zero_op = [var.assign(tf.zeros_like(var)) - for var in gradient_accum] - - self.update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS) - with tf.control_dependencies(self.update_ops): - gradients = opt_op.compute_gradients(loss) - self.accum_op = [accum.assign_add( - gradient[0]) for accum, gradient in zip(gradient_accum, gradients)] - # gradients are num_batch_splits times higher due to accumulation by summing, so the norm will be too - max_grad_norm = self.cfg['training'].get( - 'max_grad_norm', 10000.0) * self.cfg['training'].get('num_batch_splits', 1) - gradient_accum, self.grad_norm = tf.clip_by_global_norm( - gradient_accum, max_grad_norm) - self.train_op = opt_op.apply_gradients( - [(accum, gradient[1]) for accum, gradient in zip(gradient_accum, gradients)], global_step=self.global_step) - - correct_policy_prediction = \ - tf.equal(tf.argmax(input=self.y_conv, axis=1), tf.argmax(input=self.y_, axis=1)) - correct_policy_prediction = tf.cast(correct_policy_prediction, tf.float32) - self.policy_accuracy = tf.reduce_mean(input_tensor=correct_policy_prediction) - correct_value_prediction = \ - tf.equal(tf.argmax(input=self.z_conv, axis=1), tf.argmax(input=self.z_, axis=1)) - correct_value_prediction = tf.cast(correct_value_prediction, tf.float32) - self.value_accuracy = tf.reduce_mean(input_tensor=correct_value_prediction) - - self.avg_policy_loss = [] - self.avg_value_loss = [] - self.avg_mse_loss = [] - self.avg_reg_term = [] - self.time_start = None - self.last_steps = None - - # Summary part - self.test_writer = tf.compat.v1.summary.FileWriter( - os.path.join(os.getcwd(), "leelalogs/{}-test".format(self.cfg['name']))) - self.train_writer = tf.compat.v1.summary.FileWriter( - os.path.join(os.getcwd(), "leelalogs/{}-train".format(self.cfg['name']))) - if self.swa_enabled: - self.swa_writer = tf.compat.v1.summary.FileWriter( - os.path.join(os.getcwd(), "leelalogs/{}-swa-test".format(self.cfg['name']))) - self.histograms = [tf.compat.v1.summary.histogram( - weight.name, weight) for weight in self.weights] - - self.init = tf.compat.v1.global_variables_initializer() - self.saver = tf.compat.v1.train.Saver() - - self.session.run(self.init) - def replace_weights(self, new_weights): all_evals = [] for e, weights in enumerate(self.weights): @@ -462,10 +287,6 @@ def restore_v2(self): print("Restoring from {0}".format(self.manager.latest_checkpoint)) self.checkpoint.restore(self.manager.latest_checkpoint) - def restore(self, file): - print("Restoring from {0}".format(file)) - self.saver.restore(self.session, file) - def process_loop_v2(self, batch_size, test_batches, batch_splits=1): # Get the initial steps value in case this is a resume from a step count # which is not a multiple of total_steps. @@ -474,14 +295,6 @@ def process_loop_v2(self, batch_size, test_batches, batch_splits=1): for _ in range(steps % total_steps, total_steps): self.process_v2(batch_size, test_batches, batch_splits=batch_splits) - def process_loop(self, batch_size, test_batches, batch_splits=1): - # Get the initial steps value in case this is a resume from a step count - # which is not a multiple of total_steps. 
- steps = tf.compat.v1.train.global_step(self.session, self.global_step) - total_steps = self.cfg['training']['total_steps'] - for _ in range(steps % total_steps, total_steps): - self.process(batch_size, test_batches, batch_splits=batch_splits) - @tf.function() def process_inner_loop(self): print('tracing inner loop!') @@ -633,152 +446,17 @@ def process_v2(self, batch_size, test_batches, batch_splits=1): self.save_swa_weights_v2(swa_path) print("SWA Weights saved in file: {}".format(swa_path)) - def process(self, batch_size, test_batches, batch_splits=1): - if not self.time_start: - self.time_start = time.time() - - # Get the initial steps value before we do a training step. - steps = tf.compat.v1.train.global_step(self.session, self.global_step) - if not self.last_steps: - self.last_steps = steps - - if self.swa_enabled: - # split half of test_batches between testing regular weights and SWA weights - test_batches //= 2 - - # Run test before first step to see delta since end of last run. - if steps % self.cfg['training']['total_steps'] == 0: - # Steps is given as one higher than current in order to avoid it - # being equal to the value the end of a run is stored against. - self.calculate_test_summaries(test_batches, steps + 1) - if self.swa_enabled: - self.calculate_swa_summaries(test_batches, steps + 1) - - # Make sure that ghost batch norm can be applied - if batch_size % 64 != 0: - # Adjust required batch size for batch splitting. - required_factor = 64 * \ - self.cfg['training'].get('num_batch_splits', 1) - raise ValueError( - 'batch_size must be a multiple of {}'.format(required_factor)) - - # Determine learning rate - lr_values = self.cfg['training']['lr_values'] - lr_boundaries = self.cfg['training']['lr_boundaries'] - steps_total = steps % self.cfg['training']['total_steps'] - self.lr = lr_values[bisect.bisect_right(lr_boundaries, steps_total)] - if self.warmup_steps > 0 and steps < self.warmup_steps: - self.lr = self.lr * (steps + 1) / self.warmup_steps - - # need to add 1 to steps because steps will be incremented after gradient update - if (steps + 1) % self.cfg['training']['train_avg_report_steps'] == 0 or (steps + 1) % self.cfg['training']['total_steps'] == 0: - before_weights = self.session.run(self.weights) - - # Run training for this batch - self.session.run(self.zero_op) - for _ in range(batch_splits): - policy_loss, value_loss, mse_loss, reg_term, _, _ = self.session.run( - [self.policy_loss, self.value_loss, self.mse_loss, self.reg_term, self.accum_op, - self.next_batch], - feed_dict={self.training: True, self.handle: self.train_handle}) - # Keep running averages - # Google's paper scales MSE by 1/4 to a [0, 1] range, so do the same to - # get comparable values. - mse_loss /= 4.0 - self.avg_policy_loss.append(policy_loss) - if self.wdl: - self.avg_value_loss.append(value_loss) - self.avg_mse_loss.append(mse_loss) - self.avg_reg_term.append(reg_term) - # Gradients of batch splits are summed, not averaged like usual, so need to scale lr accordingly to correct for this. - corrected_lr = self.lr / batch_splits - _, grad_norm = self.session.run([self.train_op, self.grad_norm], - feed_dict={self.learning_rate: corrected_lr, self.training: True, self.handle: self.train_handle}) - - # Update steps since training should have incremented it. 
- steps = tf.compat.v1.train.global_step(self.session, self.global_step) - - if steps % self.cfg['training']['train_avg_report_steps'] == 0 or steps % self.cfg['training']['total_steps'] == 0: - pol_loss_w = self.cfg['training']['policy_loss_weight'] - val_loss_w = self.cfg['training']['value_loss_weight'] - time_end = time.time() - speed = 0 - if self.time_start: - elapsed = time_end - self.time_start - steps_elapsed = steps - self.last_steps - speed = batch_size * (steps_elapsed / elapsed) - avg_policy_loss = np.mean(self.avg_policy_loss or [0]) - avg_value_loss = np.mean(self.avg_value_loss or [0]) - avg_mse_loss = np.mean(self.avg_mse_loss or [0]) - avg_reg_term = np.mean(self.avg_reg_term or [0]) - print("step {}, lr={:g} policy={:g} value={:g} mse={:g} reg={:g} total={:g} ({:g} pos/s)".format( - steps, self.lr, avg_policy_loss, avg_value_loss, avg_mse_loss, avg_reg_term, - pol_loss_w * avg_policy_loss + val_loss_w * avg_value_loss + avg_reg_term, - speed)) - - after_weights = self.session.run(self.weights) - update_ratio_summaries = self.compute_update_ratio( - before_weights, after_weights) - - train_summaries = tf.compat.v1.Summary(value=[ - tf.compat.v1.Summary.Value(tag="Policy Loss", simple_value=avg_policy_loss), - tf.compat.v1.Summary.Value(tag="Value Loss", simple_value=avg_value_loss), - tf.compat.v1.Summary.Value(tag="Reg term", simple_value=avg_reg_term), - tf.compat.v1.Summary.Value(tag="LR", simple_value=self.lr), - tf.compat.v1.Summary.Value(tag="Gradient norm", - simple_value=grad_norm / batch_splits), - tf.compat.v1.Summary.Value(tag="MSE Loss", simple_value=avg_mse_loss)]) - self.train_writer.add_summary(train_summaries, steps) - self.train_writer.add_summary(update_ratio_summaries, steps) - self.time_start = time_end - self.last_steps = steps - self.avg_policy_loss, self.avg_value_loss, self.avg_mse_loss, self.avg_reg_term = [], [], [], [] - - if self.swa_enabled and steps % self.cfg['training']['swa_steps'] == 0: - self.update_swa() - - # Calculate test values every 'test_steps', but also ensure there is - # one at the final step so the delta to the first step can be calculted. - if steps % self.cfg['training']['test_steps'] == 0 or steps % self.cfg['training']['total_steps'] == 0: - self.calculate_test_summaries(test_batches, steps) - if self.swa_enabled: - self.calculate_swa_summaries(test_batches, steps) - - # Save session and weights at end, and also optionally every 'checkpoint_steps'. 
- if steps % self.cfg['training']['total_steps'] == 0 or ( - 'checkpoint_steps' in self.cfg['training'] and steps % self.cfg['training']['checkpoint_steps'] == 0): - path = os.path.join(self.root_dir, self.cfg['name']) - save_path = self.saver.save(self.session, path, global_step=steps) - print("Model saved in file: {}".format(save_path)) - leela_path = path + "-" + str(steps) - swa_path = path + "-swa-" + str(steps) - self.net.pb.training_params.training_steps = steps - self.save_leelaz_weights(leela_path) - print("Weights saved in file: {}".format(leela_path)) - if self.swa_enabled: - self.save_swa_weights(swa_path) - print("SWA Weights saved in file: {}".format(swa_path)) - def calculate_swa_summaries_v2(self, test_batches, steps): backup = [w.read_value() for w in self.model.weights] for (swa, w) in zip(self.swa_weights, self.model.weights): w.assign(swa.read_value()) - #true_test_writer, self.test_writer = self.test_writer, self.swa_writer + true_test_writer, self.test_writer = self.test_writer, self.swa_writer print('swa', end=' ') self.calculate_test_summaries_v2(test_batches, steps) - #self.test_writer = true_test_writer + self.test_writer = true_test_writer for (old, w) in zip(backup, self.model.weights): w.assign(old) - def calculate_swa_summaries(self, test_batches, steps): - self.snap_save() - self.session.run(self.swa_load_op) - true_test_writer, self.test_writer = self.test_writer, self.swa_writer - print('swa', end=' ') - self.calculate_test_summaries(test_batches, steps) - self.test_writer = true_test_writer - self.snap_restore() - @tf.function() def calculate_test_summaries_inner_loop(self): print('tracing summaries inner loop!') @@ -839,56 +517,6 @@ def calculate_test_summaries_v2(self, test_batches, steps): print("step {}, policy={:g} value={:g} policy accuracy={:g}% value accuracy={:g}% mse={:g}".\ format(steps, sum_policy, sum_value, sum_policy_accuracy, sum_value_accuracy, sum_mse)) - def calculate_test_summaries(self, test_batches, steps): - sum_policy_accuracy = 0 - sum_value_accuracy = 0 - sum_mse = 0 - sum_policy = 0 - sum_value = 0 - for _ in range(0, test_batches): - test_policy, test_value, test_policy_accuracy, test_value_accuracy, test_mse, _ = self.session.run( - [self.policy_loss, self.value_loss, self.policy_accuracy, self.value_accuracy, self.mse_loss, - self.next_batch], - feed_dict={self.training: False, - self.handle: self.test_handle}) - sum_policy_accuracy += test_policy_accuracy - sum_mse += test_mse - sum_policy += test_policy - if self.wdl: - sum_value_accuracy += test_value_accuracy - sum_value += test_value - sum_policy_accuracy /= test_batches - sum_policy_accuracy *= 100 - sum_policy /= test_batches - sum_value /= test_batches - if self.wdl: - sum_value_accuracy /= test_batches - sum_value_accuracy *= 100 - # Additionally rescale to [0, 1] so divide by 4 - sum_mse /= (4.0 * test_batches) - self.net.pb.training_params.learning_rate = self.lr - self.net.pb.training_params.mse_loss = sum_mse - self.net.pb.training_params.policy_loss = sum_policy - # TODO store value and value accuracy in pb - self.net.pb.training_params.accuracy = sum_policy_accuracy - if self.wdl: - test_summaries = tf.compat.v1.Summary(value=[ - tf.compat.v1.Summary.Value(tag="Policy Accuracy", simple_value=sum_policy_accuracy), - tf.compat.v1.Summary.Value(tag="Value Accuracy", simple_value=sum_value_accuracy), - tf.compat.v1.Summary.Value(tag="Policy Loss", simple_value=sum_policy), - tf.compat.v1.Summary.Value(tag="Value Loss", simple_value=sum_value), - 
tf.compat.v1.Summary.Value(tag="MSE Loss", simple_value=sum_mse)]).SerializeToString() - else: - test_summaries = tf.compat.v1.Summary(value=[ - tf.compat.v1.Summary.Value(tag="Policy Accuracy", simple_value=sum_policy_accuracy), - tf.compat.v1.Summary.Value(tag="Policy Loss", simple_value=sum_policy), - tf.compat.v1.Summary.Value(tag="MSE Loss", simple_value=sum_mse)]).SerializeToString() - test_summaries = tf.compat.v1.summary.merge( - [test_summaries] + self.histograms).eval(session=self.session) - self.test_writer.add_summary(test_summaries, steps) - print("step {}, policy={:g} value={:g} policy accuracy={:g}% value accuracy={:g}% mse={:g}".\ - format(steps, sum_policy, sum_value, sum_policy_accuracy, sum_value_accuracy, sum_mse)) - def compute_update_ratio(self, before_weights, after_weights): """Compute the ratio of gradient norm to weight norm. @@ -945,32 +573,6 @@ def update_swa_v2(self): swa.assign(swa.read_value() * (num / (num + 1.)) + w.read_value() * (1. / (num + 1.))) self.swa_count.assign(min(num + 1., self.swa_max_n)) - def update_swa(self): - # Add the current weight vars to the running average. - num = self.session.run(self.swa_accum_op) - num = min(num, self.swa_max_n) - self.swa_count.load(float(num), self.session) - - def snap_save(self): - # Save a snapshot of all the variables in the current graph. - if not hasattr(self, 'snap_save_op'): - save_ops = [] - rest_ops = [] - for var in self.weights: - if isinstance(var, str): - var = tf.compat.v1.get_default_graph().get_tensor_by_name(var) - name = var.name.split(':')[0] - v = tf.Variable(var, name='save/'+name, trainable=False) - save_ops.append(tf.compat.v1.assign(v, var)) - rest_ops.append(tf.compat.v1.assign(var, v)) - self.snap_save_op = tf.group(*save_ops) - self.snap_restore_op = tf.group(*rest_ops) - self.session.run(self.snap_save_op) - - def snap_restore(self): - # Restore variables in the current graph from the snapshot. - self.session.run(self.snap_restore_op) - def save_swa_weights_v2(self, filename): backup = [w.read_value() for w in self.model.weights] for (swa, w) in zip(self.swa_weights, self.model.weights): @@ -979,12 +581,6 @@ def save_swa_weights_v2(self, filename): for (old, w) in zip(backup, self.model.weights): w.assign(old) - def save_swa_weights(self, filename): - self.snap_save() - self.session.run(self.swa_load_op) - self.save_leelaz_weights(filename) - self.snap_restore() - def save_leelaz_weights_v2(self, filename): all_tensors = [] all_weights = [] @@ -1055,52 +651,6 @@ def save_leelaz_weights_v2(self, filename): self.net.fill_net(all_weights) self.net.save_proto(filename) - def save_leelaz_weights(self, filename): - all_weights = [] - if not hasattr(self, 'pb_save_op'): - all_evals = [] - for weights in self.weights: - work_weights = None - if weights.shape.ndims == 4: - # Convolution weights need a transpose - # - # TF (kYXInputOutput) - # [filter_height, filter_width, in_channels, out_channels] - # - # Leela/cuDNN/Caffe (kOutputInputYX) - # [output, input, filter_size, filter_size] - work_weights = tf.transpose(a=weights, perm=[3, 2, 0, 1]) - elif weights.shape.ndims == 2: - # Fully connected layers are [in, out] in TF - # - # [out, in] in Leela - # - work_weights = tf.transpose(a=weights, perm=[1, 0]) - else: - # Biases, batchnorm etc - work_weights = weights - all_evals.append(work_weights) - self.pb_save_op = all_evals - nparrays = self.session.run(self.pb_save_op) - for e, nparray in enumerate(nparrays): - # Rescale rule50 related weights as clients do not normalize the input. 
- if e == 0: - num_inputs = 112 - # 50 move rule is the 110th input, or 109 starting from 0. - rule50_input = 109 - wt_flt = [] - for i, weight in enumerate(np.ravel(nparray)): - if (i % (num_inputs*9))//9 == rule50_input: - wt_flt.append(weight/99) - else: - wt_flt.append(weight) - else: - wt_flt = [wt for wt in np.ravel(nparray)] - all_weights.append(wt_flt) - - self.net.fill_net(all_weights) - self.net.save_proto(filename) - def batch_norm_v2(self, input, scale=False): if self.renorm_enabled: clipping = { From 7cbe7754c522424bf6cafac5bc7ccb86a1470932 Mon Sep 17 00:00:00 2001 From: Tilps Date: Wed, 11 Dec 2019 20:50:53 +1100 Subject: [PATCH 26/39] Readd basic update ratios. --- tf/tfprocess.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index 6b5173a2..542d5444 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -355,8 +355,9 @@ def process_v2(self, batch_size, test_batches, batch_splits=1): self.lr = self.lr * tf.cast(steps + 1, tf.float32) / self.warmup_steps # need to add 1 to steps because steps will be incremented after gradient update - #if (steps + 1) % self.cfg['training']['train_avg_report_steps'] == 0 or (steps + 1) % self.cfg['training']['total_steps'] == 0: - # before_weights = self.session.run(self.weights) + if (steps + 1) % self.cfg['training']['train_avg_report_steps'] == 0 or (steps + 1) % self.cfg['training']['total_steps'] == 0: + before_weights = [w.read_value() for w in self.model.weights] + # Run training for this batch grads = None @@ -405,9 +406,7 @@ def process_v2(self, batch_size, test_batches, batch_splits=1): pol_loss_w * avg_policy_loss + val_loss_w * avg_value_loss + avg_reg_term, speed)) - #after_weights = self.session.run(self.weights) - #update_ratio_summaries = self.compute_update_ratio( - # before_weights, after_weights) + after_weights = [w.read_value() for w in self.model.weights] with self.train_writer.as_default(): tf.summary.scalar("Policy Loss", avg_policy_loss, step=steps) tf.summary.scalar("Value Loss", avg_value_loss, step=steps) @@ -415,8 +414,9 @@ def process_v2(self, batch_size, test_batches, batch_splits=1): tf.summary.scalar("LR", self.lr, step=steps) tf.summary.scalar("Gradient norm", grad_norm / batch_splits, step=steps) tf.summary.scalar("MSE Loss", avg_mse_loss, step=steps) + self.compute_update_ratio_v2( + before_weights, after_weights, steps) self.train_writer.flush() - #self.train_writer.add_summary(update_ratio_summaries, steps) self.time_start = time_end self.last_steps = steps self.avg_policy_loss, self.avg_value_loss, self.avg_mse_loss, self.avg_reg_term = [], [], [], [] @@ -517,6 +517,22 @@ def calculate_test_summaries_v2(self, test_batches, steps): print("step {}, policy={:g} value={:g} policy accuracy={:g}% value accuracy={:g}% mse={:g}".\ format(steps, sum_policy, sum_value, sum_policy_accuracy, sum_value_accuracy, sum_mse)) + def compute_update_ratio_v2(self, before_weights, after_weights, steps): + """Compute the ratio of gradient norm to weight norm. 
+ + Adapted from https://github.com/tensorflow/minigo/blob/c923cd5b11f7d417c9541ad61414bf175a84dc31/dual_net.py#L567 + """ + deltas = [after - before for after, + before in zip(after_weights, before_weights)] + delta_norms = [np.linalg.norm(d.numpy().ravel()) for d in deltas] + weight_norms = [np.linalg.norm(w.numpy().ravel()) for w in before_weights] + ratios = [(tensor.name, d / w) for d, w, tensor in zip(delta_norms, weight_norms, self.model.weights) if not 'moving' in tensor.name and w != 0.] + for name, ratio in ratios: + tf.summary.scalar('update_ratios/' + name, ratio, step=steps) + #ratios = np.log10([r for (_, r) in ratios if 0 < r < np.inf]) + #all_summaries.append(self.log_histogram('update_ratios_log10', ratios)) + #return tf.compat.v1.Summary(value=all_summaries) + def compute_update_ratio(self, before_weights, after_weights): """Compute the ratio of gradient norm to weight norm. From 47208df63494f6f5d1d62261d838605a99bddcc8 Mon Sep 17 00:00:00 2001 From: Tilps Date: Wed, 11 Dec 2019 21:02:19 +1100 Subject: [PATCH 27/39] Small performance optimization to offset the cost of update ratios. --- tf/tfprocess.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index 542d5444..6214cf2a 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -295,6 +295,10 @@ def process_loop_v2(self, batch_size, test_batches, batch_splits=1): for _ in range(steps % total_steps, total_steps): self.process_v2(batch_size, test_batches, batch_splits=batch_splits) + @tf.function() + def read_weights(self): + return [w.read_value() for w in self.model.weights] + @tf.function() def process_inner_loop(self): print('tracing inner loop!') @@ -356,7 +360,7 @@ def process_v2(self, batch_size, test_batches, batch_splits=1): # need to add 1 to steps because steps will be incremented after gradient update if (steps + 1) % self.cfg['training']['train_avg_report_steps'] == 0 or (steps + 1) % self.cfg['training']['total_steps'] == 0: - before_weights = [w.read_value() for w in self.model.weights] + before_weights = self.read_weights() # Run training for this batch @@ -406,7 +410,7 @@ def process_v2(self, batch_size, test_batches, batch_splits=1): pol_loss_w * avg_policy_loss + val_loss_w * avg_value_loss + avg_reg_term, speed)) - after_weights = [w.read_value() for w in self.model.weights] + after_weights = self.read_weights() with self.train_writer.as_default(): tf.summary.scalar("Policy Loss", avg_policy_loss, step=steps) tf.summary.scalar("Value Loss", avg_value_loss, step=steps) @@ -447,7 +451,7 @@ def process_v2(self, batch_size, test_batches, batch_splits=1): print("SWA Weights saved in file: {}".format(swa_path)) def calculate_swa_summaries_v2(self, test_batches, steps): - backup = [w.read_value() for w in self.model.weights] + backup = self.read_weights() for (swa, w) in zip(self.swa_weights, self.model.weights): w.assign(swa.read_value()) true_test_writer, self.test_writer = self.test_writer, self.swa_writer @@ -590,7 +594,7 @@ def update_swa_v2(self): self.swa_count.assign(min(num + 1., self.swa_max_n)) def save_swa_weights_v2(self, filename): - backup = [w.read_value() for w in self.model.weights] + backup = self.read_weights() for (swa, w) in zip(self.swa_weights, self.model.weights): w.assign(swa.read_value()) self.save_leelaz_weights_v2(filename) From d22b9b8f1d77231b2c4954073d37332a1a369b20 Mon Sep 17 00:00:00 2001 From: Tilps Date: Wed, 11 Dec 2019 23:53:39 +1100 Subject: [PATCH 28/39] Optimize compute update ratio since adding 
update_ratio_log10 had a noticeable performance impact. --- tf/tfprocess.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index 6214cf2a..0c5da63f 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -521,6 +521,7 @@ def calculate_test_summaries_v2(self, test_batches, steps): print("step {}, policy={:g} value={:g} policy accuracy={:g}% value accuracy={:g}% mse={:g}".\ format(steps, sum_policy, sum_value, sum_policy_accuracy, sum_value_accuracy, sum_mse)) + @tf.function() def compute_update_ratio_v2(self, before_weights, after_weights, steps): """Compute the ratio of gradient norm to weight norm. @@ -528,14 +529,14 @@ def compute_update_ratio_v2(self, before_weights, after_weights, steps): """ deltas = [after - before for after, before in zip(after_weights, before_weights)] - delta_norms = [np.linalg.norm(d.numpy().ravel()) for d in deltas] - weight_norms = [np.linalg.norm(w.numpy().ravel()) for w in before_weights] - ratios = [(tensor.name, d / w) for d, w, tensor in zip(delta_norms, weight_norms, self.model.weights) if not 'moving' in tensor.name and w != 0.] + delta_norms = [tf.math.reduce_euclidean_norm(d) for d in deltas] + weight_norms = [tf.math.reduce_euclidean_norm(w) for w in before_weights] + ratios = [(tensor.name, tf.cond(w != 0., lambda: d / w, lambda: -1.)) for d, w, tensor in zip(delta_norms, weight_norms, self.model.weights) if not 'moving' in tensor.name] for name, ratio in ratios: tf.summary.scalar('update_ratios/' + name, ratio, step=steps) - #ratios = np.log10([r for (_, r) in ratios if 0 < r < np.inf]) - #all_summaries.append(self.log_histogram('update_ratios_log10', ratios)) - #return tf.compat.v1.Summary(value=all_summaries) + # Filtering is hard, so just push infinities/NaNs to an unreasonably large value. + ratios = [tf.cond(r > 0, lambda: tf.math.log(r) / 2.30258509299, lambda: 200.) for (_, r) in ratios] + tf.summary.histogram('update_ratios_log10', tf.stack(ratios), buckets=1000, step=steps) def compute_update_ratio(self, before_weights, after_weights): """Compute the ratio of gradient norm to weight norm. From ef0636550c9238ed5fc5495f7c277fd5b9a17722 Mon Sep 17 00:00:00 2001 From: Tilps Date: Thu, 12 Dec 2019 14:45:36 +1100 Subject: [PATCH 29/39] Some more cleanup, add weight histograms back. 
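
For reference, TF2 summaries are written imperatively against a file writer,
which is why the old merged-summary and histogram plumbing can go. A minimal
sketch of the pattern this patch adopts (the writer path, `model` and
`global_step` names are illustrative, not from the repo):

    writer = tf.summary.create_file_writer("leelalogs/example-test")
    with writer.as_default():
        for w in model.weights:
            # One histogram per variable, tagged with the variable name.
            tf.summary.histogram(w.name, w, buckets=1000, step=global_step)
    writer.flush()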
--- tf/tfprocess.py | 63 ++----------------------------------------------- 1 file changed, 2 insertions(+), 61 deletions(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index 0c5da63f..3bcae0ff 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -55,14 +55,6 @@ def call(self, inputs): h_conv_pol_flat = tf.reshape(inputs, [-1, 80*8*8]) return tf.matmul(h_conv_pol_flat, tf.cast(self.fc1, h_conv_pol_flat.dtype)) - -def bias_variable(shape, name=None, dtype=tf.float32): - return tf.Variable(tf.compat.v1.zeros_initializer()(shape, dtype), name=name) - -def conv2d(x, W): - return tf.nn.conv2d(input=x, filters=W, data_format='NCHW', - strides=[1, 1, 1, 1], padding='SAME') - class TFProcess: def __init__(self, cfg): self.cfg = cfg @@ -513,11 +505,10 @@ def calculate_test_summaries_v2(self, test_batches, steps): tf.summary.scalar("Policy Accuracy", sum_policy_accuracy, step=steps) if self.wdl: tf.summary.scalar("Value Accuracy", sum_value_accuracy, step=steps) + for w in self.model.weights: + tf.summary.histogram(w.name, w, buckets=1000, step=steps) self.test_writer.flush() - #test_summaries = tf.compat.v1.summary.merge( - # [test_summaries] + self.histograms).eval(session=self.session) - #self.test_writer.add_summary(test_summaries, steps) print("step {}, policy={:g} value={:g} policy accuracy={:g}% value accuracy={:g}% mse={:g}".\ format(steps, sum_policy, sum_value, sum_policy_accuracy, sum_value_accuracy, sum_mse)) @@ -538,56 +529,6 @@ def compute_update_ratio_v2(self, before_weights, after_weights, steps): ratios = [tf.cond(r > 0, lambda: tf.math.log(r) / 2.30258509299, lambda: 200.) for (_, r) in ratios] tf.summary.histogram('update_ratios_log10', tf.stack(ratios), buckets=1000, step=steps) - def compute_update_ratio(self, before_weights, after_weights): - """Compute the ratio of gradient norm to weight norm. - - Adapted from https://github.com/tensorflow/minigo/blob/c923cd5b11f7d417c9541ad61414bf175a84dc31/dual_net.py#L567 - """ - deltas = [after - before for after, - before in zip(after_weights, before_weights)] - delta_norms = [np.linalg.norm(d.ravel()) for d in deltas] - weight_norms = [np.linalg.norm(w.ravel()) for w in before_weights] - ratios = [(tensor.name, d / w) for d, w, tensor in zip(delta_norms, weight_norms, self.weights) if not 'moving' in tensor.name] - all_summaries = [ - tf.compat.v1.Summary.Value(tag='update_ratios/' + - name, simple_value=ratio) - for name, ratio in ratios] - ratios = np.log10([r for (_, r) in ratios if 0 < r < np.inf]) - all_summaries.append(self.log_histogram('update_ratios_log10', ratios)) - return tf.compat.v1.Summary(value=all_summaries) - - def log_histogram(self, tag, values, bins=1000): - """Logs the histogram of a list/vector of values. 
- - From https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 - """ - # Convert to a numpy array - values = np.array(values) - - # Create histogram using numpy - counts, bin_edges = np.histogram(values, bins=bins) - - # Fill fields of histogram proto - hist = tf.compat.v1.HistogramProto() - hist.min = float(np.min(values)) - hist.max = float(np.max(values)) - hist.num = int(np.prod(values.shape)) - hist.sum = float(np.sum(values)) - hist.sum_squares = float(np.sum(values**2)) - - # Requires equal number as bins, where the first goes from -DBL_MAX to bin_edges[1] - # See https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/summary.proto#L30 - # Thus, we drop the start of the first bin - bin_edges = bin_edges[1:] - - # Add bin edges and counts - for edge in bin_edges: - hist.bucket_limit.append(edge) - for c in counts: - hist.bucket.append(c) - - return tf.compat.v1.Summary.Value(tag=tag, histo=hist) - def update_swa_v2(self): num = self.swa_count.read_value() for (w, swa) in zip(self.model.weights, self.swa_weights): From 753b91897beca3ac362d86f1d78993feda8c3103 Mon Sep 17 00:00:00 2001 From: Tilps Date: Thu, 12 Dec 2019 16:05:51 +1100 Subject: [PATCH 30/39] Fix net saving for renorm mode. --- tf/tfprocess.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index 3bcae0ff..1491c7af 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -565,6 +565,16 @@ def save_leelaz_weights_v2(self, filename): # work_weights = tf.transpose(a=weights, perm=[1, 0]) else: + # batch renorm has extra weights, but we don't know what to do with them. + if 'renorm' in weights.name: + continue + # renorm has variance, but it is not the primary source of truth + if 'variance:' in weights.name and self.renorm_enabled: + continue + # Renorm has moving stddev not variance, undo the transform to make it compatible. + if 'stddev:' in weights.name: + all_tensors.append(tf.math.square(weights) - 1e-5) + continue # Biases, batchnorm etc # pb expects every batch norm to have gammas, but not all of our # batch norms have gammas, so manually add pretend gammas. From 72f7cb30d4844b006b35dd0f7b6934eab3c24a87 Mon Sep 17 00:00:00 2001 From: Tilps Date: Thu, 12 Dec 2019 17:32:55 +1100 Subject: [PATCH 31/39] Add net_to_model and update_steps support. Remove more unsupported code, including upgrade.py which doesn't have an obvious transition. 
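
Note on the mixprec.py removal: the fp32 variable-storage getter and
LossScalingOptimizer were TF1 graph-mode workarounds. Under the
'mixed_float16' Keras policy already set in __init__, variables are stored in
float32 while computation runs in float16, so the custom getter has nothing
left to do; if dynamic loss scaling is wanted it would presumably come from
the Keras mixed-precision LossScaleOptimizer rather than this hand-rolled
wrapper. A minimal sketch of the policy behaviour (the layer and shapes here
are illustrative):

    tf.keras.mixed_precision.experimental.set_policy('mixed_float16')
    layer = tf.keras.layers.Dense(8)
    y = layer(tf.zeros([1, 4]))
    # y.dtype == tf.float16 (compute dtype)
    # layer.kernel.dtype == tf.float32 (storage dtype)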
--- tf/mixprec.py | 48 ------------------------ tf/net_to_model.py | 19 +++------- tf/tfprocess.py | 92 ++++++++++++++++++++++++++++++++++++---------- tf/update_steps.py | 21 ++--------- tf/upgrade.py | 73 ------------------------------------ 5 files changed, 81 insertions(+), 172 deletions(-) delete mode 100644 tf/mixprec.py delete mode 100644 tf/upgrade.py diff --git a/tf/mixprec.py b/tf/mixprec.py deleted file mode 100644 index cf161ac0..00000000 --- a/tf/mixprec.py +++ /dev/null @@ -1,48 +0,0 @@ -import tensorflow as tf - - -def float32_variable_storage_getter(getter, name, shape=None, dtype=None, - initializer=None, regularizer=None, - trainable=True, - *args, **kwargs): - """Custom variable getter that forces trainable variables to be stored in - float32 precision and then casts them to the training precision.""" - storage_dtype = tf.float32 if trainable else dtype - variable = getter(name, shape, dtype=storage_dtype, - initializer=initializer, - regularizer=regularizer, - trainable=trainable, - *args, **kwargs) - if trainable and dtype != tf.float32: - cast_name = name + '/fp16_cast' - try: - cast_variable = tf.compat.v1.get_default_graph().get_tensor_by_name( - cast_name + ':0') - except KeyError: - cast_variable = tf.cast(variable, dtype, name=cast_name) - cast_variable._ref = variable._ref - variable = cast_variable - return variable - - -class LossScalingOptimizer(tf.compat.v1.train.Optimizer): - """An optimizer that scales loss and un-scales gradients.""" - - def __init__(self, optimizer, - scale=None, - name="LossScalingOptimizer", - use_locking=False): - super(LossScalingOptimizer, self).__init__( - name=name, use_locking=use_locking) - self._optimizer = optimizer - self._scale = float(scale) if scale is not None else 1.0 - - def compute_gradients(self, loss, var_list=None, *args, **kwargs): - if self._scale != 1.0: - loss = tf.scalar_mul(self._scale, loss) - gradvar = self._optimizer.compute_gradients(loss, var_list, *args, **kwargs) - gradvar = [(tf.scalar_mul(1. 
/ self._scale, g), v) for g, v in gradvar] - return gradvar - - def apply_gradients(self, *args, **kwargs): - return self._optimizer.apply_gradients(*args, **kwargs) diff --git a/tf/net_to_model.py b/tf/net_to_model.py index 8d3190d6..1d3059b0 100755 --- a/tf/net_to_model.py +++ b/tf/net_to_model.py @@ -27,22 +27,13 @@ raise ValueError("Number of blocks in YAML doesn't match the network") weights = net.get_weights() -x = [ - tf.compat.v1.placeholder(tf.float32, [None, 112, 8*8]), - tf.compat.v1.placeholder(tf.float32, [None, 1858]), - tf.compat.v1.placeholder(tf.float32, [None, 3]), - tf.compat.v1.placeholder(tf.float32, [None, 3]), - ] - tfp = tfprocess.TFProcess(cfg) -tfp.init_net(x) -tfp.replace_weights(weights) -update_global_step = tfp.global_step.assign(START_FROM) -tfp.session.run(update_global_step) +tfp.init_net_v2() +tfp.replace_weights_v2(weights) +tfp.global_step.assign(START_FROM) root_dir = os.path.join(cfg['training']['path'], cfg['name']) if not os.path.exists(root_dir): os.makedirs(root_dir) -path = os.path.join(root_dir, cfg['name']) -save_path = tfp.saver.save(tfp.session, path, global_step=START_FROM) -print("Wrote model to {}".format(root_dir)) +tfp.manager.save() +print("Wrote model to {}".format(tfp.manager.latest_checkpoint)) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index 1491c7af..1d9a44ea 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -24,7 +24,6 @@ import bisect import lc0_az_policy_map import proto.net_pb2 as pb -from mixprec import float32_variable_storage_getter, LossScalingOptimizer from net import Net @@ -126,18 +125,14 @@ def __init__(self, cfg): self.global_step = tf.Variable(0, name='global_step', trainable=False, dtype=tf.int64) def init_v2(self, train_dataset, test_dataset): - self.l2reg = tf.keras.regularizers.l2(l=0.5 * (0.0001)) self.train_dataset = train_dataset self.train_iter = iter(train_dataset) self.test_dataset = test_dataset self.test_iter = iter(test_dataset) self.init_net_v2() - self.checkpoint = tf.train.Checkpoint(optimizer=self.orig_optimizer, model=self.model, global_step=self.global_step, swa_count=self.swa_count) - self.checkpoint.listed = self.swa_weights - self.manager = tf.train.CheckpointManager( - self.checkpoint, directory=self.root_dir, max_to_keep=50, keep_checkpoint_every_n_hours=24) def init_net_v2(self): + self.l2reg = tf.keras.regularizers.l2(l=0.5 * (0.0001)) input_var = tf.keras.Input(shape=(112, 8*8)) x_planes = tf.keras.layers.Reshape([112, 8, 8])(input_var) self.model = tf.keras.Model(inputs=input_var, outputs=self.construct_net_v2(x_planes)) @@ -230,19 +225,58 @@ def accuracy(target, output): if self.swa_enabled: self.swa_writer = tf.summary.create_file_writer( os.path.join(os.getcwd(), "leelalogs/{}-swa-test".format(self.cfg['name']))) + self.checkpoint = tf.train.Checkpoint(optimizer=self.orig_optimizer, model=self.model, global_step=self.global_step, swa_count=self.swa_count) + self.checkpoint.listed = self.swa_weights + self.manager = tf.train.CheckpointManager( + self.checkpoint, directory=self.root_dir, max_to_keep=50, keep_checkpoint_every_n_hours=24) + + def replace_weights_v2(self, new_weights_orig): + new_weights = [w for w in new_weights_orig] + # self.model.weights ordering doesn't match up nicely, so first shuffle the new weights to match up. 
+        # input order is (for convolutional policy):
+        # policy conv
+        # policy bn * 4
+        # policy raw conv and bias
+        # value conv
+        # value bn * 4
+        # value dense with bias
+        # value dense with bias
+        #
+        # output order is (for convolutional policy):
+        # value conv
+        # policy conv
+        # value bn * 4
+        # policy bn * 4
+        # policy raw conv and bias
+        # value dense with bias
+        # value dense with bias
+        new_weights[-5] = new_weights_orig[-10]
+        new_weights[-6] = new_weights_orig[-11]
+        new_weights[-7] = new_weights_orig[-12]
+        new_weights[-8] = new_weights_orig[-13]
+        new_weights[-9] = new_weights_orig[-14]
+        new_weights[-10] = new_weights_orig[-15]
+        new_weights[-11] = new_weights_orig[-5]
+        new_weights[-12] = new_weights_orig[-6]
+        new_weights[-13] = new_weights_orig[-7]
+        new_weights[-14] = new_weights_orig[-8]
+        new_weights[-15] = new_weights_orig[-16]
+        new_weights[-16] = new_weights_orig[-9]
 
-    def replace_weights(self, new_weights):
         all_evals = []
-        for e, weights in enumerate(self.weights):
+        offset = 0
+        last_was_gamma = False
+        for e, weights in enumerate(self.model.weights):
+            source_idx = e+offset
             if weights.shape.ndims == 4:
                 # Rescale rule50 related weights as clients do not normalize the input.
                 if e == 0:
                     num_inputs = 112
                     # 50 move rule is the 110th input, or 109 starting from 0.
                     rule50_input = 109
-                for i in range(len(new_weights[e])):
+                for i in range(len(new_weights[source_idx])):
                     if (i % (num_inputs*9))//9 == rule50_input:
-                        new_weights[e][i] = new_weights[e][i]*99
+                        new_weights[source_idx][i] = new_weights[source_idx][i]*99
 
                 # Convolution weights need a transpose
                 #
                 # TF (kYXInputOutput)
                 # [filter_height, filter_width, in_channels, out_channels]
                 #
                 # Leela/cuDNN/Caffe (kOutputInputYX)
                 # [output, input, filter_size, filter_size]
                 s = weights.shape.as_list()
                 shape = [s[i] for i in [3, 2, 0, 1]]
-                new_weight = tf.constant(new_weights[e], shape=shape)
-                all_evals.append(weights.assign(
-                    tf.transpose(a=new_weight, perm=[2, 3, 1, 0])))
+                new_weight = tf.constant(new_weights[source_idx], shape=shape)
+                weights.assign(
+                    tf.transpose(a=new_weight, perm=[2, 3, 1, 0]))
             elif weights.shape.ndims == 2:
                 # Fully connected layers are [in, out] in TF
                 #
                 # [out, in] in Leela
                 #
                 s = weights.shape.as_list()
                 shape = [s[i] for i in [1, 0]]
-                new_weight = tf.constant(new_weights[e], shape=shape)
-                all_evals.append(weights.assign(
-                    tf.transpose(a=new_weight, perm=[1, 0])))
+                new_weight = tf.constant(new_weights[source_idx], shape=shape)
+                weights.assign(
+                    tf.transpose(a=new_weight, perm=[1, 0]))
             else:
+                # Can't populate renorm weights, but the current new_weight will be needed elsewhere.
+                if 'renorm' in weights.name:
+                    offset-=1
+                    continue
+                # betas without gammas need to skip the gamma in the input.
+                if 'beta:' in weights.name and not last_was_gamma:
+                    source_idx+=1
+                    offset+=1
                 # Biases, batchnorm etc
-                new_weight = tf.constant(new_weights[e], shape=weights.shape)
-                all_evals.append(tf.compat.v1.assign(weights, new_weight))
-        self.session.run(all_evals)
+                new_weight = tf.constant(new_weights[source_idx], shape=weights.shape)
+                if 'stddev:' in weights.name:
+                    weights.assign(tf.math.sqrt(new_weight + 1e-5))
+                else:
+                    weights.assign(new_weight)
+                # need to use the variance to also populate the stddev for renorm, so adjust offset.
+                if 'variance:' in weights.name and self.renorm_enabled:
+                    offset-=1
+            last_was_gamma = 'gamma:' in weights.name
+        # Replace the SWA weights as well, ensuring swa accumulation is reset.
+ if self.swa_enabled: + self.swa_count.assign(tf.constant(0.)) + self.update_swa_v2() # This should result in identical file to the starting one - # self.save_leelaz_weights('restored.txt') + # self.save_leelaz_weights_v2('restored.pb.gz') def restore_v2(self): if self.manager.latest_checkpoint is not None: diff --git a/tf/update_steps.py b/tf/update_steps.py index 49f357f2..f4740bce 100644 --- a/tf/update_steps.py +++ b/tf/update_steps.py @@ -16,28 +16,15 @@ def main(cmd): if not os.path.exists(root_dir): os.makedirs(root_dir) - x = [ - tf.compat.v1.placeholder(tf.float32, [None, 112, 8*8]), - tf.compat.v1.placeholder(tf.float32, [None, 1858]), - tf.compat.v1.placeholder(tf.float32, [None, 3]), - tf.compat.v1.placeholder(tf.float32, [None, 3]), - ] - tfprocess = TFProcess(cfg) - tfprocess.init_net(x) + tfprocess.init_net_v2() - if os.path.exists(os.path.join(root_dir, 'checkpoint')): - cp = tf.train.latest_checkpoint(root_dir) - tfprocess.restore(cp) + tfprocess.restore_v2() START_FROM = cmd.start - update_global_step = tfprocess.global_step.assign(START_FROM) - tfprocess.session.run(update_global_step) - path = os.path.join(root_dir, cfg['name']) - save_path = tfprocess.saver.save(tfprocess.session, path, global_step=START_FROM) - - tfprocess.session.close() + tfprocess.global_step.assign(START_FROM) + tfprocess.manager.save() if __name__ == "__main__": argparser = argparse.ArgumentParser(description=\ diff --git a/tf/upgrade.py b/tf/upgrade.py deleted file mode 100644 index 49e3c11d..00000000 --- a/tf/upgrade.py +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import os -import yaml -import sys -import tensorflow as tf -from tfprocess import TFProcess - -START_FROM = 0 - -def main(cmd): - cfg = yaml.safe_load(cmd.cfg.read()) - print(yaml.dump(cfg, default_flow_style=False)) - - root_dir = os.path.join(cfg['training']['path'], cfg['name']) - if not os.path.exists(root_dir): - os.makedirs(root_dir) - - x = [ - tf.compat.v1.placeholder(tf.float32, [None, 112, 8*8]), - tf.compat.v1.placeholder(tf.float32, [None, 1858]), - tf.compat.v1.placeholder(tf.float32, [None, 3]), - tf.compat.v1.placeholder(tf.float32, [None, 3]), - ] - - tfprocess = TFProcess(cfg) - tfprocess.init_net(x) - - if os.path.exists(os.path.join(root_dir, 'checkpoint')): - cp = tf.train.latest_checkpoint(root_dir) - reader = tf.compat.v1.train.NewCheckpointReader(cp) - saved_shapes = reader.get_variable_to_shape_map() - new_names = sorted( - [var.name.split(':')[0] for var in tf.compat.v1.global_variables() - if var.name.split(':')[0] not in saved_shapes]) - for saved_var_name in new_names: - print("New name {} will use default value".format(saved_var_name)) - var_names = sorted( - [(var.name, var.name.split(':')[0]) for var in tf.compat.v1.global_variables() - if var.name.split(':')[0] in saved_shapes]) - restore_vars = [] - restore_names = [] - for var_name, saved_var_name in var_names: - curr_var = tf.compat.v1.get_default_graph().get_tensor_by_name(var_name) - var_shape = curr_var.get_shape().as_list() - if var_shape == saved_shapes[saved_var_name]: - restore_vars.append(curr_var) - restore_names.append(saved_var_name) - else: - print("Dropping {} due to shape change".format(saved_var_name)) - legacy_names = sorted( - [name for name in saved_shapes.keys() - if name not in restore_names]) - for saved_var_name in legacy_names: - print("Dropping {} as no longer used".format(saved_var_name)) - opt_saver = tf.compat.v1.train.Saver(restore_vars) - opt_saver.restore(tfprocess.session, cp) - else: - 
print("No checkpoint to upgrade!") - exit(1) - - steps = tf.compat.v1.train.global_step(tfprocess.session, tfprocess.global_step) - path = os.path.join(root_dir, cfg['name']) - save_path = tfprocess.saver.save(tfprocess.session, path, global_step=steps) - tfprocess.session.close() - -if __name__ == "__main__": - argparser = argparse.ArgumentParser(description=\ - 'Convert current checkpoint to new training script or incompatible training parameters.') - argparser.add_argument('--cfg', type=argparse.FileType('r'), - help='yaml configuration with training parameters') - - main(argparser.parse_args()) From 9f7defc733c29a29240660c14ac2ffe37d5b969f Mon Sep 17 00:00:00 2001 From: Tilps Date: Thu, 12 Dec 2019 20:01:06 +1100 Subject: [PATCH 32/39] Some more cleanup. --- tf/train.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tf/train.py b/tf/train.py index a4c98462..f15b7230 100755 --- a/tf/train.py +++ b/tf/train.py @@ -125,7 +125,6 @@ def main(cmd): train_parser.parse, output_types=(tf.string, tf.string, tf.string, tf.string)) train_dataset = train_dataset.map(ChunkParser.parse_function) train_dataset = train_dataset.prefetch(4) - #train_iterator = tf.compat.v1.data.make_one_shot_iterator(train_dataset) shuffle_size = int(shuffle_size*(1.0-train_ratio)) test_parser = ChunkParser(FileDataSrc(test_chunks), @@ -134,14 +133,9 @@ def main(cmd): test_parser.parse, output_types=(tf.string, tf.string, tf.string, tf.string)) test_dataset = test_dataset.map(ChunkParser.parse_function) test_dataset = test_dataset.prefetch(4) - #test_iterator = tf.compat.v1.data.make_one_shot_iterator(test_dataset) - #tfprocess.init(test_dataset, train_iterator, test_iterator) tfprocess.init_v2(train_dataset, test_dataset) - #if os.path.exists(os.path.join(root_dir, 'checkpoint')): - # cp = tf.train.latest_checkpoint(root_dir) - # tfprocess.restore(cp) tfprocess.restore_v2() # If number of test positions is not given @@ -153,7 +147,6 @@ def main(cmd): num_evals = max(1, num_evals // ChunkParser.BATCH_SIZE) print("Using {} evaluation batches".format(num_evals)) - #tfprocess.process_loop(total_batch_size, num_evals, batch_splits=batch_splits) tfprocess.process_loop_v2(total_batch_size, num_evals, batch_splits=batch_splits) if cmd.output is not None: @@ -162,7 +155,6 @@ def main(cmd): else: tfprocess.save_leelaz_weights_v2(cmd.output) - #tfprocess.session.close() train_parser.shutdown() test_parser.shutdown() From 7823f0d3e5c57f1e11af703733ff8d9f48978c01 Mon Sep 17 00:00:00 2001 From: Tilps Date: Thu, 12 Dec 2019 23:21:46 +1100 Subject: [PATCH 33/39] More cleanup. --- tf/tfprocess.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index 1d9a44ea..377f40ea 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -27,10 +27,6 @@ from net import Net -# Bias weights for layers not followed by BatchNorm -# We do not regularlize biases, so they are not -# added to the regularlizer collection - class ApplySqueezeExcitation(tf.keras.layers.Layer): def __init__(self, **kwargs): super(ApplySqueezeExcitation, self).__init__(**kwargs) From 5f4d8c44de7ebe4929e8c700dae90afb1cde5428 Mon Sep 17 00:00:00 2001 From: Tilps Date: Thu, 12 Dec 2019 23:23:21 +1100 Subject: [PATCH 34/39] Small fix needed to make script work in ubuntu. 
---
 tf/tfprocess.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tf/tfprocess.py b/tf/tfprocess.py
index 377f40ea..ce24fdd8 100644
--- a/tf/tfprocess.py
+++ b/tf/tfprocess.py
@@ -115,6 +115,7 @@ def __init__(self, cfg):
 
         gpus = tf.config.experimental.list_physical_devices('GPU')
         tf.config.experimental.set_visible_devices(gpus[self.cfg['gpu']], 'GPU')
+        tf.config.experimental.set_memory_growth(gpus[self.cfg['gpu']], True)
         if self.model_dtype == tf.float16:
             tf.keras.mixed_precision.experimental.set_policy('mixed_float16')
 

From da94d7635f85bca0ba46f3a9460d719a42225293 Mon Sep 17 00:00:00 2001
From: Tilps
Date: Fri, 13 Dec 2019 08:00:40 +1100
Subject: [PATCH 35/39] Explicitly add the training flags to model calls - as
 apparently needed.

---
 tf/tfprocess.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tf/tfprocess.py b/tf/tfprocess.py
index ce24fdd8..d5c5f2e1 100644
--- a/tf/tfprocess.py
+++ b/tf/tfprocess.py
@@ -345,7 +345,7 @@ def process_inner_loop(self):
         print('tracing inner loop!')
         x, y, z, q = next(self.train_iter)
         with tf.GradientTape() as tape:
-            policy, value = self.model(x)
+            policy, value = self.model(x, training=True)
             policy_loss = self.policy_loss_fn(y, policy)
             reg_term = sum(self.model.losses)
             if self.wdl:
@@ -506,7 +506,7 @@ def calculate_swa_summaries_v2(self, test_batches, steps):
     def calculate_test_summaries_inner_loop(self):
         print('tracing summaries inner loop!')
         x, y, z, q = next(self.test_iter)
-        policy, value = self.model(x)
+        policy, value = self.model(x, training=False)
         policy_loss = self.policy_loss_fn(y, policy)
         policy_accuracy = self.accuracy_fn(y, policy)
         if self.wdl:

From 20fe10a72e053648ae7da119c9545c090225b797 Mon Sep 17 00:00:00 2001
From: Tilps
Date: Fri, 13 Dec 2019 08:18:57 +1100
Subject: [PATCH 36/39] More cleanup.

---
 tf/tfprocess.py | 20 +++++++++-----------
 tf/train.py     |  1 -
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/tf/tfprocess.py b/tf/tfprocess.py
index d5c5f2e1..ef141294 100644
--- a/tf/tfprocess.py
+++ b/tf/tfprocess.py
@@ -133,11 +133,11 @@ def init_net_v2(self):
         input_var = tf.keras.Input(shape=(112, 8*8))
         x_planes = tf.keras.layers.Reshape([112, 8, 8])(input_var)
         self.model = tf.keras.Model(inputs=input_var, outputs=self.construct_net_v2(x_planes))
-        self.swa_count = None
+        # swa_count initialized regardless to make checkpoint code simpler.
+ self.swa_count = tf.Variable(0., name='swa_count', trainable=False) self.swa_weights = None if self.swa_enabled: # Count of networks accumulated into SWA - self.swa_count = tf.Variable(0., name='swa_count', trainable=False) self.swa_weights = [tf.Variable(w, trainable=False) for w in self.model.weights] self.active_lr = 0.01 @@ -342,7 +342,6 @@ def read_weights(self): @tf.function() def process_inner_loop(self): - print('tracing inner loop!') x, y, z, q = next(self.train_iter) with tf.GradientTape() as tape: policy, value = self.model(x, training=True) @@ -504,7 +503,6 @@ def calculate_swa_summaries_v2(self, test_batches, steps): @tf.function() def calculate_test_summaries_inner_loop(self): - print('tracing summaries inner loop!') x, y, z, q = next(self.test_iter) policy, value = self.model(x, training=False) policy_loss = self.policy_loss_fn(y, policy) @@ -645,14 +643,14 @@ def save_leelaz_weights_v2(self, filename): permuted_tensors[-8] = all_tensors[-14] permuted_tensors[-9] = all_tensors[-16] permuted_tensors[-10] = all_tensors[-5] - permuted_tensors[-11] = all_tensors[-6] - permuted_tensors[-12] = all_tensors[-7] - permuted_tensors[-13] = all_tensors[-8] - permuted_tensors[-14] = all_tensors[-9] - permuted_tensors[-15] = all_tensors[-10] - permuted_tensors[-16] = all_tensors[-15] + permuted_tensors[-11] = all_tensors[-6] + permuted_tensors[-12] = all_tensors[-7] + permuted_tensors[-13] = all_tensors[-8] + permuted_tensors[-14] = all_tensors[-9] + permuted_tensors[-15] = all_tensors[-10] + permuted_tensors[-16] = all_tensors[-15] all_tensors = permuted_tensors - + for e, nparray in enumerate(all_tensors): # Rescale rule50 related weights as clients do not normalize the input. if e == 0: diff --git a/tf/train.py b/tf/train.py index f15b7230..ee76b390 100755 --- a/tf/train.py +++ b/tf/train.py @@ -25,7 +25,6 @@ import random import multiprocessing as mp import tensorflow as tf -#tf.compat.v1.disable_v2_behavior() from tfprocess import TFProcess from chunkparser import ChunkParser From 3ac88fa7aa0926c0d9a6e405e22e4919a107cd74 Mon Sep 17 00:00:00 2001 From: Tilps Date: Fri, 13 Dec 2019 16:44:53 +1100 Subject: [PATCH 37/39] Pull iterator advancement out of the tf.function loops, its not supposed to work there. Even if it was working just fine... this seems slightly faster. 
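
The motivation: a tf.function traces Python code once, so calling next() on
an eager iterator inside the traced body relies at best on special-case
capture. Driving the iterator from eager code and passing tensors in keeps
the traced function pure. The shape of the change, schematically (names
illustrative):

    @tf.function
    def train_step(x, y, z, q):          # traced; sees only tensors
        ...

    for _ in range(batch_splits):
        x, y, z, q = next(train_iter)    # iterator advanced eagerly
        train_step(x, y, z, q)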
--- tf/tfprocess.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index ef141294..a923feb9 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -341,8 +341,7 @@ def read_weights(self): return [w.read_value() for w in self.model.weights] @tf.function() - def process_inner_loop(self): - x, y, z, q = next(self.train_iter) + def process_inner_loop(self, x, y, z, q): with tf.GradientTape() as tape: policy, value = self.model(x, training=True) policy_loss = self.policy_loss_fn(y, policy) @@ -406,7 +405,8 @@ def process_v2(self, batch_size, test_batches, batch_splits=1): # Run training for this batch grads = None for _ in range(batch_splits): - policy_loss, value_loss, mse_loss, reg_term, new_grads = self.process_inner_loop() + x, y, z, q = next(self.train_iter) + policy_loss, value_loss, mse_loss, reg_term, new_grads = self.process_inner_loop(x, y, z, q) if not grads: grads = new_grads else: @@ -502,8 +502,7 @@ def calculate_swa_summaries_v2(self, test_batches, steps): w.assign(old) @tf.function() - def calculate_test_summaries_inner_loop(self): - x, y, z, q = next(self.test_iter) + def calculate_test_summaries_inner_loop(self, x, y, z, q): policy, value = self.model(x, training=False) policy_loss = self.policy_loss_fn(y, policy) policy_accuracy = self.accuracy_fn(y, policy) @@ -524,7 +523,8 @@ def calculate_test_summaries_v2(self, test_batches, steps): sum_policy = 0 sum_value = 0 for _ in range(0, test_batches): - policy_loss, value_loss, mse_loss, policy_accuracy, value_accuracy = self.calculate_test_summaries_inner_loop() + x, y, z, q = next(self.test_iter) + policy_loss, value_loss, mse_loss, policy_accuracy, value_accuracy = self.calculate_test_summaries_inner_loop(x, y, z, q) sum_policy_accuracy += policy_accuracy sum_mse += mse_loss sum_policy += policy_loss From 681ab7429f8411cc8a9e339cd3bcdbb00ae162f9 Mon Sep 17 00:00:00 2001 From: Tilps Date: Fri, 13 Dec 2019 17:43:28 +1100 Subject: [PATCH 38/39] Add experimental dataset loader. --- tf/train.py | 74 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 62 insertions(+), 12 deletions(-) diff --git a/tf/train.py b/tf/train.py index ee76b390..d452f03c 100755 --- a/tf/train.py +++ b/tf/train.py @@ -83,6 +83,43 @@ def next(self): except: print("failed to parse {}".format(filename)) +def extract_inputs_outputs(raw): + # first 4 bytes in each batch entry are boring. + # Next 7432 are easy, policy extraction. + policy = tf.io.decode_raw(tf.strings.substr(raw, 4, 7432), tf.float32) + # Next are 104 bit packed chess boards, they have to be expanded. + bit_planes = tf.expand_dims(tf.reshape(tf.io.decode_raw(tf.strings.substr(raw, 7436, 832), tf.uint8), [-1, 104, 8]), -1) + bit_planes = tf.bitwise.bitwise_and(tf.tile(bit_planes, [1, 1, 1, 8]), [128, 64, 32, 16, 8, 4, 2, 1]) + bit_planes = tf.minimum(1., tf.cast(bit_planes, tf.float32)) + # Next 5 planes are 1 or 0 to indicate 8x8 of 1 or 0. + unit_planes = tf.expand_dims(tf.expand_dims(tf.io.decode_raw(tf.strings.substr(raw, 8268, 5), tf.uint8), -1), -1) + unit_planes = tf.cast(tf.tile(unit_planes, [1, 1, 8, 8]), tf.float32) + # rule50 count plane. + rule50_plane = tf.expand_dims(tf.expand_dims(tf.io.decode_raw(tf.strings.substr(raw, 8273, 1), tf.uint8), -1), -1) + rule50_plane = tf.cast(tf.tile(rule50_plane, [1, 1, 8, 8]), tf.float32) + rule50_plane = tf.divide(rule50_plane, 99.) 
+ # zero plane and one plane + zero_plane = tf.zeros_like(rule50_plane) + one_plane = tf.ones_like(rule50_plane) + inputs = tf.reshape(tf.concat([bit_planes, unit_planes, rule50_plane, zero_plane, one_plane], 1), [-1, 112, 64]) + + # winner is stored in one signed byte and needs to be converted to one hot. + winner = tf.cast(tf.io.decode_raw(tf.strings.substr(raw, 8275, 1), tf.int8), tf.float32) + winner = tf.tile(winner, [1,3]) + z = tf.cast(tf.equal(winner, [1., 0., -1.]), tf.float32) + + # Outcome distribution needs to be calculated from q and d. + best_q = tf.io.decode_raw(tf.strings.substr(raw, 8280, 4), tf.float32) + best_d = tf.io.decode_raw(tf.strings.substr(raw, 8288, 4), tf.float32) + best_q_w = 0.5 * (1.0 - best_d + best_q) + best_q_l = 0.5 * (1.0 - best_d - best_q) + + q = tf.concat([best_q_w, best_d, best_q_l], 1) + + return (inputs, policy, z, q) + +def sample(x): + return tf.math.equal(tf.random.uniform([], 0, SKIP-1, dtype=tf.int32), 0) def main(cmd): cfg = yaml.safe_load(cmd.cfg.read()) @@ -91,6 +128,7 @@ def main(cmd): num_chunks = cfg['dataset']['num_chunks'] allow_less = cfg['dataset'].get('allow_less_chunks', False) train_ratio = cfg['dataset']['train_ratio'] + experimental_parser = cfg['dataset'].get('experimental_v4_only_dataset', False) num_train = int(num_chunks*train_ratio) num_test = num_chunks - num_train if 'input_test' in cfg['dataset']: @@ -118,20 +156,32 @@ def main(cmd): os.makedirs(root_dir) tfprocess = TFProcess(cfg) - train_parser = ChunkParser(FileDataSrc(train_chunks), - shuffle_size=shuffle_size, sample=SKIP, batch_size=ChunkParser.BATCH_SIZE) - train_dataset = tf.data.Dataset.from_generator( - train_parser.parse, output_types=(tf.string, tf.string, tf.string, tf.string)) - train_dataset = train_dataset.map(ChunkParser.parse_function) - train_dataset = train_dataset.prefetch(4) + if experimental_parser: + train_dataset = tf.data.Dataset.from_tensor_slices(train_chunks).shuffle(len(train_chunks)).repeat()\ + .interleave(lambda x: tf.data.FixedLengthRecordDataset(x, 8292, compression_type='GZIP', num_parallel_reads=1).filter(sample), num_parallel_calls=tf.data.experimental.AUTOTUNE)\ + .shuffle(shuffle_size)\ + .batch(split_batch_size).map(extract_inputs_outputs).prefetch(4) + else: + train_parser = ChunkParser(FileDataSrc(train_chunks), + shuffle_size=shuffle_size, sample=SKIP, batch_size=ChunkParser.BATCH_SIZE) + train_dataset = tf.data.Dataset.from_generator( + train_parser.parse, output_types=(tf.string, tf.string, tf.string, tf.string)) + train_dataset = train_dataset.map(ChunkParser.parse_function) + train_dataset = train_dataset.prefetch(4) shuffle_size = int(shuffle_size*(1.0-train_ratio)) - test_parser = ChunkParser(FileDataSrc(test_chunks), - shuffle_size=shuffle_size, sample=SKIP, batch_size=ChunkParser.BATCH_SIZE) - test_dataset = tf.data.Dataset.from_generator( - test_parser.parse, output_types=(tf.string, tf.string, tf.string, tf.string)) - test_dataset = test_dataset.map(ChunkParser.parse_function) - test_dataset = test_dataset.prefetch(4) + if experimental_parser: + test_dataset = tf.data.Dataset.from_tensor_slices(test_chunks).shuffle(len(test_chunks)).repeat()\ + .interleave(lambda x: tf.data.FixedLengthRecordDataset(x, 8292, compression_type='GZIP', num_parallel_reads=1).filter(sample), num_parallel_calls=tf.data.experimental.AUTOTUNE)\ + .shuffle(shuffle_size)\ + .batch(split_batch_size).map(extract_inputs_outputs).prefetch(4) + else: + test_parser = ChunkParser(FileDataSrc(test_chunks), + shuffle_size=shuffle_size, sample=SKIP, 
batch_size=ChunkParser.BATCH_SIZE) + test_dataset = tf.data.Dataset.from_generator( + test_parser.parse, output_types=(tf.string, tf.string, tf.string, tf.string)) + test_dataset = test_dataset.map(ChunkParser.parse_function) + test_dataset = test_dataset.prefetch(4) tfprocess.init_v2(train_dataset, test_dataset) From 121545f93feae224293db8fad32b5752691c0bc2 Mon Sep 17 00:00:00 2001 From: Tilps Date: Fri, 13 Dec 2019 20:28:16 +1100 Subject: [PATCH 39/39] Some micro-optimizations. --- tf/tfprocess.py | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/tf/tfprocess.py b/tf/tfprocess.py index a923feb9..cc9d516e 100644 --- a/tf/tfprocess.py +++ b/tf/tfprocess.py @@ -360,6 +360,10 @@ def process_inner_loop(self, x, y, z, q): value_loss = self.value_loss_fn(self.qMix(z, q), value) return policy_loss, value_loss, mse_loss, reg_term, tape.gradient(total_loss, self.model.trainable_weights) + @tf.function() + def add_lists(self, x, y): + return [tf.math.add(a, b) for (a, b) in zip(x, y)] + def process_v2(self, batch_size, test_batches, batch_splits=1): if not self.time_start: self.time_start = time.time() @@ -410,7 +414,7 @@ def process_v2(self, batch_size, test_batches, batch_splits=1): if not grads: grads = new_grads else: - grads = [tf.math.add(a, b) for (a, b) in zip(grads, new_grads)] + grads = self.add_lists(grads, new_grads) # Keep running averages # Google's paper scales MSE by 1/4 to a [0, 1] range, so do the same to # get comparable values. @@ -490,16 +494,25 @@ def process_v2(self, batch_size, test_batches, batch_splits=1): self.save_swa_weights_v2(swa_path) print("SWA Weights saved in file: {}".format(swa_path)) - def calculate_swa_summaries_v2(self, test_batches, steps): + @tf.function() + def switch_to_swa(self): backup = self.read_weights() for (swa, w) in zip(self.swa_weights, self.model.weights): w.assign(swa.read_value()) + return backup + + @tf.function() + def restore_weights(self, backup): + for (old, w) in zip(backup, self.model.weights): + w.assign(old) + + def calculate_swa_summaries_v2(self, test_batches, steps): + backup = self.switch_to_swa() true_test_writer, self.test_writer = self.test_writer, self.swa_writer print('swa', end=' ') self.calculate_test_summaries_v2(test_batches, steps) self.test_writer = true_test_writer - for (old, w) in zip(backup, self.model.weights): - w.assign(old) + self.restore_weights(backup) @tf.function() def calculate_test_summaries_inner_loop(self, x, y, z, q): @@ -576,19 +589,17 @@ def compute_update_ratio_v2(self, before_weights, after_weights, steps): ratios = [tf.cond(r > 0, lambda: tf.math.log(r) / 2.30258509299, lambda: 200.) for (_, r) in ratios] tf.summary.histogram('update_ratios_log10', tf.stack(ratios), buckets=1000, step=steps) + @tf.function() def update_swa_v2(self): num = self.swa_count.read_value() for (w, swa) in zip(self.model.weights, self.swa_weights): swa.assign(swa.read_value() * (num / (num + 1.)) + w.read_value() * (1. / (num + 1.))) - self.swa_count.assign(min(num + 1., self.swa_max_n)) + self.swa_count.assign(tf.math.minimum(num + 1., self.swa_max_n)) def save_swa_weights_v2(self, filename): - backup = self.read_weights() - for (swa, w) in zip(self.swa_weights, self.model.weights): - w.assign(swa.read_value()) + backup = self.switch_to_swa() self.save_leelaz_weights_v2(filename) - for (old, w) in zip(backup, self.model.weights): - w.assign(old) + self.restore_weights(backup) def save_leelaz_weights_v2(self, filename): all_tensors = []
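
A closing note on the packed-board expansion in the experimental parser
(PATCH 38): each of the 104 bitboard planes arrives as 8 packed bytes, and
tf.bitwise.bitwise_and against the mask vector [128, 64, ..., 1] broadcasts
every byte against its eight bit positions. A self-contained sketch of the
trick, with an arbitrary example byte:

    import tensorflow as tf

    byte = tf.constant([[0b10100001]], tf.uint8)       # one packed bitboard row
    bits = tf.bitwise.bitwise_and(
        tf.tile(tf.expand_dims(byte, -1), [1, 1, 8]),  # repeat the byte 8 times
        [128, 64, 32, 16, 8, 4, 2, 1])                 # keep a different bit in each copy
    planes = tf.minimum(1., tf.cast(bits, tf.float32)) # squash 128/32/... down to 1.0
    # planes.numpy() -> [[[1., 0., 1., 0., 0., 0., 0., 1.]]]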