Source code for tensorforce.core.optimizers.synchronization

# Copyright 2018 Tensorforce Team. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import tensorflow as tf

from tensorforce import util
from tensorforce.core import Module, parameter_modules
from tensorforce.core.optimizers import Optimizer


[docs]class Synchronization(Optimizer):
    """
    Synchronization optimizer, which updates variables periodically to the value of a corresponding  
    set of source variables (specification key: `synchronization`).

    Args:
        name (string): Module name
            (<span style="color:#0000C0"><b>internal use</b></span>).
        optimizer (specification): Optimizer configuration
            (<span style="color:#C00000"><b>required</b></span>).
        sync_frequency (parameter, int > 0): Timestep interval between updates which also perform a
            synchronization step (<span style="color:#00C000"><b>default</b></span>: every time).
        update_weight (parameter, 0.0 < float <= 1.0): Update weight
            (<span style="color:#00C000"><b>default</b></span>: 1.0).
        summary_labels ('all' | iter[string]): Labels of summaries to record
            (<span style="color:#00C000"><b>default</b></span>: inherit value of parent module).
    """

    def __init__(self, name, sync_frequency=1, update_weight=1.0, summary_labels=None):
        super().__init__(name=name, summary_labels=summary_labels)

        self.sync_frequency = self.add_module(
            name='sync-frequency', module=sync_frequency, modules=parameter_modules, dtype='long'
        )

        self.update_weight = self.add_module(
            name='update-weight', module=update_weight, modules=parameter_modules, dtype='float'
        )

    def tf_initialize(self):
        super().tf_initialize()

        self.last_sync = self.add_variable(
            name='last-sync', dtype='long', shape=(), is_trainable=False, initializer=-1
        )

    def tf_step(self, variables, source_variables, **kwargs):
        assert all(
            util.shape(source) == util.shape(target)
            for source, target in zip(source_variables, variables)
        )

        timestep = Module.retrieve_tensor(name='timestep')

        def apply_sync():
            update_weight = self.update_weight.value()
            deltas = list()
            for source_variable, target_variable in zip(source_variables, variables):
                delta = update_weight * (source_variable - target_variable)
                deltas.append(delta)

            applied = self.apply_step(variables=variables, deltas=deltas)
            last_sync_updated = self.last_sync.assign(value=timestep)

            with tf.control_dependencies(control_inputs=(applied, last_sync_updated)):
                # Trivial operation to enforce control dependency
                return util.fmap(function=util.identity_operation, xs=deltas)

        def no_sync():
            deltas = list()
            for variable in variables:
                delta = tf.zeros(shape=util.shape(variable), dtype=util.tf_dtype(dtype='float'))
                deltas.append(delta)
            return deltas

        sync_frequency = self.sync_frequency.value()
        zero = tf.constant(value=0, dtype=util.tf_dtype(dtype='long'))
        skip_sync = tf.math.less(x=(timestep - self.last_sync), y=sync_frequency)
        skip_sync = tf.math.logical_and(
            x=skip_sync, y=tf.math.greater_equal(x=self.last_sync, y=zero)
        )

        return self.cond(pred=skip_sync, true_fn=no_sync, false_fn=apply_sync)