SI

Bases: PytorchRegularizationBaseAlgorithm

Synaptic Intelligence algorithm class. Inherits from PytorchRegularizationBaseAlgorithm.

The equivalent JAX implementation is SI in JAX.

References

[1] Zenke, F., Poole, B. & Ganguli, S. Continual Learning Through Synaptic Intelligence. in Proceedings of the 34th International Conference on Machine Learning, ICML 2017.
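
In brief, SI maintains for each parameter θ_k a running sum ω_k of minus the gradient times the parameter update along the training trajectory. At each task boundary the accumulated ω_k is folded into an importance weight, per Eq. 5 of [1]:

$$
\Omega_k^{\nu} = \sum_{\nu' < \nu} \frac{\omega_k^{\nu'}}{\left(\Delta_k^{\nu'}\right)^{2} + \xi}
$$

where Δ_k^{ν'} is the total change of θ_k over task ν' and ξ is a damping term that prevents division by zero. The importance then weights a quadratic penalty on drift away from the previous solution, c · Σ_k Ω_k (θ̃_k − θ_k)², added to the training loss; the strength c corresponds to si_lambda below.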

Source code in sequel/algos/pytorch/si.py
import logging

import torch
from torch import Tensor

# (the package-internal import of PytorchRegularizationBaseAlgorithm is omitted in this excerpt)

class SI(PytorchRegularizationBaseAlgorithm):
    """Synaptic Intelligence Algorithm Class. Inherits from PytorchRegularizationBaseAlgorithm.

    The equivalent JAX implementation is [`SI in JAX`][sequel.algos.jax.si.SI].

    References:
        [1] Zenke, F., Poole, B. & Ganguli, S. Continual Learning Through Synaptic Intelligence. in Proceedings of the
            34th International Conference on Machine Learning, ICML 2017.
    """

    def __init__(self, si_lambda: float = 1.0, xi: float = 0.1, *args, **kwargs):
        super().__init__(regularization_coefficient=si_lambda, *args, **kwargs)
        # hyperparameters: xi damps the denominator of the importance update (Eq. 5)
        self.xi = xi

        # register one running path-integral buffer (suffix "_w") per parameter
        for name, param in self.backbone.named_parameters():
            name = name.replace(".", "_")
            self.backbone.register_buffer(f"{name}_w", torch.zeros_like(param))

    def __repr__(self) -> str:
        return f"SI(si_lambda={self.regularization_coefficient}, xi={self.xi})"

    def on_before_training_step(self, *args, **kwargs):
        # snapshot the parameters so the per-step update delta can be computed afterwards
        for name, param in self.backbone.named_parameters():
            name = name.replace(".", "_")
            setattr(self.backbone, f"{name}_prev", param.data.clone())

    def on_after_training_step(self, *args, **kwargs):
        for name, param in self.backbone.named_parameters():
            name = name.replace(".", "_")
            if param.grad is not None:
                delta = param.clone().detach() - getattr(self.backbone, f"{name}_prev")
                w = getattr(self.backbone, f"{name}_w")
                # accumulate the path integral (numerator of Eq. 5): w <- w - grad * delta
                setattr(self.backbone, f"{name}_w", w - param.grad * delta)

    def calculate_parameter_importance(self):
        logging.info("Updating importance parameters for Synaptic Intelligence")
        importances = {}
        for name, p in self.backbone.named_parameters():
            name = name.replace(".", "_")
            old_importance = getattr(self.backbone, f"{name}_importance")
            omega: Tensor = getattr(self.backbone, f"{name}_w")
            # total parameter change over the task that just finished
            delta: Tensor = p.detach() - getattr(self.backbone, f"{name}_old")

            # Eq. 5 of [1]: Omega <- Omega + w / (delta^2 + xi)
            importances[name] = old_importance + omega / (delta.pow(2) + self.xi)

            # reset (small) omega for next task
            setattr(self.backbone, f"{name}_w", omega.clone().zero_())

        return importances
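
To make the bookkeeping concrete, the following is a minimal, self-contained sketch of the same ω/Ω cycle on a bare PyTorch module, outside the sequel base classes. The model, training loop, dictionary layout, and helper names here are illustrative assumptions, not part of the sequel API.

import torch
from torch import nn

model = nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
xi, si_lambda = 0.1, 1.0

# per-parameter state: running path integral w, anchor point old, importance Omega
state = {
    name: {"w": torch.zeros_like(p), "old": p.detach().clone(), "importance": torch.zeros_like(p)}
    for name, p in model.named_parameters()
}

def si_penalty():
    # quadratic surrogate loss weighted by the consolidated importance
    return sum(
        (state[name]["importance"] * (p - state[name]["old"]).pow(2)).sum()
        for name, p in model.named_parameters()
    )

for step in range(100):
    x, y = torch.randn(8, 4), torch.randn(8, 2)
    # on_before_training_step: snapshot the parameters
    prev = {name: p.detach().clone() for name, p in model.named_parameters()}
    loss = nn.functional.mse_loss(model(x), y) + si_lambda * si_penalty()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # on_after_training_step: accumulate w <- w - grad * delta
    for name, p in model.named_parameters():
        if p.grad is not None:
            state[name]["w"] -= p.grad * (p.detach() - prev[name])

# calculate_parameter_importance at the task boundary (Eq. 5), then reset w
for name, p in model.named_parameters():
    delta = p.detach() - state[name]["old"]
    state[name]["importance"] += state[name]["w"] / (delta.pow(2) + xi)
    state[name]["w"].zero_()
    state[name]["old"] = p.detach().clone()

The three phases mirror the hooks above: the pre-step snapshot (on_before_training_step), the post-step path-integral update (on_after_training_step), and the task-boundary importance consolidation (calculate_parameter_importance). Keeping ω, the anchor, and Ω as per-parameter tensors is what the class achieves with registered buffers, which also lets the state move with the module across devices and checkpoints.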