diff --git a/CHANGELOG.md b/CHANGELOG.md index fc0be4e350..6887984586 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,7 @@ - Added documentation for the Partitioner to `framework.rst` (#1828) - Added CIL vs SIRF tests comparing preconditioned ISTA in CIL and MLEM in SIRF (#1823) - Update to CCPi-Regularisation toolkit v24.0.1 (#1868) + - Updated the `SPDHG` algorithm to take a stochastic `Sampler` and to more easily set step sizes (#1644) - Bug fixes: - gradient descent `update_objective` called twice on the initial point.(#1789) - ProjectionMap operator bug fix in adjoint and added documentation (#1743) @@ -59,6 +60,7 @@ - Update dataexample remote data download to work with windows and use zenodo_get for data download (#1774) - Changes that break backwards compatibility: - Merged the files `BlockGeometry.py` and `BlockDataContainer.py` in `framework` to one file `block.py`. Please use `from cil.framework import BlockGeometry, BlockDataContainer` as before (#1799) + - Deprecated `norms` and `prob` in the `SPDHG` algorithm to be set in the `BlockOperator` and `Sampler` respectively (#1644) - Bug fix in `FGP_TV` function to set the default behaviour not to enforce non-negativity (#1826). * 24.0.0 @@ -83,7 +85,6 @@ - ZeroOperator no longer relies on the default of allocate - Bug fix in SIRF TotalVariation unit tests with warm_start - Allow show2D to be used with 3D `DataContainer` instances - - Added the a `Sampler` class as a CIL optimisation utility - Update documentation for symmetrised gradient - Added documentation for CompositionOperator and SumOperator - Updated FISTA and ISTA algorithms to allow input functions to be None diff --git a/Wrappers/Python/cil/optimisation/algorithms/SPDHG.py b/Wrappers/Python/cil/optimisation/algorithms/SPDHG.py index ce48384924..b8d04069a6 100644 --- a/Wrappers/Python/cil/optimisation/algorithms/SPDHG.py +++ b/Wrappers/Python/cil/optimisation/algorithms/SPDHG.py @@ -18,21 +18,25 @@ # Claire Delplancke (University of Bath) from cil.optimisation.algorithms import Algorithm +from cil.optimisation.operators import BlockOperator import numpy as np import logging +from cil.optimisation.utilities import Sampler +from numbers import Number +import warnings +from cil.framework import BlockDataContainer log = logging.getLogger(__name__) class SPDHG(Algorithm): - r'''Stochastic Primal Dual Hybrid Gradient - - Problem: - + r'''Stochastic Primal Dual Hybrid Gradient (SPDHG) solves separable optimisation problems of the type: .. math:: \min_{x} f(Kx) + g(x) = \min_{x} \sum f_i(K_i x) + g(x) + where :math:`f_i` and the regulariser :math:`g` need to be proper, convex and lower semi-continuous. + Parameters ---------- f : BlockFunction @@ -41,124 +45,146 @@ class SPDHG(Algorithm): A convex function with a "simple" proximal operator : BlockOperator BlockOperator must contain Linear Operators - tau : positive float, optional, default=None - Step size parameter for Primal problem - sigma : list of positive float, optional, default=None - List of Step size parameters for Dual problem - initial : DataContainer, optional, default=None - Initial point for the SPDHG algorithm - prob : list of floats, optional, default=None - List of probabilities. If None each subset will have probability = 1/number of subsets - gamma : float - parameter controlling the trade-off between the primal and dual step sizes - - **kwargs: - norms : list of floats - precalculated list of norms of the operators + tau : positive float, optional + Step size parameter for the primal problem. 
If `None` will be computed by the algorithm; see the note for details.
+    sigma : list of positive float, optional
+        List of step size parameters for the dual problem. If `None` will be computed by the algorithm; see the note for details.
+    initial : DataContainer, optional
+        Initial point for the SPDHG algorithm. The default value is a zero DataContainer in the domain of the `operator`.
+    gamma : float, optional
+        Parameter controlling the trade-off between the primal and dual step sizes.
+    sampler : `cil.optimisation.utilities.Sampler`, optional
+        A `Sampler` controlling the selection of the next index for the SPDHG update. If `None`, a sampler will be created for uniform random sampling with replacement. See notes.
+
+    prob_weights : list of floats, optional
+        If the sampler were called a large number of times, this argument holds the expected fraction of calls for each index, normalised to sum to 1. It should be a list of floats of length `num_indices` that sums to 1. Do not pass this argument if the provided sampler already has a `prob_weights` attribute: in that case the sampler's attribute takes precedence over this parameter. If neither this argument nor a sampler with `prob_weights` is passed, it defaults to `[1/len(operator)]*len(operator)`.
+
+
+    Note
+    -----
+    The `sampler` can be an instance of the `cil.optimisation.utilities.Sampler` class or a custom class with the `__next__(self)` method implemented, which outputs an integer index from `{0, 1, ..., len(operator)-1}`.
+
+    Note
+    -----
+    "Random sampling with replacement" selects the next index with equal probability from `{0, 1, ..., len(operator)-1}`.
+
     Example
     -------
+    >>> data = dataexample.SIMULATED_PARALLEL_BEAM_DATA.get()
+    >>> data.reorder('astra')
+    >>> data = data.get_slice(vertical='centre')
+    >>> ag = data.geometry
+    >>> ig = ag.get_ImageGeometry()
-    Example of usage: See https://github.com/vais-ral/CIL-Demos/blob/master/Tomography/Simulated/Single%20Channel/PDHG_vs_SPDHG.py
+    >>> data_partitioned = data.partition(num_batches=10, mode='staggered')
+    >>> A_partitioned = ProjectionOperator(ig, data_partitioned.geometry, device="cpu")
-
-    Note
-    ----
+    >>> F = BlockFunction(*[L2NormSquared(b=data_partitioned[i])
+    ...                     for i in range(10)])
-    Convergence is guaranteed provided that [2, eq. (12)]:
+    >>> alpha = 0.025
+    >>> G = alpha * TotalVariation()
+    >>> spdhg = SPDHG(f=F, g=G, operator=A_partitioned, sampler=Sampler.sequential(len(A_partitioned)),
+    ...               initial=A_partitioned.domain_geometry().allocate(1), update_objective_interval=10)
+    >>> spdhg.run(100)
-
-    .. math::
-      \|\sigma[i]^{1/2} * K[i] * tau^{1/2} \|^2 < p_i for all i
+    Example
+    -------
+    For further examples of usage, see the `CIL demos <https://github.com/vais-ral/CIL-Demos/blob/master/Tomography/Simulated/Single%20Channel/PDHG_vs_SPDHG.py>`_.

     Note
-    ----
+    -----
+    When setting `sigma` and `tau`, there are four possible cases, considered by the set-up function. In all cases the probabilities :math:`p_i` are set by a default or user-defined sampler:
+
+    - Case 1: If neither `sigma` nor `tau` is provided then `sigma` is set using the formula:
+
+      .. math:: \sigma_i = \frac{0.99}{\|K_i\|}
+
+      and `tau` is set as per case 2
+
+    - Case 2: If `sigma` is provided but not `tau` then `tau` is calculated using the formula
+
+      .. math:: \tau = 0.99\min_i\left( \frac{p_i}{\sigma_i \|K_i\|^2} \right)
+
+    - Case 3: If `tau` is provided but not `sigma` then `sigma` is calculated using the formula
+
+      .. math:: \sigma_i = \frac{0.99 p_i}{\tau\|K_i\|^2}
+
+    - Case 4: Both `sigma` and `tau` are provided and used as given (see the sketch below).
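+
+    For example, a short sketch reusing `F`, `G` and `A_partitioned` from the example above:
+
+    >>> spdhg = SPDHG(f=F, g=G, operator=A_partitioned)           # Case 1: sigma and tau both computed
+    >>> spdhg = SPDHG(f=F, g=G, operator=A_partitioned, tau=1.0)  # Case 3: sigma computed from tau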
- Notation for primal and dual step-sizes are reversed with comparison - to PDHG.py Note ---- - this code implements serial sampling only, as presented in [2] - (to be extended to more general case of [1] as future work) + Convergence is guaranteed provided that [2, eq. (12)]: + + .. math:: \|\sigma[i]^{1/2} K[i] \tau^{1/2} \|^2 < p_i \text{ for all } i References ---------- - [1]"Stochastic primal-dual hybrid gradient algorithm with arbitrary + [1]"Stochastic primal-dual hybrid gradient algorithm with arbitrary sampling and imaging applications", Chambolle, Antonin, Matthias J. Ehrhardt, Peter Richtárik, and Carola-Bibiane Schonlieb, - SIAM Journal on Optimization 28, no. 4 (2018): 2783-2808. + SIAM Journal on Optimization 28, no. 4 (2018): 2783-2808. https://doi.org/10.1137/17M1134834 [2]"Faster PET reconstruction with non-smooth priors by randomization and preconditioning", Matthias J Ehrhardt, Pawel Markiewicz and Carola-Bibiane Schönlieb, - Physics in Medicine & Biology, Volume 64, Number 22, 2019. + Physics in Medicine & Biology, Volume 64, Number 22, 2019. https://doi.org/10.1088/1361-6560/ab3d07 ''' def __init__(self, f=None, g=None, operator=None, tau=None, sigma=None, - initial=None, prob=None, gamma=1.,**kwargs): - - super(SPDHG, self).__init__(**kwargs) - + initial=None, sampler=None, prob_weights=None, **kwargs): - if f is not None and operator is not None and g is not None: - self.set_up(f=f, g=g, operator=operator, tau=tau, sigma=sigma, - initial=initial, prob=prob, gamma=gamma, norms=kwargs.get('norms', None)) + update_objective_interval = kwargs.pop('update_objective_interval', 1) + super(SPDHG, self).__init__( + update_objective_interval=update_objective_interval) + self.set_up(f=f, g=g, operator=operator, sigma=sigma, tau=tau, + initial=initial, sampler=sampler, prob_weights=prob_weights, **kwargs) - def set_up(self, f, g, operator, tau=None, sigma=None, \ - initial=None, prob=None, gamma=1., norms=None): - + def set_up(self, f, g, operator, sigma=None, tau=None, + initial=None, sampler=None, prob_weights=None, **deprecated_kwargs): '''set-up of the algorithm - Parameters - ---------- - f : BlockFunction - Each must be a convex function with a "simple" proximal method of its conjugate - g : Function - A convex function with a "simple" proximal - operator : BlockOperator - BlockOperator must contain Linear Operators - tau : positive float, optional, default=None - Step size parameter for Primal problem - sigma : list of positive float, optional, default=None - List of Step size parameters for Dual problem - initial : DataContainer, optional, default=None - Initial point for the SPDHG algorithm - prob : list of floats, optional, default=None - List of probabilities. 
If None each subset will have probability = 1/number of subsets
-            gamma : float
-                parameter controlling the trade-off between the primal and dual step sizes
-
-            **kwargs:
-            norms : list of floats
-                precalculated list of norms of the operators
        '''
        log.info("%s setting up", self.__class__.__name__)
+        # algorithmic parameters
        self.f = f
        self.g = g
        self.operator = operator
-        self.tau = tau
-        self.sigma = sigma
-        self.prob = prob
-        self.ndual_subsets = len(self.operator)
-        self.gamma = gamma
-        self.rho = .99
-
-        if self.prob is None:
-            self.prob = [1/self.ndual_subsets] * self.ndual_subsets
-
-
-        if self.sigma is None:
-            if norms is None:
-                # Compute norm of each sub-operator
-                norms = [operator.get_item(i,0).norm() for i in range(self.ndual_subsets)]
-            self.norms = norms
-            self.sigma = [self.gamma * self.rho / ni for ni in norms]
-        if self.tau is None:
-            self.tau = min( [ pi / ( si * ni**2 ) for pi, ni, si in zip(self.prob, norms, self.sigma)] )
-            self.tau *= (self.rho / self.gamma)
+
+        if not isinstance(operator, BlockOperator):
+            raise TypeError("operator should be a BlockOperator")
+
+        self._ndual_subsets = len(self.operator)
+
+        self._prob_weights = getattr(sampler, 'prob_weights', prob_weights)
+
+        self._deprecated_set_prob(deprecated_kwargs, sampler)
+
+        if self._prob_weights is None:
+            self._prob_weights = [1/self._ndual_subsets]*self._ndual_subsets
+
+        if prob_weights is not None and self._prob_weights != prob_weights:
+            raise ValueError('You passed a `prob_weights` argument and a sampler with a different `prob_weights` attribute. Please remove the `prob_weights` argument.')
+
+        if sampler is None:
+            self._sampler = Sampler.random_with_replacement(
+                len(operator), prob=self._prob_weights)
+        else:
+            self._sampler = sampler
+
+        # Set the norms of the operators
+        self._deprecated_set_norms(deprecated_kwargs)
+        self._norms = operator.get_norms_as_list()
+        # Check for any other unexpected kwargs
+        if deprecated_kwargs:
+            raise ValueError("Additional keyword arguments passed but not used: {}".format(deprecated_kwargs))
+
+        self.set_step_sizes(sigma=sigma, tau=tau)

        # initialize primal variable
        if initial is None:
@@ -166,63 +192,264 @@ def set_up(self, f, g, operator, tau=None, sigma=None, \
        else:
            self.x = initial.copy()

-        self.x_tmp = self.operator.domain_geometry().allocate(0)
+        self._x_tmp = self.operator.domain_geometry().allocate(0)

        # initialize dual variable to 0
-        self.y_old = operator.range_geometry().allocate(0)
+        self._y_old = operator.range_geometry().allocate(0)
+        if not isinstance(self._y_old, BlockDataContainer):  # This can be removed once #1863 is fixed
+            self._y_old = BlockDataContainer(self._y_old)

        # initialize variable z corresponding to back-projected dual variable
-        self.z = operator.domain_geometry().allocate(0)
-        self.zbar= operator.domain_geometry().allocate(0)
+        self._z = operator.domain_geometry().allocate(0)
+        self._zbar = operator.domain_geometry().allocate(0)
        # relaxation parameter
-        self.theta = 1
+        self._theta = 1
+
        self.configured = True
        log.info("%s configured", self.__class__.__name__)
+
+    def _deprecated_set_prob(self, deprecated_kwargs, sampler):
+        """
+        Handle deprecated keyword arguments for backward compatibility.
+
+        Parameters
+        ----------
+        deprecated_kwargs : dict
+            Dictionary of keyword arguments.
+        sampler : Sampler
+            Sampler class for selecting the next index for the SPDHG update.
+
+        Notes
+        -----
+        This method is called by the set_up method.
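+
+        For example (mirroring the deprecation warning), the legacy call
+        ``SPDHG(f, g, operator, prob=p)`` corresponds to
+        ``SPDHG(f, g, operator, sampler=Sampler.random_with_replacement(len(operator), prob=p))``.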
+ """ + + prob = deprecated_kwargs.pop('prob', None) + + if prob is not None: + if (self._prob_weights is None) and (sampler is None): + warnings.warn('`prob` is being deprecated to be replaced with a sampler class and `prob_weights`. To randomly sample with replacement use "sampler=Sampler.randomWithReplacement(number_of_subsets, prob=prob). To pass probabilities to the calculation for `sigma` and `tau` please use `prob_weights`. ', DeprecationWarning, stacklevel=2) + self._prob_weights = prob + else: + + raise ValueError( + '`prob` is being deprecated to be replaced with a sampler class and `prob_weights`. You passed a `prob` argument, and either a `prob_weights` argument or a sampler. Please remove the `prob` argument.') + + + + def _deprecated_set_norms(self, deprecated_kwargs): + """ + Handle deprecated keyword arguments for backward compatibility. + + Parameters + ---------- + deprecated_kwargs : dict + Dictionary of keyword arguments. + + Notes + ----- + This method is called by the set_up method. + """ + norms = deprecated_kwargs.pop('norms', None) + + if norms is not None: + self.operator.set_norms(norms) + warnings.warn( + ' `norms` is being deprecated, use instead the `BlockOperator` function `set_norms`', DeprecationWarning, stacklevel=2) + + + + @property + def sigma(self): + return self._sigma + + @property + def tau(self): + return self._tau + + def set_step_sizes_from_ratio(self, gamma=1.0, rho=0.99): + r""" Sets gamma, the step-size ratio for the SPDHG algorithm. Currently gamma takes a scalar value. + + The step sizes `sigma` and `tau` are set using the equations: + + .. math:: \sigma_i= \frac{\gamma\rho }{\|K_i\|^2} + + .. math:: \tau = \rho\min_i([ \frac{p_i }{\sigma_i \|K_i\|^2}) + + + Parameters + ---------- + gamma : Positive float + parameter controlling the trade-off between the primal and dual step sizes + rho : Positive float + parameter controlling the size of the product :math: \sigma\tau :math: + + + + """ + if isinstance(gamma, Number): + if gamma <= 0: + raise ValueError( + "The step-sizes of SPDHG are positive, gamma should also be positive") + + else: + raise ValueError( + "We currently only support scalar values of gamma") + if isinstance(rho, Number): + if rho <= 0: + raise ValueError( + "The step-sizes of SPDHG are positive, rho should also be positive") + + else: + raise ValueError( + "We currently only support scalar values of gamma") + + self._sigma = [gamma * rho / ni for ni in self._norms] + values = [rho*pi / (si * ni**2) for pi, ni, + si in zip(self._prob_weights, self._norms, self._sigma)] + self._tau = min([value for value in values if value > 1e-8]) + + def set_step_sizes(self, sigma=None, tau=None): + r""" Sets sigma and tau step-sizes for the SPDHG algorithm after the initial set-up. The step sizes can be either scalar or array-objects. + + When setting `sigma` and `tau`, there are 4 possible cases considered by setup function: + + - Case 1: If neither `sigma` or `tau` are provided then `sigma` is set using the formula: + + .. math:: \sigma_i= \frac{0.99}{\|K_i\|^2} + + and `tau` is set as per case 2 + + - Case 2: If `sigma` is provided but not `tau` then `tau` is calculated using the formula + + .. math:: \tau = 0.99\min_i( \frac{p_i}{ (\sigma_i \|K_i\|^2) }) + + - Case 3: If `tau` is provided but not `sigma` then `sigma` is calculated using the formula + + .. math:: \sigma_i= \frac{0.99 p_i}{\tau\|K_i\|^2} + + - Case 4: Both `sigma` and `tau` are provided. 
+
+        Parameters
+        ----------
+        sigma : list of positive float, optional
+            List of step size parameters for the dual problem; see the note above for the default.
+        tau : positive float, optional
+            Step size parameter for the primal problem; see the note above for the default.
+        """
+        gamma = 1.
+        rho = .99
+        if sigma is not None:
+            if len(sigma) != self._ndual_subsets:
+                raise ValueError(
+                    "Please pass a list of floats to sigma with the same number of entries as number of operators")
+            if not all(isinstance(x, Number) and x > 0 for x in sigma):
+                raise ValueError(
+                    "Each entry of sigma is expected to be a positive number.")
+            self._sigma = sigma
+
+        elif tau is None:
+            self._sigma = [gamma * rho / ni for ni in self._norms]
+        else:
+            self._sigma = [
+                rho*pi / (tau*ni**2) for ni, pi in zip(self._norms, self._prob_weights)]
+
+        if tau is None:
+            values = [rho*pi / (si * ni**2) for pi, ni,
+                      si in zip(self._prob_weights, self._norms, self._sigma)]
+            self._tau = min([value for value in values if value > 1e-8])
+        else:
+            if not (isinstance(tau, Number) and tau > 0):
+                raise ValueError(
+                    "The step-sizes of SPDHG must be positive, passed tau = {}".format(tau))
+            self._tau = tau
+
+    def check_convergence(self):
+        """ Checks whether the convergence criterion for SPDHG is satisfied with the current scalar values of `tau` and `sigma`.
+
+        Returns
+        -------
+        Boolean
+            True if the convergence criterion is satisfied. False if not satisfied or convergence is unknown.
+
+        Note
+        -----
+        The convergence criterion currently can only be checked for scalar values of `tau` and `sigma[i]`.
+
+        Note
+        ----
+        This checks the convergence criterion only. Numerical errors may mean that some `sigma` and `tau` values satisfying the criterion do not converge; conversely, step sizes outside the criterion may still allow (fast) convergence.
+        """
+        for i in range(self._ndual_subsets):
+            if isinstance(self._tau, Number) and isinstance(self._sigma[i], Number):
+                if self._sigma[i] * self._tau * self._norms[i]**2 > self._prob_weights[i]:
+                    return False
+            else:
+                raise ValueError(
+                    'Convergence criterion currently can only be checked for scalar values of tau and sigma[i].')
+        return True

    def update(self):
+        """ Runs one iteration of SPDHG """
        # Gradient descent for the primal variable
        # x_tmp = x - tau * zbar
-        self.x.sapyb(1., self.zbar, -self.tau, out=self.x_tmp)
+        self._zbar.sapyb(self._tau, self.x, -1., out=self._x_tmp)
+        self._x_tmp *= -1

-        self.g.proximal(self.x_tmp, self.tau, out=self.x)
+        self.g.proximal(self._x_tmp, self._tau, out=self.x)

        # Choose subset
-        i = int(np.random.choice(len(self.sigma), 1, p=self.prob))
+        i = next(self._sampler)

        # Gradient ascent for the dual variable
        # y_k = y_old[i] + sigma[i] * K[i] x
-        y_k = self.operator[i].direct(self.x)
+        try:
+            y_k = self.operator[i].direct(self.x)
+        except IndexError:
+            raise IndexError(
+                'The sampler has outputted an index larger than the number of operators to sample from. 
Please ensure your sampler samples from {0,1,...,len(operator)-1} only.') - y_k.sapyb(self.sigma[i], self.y_old[i], 1., out=y_k) + y_k.sapyb(self._sigma[i], self._y_old[i], 1., out=y_k) - y_k = self.f[i].proximal_conjugate(y_k, self.sigma[i]) + y_k = self.f[i].proximal_conjugate(y_k, self._sigma[i]) # Back-project # x_tmp = K[i]^*(y_k - y_old[i]) - y_k.subtract(self.y_old[i], out=self.y_old[i]) + y_k.subtract(self._y_old[i], out=self._y_old[i]) - self.operator[i].adjoint(self.y_old[i], out = self.x_tmp) + self.operator[i].adjoint(self._y_old[i], out=self._x_tmp) # Update backprojected dual variable and extrapolate # zbar = z + (1 + theta/p[i]) x_tmp # z = z + x_tmp - self.z.add(self.x_tmp, out =self.z) + self._z.add(self._x_tmp, out=self._z) # zbar = z + (theta/p[i]) * x_tmp - self.z.sapyb(1., self.x_tmp, self.theta / self.prob[i], out = self.zbar) + self._z.sapyb(1., self._x_tmp, self._theta / + self._prob_weights[i], out=self._zbar) # save previous iteration - self.save_previous_iteration(i, y_k) + self._save_previous_iteration(i, y_k) def update_objective(self): # p1 = self.f(self.operator.direct(self.x)) + self.g(self.x) p1 = 0. - for i,op in enumerate(self.operator.operators): + for i, op in enumerate(self.operator.operators): p1 += self.f[i](op.direct(self.x)) p1 += self.g(self.x) - d1 = - self.f.convex_conjugate(self.y_old) - tmp = self.operator.adjoint(self.y_old) + d1 = - self.f.convex_conjugate(self._y_old) + tmp = self.operator.adjoint(self._y_old) tmp *= -1 d1 -= self.g.convex_conjugate(tmp) @@ -230,14 +457,38 @@ def update_objective(self): @property def objective(self): - '''alias of loss''' - return [x[0] for x in self.loss] + '''The saved primal objectives. + + Returns + ------- + list + The saved primal objectives from `update_objective`. The number of saved values depends on the `update_objective_interval` kwarg. + ''' + return [x[0] for x in self.loss] + @property def dual_objective(self): + '''The saved dual objectives. + + Returns + ------- + list + The saved dual objectives from `update_objective`. The number of saved values depends on the `update_objective_interval` kwarg. + ''' return [x[1] for x in self.loss] @property def primal_dual_gap(self): + '''The saved primal-dual gap. + + Returns + ------- + list + The saved primal dual gap from `update_objective`. The number of saved values depends on the `update_objective_interval` kwarg. 
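+            The gap is the difference between the saved primal and dual objective values; by weak duality it is non-negative and tends to zero as the iterates approach a saddle point.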
+        '''
        return [x[2] for x in self.loss]

-    def save_previous_iteration(self, index, y_current):
-        self.y_old[index].fill(y_current)
+
+    def _save_previous_iteration(self, index, y_current):
+        ''' Internal function used to save the previous iteration
+        '''
+        self._y_old[index].fill(y_current)
diff --git a/Wrappers/Python/test/test_algorithms.py b/Wrappers/Python/test/test_algorithms.py
index 845b5a44ce..981cc9c3c2 100644
--- a/Wrappers/Python/test/test_algorithms.py
+++ b/Wrappers/Python/test/test_algorithms.py
@@ -24,10 +24,21 @@
 import numpy as np
 import logging
+
+from cil.optimisation.utilities import Sampler
+
 from cil.framework import (ImageGeometry, ImageData, AcquisitionData, AcquisitionGeometry,
                            BlockDataContainer, BlockGeometry, VectorData)
 from cil.framework.labels import FillType
+
 from cil.optimisation.utilities import ArmijoStepSizeRule, ConstantStepSize
 from cil.optimisation.operators import IdentityOperator
 from cil.optimisation.operators import GradientOperator, BlockOperator, MatrixOperator
@@ -58,18 +69,22 @@
 # Fast Gradient Projection algorithm for Total Variation(TV)
 from cil.optimisation.functions import TotalVariation
+
+from cil.plugins.ccpi_regularisation.functions import FGP_TV
+
 from testclass import CCPiTestClass
-from utils import has_astra, initialise_tests
+from utils import has_astra
 from unittest.mock import MagicMock

 log = logging.getLogger(__name__)
-initialise_tests()
+

 if has_astra:
     from cil.plugins.astra import ProjectionOperator
+

 if has_cvxpy:
     import cvxpy

@@ -101,12 +116,14 @@ def test_GD(self):
        identity = IdentityOperator(ig)
        norm2sq = LeastSquares(identity, b)
+
        step_size = norm2sq.L / 3.
alg = GD(initial=initial, objective_function=norm2sq, step_size=step_size, atol=1e-9, rtol=1e-6)
        alg.run(1000,verbose=0)
        self.assertNumpyArrayAlmostEqual(alg.x.as_array(), b.as_array())
+
        alg = GD(initial=initial, objective_function=norm2sq, step_size=step_size, atol=1e-9, rtol=1e-6, update_objective_interval=2)
        self.assertTrue(alg.update_objective_interval == 2)
@@ -1013,7 +1030,6 @@ def test_update(self):
        sirt = SIRT(initial=tmp_initial, operator=self.Aop, data=self.bop)
        sirt.run(5)
-
        x = tmp_initial.copy()
        x_old = tmp_initial.copy()
@@ -1131,7 +1147,210 @@ def test_SIRT_with_TV_warm_start(self):
            sirt.x.as_array(), ig.allocate(0.25).as_array(), 3)

-class TestSPDHG(unittest.TestCase):
+class TestSPDHG(CCPiTestClass):
+    def setUp(self):
+        data = dataexample.SIMPLE_PHANTOM_2D.get(size=(16, 16))
+        self.subsets = 10
+
+        ig = data.geometry
+        ig.voxel_size_x = 0.1
+        ig.voxel_size_y = 0.1
+
+        detectors = ig.shape[0]
+        angles = np.linspace(0, np.pi, 90)
+        ag = AcquisitionGeometry.create_Parallel2D().set_angles(
+            angles, angle_unit='radian').set_panel(detectors, 0.1)
+        # Select device
+        dev = 'cpu'
+
+        Aop = ProjectionOperator(ig, ag, dev)
+
+        sin = Aop.direct(data)
+        partitioned_data = sin.partition(self.subsets, 'sequential')
+        self.A = BlockOperator(
+            *[IdentityOperator(partitioned_data[i].geometry) for i in range(self.subsets)])
+        self.A2 = BlockOperator(
+            *[IdentityOperator(partitioned_data[i].geometry) for i in range(self.subsets)])
+
+        # block function
+        self.F = BlockFunction(*[L2NormSquared(b=partitioned_data[i])
+                                 for i in range(self.subsets)])
+        alpha = 0.025
+        self.G = alpha * FGP_TV()
+
+    def test_SPDHG_defaults_and_setters(self):
+        gamma = 1.
+        rho = .99
+        spdhg = SPDHG(f=self.F, g=self.G, operator=self.A)
+
+        self.assertListEqual(spdhg._norms, [self.A.get_item(i, 0).norm()
+                                            for i in range(self.subsets)])
+        self.assertListEqual(spdhg._prob_weights, [
+                             1/self.subsets] * self.subsets)
+        self.assertEqual(spdhg._sampler._type, 'random_with_replacement')
+        self.assertListEqual(
+            spdhg.sigma, [rho / ni for ni in spdhg._norms])
+        self.assertEqual(spdhg.tau, min([rho*pi / (si * ni**2) for pi, ni,
+                                         si in zip(spdhg._prob_weights, spdhg._norms, spdhg.sigma)]))
+        self.assertNumpyArrayEqual(
+            spdhg.x.as_array(), self.A.domain_geometry().allocate(0).as_array())
+        self.assertEqual(spdhg.update_objective_interval, 1)
+
+        gamma = 3.7
+        rho = 5.6
+        spdhg.set_step_sizes_from_ratio(gamma, rho)
+        self.assertListEqual(
+            spdhg.sigma, [gamma * rho / ni for ni in spdhg._norms])
+        self.assertEqual(spdhg.tau, min([pi*rho / (si * ni**2) for pi, ni,
+                                         si in zip(spdhg._prob_weights, spdhg._norms, spdhg.sigma)]))
+
+        gamma = 1.
+ rho = .99 + spdhg.set_step_sizes() + self.assertListEqual( + spdhg.sigma, [rho / ni for ni in spdhg._norms]) + self.assertEqual(spdhg.tau, min([rho*pi / (si * ni**2) for pi, ni, + si in zip(spdhg._prob_weights, spdhg._norms, spdhg.sigma)])) + + spdhg.set_step_sizes(sigma=[1]*self.subsets, tau=100) + self.assertListEqual(spdhg.sigma, [1]*self.subsets) + self.assertEqual(spdhg.tau, 100) + + spdhg.set_step_sizes(sigma=[1]*self.subsets, tau=None) + self.assertListEqual(spdhg.sigma, [1]*self.subsets) + self.assertEqual(spdhg.tau, min([(rho*pi / (si * ni**2)) for pi, ni, + si in zip(spdhg._prob_weights, spdhg._norms, spdhg.sigma)])) + + spdhg.set_step_sizes(sigma=None, tau=100) + self.assertListEqual(spdhg.sigma, [ + gamma * rho*pi / (spdhg.tau*ni**2) for ni, pi in zip(spdhg._norms, spdhg._prob_weights)]) + self.assertEqual(spdhg.tau, 100) + + def test_spdhg_non_default_init(self): + spdhg = SPDHG(f=self.F, g=self.G, operator=self.A, sampler=Sampler.random_with_replacement(10, list(np.arange(1, 11)/55.)), + initial=self.A.domain_geometry().allocate(1), update_objective_interval=10) + + self.assertListEqual(spdhg._prob_weights, list(np.arange(1, 11)/55.)) + self.assertNumpyArrayEqual( + spdhg.x.as_array(), self.A.domain_geometry().allocate(1).as_array()) + self.assertEqual(spdhg.update_objective_interval, 10) + + with self.assertRaises(ValueError): + spdhg = SPDHG(f=self.F, g=self.G, operator=self.A, _norms=[1]*len(self.A), prob_weights=[1/(self.subsets-1)]*( + self.subsets-1)+[0], sampler=Sampler.random_with_replacement(10, list(np.arange(1, 11)/55.))) + + spdhg = SPDHG(f=self.F, g=self.G, operator=self.A, prob_weights=[1/(self.subsets-1)]*( + self.subsets-1)+[0], initial=self.A.domain_geometry().allocate(1), update_objective_interval=10) + + self.assertListEqual(spdhg._prob_weights, [1/(self.subsets-1)]*(self.subsets-1)+[0]) + self.assertEqual(spdhg._sampler._type, 'random_with_replacement') + + + def test_spdhg_sampler_gives_too_large_index(self): + spdhg = SPDHG(f=self.F, g=self.G, operator=self.A, sampler=Sampler.sequential(20), + initial=self.A.domain_geometry().allocate(1), update_objective_interval=10) + with self.assertRaises(IndexError): + spdhg.run(12) + + def test_spdhg_deprecated_vargs(self): + spdhg = SPDHG(f=self.F, g=self.G, operator=self.A, norms=[ + 1]*len(self.A), prob=[1/(self.subsets-1)]*(self.subsets-1)+[0]) + + self.assertListEqual(self.A.get_norms_as_list(), [1]*len(self.A)) + self.assertListEqual(spdhg._norms, [1]*len(self.A)) + self.assertListEqual(spdhg._sampler.prob_weights, [ + 1/(self.subsets-1)]*(self.subsets-1)+[0]) + self.assertListEqual(spdhg._prob_weights, [ + 1/(self.subsets-1)]*(self.subsets-1)+[0]) + + with self.assertRaises(ValueError): + spdhg = SPDHG(f=self.F, g=self.G, operator=self.A, _norms=[1]*len(self.A), prob=[1/(self.subsets-1)]*( + self.subsets-1)+[0], sampler=Sampler.random_with_replacement(10, list(np.arange(1, 11)/55.))) + + + with self.assertRaises(ValueError): + spdhg = SPDHG(f=self.F, g=self.G, operator=self.A, _norms=[1]*len(self.A), prob=[1/(self.subsets-1)]*( + self.subsets-1)+[0], prob_weights= [1/(self.subsets-1)]*(self.subsets-1)+[0]) + + with self.assertRaises(ValueError): + spdhg = SPDHG(f=self.F, g=self.G, operator=self.A, sfdsdf=3, _norms=[ + 1]*len(self.A), sampler=Sampler.random_with_replacement(10, list(np.arange(1, 11)/55.))) + + + def test_spdhg_set_norms(self): + + self.A2.set_norms([1]*len(self.A2)) + spdhg = SPDHG(f=self.F, g=self.G, operator=self.A2) + self.assertListEqual(spdhg._norms, [1]*len(self.A2)) + + def 
test_spdhg_check_convergence(self):
+        spdhg = SPDHG(f=self.F, g=self.G, operator=self.A)
+
+        self.assertTrue(spdhg.check_convergence())
+
+        gamma = 3.7
+        rho = 0.9
+        spdhg.set_step_sizes_from_ratio(gamma, rho)
+        self.assertTrue(spdhg.check_convergence())
+
+        gamma = 3.7
+        rho = 100
+        spdhg.set_step_sizes_from_ratio(gamma, rho)
+        self.assertFalse(spdhg.check_convergence())
+
+        spdhg.set_step_sizes(sigma=[1]*self.subsets, tau=100)
+        self.assertFalse(spdhg.check_convergence())
+
+        spdhg.set_step_sizes(sigma=[1]*self.subsets, tau=None)
+        self.assertTrue(spdhg.check_convergence())
+
+        spdhg.set_step_sizes(sigma=None, tau=100)
+        self.assertTrue(spdhg.check_convergence())
+
+    def test_SPDHG_num_subsets_1(self):
+        data = dataexample.SIMPLE_PHANTOM_2D.get(size=(10, 10))
+
+        subsets = 1
+
+        ig = data.geometry
+        ig.voxel_size_x = 0.1
+        ig.voxel_size_y = 0.1
+
+        detectors = ig.shape[0]
+        angles = np.linspace(0, np.pi, 90)
+        ag = AcquisitionGeometry.create_Parallel2D().set_angles(
+            angles, angle_unit='radian').set_panel(detectors, 0.1)
+        # Select device
+        dev = 'cpu'
+
+        Aop = ProjectionOperator(ig, ag, dev)
+
+        sin = Aop.direct(data)
+        partitioned_data = sin.partition(subsets, 'sequential')
+        A = BlockOperator(
+            *[IdentityOperator(partitioned_data[i].geometry) for i in range(subsets)])
+
+        # block function
+        F = BlockFunction(*[L2NormSquared(b=partitioned_data[i])
+                            for i in range(subsets)])
+
+        F_pdhg = L2NormSquared(b=partitioned_data[0])
+        A_pdhg = IdentityOperator(partitioned_data[0].geometry)
+
+        alpha = 0.025
+        G = alpha * FGP_TV()
+
+        spdhg = SPDHG(f=F, g=G, operator=A, update_objective_interval=10)
+
+        spdhg.run(7)
+
+        pdhg = PDHG(f=F_pdhg, g=G, operator=A_pdhg, update_objective_interval=10)
+
+        pdhg.run(7)
+
+        self.assertNumpyArrayAlmostEqual(pdhg.solution.as_array(), spdhg.solution.as_array(), decimal=3)

    def add_noise(self, sino, noise='gaussian', seed=10):
        if noise == 'poisson':
@@ -1206,6 +1425,7 @@ def test_SPDHG_vs_PDHG_implicit(self):
            mse(spdhg.get_output(), pdhg.get_output()),
            psnr(spdhg.get_output(), pdhg.get_output())
            )
+        log.info("Quality measures: %r", qm)

        np.testing.assert_almost_equal(mae(spdhg.get_output(), pdhg.get_output()),
@@ -1281,6 +1501,7 @@ def test_SPDHG_vs_PDHG_explicit(self):
            mse(spdhg.get_output(), pdhg.get_output()),
            psnr(spdhg.get_output(), pdhg.get_output())
            )
+        log.info("Quality measures: %r", qm)
        np.testing.assert_almost_equal(mae(spdhg.get_output(), pdhg.get_output()),
                                       0.0031962995, decimal=3)
@@ -1288,6 +1509,7 @@
                                       4.242368e-05, decimal=3)


+
class TestCallbacks(unittest.TestCase):
    class PrintAlgo(Algorithm):
        def __init__(self, update_objective_interval=10, **kwargs):
@@ -1350,6 +1572,7 @@ def __call__(self, algorithm: Algorithm):
        algo.run(20, callbacks=[EarlyStopping()])
        self.assertEqual(algo.iteration, 15)

+
    def test_CGLSEarlyStopping(self):
        ig = ImageGeometry(10, 2)
        np.random.seed(2)
@@ -1456,6 +1679,7 @@ def do_test_with_fidelity(self, fidelity):
        admm_noaxpby = LADMM(f=G, g=F, operator=K, tau=self.tau, sigma=self.sigma,
                             update_objective_interval=10)
        admm_noaxpby.run(1, verbose=0)
+
        np.testing.assert_array_almost_equal(
            admm.solution.as_array(), admm_noaxpby.solution.as_array())
diff --git a/docs/source/optimisation.rst b/docs/source/optimisation.rst
index c7d1fa7eba..cb187a1197 100644
--- a/docs/source/optimisation.rst
+++ b/docs/source/optimisation.rst
@@ -152,7 +152,7 @@ Each iteration considers just one index of the sum, potentially reducing computa
 .. autoclass:: cil.optimisation.algorithms.SPDHG
-   :members:
+   :members: update, set_step_sizes, set_step_sizes_from_ratio, update_objective
    :inherited-members: run, update_objective_interval, max_iteration