Skip to content

Commit

Permalink
ft: significantly increased brier test speed
Browse files Browse the repository at this point in the history
  • Loading branch information
pabloitu committed Jan 15, 2024
1 parent e36bc42 commit 5fb55ca
Showing 1 changed file with 36 additions and 44 deletions.
80 changes: 36 additions & 44 deletions csep/core/brier_evaluations.py
Original file line number Diff line number Diff line change
@@ -1,96 +1,88 @@
import numpy
import scipy.stats
import scipy.spatial
import numpy as np
from scipy.stats import poisson

from csep.models import EvaluationResult
from csep.core.exceptions import CSEPCatalogException


def bier_score_ndarray(forecast, observations):
def _brier_score_ndarray(forecast, observations):
""" Computes the brier score
"""

observations = (observations >= 1).astype(int)
prob_success = 1 - scipy.stats.poisson.cdf(0, forecast)
brier = []

for p, o in zip(prob_success.ravel(), observations.ravel()):

if o == 0:
brier.append(-2 * p ** 2)
else:
brier.append(-2 * (p - 1) ** 2)
brier = numpy.sum(brier)
prob_success = 1 - poisson.cdf(0, forecast)
brier_cell = np.square(prob_success.ravel() - (observations.ravel() > 0))
brier = -2 * brier_cell.sum()

for n_dim in observations.shape:
brier /= n_dim

return brier


def _simulate_catalog(sim_cells, sampling_weights, sim_fore, random_numbers=None):
# Modified this code to generate simulations in a way that every cell gets one earthquake
# Generate uniformly distributed random numbers in [0,1), this
def _simulate_catalog(sim_cells, sampling_weights, random_numbers=None):
sim_fore = numpy.zeros(sampling_weights.shape)

if random_numbers is None:
# Reset simulation array to zero, but don't reallocate
sim_fore.fill(0)
num_active_cells = 0
while num_active_cells < sim_cells:
random_num = numpy.random.uniform(0,1)
loc = numpy.searchsorted(sampling_weights, random_num, side='right')
loc = numpy.searchsorted(sampling_weights, random_num,
side='right')
if sim_fore[loc] == 0:
sim_fore[loc] = 1
num_active_cells = num_active_cells + 1
num_active_cells += 1
else:
# Find insertion points using binary search inserting to satisfy a[i-1] <= v < a[i]
pnts = numpy.searchsorted(sampling_weights, random_numbers, side='right')
# Find insertion points using binary search inserting
# to satisfy a[i-1] <= v < a[i]
pnts = numpy.searchsorted(sampling_weights, random_numbers,
side='right')
# Create simulated catalog by adding to the original locations
numpy.add.at(sim_fore, pnts, 1)

assert sim_fore.sum() == sim_cells, "simulated the wrong number of events!"
return sim_fore


def _brier_score_test(forecast_data, observed_data, num_simulations=1000, random_numbers=None,
seed=None, use_observed_counts=True, verbose=True):
""" Computes binary conditional-likelihood test from CSEP using an efficient simulation based approach.
def _brier_score_test(forecast_data, observed_data, num_simulations=1000,
random_numbers=None, seed=None, verbose=True):
""" Computes binary conditional-likelihood test from CSEP using an
efficient simulation based approach.
Args:
"""

# Array-masking that avoids log singularities:
forecast_data = numpy.ma.masked_where(forecast_data <= 0.0, forecast_data)

# set seed for the likelihood test
if seed is not None:
numpy.random.seed(seed)
import time
start = time.process_time()

# used to determine where simulated earthquake should be placed, by definition of cumsum these are sorted
sampling_weights = numpy.cumsum(forecast_data.ravel()) / numpy.sum(forecast_data)
# used to determine where simulated earthquake should
# be placed, by definition of cumsum these are sorted
sampling_weights = (numpy.cumsum(forecast_data.ravel()) /
numpy.sum(forecast_data))

# data structures to store results
sim_fore = numpy.zeros(sampling_weights.shape)
simulated_brier = []
n_active_cells = len(numpy.unique(numpy.nonzero(observed_data.ravel())))
num_cells_to_simulate = int(n_active_cells)

# main simulation step in this loop
for idx in range(num_simulations):
if use_observed_counts:
num_cells_to_simulate = int(n_active_cells)

if random_numbers is None:
sim_fore = _simulate_catalog(num_cells_to_simulate,
sampling_weights,
sim_fore)
sampling_weights)
else:
sim_fore = _simulate_catalog(num_cells_to_simulate,
sampling_weights,
sim_fore,
random_numbers=random_numbers[idx, :])

# compute Brier score
current_brier = bier_score_ndarray(forecast_data.data, sim_fore)
current_brier = _brier_score_ndarray(forecast_data.data, sim_fore)

# append to list of simulated Brier score
simulated_brier.append(current_brier)
Expand All @@ -100,8 +92,7 @@ def _brier_score_test(forecast_data, observed_data, num_simulations=1000, random
if (idx + 1) % 100 == 0:
print(f'... {idx + 1} catalogs simulated.')

# observed Brier score
obs_brier = bier_score_ndarray(forecast_data.data, observed_data)
obs_brier = _brier_score_ndarray(forecast_data.data, observed_data)

# quantile score
qs = numpy.sum(simulated_brier <= obs_brier) / num_simulations
Expand All @@ -116,10 +107,12 @@ def brier_score_test(gridded_forecast,
seed=None,
random_numbers=None,
verbose=False):
""" Performs the Brier conditional test on Gridded Forecast using an Observed Catalog.
Normalizes the forecast so the forecasted rate are consistent with the observations. This modification
eliminates the strong impact differences in the number distribution have on the forecasted rates.
""" Performs the Brier conditional test on Gridded Forecast using an
Observed Catalog.
Normalizes the forecast so the forecasted rate are consistent with the
observations. This modification
eliminates the strong impact differences in the number distribution
have on the forecasted rates.
"""

Expand All @@ -137,7 +130,6 @@ def brier_score_test(gridded_forecast,
num_simulations=num_simulations,
seed=seed,
random_numbers=random_numbers,
use_observed_counts=True,
verbose=verbose
)

Expand Down

0 comments on commit 5fb55ca

Please sign in to comment.