Here we present an effort to enable ease of access to a large data set of gloabl primary production data from the paper of Matteri & Scardi (2021). The goal is to provide students and early career researchers with the tools to access data more easily. The project team is working on extending the analysis of this particular data set to include parameter estimation via inverse modelling. Once this effort is completed a paper will be published on the topic of parameter estimation. If you are interested in such data sets please write to us!
In 2021 Matteri and Scardi published a paper in Earth System Science data titled: Collection and analysis of a global marine phytoplankton primary-production dataset. In this paper they present a synthesis of in situ measured primary production profiles from accross the globe. Here we present an analysis done on this data set. The paper is available online at: https://essd.copernicus.org/articles/13/4967/2021/. The pdf of the paper is located in the same folder as this notebook, along with the data set.
import numpy as np
import matplotlib.pyplot as plt
import scipy.io
import math
from scipy.optimize import curve_fit
# Loading the data set
data = scipy.io.loadmat("matrix.mat")
data = data['matrix']
Description of rows in the data matrix:
fig, axs = plt.subplots(3, 2, figsize=(10, 8))
plt.subplots_adjust(hspace=0.5)
D_all = np.array(data[:,6])
axs[0,0].hist(D_all[D_all!=24], bins=20, edgecolor='black')
axs[0,0].set_title('Daylength')
axs[0,0].set_xlabel(r'$D\ \left[h \right]$')
axs[0,0].set_ylabel('Frequency')
Z = np.reshape(data[:,14:27],(data[:,14:27].shape[0]*data[:,14:27].shape[1],1))
axs[0,1].hist(Z, bins=20, edgecolor='black', color = 'grey', range = [0, 100])
axs[0,1].set_title('Measurement depths')
axs[0,1].set_xlabel(r'$z\ \left[m \right]$')
axs[0,1].set_ylabel('Frequency')
PAR = np.array(data[:,10])
axs[1,0].hist(PAR[PAR!=14.51], bins=20, edgecolor='black', color = 'orange')
axs[1,0].set_title('Surface PAR');
axs[1,0].set_xlabel(r'$z\ \left[\frac{E} {m^2 day} \right]$')
axs[1,0].set_ylabel('Frequency')
MLD = np.array(data[:,8])
axs[1,1].hist(MLD, bins=20, edgecolor='black', color = 'red', range = [0, 100])
axs[1,1].set_title('Mixed layer depth')
axs[1,1].set_xlabel(r'$Z_m\ \left[m\right]$')
axs[1,1].set_ylabel('Frequency')
Ze = np.array(data[:,9])
axs[2,0].hist(Ze, bins=20, edgecolor='black', color = 'green')
axs[2,0].set_title('Euphotic zone');
axs[2,0].set_xlabel(r'$z\ \left[m \right]$')
axs[2,0].set_ylabel('Frequency')
Zb = np.array(data[:,7])
axs[2,1].hist(Zb, bins=20, edgecolor='black', color = 'purple')
axs[2,1].set_title('Bottom depth')
axs[2,1].set_xlabel(r'$Z_m\ \left[m\right]$')
axs[2,1].set_ylabel('Frequency');
# Plotting the histograms of all chlorophyll and production measurements
fig, axs = plt.subplots(1, 2, figsize=(15, 5))
# Plotting the chlorophyll histogram from all cruises
B_all = np.reshape(data[:,28:41],(data[:,28:41].shape[0]*data[:,28:41].shape[1],1))
axs[0].hist(B_all, bins=50, edgecolor='black',color = 'green', range = [0, 3])
#axs[0].axvline(np.nanmean(B_all),lw = 4, c = 'black')
#axs[0].axvline(np.nanmedian(B_all),lw = 4, c = 'grey')
axs[0].set_xlabel(r'$B(z) \left[\frac{mgChl}{m^{3}} \right]$')
axs[0].set_ylabel('Frequency')
axs[0].set_title('Chlorophyll histogram');
# Plotting the production histogram from all cruises
P_all = np.reshape(data[:,42:55],(data[:,42:55].shape[0]*data[:,42:55].shape[1],1))
plt.hist(P_all, bins=50, edgecolor='black', range = [0, 1000])
#axs[1].axvline(np.nanmean(P_all),lw = 4, c = 'black')
axs[1].set_yscale('log')
#axs[1].axvline(np.nanmedian(P_all),lw = 4, c = 'grey')
axs[1].set_xlabel(r'$P(z) \left[\frac{mgC}{m^{3}\ day} \right]$')
axs[1].set_ylabel('Frequency')
axs[1].set_title('Production histogram');
# Plotting the histogram of the ratio of the euphotic zone to mixed layer depth
plt.hist(Ze/MLD, bins = 50, edgecolor = 'black', range = [0,10], color = 'orange');
plt.title('Ratio of the euphotic zone depth to the mixed layer depth')
plt.xlabel(r'$Z_e/Z_m $')
plt.ylabel('Frequency');
# Plotting the attenuation coefficient estimated from the euphotic zone depth
plt.hist(4.6/Ze, bins = 50, edgecolor = 'black', range = [0, 0.4]);
plt.title('Attenuation coefficient from euphotic zone depth')
plt.xlabel(r'$K $')
#plt.xscale('log')
plt.ylabel('Frequency');
# Calculating optical depth of each measurement
zeta = np.reshape(np.tile(4.6/Ze, (13, 1)).T*data[:,14:27],(data[:,14:27].shape[0]*data[:,14:27].shape[1],1))
# Plotting the histogram of measurement depths as optical depths
plt.hist(zeta[zeta!=0], bins = 50, edgecolor = 'black', range = [0,10], color = 'red');
#plt.hist(4.6*MLD/Ze, bins = 50, edgecolor = 'black', range = [0,10], color = 'grey');
plt.title('Measurement depths as optical depths')
plt.xlabel(r'$\zeta $')
plt.ylabel('Frequency');
# Noon PAR expressed in Watts per meter squared
I0mW = 0.2174*PAR*1000000*np.pi/(2*D_all*3600)
# Plotting the histogram of measurement depths as optical depths
plt.hist(I0mW[I0mW!=116.74908797986504], bins = 50, edgecolor = 'black', color = 'orange');
#plt.hist(4.6*MLD/Ze, bins = 50, edgecolor = 'black', range = [0,10], color = 'grey');
plt.title('Noon irradiance')
plt.xlabel(r'$I_0^m \left[ \frac{W}{m^2}\right]$')
plt.ylabel('Frequency');
# Selecting one profile
n = 1269
# Number of measured depths
nz = int(data[n,13])
# Selecting the chlorophyll profile
B = np.array(data[n,28:28+nz])
B[np.isnan(B)] = 0
#while B[nz-1] == 0:
# nz = nz - 1
# B = data[n,28:28+nz]
# Selecting the measurement depths
depths = np.array(data[n,14:14+nz])
# Selecting the production profile
P = np.array(data[n,42:42+nz])
P[np.isnan(P)] = 0
P[np.isnan(B)] = 0
# Correction for zero biomass values
P = P[B!=0]
depths = depths[B!=0]
B = B[B!=0]
nz = len(B)
# Calculating the normalized production profile
PB = P/B
#PB[np.isnan(B)] = 0
# Plotting the measured profiles
fig, axs = plt.subplots(1, 3, figsize=(10, 8))
maxz = round(depths[nz-1]+depths[nz-1]/nz)
# Biomass profile
axs[0].scatter(B,-depths, color ='green')
axs[0].plot(B,-depths, lw = 2, c = 'lightgrey')
axs[0].set_ylim(-maxz,0)
axs[0].set_title('Chlorophyll ')
axs[0].set_ylabel('Depth [m]')
axs[0].set_xlabel(r'$B [\frac{mgChl}{m^{3}} ]$')
# Production profile
axs[1].scatter(P,-depths, color ='red')
axs[1].scatter(P[P==0],-depths[P==0], c = 'black')
axs[1].plot(P,-depths, lw = 2, c = 'lightgrey')
axs[1].set_ylim(-maxz,0)
axs[1].set_title('Production ')
axs[1].set_xlabel(r'$P\left[ \frac{mgC}{m^3} \right]$')
# Normalized production profile
axs[2].scatter(PB,-depths, color ='blue')
axs[2].scatter(PB[PB==0],-depths[P==0], c = 'black')
axs[2].plot(PB,-depths, lw = 2, c = 'lightgrey')
axs[2].set_ylim(-maxz,0)
axs[2].set_title('Normalized production ')
axs[2].set_xlabel(r'$P^B \left[ \frac{mgC}{mgChl} \right]$');
print('Latitude ' + str(data[n,4]))
print('Longitude ' + str(data[n,5]))
Latitude 43.4 Longitude -69.2
# Daylength
D = data[n,6]
# Attenuation ceofficient
K = 4.6/data[n,9]
# Surface noon irradiance
I0m = 0.2174*PAR[n]*1000000*np.pi/(2*D_all[n]*3600)
print('Daylength: ' + str(np.round(D,2)) + ' hours' )
print('Attenuation coefficient: ' + str(np.round(K,2)) + ' m^-1' )
print('Surface noon irradiance: ' + str(np.round(I0m,2)) + ' W m^-2')
# Number of levels
Nz = 100
# Depth increment [ m ]
dz = maxz/Nz
# Model levels [ m ]
Z = np.linspace(0,maxz,Nz)
# Irradiance profile [ W m^-2 ]
Iz = I0m*np.exp(-K*Z)
# Irradiance profil as a function
def fIz(i0,k,z):
return i0*np.exp(-k*z)
# Plotting the irradiance profile
plt.figure(figsize=(5,8))
plt.axhline(-1/K, c = 'lightgrey', lw = 3, label = 'First optical depth')
plt.axhline(-2.3/K, c = 'grey', lw = 3, label = '10% light level')
plt.axhline(-4.6/K, c = 'black', lw = 3, label = '1% light level')
plt.plot(fIz(I0m,K,Z), -Z, lw = 4, c = 'orange', label = 'I(z)')
plt.xlabel(r'$I \left[\frac{W}{m^2}\right]$', fontsize = 15)
plt.ylabel('z [m]', fontsize = 15);
plt.title('Irradiance profile $I(z)$');
plt.legend(loc=0, fontsize='small');
Daylength: 10.27 hours Attenuation coefficient: 0.19 m^-1 Surface noon irradiance: 157.92 W m^-2