As part of project activities we are working on collecting time series data on primary production. Alngisde with that we are aiming to enable ease of access to such data sets. Here we give an example code for data from the Hawaii Ocean Time Series. These data have been used numerous times in many publications and are freely available. We have also accessed data from the Bermuda Atlantic Time Series and Cariaco Ocean Time Series. We have successfully digitized data from two stations in the Adriatic sea. Currently we are searching for more data sets of this type. If you are interested in such data sets please write to us! Also, if you have such data sets write to us as well!
The Hawaii ocean time series is one of the longest and best currated time series in oceanography. More information on the time series as well as the data used here can be found at: https://hahana.soest.hawaii.edu/hot/hot-dogs/
import numpy as np
import matplotlib.pyplot as plt
import scipy.io
import math
from scipy.optimize import curve_fit
# Loading the data set
data = scipy.io.loadmat("HOT_final.mat")
# The measurement depths are
depths = np.array([5, 25, 45, 75, 100, 125, 150, 175])
# Days in months
months = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
# Station lattitude
lat = 19.75;
# Functions for caclulating daylength based on date and lattitude
# From degrees to radians
def SR(phi):
return phi*np.pi/180
# From radians to degrees
def RS(phi):
return phi*180/np.pi
# Declination in degrees, argument d is day number
def delta(d):
return 0.39637 - 22.9133*np.cos(d*2*np.pi/365) + 4.02543*np.sin(d*2*np.pi/365) - 0.3872*np.cos(2*d*2*np.pi/365) + 0.052*np.sin(2*d*2*np.pi/365)
# Sine of solar elevation angle, Gamma is latitude in radians, Delta is day number
def sin_beta(Delta, Gamma, t):
return np.sin(Gamma)*np.sin(SR(delta(Delta))) - np.cos(Gamma)*np.cos(SR(delta(Delta)))*np.cos(t*2*np.pi/24)
# Solar elevation angle
def beta(Delta, Gamma, t):
return np.arcsin(sin_beta(Delta, Gamma, t))
# Daylength as a function of day number and latitude, Gamma is now in degrees
def daylength(Delta, Gamma):
return 24 - 2 * np.arccos(np.tan(SR(Gamma)) * np.tan(SR(delta(Delta)))) * 24 / (2 * np.pi)
# Daylength as a function of daynumber for the station latitute
plt.figure(figsize=(15,5))
plt.plot(daylength(np.linspace(0,364,364),lat), lw = 3)
plt.xlabel('Daynumber')
plt.ylabel('Daylength [h]');
gaps = np.array(data['HOT'][:,2])
gaps[~np.isnan(gaps)]=0
gaps[np.isnan(gaps)]=1
plt.figure(figsize=(15,5))
plt.title('Gaps')
plt.plot(gaps,lw = 3);
plt.xticks(np.arange(0,420,step=24),np.arange(1989,2024,step=2));
# Percentage of missing months in the time series
np.round(100*sum(gaps)/((data['HOT'][-1,0]-data['HOT'][0,0])*12),0)
29.0
# Calculting when measurement were performed
measurements = np.array(data['HOT'][:,2])
measurements[~np.isnan(measurements)]=1
measurements[np.isnan(measurements)]=0
# Plotting the number of measurements per year
plt.figure(figsize=(15,5))
plt.title('Number of measurement by years')
plt.bar(np.linspace(0,33,34),np.sum(measurements.reshape((34,12)),1))
plt.axhline(10, c = 'black', lw = 3)
plt.xticks(np.arange(0,34,step=2),np.arange(1989,1989+34,step=2));
# Plotting the number of measurements per month
plt.figure(figsize=(15,5))
plt.title('Number of measurement by months')
plt.axhline(20, c = 'black', lw = 3)
plt.bar(np.linspace(0,11,12),np.sum(measurements.reshape((34,12)),0))
plt.xticks(np.arange(0,12,step=1),np.arange(0,12,step=1));
# Calculating the number of days between measurements
days = np.zeros(len(data['HOT'][:,0]))
for n in range(len(data['HOT'][:,0])):
days[n] = (data['HOT'][n,0]-1995)*365 + sum(months[0:int(data['HOT'][n,1])-1]) + data['HOT'][n,2]
days = days[~np.isnan(days)]
delta = days[1:-1] - days[0:-2]
# Plotting the histogram of the days between measurements
plt.hist(delta, bins = 25, edgecolor = 'black' )
plt.xlabel('Days between measurements')
plt.ylabel('Frequency');
# Plotting the histograms of all chlorophyll and production measurements
fig, axs = plt.subplots(1, 2, figsize=(15, 5))
# Plotting the chlorophyll histogram from all cruises
B_all = np.reshape(data['HOT'][:,3:11],(data['HOT'][:,3:11].shape[0]*data['HOT'][:,3:11].shape[1],1))
axs[0].hist(B_all, bins=40, edgecolor='black',color = 'green', range = [0, 0.5])
axs[0].axvline(np.nanmean(B_all),lw = 4, c = 'black')
axs[0].axvline(np.nanmedian(B_all),lw = 4, c = 'grey')
axs[0].set_xlabel(r'$B(z) \left[\frac{mgChl}{m^{3}} \right]$')
axs[0].set_ylabel('Frequency')
axs[0].set_title('Chlorophyll histogram');
# Plotting the production histogram from all cruises
P_all = np.reshape(data['HOT'][:,11:19],(data['HOT'][:,11:19].shape[0]*data['HOT'][:,11:19].shape[1],1))
plt.hist(P_all, bins=50, edgecolor='black', range = [0, 15])
axs[1].axvline(np.nanmean(P_all),lw = 4, c = 'black')
axs[1].set_yscale('log')
axs[1].axvline(np.nanmedian(P_all),lw = 4, c = 'grey')
axs[1].set_xlabel(r'$P(z) \left[\frac{mgC}{m^{3}\ h} \right]$')
axs[1].set_ylabel('Frequency')
axs[1].set_title('Production histogram');
# Selecting the depth number ( from 0 to 7 )
nz = 2
# Plotting the chlorophyll time series at that depth
plt.figure(figsize=(15,5))
plt.plot(data['HOT'][:,3 + nz], lw = 3, c = 'green')
plt.title('Chlorophyll at ' + str(depths[nz]) +' meters depth')
plt.ylabel(r'$B [\frac{mgChl}{m^{3}} ]$')
plt.xticks(np.arange(0,408,step=24),np.arange(1989,2023,step=2));
# Plotting the production time series at that depth
plt.figure(figsize=(15,5))
plt.plot(data['HOT'][:,11 + nz], lw = 3)
plt.title('Production at ' + str(depths[nz]) +' meters depth')
plt.ylabel(r'$P\left[ \frac{mgC}{m^3 h} \right]$')
plt.xticks(np.arange(0,408,step=24),np.arange(1989,2023,step=2));
# Selecting one profile
n = 100
# Calculating the day of the year
day = sum(months[0:int(data['HOT'][n,1])-1]) + int(data['HOT'][n,2])
# Selecting the chlorophyll profile
B = data['HOT'][n,3:11]
B[np.isnan(B)] = 0
# Selecting the production profile
P = data['HOT'][n,11:19]
P[np.isnan(P)] = 0
# Calculating the normalized production profile
PB = P/B
# Plotting the measured profiles
fig, axs = plt.subplots(1, 3, figsize=(10, 8))
# Biomass profile
axs[0].scatter(B,-depths, color ='green')
axs[0].plot(B,-depths, lw = 2, c = 'lightgrey')
axs[0].set_ylim(-205,0)
axs[0].set_title('Chlorophyll ')
axs[0].set_ylabel('Depth [m]')
axs[0].set_xlabel(r'$B [\frac{mgChl}{m^{3}} ]$')
# Production profile
axs[1].scatter(P/daylength(day,lat),-depths, color ='red')
axs[1].scatter(P[P==0],-depths[P==0], c = 'black')
axs[1].plot(P/daylength(day,lat),-depths, lw = 2, c = 'lightgrey')
axs[1].set_ylim(-205,0)
axs[1].set_title('Production ')
axs[1].set_xlabel(r'$P\left[ \frac{mgC}{m^3 h} \right]$')
# Normalized production profile
axs[2].scatter(PB/daylength(day,lat),-depths, color ='blue')
axs[2].scatter(PB[PB==0],-depths[P==0], c = 'black')
axs[2].plot(PB/daylength(day,lat),-depths, lw = 2, c = 'lightgrey')
axs[2].set_ylim(-205,0)
axs[2].set_title('Normalized production ')
axs[2].set_xlabel(r'$P^B \left[ \frac{mgC}{mgChl \ h} \right]$');
# Plotting the measured profiles
fig, axs = plt.subplots(1, 3, figsize=(10, 8))
# Biomass profile
axs[0].scatter(B,-depths, color ='green')
axs[0].plot(B,-depths, lw = 2, c = 'lightgrey')
axs[0].set_ylim(-205,0)
axs[0].set_title('Chlorophyll ')
axs[0].set_ylabel('Depth [m]')
axs[0].set_xlabel(r'$B [\frac{mgChl}{m^{3}} ]$')
# Daily production profile
axs[1].scatter(P,-depths, color ='red')
axs[1].scatter(P[PB==0],-depths[P==0], c = 'black')
axs[1].plot(P,-depths, lw = 2, c = 'lightgrey')
axs[1].set_ylim(-205,0)
axs[1].set_title('Daily production ')
axs[1].set_xlabel(r'$P\left[ \frac{mgC}{m^3} \right]$')
# Daily normalized production profile
axs[2].scatter(PB,-depths, color ='blue')
axs[2].scatter(PB[PB==0],-depths[PB==0], c = 'black')
axs[2].plot(PB,-depths, lw = 2, c = 'lightgrey')
axs[2].set_ylim(-205,0)
axs[2].set_title('Daily normalized production ')
axs[2].set_xlabel(r'$P^B \left[ \frac{mgC}{mgChl} \right]$');
# Calculating the depth intervals for numerical integration of watercolumn production
# Number of depths
nZ = len(depths)
# Initialize a list to store the computed values
dz = np.array([0] * nZ)
# Iterate over the indices of depths
for i in range(nZ):
# for the first depth
if i == 0:
dz[i] = depths[i] + (depths[i + 1] - depths[i]) / 2
# for all depth except the first and the last
elif 0 < i < nZ - 1:
dz[i] = (depths[i + 1] + depths[i]) / 2 - (depths[i] + depths[i - 1]) / 2
# for the last depth
elif i == nZ - 1:
dz[i] = depths[i] - depths[i - 1]
# Calculating watercolumn biomass
BZ = np.dot(B,dz)
print(BZ)
# Calculating watercolumn production
PZT = np.dot(P,dz)
print(PZT)
21.84 601.755
# Length of the time series in months
nT = data['HOT'][:,3:11].shape[0]
#PZT = np.array([0]*nT)
#BZ = np.array([0]*nT)
# Variables for storing watercolumn production
PZT = np.zeros(nT)
BZ = np.zeros(nT)
for n in range(nT):
if ~np.isnan(data['HOT'][n,2]):
# Calculating the day of the year
day = np.array(sum(months[0:int(data['HOT'][n,1])-1]) + int(data['HOT'][n,2]))
# Selecting the chlorophyll profile
B = data['HOT'][n,3:11]
B[np.isnan(B)] = 0
# Selecting the production profile
P = np.array(data['HOT'][n,11:19])
P[np.isnan(P)] = 0
# Calculating the normalized production profile
# PB = P/B
# Calculating watercolumn biomass
BZ[n] = np.dot(B,dz)
# Calculating watercolumn production
#PZT[n] = np.array(np.dot(P,dz)*daylength(day,lat))
PZT[n] = np.dot(P,dz)
elif np.isnan(data['HOT'][n,2]):
BZ[n] = np.nan
PZT[n] = np.nan
PZT[PZT==0] = np.nan
BZ[BZ==0] = np.nan
# Plotting the histograms of all watercolumn chlorophyll and watercolumn production
fig, axs = plt.subplots(1, 2, figsize=(15, 5))
axs[0].hist(BZ, bins=25, edgecolor='black', color = 'green', range = [10, 40]);
axs[0].axvline(np.nanmedian(BZ), lw = 4, c = 'grey')
axs[0].axvline(np.nanmean(BZ), lw = 4, c = 'black')
axs[0].set_xlabel(r'$B_Z \left[\frac{mgChl}{m^{2}} \right]$')
axs[0].set_ylabel('Frequency')
axs[0].set_title('Watercolumn Chlorophyll histogram');
axs[1].hist(PZT, bins=25, edgecolor='black', range = [0, 1000]);
axs[1].axvline(np.nanmedian(PZT), lw = 4, c = 'grey')
axs[1].axvline(np.nanmean(PZT), lw = 4, c = 'black')
axs[1].set_xlabel(r'$P_{Z,T} \left[\frac{mgC}{m^{2}} \right]$')
axs[1].set_ylabel('Frequency')
axs[1].set_title('Watercolumn production histogram');
# Plotting the watercolumn chlorophyll time series
plt.figure(figsize=(15,5))
plt.plot(BZ, lw = 3, c = 'green')
plt.title('Watercolumn chlorophyll')
plt.ylabel(r'$B_Z [\frac{mgChl}{m^{2}} ]$')
plt.xticks(np.arange(12,408,step=24),np.arange(1990,2023,step=2));
# Plotting the watercolumn production time series
plt.figure(figsize=(15,5))
plt.plot(PZT, lw = 3)
plt.title('Watercolumn production' )
plt.ylabel(r'$P_{Z,T} \left[ \frac{mgC}{m^2} \right]$')
plt.xticks(np.arange(12,408,step=24),np.arange(1990,2023,step=2));