"""
PropertyCalculation --- :mod:`domhmm.analysis.PropertyCalculation`
===========================================================
This module contains the :class:`PropertyCalculation` class.
"""
import logging as log
import sys
import matplotlib.pyplot as plt
import numpy as np
from hmmlearn.hmm import GaussianHMM
from scipy.sparse import csr_array
from scipy.spatial import Voronoi, ConvexHull
from sklearn import mixture
from tqdm import tqdm
from .base import LeafletAnalysisBase
[docs]
class PropertyCalculation(LeafletAnalysisBase):
"""
The DirectorOrder class calculates an order parameter for each selected lipid according to the formula:
P2 = 0.5 * (3 * cos(a)^2 - 1), (1)
where an is the angle between a pre-defined director in the lipid (e.g. C-H or CC) and a reference axis.
"""
def _prepare(self):
"""
Prepare results array before the calculation of the height spectrum begins
Attributes
----------
"""
if self.verbose:
log.basicConfig(level=log.INFO, stream=sys.stdout)
log.info("Preparation Step")
# Initialize result storage dictionaries
self.results.train_data_per_type = {}
self.results.GMM = {}
self.results.HMM = {}
self.results.HMM_Pred = {}
self.results.Getis_Ord = {}
self.max_tail_len = -1
for _, each in self.tails.items():
if len(each) > self.max_tail_len:
self.max_tail_len = len(each)
# Total number of parameters are area per lipid + order parameter for each tail = max_tail_len + 1
self.results.train = np.zeros(
(len(self.membrane_unique_resids), self.n_frames, 1 + self.max_tail_len + len(self.sterol_heads)),
dtype=np.float32)
# Initalize weight matrix storage for each leaflet.
setattr(self.results, "upper_weight_all", [])
setattr(self.results, "lower_weight_all", [])
# Initialized leaflet assignment array for each frame
self.leaflet_assignment = np.zeros((len(self.membrane_unique_resids), self.n_frames), dtype=np.int32)
[docs]
@staticmethod
def calc_order_parameter(chain):
"""
Calculate average Scc order parameters per acyl chain according to the equation:
S_cc = (3 * cos( theta )^2 - 1) / 2,
where theta describes the angle between the z-axis of the system and the vector between two subsequent tail
beads.
Parameters
----------
chain : Selection
MDAnalysis Selection object
Returns
-------
s_cc : numpy.ndarray
Mean S_cc parameter for the selected chain of each residue
"""
# Separate the coordinates according to their residue index
ridx = np.where(np.diff(chain.resids) > 0)[0] + 1
pos = np.split(chain.positions, ridx)
# Calculate the normalized orientation vector between two subsequent tail beads
vec = [np.diff(pos_i, axis=0) for pos_i in pos]
vec_norm = [np.sqrt((vec_i ** 2).sum(axis=-1)) for vec_i in vec]
vec = [vec_i / vec_norm_i.reshape(-1, 1) for vec_i, vec_norm_i in zip(vec, vec_norm)]
# TODO z axis multiplication inside. It needs to be changed in future work
# Choose the z-axis as membrane normal and take care of the machine precision
dot_prod = [np.clip(vec_i[:, 2], -1., 1.) for vec_i in vec]
# Calculate the order parameter
s_cc = np.array([np.mean(0.5 * (3 * dot ** 2 - 1)) for dot in dot_prod])
return s_cc
[docs]
def order_parameter(self):
"""
Calculation of scc order parameter for each chain of each residue.
"""
# Iterate over each tail with chain id
for chain, tail in self.resid_tails_selection.items():
# SCC calculation
s_cc = self.calc_order_parameter(tail)
idx = self.get_residue_idx(self.resids_index_map, np.unique(tail.resids))
self.results.train[idx, self.index, 1 + chain] = s_cc
for i, (resname, tail) in enumerate(self.sterol_tails_selection.items()):
s_cc = self.calc_order_parameter(tail)
idx = self.get_residue_idx(self.resids_index_map, np.unique(tail.resids))
self.results.train[idx, self.index, 1 + self.max_tail_len + i] = s_cc
[docs]
@staticmethod
def area_per_lipid_vor(coor_xy, boxdim, frac):
"""
Calculation of the area per lipid employing Voronoi tessellation on coordinates mapped to the xy plane.
The function takes also the periodic boundary conditions of the box into account.
Parameters
----------
coor_xy : numpy.ndarray
Coordinates of upper/lower leaflet
boxdim : array
Length of box vectors in all directions
frac : float
Fraction of box length in x and y outside the unit cell considered for Voronoi calculation
Returns
-------
vor : Voronoi Tesselation
Scipy's Voronoi Diagram object
apl : Area per lipid
Area per lipid based on Scipy's Voronoi Diagram
pbc_idx : Indices
Unit cell indices for periodic image coordinates
"""
# Number of points in the plane
ncoor = coor_xy.shape[0]
bx = boxdim[0]
by = boxdim[1]
# Create periodic images of the coordinates
# to take periodic boundary conditions into account
# Store coordinates of periodic images
pbc = np.zeros((9 * ncoor, 2), dtype=np.float32)
# Store unit cell indices for coordinates of periodic images
pbc_idx = np.arange(9 * ncoor, dtype=np.int64) % ncoor
# Iterate over all possible periodic images
k = 0
for i in [0, -1, 1]:
for j in [0, -1, 1]:
# Multiply the coordinates in a direction
pbc[k * ncoor: (k + 1) * ncoor, 0] = coor_xy[:, 0] % bx + i * bx
pbc[k * ncoor: (k + 1) * ncoor, 1] = coor_xy[:, 1] % by + j * by
k += 1
# Create a boolean mask for positions within the unit cell and a smaller fraction of positions outside the
# unit cell
# Check along x-axis
f0 = pbc[:, 0] >= -frac * bx
f1 = pbc[:, 0] <= bx + frac * bx
# Check along y-axis
f2 = pbc[:, 1] >= -frac * by
f3 = pbc[:, 1] <= by + frac * by
# Merge the four masks together
mask = f0 * f1 * f2 * f3
# Filter positions and unit cell indices of all periodic images
pbc = pbc[mask]
pbc_idx = pbc_idx[mask]
# Call scipy's Voronoi implementation
# There is the (rare!) possibility that two points have the exact same xy positions,
# to prevent issues at further calculation steps, the qhull_option "QJ" was employed to introduce small random
# displacement of the points to resolve these issue.
# IMPORTANT: The use of "QJ" makes the resulting Voronoi diagram depending on frac. Values for ridge lengths
# can vary!
vor = Voronoi(pbc, qhull_options="QJ")
# Iterate over all members of the unit cell and calculate their occupied area
apl = np.array([ConvexHull(vor.vertices[vor.regions[vor.point_region[i]]]).volume for i in range(ncoor)])
return vor, apl, pbc_idx
[docs]
@staticmethod
def weight_matrix(vor, pbc_idx, coor_xy):
"""
Calculate the weight factors between neighbored lipid pairs based on a Voronoi tessellation.
Parameters
----------
vor : Voronoi Tesselation
Scipy's Voronoi Diagram object
pbc_idx : Indices
Unit cell indices of periodic image coordinates
coor_xy : numpy.ndarray
Coordinates of upper/lower leaflet
Returns
-------
weight_matrix : numpy.ndarray
Weight factors wij between neighbored lipid pairs.
Is 0 if lipids are not directly neighbored.
"""
# Number of points in the plane
ncoor = coor_xy.shape[0]
# Calculate the distance for all pairs of points between which a ridge exists
dij = vor.points[vor.ridge_points[:, 0]] - vor.points[vor.ridge_points[:, 1]]
dij = np.sqrt(np.sum(dij ** 2, axis=1))
# There is the (rare!) possibility that two points have the exact same xy positions,
# to prevent issues at further calculation steps, their distance is set to a very small
# distance of 4.7 Angstrom (2 times the VdW radius of a regular bead in MARTINI3)
dij[dij < 1E-5] = 4.7
# Calculate the distance for all pairs of vertices connected via a ridge
vert_idx = np.array(vor.ridge_vertices)
bij = vor.vertices[vert_idx[:, 0]] - vor.vertices[vert_idx[:, 1]]
bij = np.sqrt(np.sum(bij ** 2, axis=1))
# INFO: vor.ridge_points and vor.ridge_vertices should be sorted -> Check vor.ridge_dict
# Calculate weight factor
wij = bij / dij
# Set up an empty array to store the weight factors for each lipid
weight_matrix = np.zeros((ncoor, ncoor))
# Select all indices of ridges that contain members of the unit cell
mask_unit_cell = np.logical_or(vor.ridge_points[:, 0] < ncoor, vor.ridge_points[:, 1] < ncoor)
# Apply the modulus operator since some of the indices in "unit_cell_point" will point to coordinates outside
# the unit cell. Applying the modulus operator "%" will allow an indexing of the "weight_matrix". However, some
# of the indices in "unit_cell_point" will be doubled that shouldn't be an issue since the same weight factor is
# then just put several times in the same entry of the array (no summing or something similar!)
# Previous: unit_cell_point = vor.ridge_points[mask_unit_cell] % ncoor
# Transform the indices in vor.ridge_points back to unit cell indices -> Filter than for all indices of
# ridges that contain members of the unit cell
unit_cell_point = pbc_idx[vor.ridge_points][mask_unit_cell]
weight_matrix[unit_cell_point[:, 0], unit_cell_point[:, 1]] = wij[mask_unit_cell]
weight_matrix[unit_cell_point[:, 1], unit_cell_point[:, 0]] = wij[mask_unit_cell]
return weight_matrix
def _single_frame(self):
"""
Calculate data from a single frame of the trajectory.
"""
# Get number of frame from trajectory
self.frame = self.universe.trajectory.ts.frame
# Calculate correct index if skipping step not equals 1 or start point not equals 0
self.index = ( self.frame - self.start ) // self.step
# Update leaflet assignment (if leaflet_frame_rate is None, leaflets will never get updated during analysis)
if self.leaflet_frame_rate is not None and not self.index % self.leaflet_frame_rate:
# Call leaflet assignment functions for non-sterol and sterol compounds
self.leaflet_selection_no_sterol = self.get_leaflets()
self.leaflet_selection = self.get_leaflets_sterol()
# Write assignments to array
assignment_index = int(self.index / self.leaflet_frame_rate)
start_index = assignment_index * self.leaflet_frame_rate
end_index = (assignment_index + 1) * self.leaflet_frame_rate
if end_index > self.leaflet_assignment.shape[1]:
end_index = self.leaflet_assignment.shape[1]
self.uidx = self.get_residue_idx(self.resids_index_map, self.leaflet_selection["0"].resids)
self.lidx = self.get_residue_idx(self.resids_index_map, self.leaflet_selection["1"].resids)
self.leaflet_assignment[self.lidx, start_index:end_index] = 1
self.leaflet_assignment[self.uidx, start_index:end_index] = 0
self.leaflet_assignment[self.lidx, start_index:end_index] = 1
# Update sterol assignment. Don't do the update if it was already done in the if-statement before
if not self.index % self.sterol_frame_rate and (
self.leaflet_frame_rate is None or self.index % self.leaflet_frame_rate):
# Call leaflet assignment function for sterol compounds
self.leaflet_selection = self.get_leaflets_sterol()
# Write assignments to array
assignment_index = int(self.index / self.sterol_frame_rate)
start_index = assignment_index * self.sterol_frame_rate
end_index = (assignment_index + 1) * self.sterol_frame_rate
if end_index > self.leaflet_assignment.shape[1]:
end_index = self.leaflet_assignment.shape[1]
self.uidx = self.get_residue_idx(self.resids_index_map, self.leaflet_selection["0"].resids)
self.lidx = self.get_residue_idx(self.resids_index_map, self.leaflet_selection["1"].resids)
self.leaflet_assignment[self.uidx, start_index:end_index] = 0
self.leaflet_assignment[self.lidx, start_index:end_index] = 1
if self.leaflet_selection["0"].select_atoms("group leaf1", leaf1=self.leaflet_selection["1"]):
raise ValueError("Atoms in both leaflets !")
# ------------------------------ Local Normals/Area per Lipid ------------------------------------------------ #
boxdim = self.universe.trajectory.ts.dimensions[0:3]
upper_coor_xy = self.leaflet_selection[str(0)].positions
lower_coor_xy = self.leaflet_selection[str(1)].positions
# Check Transmembrane domain existance
if self.tmd_protein is not None:
tmd_upper_coor_xy = self.tmd_protein["0"]
# Check if dimension of coordinates is same in both array
if tmd_upper_coor_xy.shape[1] == upper_coor_xy.shape[1]:
upper_coor_xy = np.append(upper_coor_xy, tmd_upper_coor_xy, axis=0)
tmd_lower_coor_xy = self.tmd_protein["1"]
# Check if dimension of coordinates is same in both array
if tmd_lower_coor_xy.shape[1] == lower_coor_xy.shape[1]:
lower_coor_xy = np.append(lower_coor_xy, tmd_lower_coor_xy, axis=0)
upper_vor, upper_apl, upper_pbc_idx = self.area_per_lipid_vor(coor_xy=upper_coor_xy, boxdim=boxdim,
frac=self.frac)
lower_vor, lower_apl, lower_pbc_idx = self.area_per_lipid_vor(coor_xy=lower_coor_xy, boxdim=boxdim,
frac=self.frac)
if self.tmd_protein is not None:
# Remove TMD Protein numbers from training data
upper_apl = np.array(upper_apl[:-len(tmd_upper_coor_xy)])
lower_apl = np.array(lower_apl[:-len(tmd_lower_coor_xy)])
self.results.train[self.uidx, self.index, 0] = upper_apl
self.results.train[self.lidx, self.index, 0] = lower_apl
# TODO Local normal calculation
# ------------------------------ Order parameter ------------------------------------------------------------- #
self.order_parameter()
# ------------------------------ Weight Matrix --------------------------------------------------------------- #
upper_weight_matrix = self.weight_matrix(upper_vor, pbc_idx=upper_pbc_idx, coor_xy=upper_coor_xy)
lower_weight_matrix = self.weight_matrix(lower_vor, pbc_idx=lower_pbc_idx, coor_xy=lower_coor_xy)
if self.tmd_protein is not None:
# Remove TMD Protein numbers from weight matrix
upper_weight_matrix = upper_weight_matrix[:-len(tmd_upper_coor_xy),:-len(tmd_upper_coor_xy)]
lower_weight_matrix = lower_weight_matrix[:-len(tmd_lower_coor_xy),:-len(tmd_lower_coor_xy)]
# Keep weight matrices in scipy.sparse.csr_array format since both is sparse matrices
self.results["upper_weight_all"].append(csr_array(upper_weight_matrix))
self.results["lower_weight_all"].append(csr_array(lower_weight_matrix))
def _conclude(self):
"""
Calculate the final results of the analysis
Extract the obtained data and put them into a clear and accessible data structure
"""
log.info("Conclusion step is starting.")
self.prepare_train_data()
# -------------------------------------------------------------
# Check for user-provided HMMs
if not any(self.trained_hmms):
# No user-provided HMMs, perform usual workflow
log.info("Gaussian Mixture Model training is starting.")
self.GMM(gmm_kwargs=self.gmm_kwargs)
log.info("Hidden Markov Model training is starting.")
self.HMM(hmm_kwargs=self.hmm_kwargs)
else:
# User-provided HMMs found, use them!
log.info("No Gaussian Mixture Model is trained.")
log.info("No Hidden Markov Model is trained. Instead use the user-provided HMMs.")
# Use the provided dictionary directly, the checks for validity were already done before
self.results['HMM'] = self.trained_hmms
# Same workflow as in self.HMM()
if self.result_plots:
# Plot result of hmm
self.plot_hmm_result()
# Make predictions based on HMM model
self.predict_states()
# Validate states and result prediction
self.state_validate()
if self.result_plots:
# Plot prediction result
self.predict_plot()
log.info("Getis-Ord Statistic calculation is starting.")
self.getis_ord()
log.info("Clustering is starting.")
self.result_clustering()
if self.result_plots:
self.clustering_plot()
[docs]
def prepare_train_data(self):
"""
Prepare train data for GMM and HMM while separating self.results.train with respect to each residue
"""
self.results.train_data_per_type = {}
resid_dict = {}
for resname in self.unique_resnames:
idx = self.get_residue_idx(self.resids_index_map, self.residue_ids[resname])
resid_dict[resname] = idx
self.results.train_data_per_type[f"{resname}"] = [[] for _ in range(3)]
if self.asymmetric_membrane:
# For asymmetric membranes, models will be trained for each leaflets' each residue type
leaflet_residx = {}
# leaflet_train_residx contains residue indexes that are not too flip-floppy for model training
leaflet_train_residx = {}
# Save each residues indexes with respect to leaflet assignment
for resname, idx in resid_dict.items():
leaflet_assign = self.leaflet_assignment[idx]
leaflet_train_residx[resname] = {0: [], 1: []}
leaflet_residx[resname] = {0: [], 1: []}
for i in range(len(leaflet_assign)):
if (leaflet_assign[i] == 0).all():
leaflet_train_residx[resname][0].append(idx[i])
leaflet_residx[resname][0].append(idx[i])
elif (leaflet_assign[i] == 1).all():
leaflet_train_residx[resname][1].append(idx[i])
leaflet_residx[resname][1].append(idx[i])
else:
# If a residue is %80 of time belongs to a leaflet, accept it as residue of that leaflet
lower_leaflet_percentage = len(np.nonzero(leaflet_assign[i] == 1)[0]) / len(leaflet_assign[i])
if lower_leaflet_percentage >= 0.8:
leaflet_train_residx[resname][1].append(idx[i])
elif lower_leaflet_percentage <= 0.2:
leaflet_train_residx[resname][0].append(idx[i])
if lower_leaflet_percentage > 0.5:
leaflet_residx[resname][1].append(idx[i])
else:
leaflet_residx[resname][0].append(idx[i])
self.leaflet_residx = leaflet_residx
# Prepare rest of train data for each residue
for resname, tails in self.tails.items():
rsn_ids = self.residue_ids[resname]
self.results.train_data_per_type[resname][0] = rsn_ids
# Select columns of area per lipid and tails' scc parameters
residx = leaflet_train_residx[resname]
upper_leaflet_data = self.results.train[residx[0]][:, :, 0:len(tails) + 1]
lower_leaflet_data = self.results.train[residx[1]][:, :, 0:len(tails) + 1]
self.results.train_data_per_type[resname][1] = [upper_leaflet_data, lower_leaflet_data]
# TODO idx might referenced before assignment ?
self.results.train_data_per_type[resname][2] = self.leaflet_assignment[idx]
for i, (resname, tail) in enumerate(self.sterol_tails_selection.items()):
rsn_ids = self.residue_ids[resname]
self.results.train_data_per_type[resname][0] = rsn_ids
idx = self.get_residue_idx(self.resids_index_map, rsn_ids)
# Select columns of area per lipid and sterol's scc parameter
residx = leaflet_train_residx[resname]
upper_leaflet_data = self.results.train[residx[0]][:, :, [0, 1 + self.max_tail_len + i]]
lower_leaflet_data = self.results.train[residx[1]][:, :, [0, 1 + self.max_tail_len + i]]
self.results.train_data_per_type[resname][1] = [upper_leaflet_data, lower_leaflet_data]
self.results.train_data_per_type[resname][2] = self.leaflet_assignment[idx]
else:
# For symmetric membranes, models will be trained for each residue type
for resname, tails in self.tails.items():
rsn_ids = self.residue_ids[resname]
self.results.train_data_per_type[resname][0] = rsn_ids
idx = resid_dict[resname]
# Select columns of area per lipid and tails' scc parameters
self.results.train_data_per_type[resname][1] = self.results.train[idx][:, :, 0:len(tails) + 1]
self.results.train_data_per_type[resname][2] = self.leaflet_assignment[idx]
for i, (resname, tail) in enumerate(self.sterol_tails_selection.items()):
rsn_ids = self.residue_ids[resname]
self.results.train_data_per_type[resname][0] = rsn_ids
idx = resid_dict[resname]
# Select columns of area per lipid and sterol's scc parameter
self.results.train_data_per_type[resname][1] = self.results.train[idx][:, :,
[0, 1 + self.max_tail_len + i]]
self.results.train_data_per_type[resname][2] = self.leaflet_assignment[idx]
# ------------------------------ FIT GAUSSIAN MIXTURE MODEL ------------------------------------------------------ #
[docs]
def GMM(self, gmm_kwargs):
"""
Fit Gaussian Mixture
Parameters
----------
gmm_kwargs : dict
Additional parameters for mixture.GaussianMixture
"""
# Iterate over each residue and implement gaussian mixture model
for res, data in self.results.train_data_per_type.items():
if self.asymmetric_membrane:
temp_dict = {}
for leaflet in range(2):
gmm_data = data[1][leaflet]
if len(gmm_data) == 0:
temp_dict[leaflet] = None
else:
gmm = mixture.GaussianMixture(n_components=2, **gmm_kwargs).fit(
gmm_data.reshape(-1, gmm_data.shape[2]))
temp_dict[leaflet] = gmm
log.info(f"Leaflet {leaflet}, {res} Gaussian Mixture Model is trained.")
self.results["GMM"][res] = temp_dict
else:
gmm = mixture.GaussianMixture(n_components=2, **gmm_kwargs).fit(data[1].reshape(-1, data[1].shape[2]))
self.results["GMM"][res] = gmm
log.info(f"{res} Gaussian Mixture Model is trained.")
# Check for convergence
for resname, each in self.results["GMM"].items():
if self.asymmetric_membrane:
if each[0] is not None and not each[0].converged_:
log.warning(f"{resname} lower leaflet Gaussian Mixture Model is not converged.")
if each[1] is not None and not each[1].converged_:
log.warning(f"{resname} upper leaflet Gaussian Mixture Model is not converged.")
else:
if not each.converged_:
log.warning(f"{resname} Gaussian Mixture Model is not converged.")
# ------------------------------ HIDDEN MARKOV MODEL ------------------------------------------------------------- #
[docs]
def HMM(self, hmm_kwargs):
"""
Create Gaussian based Hidden Markov Models for each residue type
Parameters
----------
hmm_kwargs : dict
Additional parameters for hmmlearn.hmm.GaussianHMM
"""
# Iterate over each residue and implement gaussian-hidden markov model
for resname, data in self.results.train_data_per_type.items():
if self.asymmetric_membrane:
temp_dict = {}
for leaflet in range(2):
gmm = self.results["GMM"][resname][leaflet]
if gmm is not None:
hmm_data = data[1][leaflet]
hmm = self.fit_hmm(data=hmm_data, gmm=gmm, hmm_kwargs=hmm_kwargs,
n_repeats=self.n_init_hmm)
temp_dict[leaflet] = hmm
else:
temp_dict[leaflet] = None
log.info(f"Leaflet {leaflet}, {resname} Gaussian Hidden Markov Model is trained.")
self.results["HMM"][resname] = temp_dict
else:
hmm = self.fit_hmm(data=data[1], gmm=self.results["GMM"][resname], hmm_kwargs=hmm_kwargs, n_repeats=self.n_init_hmm)
self.results["HMM"][resname] = hmm
log.info(f"{resname} Gaussian Hidden Markov Model is trained.")
if self.result_plots:
# Plot result of hmm
self.plot_hmm_result()
# Make predictions based on HMM model
self.predict_states()
# Validate states and result prediction
self.state_validate()
if self.result_plots:
# Plot prediction result
self.predict_plot()
[docs]
def fit_hmm(self, data, gmm, hmm_kwargs, n_repeats=10):
"""
Fit several HMM models to the data and return the best one.
Parameters
----------
data : numpy.ndarray
Data of the single lipid properties for one lipid type at each time step
gmm : GaussianMixture Model
Scikit-learn object
hmm_kwargs: dict
Additional parameters for Hidden Markov Model
n_repeats : int
Number of independent fits
Returns
-------
best_ghmm : GaussianHMM
hmmlearn object
"""
best_ghmm = None
# Specify the length of the sequence for each lipid
n_lipids = data.shape[0]
dim = data.shape[2]
lengths = np.repeat(self.n_frames, n_lipids)
# The HMM fitting is started multiple times from
# different starting conditions
# Initialize the best score with minus infinity
best_score = -np.inf
# Re-start the HMM fitting 10 times
for i in tqdm(range(n_repeats)):
# Initialize a HMM for one lipid type
ghmm_i = GaussianHMM(n_components=2,
means_prior=gmm.means_,
covars_prior=gmm.covariances_,
**hmm_kwargs)
# Train the HMM based on the data for every lipid and frame
ghmm_i.fit(data.reshape(-1, dim),
lengths=lengths)
# Obtain the log-likelihood
# probability of the current model
score_i = ghmm_i.score(data.reshape(-1, dim),
lengths=lengths)
# Check if the quality of the result improved
if score_i > best_score:
best_score = score_i
best_ghmm = ghmm_i
# Delete the current model
del ghmm_i
return best_ghmm
[docs]
def plot_hmm_result(self):
"""
Plots Hidden Markov Models training history
"""
x_len = 0
for resname, ghmm in self.results['HMM'].items():
if self.asymmetric_membrane:
for leaflet in range(2):
if ghmm[leaflet] is not None:
x_len = max(x_len, len(ghmm[leaflet].monitor_.history) - 1)
plt.semilogy(np.arange(len(ghmm[leaflet].monitor_.history) - 1),
np.diff(np.array(ghmm[leaflet].monitor_.history)),
ls="-", label=f"{resname}_{leaflet}", lw=2)
else:
x_len = max(x_len, len(ghmm.monitor_.history) - 1)
plt.semilogy(np.arange(len(ghmm.monitor_.history) - 1), np.diff(np.array(ghmm.monitor_.history)),
ls="-", label=resname, lw=2)
x_len = (x_len // 100 + 1) * 100
plt.legend(fontsize=15)
plt.semilogy(np.arange(x_len), np.repeat(1E-4, x_len), color="k", ls="--", lw=2)
plt.xlim(0, x_len)
plt.ylim(1E-5, 15E5)
plt.ylabel(r"$\Delta(log(\hat{L}))$", fontsize=18)
plt.xlabel("Iterations", fontsize=18)
plt.tick_params(axis="both", labelsize=11)
plt.text(s="Tolerance 1e-4", x=1, y=1E-4 + 0.00005, color="k", fontsize=15)
plt.title("a", fontsize=20, fontweight="bold", loc="left")
plt.tight_layout()
if self.save_plots:
plt.savefig("a.pdf")
plt.show()
[docs]
def predict_states(self):
"""
Each residues prediction assignment to ordered or disordered domains.
"""
if self.asymmetric_membrane:
# Since we select stable lipids for training, we need all training data of lipids to predict order of
# frequently flip-flop doing ones
for resname, tails in self.tails.items():
temp_array = []
for leaflet in range(2):
idx = self.leaflet_residx[resname][leaflet]
data = self.results.train[idx][:, :, 0:len(tails) + 1]
shape = data.shape
hmm = self.results['HMM'][resname][leaflet]
if hmm is not None:
lengths = np.repeat(shape[1], shape[0])
prediction = hmm.predict(data.reshape(-1, shape[2]), lengths=lengths).reshape(shape[0],
shape[1])
prediction = self.hmm_diff_checker(hmm.means_, prediction)
temp_array.append([idx, prediction])
else:
temp_array.append([])
if len(temp_array[0]) == 0:
idx = np.array(temp_array[1][0]).argsort()
result = temp_array[1][1]
elif len(temp_array[1]) == 0:
idx = np.array(temp_array[0][0]).argsort()
result = temp_array[0][1]
else:
idx = np.concatenate((temp_array[0][0], temp_array[1][0])).argsort()
result = np.concatenate((temp_array[0][1], temp_array[1][1]))
result = result[idx]
self.results['HMM_Pred'][resname] = result
for i, (resname, tail) in enumerate(self.sterol_tails_selection.items()):
temp_array = []
for leaflet in range(2):
idx = self.leaflet_residx[resname][leaflet]
data = self.results.train[idx][:, :, [0, 1 + self.max_tail_len + i]]
shape = data.shape
hmm = self.results['HMM'][resname][leaflet]
if hmm is not None:
lengths = np.repeat(shape[1], shape[0])
prediction = hmm.predict(data.reshape(-1, shape[2]), lengths=lengths).reshape(shape[0],
shape[1])
prediction = self.hmm_diff_checker(hmm.means_, prediction)
temp_array.append([idx, prediction])
else:
temp_array.append(None)
if temp_array[0] is None:
idx = np.array(temp_array[1][0]).argsort()
result = temp_array[1][1]
elif temp_array[1] is None:
idx = np.array(temp_array[0][0]).argsort()
result = temp_array[0][1]
else:
idx = np.concatenate((temp_array[0][0], temp_array[1][0])).argsort()
result = np.concatenate((temp_array[0][1], temp_array[1][1]))
result = result[idx]
self.results['HMM_Pred'][resname] = result
else:
for resname, data in self.results.train_data_per_type.items():
shape = data[1].shape
hmm = self.results['HMM'][resname]
# Lengths consists of number of frames and number of residues
lengths = np.repeat(shape[1], shape[0])
prediction = hmm.predict(data[1].reshape(-1, shape[2]), lengths=lengths).reshape(shape[0], shape[1])
# Save prediction result of each residue
self.results['HMM_Pred'][resname] = prediction
[docs]
def state_validate(self):
"""
Validate state assignments of HMM model by checking means of the model of each residue.
"""
if not self.asymmetric_membrane:
# Asymmetric membrane validation is done in prediction step due to nature of it
for resname, hmm in self.results["HMM"].items():
if hmm is not None:
means = hmm.means_
prediction_results = self.results['HMM_Pred'][resname]
self.results['HMM_Pred'][resname] = self.hmm_diff_checker(means, prediction_results)
[docs]
@staticmethod
def hmm_diff_checker(means, prediction_results):
"""
Checks if prediction assignments correct with respect to means. Since HMM is unsupervised, model can assign 0 to
disordered domains and 1 to ordered domains. In these cases needs to be changed to vice versa.
Parameters
----------
means : np.ndarray
Table of model which contains means of area per lipid and order parameters
prediction_results: np.ndarray
Initial prediction results which is 1s and 0s
Returns
-------
prediction_results : np.ndarray
Same or flipped prediction results with respect to means table
"""
diff_percents = (means[1, 0] - means[0, 0]) / means[0, 0]
if diff_percents > 0.0:
return np.abs(prediction_results - 1)
else:
return prediction_results
[docs]
@staticmethod
def get_residue_idx(resids_index_map, resids):
"""
Checks resids index map (lookup table) to return corresponding indexes to use in DomHMM result section.
Parameters
----------
resids_index_map: dict
Lookup table where each residue id's index in self.membrane_unique_resids is kepts
resids: numpy.ndarray
Residue id list for indexing
Returns
-------
idx: numpy.ndarray
Indexes in the same order with resids
"""
idx = np.array([resids_index_map[resid] for resid in resids])
return idx
[docs]
def predict_plot(self):
"""
Plots convergence of HMM model training process
"""
t = np.linspace(8, 10, self.n_frames)
for resname in self.unique_resnames:
plt.plot(t, self.results['HMM_Pred'][resname].mean(0), label=resname)
plt.xticks([8, 8.5, 9, 9.5, 10])
plt.xlabel(r"t ($\mu$s)", fontsize=18)
plt.ylabel(r"$\bar{O}_{Lipid}$", fontsize=18)
plt.legend(fontsize=15, ncols=1, loc="lower left")
plt.ylim(0, 1)
plt.xlim(8, 10)
plt.title("b", fontsize=20, fontweight="bold", loc="left")
plt.tight_layout()
if self.save_plots:
plt.savefig("b.pdf")
plt.show()
# ------------------------------ GETIS-ORD STATISTIC ------------------------------------------------------------- #
[docs]
def getis_ord(self):
"""
Calculates Getis-Ord statistic for identification model
"""
self.getis_ord_stat(self.results["upper_weight_all"], 0)
self.getis_ord_stat(self.results["lower_weight_all"], 1)
log.info("Getis-Ord for leaflets are calculated.")
if self.result_plots:
self.getis_ord_plot()
self.results["Getis_Ord"]["Permut_0"] = self.permut_getis_ord_stat(self.results["upper_weight_all"], 0)
self.results["Getis_Ord"]["Permut_1"] = self.permut_getis_ord_stat(self.results["lower_weight_all"], 1)
log.info("Permutations of Getis-Ord are calculated.")
self.results["z_score"] = self.z_score_calc()
log.info("Z score is calculated.")
[docs]
def getis_ord_stat(self, weight_matrix_all, leaflet):
"""
Getis-Ord Local Spatial Autocorrelation Statistics calculation based on the predicted order states
of each lipid and the weighting factors between neighbored lipids.
Parameters
----------
weight_matrix_all : sparse.csr_array
Weight matrix for all lipid in a leaflet at current time step
leaflet : int
0 = upper leaflet, 1 = lower leafet
Returns
-------
g_star_i : list of numpy.ndarrays
G*i values for each lipid at each time step
w_ii_all : list of numpy.ndarrays
Self-influence of the lipids
"""
# Get the number of frames
nframes = self.n_frames
# Initialize empty lists to store the G*i values and the self-influence of the lipids in a lipid
g_star_i = []
w_ii_all = []
# Iterate over frames
for step in range(nframes):
# Get the weight matrix of the leaflet at the current time step
weight_matrix = weight_matrix_all[step]
# Number of lipids in the leaflet
n = weight_matrix.shape[0]
# In case the code was already executed beforehand
weight_matrix[range(n), range(n)] = 0.
# Get the order state of each lipid in the leaflet at the current time step
order_states = self.get_leaflet_step_order(leaflet=leaflet, step=step)
# Number of neighbors per lipid -> The number is 0 (or close to 0) for not neighboured lipids
nneighbor = np.sum(weight_matrix > 1E-5, axis=1)
# Parameters for the Getis-Ord statistic
w_ii = np.sum(weight_matrix, axis=-1) / nneighbor # Self-influence!
weight_matrix[range(n), range(n)] = w_ii
w_star_i = np.sum(weight_matrix, axis=-1) # + w_ii
s_star_1i = np.sum(weight_matrix.power(2), axis=-1) # + w_ii**2
# Empirical standard deviation over all order states in the leaflet
s = np.std(order_states, ddof=1)
# Employ matrix-vector multiplication
so = weight_matrix @ order_states
# Calculate the nominator for G*i
nom = so - w_star_i * 0.5
# Calculate the denominator for G*i
denom = s * np.sqrt((n * s_star_1i - w_star_i ** 2) / (n - 1))
g_star = nom / denom
assert not np.any(nneighbor < 1), "Lipid found without a neighbor!"
g_star_i.append(g_star)
w_ii_all.append(w_ii)
self.results['Getis_Ord'][leaflet] = {f"g_star_i_{leaflet}": g_star_i, f"w_ii_{leaflet}": w_ii_all}
[docs]
def getis_ord_plot(self):
"""
Plots average order state for each lipid type per frame
"""
# Get number of lipid residues
resnum = len(self.unique_resnames)
# For each residue type there is an empty list initialized
g_star_i_temp = [[] for _ in range(resnum)]
# Iterate over all frames
for step in range(self.n_frames):
# Get the indices of the lipids in leaflet 0 and 1 and their corresponding positions
index_dict_0, pos_dict_0 = self.get_leaflet_step_order_index(leaflet=0, step=step)
index_dict_1, pos_dict_1 = self.get_leaflet_step_order_index(leaflet=1, step=step)
# Iterate over the lipid types
for i, rsn in enumerate(self.unique_resnames):
# Merge Getis-Ord values for the upper and the lower leaflet to display them as a histogram
# The G* values should be sorted according to the indices of the resids
g_start_sorted_0 = self.results['Getis_Ord'][0]['g_star_i_0'][step][index_dict_0[rsn]]
# The G* values should be sorted according to the indices of the resids
g_start_sorted_1 = self.results['Getis_Ord'][1]['g_star_i_1'][step][index_dict_1[rsn]]
g_star_i_temp[i] += list(np.append(g_start_sorted_0, g_start_sorted_1))
for i in range(resnum):
plt.hist(g_star_i_temp[i], bins=np.linspace(-3, 3, 201), density=True, histtype="step", lw=2,
label=self.unique_resnames[i])
plt.legend(fontsize=15, loc="upper left", ncols=2)
plt.xlim(-3, 3)
plt.ylim(0, .9)
_ = plt.xlabel("$G^*_i$", fontsize=18)
plt.ylabel("$p(G^*_i)$", fontsize=18)
plt.tick_params(labelsize=11)
plt.title("c", fontsize=20, fontweight="bold", loc="left")
plt.tight_layout()
if self.save_plots:
plt.savefig("c.pdf")
plt.show()
[docs]
def permut_getis_ord_stat(self, weight_matrix_all, leaflet):
"""
Getis-Ord Local Spatial Autocorrelation Statistics calculation based on the predicted order states
of each lipid and the weighting factors between neighbored lipids.
Parameters
----------
weight_matrix_all : sparse.csr_array
Weight matrices for all lipid in a leaflet at each time step
leaflet : int
0 = upper leaflet, 1 = lower leafet
Returns
-------
g_star_i : list of numpy.ndarrays
G*i values for each lipid at each time step
"""
# Get the number of frames
nframes = self.n_frames
# Initialize empty lists to store the G*i values and the self-influence of the lipids in a lipid
g_star_i = []
# Do 10 permutations per frame
n_permut = 10
# Iterate over frames
for step in tqdm(range(nframes)):
for permut in range(n_permut):
# Get the weightmatrix of the leaflet at the current time step
weight_matrix = weight_matrix_all[step]
# Number of lipids in the leaflet
n = weight_matrix.shape[0]
# In case the code was already executed beforehand
weight_matrix[range(n), range(n)] = 0.0
# Get the order state of each lipid in the leaflet at the current time step
order_states = self.get_leaflet_step_order(leaflet=leaflet, step=step)
np.random.shuffle(order_states)
# Number of neighbors per lipid -> The number is 0 (or close to 0) for not neighboured lipids
nneighbor = np.sum(weight_matrix > 1E-5, axis=1)
# Parameters for the Getis-Ord statistic
w_ii = np.sum(weight_matrix, axis=-1) / nneighbor # Self-influence!
weight_matrix[range(n), range(n)] = w_ii
w_star_i = np.sum(weight_matrix, axis=-1) # + w_ii
s_star_1i = np.sum(weight_matrix ** 2, axis=-1) # + w_ii**2
# Empirical standard deviation over all order states in the leaflet
s = np.std(order_states, ddof=1)
# Employ matrix-vector multiplication
so = weight_matrix @ order_states
# Calculate the nominator for G*i
nom = so - w_star_i * 0.5
# Calculate the denominator for G*i
denom = s * np.sqrt((n * s_star_1i - w_star_i ** 2) / (n - 1))
g_star = nom / denom
assert not np.any(nneighbor < 1), "Lipid found without a neighbor!"
g_star_i += list(g_star)
return g_star_i
[docs]
def z_score_calc(self):
"""
Z score calculation for Getis-Ord statistics
"""
result = {}
for i in range(2):
z_score = {}
getis_ord_permut = self.results["Getis_Ord"][f"Permut_{i}"]
z_score["z_a"] = np.quantile(getis_ord_permut, self.p_value)
z_score["z1_a"] = np.quantile(getis_ord_permut, 1 - self.p_value)
result[i] = z_score
return result
# ------------------------------ HIERARCHICAL CLUSTERING --------------------------------------------------------- #
[docs]
def clustering_plot(self):
"""
Runs hierarchical clustering and plots clustering results in different frames.
"""
n_frames = self.n_frames
# Plot %5, %50 and %95 points of frame list
frame_list = [int(n_frames / 20), int(n_frames / 2), int(n_frames / 1.05)]
fig, ax = plt.subplots(1, len(frame_list), figsize=(20, 5))
# Iterate over three frames illustrate the clustering results
for k, i in enumerate(frame_list):
order_states_0 = self.get_leaflet_step_order(0, i)
# Clustering
# ----------------------------------------------------------------------------------------------------------------------
core_lipids = self.assign_core_lipids(weight_matrix_f=self.results["upper_weight_all"][i],
g_star_i_f=self.results['Getis_Ord'][0]['g_star_i_0'][i],
order_states_f=order_states_0,
w_ii_f=self.results["Getis_Ord"][0]["w_ii_0"][i],
z_score=self.results["z_score"][0])
clusters = self.hierarchical_clustering(weight_matrix_f=self.results["upper_weight_all"][i],
w_ii_f=self.results["Getis_Ord"][0]["w_ii_0"][i],
core_lipids=core_lipids)
# Plot coordinates
# ----------------------------------------------------------------------------------------------------------------------
residue_indexes, positions = self.get_leaflet_step_order_index(leaflet=0, step=i)
for resname, index in residue_indexes.items():
ax[k].scatter(positions[resname][:, 0],
positions[resname][:, 1], marker="s", alpha=1, s=5, label=resname)
# Choose color scheme for clustering coloring
colors = plt.cm.viridis_r(np.linspace(0, 1.0, len(clusters.values())))
# Goto correct frame of the trajectory
# TODO Statement seems to have no effect
self.universe.trajectory[self.start:self.stop:self.step][i]
# Prepare positions for cluster plotting
leaflet_assignment_mask = self.leaflet_assignment[:, i] == 0
positions = (self.membrane.residues[leaflet_assignment_mask].atoms & self.all_heads).positions
label_length = len(residue_indexes)
# Iterate over clusters and plot the residues
print(f"Number of clusters in frame {i}: {len(clusters.values())}")
for j, val in enumerate(clusters.values()):
idx = np.array(list(val), dtype=int)
ax[k].scatter(positions[idx, 0],
positions[idx, 1],
s=100, marker="o", color=colors[j], zorder=-10)
if self.tmd_protein is not None:
label_length += len(self.tmd_protein["0"])
for protein in self.tmd_protein["0"]:
ax[k].scatter(protein[0], protein[1], s=100, marker="^", color="black", label="TMD Protein")
ax[k].set_xticks([])
ax[k].set_yticks([])
ax[k].set_aspect("equal")
handles, labels = ax[0].get_legend_handles_labels()
plt.figlegend(handles=handles, labels=labels, ncol=label_length, loc='lower center', fontsize=15, frameon=False)
ax[0].set_title("d", fontsize=20, fontweight="bold", loc="left")
ax[0].set_title(label=f"Frame {frame_list[0]}", fontsize=18)
ax[1].set_title(label=f"Frame {frame_list[1]}", fontsize=18)
ax[2].set_title(label=f"Frame {frame_list[2]}", fontsize=18)
if self.save_plots:
plt.savefig("d.pdf")
plt.show()
[docs]
def result_clustering(self):
"""
Runs hierarchical clustering for each frame and saves result
"""
self.results["Clustering"] = {'0': {}, '1': {}}
# Iterate over all frames
for i in tqdm(range(self.n_frames), total=self.n_frames):
# Iterate over both leaflets
for j, leaflet_ in enumerate(['upper', 'lower']):
# Get order states
order_states_leaf = self.get_leaflet_step_order(j, i)
core_lipids = self.assign_core_lipids(weight_matrix_f=self.results[f"{leaflet_}_weight_all"][i],
g_star_i_f=self.results['Getis_Ord'][j][f'g_star_i_{j}'][i],
order_states_f=order_states_leaf,
w_ii_f=self.results["Getis_Ord"][j][f"w_ii_{j}"][i],
z_score=self.results["z_score"][j])
clusters = self.hierarchical_clustering(weight_matrix_f=self.results[f"{leaflet_}_weight_all"][i],
w_ii_f=self.results["Getis_Ord"][j][f"w_ii_{j}"][i],
core_lipids=core_lipids)
frame_number = self.start + i * self.step
self.results["Clustering"][str(j)][frame_number] = list(clusters.values())
[docs]
@staticmethod
def assign_core_lipids(weight_matrix_f, g_star_i_f, order_states_f, w_ii_f, z_score):
"""
Assign lipids as core members (aka lipids with a high positive autocorrelation)
depending on the Getis-Ord spatial local autocorrelation statistic.
Parameters
----------
weight_matrix_f : numpy.ndarray
Matrix containing the weight factors between all lipid pairs at one time step
g_star_i_f : numpy.ndarray
Getis-Ord spatial local autocorrelation statistic for every lipid at one time step
order_states_f: numpy.ndarray
Order states for every lipid at one time step
w_ii_f: numpy.ndarray
Self-influence weight factor for every lipid at one time step
z_score: dict
Contains boundary of the reaction region
Returns
-------
core_lipids : numpy.ndarray (bool)
Contains a TRUE value if the lipid is a core member, otherwise it FALSE
"""
# Define boundary of the reaction region
z1_a = z_score["z1_a"]
z_a = z_score["z_a"]
# Assign core members according to their auto-correlation
core_lipids = g_star_i_f > z1_a
# Assign lipids with a mid-range auto-correlation (-z_1-a, z_1-a) * Ordered
low_corr = (g_star_i_f <= z1_a) & (g_star_i_f >= z_a) & (order_states_f == 1)
# Add iteratively new lipids to the core members
n_cores_old = np.inf
n_cores_new = np.sum(core_lipids)
# Iterate until self-consistency is reached
while n_cores_old != n_cores_new:
# Check how tightly the lipids are connected to the core members
new_core_lipids = (weight_matrix_f[core_lipids].sum(0) > w_ii_f) & low_corr
# Assign lipids to core members if condition is full-filled
core_lipids[new_core_lipids] = True
# Update number of core lipids
n_cores_old = n_cores_new
n_cores_new = np.sum(core_lipids)
return core_lipids
[docs]
@staticmethod
def hierarchical_clustering(weight_matrix_f, w_ii_f, core_lipids):
"""
Hierarchical clustering approach to identify spatial related Lo domains.
Parameters
----------
weight_matrix_f : numpy.ndarray
Matrix containing the weight factors between all lipid pairs at one time step
w_ii_f: numpy.ndarray
Self-influence weight factor for every lipid at one time step
core_lipids : numpy.ndarray
Array contains information which lipid is assigned as core member
Returns
-------
clusters : dict
The dictionary contains each found cluster
"""
# Merge iteratively clusters
n_clusters_old = np.inf
n_clusters_new = np.sum(core_lipids)
# Get the indices of the core lipids
core_lipids_id = np.where(core_lipids)[0]
# Store clusters in a Python dictionary
# Initialize all core lipids as clusters
clusters = dict(
zip(
core_lipids_id.astype("U"),
[[id] for id in core_lipids_id]
)
)
# Iterate until self-consistency is reached
while n_clusters_old != n_clusters_new:
# Get a list of the IDs of current clusters
cluster_ids = list(clusters.keys())
# Iterate over all clusters i
for i, id_i in enumerate(cluster_ids):
# If cluster i was already merged and deleted, skip it!
if id_i not in clusters.keys():
continue
# The cluster weights are defined as the sum
# over the weights of all lipid members
cluster_weights_i = np.sum(weight_matrix_f[clusters[id_i]], axis=0)
# Compare cluster weights to the self-influence of each lipid
merge_condition_i = cluster_weights_i > w_ii_f
# Iterate over all clusters j
for id_j in cluster_ids[(i + 1):]:
# Do not merge a cluster with itself
if id_i == id_j:
continue
# If cluster j was already merged and deleted, skip it!
if id_j not in clusters.keys():
continue
# Calculate cluster weights and compare to lipids self-influence
cluster_weights_j = np.sum(weight_matrix_f[clusters[id_j]], axis=0)
merge_condition_j = cluster_weights_j > w_ii_f
# If the condition is fullfilled for any lipid -> Merge the clusters
if np.any(merge_condition_i[clusters[id_j]]) or np.any(merge_condition_j[clusters[id_i]]):
# Merge cluster j into cluster i
clusters[id_i] += clusters[id_j]
# Delete cluster j from the cluster dict
del clusters[id_j]
# Update cluster numbers
n_clusters_old = n_clusters_new
n_clusters_new = len(clusters.keys())
return clusters
# ------------------------------ HELPER FUNCTIONS ---------------------------------------------------------------- #
[docs]
def get_leaflet_step_order(self, leaflet, step):
"""
Receive residue's order state with respect to the leaflet
Parameters
----------
leaflet : numpy.ndarray
leaflet index
step: numpy.ndarray
step index
Returns
-------
order_states : numpy.ndarray
Numpy array contains order state results of the leaflet at step in order of system's residues
"""
#Init two empty lists for ...
temp = [] #... order states prediction
idxs = [] #... indices of lipids
#Iterate over lipids
for res, data in self.results.train_data_per_type.items():
#Get indices from resids (e.g. resid 1, is index 0)
idx = self.get_residue_idx(self.resids_index_map, data[0])
#Store the indices of the residues in the current leaflet at the current step
idxs.append(idx[self.leaflet_assignment[idx, step] == leaflet])
#Get the predicted HMM order states for the residues in the current leaflet at the current step
temp.append(self.results["HMM_Pred"][res][:, step][self.leaflet_assignment[idx, step] == leaflet])
#Sort the obtained indices
sorted_idxs = np.argsort( np.concatenate(idxs) )
#It is not always the case that the lipid order is equal (e.g., depending on resids of cholesterol for example). Here the order states are sorted
#so that they correspond to the resids in the leaflet selection -> With that they should fit to the order of the lipids in the weight matrix
order_states = np.concatenate(temp)[sorted_idxs]
return order_states
[docs]
def get_leaflet_step_order_index(self, leaflet, step):
"""
Receive residue's indexes and positions for a specific leaflet at any frame of the trajecytory.
Parameters
----------
leaflet : numpy.ndarray
leaflet index
step: numpy.ndarray
step index
Returns
-------
indexes : dict
dictionary contains numpy.arrays containing residue's indexes for each unique residue type
positions : dict
dictionary contains numpy.arrays containing residue's positions for each unique residue type
"""
leaflet_assignment_step = self.leaflet_assignment[:, step]
leaflet_assignment_mask = leaflet_assignment_step == leaflet
indexes = {}
positions = {}
# Go to correct frame of the trajectory
self.universe.trajectory[self.start:self.stop:self.step][step]
for res in self.unique_resnames:
indexes[res] = np.where(self.membrane.residues[leaflet_assignment_mask].resnames == res)[0]
if res in self.heads.keys():
positions[res] = (self.membrane.residues[leaflet_assignment_mask].atoms & self.universe.select_atoms(
f"resname {res} and name {self.heads[res]}")).positions
else:
positions[res] = (self.membrane.residues[leaflet_assignment_mask].atoms & self.universe.select_atoms(
f"resname {res} and name {self.sterol_heads[res]}")).positions
return indexes, positions