Source code for dynasor.trajectory.trajectory

__all__ = ['Trajectory', 'WindowIterator']

import numpy as np

from collections import deque
from itertools import islice, chain
from os.path import isfile
from typing import Callable, Dict, Union, List

from dynasor.trajectory.atomic_indices import parse_gromacs_index_file
from dynasor.trajectory.ase_trajectory_reader import ASETrajectoryReader
from dynasor.trajectory.extxyz_trajectory_reader import ExtxyzTrajectoryReader
from dynasor.trajectory.lammps_trajectory_reader import LammpsTrajectoryReader
from dynasor.trajectory.mdanalysis_trajectory_reader import MDAnalysisTrajectoryReader
from dynasor.trajectory.trajectory_frame import TrajectoryFrame
from dynasor.logging_tools import logger


[docs]class Trajectory: """Instances of this class hold trajectories in a format suitable for the computation of correlation functions. They behave as iterators, where each step returns the next frame as a :class:`TrajectoryFrame` object. The latter hold information regarding atomic positions, types, and velocities. Parameters ---------- filename Name of input file trajectory_format Type of trajectory. Possible values are: ``'lammps_internal'``, ``'extxyz'``, ``'ase'`` or one of the formats supported by `MDAnalysis <https://www.mdanalysis.org/>`_ (except for ``'lammpsdump'``, which can be called by specifying ``'lammps_mdanalysis'`` to avoid ambiguity) atomic_indices Specify which indices belong to which atom type. Can be (1) a dictionary where the keys specicy species and the values are list of atomic indices, (2) ``'read_from_trajectory'``, in which case the species are read from the trajectory or (3) the path to an index file. length_unit Length unit of trajectory (``'Angstrom'``, ``'nm'``, ``'pm'``, ``'fm'``). Necessary for correct conversion to internal dynasor units if the trajectory file does not contain unit information. time_unit Time unit of trajectory (``'fs'``, ``'ps'``, ``'ns'``). Necessary for correct conversion to internal dynasor units if the trajectory file does not contain unit information. frame_start First frame to read; must be larger or equal ``0``. frame_stop Last frame to read. By default (``None``) the entire trajectory is read. frame_step Read every :attr:`frame_step`-th step of the input trajectory. By default (``1``) every frame is read. Must be larger than ``0``. """ def __init__( self, filename: str, trajectory_format: str, atomic_indices: Union[str, Dict[str, List[int]]] = None, length_unit: str = 'Angstrom', time_unit: str = 'fs', frame_start: int = 0, frame_stop: int = None, frame_step: int = 1 ): if frame_start < 0: raise ValueError('frame_start should be positive') if frame_step < 0: raise ValueError('frame_step should be positive') self._frame_start = frame_start self._frame_step = frame_step self._frame_stop = frame_stop # setup trajectory reader if not isfile(filename): raise IOError(f'File {filename} does not exist') self._filename = filename if trajectory_format == 'lammps_internal': reader = LammpsTrajectoryReader elif trajectory_format == 'extxyz': reader = ExtxyzTrajectoryReader elif trajectory_format == 'lammps_mdanalysis': reader = MDAnalysisTrajectoryReader trajectory_format = 'lammpsdump' elif trajectory_format == 'ase': reader = ASETrajectoryReader elif trajectory_format == 'lammps': raise IOError('Ambiguous trajectory format, ' 'did you mean lammps_internal or lammps_mdanalysis?') else: reader = MDAnalysisTrajectoryReader logger.debug(f'Using trajectory reader: {reader.__name__}') if reader == MDAnalysisTrajectoryReader: self._reader_obj = reader(self._filename, trajectory_format, length_unit=length_unit, time_unit=time_unit) else: self._reader_obj = reader(self._filename, length_unit=length_unit, time_unit=time_unit) # Get two frames to set cell etc. frame0 = next(self._reader_obj) frame1 = next(self._reader_obj) self._cell = frame0.cell self._n_atoms = frame0.n_atoms # Make sure cell is not changed during consecutive frames if not np.allclose(frame0.cell, frame1.cell): raise ValueError('The cell changes between the first and second frame. ' 'The concept of q-points becomes muddy if the simulation cell is ' 'changing, such as during NPT MD simulations, so trajectories where ' 'the cell changes are not supported by dynasor.') # setup iterator slice (reuse frame0 and frame1 via chain) self.number_of_frames_read = 0 self.current_frame_index = 0 self._reader_iter = islice(chain([frame0, frame1], self._reader_obj), self._frame_start, self._frame_stop, self._frame_step) # setup atomic indices if atomic_indices is None: # Default behaviour atomic_indices = {'X': np.arange(0, self.n_atoms)} elif isinstance(atomic_indices, str): # Str input if atomic_indices == 'read_from_trajectory': if frame0.atom_types is None: raise ValueError('Could not read atomic indices from the trajectory.') else: uniques = np.unique(frame0.atom_types) atomic_indices = {uniques[i]: (frame0.atom_types == uniques[i]).nonzero()[0] for i in range(len(uniques))} else: atomic_indices = parse_gromacs_index_file(atomic_indices) elif isinstance(atomic_indices, dict): # Dict input pass else: raise ValueError('Could not understand atomic_indices.') self._atomic_indices = atomic_indices # sanity checks for atomic_indices for key, indices in self._atomic_indices.items(): if np.max(indices) > self.n_atoms: raise ValueError('maximum index in atomic_indices exceeds number of atoms') if np.min(indices) < 0: raise ValueError('minimum index in atomic_indices is negative') if '_' in key: # Since '_' is what we use to distinguish atom types in the results, e.g. Sqw_Cs_Pb raise ValueError('The char "_" is not allowed in atomic_indices.') # log info on trajectory and atom types etc logger.info(f'Trajectory file: {self.filename}') logger.info(f'Total number of particles: {self.n_atoms}') logger.info(f'Number of atom types: {len(self.atom_types)}') for atom_type, indices in self._atomic_indices.items(): logger.info(f'Number of atoms of type {atom_type}: {len(indices)}') logger.info(f'Simulation cell (in Angstrom):\n{str(self._cell)}') def __iter__(self): return self def __next__(self): frame = next(self._reader_iter) new_frame = TrajectoryFrame(self.atomic_indices, frame.frame_index, frame.positions, frame.velocities) self.number_of_frames_read += 1 self.current_frame_index = frame.frame_index return new_frame def __str__(self) -> str: s = ['Trajectory'] s += ['{:12} : {}'.format('filename', self.filename)] s += ['{:12} : {}'.format('natoms', self.n_atoms)] s += ['{:12} : {}'.format('frame_start', self._frame_start)] s += ['{:12} : {}'.format('frame_stop', self._frame_stop)] s += ['{:12} : {}'.format('frame_step', self.frame_step)] s += ['{:12} : {}'.format('frame_index', self.current_frame_index)] s += ['{:12} : [{}\n {}\n {}]' .format('cell', self.cell[0], self.cell[1], self.cell[2])] return '\n'.join(s) def __repr__(self) -> str: return str(self) def _repr_html_(self) -> str: s = [f'<h3>{self.__class__.__name__}</h3>'] s += ['<table border="1" class="dataframe">'] s += ['<thead><tr><th style="text-align: left;">Field</th><th>Value</th></tr></thead>'] s += ['<tbody>'] s += [f'<tr"><td style="text-align: left;">File name</td><td>{self.filename}</td></tr>'] s += [f'<tr><td style="text-align: left;">Number of atoms</td><td>{self.n_atoms}</td></tr>'] s += [f'<tr><td style="text-align: left;">Cell metric</td><td>{self.cell}</td></tr>'] s += [f'<tr><td style="text-align: left;">Frame step</td><td>{self.frame_step}</td></tr>'] s += [f'<tr><td style="text-align: left;">Atom types</td><td>{self.atom_types}</td></tr>'] s += ['</tbody>'] s += ['</table>'] return '\n'.join(s) @property def cell(self): """ Simulation cell """ return self._cell @property def n_atoms(self): """ Number of atoms """ return self._n_atoms @property def filename(self): """ The trajectory filename """ return self._filename @property def atomic_indices(self): """ Return copy of index arrays """ atomic_indices = dict() for name, inds in self._atomic_indices.items(): atomic_indices[name] = inds.copy() return atomic_indices @property def atom_types(self) -> List[str]: return sorted(self._atomic_indices.keys()) @property def frame_step(self): """ Frame to access, trajectory will return every :attr:`frame_step`-th snapshot """ return self._frame_step
def consume(iterator, n): """ Advance the iterator by :attr:`n` steps. If :attr:`n` is ``None``, consume entirely. """ # From the python.org if n is None: deque(iterator, maxlen=0) else: next(islice(iterator, n, n), None) class WindowIterator: """Sliding window iterator. Returns consecutive windows (a window is represented as a list of objects), created from an input iterator. Parameters ---------- itraj Trajectory object width Length of window (``window_size`` + 1) window_step Distance between the start of two consecutive window frames element_processor Enables processing each non-discarded object; useful if ``window_step > width`` and ``map_item`` is expensive (as compared to directly passing ``map(fun, itraj)`` as ``itraj``); if ``window_step < width``, you could as well directly pass ``map(fun, itraj)``. """ def __init__(self, itraj: Trajectory, width: int, window_step: int = 1, element_processor: Callable = None): self._raw_it = itraj if element_processor: self._it = map(element_processor, self._raw_it) else: self._it = self._raw_it assert window_step >= 1 assert width >= 1 self.width = width self.window_step = window_step self._window = None def __iter__(self): return self def __next__(self): """ Returns next element in sequence. """ if self._window is None: self._window = deque(islice(self._it, self.width), self.width) else: if self.window_step >= self.width: self._window.clear() consume(self._raw_it, self.window_step - self.width) else: for _ in range(min((self.window_step, len(self._window)))): self._window.popleft() for f in islice(self._it, min((self.window_step, self.width))): self._window.append(f) if len(self._window) == 0: raise StopIteration return list(self._window)