Source code for pyprocar.io.vasp

import collections
import gzip
import logging
import os
import re
import warnings
import xml.etree.ElementTree as ET
from functools import cached_property
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np
from numpy import array

from pyprocar.core import DensityOfStates, ElectronicBandStructure, KPath, Structure
from pyprocar.utils.strings import remove_comment

logger = logging.getLogger(__name__)
user_logger = logging.getLogger("user")


[docs] class Outcar(collections.abc.Mapping): """ A class to parse the OUTCAR file from a VASP run and extract electronic structure data. The OUTCAR file provides detailed output of a VASP run, including a summary of used input parameters, information about electronic steps and KS-eigenvalues, stress tensors, forces on atoms, local charges and magnetic moments, and dielectric properties. The amount of output written onto the OUTCAR file can be chosen by modifying the NWRITE tag in the INCAR file. The Outcar class acts as a Mapping, providing key-value access to the variables parsed from the OUTCAR file. """
[docs] def __init__(self, filepath: Union[str, Path]): """ Constructor method to initialize an Outcar object. Reads the file specified by filename and stores its content. Parameters ---------- filename : Union[str, Path], optional The OUTCAR filename. If not provided, defaults to "OUTCAR". """ self.filepath: Path = Path(filepath) self._get_axes_nk() with open(self.filepath, "r") as rf: self.file_str: str = rf.read() logger.info(f"Vasp Version: {self.version}")
def _get_axes_nk(self): """ n_kx Returns ------- n_kx n_kx """ try: raw_text = re.findall(r"generate\s*k-points\s*for:\s*(.*)", self.file_str)[ -1 ] self.n_kx = int(raw_text.split()[0]) self.n_ky = int(raw_text.split()[1]) self.n_kz = int(raw_text.split()[2]) except: self.n_kx = None self.n_ky = None self.n_kz = None return None @cached_property def version(self): """ Returns the version of the OUTCAR file. """ return re.findall(r"vasp\.\d+\.\d+\.\d+", self.file_str)[-1].strip("vasp.") @cached_property def version_tuple(self): """ Returns the version of the OUTCAR file as a tuple. """ return tuple(int(x) for x in self.version.split(".")) @cached_property def efermi(self): """ Just finds all E-fermi fields in the outcar file and keeps the last one (if more than one found). Returns ------- fermi the fermi energy """ return float(re.findall(r"E-fermi\s*:\s*(-?\d+.\d+)", self.file_str)[-1]) @cached_property def reciprocal_lattice(self): """ Finds and return the reciprocal lattice vectors, if more than one set present, it return just the last one. Returns ------- np.ndaaray return the reciprocal lattice vectors """ match = re.search( r"reciprocal lattice vectors[\s\S]+?(?=\n\s?\n\s?)", self.file_str ) numbers = re.findall(r"[-]?\d+\.\d+", match.group(0)) # Create a NumPy array from the found numbers, reshape it to 3 rows and 6 columns all_vectors = np.array(numbers, dtype=float).reshape(3, 6) # Slice the array to get the last 3 columns, reciprocal_lattice = all_vectors[:, 3:] return reciprocal_lattice @cached_property def rotations(self): """ Finds the point symmetry operations included in the OUTCAR file and returns them in matrix form. Returns ------- np.ndarray The rotation matrices """ sym_ops = self.get_symmetry_operations() if sym_ops is None: return None rotations = [] for sym_op in sym_ops: rotations.append(sym_op["rotation"]) return np.array(rotations)
[docs] def get_symmetry_operations(self): raw_spg_ops = re.search( r"Found\s+(\d+)\s+space group operations", self.file_str ) if raw_spg_ops is None: return None n_spg_operations = int(raw_spg_ops.group(1)) logger.debug(f"n_spg_operations: {n_spg_operations}") vasp54_block_match = re.search( r"Space group operators:\s*\n" # header line r"([ \t]*irot[\s\S]+?)" # from the column headers … r"(?=\n\s?\n\s?)", # … up to the next blank/non-indented line self.file_str, flags=re.IGNORECASE, ) vasp544_block_match = re.search( r"Space group operators:\s*\n" # header line r"([ \t]*irot[\s\S]+?)" # from the column headers … r"(?=\n\s\n\s)", # … up to the next blank/non-indented line self.file_str, flags=re.IGNORECASE, ) vasp60_block_pattern = re.compile( r""" irot\s*:\s*(?P<irot>\d+) # 1) operator index .*? # skip down to isymop isymop:\s* (?P<r1>-?\d+\s+-?\d+\s+-?\d+)\s* # 2) first row of 3 ints (?P<r2>-?\d+\s+-?\d+\s+-?\d+)\s* # 3) second row (?P<r3>-?\d+\s+-?\d+\s+-?\d+) # 4) third row .*? # skip to gtrans gtrans:\s* (?P<g1>-?\d+\.\d+)\s+ (?P<g2>-?\d+\.\d+)\s+ (?P<g3>-?\d+\.\d+) .*? # skip to ptrans ptrans:\s* (?P<p1>-?\d+\.\d+)\s+ (?P<p2>-?\d+\.\d+)\s+ (?P<p3>-?\d+\.\d+) .*? # skip to rotmap rotmap:\s*\n # the “rotmap:” line might stand alone (?P<rotmap> # now capture one or more “( a-> b )” entries (?:[ \t]*\(\s*\d+\s*->\s*\d+\s*\)\s*)+ ) (?=\n\n) # stop at double new line """, re.VERBOSE | re.DOTALL, ) vasp60_block_matches = vasp60_block_pattern.findall(self.file_str) if vasp54_block_match: logger.info("Detected Space Group Operators in a format from VASP 5.4") spg_operators = [] block = vasp54_block_match.group(1).rstrip() logger.debug(f"space group operators block: \n{block}") headers = None for i, line in enumerate(block.splitlines()): values = line.split() if i == 0: headers = values continue spg_operator = {} for ispg, value in enumerate(values): column_name = headers[ispg] if column_name == "irot": value = int(value) else: value = float(value) spg_operator[column_name] = value spg_operators.append(spg_operator) sym_ops = [] for operator in spg_operators: sym_op = {} irot = operator["irot"] det_A = operator["det(A)"] # convert alpha to radians alpha = np.pi * operator["alpha"] / 180.0 # get rotation axis x = operator["n_x"] y = operator["n_y"] z = operator["n_z"] tau_x = operator["tau_x"] tau_y = operator["tau_y"] tau_z = operator["tau_z"] gtrans = np.array([tau_x, tau_y, tau_z]) R = ( np.array( [ [ np.cos(alpha) + x**2 * (1 - np.cos(alpha)), x * y * (1 - np.cos(alpha)) - z * np.sin(alpha), x * z * (1 - np.cos(alpha)) + y * np.sin(alpha), ], [ y * x * (1 - np.cos(alpha)) + z * np.sin(alpha), np.cos(alpha) + y**2 * (1 - np.cos(alpha)), y * z * (1 - np.cos(alpha)) - x * np.sin(alpha), ], [ z * x * (1 - np.cos(alpha)) - y * np.sin(alpha), z * y * (1 - np.cos(alpha)) + x * np.sin(alpha), np.cos(alpha) + z**2 * (1 - np.cos(alpha)), ], ] ) * det_A ) R = ( np.linalg.inv(self.reciprocal_lattice.T) .dot(R) .dot(self.reciprocal_lattice.T) ) R = np.round(R, decimals=3) sym_op["irot"] = irot sym_op["rotation"] = R sym_op["gtrans"] = gtrans sym_op["ptrans"] = None sym_op["rotmap"] = None sym_ops.append(sym_op) return sym_ops elif vasp60_block_matches: logger.info("Detected Space Group Operators in a format from VASP 6.*") logger.debug(f"vasp60_block_matches: \n{vasp60_block_matches}") sym_ops = [] for vasp60_block_match in vasp60_block_matches: sym_op = {} irot = int(vasp60_block_match[0]) r1 = np.array([int(val) for val in vasp60_block_match[1].split()]) r2 = np.array([int(val) for val in vasp60_block_match[2].split()]) r3 = np.array([int(val) for val in vasp60_block_match[3].split()]) g1 = float(vasp60_block_match[4]) g2 = float(vasp60_block_match[5]) g3 = float(vasp60_block_match[6]) gtrans = np.array([g1, g2, g3]) p1 = float(vasp60_block_match[7]) p2 = float(vasp60_block_match[8]) p3 = float(vasp60_block_match[9]) ptrans = np.array([p1, p2, p3]) rotmap = vasp60_block_match[10].strip() rotation = np.array([r1, r2, r3]) rotation = np.array(rotation) sym_op["irot"] = irot sym_op["rotation"] = rotation.T sym_op["gtrans"] = gtrans sym_op["ptrans"] = ptrans sym_op["rotmap"] = rotmap sym_ops.append(sym_op) return sym_ops else: logger.info("No space group operators block found") return []
[docs] def to_dict(self): tmp_dict = {} symops = self.get_symmetry_operations() for symop in symops: for key in symop.keys(): if isinstance(symop[key], np.ndarray): symop[key] = symop[key].tolist() tmp_dict["symops"] = symops tmp_dict["efermi"] = self.efermi if self.reciprocal_lattice is not None: tmp_dict["reciprocal_lattice"] = self.reciprocal_lattice.tolist() if self.rotations is not None: tmp_dict["rotations"] = self.rotations.tolist() return tmp_dict
def __contains__(self, key): return key in self.__dict__ def __getitem__(self, key): return self.__dict__[key] def __iter__(self): return self.__dict__.__iter__() def __len__(self): return len(self.__dict__)
[docs] class Poscar(collections.abc.Mapping): """ A class to parse the POSCAR file Parameters ---------- filepath : str, optional The POSCAR filepath, by default "POSCAR" rotations : list, optional The rotations of the POSCAR file, by default None """
[docs] def __init__(self, filepath: Union[str, Path], rotations=None): self.filepath = Path(filepath) self.atoms, self.coordinates, self.lattice = self._parse_poscar()
def _parse_poscar(self): """ Reads VASP POSCAR file-type and returns the pyprocar structure Parameters ---------- filename : str, optional Path to POSCAR file. The default is 'CONTCAR'. Returns ------- None. """ with open(self.filepath, "r") as rf: lines = rf.readlines() comment = lines[0] self.comment = comment scale = float(remove_comment(lines[1])) lattice = np.zeros(shape=(3, 3)) for i in range(3): lattice[i, :] = [float(x) for x in remove_comment(lines[i + 2]).split()[:3]] lattice *= scale if any([char.isalpha() for char in lines[5]]): species = [x for x in lines[5].split()] shift = 1 else: shift = 0 base_dir = self.filename.parent potcar_path = base_dir / "POTCAR" if potcar_path.exists(): with open(potcar_path, "r") as rf: potcar = rf.read() species = re.findall( r"\s*PAW[PBE_\s]*([A-Z][a-z]*)[_a-z]*[0-9]*[a-zA-Z]*[0-9]*.*\s[0-9.]*", potcar, )[::2] composition = [int(x) for x in remove_comment(lines[5 + shift].strip()).split()] atoms = [] for i in range(len(composition)): for x in composition[i] * [species[i]]: atoms.append(x) natom = sum(composition) # if lines[6 + shift][0].lower() == "s": line = lines[6 + shift] if re.findall(r"\w+|$", line)[0].lower()[0] == "s": # shift = 2 shift += 1 match = re.findall(r"\w+|$", lines[6 + shift])[0].lower() if match[0] == "d": direct = True elif match[0] == "c": warnings.warn("Warning the POSCAR is not in Direct coordinates.") direct = False else: raise RuntimeError("The POSCAR is not in Direct or Cartesian coordinates.") coordinates = np.zeros(shape=(natom, 3)) for i in range(natom): coordinates[i, :] = [float(x) for x in lines[i + 7 + shift].split()[:3]] return atoms, coordinates, lattice def __contains__(self, key): return key in self.__dict__ def __getitem__(self, key): return self.__dict__[key] def __iter__(self): return self.__dict__.__iter__() def __len__(self): return len(self.__dict__)
[docs] class Kpoints(collections.abc.Mapping): """ A class to parse the KPOINTS file Parameters ---------- filename : str, optional The KPOINTS filename, by default "KPOINTS" has_time_reversal : bool, optional A boolean vlaue to determine if the kpioints has time reversal symmetry, by default True """
[docs] def __init__(self, filepath: Union[str, Path], has_time_reversal: bool = True): self.filepath = Path(filepath) self.file_str = None self.metadata = None self.mode = None self.kgrid = None self.kshift = None self.ngrids = None self.special_kpoints = None self.knames = None self.cartesian = False self.automatic = False self._parse_kpoints() if self.knames is not None: self.kpath = KPath( knames=self.knames, special_kpoints=self.special_kpoints, ngrids=self.ngrids, has_time_reversal=has_time_reversal, ) else: self.kpath = None
def _parse_kpoints(self): """A helper method to parse the KOINTS file""" with open(self.filepath, "r") as rf: self.comment = rf.readline() grids = rf.readline() grids = grids[: grids.find("!")] self.ngrids = [int(x) for x in grids.split()] if self.ngrids[0] == 0: self.automatic = True mode = rf.readline() if mode[0].lower() == "m": self.mode = "monkhorst-pack" elif mode[0].lower() == "g": self.mode = "gamma" elif mode[0].lower() == "l": self.mode = "line" if self.mode == "gamma" or self.mode == "monkhorst-pack": kgrid = rf.readline() self.kgrid = [int(x) for x in kgrid.split()] shift = rf.readline() self.kshift = [int(float(x)) for x in shift.split()] elif self.mode == "line": if rf.readline()[0].lower() == "c": self.cartesian = True else: self.cartesian = False self.file_str = rf.read() temp = np.array( re.findall( r"([0-9.-]+)\s*([0-9.-]+)\s*([0-9.-]+)(.*)", self.file_str ) ) temp_special_kp = temp[:, :3].astype(float) temp_knames = temp[:, -1] nsegments = temp_special_kp.shape[0] // 2 if len(self.ngrids) == 1: self.ngrids = [self.ngrids[0]] * nsegments self.knames = np.reshape( [x.replace("!", "").strip() for x in temp_knames], (nsegments, 2) ) self.special_kpoints = temp_special_kp.reshape(nsegments, 2, 3) def __contains__(self, key): return key in self.__dict__ def __getitem__(self, key): return self.__dict__[key] def __iter__(self): return self.__dict__.__iter__() def __len__(self): return len(self.__dict__)
[docs] class Procar(collections.abc.Mapping): """ A class to parse the PROCAR file Parameters ---------- filename : str, optional The PROCAR filename, by default "PROCAR" structure : pyprocar.core.Structure, optional The structure of the calculation, by default None reciprocal_lattice : np.ndarray, optional The reciprocal lattice matrix, by default None kpath : pyprocar.core.KPath, optional The pyprocar.core.KPath object, by default None kpoints : np.ndarray, optional The kpoints, by default None efermi : float, optional The fermi energy, by default None interpolation_factor : int, optional The interpolation factor, by default 1 """ orbital_names = [ "s", "py", "pz", "px", "dxy", "dyz", "dz2", "dxz", "x2-y2", "fy3x2", "fxyz", "fyz2", "fz3", "fxz2", "fzx2", "fx3", "tot", ] orbital_names_old = [ "s", "py", "pz", "px", "dxy", "dyz", "dz2", "dxz", "dx2", "tot", ] orbital_names_short = ["s", "p", "d", "f", "tot"] labels = orbital_names_old[:-1]
[docs] def __init__(self, filepath: Union[str, Path]): self.filepath = self._validate_file(filepath) self.filename = self.filepath.name self.file_str = None self.meta_lines = [] self.has_phase = None self.kpoints = None self.bands = None self.occupancies = None self.spd = None self.spd_phase = None self.kpointsCount = None self.bandsCount = None self.ionsCount = None self.ispin = None self._read() if self.has_phase: self.carray = self.spd_phase[:, :, :, :-1, 1:-1]
def _read(self): """ Helper method to parse the procar file """ file_stream = self._open_file(self.filepath) # Line 1: PROCAR lm decomposed self.meta_lines.append(file_stream.readline()) if "phase" in self.meta_lines[-1]: self.has_phase = True else: self.has_phase = False # Line 2: # of k-points: 816 # of bands: 52 # of ions: 8 self.meta_lines.append(file_stream.readline()) self.kpointsCount, self.bandsCount, self.ionsCount = map( int, re.findall(r"#[^:]+:([^#]+)", self.meta_lines[-1]) ) self.file_str = file_stream.read() self._repair() self._read_kpoints() self._read_bands() self._read_orbitals() if self.has_phase: self._read_phases() file_stream.close() return def _repair(self): """ It Tries to repair some stupid problems due the stupid fixed format of the stupid fortran. Up to now it only separes k-points as the following: k-point 61 : 0.00000000-0.50000000 0.00000000 ... to k-point 61 : 0.00000000 -0.50000000 0.00000000 ... But as I found new stupid errors they should be fixed here. """ if ( len(re.findall(r"(band\s)(\*\*\*)", self.file_str)) != 0 or len(re.findall(r"(\.\d{8})(\d{2}\.)", self.file_str)) != 0 or len(re.findall(r"(\d)-(\d)", self.file_str)) != 0 or len(re.findall(r"\*+", self.file_str)) != 0 ): logger.info("PROCAR needs repairing") # Fixing bands issues (when there are more than 999 bands) # band *** # energy 6.49554019 # occ. 0.00000000 self.file_str = re.sub(r"(band\s)(\*\*\*)", r"\1 1000", self.file_str) # Fixing k-point issues self.file_str = re.sub(r"(\.\d{8})(\d{2}\.)", r"\1 \2", self.file_str) self.file_str = re.sub(r"(\d)-(\d)", r"\1 -\2", self.file_str) self.file_str = re.sub(r"\*+", r" -10.0000 ", self.file_str) outfile = open(self.filename + "-repaired", "w") for iline in self.meta_lines: outfile.write(iline) outfile.write(self.file_str) outfile.close() print("Repaired PROCAR is written at {}-repaired".format(self.filename)) print( "Please use {}-repaired next time for better efficiency".format( self.filename ) ) return def _validate_file(self, filepath: Union[str, Path]): """ Tries to open a File, it has suitable values for PROCAR and can handle gzipped files Example: """ filepath = Path(filepath) if filepath.is_dir(): filepath / "PROCAR" elif filepath.is_file(): return filepath elif filepath.with_suffix(".gz").is_file(): return filepath.with_suffix(".gz") else: raise IOError("File not found") return filepath def _open_file(self, filepath: Union[str, Path]): """ Tries to open a File, it has suitable values for PROCAR and can handle gzipped files Example: """ file_stream = None if filepath.with_suffix(".gz").is_file(): file_stream = gzip.open(filepath.with_suffix(".gz"), mode="rt") elif filepath.is_file(): file_stream = open(filepath, "r") else: raise IOError("File not found") return file_stream def _read_kpoints(self): """ Reads the k-point headers. A typical k-point line is: k-point 1 : 0.00000000 0.00000000 0.00000000 weight = 0.00003704\n fills self.kpoint[kpointsCount][3] The weights are discarded (are they useful?) """ if not self.file_str: print("You should invoke `procar.readFile()` instead. Returning") return # finding all the K-points headers self.kpoints = re.findall(r"k-point\s+\d+\s*:\s+([-.\d\s]+)", self.file_str) # trying to build an array self.kpoints = [x.split() for x in self.kpoints] try: self.kpoints = np.array(self.kpoints, dtype=float) except ValueError: print("\n".join([str(x) for x in self.kpoints])) if self.permissive: # Discarding the kpoints list, however I need to set # self.ispin beforehand. if len(self.kpoints) == self.kpointsCount: self.ispin = 1 elif len(self.kpoints) == 2 * self.kpointsCount: self.ispin = 2 else: raise ValueError("Kpoints do not match with ispin=1 or 2.") self.kpoints = None return else: raise ValueError("Badly formated Kpoints headers, try `--permissive`") # if successful, go on # trying to identify an non-polarized or non-collinear case, a # polarized case or a defective file if len(self.kpoints) != self.kpointsCount: # if they do not match, may means two things a spin polarized # case or a bad file, lets check # lets start testing if it is spin polarized, if so, there # should be 2 identical blocks of kpoints. up, down = np.vsplit(self.kpoints, 2) if (up == down).all(): self.ispin = 2 # just keeping one set of kpoints (the other will be # discarded) self.kpoints = up else: raise RuntimeError("Bad Kpoints list.") # if ISPIN != 2 setting ISPIN=1 (later for the non-collinear case 1->4) # It is unknown until parsing the projected data else: self.ispin = 1 # checking again, for compatibility, if len(self.kpoints) != self.kpointsCount: raise RuntimeError( "Kpoints number do not match with metadata (header of PROCAR)" ) return @property def kpoints_reduced(self): """The kpoints in reduced coordinates Returns ------- np.ndarray The kpoints in reduced coordinates """ return self.kpoints def _read_bands(self): """ Reads the bands header. A typical bands is: band 1 # energy -7.11986315 # occ. 1.00000000 fills self.bands[kpointsCount][bandsCount] The occupation numbers are discarded (are they useful?) """ if not self.file_str: print("You should invoke `procar.read()` instead. Returning") return # finding all bands self.bands = re.findall( r"band\s*(\d+)\s*#\s*energy\s*([-.\d\s]+)", self.file_str ) # checking if the number of bands match if len(self.bands) != self.bandsCount * self.kpointsCount * self.ispin: raise RuntimeError("Number of bands don't match") # casting to array to manipulate the bands self.bands = np.array(self.bands, dtype=float)[:, 1] # Now I will deal with the spin polarized case. The goal is join # them like for a non-magnetic case # new version of pyprocar will have an (nkpoints, nbands, 2) dimensions if self.ispin == 2: # up and down are along the first axis up, down = np.split(self.bands, 2) # reshapping (the 2 means both band index and energy) up = up.reshape(self.kpointsCount, self.bandsCount) down = down.reshape(self.kpointsCount, self.bandsCount) # setting the correct number of bands (up+down) # self.bandsCount *= 2 # and joining along the second axis (axis=1), ie: bands-like # self.bands = np.concatenate((up, down), axis=1) self.bands = np.stack((up, down), axis=-1) else: self.bands = self.bands.reshape(self.kpointsCount, self.bandsCount, 1) # otherwise just reshaping is needed # else: # self.bands.shape = (self.kpointsCount, self.bandsCount, 2) # Making a test if the broadcast is rigth, otherwise just print # test = [x.max() - x.min() for x in self.bands[:, :, 0].transpose()] # if np.array(test).any(): # print( # "The indexes of bands do not match. CHECK IT. " # "Likely the data was wrongly broadcasted" # ) # print(str(self.bands[:, :, 0])) # # Now safely removing the band index # self.bands = self.bands[:, :, 1] return def _read_orbitals(self): """ Reads all the spd-projected data. A typical/expected block is: ion s py pz px dxy dyz dz2 dxz dx2 tot 1 0.079 0.000 0.001 0.000 0.000 0.000 0.000 0.000 0.000 0.079 2 0.152 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.152 3 0.079 0.000 0.001 0.000 0.000 0.000 0.000 0.000 0.000 0.079 4 0.188 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.188 5 0.188 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.188 tot 0.686 0.000 0.002 0.000 0.000 0.000 0.000 0.000 0.000 0.688 (x2 for spin-polarized -akwardkly formatted-, x4 non-collinear -nicely formatted-). The data is stored in an array self.spd[kpoint][band][ispin][atom][orbital] Undefined behavior in case of phase factors (LORBIT = 12). """ # if not self.file_str: # print("You should invoke `procar.readFile()` instead. Returning") # return # finding all orbital headers self.spd = re.findall(r"ion(.+)", self.file_str) # testing if the orbital names are known (the standard ones) FoundOrbs = self.spd[0].split() size = len(FoundOrbs) # only the first 'size' orbital StdOrbs = self.orbital_names[: size - 1] + self.orbital_names[-1:] StdOrbs_short = ( self.orbital_names_short[: size - 1] + self.orbital_names_short[-1:] ) StdOrbs_old = self.orbital_names_old[: size - 1] + self.orbital_names_old[-1:] if ( FoundOrbs != (StdOrbs) and FoundOrbs != (StdOrbs_short) and FoundOrbs != (StdOrbs_old) ): print( str(size) + " orbitals. (Some of) They are unknow (if " "you did 'filter' them it is OK)." ) self.orbitalCount = size self.orbitalNames = self.spd[0].split() # Now reading the bulk of data # The case of just one atom is handled differently since the VASP # output is a little different if self.ionsCount == 1: self.spd = re.findall(r"^(\s*1\s+.+)$", self.file_str, re.MULTILINE) raw_spd_natom_axis = self.ionsCount else: # Added by Francisco to speed up filtering on June 4th, 2019 # get rid of phase factors self.spd = re.findall(r"ion.+tot\n([-.\d\seto]+)", self.file_str) self.spd = "".join(self.spd) self.spd = re.findall(r"([-.\d\se]+tot.+)\n", self.spd) raw_spd_natom_axis = self.ionsCount + 1 # free the memory (could be a lot) if not self.has_phase: self.file_str = None # Now the method will try to find the value of self.ispin, # previously it was set to either 1 or 2. If "1", it could be 1 or # 4, but previously it was impossible to find the rigth value. If # "2" it has to macth with the number of entries of spd data. expected = self.bandsCount * self.kpointsCount * self.ispin if expected == len(self.spd): pass # catching a non-collinear calc. elif expected * 4 == len(self.spd): # testing if previous ispin value is ok if self.ispin != 1: print( "Incompatible data: self.ispin= " + str(self.ispin) + ". Now is 4" ) self.ispin = 4 else: raise RuntimeError("Incompatible file.") # checking for consistency for line in self.spd: if self.ionsCount != 1: if len(line.split()) != (self.ionsCount + 1) * (self.orbitalCount + 1): raise RuntimeError("Flats happens") # replacing the "tot" string by a number, to allows a conversion # to numpy self.spd = [x.replace("tot", "0").split() for x in self.spd] # self.spd = [x.split() for x in self.spd] self.spd = np.array(self.spd, dtype=float) # handling collinear polarized case if self.ispin == 2: # splitting both spin components, now they are along k-points # axis (1st axis) but, then should be concatenated along the # bands. up, down = np.vsplit(self.spd, 2) # ispin = 1 for a while, we will made the distinction up = up.reshape( self.kpointsCount, self.bandsCount, 1, raw_spd_natom_axis, self.orbitalCount + 1, ) down = down.reshape( self.kpointsCount, self.bandsCount, 1, raw_spd_natom_axis, self.orbitalCount + 1, ) # concatenating bandwise. Density and magntization, their # meaning is obvious, and do uses 2 times more memory than # required, but I *WANT* to keep it as close as possible to the # non-collinear or non-polarized case density = np.concatenate((up, down), axis=1) magnet = np.concatenate((up, -down), axis=1) # concatenated along 'ispin axis' self.spd = np.concatenate((density, magnet), axis=2) # otherwise, just a reshaping suffices else: self.spd = self.spd.reshape( self.kpointsCount, self.bandsCount, self.ispin, raw_spd_natom_axis, self.orbitalCount + 1, ) if self.ionsCount == 1: self.spd = np.pad( self.spd, ((0, 0), (0, 0), (0, 0), (0, 1), (0, 0)), "constant", constant_values=(0), ) self.spd[:, :, :, 1, :] = self.spd[:, :, :, 0, :] return def _read_phases(self): """ Helped method to parse the projection phases """ if self.ionsCount == 1: self.spd_phase = re.findall(r"^(\s*1\s+.+)$", self.file_str, re.MULTILINE) raw_spd_natom_axis = self.ionsCount else: # Added by Francisco to speed up filtering on June 4th, 2019 # get rid of phase factors self.spd_phase = re.findall( r"ion.+\n([-.\d\se]+charge[-.\d\se]+)", self.file_str ) self.spd_phase = "".join(self.spd_phase) self.spd_phase = re.findall(r"([-.\d\se]+charge.+)\n", self.spd_phase) raw_spd_natom_axis = self.ionsCount + 1 # free the memory (could be a lot) self.file_str = None # replacing the "charge" string by a number, to allows a conversion # to numpy, adding columns of zeros next to charge row to be able to # convert to imaginary self.spd_phase = [ x.replace( re.findall("charge.*", x)[0], ( " 0.000 ".join(re.findall("charge.*", x)[0].split()).replace( "charge", "" ) ), ) for x in self.spd_phase ] self.spd_phase = [x.split() for x in self.spd_phase] self.spd_phase = np.array(self.spd_phase, dtype=float) # handling collinear polarized case if self.ispin == 2: # splitting both spin components, now they are along k-points # axis (1st axis) but, then should be concatenated along the # bands. up, down = np.vsplit(self.spd_phase, 2) # ispin = 1 for a while, we will made the distinction up = up.reshape( self.kpointsCount, self.bandsCount, 1, self.ionsCount + 1, self.orbitalCount * 2, ) down = down.reshape( self.kpointsCount, self.bandsCount, 1, self.ionsCount + 1, self.orbitalCount * 2, ) # concatenating bandwise. Density and magntization, their # meaning is obvious, and do uses 2 times more memory than # required, but I *WANT* to keep it as close as possible to the # non-collinear or non-polarized case density = np.concatenate((up, down), axis=1) magnet = np.concatenate((up, -down), axis=1) # concatenated along 'ispin axis' self.spd_phase = np.concatenate((density, magnet), axis=2) # otherwise, just a reshaping suffices elif self.ispin == 4: self.spd_phase = self.spd_phase.reshape( self.kpointsCount, self.bandsCount, 1, self.ionsCount + 1, self.orbitalCount * 2, ) else: self.spd_phase = self.spd_phase.reshape( self.kpointsCount, self.bandsCount, self.ispin, self.ionsCount + 1, self.orbitalCount * 2, ) temp = np.zeros( shape=( self.spd_phase.shape[0], self.spd_phase.shape[1], self.spd_phase.shape[2], self.spd_phase.shape[3], int(self.spd_phase.shape[4] / 2) + 1, ), dtype=np.complex_, ) for i in range(1, (self.orbitalCount) * 2 - 2, 2): temp[:, :, :, :, (i + 1) // 2].real = self.spd_phase[:, :, :, :, i] temp[:, :, :, :, (i + 1) // 2].imag = self.spd_phase[:, :, :, :, i + 1] temp[:, :, :, :, 0].real = self.spd_phase[:, :, :, :, 0] temp[:, :, :, :, -1].real = self.spd_phase[:, :, :, :, -1] self.spd_phase = temp if self.ionsCount == 1: self.spd_phase = np.pad( self.spd_phase, ((0, 0), (0, 0), (0, 0), (0, 1), (0, 0)), "constant", constant_values=(0), ) self.spd_phase[:, :, :, 1, :] = self.spd_phase[:, :, :, 0, :] return def _spd2projected(self, spd, nprinciples=1): """ Helpermethod to project the spd array to the projected array which will be fed into pyprocar.coreElectronicBandStructure object Parameters ---------- spd : np.ndarray The spd array from the earlier parse. This has a structure simlar to the PROCAR output in vasp Has the shape [n_kpoints,n_band,n_spins,n-orbital,n_atoms] nprinciples : int, optional The prinicipal quantum numbers, by default 1 Returns ------- np.ndarray The projected array. Has the shape [n_kpoints,n_band,n_atom,n_principal,n-orbital,n_spin] """ # This function is for VASP # non-pol and colinear # spd is formed as (nkpoints,nbands, nspin, natom+1, norbital+2) # natom+1 > last column is total # norbital+2 > 1st column is the number of atom last is total # non-colinear # spd is formed as (nkpoints,nbands, nspin +1 , natom+1, norbital+2) # natom+1 > last column is total # norbital+2 > 1st column is the number of atom last is total # nspin +1 > last column is total if spd is None: return None natoms = spd.shape[3] - 1 nkpoints = spd.shape[0] nbands = spd.shape[1] norbitals = spd.shape[4] - 2 if spd.shape[2] == 4: nspins = 4 else: nspins = spd.shape[2] if nspins == 2: nbands = int(spd.shape[1] / 2) else: nbands = spd.shape[1] projected = np.zeros( shape=(nkpoints, nbands, natoms, nprinciples, norbitals, nspins), dtype=spd.dtype, ) temp_spd = spd.copy() # (nkpoints,nbands, nspin, natom, norbital) temp_spd = np.swapaxes(temp_spd, 2, 4) # (nkpoints,nbands, norbital , natom , nspin) temp_spd = np.swapaxes(temp_spd, 2, 3) # (nkpoints,nbands, natom, norbital, nspin) # projected[ikpoint][iband][iatom][iprincipal][iorbital][ispin] if nspins == 3: projected[:, :, :, 0, :, :] = temp_spd[:, :, :-1, 1:-1, :] elif nspins == 2: projected[:, :, :, 0, :, 0] = temp_spd[:, :nbands, :-1, 1:-1, 0] projected[:, :, :, 0, :, 1] = temp_spd[:, nbands:, :-1, 1:-1, 0] else: projected[:, :, :, 0, :, :] = temp_spd[:, :, :-1, 1:-1, :] return projected def __contains__(self, key): return key in self.__dict__ def __getitem__(self, key): return self.__dict__[key] def __iter__(self): return self.__dict__.__iter__() def __len__(self): return len(self.__dict__)
[docs] class VaspXML(collections.abc.Mapping): """A class to parse the vasprun xml file Parameters ---------- filename : str, optional The vasprun.xml filename, by default "vasprun.xml" Raises ------ ValueError File not found """ non_colinear_spins_dict = { "spin 1": "Spin-Total", "spin 2": "Spin-x", "spin 3": "Spin-y", "spin 4": "Spin-z", } colinear_spins_dict = {"spin 1": "Spin-up", "spin 2": "Spin-down"}
[docs] def __init__(self, filepath: Union[str, Path] = "vasprun.xml"): self.filepath = Path(filepath) self.filename = self.filepath.name self.data = self._parse_vasprun(self.filepath)
@property def has_dos(self): return "dos" in self.data["general"] @property def spins_dict(self): spins = list(self.data["general"]["dos"]["total"]["array"]["data"].keys()) if len(spins) == 4: return self.non_colinear_spins_dict else: return self.colinear_spins_dict @property def is_noncolinear(self): spins = list(self.data["general"]["dos"]["total"]["array"]["data"].keys()) if len(spins) == 4: return True else: return False @property def bands(self): """Parses the electronic bands Returns ------- np.ndarray The electronic bands """ spins = list(self.data["general"]["eigenvalues"]["array"]["data"].keys()) kpoints_list = list( self.data["general"]["eigenvalues"]["array"]["data"]["spin 1"].keys() ) eigen_values = {} nbands = len( self.data["general"]["eigenvalues"]["array"]["data"][spins[0]][ kpoints_list[0] ][kpoints_list[0]] ) nkpoints = len(kpoints_list) for ispin in spins: eigen_values[ispin] = {} eigen_values[ispin]["eigen_values"] = np.zeros(shape=(nbands, nkpoints)) eigen_values[ispin]["occupancies"] = np.zeros(shape=(nbands, nkpoints)) for ikpoint, kpt in enumerate(kpoints_list): temp = np.array( self.data["general"]["eigenvalues"]["array"]["data"][ispin][kpt][ kpt ] ) eigen_values[ispin]["eigen_values"][:, ikpoint] = ( temp[:, 0] - self.fermi ) eigen_values[ispin]["occupancies"][:, ikpoint] = temp[:, 1] return eigen_values @property def bands_projected(self): """Parse the band projections Returns ------- np.ndarray The band projections """ # projected[iatom][ikpoint][iband][iprincipal][iorbital][ispin] labels = self.data["general"]["projected"]["array"]["info"] spins = list(self.data["general"]["projected"]["array"]["data"].keys()) kpoints_list = list( self.data["general"]["projected"]["array"]["data"][spins[0]].keys() ) bands_list = list( self.data["general"]["projected"]["array"]["data"][spins[0]][ kpoints_list[0] ][kpoints_list[0]].keys() ) bands_projected = {"labels": labels} nspins = len(spins) nkpoints = len(kpoints_list) nbands = len(bands_list) norbitals = len(labels) natoms = self.initial_structure.natoms bands_projected["projection"] = np.zeros( shape=(nspins, nkpoints, nbands, natoms, norbitals) ) for ispin, spn in enumerate(spins): for ikpoint, kpt in enumerate(kpoints_list): for iband, bnd in enumerate(bands_list): bands_projected["projection"][ispin, ikpoint, iband, :, :] = ( np.array( self.data["general"]["projected"]["array"]["data"][spn][ kpt ][kpt][bnd][bnd] ) ) # ispin, ikpoint, iband, iatom, iorbital bands_projected["projection"] = np.swapaxes(bands_projected["projection"], 0, 3) # iatom, ikpoint, iband, ispin, iorbital bands_projected["projection"] = np.swapaxes(bands_projected["projection"], 3, 4) # iatom, ikpoint, iband, iorbital, ispin bands_projected["projection"] = bands_projected["projection"].reshape( natoms, nkpoints, nbands, 1, norbitals, nspins ) return bands_projected def _get_dos_total(self): """A helper method to get the total density of states Returns ------- tuple Returns the dos_total info as a dict and the a list of labels """ spins = list(self.data["general"]["dos"]["total"]["array"]["data"].keys()) energies = np.array( self.data["general"]["dos"]["total"]["array"]["data"][spins[0]] )[:, 0] dos_total = {"energies": energies} for spin_name in spins: dos_total[self.spins_dict[spin_name]] = np.array( self.data["general"]["dos"]["total"]["array"]["data"][spin_name] )[:, 1] return dos_total, list(dos_total.keys()) def _get_dos_projected(self, atoms=[]): """A helper method to get the projected density of states Parameters ---------- atoms : list, optional List of atoms, by default [] Returns ------- _type_ Returns the dos_total info as a dict and the a list of labels """ if len(atoms) == 0: atoms = np.arange(self.initial_structure.natoms) if "partial" in self.data["general"]["dos"]: dos_projected = {} ion_list = [ "ion %s" % str(x + 1) for x in atoms ] # using this name as vasrun.xml uses ion # for i in range(len(ion_list)): iatom = ion_list[i] name = self.initial_structure.atoms[atoms[i]] + str(atoms[i]) spins = list( self.data["general"]["dos"]["partial"]["array"]["data"][ iatom ].keys() ) energies = np.array( self.data["general"]["dos"]["partial"]["array"]["data"][iatom][ spins[0] ][spins[0]] )[:, 0] dos_projected[name] = {"energies": energies} for ispin in spins: dos_projected[name][self.spins_dict[ispin]] = np.array( self.data["general"]["dos"]["partial"]["array"]["data"][iatom][ ispin ][ispin] )[:, 1:] # if 'Spin-Total' in list(dos_projected[name].keys()): # del dos_projected[name]['Spin-Total'] return ( dos_projected, self.data["general"]["dos"]["partial"]["array"]["info"], ) else: print("This calculation does not include partial density of states") return None, None @property def dos(self): """The pyprocar.core.DensityOfStates Object Returns ------- pyprocar.core.DensityOfStates Returns the pyprocar.core.DensityOfStates for the calculation """ energies = self.dos_total["energies"] total = [] for ispin in self.dos_total: if ispin == "energies": continue total.append(self.dos_total[ispin]) # total = np.array(total).T return DensityOfStates( energies=energies, total=total, efermi=self.fermi, projected=self.dos_projected, ) @property def dos_to_dict(self): """ The complete density (total,projected) of states as a python dictionary Returns ------- dict The complete density (total,projected) of states as a python dictionary """ return {"total": self._get_dos_total(), "projected": self._get_dos_projected()} """ Returns the total density of states as a pychemia.visual.DensityOfSates object """ @property def dos_total(self): """Returns the total dos dict Returns ------- dict Returns the total dos dict """ dos_total, labels = self._get_dos_total() dos_total["energies"] -= self.fermi return dos_total @property def dos_projected(self): """ Returns the projected DOS as a multi-dimentional array, to be used in the pyprocar.core.dos object Returns ------- np.ndarray Returns the projected DOS as a multi-dimentional array, to be used in the pyprocar.core.dos object """ ret = [] dos_projected, info = self._get_dos_projected() if dos_projected is None: return None norbitals = len(info) - 1 info[0] = info[0].capitalize() labels = [] labels.append(info[0]) ret = [] for iatom in dos_projected: temp_atom = [] for iorbital in range(norbitals): temp_spin = [] for key in dos_projected[iatom]: if key == "energies": continue temp_spin.append(dos_projected[iatom][key][:, iorbital]) temp_atom.append(temp_spin) ret.append([temp_atom]) return ret @property def kpoints(self): """ Returns the kpoints used in the calculation in form of a pychemia.core.KPoints object Returns ------- np.ndarray Returns the kpoints used in the calculation in form of a pychemia.core.KPoints object """ if self.data["kpoints_info"]["mode"] == "listgenerated": kpoints = dict( mode="path", kvertices=self.data["kpoints_info"]["kpoint_vertices"] ) else: kpoints = dict( mode=self.data["kpoints_info"]["mode"].lower(), grid=self.data["kpoints_info"]["kgrid"], shifts=self.data["kpoints_info"]["user_shift"], ) return kpoints @property def kpoints_list(self): """ Returns the dict of kpoints and weights used in the calculation in form of a pychemia.core.KPoints object Returns ------- dict Returns a dict of kpoints information in form of a pychemia.core.KPoints object """ return dict( mode="reduced", kpoints_list=self.data["kpoints"]["kpoints_list"], weights=self.data["kpoints"]["k_weights"], ) @property def incar(self): """ Returns the incar parameters used in the calculation as pychemia.code.vasp.VaspIncar object Returns ------- Description Returns the incar parameters """ return self.data["incar"] @property def vasp_parameters(self): """ Returns all of the parameters vasp has used in this calculation """ return self.data["vasp_params"] @property def potcar_info(self): """ Returns the information about pseudopotentials(POTCAR) used in this calculation """ return self.data["atom_info"]["atom_types"] @property def fermi(self): """ Returns the fermi energy """ return self.data["general"]["dos"]["efermi"] @property def species(self): """ Returns the species in POSCAR """ return self.initial_structure.species @property def structures(self): """ Returns a list of pychemia.core.Structure representing all the ionic step structures """ symbols = [x.strip() for x in self.data["atom_info"]["symbols"]] structures = [] for ist in self.data["structures"]: st = Structure( atoms=symbols, fractional_coordinates=ist["reduced"], lattice=ist["cell"], ) structures.append(st) return structures @property def structure(self): """ crystal structure of the last step """ return self.structures[-1] @property def forces(self): """ Returns all the forces in ionic steps """ return self.data["forces"] @property def initial_structure(self): """ Returns the initial Structure as a pychemia structure """ return self.structures[0] @property def final_structure(self): """ Returns the final Structure as a pychemia structure """ return self.structures[-1] @property def iteration_data(self): """ Returns a list of information in each electronic and ionic step of calculation """ return self.data["calculation"] @property def energies(self): """ Returns a list of energies in each electronic and ionic step [ionic step,electronic step, energy] """ scf_step = 0 ion_step = 0 double_counter = 1 energies = [] for calc in self.data["calculation"]: if "ewald" in calc["energy"]: if double_counter == 0: double_counter += 1 scf_step += 1 elif double_counter == 1: double_counter = 0 ion_step += 1 scf_step = 1 else: scf_step += 1 energies.append([ion_step, scf_step, calc["energy"]["e_0_energy"]]) return energies @property def last_energy(self): """ Returns the last calculated energy of the system """ return self.energies[-1][-1] @property def energy(self): """ Returns the last calculated energy of the system """ return self.last_energy @property def convergence_electronic(self): """ Returns a boolian representing if the last electronic self-consistent calculation converged """ ediff = self.vasp_parameters["electronic"]["EDIFF"] last_dE = abs(self.energies[-1][-1] - self.energies[-2][-1]) if last_dE < ediff: return True else: return False @property def convergence_ionic(self): """ Returns a boolian representing if the ionic part of the calculation converged """ energies = np.array(self.energies) nsteps = len(np.unique(np.array(self.energies)[:, 0])) if nsteps == 1: print("This calculation does not have ionic steps") return True else: ediffg = self.vasp_parameters["ionic"]["EDIFFG"] if ediffg < 0: last_forces_abs = np.abs(np.array(self.forces[-1])) return not (np.any(last_forces_abs > abs(ediffg))) else: last_ionic_energy = energies[(energies[:, 0] == nsteps)][-1][-1] penultimate_ionic_energy = energies[(energies[:, 0] == (nsteps - 1))][ -1 ][-1] last_dE = abs(last_ionic_energy - penultimate_ionic_energy) if last_dE < ediffg: return True return False @property def convergence(self): """ Returns a boolian representing if the the electronic self-consistent and ionic calculation converged """ return self.convergence_electronic and self.convergence_ionic @property def is_finished(self): """ Always returns True, need to fix this according to reading the xml as if the calc is not finished we will have errors in xml parser """ # if vasprun.xml is read the calculation is finished return True
[docs] def text_to_bool(self, text): """boolians in vaspxml are stores as T or F in str format, this function coverts them to python boolians""" text = text.strip(" ") if text == "T" or text == ".True." or text == ".TRUE.": return True else: return False
[docs] def conv(self, ele, _type): """This function converts the xml text to the type specified in the attrib of xml tree""" if _type == "string": return ele.strip() elif _type == "int": return int(ele) elif _type == "logical": return self.text_to_bool(ele) elif _type == "float": if "*" in ele: return np.nan else: return float(ele)
[docs] def get_varray(self, xml_tree): """Returns an array for each varray tag in vaspxml""" ret = [] for ielement in xml_tree: tmp = [] for x in ielement.text.split(): try: tmp.append(float(x)) except ValueError: tmp.append(0.0) ret.append(tmp) return ret
[docs] def get_params(self, xml_tree, dest): """dest should be a dictionary This function is recurcive #check spelling""" for ielement in xml_tree: if ielement.tag == "separator": dest[ielement.attrib["name"].strip()] = {} dest[ielement.attrib["name"].strip()] = self.get_params( ielement, dest[ielement.attrib["name"]] ) else: if "type" in ielement.attrib: _type = ielement.attrib["type"] else: _type = "float" if ielement.text is None: dest[ielement.attrib["name"].strip()] = None elif len(ielement.text.split()) > 1: dest[ielement.attrib["name"].strip()] = [ self.conv(x, _type) for x in ielement.text.split() ] else: dest[ielement.attrib["name"].strip()] = self.conv( ielement.text, _type ) return dest
[docs] def get_structure(self, xml_tree): """Returns a dictionary of the structure""" ret = {} for ielement in xml_tree: if ielement.tag == "crystal": for isub in ielement: if isub.attrib["name"] == "basis": ret["cell"] = self.get_varray(isub) elif isub.attrib["name"] == "volume": ret["volume"] = float(isub.text) elif isub.attrib["name"] == "rec_basis": ret["rec_cell"] = self.get_varray(isub) elif ielement.tag == "varray": if ielement.attrib["name"] == "positions": ret["reduced"] = self.get_varray(ielement) return ret
[docs] def get_scstep(self, xml_tree): """This function extracts the self-consistent step information""" scstep = {"time": {}, "energy": {}} for isub in xml_tree: if isub.tag == "time": scstep["time"][isub.attrib["name"]] = [ float(x) for x in isub.text.split() ] elif isub.tag == "energy": for ienergy in isub: scstep["energy"][ienergy.attrib["name"]] = float(ienergy.text) return scstep
[docs] def get_set(self, xml_tree, ret): """This function will extract any element taged set recurcively""" if xml_tree[0].tag == "r": ret[xml_tree.attrib["comment"]] = self.get_varray(xml_tree) return ret else: ret[xml_tree.attrib["comment"]] = {} for ielement in xml_tree: if ielement.tag == "set": ret[xml_tree.attrib["comment"]][ielement.attrib["comment"]] = {} ret[xml_tree.attrib["comment"]][ielement.attrib["comment"]] = ( self.get_set( ielement, ret[xml_tree.attrib["comment"]][ielement.attrib["comment"]], ) ) return ret
[docs] def get_general(self, xml_tree, ret): """This function will parse any element in calculatio other than the structures, scsteps""" if "dimension" in [x.tag for x in xml_tree]: ret["info"] = [] ret["data"] = {} for ielement in xml_tree: if ielement.tag == "field": ret["info"].append(ielement.text.strip(" ")) elif ielement.tag == "set": for iset in ielement: ret["data"] = self.get_set(iset, ret["data"]) return ret else: for ielement in xml_tree: if ielement.tag == "i": if "name" in ielement.attrib: if ielement.attrib["name"] == "efermi": ret["efermi"] = float(ielement.text) continue ret[ielement.tag] = {} ret[ielement.tag] = self.get_general(ielement, ret[ielement.tag]) return ret
def _parse_vasprun(self, vasprun): tree = ET.parse(vasprun) root = tree.getroot() calculation = [] structures = [] forces = [] stresses = [] orbital_magnetization = {} run_info = {} incar = {} general = {} kpoints_info = {} vasp_params = {} kpoints_list = [] k_weights = [] atom_info = {} for ichild in root: if ichild.tag == "generator": for ielement in ichild: run_info[ielement.attrib["name"]] = ielement.text elif ichild.tag == "incar": incar = self.get_params(ichild, incar) # Skipping 1st structure which is primitive cell elif ichild.tag == "kpoints": for ielement in ichild: if ielement.items()[0][0] == "param": kpoints_info["mode"] = ielement.items()[0][1] if kpoints_info["mode"] == "listgenerated": kpoints_info["kpoint_vertices"] = [] for isub in ielement: if isub.attrib == "divisions": kpoints_info["ndivision"] = int(isub.text) else: if len(isub.text.split()) != 3: continue kpoints_info["kpoint_vertices"].append( [float(x) for x in isub.text.split()] ) else: for isub in ielement: if isub.attrib["name"] == "divisions": kpoints_info["kgrid"] = [ int(x) for x in isub.text.split() ] elif isub.attrib["name"] == "usershift": kpoints_info["user_shift"] = [ float(x) for x in isub.text.split() ] elif isub.attrib["name"] == "genvec1": kpoints_info["genvec1"] = [ float(x) for x in isub.text.split() ] elif isub.attrib["name"] == "genvec2": kpoints_info["genvec2"] = [ float(x) for x in isub.text.split() ] elif isub.attrib["name"] == "genvec3": kpoints_info["genvec3"] = [ float(x) for x in isub.text.split() ] elif isub.attrib["name"] == "shift": kpoints_info["shift"] = [ float(x) for x in isub.text.split() ] elif ielement.items()[0][1] == "kpointlist": for ik in ielement: kpoints_list.append([float(x) for x in ik.text.split()]) kpoints_list = array(kpoints_list) elif ielement.items()[0][1] == "weights": for ik in ielement: k_weights.append(float(ik.text)) k_weights = array(k_weights) # Vasp Parameters elif ichild.tag == "parameters": vasp_params = self.get_params(ichild, vasp_params) # Atom info elif ichild.tag == "atominfo": for ielement in ichild: if ielement.tag == "atoms": atom_info["natom"] = int(ielement.text) elif ielement.tag == "types": atom_info["nspecies"] = int(ielement.text) elif ielement.tag == "array": if ielement.attrib["name"] == "atoms": for isub in ielement: if isub.tag == "set": atom_info["symbols"] = [] for isym in isub: atom_info["symbols"].append(isym[0].text) elif ielement.attrib["name"] == "atomtypes": atom_info["atom_types"] = {} for isub in ielement: if isub.tag == "set": for iatom in isub: atom_info["atom_types"][iatom[1].text] = {} atom_info["atom_types"][iatom[1].text][ "natom_per_specie" ] = int(iatom[0].text) atom_info["atom_types"][iatom[1].text][ "mass" ] = float(iatom[2].text) atom_info["atom_types"][iatom[1].text][ "valance" ] = float(iatom[3].text) atom_info["atom_types"][iatom[1].text][ "pseudopotential" ] = iatom[4].text.strip() elif ichild.tag == "structure": if ichild.attrib["name"] == "initialpos": initial_pos = self.get_structure(ichild) elif ichild.attrib["name"] == "finalpos": final_pos = self.get_structure(ichild) elif ichild.tag == "calculation": for ielement in ichild: if ielement.tag == "scstep": calculation.append(self.get_scstep(ielement)) elif ielement.tag == "structure": structures.append(self.get_structure(ielement)) elif ielement.tag == "varray": if ielement.attrib["name"] == "forces": forces.append(self.get_varray(ielement)) elif ielement.attrib["name"] == "stress": stresses.append(self.get_varray(ielement)) # elif ielement.tag == 'eigenvalues': # for isub in ielement[0] : # if isub.tag == 'set': # for iset in isub : # eigen_values[iset.attrib['comment']] = {} # for ikpt in iset : # eigen_values[iset.attrib['comment']][ikpt.attrib['comment']] = get_varray(ikpt) elif ielement.tag == "separator": if ielement.attrib["name"] == "orbital magnetization": for isub in ielement: orbital_magnetization[isub.attrib["name"]] = [ float(x) for x in isub.text.split() ] # elif ielement.tag == 'dos': # for isub in ielement : # if 'name' in isub.attrib: # if isub.attrib['name'] == 'efermi' : # dos['efermi'] = float(isub.text) # else : # dos[isub.tag] = {} # dos[isub.tag]['info'] = [] # for iset in isub[0] : # if iset.tag == 'set' : # for isub_set in iset: # dos[isub.tag] = get_set(isub_set,dos[isub.tag]) # elif iset.tag == 'field' : # dos[isub.tag]['info'].append(iset.text.strip(' ')) else: general[ielement.tag] = {} general[ielement.tag] = self.get_general( ielement, general[ielement.tag] ) # NEED TO ADD ORBITAL MAGNETIZATION return { "calculation": calculation, "structures": structures, "forces": forces, "run_info": run_info, "incar": incar, "general": general, "kpoints_info": kpoints_info, "vasp_params": vasp_params, "kpoints": {"kpoints_list": kpoints_list, "k_weights": k_weights}, "atom_info": atom_info, } def __contains__(self, key): return key in self.__dict__ def __getitem__(self, key): return self.__dict__[key] def __iter__(self): return self.__dict__.__iter__() def __len__(self): return len(self.__dict__)
[docs] class VaspParser:
[docs] def __init__( self, incar: Union[str, Path] = "INCAR", outcar: Union[str, Path] = "OUTCAR", procar: Union[str, Path] = "PROCAR", kpoints: Union[str, Path] = "KPOINTS", poscar: Union[str, Path] = "POSCAR", vasprun: Union[str, Path] = "vasprun.xml", ): incar_filepath = Path(incar) outcar_filepath = Path(outcar) procar_filepath = Path(procar) kpoints_filepath = Path(kpoints) poscar_filepath = Path(poscar) vasprun_filepath = Path(vasprun) self.procar = None self.outcar = None self.kpoints = None self.poscar = None self.vasprun = None if outcar_filepath.exists(): self.outcar = Outcar(outcar) if procar_filepath.exists(): self.procar = Procar(procar) if kpoints_filepath.exists(): print(f"Parsing KPOINTS file: {kpoints_filepath}") self.kpoints = Kpoints(kpoints) if poscar_filepath.exists(): self.poscar = Poscar(poscar) if vasprun_filepath.exists(): self.vasprun = VaspXML(vasprun)
@cached_property def version(self): version = None if self.outcar: version = self.outcar.version elif self.vasprun: version = self.vasprun.version return version @cached_property def version_tuple(self): return tuple(int(x) for x in self.version.split(".")) @property def ebs(self): if self.procar is None: user_logger.warning( "Issue with procar file. Either it was not found or there is an issue with the parser" ) return None if self.outcar is None: user_logger.warning( "Issue with outcar file. Either it was not found or there is an issue with the parser" ) return None kgrid = self.kpoints.get("kgrid", None) if kgrid is None: n_kx, n_ky, n_kz = None, None, None else: n_kx, n_ky, n_kz = kgrid kpath = self.kpoints.get("kpath", None) return ElectronicBandStructure( kpoints=self.procar.kpoints, bands=self.procar.bands, projected=self.procar._spd2projected(self.procar.spd), efermi=self.outcar.efermi, kpath=kpath, n_kx=n_kx, n_ky=n_ky, n_kz=n_kz, projected_phase=self.procar._spd2projected(self.procar.spd_phase), labels=self.procar.orbitalNames[:-1], reciprocal_lattice=self.outcar.reciprocal_lattice, ) @property def dos(self): if self.vasprun is not None and self.vasprun.has_dos: return self.vasprun.dos else: user_logger.warning( "Issue with parsing the DOS. Either it was not found or there is an issue with the parser" ) return None @property def structure(self): if self.poscar is None: user_logger.warning( "Issue with poscar file. Either it was not found or there is an issue with the parser" ) return None return Structure( atoms=self.poscar.atoms, fractional_coordinates=self.poscar.coordinates, lattice=self.poscar.lattice, rotations=self.outcar.rotations, )