__author__ = "Logan Lang"
__maintainer__ = "Logan Lang"
__email__ = "lllang@mix.wvu.edu"
__date__ = "March 31, 2020"
import copy
import logging
import math
import os
import re
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import Union
import numpy as np
from pyprocar.core import DensityOfStates, ElectronicBandStructure, KPath, Structure
from pyprocar.utils.units import AU_TO_ANG, HARTREE_TO_EV
logger = logging.getLogger(__name__)
[docs]
class QEParser:
"""The class is used to parse Quantum Expresso files.
The most important objects that comes from this parser are the .ebs and .dos
Parameters
----------
dirpath : str, optional
Directory path to where calculation took place, by default ""
scf_in_filepath : str, optional
The scf filename, by default "scf.in"
bands_in_filepath : str, optional
The bands filename in the case of a band structure calculation, by default "bands.in"
pdos_in_filepath : str, optional
The pdos filename in the case of a density ofstates calculation, by default "pdos.in"
kpdos_in_filepath : str, optional
The kpdos filename, by default "kpdos.in"
atomic_proj_xml : str, optional
The atomic projection xml name. This is located in the where the outdir is and in the {prefix}.save directory, by default "atomic_proj.xml"
"""
[docs]
def __init__(
self,
dirpath: Union[str, Path] = ".",
scf_in_filepath: Union[str, Path] = "scf.in",
bands_in_filepath: Union[str, Path] = "bands.in",
pdos_in_filepath: Union[str, Path] = "pdos.in",
kpdos_in_filepath: Union[str, Path] = "kpdos.in",
atomic_proj_xml_filepath: Union[str, Path] = "atomic_proj.xml",
):
self.dirpath = Path(dirpath).resolve()
# Handles the pathing to the files
(
prefix,
xml_root,
atomic_proj_xml_filepath,
pdos_in_filepath,
bands_in_filepath,
pdos_out_filepath,
projwfc_out_filepath,
self.scf_out_filepath,
) = self._initialize_filenames(
scf_in_filepath=scf_in_filepath,
bands_in_filepath=bands_in_filepath,
pdos_in_filepath=pdos_in_filepath,
)
# Parsing structual and calculation type information
self._parse_efermi(main_xml_root=xml_root)
self._parse_magnetization(main_xml_root=xml_root)
self._parse_structure(main_xml_root=xml_root)
self._parse_band_structure_tag(main_xml_root=xml_root)
self._parse_symmetries(main_xml_root=xml_root)
# Parsing projections spd array and spd phase arrays
if atomic_proj_xml_filepath.exists():
self._parse_wfc_mapping(projwfc_out_filepath=projwfc_out_filepath)
self._parse_atomic_projections(
atomic_proj_xml_filepath=atomic_proj_xml_filepath
)
# Parsing density of states files
if pdos_in_filepath.exists():
self.dos = self._parse_pdos(
pdos_in_filepath=pdos_in_filepath, dirpath=self.dirpath
)
else:
self.dos = None
# Parsing information related to the bandstructure calculations kpath and klabels
self.kticks = None
self.knames = None
self.kpath = None
if (
xml_root.findall(".//input/control_variables/calculation")[0].text
== "bands"
):
self.isBandsCalc = True
with open(bands_in_filepath, "r") as f:
self.bandsIn = f.read()
self._get_kpoint_labels()
self.ebs = ElectronicBandStructure(
kpoints=self.kpoints,
n_kx=self.nkx,
n_ky=self.nky,
n_kz=self.nkz,
bands=self.bands,
projected=self._spd2projected(self.spd),
efermi=self.efermi,
kpath=self.kpath,
projected_phase=self._spd2projected(self.spd_phase),
labels=self.orbital_names[:-1],
reciprocal_lattice=self.reciprocal_lattice,
)
return None
[docs]
def kpoints_cart(self):
"""Returns the kpoints in cartesian coordinates
Returns
-------
np.ndarray
Kpoints in cartesian coordinates
"""
# cart_kpoints self.kpoints = self.kpoints*(2*np.pi /self.alat)
# Converting back to crystal basis
cart_kpoints = self.kpoints.dot(self.reciprocal_lattice)
return cart_kpoints
@property
def species(self):
"""Returns the species of the calculation
Returns
-------
List
Returns a list of string or atomic numbers[int]
"""
return self.initial_structure.species
@property
def structures(self):
"""Returns a list of pyprocar.core.Structure
Returns
-------
List
Returns a list of pyprocar.core.Structure
"""
symbols = [x.strip() for x in self.ions]
structures = []
st = Structure(
atoms=symbols,
lattice=self.direct_lattice,
fractional_coordinates=self.atomic_positions,
rotations=self.rotations,
)
structures.append(st)
return structures
@property
def structure(self):
"""Returns a the last element of a list of pyprocar.core.Structure
Returns
-------
pyprocar.core.Structure
Returns a the last element of a list of pyprocar.core.Structure
"""
return self.structures[-1]
@property
def initial_structure(self):
"""Returns a the first element of a list of pyprocar.core.Structure
Returns
-------
pyprocar.core.Structure
Returns a the first element of a list of pyprocar.core.Structure
"""
return self.structures[0]
@property
def final_structure(self):
"""Returns a the last element of a list of pyprocar.core.Structure
Returns
-------
pyprocar.core.Structure
Returns a the last element of a list of pyprocar.core.Structure
"""
return self.structures[-1]
def _parse_pdos(self, pdos_in_filepath, dirpath):
"""Helper method to parse the pdos files
Parameters
----------
pdos_in_filepath : str
The pdos.in filename
dirpath : str
The directory path where the calculation took place.
Returns
-------
pyprocar.core.DensityOfStates
The density of states object for the calculation
"""
with open(pdos_in_filepath, "r") as f:
pdos_in = f.read()
self.pdos_prefix = re.findall("filpdos\s*=\s*'(.*)'", pdos_in)[0]
self.proj_prefix = re.findall("filproj\s*=\s*'(.*)'", pdos_in)[0]
# Parsing total density of states
energies, total_dos = self._parse_dos_total(
dos_total_filename=f"{dirpath}{os.sep}{self.pdos_prefix}.pdos_tot"
)
self.n_energies = len(energies)
# Finding all the density of states projections files
wfc_filenames = self._parse_available_wfc_filenames()
projected_dos = self._parse_dos_projections(
wfc_filenames=wfc_filenames, n_energy=len(energies)
)
# print(projected_labels)
dos = DensityOfStates(
energies=energies,
total=total_dos,
efermi=self.efermi,
projected=projected_dos,
interpolation_factor=1,
)
return dos
def _parse_dos_total(self, dos_total_filename):
"""Helper method to parse the dos total file
Parameters
----------
dos_total_filename : str
The dos total filename
Returns
-------
Tupole
Returns a tuple with energies and the total dos arrays
"""
with open(dos_total_filename) as f:
tmp_text = f.readlines()
header = tmp_text[0]
dos_text = "".join(tmp_text[1:])
# Strip ending spaces away. Avoind empty string at the end
raw_dos_blocks_by_energy = dos_text.rstrip().split("\n")
n_energies = len(raw_dos_blocks_by_energy)
energies = np.zeros(shape=(n_energies))
# total_dos = np.zeros(shape=(n_energies, self.n_spin))
total_dos = np.zeros(shape=(self.n_spin, n_energies))
for ienergy, raw_dos_block_by_energy in enumerate(raw_dos_blocks_by_energy):
energies[ienergy] = float(raw_dos_block_by_energy.split()[0])
# Covers colinear spin-polarized. This is because these is a difference in energies
if self.n_spin == 2:
total_dos[:, ienergy] = [
float(val)
for val in raw_dos_block_by_energy.split()[-self.n_spin :]
]
# Covers colinear non-spin-polarized and non-colinear. This is because the energies are the same
else:
total_dos[0, ienergy] = float(raw_dos_block_by_energy.split()[2])
energies -= self.efermi
return energies, total_dos
def _parse_dos_projections(self, wfc_filenames, n_energy):
"""Parse the dos projection files using efficient array operations."""
n_principal_number = 1
projected_dos_array = np.zeros(
(self.n_atoms, n_principal_number, self.n_orbitals, self.n_spin, n_energy)
)
for filename in wfc_filenames:
self._validate_file(filename)
# Some pdos files are not non-colinear.
# In the version of the qe code where we get the spin projections.
# Non-colinear pdos files should have the word 'j' in the filename.
if not self._is_non_colinear_file(filename) and self.is_non_colinear:
continue
# Determine the parsing method based on calculation type
if self.is_non_colinear:
self._parse_non_colinear(filename, projected_dos_array)
else:
self._parse_colinear(filename, projected_dos_array)
return projected_dos_array
def _is_non_colinear_file(self, filename):
"""Determine if the file corresponds to non-colinear calculations."""
file_tag = filename.split("_")[-1]
return "j" in file_tag
def _validate_file(self, filename):
if not os.path.exists(filename):
raise ValueError(f"ERROR: pdos file not found: {filename}")
def _read_file_content(self, filename):
with open(filename) as f:
# Check if spin component projections are present
lines = f.readlines()[1:]
spin_projections_present = "lsigma" in lines[1]
if spin_projections_present:
pdos = self._parse_spin_components_lines(lines)
elif self.is_non_colinear:
pdos = self._parse_noncolinear_pdos(lines)
else:
pdos = self._parse_colinear_pdos(lines)
return pdos
def _parse_spin_components_lines(self, lines):
n_orbitals = len(lines[0].split()) - 2
pdos = np.zeros(shape=(self.n_energies, n_orbitals, self.n_spin))
for i_energy in range(self.n_energies):
iblock_start = i_energy * 6
total_line = lines[iblock_start].split()[2:]
x_line = lines[iblock_start + 2].split()[1:]
y_line = lines[iblock_start + 3].split()[1:]
z_line = lines[iblock_start + 4].split()[1:]
pdos[i_energy, :, 0] = [float(tot) for tot in total_line]
pdos[i_energy, :, 1] = [float(x) for x in x_line]
pdos[i_energy, :, 2] = [float(y) for y in y_line]
pdos[i_energy, :, 3] = [float(z) for z in z_line]
# Move energy to the last axis
pdos = np.moveaxis(pdos, 0, -1)
return pdos
def _parse_colinear_pdos(self, lines):
# The energy column is first, the sum totals by spin channel
n_orbitals = len(lines[0].split()) - self.n_spin - 1
n_orbitals //= self.n_spin
pdos = np.zeros(shape=(self.n_energies, n_orbitals, self.n_spin))
col_start = 1 + self.n_spin
for i_energy in range(self.n_energies):
# Skip energies and sum over projections
values = lines[i_energy].split()[col_start:]
if self.n_spin == 1:
pdos[i_energy, :, 0] = values
else:
pdos[i_energy, :, 0] = values[::2]
pdos[i_energy, :, 1] = values[1::2]
# Move energy to the last axis
pdos = np.moveaxis(pdos, 0, -1)
return pdos
def _parse_noncolinear_pdos(self, lines):
# The energy column is first, the sum totals by spin channel
n_orbitals = len(lines[0].split()) - 1 - 1
n_orbitals //= 1
pdos = np.zeros(shape=(self.n_energies, n_orbitals, 4))
col_start = 1 + 1
for i_energy in range(self.n_energies):
# Skip energies and sum over projections
values = lines[i_energy].split()[col_start:]
pdos[i_energy, :, 0] = values
# Move energy to the last axis
pdos = np.moveaxis(pdos, 0, -1)
return pdos
def _extract_file_info(self, filename):
atom_num = int(re.findall(r"#(\d*)", filename)[0]) - 1
wfc_name = re.findall(r"atm#\d*\(([a-zA-Z0-9]*)\)", filename)[0]
orbital_name = filename.split("(")[-1][0]
total_angular_momentum = None
if self.is_non_colinear:
tmp_str = filename.split("_")[-1]
total_angular_momentum = float(tmp_str.strip("j").strip(")"))
return atom_num, wfc_name, orbital_name, total_angular_momentum
def _parse_non_colinear(self, filename, projected_dos_array):
pdos_data = self._read_file_content(filename)
atom_num, wfc_name, orbital_name, total_angular_momentum = (
self._extract_file_info(filename)
)
orbital_nums, _ = self._get_orbital_info_non_colinear(
orbital_name, total_angular_momentum
)
if not orbital_nums:
raise ValueError(f"ERROR: orbital_nums is empty for {filename}")
projected_dos_array[
atom_num, 0, orbital_nums, :, : self.n_energies
] += pdos_data
def _parse_colinear(self, filename, projected_dos_array):
pdos_data = self._read_file_content(filename)
atom_num, wfc_name, orbital_name, total_angular_momentum = (
self._extract_file_info(filename)
)
orbital_nums, _ = self._get_orbital_info_colinear(orbital_name)
if not orbital_nums:
raise ValueError(f"ERROR: orbital_nums is empty for {filename}")
projected_dos_array[
atom_num, 0, orbital_nums, :, : self.n_energies
] += pdos_data
def _get_orbital_info_non_colinear(self, l_orbital_name, tot_ang_mom):
orbital_info = {
"s": {0.5: ([0, 1], np.linspace(-0.5, 0.5, 2))},
"p": {
0.5: ([2, 3], np.linspace(-0.5, 0.5, 2)),
1.5: ([4, 5, 6, 7], np.linspace(-1.5, 1.5, 4)),
},
"d": {
0.5: ([2, 3], np.linspace(-0.5, 0.5, 2)),
1.5: ([8, 9, 10, 11], np.linspace(-1.5, 1.5, 4)),
2.5: ([12, 13, 14, 15, 16, 17], np.linspace(-2.5, 2.5, 6)),
},
}
return orbital_info.get(l_orbital_name, {}).get(tot_ang_mom, ([], []))
def _get_orbital_info_colinear(self, orbital_name):
orbital_info = {
"s": ([0], np.linspace(0, 0, 1)),
"p": ([1, 2, 3], np.linspace(-1, 1, 3)),
"d": ([4, 5, 6, 7, 8], np.linspace(-2, 2, 5)),
}
return orbital_info.get(orbital_name, ([], []))
def _get_kpoint_labels(self):
"""
This method will parse the bands.in file to get the kpath information.
"""
# Parsing klabels
self.ngrids = []
kmethod = re.findall("K_POINTS[\s\{]*([a-z_]*)[\s\{]*", self.bandsIn)[0]
self.discontinuities = []
if kmethod == "crystal":
numK = int(re.findall("K_POINTS.*\n([0-9]*)", self.bandsIn)[0])
raw_khigh_sym = re.findall(
"K_POINTS.*\n\s*[0-9]*.*\n" + numK * "(.*)\n*", self.bandsIn
)[0]
tickCountIndex = 0
self.knames = []
self.kticks = []
for x in raw_khigh_sym:
if len(x.split()) == 5:
self.knames.append("%s" % x.split()[4].replace("!", ""))
self.kticks.append(tickCountIndex)
tickCountIndex += 1
self.nhigh_sym = len(self.knames)
elif kmethod == "crystal_b":
self.nhigh_sym = int(re.findall("K_POINTS.*\n([0-9]*)", self.bandsIn)[0])
raw_khigh_sym = re.findall(
"K_POINTS.*\n.*\n" + self.nhigh_sym * "(.*)\n*",
self.bandsIn,
)[0]
self.kticks = []
self.high_symmetry_points = np.zeros(shape=(self.nhigh_sym, 3))
tick_Count = 1
for ihs in range(self.nhigh_sym):
# In QE cyrstal_b mode, the user is able to specify grid on last high symmetry point.
# QE just uses 1 for the last high symmetry point.
grid_current = int(raw_khigh_sym[ihs].split()[3])
if ihs < self.nhigh_sym - 2:
self.ngrids.append(grid_current)
# Incrementing grid by 1 for seocnd to last high symmetry point
elif ihs == self.nhigh_sym - 2:
self.ngrids.append(grid_current + 1)
# I have no idea why I skip the last high symmetry point. I think it had to do with disconinuous points.
# Need to code test case for this. Otherwise leave it as is.
# elif ihs == self.nhigh_sym - 1:
# continue
self.kticks.append(tick_Count - 1)
tick_Count += grid_current
raw_ticks = re.findall(
"K_POINTS.*\n\s*[0-9]*\s*[0-9]*.*\n" + self.nhigh_sym * ".*!(.*)\n*",
self.bandsIn,
)[0]
if len(raw_ticks) != self.nhigh_sym:
self.knames = [str(x) for x in range(self.nhigh_sym)]
else:
self.knames = [
"%s" % (x.replace(",", "").replace("vlvp1d", "").replace(" ", ""))
for x in raw_ticks
]
# Formating to conform with Kpath class
self.special_kpoints = np.zeros(shape=(len(self.kticks) - 1, 2, 3))
self.modified_knames = []
for itick in range(len(self.kticks)):
if itick != len(self.kticks) - 1:
self.special_kpoints[itick, 0, :] = self.kpoints[self.kticks[itick]]
self.special_kpoints[itick, 1, :] = self.kpoints[self.kticks[itick + 1]]
self.modified_knames.append(
[self.knames[itick], self.knames[itick + 1]]
)
has_time_reversal = True
self.kpath = KPath(
knames=self.modified_knames,
special_kpoints=self.special_kpoints,
kticks=self.kticks,
ngrids=self.ngrids,
has_time_reversal=has_time_reversal,
)
def _initialize_filenames(
self,
scf_in_filepath: Union[str, Path],
bands_in_filepath: Union[str, Path],
pdos_in_filepath: Union[str, Path],
):
"""This helper method handles pathing to the to locate files
Parameters
----------
dirpath : str
The directory path where the calculation is
scf_in_filepath : str
The input scf filename
bands_in_filepath : str
The input bands filename
pdos_in_filepath : str
The input pdos filename
Returns
-------
Tuple
Returns a tuple of important pathing information.
Mainly, the directory path is prepended to the filenames.
"""
scf_filename = Path(scf_in_filepath).name
scf_in_filepath = Path(self.dirpath) / scf_filename
scf_out_filepath = Path(self.dirpath) / f"{scf_filename.split('.')[0]}.out"
bands_filename = Path(bands_in_filepath).name
bands_in_filepath = Path(self.dirpath) / bands_filename
bands_out_filepath = Path(self.dirpath) / f"{bands_filename.split('.')[0]}.out"
pdos_filename = Path(pdos_in_filepath).name
pdos_in_filepath = Path(self.dirpath) / pdos_filename
pdos_out_filepath = Path(self.dirpath) / f"pdos.out"
kpdos_out_filepath = Path(self.dirpath) / f"kpdos.out"
if kpdos_out_filepath.exists():
projwfc_out_filepath = kpdos_out_filepath
elif pdos_out_filepath.exists():
projwfc_out_filepath = pdos_out_filepath
else:
projwfc_out_filepath = None
with open(scf_in_filepath, "r") as f:
scf_in = f.read()
outdir = re.findall("outdir\s*=\s*'\S*?(.*)'", scf_in)[0]
prefix = re.findall("prefix\s*=\s*'(.*)'", scf_in)[0]
xml_filename = prefix + ".xml"
atomic_proj_xml_filepath = (
Path(self.dirpath) / outdir / f"{prefix}.save" / "atomic_proj.xml"
)
if not atomic_proj_xml_filepath.exists():
atomic_proj_xml_filepath = Path(self.dirpath) / "atomic_proj.xml"
output_xml_filepath = Path(self.dirpath) / outdir / xml_filename
logger.info(f"output_xml: {output_xml_filepath}")
if not output_xml_filepath.exists():
output_xml_filepath = Path(self.dirpath) / xml_filename
tree = ET.parse(output_xml_filepath)
xml_root = tree.getroot()
prefix = xml_root.findall(".//input/control_variables/prefix")[0].text
pdos_in_filepath = Path(self.dirpath) / pdos_in_filepath
bands_in_filepath = Path(self.dirpath) / bands_in_filepath
if pdos_out_filepath.exists():
pdos_out_filepath = pdos_out_filepath
return (
prefix,
xml_root,
atomic_proj_xml_filepath,
pdos_in_filepath,
bands_in_filepath,
pdos_out_filepath,
projwfc_out_filepath,
scf_out_filepath,
)
def _parse_available_wfc_filenames(self):
"""Helper method to parse the projection filename from the pdos.out file
Parameters
----------
dirpath : str
The directory name where the calculation is.
Returns
-------
List
Returns a list of projection file names
"""
wfc_filenames = []
tmp_wfc_filenames = []
atms_wfc_num = []
# Parsing projection filnames for identification information
for filepath in Path(self.dirpath).iterdir():
if filepath.suffix in [
".pdos",
".pdos_tot",
".lowdin",
".projwfc_down",
".projwfc_up",
".xml",
]:
continue
if filepath.stem != self.pdos_prefix:
continue
filename = str(filepath.name)
tmp_wfc_filenames.append(filename)
atm_num = int(re.findall("_atm#([0-9]*)\(.*", filename)[0])
wfc_num = int(re.findall("_wfc#([0-9]*)\(.*", filename)[0])
wfc = re.findall("_wfc#[0-9]*\(([_A-Za-z0-9.]*)\).*", filename)[0]
atm = re.findall("_atm#[0-9]*\(([A-Za-z]*[0-9]*)\).*", filename)[0]
atms_wfc_num.append((atm_num, atm, wfc_num, wfc))
# sort density of states projections files by atom number
sorted_file_num = sorted(atms_wfc_num, key=lambda a: a[0])
for index in sorted_file_num:
wfc_filenames.append(
f"{self.dirpath}{os.sep}{self.pdos_prefix}.pdos_atm#{index[0]}({index[1]})_wfc#{index[2]}({index[3]})"
)
return wfc_filenames
def _parse_wfc_mapping(self, projwfc_out_filepath):
"""Helper method which creates a mapping between wfc number and the orbtial and atom numbers
Parameters
----------
projwfc_out_filepath : str
The projwfc out filepath
Returns
-------
None
None
"""
with open(projwfc_out_filepath) as f:
pdos_out = f.read()
raw_wfc = re.findall(
"(?<=read\sfrom\spseudopotential\sfiles).*\n\n([\S\s]*?)\n\n(?=\sk\s=)",
pdos_out,
)[0]
wfc_list = raw_wfc.split("\n")
self.wfc_mapping = {}
# print(self.orbitals)
for i, wfc in enumerate(wfc_list):
iwfc = int(re.findall("(?<=state\s#)\s*(\d*)", wfc)[0])
iatm = int(re.findall("(?<=atom)\s*(\d*)", wfc)[0])
l_orbital_type_index = int(re.findall("(?<=l=)\s*(\d*)", wfc)[0])
if self.is_non_colinear:
j_orbital_type_index = float(re.findall("(?<=j=)\s*([-\d.]*)", wfc)[0])
m_orbital_type_index = float(
re.findall("(?<=m_j=)\s*([-\d.]*)", wfc)[0]
)
tmp_orb_dict = {
"l": self._convert_lorbnum_to_letter(lorbnum=l_orbital_type_index),
"j": j_orbital_type_index,
"m": m_orbital_type_index,
}
# print(self._convert_lorbnum_to_letter(lorbnum=l_orbital_type_index))
else:
m_orbital_type_index = int(re.findall("(?<=m=)\s*(\d*)", wfc)[0])
tmp_orb_dict = {"l": l_orbital_type_index, "m": m_orbital_type_index}
iorb = 0
for iorbital, orb in enumerate(self.orbitals):
if tmp_orb_dict == orb:
iorb = iorbital
self.wfc_mapping.update({f"wfc_{iwfc}": {"orbital": iorb, "atom": iatm}})
return None
def _parse_atomic_projections(self, atomic_proj_xml_filepath):
"""A Helper method to parse the atomic projection xml file
Parameters
----------
atomic_proj_xml_filepath : str
The atomic_proj.xml filename
Returns
-------
None
None
"""
atmProj_tree = ET.parse(atomic_proj_xml_filepath)
atm_proj_root = atmProj_tree.getroot()
root_header = atm_proj_root.findall(".//HEADER")[0]
nbnd = int(root_header.get("NUMBER_OF_BANDS"))
nk = int(root_header.get("NUMBER_OF_K-POINTS"))
nwfc = int(root_header.get("NUMBER_OF_ATOMIC_WFC"))
norb = self.n_orbitals
natm = self.n_atoms
nspin_channels = int(root_header.get("NUMBER_OF_SPIN_COMPONENTS"))
nspin_projections = self.n_spin
# The indices are to match the format of the from PROCAR format.
# In it there is an extra 2 columns for orbitals for the ion index and the total
# Also there is and extra row for the totals
self.spd = np.zeros(
shape=(
self.n_k,
self.n_band,
self.n_spin,
self.n_atoms + 1,
self.n_orbitals + 2,
)
)
self.spd_phase = np.zeros(
shape=(self.spd.shape),
dtype=np.complex_,
)
bands = self._parse_bands_tag(atm_proj_root, nk, nbnd, nspin_channels)
kpoints = self._parse_kpoints_tag(atm_proj_root, nk, nspin_channels)
projs, projs_phase = self._parse_projections_tag(
atm_proj_root, nk, nbnd, natm, norb, nspin_channels, nspin_projections
)
# maping the projections to the spd array. The spd array is the output of the PROCAR file
self.spd[:, :, :, :-1, 1:-1] += projs[:, :, :, :, :]
self.spd_phase[:, :, :, :-1, 1:-1] += projs_phase[:, :, :, :, :]
# Adding atom index. This is a vasp output thing
for ions in range(self.ionsCount):
self.spd[:, :, :, ions, 0] = ions + 1
# The following fills the totals for the spd array. Again this is a vasp output thing.
self.spd[:, :, :, :, -1] = np.sum(self.spd[:, :, :, :, 1:-1], axis=4)
self.spd[:, :, :, -1, :] = np.sum(self.spd[:, :, :, :-1, :], axis=3)
self.spd[:, :, :, -1, 0] = 0
return None
def _parse_bands_tag(self, atm_proj_root, nk, nbnd, nspins):
bands = np.zeros(shape=(nk, nbnd, nspins))
results = atm_proj_root.findall(".//EIGENSTATES/E")
# For spin-polarized calculations, there are two spin channels.
# They add them by first adding the spin up and then the spin down
# I break this down with the folloiwng indexing
spin_reuslts = [results[i * nk : (i + 1) * nk] for i in range(nspins)]
for ispin, spin_result in enumerate(spin_reuslts):
for ik, result in enumerate(spin_result):
bands_per_kpoint = result.text.split()
bands[ik, :, ispin] = bands_per_kpoint
return bands
def _parse_kpoints_tag(self, atm_proj_root, nk, nspins):
kpoints = np.zeros(shape=(nk, 3))
kpoint_tags = atm_proj_root.findall(".//EIGENSTATES/K-POINT")
# For spin-polarized calculations, there are two spin channels.
# They add them by first adding the spin up and then the spin down
# I break this down with the folloiwng indexing
spin_reuslts = [kpoint_tags[i * nk : (i + 1) * nk] for i in range(nspins)]
for ispin, spin_result in enumerate(spin_reuslts):
for ik, kpoint_tag in enumerate(spin_result):
kpoint = kpoint_tag.text.split()
kpoints[ik, :] = kpoint
return kpoints
def _parse_projections_tag(
self, atm_proj_root, nk, nbnd, natm, norb, nspin_channels, nspin_projections
):
projs = np.zeros(shape=(nk, nbnd, nspin_projections, natm, norb))
projs_phase = np.zeros(
shape=(nk, nbnd, nspin_projections, natm, norb), dtype=np.complex_
)
proj_tags = atm_proj_root.findall(".//EIGENSTATES/PROJS")
# For spin-polarized calculations, there are two spin channels.
# They add them by first adding the spin up and then the spin down
# I break this down with the folloiwng indexing
spin_reuslts = [proj_tags[i * nk : (i + 1) * nk] for i in range(nspin_channels)]
for ispin, spin_result in enumerate(spin_reuslts):
for ik, proj_tag in enumerate(spin_result):
atm_wfs_tags = proj_tag.findall("ATOMIC_WFC")
for atm_wfs_tag in atm_wfs_tags:
iwfc = int(atm_wfs_tag.get("index"))
iorb = self.wfc_mapping[f"wfc_{iwfc}"]["orbital"]
iatm = self.wfc_mapping[f"wfc_{iwfc}"]["atom"] - 1
band_projections = atm_wfs_tag.text.strip().split("\n")
for iband, band_projection in enumerate(band_projections):
real = float(band_projection.split()[0])
imag = float(band_projection.split()[1])
comp = complex(real, imag)
comp_squared = np.absolute(comp) ** 2
projs_phase[ik, iband, ispin, iatm, iorb] = complex(real, imag)
projs[ik, iband, ispin, iatm, iorb] = comp_squared
atm_sigma_wfs_tags = proj_tag.findall("ATOMIC_SIGMA_PHI")
if atm_sigma_wfs_tags:
spin_x_projections = [
atm_sigma_wfs_tag
for atm_sigma_wfs_tag in atm_sigma_wfs_tags
if atm_sigma_wfs_tag.get("ipol") == "1"
]
spin_y_projections = [
atm_sigma_wfs_tag
for atm_sigma_wfs_tag in atm_sigma_wfs_tags
if atm_sigma_wfs_tag.get("ipol") == "2"
]
spin_z_projections = [
atm_sigma_wfs_tag
for atm_sigma_wfs_tag in atm_sigma_wfs_tags
if atm_sigma_wfs_tag.get("ipol") == "3"
]
spin_projections = [
spin_x_projections,
spin_y_projections,
spin_z_projections,
]
for i_spin_component, spin_projection_tags in enumerate(
spin_projections
):
for spin_projection_tag in spin_projection_tags:
iwfc = int(spin_projection_tag.get("index"))
iorb = self.wfc_mapping[f"wfc_{iwfc}"]["orbital"]
iatm = self.wfc_mapping[f"wfc_{iwfc}"]["atom"] - 1
band_projections = spin_projection_tag.text.strip().split(
"\n"
)
for iband, band_projection in enumerate(band_projections):
real = float(band_projection.split()[0])
imag = float(band_projection.split()[1])
comp = complex(real, imag)
comp_squared = np.absolute(comp) ** 2
# Move spin index by 1 to match the order in the spd array
# First index should be total,
# second, third, and fourth should be x,y,z, respoectively
projs_phase[
ik, iband, i_spin_component + 1, iatm, iorb
] = complex(real, imag)
projs[ik, iband, i_spin_component + 1, iatm, iorb] = (
real
)
return projs, projs_phase
def _parse_structure(self, main_xml_root):
"""A helper method to parse the structure tag of the main xml file
Parameters
----------
main_xml_root : xml.etree.ElementTree.Element
The main xml Element
Returns
-------
None
None
"""
self.nspecies = len(main_xml_root.findall(".//output/atomic_species")[0])
self.composition = {
species.attrib["name"]: 0
for species in main_xml_root.findall(".//output/atomic_species")[0]
}
self.species_list = list(self.composition.keys())
self.ionsCount = int(
main_xml_root.findall(".//output/atomic_structure")[0].attrib["nat"]
)
self.alat = float(
main_xml_root.findall(".//output/atomic_structure")[0].attrib["alat"]
)
self.alat = self.alat * AU_TO_ANG
self.ions = []
for ion in main_xml_root.findall(".//output/atomic_structure/atomic_positions")[
0
]:
self.ions.append(ion.attrib["name"][:2])
self.composition[ion.attrib["name"]] += 1
self.n_atoms = len(self.ions)
self.atomic_positions = np.array(
[
ion.text.split()
for ion in main_xml_root.findall(
".//output/atomic_structure/atomic_positions"
)[0]
],
dtype=float,
)
# in a.u
self.direct_lattice = np.array(
[
acell.text.split()
for acell in main_xml_root.findall(".//output/atomic_structure/cell")[0]
],
dtype=float,
)
# in a.u
self.reciprocal_lattice = (2 * np.pi / self.alat) * np.array(
[
acell.text.split()
for acell in main_xml_root.findall(
".//output/basis_set/reciprocal_lattice"
)[0]
],
dtype=float,
)
# Convert to angstrom
self.direct_lattice = self.direct_lattice * AU_TO_ANG
return None
def _parse_symmetries(self, main_xml_root):
"""A helper method to parse the symmetries tag of the main xml file
Parameters
----------
main_xml_root : xml.etree.ElementTree.Element
The main xml Element
Returns
-------
None
None
"""
self.nsym = int(main_xml_root.findall(".//output/symmetries/nsym")[0].text)
self.nrot = int(main_xml_root.findall(".//output/symmetries/nrot")[0].text)
self.spg = int(
main_xml_root.findall(".//output/symmetries/space_group")[0].text
)
self.nsymmetry = len(main_xml_root.findall(".//output/symmetries/symmetry"))
self.rotations = np.zeros(shape=(self.nsymmetry, 3, 3))
for isymmetry, symmetry_operation in enumerate(
main_xml_root.findall(".//output/symmetries/symmetry")
):
symmetry_matrix = (
np.array(
symmetry_operation.findall(".//rotation")[0].text.split(),
dtype=float,
)
.reshape(3, 3)
.T
)
self.rotations[isymmetry, :, :] = symmetry_matrix
return None
def _parse_magnetization(self, main_xml_root):
"""A helper method to parse the magnetization tag of the main xml file
Parameters
----------
main_xml_root : xml.etree.ElementTree.Element
The main xml Element
Returns
-------
None
None
"""
is_non_colinear = str2bool(
main_xml_root.findall(".//output/magnetization/noncolin")[0].text
)
is_spin_calc = str2bool(
main_xml_root.findall(".//output/magnetization/lsda")[0].text
)
is_spin_orbit_calc = str2bool(
main_xml_root.findall(".//output/magnetization/spinorbit")[0].text
)
# The calcuulation is non-colinear
if is_non_colinear:
n_spin = 4
orbitals = [
{"l": "s", "j": 0.5, "m": -0.5},
{"l": "s", "j": 0.5, "m": 0.5},
{"l": "p", "j": 0.5, "m": -0.5},
{"l": "p", "j": 0.5, "m": 0.5},
{"l": "p", "j": 1.5, "m": -1.5},
{"l": "p", "j": 1.5, "m": -0.5},
{"l": "p", "j": 1.5, "m": -0.5},
{"l": "p", "j": 1.5, "m": 1.5},
{"l": "d", "j": 1.5, "m": -1.5},
{"l": "d", "j": 1.5, "m": -0.5},
{"l": "d", "j": 1.5, "m": -0.5},
{"l": "d", "j": 1.5, "m": 1.5},
{"l": "d", "j": 2.5, "m": -2.5},
{"l": "d", "j": 2.5, "m": -1.5},
{"l": "d", "j": 2.5, "m": -0.5},
{"l": "d", "j": 2.5, "m": 0.5},
{"l": "d", "j": 2.5, "m": 1.5},
{"l": "d", "j": 2.5, "m": 2.5},
]
orbitalNames = []
for orbital in orbitals:
tmp_name = ""
for key, value in orbital.items():
# print(key,value)
if key != "l":
tmp_name = tmp_name + key + str(value)
else:
tmp_name = tmp_name + str(value) + "_"
orbitalNames.append(tmp_name)
# The calcuulation is colinear
else:
# colinear spin or non spin polarized
if is_spin_calc:
n_spin = 2
else:
n_spin = 1
orbitals = [
{"l": 0, "m": 1},
{"l": 1, "m": 3},
{"l": 1, "m": 1},
{"l": 1, "m": 2},
{"l": 2, "m": 5},
{"l": 2, "m": 3},
{"l": 2, "m": 1},
{"l": 2, "m": 2},
{"l": 2, "m": 4},
]
orbitalNames = [
"s",
"py",
"pz",
"px",
"dxy",
"dyz",
"dz2",
"dxz",
"dx2",
"tot",
]
self.is_non_colinear = is_non_colinear
self.is_spin_calc = is_spin_calc
self.is_spin_orbit_calc = is_spin_orbit_calc
self.n_spin = n_spin
self.orbitals = orbitals
self.n_orbitals = len(orbitals)
self.orbital_names = orbitalNames
return None
def _parse_band_structure_tag(self, main_xml_root):
"""A helper method to parse the band_structure tag of the main xml file
Parameters
----------
main_xml_root : xml.etree.ElementTree.Element
The main xml Element
Returns
-------
None
None
"""
self.nkx = None
self.nky = None
self.nkz = None
self.nk1 = None
self.nk2 = None
self.nk3 = None
monkhorst_tag = main_xml_root.findall(
".//output/band_structure/starting_k_points"
)[0][0]
if "monkhorst_pack" in monkhorst_tag.tag:
self.nkx = float(monkhorst_tag.attrib["nk1"])
self.nky = float(monkhorst_tag.attrib["nk2"])
self.nkz = float(monkhorst_tag.attrib["nk3"])
self.nk1 = float(monkhorst_tag.attrib["k1"])
self.nk2 = float(monkhorst_tag.attrib["k2"])
self.nk3 = float(monkhorst_tag.attrib["k3"])
self.nks = int(main_xml_root.findall(".//output/band_structure/nks")[0].text)
self.atm_wfc = int(
main_xml_root.findall(".//output/band_structure/num_of_atomic_wfc")[0].text
)
self.nelec = float(
main_xml_root.findall(".//output/band_structure/nelec")[0].text
)
if self.n_spin == 2:
self.n_band = int(
main_xml_root.findall(".//output/band_structure/nbnd_up")[0].text
)
self.nbnd_up = int(
main_xml_root.findall(".//output/band_structure/nbnd_up")[0].text
)
self.nbnd_down = int(
main_xml_root.findall(".//output/band_structure/nbnd_dw")[0].text
)
self.bands = np.zeros(shape=(self.nks, self.n_band, 2))
self.kpoints = np.zeros(shape=(self.nks, 3))
self.weights = np.zeros(shape=(self.nks))
self.occupations = np.zeros(shape=(self.nks, self.n_band, 2))
band_structure_element = main_xml_root.findall(".//output/band_structure")[
0
]
for ikpoint, kpoint_element in enumerate(
main_xml_root.findall(".//output/band_structure/ks_energies")
):
self.kpoints[ikpoint, :] = np.array(
kpoint_element.findall(".//k_point")[0].text.split(), dtype=float
)
self.weights[ikpoint] = np.array(
kpoint_element.findall(".//k_point")[0].attrib["weight"],
dtype=float,
)
self.bands[ikpoint, :, 0] = (
HARTREE_TO_EV
* np.array(
kpoint_element.findall(".//eigenvalues")[0].text.split(),
dtype=float,
)[: self.nbnd_up]
)
self.occupations[ikpoint, :, 0] = np.array(
kpoint_element.findall(".//occupations")[0].text.split(),
dtype=float,
)[: self.nbnd_up]
self.bands[ikpoint, :, 1] = (
HARTREE_TO_EV
* np.array(
kpoint_element.findall(".//eigenvalues")[0].text.split(),
dtype=float,
)[self.nbnd_down :]
)
self.occupations[ikpoint, :, 1] = np.array(
kpoint_element.findall(".//occupations")[0].text.split(),
dtype=float,
)[self.nbnd_down :]
# For non-spin-polarized and non colinear
else:
self.n_band = int(
main_xml_root.findall(".//output/band_structure/nbnd")[0].text
)
self.bands = np.zeros(shape=(self.nks, self.n_band, 1))
self.kpoints = np.zeros(shape=(self.nks, 3))
self.weights = np.zeros(shape=(self.nks))
self.occupations = np.zeros(shape=(self.nks, self.n_band))
for ikpoint, kpoint_element in enumerate(
main_xml_root.findall(".//output/band_structure/ks_energies")
):
self.kpoints[ikpoint, :] = np.array(
kpoint_element.findall(".//k_point")[0].text.split(), dtype=float
)
self.weights[ikpoint] = np.array(
kpoint_element.findall(".//k_point")[0].attrib["weight"],
dtype=float,
)
self.bands[ikpoint, :, 0] = HARTREE_TO_EV * np.array(
kpoint_element.findall(".//eigenvalues")[0].text.split(),
dtype=float,
)
self.occupations[ikpoint, :] = np.array(
kpoint_element.findall(".//occupations")[0].text.split(),
dtype=float,
)
# Multiply in 2pi/alat
self.kpoints = self.kpoints * (2 * np.pi / self.alat)
# Converting back to crystal basis
self.kpoints = np.around(
self.kpoints.dot(np.linalg.inv(self.reciprocal_lattice)), decimals=8
)
self.n_k = len(self.kpoints)
self.kpointsCount = len(self.kpoints)
self.bandsCount = self.n_band
return None
def _spd2projected(self, spd, nprinciples=1):
"""
Helpermethod to project the spd array to the projected array
which will be fed into pyprocar.coreElectronicBandStructure object
Parameters
----------
spd : np.ndarray
The spd array from the earlier parse. This has a structure simlar to the PROCAR output in vasp
Has the shape [n_kpoints,n_band,n_spins,n-orbital,n_atoms]
nprinciples : int, optional
The prinicipal quantum numbers, by default 1
Returns
-------
np.ndarray
The projected array. Has the shape [n_kpoints,n_band,n_atom,n_principal,n-orbital,n_spin]
"""
# This function is for VASP
# non-pol and colinear
# spd is formed as (nkpoints,nbands, nspin, natom+1, norbital+2)
# natom+1 > last column is total
# norbital+2 > 1st column is the number of atom last is total
# non-colinear
# spd is formed as (nkpoints,nbands, nspin +1 , natom+1, norbital+2)
# natom+1 > last column is total
# norbital+2 > 1st column is the number of atom last is total
# nspin +1 > last column is total
if spd is None:
return None
natoms = spd.shape[3] - 1
nkpoints = spd.shape[0]
nbands = spd.shape[1]
nspins = spd.shape[2]
norbitals = spd.shape[4] - 2
# if spd.shape[2] == 4:
# nspins = 3
# else:
# nspins = spd.shape[2]
# if nspins == 2:
# nbands = int(spd.shape[1] / 2)
# else:
# nbands = spd.shape[1]
projected = np.zeros(
shape=(nkpoints, nbands, natoms, nprinciples, norbitals, nspins),
dtype=spd.dtype,
)
temp_spd = spd.copy()
# (nkpoints,nbands, nspin, natom, norbital)
temp_spd = np.swapaxes(temp_spd, 2, 4)
# (nkpoints,nbands, norbital , natom , nspin)
temp_spd = np.swapaxes(temp_spd, 2, 3)
# (nkpoints,nbands, natom, norbital, nspin)
# projected[ikpoint][iband][iatom][iprincipal][iorbital][ispin]
# if nspins == 3:
# # Used if self.spins==3
# projected[:, :, :, 0, :, :] = temp_spd[:, :, :-1, 1:-1, :]
# # Used if self.spins == 4
# # projected[:, :, :, 0, :, :] = temp_spd[:, :, :-1, 1:-1, 1:]
if nspins == 2:
projected[:, :, :, 0, :, 0] = temp_spd[:, :, :-1, 1:-1, 0]
projected[:, :, :, 0, :, 1] = temp_spd[:, :, :-1, 1:-1, 1]
else:
projected[:, :, :, 0, :, :] = temp_spd[:, :, :-1, 1:-1, :]
return projected
def _parse_efermi(self, main_xml_root):
"""A helper method to parse the band_structure tag of the main xml file for the fermi energy
Parameters
----------
main_xml_root : xml.etree.ElementTree.Element
The main xml Element
Returns
-------
None
None
"""
# self.efermi = float(main_xml_root.findall(".//output/band_structure/fermi_energy")[0].text) * HARTREE_TO_EV
with open(self.scf_out_filepath, "r") as f:
scf_out = f.read()
self.efermi = float(
re.findall("the Fermi energy is\s*([-\d.]*)", scf_out)[0]
)
return None
def _convert_lorbnum_to_letter(self, lorbnum):
"""A helper method to convert the lorb number to the letter format
Parameters
----------
lorbnum : int
The number of the l orbital
Returns
-------
str
The l orbital name
"""
lorb_mapping = {0: "s", 1: "p", 2: "d", 3: "f"}
return lorb_mapping[lorbnum]
def str2bool(v):
"""Converts a string of a boolean to an actual boolean
Parameters
----------
v : str
The string of the boolean value
Returns
-------
boolean
The boolean value
"""
return v.lower() in ("true")
return v.lower() in ("true")
return v.lower() in ("true")