init

2026-01-08 19:47:32 +03:00
commit 4d7676a79e
89 changed files with 62260 additions and 0 deletions
--- a/electrochemistry/echem/io_data/orca.py
+++ b/electrochemistry/echem/io_data/orca.py
@@ -0,0 +1,171 @@
+import numpy as np
+import pandas as pd
+import re
+from monty.re import regrep
+from tqdm import tqdm
+from .universal import Xyz
+from nptyping import NDArray, Shape, Number
+
+
+class SCFLog:
+    """Orbital energies, occupations and MO coefficients parsed from a log file.
+
+    Attributes:
+        eigenvalues: per-step orbital energies, or None. ``from_file`` stores a
+            numpy array built from one list per 'ORBITAL ENERGIES' section.
+        occupations: per-step occupation numbers in the same layout, or None.
+        mol_orbs: list with one pandas DataFrame per 'MOLECULAR ORBITALS'
+            section, or None. ``from_file`` produces the columns 'atom_ids',
+            'species', 'orbital' followed by one column per orbital index.
+    """
+
+    # Lines that mark the end of a printed section.
+    _SECTION_END = ('\n', '------------------\n')
+
+    def __init__(self, eigenvalues=None, occupation=None, mol_orbs=None):
+        """Store already parsed data; every argument may be None."""
+        self.eigenvalues = eigenvalues
+        self.occupations = occupation
+        self.mol_orbs = mol_orbs
+
+    @property
+    def natoms(self):
+        """Number of atoms: largest 'atom_ids' value of the first MO table plus one.
+
+        Raises:
+            ValueError: if no molecular orbitals are available.
+        """
+        if self.mol_orbs is None:
+            raise ValueError('natoms might be calculated only if mol_orbs had been read')
+        return np.max(self.mol_orbs[0]['atom_ids']) + 1
+
+    @property
+    def nbands(self):
+        """Number of orbitals, taken from the first step.
+
+        Raises:
+            ValueError: if neither eigenvalues nor molecular orbitals are available.
+        """
+        if self.eigenvalues is not None:
+            return len(self.eigenvalues[0])
+        if self.mol_orbs is not None:
+            # The first three columns are the atom / species / orbital labels.
+            return len(self.mol_orbs[0].columns) - 3
+        raise ValueError('nbands might be calculated only if eigenvalues or mol_orbs had been read')
+
+    @property
+    def nsteps(self):
+        """Number of steps for which data is stored.
+
+        Raises:
+            ValueError: if neither eigenvalues nor molecular orbitals are available.
+        """
+        if self.eigenvalues is not None:
+            return len(self.eigenvalues)
+        if self.mol_orbs is not None:
+            return len(self.mol_orbs)
+        raise ValueError('nsteps might be calculated only if eigenvalues or mol_orbs had been read')
+
+    @staticmethod
+    def from_file(filepath):
+        """Parse every 'ORBITAL ENERGIES' and 'MOLECULAR ORBITALS' section of a file.
+
+        Args:
+            filepath: path of the text log to read.
+
+        Returns:
+            SCFLog holding the eigenvalues and occupations as numpy arrays and
+            the molecular orbitals as a list of DataFrames, one per section.
+        """
+        with open(filepath, 'r') as file:
+            data = file.readlines()
+
+        patterns = {'eigs': 'ORBITAL ENERGIES',
+                    'mos': 'MOLECULAR ORBITALS'}
+        # Each hit is used as [groups, line_index]; only the line index is needed.
+        matches = regrep(filepath, patterns)
+
+        occs = []
+        eigs = []
+        for match in tqdm(matches['eigs'], desc='Eigenvalues', total=len(matches['eigs'])):
+            # The first data row is read four lines below the section title.
+            occs_tmp, eigs_tmp = SCFLog._read_orbital_energies(data, match[1] + 4)
+            occs.append(occs_tmp)
+            eigs.append(eigs_tmp)
+
+        mos_arr = []
+        for match in tqdm(matches['mos'], desc='Molecular Orbitals', total=len(matches['mos'])):
+            # Scanning starts two lines below the section title.
+            mos_arr.append(SCFLog._read_molecular_orbitals(data, match[1] + 2))
+
+        return SCFLog(np.array(eigs), np.array(occs), mos_arr)
+
+    @staticmethod
+    def _read_orbital_energies(data, start):
+        """Read (occupations, energies) from consecutive rows beginning at ``start``.
+
+        Reading stops at a section-end line or at the end of ``data``. The
+        occupation is the second and the energy the fourth whitespace-separated
+        field of each row.
+        """
+        occs = []
+        eigs = []
+        i = start
+        while i < len(data) and data[i] not in SCFLog._SECTION_END:
+            fields = data[i].split()
+            occs.append(float(fields[1]))
+            eigs.append(float(fields[3]))
+            i += 1
+        return occs, eigs
+
+    @staticmethod
+    def _read_molecular_orbitals(data, start):
+        """Assemble one MO coefficient table from the batches beginning at ``start``.
+
+        The section is printed in batches: a header row of integer orbital
+        indices followed by coefficient rows '<atom id><symbol> <orbital> c0 c1 ...'.
+        Lines matching neither shape are skipped. Atom, species and orbital
+        labels are taken from the first batch only.
+        """
+        columns = {}          # column name -> values, in output order
+        batch = None          # buffers of the current batch; item 0 of each is its name
+        has_labels = False    # True while the current batch is the first one
+        unsaved_rows = False  # True when the current batch holds unstored rows
+
+        i = start
+        while i < len(data) and data[i] not in SCFLog._SECTION_END:
+            if re.match(r'\s*\w+\s+\w+\s+([-+]?\d*\.\d*\s+)+', data[i]) is not None:
+                # Coefficient row; one seen before any header is ignored.
+                if batch is not None:
+                    fields = data[i].split()
+                    offset = 0
+                    if has_labels:
+                        atom_number = re.match(r'\d+', fields[0])
+                        batch[0].append(int(atom_number[0]))
+                        batch[1].append(fields[0][len(atom_number[0]):])
+                        batch[2].append(fields[1])
+                        offset = 3
+                    for j, value in enumerate(fields[2:]):
+                        batch[offset + j].append(float(value))
+                    unsaved_rows = True
+            elif re.match(r'\s*(\d+\s+)+', data[i]) is not None:
+                # Header row: store the finished batch and open a new one.
+                fields = data[i].split()
+                if batch is None:
+                    has_labels = True
+                    batch = [['atom_ids'], ['species'], ['orbital']]
+                else:
+                    SCFLog._store_batch(columns, batch)
+                    has_labels = False
+                    batch = []
+                batch.extend([int(n_mo)] for n_mo in fields)
+                unsaved_rows = False
+            # Every line advances the cursor, so no input can stall the loop.
+            i += 1
+
+        # A trailing header without coefficient rows is dropped.
+        if batch is not None and unsaved_rows:
+            SCFLog._store_batch(columns, batch)
+
+        # Build the frame once instead of inserting columns one at a time.
+        return pd.DataFrame(columns)
+
+    @staticmethod
+    def _store_batch(columns, batch):
+        """Move every buffer of ``batch`` into ``columns`` under its name (item 0)."""
+        for column in batch:
+            columns[column[0]] = column[1:]
+
+
+class XyzTrajectory:
+    """Geometries and potential energies read frame by frame from an xyz file.
+
+    Attributes:
+        first_xyz: result of ``Xyz.from_file`` for the same file.
+        trajectory: coordinates, annotated as (Nsteps, Natoms, 3).
+        energies_pot: one potential energy per frame, annotated as (Nsteps,).
+    """
+
+    def __init__(self,
+                 first_xyz: Xyz,
+                 trajectory: NDArray[Shape['Nsteps, Natoms, 3'], Number],
+                 energies_pot: NDArray[Shape['Nsteps'], Number]):
+        self.first_xyz = first_xyz
+        self.trajectory = trajectory
+        self.energies_pot = energies_pot
+
+    @staticmethod
+    def from_file(filepath):
+        """Read every frame of a multi-frame xyz file.
+
+        Each frame is an atom-count line, a comment line whose sixth
+        whitespace-separated field is the potential energy, and one line per
+        atom of the form '<label> x y z'.
+
+        Args:
+            filepath: path of the xyz trajectory.
+
+        Returns:
+            XyzTrajectory with one coordinate array and one energy per frame.
+        """
+        first_xyz = Xyz.from_file(filepath)
+
+        trajectory = []
+        energies_pot = []
+        with open(filepath, 'rt') as file:
+            while True:
+                try:
+                    natoms = int(file.readline().strip())
+                except ValueError:
+                    # End of file or a line that is not an atom count: stop reading.
+                    break
+                line = file.readline()
+                # NOTE: an earlier variant read the energy from field 8 as 'key=value'.
+                energies_pot.append(float(line.split()[5]))
+
+                coords = np.zeros((natoms, 3))
+                for i in range(natoms):
+                    line = file.readline().split()
+                    coords[i] = [float(j) for j in line[1:]]
+                trajectory.append(coords)
+
+        return XyzTrajectory(first_xyz, np.array(trajectory), np.array(energies_pot))