Bases: BaseDataset
The WaterClusters dataset contains putative minima and low-energy networks for water
clusters of sizes n = 3 to 30. The cluster structures are derived and labeled with
the TTM2.1-F ab initio-based interaction potential for water.
It contains approximately 4.5 million structures.
Sampling was done with the Monte Carlo Temperature Basin Paving (MCTBP) method.
Chemical Species
["H", "O"]
Usage:
from openqdc.datasets import WaterClusters
dataset = WaterClusters()
References
https://doi.org/10.1063/1.5128378
https://sites.uw.edu/wdbase/database-of-water-clusters/
Source code in openqdc/datasets/potential/waterclusters3_30.py
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
class WaterClusters(BaseDataset):
    """
    Dataset of putative minima and low-energy networks for water clusters.

    Covers cluster sizes n = 3 to 30. The cluster structures are derived and
    labeled with the TTM2.1-F ab initio-based interaction potential for water.
    It contains approximately 4.5 million structures. Sampling was done with the
    Monte Carlo Temperature Basin Paving (MCTBP) method.

    Chemical Species:
        ["H", "O"]

    Usage:
    ```python
    from openqdc.datasets import WaterClusters
    dataset = WaterClusters()
    ```

    References:
        https://doi.org/10.1063/1.5128378\n
        https://sites.uw.edu/wdbase/database-of-water-clusters/\n
    """

    __name__ = "waterclusters3_30"

    # Per-element reference energies, left at zero for every atomic number.
    # NOTE(review): the previous comment labelled these as hartree while
    # __energy_unit__ below is kcal/mol; immaterial for zeros, but confirm.
    atomic_energies = np.zeros((MAX_ATOMIC_NUMBER,), dtype=np.float32)

    __energy_unit__ = "kcal/mol"
    __distance_unit__ = "ang"
    __forces_unit__ = "kcal/mol/ang"

    __energy_methods__ = [PotentialMethod.TTM2_1_F]  # "ttm2.1-f"
    energy_target_names = ["TTM2.1-F Potential"]

    # Archive file name -> download URL.
    __links__ = {"W3-W30_all_geoms_TTM2.1-F.zip": "https://drive.google.com/uc?id=18Y7OiZXSCTsHrQ83GCc4fyE_abbL6E_n"}

    def read_raw_entries(self):
        """
        Unpack the per-size archives and collect every parsed entry.

        For each cluster size i in 3..30, ``W{i}_geoms_all.zip`` inside the
        ``W3-W30_all_geoms_TTM2.1-F/`` folder under ``self.root`` is extracted
        in place, and the resulting ``W{i}_geoms_all.xyz`` file is handed to
        ``read_xyz`` together with ``i``.

        Returns:
            A single list holding the concatenated results of all
            ``read_xyz`` calls, in increasing cluster-size order.
        """
        geoms_dir = p_join(self.root, "W3-W30_all_geoms_TTM2.1-F/")
        entries = []
        for n_waters in range(3, 31):
            stem = f"W{n_waters}_geoms_all"
            archive_file = p_join(geoms_dir, f"{stem}.zip")
            coords_file = p_join(geoms_dir, f"{stem}.xyz")
            # The .xyz file only exists once its archive has been unpacked.
            with zipfile.ZipFile(archive_file, "r") as archive:
                archive.extractall(geoms_dir)
            entries.extend(read_xyz(coords_file, n_waters))
        return entries
|