Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Command-line tools to generate and calculate NICE features #4

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 19 additions & 10 deletions nice/rascal_coefficients.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,7 @@ cpdef normalize_by_ps(double[:, :, :, :] coefficients):


def get_rascal_coefficients(structures, HYPERS, n_types):



sph = SPH(**HYPERS)
try:
n_max = HYPERS['max_radial']
Expand All @@ -108,9 +107,16 @@ def get_rascal_coefficients(structures, HYPERS, n_types):
def get_rascal_coefficients_stared(task):
return get_rascal_coefficients(*task)

def get_rascal_coefficients_parallelized(structures, rascal_hypers, task_size = 100, num_threads = None):
def get_rascal_coefficients_parallelized(structures, rascal_hypers, mask = None, mask_id = None, task_size = 100, num_threads = None):
hypers = copy.deepcopy(rascal_hypers)


if mask is not None:
for f in structures:
mask_center_atoms_by_species(f,[mask])
if mask_id is not None:
for f in structures:
mask_center_atoms_by_id(f,[mask_id])

if ('expansion_by_species_method' in hypers.keys()):
if (hypers['expansion_by_species_method'] != 'user defined'):
raise ValueError("for proper packing spherical expansion coefficients into [env index, radial/specie index, l, m] shape output should be uniform, thus 'expansion_by_species_method' must be 'user defined'")
Expand All @@ -134,10 +140,7 @@ def get_rascal_coefficients_parallelized(structures, rascal_hypers, task_size =
hypers['global_species'].append(int(specie))

species = np.array(hypers['global_species']).astype(np.int32)





if (num_threads is None):
num_threads = cpu_count()

Expand All @@ -150,5 +153,11 @@ def get_rascal_coefficients_parallelized(structures, rascal_hypers, task_size =
p.close()
p.join()
result = np.concatenate(result, axis = 0)
return split_by_central_specie(all_species, species, result)

if (mask is None and mask_id is None):
return split_by_central_specie(all_species, species, result)
else:
# just return the masked specie/atom
if mask is None:
return {mask_id: result}
else:
return {mask: result}
107 changes: 107 additions & 0 deletions tools/compute_nice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#!/usr/bin/python3

import pickle
import sys, os, argparse
import ase.io as ase_io
import numpy as np
import tqdm
from nice.transformers import *
from nice.rascal_coefficients import get_rascal_coefficients_parallelized


def main():
"""
Command-line utility to compute NICE features given a precomputed pickle.
"""

# Tweak the autogenerated help output to look nicer
formatter = lambda prog: argparse.HelpFormatter(prog, max_help_position=22)
parser = argparse.ArgumentParser(description=main.__doc__, formatter_class=formatter)

parser.add_argument('input', type=str, default="", nargs="?",
help='XYZ file to load')
parser.add_argument('-o', '--output', type=str, default="",
help='Output files prefix. Defaults to input filename with stripped extension')
parser.add_argument('--select', type=str, default=":",
help='Selection of input frames. ASE format.')
parser.add_argument('--nice', type=str, default="nice.pickle",
help='Definition of the NICE contraction. Output from optimize_nice.py')
parser.add_argument('--blocks', type=int, default=1,
help='Number of blocks to break the calculation into.')


args = parser.parse_args()

filename = args.input
output = args.output
select = args.select
nice = args.nice
nblocks = args.blocks

if output == "":
output = os.path.splitext(filename)[0]

print("Loading structures ", filename, " frames: ", select)
frames = ase_io.read(filename, index=select)
for f in frames:
if f.cell.sum() == 0.0:
f.cell = [100,100,100]
f.pbc = True
f.wrap(eps=1e-12)

aa = pickle.load(open(nice, "rb"))
hypers = aa["HYPERS"]
nice_sequence = aa["NICE"]

mask = hypers["reference-mask"]
try:
mask_id = hypers["reference-mask-id"]
except:
mask_id = -1
lmax = hypers["max_angular"]
scale = hypers["scale"]

# removes keys that are not needed for rascal
for k in ["nsph", "keep", "keep0", "screen", "screen0", "pcond", "scale",
"reference-file", "reference-sel", "reference-mask", "reference-mask-id"]:
hypers.pop(k)
print("HYPERPARAMETERS ", hypers)

print("Precomputing CG coefficients")
cglist = ClebschGordan(lmax)

lblock = len(frames)//nblocks
featinv = {}
for iblock in tqdm.tqdm(range(nblocks)):
print("Computing spherical expansion")
bstart = iblock*lblock
bend = (iblock+1)*lblock
if iblock == nblocks-1:
bend = len(frames)
coefficients = get_rascal_coefficients_parallelized(frames[bstart:bend], hypers, mask=(None if mask=="" else mask), mask_id=(None if mask_id<0 else mask_id))
# merge all the coefficients, as we want (for some reason) to apply the same NICE to all species. if mask has been specified, this does nothing
l = []
for f in coefficients.values():
l.append(f)
coefficients = np.vstack(l)
coefficients *= scale

invariants_even = nice_sequence.transform(coefficients, return_only_invariants = True)

for k in invariants_even:
if k in featinv:
featinv[k].append(invariants_even[k])
else:
featinv[k] = [invariants_even[k]]

print("Saving to ", output)
for k in featinv:
featinv[k] = np.vstack(featinv[k])
pickle.dump(featinv, open(output, "wb"))

if __name__ == '__main__':
main()




162 changes: 162 additions & 0 deletions tools/optimize_nice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
#!/usr/bin/python3

import pickle
import json
import sys, os, argparse
import ase.io as ase_io
import numpy as np
from nice.transformers import *
from nice.rascal_coefficients import get_rascal_coefficients_parallelized


def main():
"""
Command-line utility to optimize the parameters of NICE features.
Will output a pickled model which can then be used to predict features.
"""

# Tweak the autogenerated help output to look nicer
formatter = lambda prog: argparse.HelpFormatter(prog, max_help_position=22)
parser = argparse.ArgumentParser(description=main.__doc__, formatter_class=formatter)

parser.add_argument('input', type=str, default="", nargs="?",
help='XYZ file to load')
parser.add_argument('-o', '--output', type=str, default="",
help='Output files prefix. Defaults to input filename with stripped extension')
parser.add_argument('--select', type=str, default=":",
help='Selection of input structures. ASE format.')
parser.add_argument('--nmax', type=int, default=6,
help='Number of radial channels')
parser.add_argument('--lmax', type=int, default=4,
help='Number of angular momentum channels')
parser.add_argument('--rcut', type=float, default=4,
help='Radial cut off')
parser.add_argument('--sigma', type=float, default=0.5,
help='Gaussian smearing')
parser.add_argument('--json', type=str, default='{}', help='Additional hypers, as JSON string')
parser.add_argument('--mask-id', type=int, default=-1,
help='Central atom it to be selected')
parser.add_argument('--mask', type=str, default="",
help='Central atom type to be selected')
parser.add_argument('--screen', type=int, default=1000,
help='Target number of equivariants to be computed in the iteration step')
parser.add_argument('--screen0', type=int, default=500,
help='Target number of invariants to be computed in the iteration step')
parser.add_argument('--nsph', type=int, default=0,
help='Target number of spherical expansion coefficients to keep. 0 defaults all')
parser.add_argument('--keep', type=int, default=200,
help='Target number of equivariants to be contracted to')
parser.add_argument('--keep0', type=int, default=400,
help='Target number of invariants to be stored')


args = parser.parse_args()

filename = args.input
output = args.output
select = args.select
nmax = args.nmax
lmax = args.lmax
rcut = args.rcut
gs = args.sigma
mask = args.mask
mask_id = args.mask_id
nsph = args.nsph
fkeep = args.keep
fkeep0 = args.keep0
fscreen = args.screen
fscreen0 = args.screen0
json_hypers = json.loads(args.json)
pcond = 1e-4

if output == "":
output = os.path.splitext(filename)[0]


numax=4;
purify_take=50;
blocklist = [ ]
for nu in range(1, numax-1): # this starts from nu=2 actually
blocklist.append(
StandardBlock(ThresholdExpansioner(num_expand=fscreen),
CovariantsPurifierBoth(max_take=purify_take),
IndividualLambdaPCAsBoth(n_components=fkeep),
ThresholdExpansioner(num_expand=fscreen0, mode='invariants'),
InvariantsPurifier(),
InvariantsPCA(n_components=fkeep0))
)

# at the last order, we only need invariants
blocklist.append(
StandardBlock(None, None, None,
ThresholdExpansioner(num_expand=fscreen0, mode='invariants'),
InvariantsPurifier(),
InvariantsPCA(n_components=fkeep0))
)

# first-order equivariants are just combinations of the spherical coefficients, done by initial_pca
nice_sequence = StandardSequence(blocklist, initial_pca=IndividualLambdaPCAsBoth(n_components=nsph))

print("Loading structures ", filename, " structures: ", select)
structures = ase_io.read(filename, index=select)
for f in structures:
if f.cell.sum() == 0.0: # fake PBC
f.cell = [100,100,100]
f.positions += np.asarray([50,50,50])
f.pbc = True
f.wrap(eps=1e-12)

print("Computing spherical expansion")

hypers = { **{
'interaction_cutoff': rcut,
'max_radial': nmax,
'max_angular': lmax,
'gaussian_sigma_constant': gs,
'gaussian_sigma_type': 'Constant',
'cutoff_smooth_width': 0.5,
'radial_basis': 'GTO'
}, **json_hypers }

coefficients = get_rascal_coefficients_parallelized(structures, hypers, mask=(None if mask=="" else mask), mask_id=(None if mask_id<0 else mask_id))
# merge all the coefficients, as we want (for some reason) to apply the same NICE to all species. if mask has been specified, this does nothing
l = []
for f in coefficients.values():
l.append(f)
coefficients = np.vstack(l)
if nsph == 0:
nsph = coefficients.shape[1]
scale = 1.0/np.sqrt(np.sum(coefficients**2)/len(coefficients));
coefficients *= scale # normalization of features

print("coefficients ", coefficients.shape)

hypers["keep"] = fkeep
hypers["keep0"] = fkeep0
hypers["screen"] = fscreen
hypers["screen0"] = fscreen0
hypers["pcond"] = pcond
hypers["reference-file"] = filename
hypers["reference-sel"] = select
hypers["reference-mask"] = mask
hypers["reference-mask-id"] = mask_id
hypers["scale"] = scale
hypers["nsph"] = nsph

print("Precomputing CG coefficients")
cglist = ClebschGordan(args.lmax)

print("Optimizing NICE")
nice_sequence.fit(coefficients, clebsch_gordan = cglist)

print("Dumping NICE model")
pickle.dump( {
"HYPERS" : hypers,
"NICE": nice_sequence,
}, open(output+".pickle", "wb"))

#np.save(output+"-inv", np.hstack([rhoinv1, rhoinv2, rhoinv3, rhoinv4, rhoinv5]) )

if __name__ == '__main__':
main()