lab-cosmo · ceriottm · Aug 22, 2020 · Aug 23, 2020 · Aug 23, 2020 · Sep 1, 2020
diff --git a/nice/rascal_coefficients.pyx b/nice/rascal_coefficients.pyx
@@ -88,8 +88,7 @@ cpdef normalize_by_ps(double[:, :, :, :] coefficients):
 
 
 def get_rascal_coefficients(structures, HYPERS, n_types):
-
-
+
     sph = SPH(**HYPERS)
     try:
         n_max = HYPERS['max_radial']
@@ -108,9 +107,16 @@ def get_rascal_coefficients(structures, HYPERS, n_types):
 def get_rascal_coefficients_stared(task):
     return get_rascal_coefficients(*task)
 
-def get_rascal_coefficients_parallelized(structures, rascal_hypers, task_size = 100, num_threads = None):  
+def get_rascal_coefficients_parallelized(structures, rascal_hypers, mask = None, mask_id = None, task_size = 100, num_threads = None):  
     hypers = copy.deepcopy(rascal_hypers)
-
+
+    if mask is not None:
+        for f in structures:
+            mask_center_atoms_by_species(f,[mask])
+    if mask_id is not None:
+        for f in structures:
+            mask_center_atoms_by_id(f,[mask_id])
+
     if ('expansion_by_species_method' in hypers.keys()):
         if (hypers['expansion_by_species_method'] != 'user defined'):
             raise ValueError("for proper packing spherical expansion coefficients into [env index, radial/specie index, l, m] shape output should be uniform, thus 'expansion_by_species_method' must be 'user defined'")
@@ -134,10 +140,7 @@ def get_rascal_coefficients_parallelized(structures, rascal_hypers, task_size =
                 hypers['global_species'].append(int(specie))
 
         species = np.array(hypers['global_species']).astype(np.int32)
-
-
-
-
+
     if (num_threads is None):
         num_threads = cpu_count()
 
@@ -150,5 +153,11 @@ def get_rascal_coefficients_parallelized(structures, rascal_hypers, task_size =
     p.close()
     p.join()
     result = np.concatenate(result, axis = 0)
-    return split_by_central_specie(all_species, species, result)
-
+    if (mask is None and mask_id is None):
+        return split_by_central_specie(all_species, species, result)
+    else:
+        # just return the masked specie/atom
+        if mask is None:
+            return {mask_id: result}
+        else:
+            return {mask: result}
diff --git a/tools/compute_nice.py b/tools/compute_nice.py
@@ -0,0 +1,107 @@
+#!/usr/bin/python3
+
+import pickle
+import sys, os, argparse
+import ase.io as ase_io
+import numpy as np
+import tqdm
+from nice.transformers import *
+from nice.rascal_coefficients import get_rascal_coefficients_parallelized
+
+
+def main():
+    """
+    Command-line utility to compute NICE features given a precomputed pickle.
+    """
+
+    # Tweak the autogenerated help output to look nicer
+    formatter = lambda prog: argparse.HelpFormatter(prog, max_help_position=22)
+    parser = argparse.ArgumentParser(description=main.__doc__, formatter_class=formatter)
+
+    parser.add_argument('input', type=str, default="", nargs="?",
+                        help='XYZ file to load')
+    parser.add_argument('-o', '--output', type=str, default="",
+                        help='Output files prefix. Defaults to input filename with stripped extension')
+    parser.add_argument('--select', type=str, default=":",
+                        help='Selection of input frames. ASE format.')
+    parser.add_argument('--nice', type=str, default="nice.pickle",
+                        help='Definition of the NICE contraction. Output from optimize_nice.py')
+    parser.add_argument('--blocks', type=int, default=1,
+                        help='Number of blocks to break the calculation into.')
+
+
+    args = parser.parse_args()
+
+    filename = args.input
+    output = args.output
+    select = args.select    
+    nice = args.nice
+    nblocks = args.blocks
+
+    if output == "":
+        output = os.path.splitext(filename)[0]
+
+    print("Loading structures ", filename, " frames: ", select)
+    frames = ase_io.read(filename, index=select)
+    for f in frames:
+        if f.cell.sum() == 0.0:
+            f.cell = [100,100,100]
+        f.pbc = True
+        f.wrap(eps=1e-12)
+
+    aa = pickle.load(open(nice, "rb"))
+    hypers = aa["HYPERS"]
+    nice_sequence = aa["NICE"]
+
+    mask = hypers["reference-mask"]
+    try:
+        mask_id = hypers["reference-mask-id"]
+    except:
+        mask_id = -1
+    lmax = hypers["max_angular"]
+    scale = hypers["scale"]
+
+    # removes keys that are not needed for rascal
+    for k in ["nsph", "keep", "keep0", "screen", "screen0", "pcond", "scale",
+              "reference-file", "reference-sel", "reference-mask", "reference-mask-id"]:
+        hypers.pop(k)
+    print("HYPERPARAMETERS ", hypers)
+
+    print("Precomputing CG coefficients")
+    cglist = ClebschGordan(lmax)
+
+    lblock = len(frames)//nblocks
+    featinv = {}
+    for iblock in tqdm.tqdm(range(nblocks)):
+        print("Computing spherical expansion")
+        bstart = iblock*lblock
+        bend = (iblock+1)*lblock
+        if iblock == nblocks-1:
+            bend = len(frames)
+        coefficients = get_rascal_coefficients_parallelized(frames[bstart:bend], hypers, mask=(None if mask=="" else mask), mask_id=(None if mask_id<0 else mask_id))
+        # merge all the coefficients, as we want (for some reason) to apply the same NICE to all species. if mask has been specified, this does nothing
+        l = []
+        for f in coefficients.values():
+            l.append(f)
+        coefficients = np.vstack(l)    
+        coefficients *= scale
+
+        invariants_even = nice_sequence.transform(coefficients, return_only_invariants = True)
+
+        for k in invariants_even:
+            if k in featinv:
+                featinv[k].append(invariants_even[k]) 
+            else:
+                featinv[k] = [invariants_even[k]]
+
+    print("Saving to ", output)
+    for k in featinv:
+        featinv[k] = np.vstack(featinv[k])
+    pickle.dump(featinv, open(output, "wb"))
+
+if __name__ == '__main__':
+    main()
+
+
+
+
diff --git a/tools/optimize_nice.py b/tools/optimize_nice.py
@@ -0,0 +1,162 @@
+#!/usr/bin/python3
+
+import pickle
+import json
+import sys, os, argparse
+import ase.io as ase_io
+import numpy as np
+from nice.transformers import *
+from nice.rascal_coefficients import get_rascal_coefficients_parallelized
+
+
+def main():
+    """
+    Command-line utility to optimize the parameters of NICE features. 
+    Will output a pickled model which can then be used to predict features.    
+    """
+
+    # Tweak the autogenerated help output to look nicer
+    formatter = lambda prog: argparse.HelpFormatter(prog, max_help_position=22)
+    parser = argparse.ArgumentParser(description=main.__doc__, formatter_class=formatter)
+
+    parser.add_argument('input', type=str, default="", nargs="?",
+                        help='XYZ file to load')
+    parser.add_argument('-o', '--output', type=str, default="",
+                        help='Output files prefix. Defaults to input filename with stripped extension')
+    parser.add_argument('--select', type=str, default=":",
+                        help='Selection of input structures. ASE format.')
+    parser.add_argument('--nmax', type=int, default=6,
+                        help='Number of radial channels')
+    parser.add_argument('--lmax', type=int, default=4,
+                        help='Number of angular momentum channels')
+    parser.add_argument('--rcut', type=float, default=4,
+                        help='Radial cut off')
+    parser.add_argument('--sigma', type=float, default=0.5,
+                        help='Gaussian smearing')
+    parser.add_argument('--json', type=str, default='{}', help='Additional hypers, as JSON string')
+    parser.add_argument('--mask-id', type=int, default=-1,
+                        help='Central atom it to be selected')
+    parser.add_argument('--mask', type=str, default="",
+                        help='Central atom type to be selected')
+    parser.add_argument('--screen', type=int, default=1000,
+                        help='Target number of equivariants to be computed in the iteration step')
+    parser.add_argument('--screen0', type=int, default=500,
+                        help='Target number of invariants to be computed in the iteration step')   
+    parser.add_argument('--nsph', type=int, default=0,
+                        help='Target number of spherical expansion coefficients to keep. 0 defaults all')                                             
+    parser.add_argument('--keep', type=int, default=200,
+                        help='Target number of equivariants to be contracted to')
+    parser.add_argument('--keep0', type=int, default=400,
+                        help='Target number of invariants to be stored')
+
+
+    args = parser.parse_args()
+
+    filename = args.input
+    output = args.output
+    select = args.select
+    nmax = args.nmax
+    lmax = args.lmax
+    rcut = args.rcut
+    gs = args.sigma
+    mask = args.mask
+    mask_id = args.mask_id
+    nsph = args.nsph
+    fkeep = args.keep
+    fkeep0 = args.keep0
+    fscreen = args.screen
+    fscreen0 = args.screen0
+    json_hypers = json.loads(args.json)
+    pcond = 1e-4
+
+    if output == "":
+        output = os.path.splitext(filename)[0]
+
+
+    numax=4;
+    purify_take=50;
+    blocklist = [ ]
+    for nu in range(1, numax-1): # this starts from nu=2 actually
+        blocklist.append(
+            StandardBlock(ThresholdExpansioner(num_expand=fscreen),
+                         CovariantsPurifierBoth(max_take=purify_take),
+                         IndividualLambdaPCAsBoth(n_components=fkeep),
+                         ThresholdExpansioner(num_expand=fscreen0, mode='invariants'),
+                         InvariantsPurifier(),
+                         InvariantsPCA(n_components=fkeep0)) 
+                         )
+
+    # at the last order, we only need invariants
+    blocklist.append(
+            StandardBlock(None, None, None,
+                         ThresholdExpansioner(num_expand=fscreen0, mode='invariants'),
+                         InvariantsPurifier(),
+                         InvariantsPCA(n_components=fkeep0)) 
+                         )
+
+    # first-order equivariants are just combinations of the spherical coefficients, done by initial_pca
+    nice_sequence = StandardSequence(blocklist, initial_pca=IndividualLambdaPCAsBoth(n_components=nsph))
+
+    print("Loading structures ", filename, " structures: ", select)
+    structures = ase_io.read(filename, index=select)
+    for f in structures:
+        if f.cell.sum() == 0.0:  # fake PBC
+            f.cell = [100,100,100]
+            f.positions += np.asarray([50,50,50])
+        f.pbc = True
+        f.wrap(eps=1e-12)
+
+    print("Computing spherical expansion")
+
+    hypers = { **{
+        'interaction_cutoff': rcut,
+        'max_radial': nmax,
+        'max_angular': lmax,
+        'gaussian_sigma_constant': gs,
+        'gaussian_sigma_type': 'Constant',
+        'cutoff_smooth_width': 0.5,
+        'radial_basis': 'GTO'
+    }, **json_hypers }
+
+    coefficients = get_rascal_coefficients_parallelized(structures, hypers, mask=(None if mask=="" else mask), mask_id=(None if mask_id<0 else mask_id))    
+    # merge all the coefficients, as we want (for some reason) to apply the same NICE to all species. if mask has been specified, this does nothing
+    l = []
+    for f in coefficients.values():
+        l.append(f)
+    coefficients = np.vstack(l)    
+    if nsph == 0:
+        nsph = coefficients.shape[1]        
+    scale = 1.0/np.sqrt(np.sum(coefficients**2)/len(coefficients));
+    coefficients *= scale   # normalization of features
+
+    print("coefficients ", coefficients.shape)
+
+    hypers["keep"] = fkeep
+    hypers["keep0"] = fkeep0
+    hypers["screen"] = fscreen
+    hypers["screen0"] = fscreen0            
+    hypers["pcond"] = pcond    
+    hypers["reference-file"] = filename
+    hypers["reference-sel"] = select
+    hypers["reference-mask"] = mask
+    hypers["reference-mask-id"] = mask_id            
+    hypers["scale"] = scale
+    hypers["nsph"] = nsph    
+
+    print("Precomputing CG coefficients")
+    cglist = ClebschGordan(args.lmax)
+
+    print("Optimizing NICE")
+    nice_sequence.fit(coefficients, clebsch_gordan = cglist)
+
+    print("Dumping NICE model")    
+    pickle.dump( { 
+               "HYPERS" : hypers, 
+               "NICE": nice_sequence,
+             }, open(output+".pickle", "wb"))
+
+    #np.save(output+"-inv", np.hstack([rhoinv1, rhoinv2, rhoinv3, rhoinv4, rhoinv5]) )
+
+if __name__ == '__main__':
+    main()
+