Commit
version 3.2.1
tundeakins committed Jun 3, 2024
1 parent cc7f831 commit f054055
Showing 11 changed files with 304 additions and 156 deletions.
2 changes: 1 addition & 1 deletion CONAN3/VERSION.dat
@@ -1 +1 @@
3.2.0
3.2.1
143 changes: 118 additions & 25 deletions CONAN3/_classes.py

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion CONAN3/conf.py
@@ -1,5 +1,4 @@
from ._classes import load_lightcurves, load_rvs, fit_setup,_print_output
from .fit_data import run_fit
from .utils import ecc_om_par
import numpy as np

@@ -27,6 +26,7 @@ def fit_configfile(config_file = "input_config.dat", out_folder = "output",
verbose: bool;
show print statements
"""
from .fit_data import run_fit

lc_obj, rv_obj, fit_obj = load_configfile(config_file, init_decorr=init_decorr, verbose=verbose)
result = run_fit(lc_obj, rv_obj, fit_obj,out_folder=out_folder,
@@ -64,6 +64,9 @@ def create_configfile(lc_obj=None, rv_obj=None, fit_obj=None, filename="input_co
fit_obj : object;
Instance of CONAN.fit_setup() object and its attributes.
filename : str;
name of the configuration file to be saved.
"""
if lc_obj is None:
lc_obj = load_lightcurves()
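A note on the import shuffle above: fit_data.py (next diff) now imports create_configfile from conf.py at module level, so conf.py can no longer import run_fit at its own top level without creating a circular import; the import is therefore deferred into the body of fit_configfile. A minimal sketch of the pattern, with illustrative module names rather than the actual CONAN3 layout:

# confmod.py  (plays the role of CONAN3/conf.py)
def fit_configfile(config_file="input_config.dat"):
    # deferred import: fitmod imports from this module at load time,
    # so importing run_fit only when the function runs breaks the cycle
    from fitmod import run_fit
    return run_fit(config_file)

def create_configfile(*args):
    ...

# fitmod.py  (plays the role of CONAN3/fit_data.py)
from confmod import create_configfile  # safe: confmod's top level does not import fitmod

def run_fit(config_file):
    ...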
12 changes: 9 additions & 3 deletions CONAN3/fit_data.py
@@ -24,6 +24,7 @@
from celerite import GP as cGP
from copy import deepcopy
from .utils import gp_params_convert
from .conf import create_configfile
from scipy.stats import norm, uniform, lognorm, loguniform,truncnorm


@@ -112,6 +113,9 @@ def run_fit(lc_obj=None, rv_obj=None, fit_obj=None, statistic = "median", out_fo
if not os.path.exists(out_folder):
print(f"Creating output folder...{out_folder}")
os.mkdir(out_folder)
# print(f"Saving configuration to file: {out_folder}/config_save.dat")
create_configfile(lc_obj, rv_obj, fit_obj, f"{out_folder}/config_save.dat") #create config file


if os.path.exists(f'{out_folder}/chains_dict.pkl'):
if not rerun_result:
@@ -1453,10 +1457,10 @@ def run_fit(lc_obj=None, rv_obj=None, fit_obj=None, statistic = "median", out_fo
bpfull[[ijnames][0]] = bp

try:
medvals,maxvals = mcmc_outputs(posterior,jnames, ijnames, njnames, nijnames, bpfull, uwl, Rs_in, Ms_in, Rs_PDF, Ms_PDF,
medvals,maxvals,sigma1s = mcmc_outputs(posterior,jnames, ijnames, njnames, nijnames, bpfull, uwl, Rs_in, Ms_in, Rs_PDF, Ms_PDF,
nfilt, filnames, howstellar, extinpars, RVunit, extind_PDF,npl,out_folder)
except:
medvals,maxvals = mcmc_outputs(posterior,jnames, ijnames, njnames, nijnames, bpfull, uwl, Rs_in, Ms_in, Rs_PDF, Ms_PDF,
medvals,maxvals,sigma1s = mcmc_outputs(posterior,jnames, ijnames, njnames, nijnames, bpfull, uwl, Rs_in, Ms_in, Rs_PDF, Ms_PDF,
nfilt, filnames, howstellar, extinpars, RVunit, extind_PDF,npl,out_folder)

npar=len(jnames)
@@ -1468,6 +1472,8 @@ def run_fit(lc_obj=None, rv_obj=None, fit_obj=None, statistic = "median", out_fo
medp[[ijnames][0]]=medvals
maxp=initial
maxp[[ijnames][0]]=maxvals
stdev = np.zeros_like(initial)
stdev[[ijnames][0]] = sigma1s

#============================== PLOTTING ===========================================
print('\nPlotting output figures')
@@ -1481,7 +1487,7 @@ def run_fit(lc_obj=None, rv_obj=None, fit_obj=None, statistic = "median", out_fo

#save summary_stats and as a hidden files.
#can be used to run logprob_multi() to generate out_full.dat files for median posterior, max posterior and best fit values
stat_vals = dict(med = medp[jumping], max = maxp[jumping], bf = bpfull[jumping],
stat_vals = dict(med = medp[jumping], max = maxp[jumping], bf = bpfull[jumping], stdev=stdev[jumping],
T0 = T0_post, P = p_post, dur = Dur_post, evidence=evidence)
pickle.dump(stat_vals, open(out_folder+"/.stat_vals.pkl","wb"))

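Beyond the best-fit (bf), median (med), and maximum-posterior (max) vectors, the hidden .stat_vals.pkl now also stores the 1-sigma widths returned by mcmc_outputs. A short sketch of reading it back, assuming out_folder="output":

import pickle

stats = pickle.load(open("output/.stat_vals.pkl", "rb"))
print(stats.keys())                      # med, max, bf, stdev, T0, P, dur, evidence
med, sig = stats["med"], stats["stdev"]  # median and 1-sigma width per jump parameter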
107 changes: 97 additions & 10 deletions CONAN3/get_files.py
@@ -100,6 +100,7 @@ def download(self,sectors=None,author=None,exptime=None, select_flux="pdcsap_flu
print(f"downloaded lightcurve for sector {s}")

if hasattr(self,"_ok_mask"): del self._ok_mask
self.data_splitted = False

def discard_ramp(self,length=0.25, gap_size=1, start=True, end=True):
"""
@@ -156,7 +157,75 @@ def scatter(self):
"""
assert self.lc != {}, "No light curves downloaded yet. Run `download()` first."
for s in self.sectors:
self.lc[s].scatter()
ax = self.lc[s].scatter()
ax.set_title(f"sector {s}")

def split_data(self, gap_size=None, split_times=None, show_plot=True):
"""
Split the light curves into sections based on (1) gaps in the data or (2) given split time(s).
Parameters
----------
gap_size : float
minimum gap size at which the data will be split, in the same unit as the time axis. Default is None.
split_times : float, list
time(s) at which to split the data. Default is None.
show_plot : bool
show a plot of the split times. Default is True.
"""

assert self.lc != {}, "No light curves downloaded yet. Run `download()` first."
if isinstance(split_times,(int,float)): split_times = [split_times] #convert to list
if self.data_splitted:
print("data has already being split. Run `.download()` again to restart.")
return

remove_sectors = []
curr_sector = self.sectors.copy()
for s in curr_sector:
t = self.lc[s].time.value

if gap_size is not None:
self._find_gaps(t,gap_size)
if show_plot:
ax = self.lc[s].scatter()
[ax.axvline(t[i], color='r', linestyle='--') for i in self.gap_ind[1:-1]]
nsplit = len(self.gap_ind)-1
for i in range(len(self.gap_ind)-1):
self.lc[f"{s}_{i+1}"] = self.lc[s][(t >= t[self.gap_ind[i]]) & (t < t[self.gap_ind[i+1]])]

if split_times is not None:
split_times = np.array([0] +split_times +[t[-1]])
if show_plot:
ax = self.lc[s].scatter()
[ax.axvline(ti, color='r', linestyle='--') for ti in split_times[1:-1]]
nsplit = len(split_times)-1
for i in range(len(split_times)-1):
self.lc[f"{s}_{i+1}"] = self.lc[s][(t >= split_times[i]) & (t < split_times[i+1])]

if split_times is None and gap_size is None:
raise ValueError("either gap_size or split_time must be given")

remove_sectors.append(s)
self.sectors += [f"{s}_{i+1}" for i in range(nsplit)]

print(f"Sector {s} data splitted into {nsplit} chunks: {list(self.sectors)}")
#remove sectors that have been split
_ = [self.sectors.pop(self.sectors.index(s)) for s in remove_sectors]
self.data_splitted = True


def _find_gaps(self,t,gap_size):
"""
find gaps in the data
"""
gap = np.diff(t)
gap = np.insert(gap,len(gap),0) #append a diff of 0 at the end so gap matches t in length
self.gap_ind = np.where(gap > gap_size)[0]
self.gap_ind = np.array([0]+list(self.gap_ind)+[len(t)-1])
# self.gap_ind = np.append(0,self.gap_ind)
# self.gap_ind = np.append(self.gap_ind, len(t)-1)
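The splitting machinery above boils down to np.diff plus boolean masks. A self-contained illustration of _find_gaps and the chunking loop on a toy time axis (the gap_size of 1.0 and the array sizes are made up):

import numpy as np

t = np.concatenate([np.linspace(0, 5, 50), np.linspace(9, 14, 50)])  # two segments, 4-unit gap
gap = np.diff(t)
gap = np.insert(gap, len(gap), 0)                    # append 0 so gap matches t in length
gap_ind = np.where(gap > 1.0)[0]                     # indices where the gap exceeds gap_size
gap_ind = np.array([0] + list(gap_ind) + [len(t) - 1])
chunks = [t[(t >= t[gap_ind[i]]) & (t < t[gap_ind[i + 1]])] for i in range(len(gap_ind) - 1)]
print([len(c) for c in chunks])                      # [49, 50]; the strict '<' drops the last sample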


def save_CONAN_lcfile(self,bjd_ref = 2450000, folder="data", out_filename=None):
"""
@@ -227,26 +296,29 @@ def search(self,filters=None):
print(pd.DataFrame(data, columns=["obj_id_catname","file_key", "pi_name", "date_mjd_start",
"obs_total_exptime", "data_arch_rev",'status_published']))

def download(self, file_keys="all", aperture="DEFAULT", decontaminate=True, reject_highpoints=True, bg_MAD_reject=3,verbose=True):
def download(self, file_keys="all", aperture="DEFAULT", decontaminate=True, reject_highpoints=True, bg_MAD_reject=3,
outlier_clip=4, outlier_window_width=11,outlier_clip_niter=1, verbose=True):
"""
Download CHEOPS light curves from the cheops database using the dace_query package
Parameters
----------
file_keys : list of str
List of file keys to download. if "all" is given, all file_keys shown in `.search()` result will be downloaded.
aperture : str
Aperture to use for the light curve. Default is "DEFAULT". The options are: "R15","R16",...,"R40","RINF","RSUP".
decontaminate : bool
Decontaminate the light curve. Default is True.
reject_highpoints : bool
Reject high points in the light curve. Default is True.
bg_MAD_reject : float
Number of median absolute deviations to reject high background points. Default is 3.
outlier_clip : float
Number of standard deviations to clip outliers. Default is 4.
outlier_window_width : int
Window width to use for clipping outliers. Default is 11.
outlier_clip_niter : int
Number of iterations to clip outliers. Default is 1.
"""


@@ -268,6 +340,9 @@ def download(self, file_keys="all", aperture="DEFAULT", decontaminate=True, reje
mask = d.lc["bg"] > bg_MAD_reject*np.nanmedian(d.lc["bg"])
_,_,_ = d.mask_data(mask, verbose=False)

#iterative sigma clipping
for _ in range(outlier_clip_niter): t_,f_,e_ = d.clip_outliers(outlier_clip,outlier_window_width, verbose=verbose)

self.lc.append(d)

def scatter(self, figsize=(10,4)):
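The new outlier_clip* arguments simply drive repeated calls to the dataset's clip_outliers method; conceptually this is iterative sigma clipping of the flux against a running median. A rough numpy sketch of the idea, not the actual clip_outliers implementation:

import numpy as np

def sigma_clip(flux, clip=4, width=11, niter=1):
    keep = np.ones(len(flux), dtype=bool)
    for _ in range(niter):
        f = flux[keep]
        # running median over `width` points as the smooth reference
        med = np.array([np.median(f[max(0, i - width//2): i + width//2 + 1]) for i in range(len(f))])
        resid = f - med
        out = np.abs(resid) > clip * np.std(resid)   # points too far from the local median
        keep[np.where(keep)[0][out]] = False         # drop them, then optionally iterate
    return keep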
@@ -551,7 +626,7 @@ def save_CONAN_lcfile(self,bjd_ref = 2450000, folder="data"):



def get_parameters(planet_name, database="exoplanetarchive", table="pscomppars", overwrite_cache=False):
def get_parameters(planet_name, database="exoplanetarchive", table="pscomppars", ps_select_index=None,overwrite_cache=False):
"""
get stellar and planet parameters from nasa exoplanet archive or exoplanet.eu
@@ -561,6 +636,9 @@ def get_parameters(planet_name, database="exoplanetarchive", table="pscomppars",
Name of the planet.
database : str
Database to use. Default is "exoplanetarchive". Options are "exoplanetarchive" or "exoplanet.eu".
table : str
Table to use. Default is "pscomppars". Options are "pscomppars" or "ps".
overwrite_cache : bool
Overwrite the cached parameters. Default is False.
"""
@@ -580,14 +658,23 @@
if table=="pscomppars":
df = NasaExoplanetArchive.query_object(planet_name,table=table)
elif table=="ps":
df = NasaExoplanetArchive.query_object(planet_name,table=table)
df = df[df['default_flag']==1]
ref_table = NasaExoplanetArchive.query_object(planet_name,table=table)
ref_table["pl_refname"] = [ref.split('refstr=')[1].split(' href')[0] for ref in ref_table["pl_refname"]]
ref_table["sy_refname"] = [ref.split('refstr=')[1].split(' href')[0] for ref in ref_table["sy_refname"]]
print("\nAvailable parameters:\n---------------------")
print(ref_table[["pl_name","default_flag","pl_refname","sy_refname",]].to_pandas())
if ps_select_index is None:
df = ref_table[ref_table['default_flag']==1]
print(f"\nselecting default parameters: {df['pl_refname'][0]}. Set `ps_select_index` [between 0 – {len(ref_table)-1}] to select another")
else:
ref_table["index"] = np.arange(len(ref_table))
df = ref_table[ref_table["index"]==ps_select_index]
print(f"\nselecting parameters: {df['pl_refname'][0]}")
else:
raise ValueError("table must be either 'pscomppars' or 'ps'")

df = df.to_pandas()


params["star"]["Teff"] = (df["st_teff"][0],df["st_tefferr1"][0])
params["star"]["logg"] = (df["st_logg"][0],df["st_loggerr1"][0])
params["star"]["FeH"] = (df["st_met"][0],df["st_meterr1"][0])
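With table="ps", the function now prints every published parameter set for the planet (with its pl_refname/sy_refname references) before selecting either the default_flag row or the row at ps_select_index. A hedged usage sketch; the planet name is illustrative and the import path is assumed from the file layout:

from CONAN3.get_files import get_parameters

# take the archive's default parameter set for the planet
params = get_parameters("WASP-127 b", table="ps")

# or pick a specific row from the printed reference table
params = get_parameters("WASP-127 b", table="ps", ps_select_index=2)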
17 changes: 9 additions & 8 deletions CONAN3/logprob_multi.py
@@ -5,6 +5,7 @@
from .utils import rho_to_tdur, gp_params_convert
import matplotlib
from ._classes import __default_backend__
from os.path import splitext


def logprob_multi(p, args,t=None,make_outfile=False,verbose=False,debug=False,get_model=False,out_folder=""):
@@ -324,9 +325,9 @@ def logprob_multi(p, args,t=None,make_outfile=False,verbose=False,debug=False,ge

out_data = np.hstack((out_data,phases))
if make_outfile:
outfile=out_folder+"/"+name.split(".")[0]+'_lcout.dat'
outfile=out_folder+"/"+splitext(name)[0]+'_lcout.dat'
if verbose: print(f"Writing LC output to file: {outfile}")
np.savetxt(outfile,out_data,header=header_fmt.format(*header),fmt='%-16.6f')
np.savetxt(outfile,out_data,header=header_fmt.format(*header),fmt='%-16.6f',delimiter="\t")

elif useGPphot[j] in ['y','ce']:
gppars = params_all[len(params):len(params)+len(GPparams)] # the GP parameters for all lcs
@@ -384,9 +385,9 @@ def logprob_multi(p, args,t=None,make_outfile=False,verbose=False,debug=False,ge

out_data = np.hstack((out_data,phases))
if make_outfile:
outfile=out_folder+"/"+name.split(".")[0]+'_lcout.dat'
outfile=out_folder+"/"+splitext(name)[0]+'_lcout.dat'
if verbose: print(f"Writing LC output with GP({which_GP}) to file: {outfile}")
np.savetxt(outfile,out_data,header=header_fmt.format(*header),fmt='%-16.6f')
np.savetxt(outfile,out_data,header=header_fmt.format(*header),fmt='%-16.6f',delimiter="\t")


# now do the RVs and add their probabilities to the model
@@ -534,10 +535,10 @@ def logprob_multi(p, args,t=None,make_outfile=False,verbose=False,debug=False,ge
header_fmt += "{:<16s}\t"
header += [f"phase_{n+1}"] if npl>1 else ["phase"]
if make_outfile:
outfile = out_folder+"/"+RVnames[j].split(".")[0]+'_rvout.dat'
outfile = out_folder+"/"+splitext(RVnames[j])[0]+'_rvout.dat'
out_data = np.hstack((out_data,phases))
if verbose: print(f"Writing RV output to file: {outfile}")
np.savetxt(outfile,out_data,header=header_fmt.format(*header),fmt='%-16.6f')
np.savetxt(outfile,out_data,header=header_fmt.format(*header),fmt='%-16.6f',delimiter="\t")
# pd.DataFrame(out_data,columns=header).to_csv(outfile,sep="\t",index=False, float_format='%-16.6f')


@@ -593,9 +594,9 @@ def logprob_multi(p, args,t=None,make_outfile=False,verbose=False,debug=False,ge

if make_outfile:
out_data = np.hstack((out_data,phases))
outfile = out_folder+"/"+RVnames[j].split(".")[0]+'_rvout.dat'
outfile = out_folder+"/"+splitext(RVnames[j])[0]+'_rvout.dat'
if verbose: print(f"Writing RV output with GP({which_GP}) to file: {outfile}")
np.savetxt(outfile,out_data,header=header_fmt.format(*header),fmt='%-16.6f')
np.savetxt(outfile,out_data,header=header_fmt.format(*header),fmt='%-16.6f',delimiter="\t")


if get_model:
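Replacing name.split(".")[0] with os.path.splitext(name)[0] throughout fixes output naming for input files whose stems contain dots: split(".") truncates at the first dot, while splitext strips only the final extension. For example:

from os.path import splitext

name = "WASP-127_S1.V2.dat"                # hypothetical input file name
print(name.split(".")[0])                  # WASP-127_S1 (everything after the first dot lost)
print(splitext(name)[0] + "_lcout.dat")    # WASP-127_S1.V2_lcout.dat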
54 changes: 0 additions & 54 deletions CONAN3/models.py
@@ -339,33 +339,6 @@ def get_value(self, tarr, ss=None,grprs=0, vcont=0):
return mm, model_components


# #==== set up for LC and baseline creation
# col5 = np.copy(col5_in)
# col3 = np.copy(col3_in)
# col4 = np.copy(col4_in)
# col6 = np.copy(col6_in)
# col7 = np.copy(col7_in)
# col8 = np.copy(col8_in)
# ts = tt-np.median(tt)

# # MONIKA: added least square optimisation for baselines
# if (baseLSQ == 'y'):
# #bvar contains the indices of the non-fixed baseline variables
# coeffstart = np.copy(bases[bvar])
# icoeff,dump = scipy.optimize.leastsq(para_minfunc, coeffstart, args=(bvar, mm, ft, ts, col5, col3, col4, col6, col7,col8))
# coeff = np.copy(bases)
# coeff[bvar] = np.copy(icoeff)
# else:
# coeff = np.copy(bases) # the input coefficients

# bfunc,spl_comp = basefunc_noCNM(coeff, ts, col5, col3, col4, col6, col7,col8,ft/mm,useSpline)
# mod=mm*bfunc

# lc_result = SimpleNamespace(planet_LC=mm, full_LCmod=mod, LC_bl=bfunc, spline=spl_comp )

# return lc_result


def basefunc_noCNM(coeff, LCdata,res,useSpline):
# the full baseline function calculated with the coefficients given; of which some are not jumping and set to 0
t, flux, err, col3, col4, col5, col6, col7, col8 = LCdata.values()
@@ -548,33 +521,6 @@ def RadialVelocity_Model(tt,T0,per,K,sesinw=0,secosw=0,Gamma=0,npl=None):

return mod_RV, model_components

# bfstartRV= 1+7*npl + nttv + nddf + nocc*5 + nfilt*2 + nphot+ 2*nRV + nphot*22 +j*12 #the first index in the param array that refers to a baseline function
# incoeff = list(range(bfstartRV,bfstartRV+12)) # the indices for the coefficients for the base function

# ts = tt-np.median(tt)

# if (baseLSQ == 'y'):
# #get the indices of the variable baseline parameters via bvar (0 for fixed, 1 for variable)
# ivars = np.copy(bvarsRV[j][0])

# incoeff=np.array(incoeff)
# coeffstart = np.copy(params[incoeff[ivars]])
# if len(ivars) > 0:
# icoeff,dump = scipy.optimize.leastsq(para_minfuncRV, coeffstart, args=(ivars, mod_RV, RVmes, ts, bis, fwhm, contra))
# coeff = np.copy(params[incoeff]) # the full coefficients -- set to those defined in params (in case any are fixed non-zero)
# coeff[ivars] = np.copy(icoeff) # and the variable ones are set to the result from the minimization
# else:
# coeff = np.copy(params[incoeff])
# else:
# coeff = np.copy(params[incoeff]) # the coefficients for the base function

# bfuncRV,spl_comp=basefuncRV(coeff, ts, bis, fwhm, contra,RVmes-mod_RV ,useSpline)
# mod_RVbl = mod_RV + bfuncRV

# rv_result = SimpleNamespace(planet_RV=mod_RV-Gamma_in, full_RVmod=mod_RVbl, RV_bl=bfuncRV, spline=spl_comp )
# return rv_result


def para_minfuncRV(icoeff, ivars, mod_RV, RVdata):
"""
least square fit to the baseline after subtracting model RV mod_RV