From 4a966ee635caeef0b722f30959e441c83aadf366 Mon Sep 17 00:00:00 2001
From: Shreya <genericp3rson@gmail.com>
Date: Thu, 21 Jul 2022 09:49:23 -0500
Subject: [PATCH] Updated healthcare referral starter kit queries

---
 .DS_Store                                     | Bin 8196 -> 12292 bytes
 .../db_scripts/queries/Print_community.gsql   |  20 -
 .../db_scripts/queries/README.gsql            |   7 +
 .../db_scripts/queries/algo_louvain.gsql      | 345 ------------------
 .../db_scripts/queries/algo_page_rank.gsql    |  37 --
 .../queries/ex1_common_patients.gsql          |  39 --
 .../queries/ex2_create_referral_edge.gsql     |  27 --
 .../db_scripts/queries/ex2_main_query.gsql    |   6 -
 .../db_scripts/queries/get_claims.gsql        |  11 -
 .../queries/get_claims_of_prescriber.gsql     |  23 ++
 .../queries/get_common_patients.gsql          |  64 ++++
 .../queries/get_joint_prescribers.gsql        |  43 +++
 .../queries/get_k_hop_neighbors.gsql          |  46 ++-
 .../db_scripts/queries/get_patients.gsql      |  14 -
 .../queries/get_patients_of_prescriber.gsql   |  29 ++
 .../db_scripts/queries/get_prescribers.gsql   |  22 --
 .../queries/get_referral_community.gsql       |  31 ++
 .../queries/infer_all_referrals.gsql          |  16 +
 .../db_scripts/queries/infer_referrals.gsql   |  50 +++
 .../db_scripts/queries/tg_louvain.gsql        | 228 ++++++++++++
 .../db_scripts/queries/tg_page_rank.gsql      |  80 ++++
 21 files changed, 602 insertions(+), 536 deletions(-)
 delete mode 100644 Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/Print_community.gsql
 create mode 100644 Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/README.gsql
 delete mode 100644 Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/algo_louvain.gsql
 delete mode 100644 Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/algo_page_rank.gsql
 delete mode 100644 Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/ex1_common_patients.gsql
 delete mode 100644 Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/ex2_create_referral_edge.gsql
 delete mode 100644 Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/ex2_main_query.gsql
 delete mode 100644 Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_claims.gsql
 create mode 100644 Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_claims_of_prescriber.gsql
 create mode 100644 Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_common_patients.gsql
 create mode 100644 Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_joint_prescribers.gsql
 delete mode 100644 Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_patients.gsql
 create mode 100644 Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_patients_of_prescriber.gsql
 delete mode 100644 Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_prescribers.gsql
 create mode 100644 Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_referral_community.gsql
 create mode 100644 Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/infer_all_referrals.gsql
 create mode 100644 Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/infer_referrals.gsql
 create mode 100644 Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/tg_louvain.gsql
 create mode 100644 Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/tg_page_rank.gsql

diff --git a/.DS_Store b/.DS_Store
index 651b7e086a0c51ec938c4c200db23470566f2628..6924e73579fdbae54cbe3944ff6b2310412d60b8 100644
GIT binary patch
literal 12292
zcmeHNT~8cU7=EXvb_rB5O7udLHEmPUc5tB<OBxfFk6I$w$~N^v4bFZPHXUX*`%y$f
z!?peg|AM#v5`TvGUg7h+XO^8aK!IXQQ_e|d&dm9GKj+MO-g896o9<M{L`Fm;hl}~p
zAd2ylAHYj0v7XnSbRH!huFJUaJS0+)RjErPHSzbfe)9^>fhF?ahig<8C9wEkfE0l=
zAq^#3fkXlFD)J?*3GviM-I_{!4=n@fX#Ffo+PErOA4@bJ#+7mymq7<gl0QNA`Hm*M
z&FSa?DAaVU2)%syTE}b1>fXLGy4zReGntVMDWh*wT1vBnr#i;iz$lMkk0we39n<Cu
zq+N0va2jwLa2hz18sNVVDVO=*n^Jdo8gLpo(;DFXkjKS*D#^D^u2TnYTtc&+OK44a
z<`S1>`6Qo8@@<o&2FWvBk(;hw0x>+(9p(9?-Bglqo7{94Pbp)Zgff8(lc&^?=kbE%
zO=;$CP6JK@`)J@jlwFZ;ff3uXE-}#JQ{cuf(wzr_G<dg#IwH#oTKe)p8fZ`KTbExI
zUDi>b!MWb2`22b}SEL`vc}|Kl3#>f{#Jwg}8{K-pXL?L1)~;aewFDbid(^;(!Oq=?
z!#Kt)p!7blZv%Wb@%&J8avz~*sQlIxr4#Q{T1u`6Kf}wvr^!osz12t}27i?sVZGHZ
z6#n+ko$tHw&c)ot+(7Q9(rUb3Y6tC3YdUEDz_&+n9G1J!LHTjLw!S>{;Y=L0gL+hJ
zYKMBYh9*CLUyrKs`gGihsx9qXG8MW0T>tXW$D5mXz8t@q|6=ma*3JCp<ahzkw<fl>
z`g6mVKfk-Q{4`vP;@^xndC7z<monZ3Tgq#--?LIP3gc)6bjj28WBJY^JmeYtfZomc
z&H+AW`^Y)avON5i&^&Eln7mE<pV${(iEc7B|42%(V+D5Px9;zm*NVw3uz^@vU0Sv^
zF*AwxPb#){%+XHUPXE(4drYY_78rzP+n8578vW!SqlQ_zAsZQ8GX|R0^)M{gLAwU*
z#>nLiBA0>A%RcO4=I(v1rhl229O;d)*u?G06V9^7&|St_nJL@7It@4tI1M-rI1M-r
zoCytl04d+cYU_bKmU$p^U&dq-_ZP0YxKg%up}Ah`=*o{s-5~E<Kz~HzMXU$8BCKPV
zx}#W|SiBcwIIaC}!v!K7_R5s&NlUgtXodUd0eD`}6)E@Av0I<%N4%TfnZKgZ37!LJ
zVpU7|xj*mAH;_I?tR=)-Rd@YUyZxx2K`ndmPP$n-h)Y;>uD&!KuF@l{(^p}cfZW*<
z5#3sph7(HN(*?xIjI4x~T^AYOu}4yW`6Gp_Zpb~=B;!myv{q7G5rs3#)PgP^g0GDv
z^3&FmW1h2+G}SRs?VZvfZJy>J^5=;aqK*a7qfV;IN%!1`eQv_|jp5%1YN361dA8+N
zUrKXuwRf%q)Oz0-BTyBUZwBM>1PJ+Lw91_0DB^iVJ%S~x&`1WmGID0gI(h_3CaOkx
zyL9%G^l;kr$h4Frhte)V2KqO@4&p^)X-Y<|93rdwWt!e67CnAH5iDZ+CAFE22V`_e
zFZJF{d1(&gmgGBl1!u*AjM>)Jb3Ezx*ARK++z_gbDtIFLwZHCJlyzWBAG0m(Oz+{j
zBYHsBvx@l6mZW4;Q=FTo_SFH@_*Fj9ag7i8pp%a7^1HeZ9?Ho1eevAU67onRztkRM
ztuOS73}=W0dY6pec6Q(kcU3ljciv297tmwOHGM{G?-{EnpwGDNH1t6&_2ypwC#WU!
zMeH;8WVmAdHm$E4C*X?Ijvf(AQ@sZ7;cU;SVpqpSjdL_n{tM(;LwV2ohiJ%_Q9btg
zuE!oBc9O#XA{SsQq>%yK<_<ppr;XgrX~1dV?bLwRH(Q#WhHTt6*)MUgJ&)^WTuf}Y
z%?BhrTZaJfu3PcQ3#Ns(nM&Gjo17!C-0b4NwnG5+KBxQtyZ=AeXNUZA_22#0{r~?1
Dl5wX{

delta 164
zcmZokXmOBWU|?W$DortDU;r^WfEYvza8E20o2Vx_*+7Ry637QjFapIH7;=Fyl_8HI
zg&}dXpu}SK&Fma39IPOXW|IxnWhNgJ2-v(xa0cV%b;41MlR3m&C*Kxrm^@ibdozc4
xCbJ+j$TXlO+(5z=WYK0prti#?`E@KO+w(|Fj?v+m9M3a#GNX{%=3eoKOaOy>A^QLT

diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/Print_community.gsql b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/Print_community.gsql
deleted file mode 100644
index 94cf234..0000000
--- a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/Print_community.gsql
+++ /dev/null
@@ -1,20 +0,0 @@
-CREATE OR REPLACE QUERY print_community(vertex<Prescriber> inputPrescriber) FOR GRAPH MyGraph { 
-  /* Write query logic here */ 
-  //PRINT "Print_community works!";
-	
-	ListAccum<EDGE> @@edgeList;
-	
-	SumAccum<int> @@cid;
-	
-	Start={inputPrescriber};
-	Start=Select s from Start:s post-accum @@cid += s.communityId;
-	
-	Start = {Prescriber.*};
-	
-	Start = select s from Start:s-(referral>:e)-:t 
-	        where s.communityId == @@cid and s.communityId == t.communityId
-	        accum @@edgeList += e;
-	
-	print Start;
-	print @@edgeList;
-}
\ No newline at end of file
diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/README.gsql b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/README.gsql
new file mode 100644
index 0000000..e374870
--- /dev/null
+++ b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/README.gsql
@@ -0,0 +1,7 @@
+CREATE QUERY README() FOR GRAPH MyGraph {
+    PRINT "Healthcare - Referral networks, Hub (PageRank), & Community Detection";
+    PRINT "Investigating healthcare referral networks";
+    // No parameters and no graph traversal: this query only prints the kit title above and the run-order notes below.
+    PRINT "The queries tg_louvain.gsql and infer_all_referrals.gsql MUST be";
+    PRINT "run in that order. tg_page_rank.gsql must also be run.";
+}
\ No newline at end of file
diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/algo_louvain.gsql b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/algo_louvain.gsql
deleted file mode 100644
index 346d211..0000000
--- a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/algo_louvain.gsql
+++ /dev/null
@@ -1,345 +0,0 @@
-CREATE OR REPLACE QUERY algo_louvain(INT iter1 = 10, INT iter2 = 10, 
-        INT iter3 = 10, INT split = 10, INT output_level = 0) FOR GRAPH MyGraph {
-
-/*
- * Louvain Method with Parallelism and Refinement
- * https://arxiv.org/pdf/1304.4453
- * The minimum label heuristics are implemented: https://doi.org/10.1016/j.parco.2015.03.003
- * iter: There are three phases in the algorithm -- move, merge and refine. 
- * Their max number of iterations are set by iter1, iter2, iter3 respectively.
- * split: To save memory, split number is 10 by default. When the split number 
- * is larger, the query is closer to sequential Louvain Method, which is slower. 
- * When the split number is 1, the query is parallel, but requires more memory. 
- * output_level: 0, only list number; 1, also list members
- * fComm, fDist: files to store community label and community distribution
- */
-
-    TYPEDEF TUPLE <INT csize, INT number> cluster_num;
-    TYPEDEF TUPLE <VERTEX node, INT cid, FLOAT delta_Q> v_delta_Q;
-    HeapAccum<v_delta_Q>(1, delta_Q DESC, cid ASC) @largest_delta_Q;   // if delta_Q is the same, select the one with mininal vid 
-    MapAccum<INT, FLOAT> @@tot_incident_cluster;   // sun of weight incident to clusters
-    MapAccum<INT, INT> @@cluster_sizes;                // size of a cluster
-    MapAccum<INT, FLOAT> @weight_to_cluster;  // weight from one vertex incident to that cluster
-
-    SumAccum<FLOAT> @@total_weight;   // total weight of all edges
-
-    SumAccum<FLOAT> @weight;          // total weight incident to this vertex
-
-    SumAccum<FLOAT> @cweight;       // total weight incident to this aggregate vertex
-
-    SumAccum<INT> @uid;        // which vertex it belongs to
-
-    SumAccum<INT> @cid;        // which cluster it belongs to
-
-    SumAccum<INT> @vid;        // internal id
-
-    SumAccum<FLOAT> @delta_Q;         // contribution to the modularity
-
-    SumAccum<FLOAT> @@modularity;
-
-    SumAccum<FLOAT> @@modularity2;
-
-    MapAccum<INT, MapAccum<INT, FLOAT>> @@weight_to_cluster_map;   // calculate edges between communities 
-
-    MapAccum<INT, SetAccum<INT>> @@move_comm; // map of communities that changed its community id
-
-    MapAccum<INT, MinAccum<VERTEX>> @@represent_map;
-
-    SetAccum<VERTEX> @@represent_set;
-
-    MapAccum<INT, FLOAT> @@vertex_map;
-
-    MapAccum<INT, MapAccum<INT, FLOAT>> @@edge_map;
-
-    HeapAccum<cluster_num>(100, csize ASC) @@cluster_dist;
-
-    MapAccum<INT, INT> @@cluster_map;
-
-    MapAccum<INT, ListAccum<INT>> @@cluster_members;
-
-    FLOAT last_modularity = 0;
-
-    FLOAT last_modularity2 = 0;
-
-    INT iteration;
-
-    INT Iter1; 
-
-    FLOAT epsilon = 0.0001;
-
-    INT iteration2;
-
-    INT partitions;
-
-    INT loop;
-
-    INT debug = 0;  // debug: 0, no modularity info; 1, show debug log; 2, modularity for each iteration
-
-
-
-    partitions = split;
-
-    CASE WHEN split < 1 THEN
-        partitions = 1;
-    END;
-
-        
-
-// Initialize: count edges and set a unique cluster ID for each vertex
-    Start (ANY) = {Prescriber.*};
-
-    S (ANY) = SELECT s 
-        FROM Start:s -((referral>|<referral):e)- :t
-        ACCUM @@total_weight += e.num_patient,
-          s.@weight += e.num_patient
-      POST-ACCUM s.@vid = getvid(s),
-                       s.@uid = s.@vid,
-                       s.@cid = s.@vid;  // Label each vertex with its own internal ID
-// Special first iteration of Phase 1
-
-    iteration = 1;
-
-    S = SELECT s 
-        FROM Start:s -((referral>|<referral):e)- :t
-        WHERE s.@cid > t.@cid
-        ACCUM s.@largest_delta_Q += v_delta_Q(t, t.@cid, e.num_patient - 2 * s.@weight * s.@weight/ @@total_weight) 
-            // weight_to_cluster is just e.num_patient
-            POST-ACCUM INT best_cluster = s.@largest_delta_Q.top().cid,
-                IF s.@largest_delta_Q.size() > 0 and s.@largest_delta_Q.top().delta_Q > 0 and s.@cid != best_cluster THEN 
-                   s.@cid = best_cluster
-              END,
-                s.@largest_delta_Q.clear();
-
-    S = SELECT s
-        FROM Start:s-((referral>|<referral):e)-:t
-        WHERE s.@cid == t.@cid
-        ACCUM @@modularity += e.num_patient - s.@weight * t.@weight / (@@total_weight);
-
-    @@modularity = @@modularity / @@total_weight;                      
-    PRINT iteration AS Phase1Iter, @@modularity;
-    log(debug > 0, "[redrain]//move", iteration, @@modularity);
-
-        
-
-// Phase 1 -- Move
-
-// For each vertex, calculate the change in modularity FROM adding it to each of the nearby clusters
-
-// Add vertex to cluster with highest positive change in modularity
-
-// Repeat the above until no vertices change cluster anymore
-
-    S = SELECT s 
-        FROM Start:s
-        ACCUM @@tot_incident_cluster += (s.@cid -> s.@weight); 
-            
-    iteration = 1;
-    Iter1 = iter1 - 1;
-              
-    WHILE (iteration < 2 OR @@modularity - last_modularity > epsilon) LIMIT Iter1 DO
-        iteration = iteration + 1;
-        loop = 0;
-        WHILE (loop < partitions) DO 
-            S = SELECT s 
-                FROM Start:s -((referral>|<referral):e)- :t
-                WHERE s.@uid % partitions == loop    // for different split
-                    // At least one cluster not singlet(a cluster on its own). If both clusters are singlets, consider only when the label of target is smaller to avoid swap
-                    AND (( abs(s.@weight - @@tot_incident_cluster.get(s.@cid)) > epsilon   // s is not a singlet 
-                          OR abs(t.@weight - @@tot_incident_cluster.get(t.@cid)) > epsilon )     // or t is not a singlet
-                          OR (abs(s.@weight - @@tot_incident_cluster.get(s.@cid)) < epsilon      // s is a singlet 
-                          AND abs(t.@weight - @@tot_incident_cluster.get(t.@cid)) < epsilon      // t is also a singlet
-                         AND s.@cid > t.@cid) )                                               // consider only when target label is smaller
-                    ACCUM s.@weight_to_cluster += (t.@cid -> e.num_patient)
-                    POST-ACCUM INT best_cluster = s.@cid,
-                               FLOAT max_delta_Q = 0.0,
-                               FLOAT delta_Q_new = 0.0,
-                               FOREACH (cluster, weightToC) IN s.@weight_to_cluster DO   //would be better if this can be distributed
-                                   FLOAT incident = @@tot_incident_cluster.get(cluster),
-                                   delta_Q_new = weightToC - 2 * incident * s.@weight/ @@total_weight,
-                                   IF delta_Q_new > max_delta_Q OR (abs(delta_Q_new - max_delta_Q) < epsilon AND cluster < best_cluster) THEN   // when delta_Q_new is equal to max_delta_Q, and the cluster label is smaller, also change 
-                                       max_delta_Q = delta_Q_new,
-                                       best_cluster = cluster
-                                   END
-                               END,
-                             IF s.@cid != best_cluster THEN 
-                                   @@tot_incident_cluster += (s.@cid -> (-1 * s.@weight)),
-                                   @@tot_incident_cluster += (best_cluster -> s.@weight),
-                                   s.@cid = best_cluster
-                               END,
-                               s.@weight_to_cluster.clear();
-                    loop = loop + 1;
-                END;
-            last_modularity = @@modularity;
-            @@modularity = 0;
-            T1 = SELECT s
-                 FROM Prescriber:s-((referral>|<referral):e)-:t
-                 WHERE s.@cid == t.@cid
-                 ACCUM @@modularity += e.num_patient - s.@weight * t.@weight / (@@total_weight);
-            @@modularity = @@modularity / @@total_weight;                      
-            PRINT iteration AS Phase1Iter, @@modularity;
-            log(debug > 0, "[redrain]//move", iteration, @@modularity);
-        END;
-
-// Phase 2 --  Merge     
-    iteration2 = 0;
-
-    WHILE (iteration2 < 2 OR @@modularity2 - last_modularity2 > epsilon) LIMIT iter2 DO
-        iteration2 = iteration2 + 1;
-        Start = SELECT s
-                FROM Start:s
-                ACCUM s.@uid = s.@cid;      
-        // Select the vertices with minimal internal id to represent the coarsened graph
-        Start = SELECT s
-                FROM Start:s 
-                ACCUM @@represent_map += (s.@cid -> s);
-        
-        FOREACH (key, value) IN @@represent_map DO
-                @@represent_set += value;                       
-        END;      
-        represent = {@@represent_set};
-        @@represent_map.clear();
-        @@represent_set.clear();
-        log(debug > 0, "[redrain]//2_merge", represent.size()); //@@cluster_sizes.size());
-
-    // Get @cweight from totalIncident
-        represent = SELECT s
-                    FROM represent:s
-                    ACCUM s.@cweight = @@tot_incident_cluster.get(s.@uid),
-                          @@cluster_sizes += (s.@cid -> 1);
-
-        log(debug > 1, "[redrain]//2_merge", @@weight_to_cluster_map.size());
-        iteration = 0;
-        last_modularity = 0;
-        @@modularity = 0;
-     
-        WHILE (iteration < 2 OR @@modularity - last_modularity > epsilon) limit iter1 DO
-                iteration = iteration + 1;
-        
-            // Calculate.num_patient incident from vertex to cluster in coarsened graph; change every interation
-            S = SELECT s
-                FROM Start:s -((referral>|<referral):e)-:t
-                WHERE s.@cid != t.@cid AND @@tot_incident_cluster.get(s.@uid) > 0 AND @@tot_incident_cluster.get(t.@cid) > 0   //@@tot_incident_cluster keeps changing, can be 0
-                ACCUM @@weight_to_cluster_map += (s.@uid -> (t.@cid -> e.num_patient));  // from s, incident to some clusters. Not consider the same cluster
-            represent = SELECT s 
-                      FROM represent:s
-                        POST-ACCUM INT best_cluster = s.@cid,
-                                   FLOAT max_delta_Q = 0.0,
-                                   FLOAT delta_Q_new = 0.0,
-                                   FOREACH (cluster, weightToC) IN @@weight_to_cluster_map.get(s.@uid) DO 
-                                       FLOAT incident = @@tot_incident_cluster.get(cluster),
-                                       IF @@cluster_sizes.get(s.@cid) == 1 AND @@cluster_sizes.get(cluster) == 1 AND s.@cid < cluster THEN
-                                           CONTINUE
-                                       END,
-                                       delta_Q_new = weightToC - 2 * incident * s.@cweight/ @@total_weight, //total weight should be the same
-                                       IF delta_Q_new > max_delta_Q OR abs(delta_Q_new - max_delta_Q) < epsilon AND cluster < best_cluster THEN      // new cluster is smaller then the current best cluster
-                                           max_delta_Q = delta_Q_new,
-                                           best_cluster = cluster
-                                       END
-                                   END,
-                                    IF s.@cid != best_cluster THEN
-                                        @@tot_incident_cluster += (s.@cid -> (-1 * s.@cweight)),
-                                        @@tot_incident_cluster += (best_cluster -> s.@cweight),
-                                        @@move_comm += (s.@uid -> best_cluster),
-                                        @@cluster_sizes += (s.@cid -> -1),
-                                        @@cluster_sizes += (best_cluster -> 1),
-                                        s.@cid = best_cluster
-                                    END;
-                log(debug > 1, "[redrain]//2_merge", @@weight_to_cluster_map.size()); 
-                @@weight_to_cluster_map.clear();
-                   
-                log(debug > 1, "[redrain]//2_move:", @@move_comm.size());
-                // move nodes
-                S = SELECT s
-                    FROM Start:s
-                    WHERE @@move_comm.containsKey(s.@uid)
-                POST-ACCUM FOREACH v IN @@move_comm.get(s.@uid) DO
-                   s.@cid = v
-                END;
-            @@move_comm.clear();
-    
-            last_modularity = @@modularity;           
-            @@modularity = 0;
-                
-            S = SELECT s
-                FROM Start:s-((referral>|<referral):e)-:t
-                WHERE s.@cid == t.@cid
-                ACCUM @@modularity += e.num_patient - s.@weight * t.@weight / (@@total_weight);
-                @@modularity = @@modularity / @@total_weight;
-                PRINT iteration AS Phase1Iter, @@modularity;
-            log(debug > 0, "[redrain]//2_move", iteration, @@modularity);
-        END;
-        
-        S = SELECT s
-            FROM represent:s
-            ACCUM s.@cweight = 0;
-        @@cluster_sizes.clear();
-        
-        last_modularity2 = @@modularity2;
-        @@modularity2 = @@modularity;
-        PRINT iteration2 AS Phase2Iter, @@modularity2;
-        log(debug > 0, "[redrain]//2_merge", iteration2, @@modularity2);
-                                      
-    END;
-        
-        
-// Phase 3 -- Refinement
-    iteration = 0;
-    @@modularity = 0;
-    WHILE (iteration < 2 OR @@modularity - last_modularity > epsilon) LIMIT iter3 DO
-        iteration = iteration + 1;
-        S = SELECT s 
-            FROM Start:s -((referral>|<referral):e)- :t
-            WHERE abs(s.@weight - @@tot_incident_cluster.get(s.@cid)) > epsilon OR abs(t.@weight - @@tot_incident_cluster.get(t.@cid)) > epsilon OR (abs(s.@weight - @@tot_incident_cluster.get(s.@cid)) < epsilon AND abs(t.@weight - @@tot_incident_cluster.get(t.@cid)) < epsilon AND s.@cid > t.@cid)   // at least one cluster not only itself, or use smaller label
-            ACCUM s.@weight_to_cluster += (t.@cid -> e.num_patient)
-            POST-ACCUM
-                INT best_cluster = s.@cid,
-                FLOAT max_delta_Q = 0.0,
-                FLOAT delta_Q_new = 0.0,
-                FOREACH (cluster, weightToC) IN s.@weight_to_cluster DO   //would be better if this can be distributed
-                    FLOAT incident = @@tot_incident_cluster.get(cluster),
-                    delta_Q_new = weightToC - 2 * incident * s.@weight/ @@total_weight,
-                    IF delta_Q_new > max_delta_Q OR (abs(delta_Q_new - max_delta_Q) < epsilon AND cluster < best_cluster) THEN   // when delta_Q_new is equal to max_delta_Q, and the cluster label is smaller, also change 
-                        max_delta_Q = delta_Q_new,
-                        best_cluster = cluster
-                    END
-                END,
-                IF s.@cid != best_cluster THEN 
-                    @@tot_incident_cluster += (s.@cid -> (-1 * s.@weight)),
-                    @@tot_incident_cluster += (best_cluster -> s.@weight),
-                    s.@cid = best_cluster
-                END,
-                s.@weight_to_cluster.clear();
-        last_modularity = @@modularity;
-        @@modularity = 0;
-        T1 = SELECT s
-             FROM Start:s-((referral>|<referral):e)-:t
-             WHERE s.@cid == t.@cid
-             ACCUM @@modularity += e.num_patient - s.@weight * t.@weight / (@@total_weight);
-        @@modularity = @@modularity / @@total_weight;                      
-        PRINT iteration AS Phase3Iter, @@modularity;
-        log(debug > 0, "[redrain]//refine", iteration, @@modularity);
-    END;
-    
-            
-    Print Start [Start.@cid];
-    Start = {ANY};
-    Start = SELECT s FROM Start:s
-            POST-ACCUM @@cluster_sizes += (s.@cid -> 1),s.communityId=s.@cid
-                       ;
-    log(TRUE, @@cluster_sizes.size());
-    IF output_level ==0 THEN
-        FOREACH (cluster, csize) IN @@cluster_sizes DO
-            @@cluster_map += (csize -> 1);
-        END;
-        FOREACH (csize, number) IN @@cluster_map DO
-            @@cluster_dist += cluster_num(csize, number);
-        END;
-        PRINT @@cluster_dist;
-    ELSE
-        FOREACH (cluster, csize) IN @@cluster_sizes DO
-            @@cluster_members += (csize -> cluster);
-        END;
-        PRINT @@cluster_members;
-  PRINT "Community Detection Done";
-        END;
-}
\ No newline at end of file
diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/algo_page_rank.gsql b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/algo_page_rank.gsql
deleted file mode 100644
index a711d0f..0000000
--- a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/algo_page_rank.gsql
+++ /dev/null
@@ -1,37 +0,0 @@
-CREATE OR REPLACE QUERY algo_page_rank(FLOAT maxChange, INT maxIter, FLOAT damping, INT outputLimit) FOR GRAPH MyGraph { 
-
-/* Compute the pageRank score for each vertex in the GRAPH
-  In each iteration, compute a score for each vertex:
-  score = (1-damping) + damping*sum(received scores FROM its neighbors).
-  The pageRank algorithm stops when either of the following is true:
-  a) it reaches maxIter iterations;
-  b) the max score change for any vertex compared to the last iteration <= maxChange.
-*/
-
-        TYPEDEF TUPLE<vertex Vertex_ID, FLOAT score> vertexScore;
-        HeapAccum<vertexScore>(outputLimit, score DESC) @@top_Scores;
-        MaxAccum<float> @@max_Diff = 999999; // max score change in an iteration
-        SumAccum<float> @received_score = 0; // sum of scores each vertex receives FROM neighbors
-        SumAccum<float> @score = 1;   // Initial score for every vertex is 1.
-        SetAccum<EDGE> @@edge_Set;                   // list of all edges, if display is needed
-
-        Start = {Prescriber.*};   //  Start with all vertices of specified type(s)
-	      V (ANY) = {};
-        WHILE @@max_Diff > maxChange LIMIT maxIter DO
-                @@max_Diff = 0;
-                V = SELECT s
-                    FROM Start:s -(referral>:e)- :t
-                    ACCUM t.@received_score += s.@score/(s.outdegree("referral")) 
-                    POST-ACCUM s.@score = (1.0-damping) + damping * s.@received_score,
-                               s.@received_score = 0,
-                               @@max_Diff += abs(s.@score - s.@score');
-        END; // END WHILE loop
-
-
-        IF outputLimit > 0 THEN
-                V = SELECT s FROM Start:s
-                    POST-ACCUM @@top_Scores += vertexScore(s, s.@score),s.pageRank=s.@score;
-                PRINT @@top_Scores;
-        END;
-	
-}
\ No newline at end of file
diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/ex1_common_patients.gsql b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/ex1_common_patients.gsql
deleted file mode 100644
index c519255..0000000
--- a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/ex1_common_patients.gsql
+++ /dev/null
@@ -1,39 +0,0 @@
-CREATE OR REPLACE QUERY ex1_common_patients(vertex<Prescriber> Prescriber1, vertex<Prescriber> Prescriber2) FOR GRAPH MyGraph { 
-  
-	  OrAccum @visited;
-    SetAccum<edge> @@edge_Set;
-    Pre1 =  {Prescriber1};
-    Pre2 =  {Prescriber2};
-    // Step 1 – Start graph Traversal from first prescriber to find all associated claims. Use visited flag to remember claims visited.
-    claims1 = SELECT t
-              FROM Pre1:s -(<submitted_by:e)- Claim:t
-              ACCUM t.@visited += TRUE;
-     // Step 2 – For those claims, find all the linked patients.
-    patients1 = SELECT t
-                FROM claims1:s -(associated>:e)- Patient:t
-                ACCUM t.@visited += TRUE;
-    // Step 3 Start graph traversal from second prescriber to find all claims
-    claims2 = SELECT t
-              FROM Pre2:s -(<submitted_by:e)- Claim:t
-              ACCUM t.@visited += TRUE;
-
-    // Step 4 – Find common patients  by starting from claims in Step 3
-    common_patients = SELECT t
-                      FROM claims2:s -(associated>:e)- Patient:t
-                      WHERE t.@visited == TRUE;
-    PRINT common_patients;
- 
-	  // Step 5 – From common patients find all claims that have been visited in earlier steps. Collect the edges so they can be printed.
-    claims = SELECT t
-             FROM common_patients:s -(<associated:e)- Claim:t
-             WHERE t.@visited == TRUE
-             ACCUM @@edge_Set += e;
-    PRINT claims;
-
-	  // Step 6 – From claims find associated prescribers. Collect and print edges (claims – prescribers) and prescribers.
-    pres = SELECT t
-           FROM claims:s -(submitted_by>:e)- Prescriber:t
-           ACCUM @@edge_Set += e;
-    PRINT pres;
-    PRINT @@edge_Set;
-}
\ No newline at end of file
diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/ex2_create_referral_edge.gsql b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/ex2_create_referral_edge.gsql
deleted file mode 100644
index d85c83e..0000000
--- a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/ex2_create_referral_edge.gsql
+++ /dev/null
@@ -1,27 +0,0 @@
-CREATE OR REPLACE QUERY ex2_create_referral_edge(VERTEX<Prescriber> inputPrescriber) FOR GRAPH MyGraph { 
-	OrAccum<BOOL> @visited, @is_Referred_Claim;
-	
-	ListAccum<DATETIME> @date_List;
-	
-	start_set = {inputPrescriber};
-	
-	claims = SELECT t FROM start_set:s-(<submitted_by:e)-:t
-	         POST-ACCUM t.@visited = true;
-	
-	patients = SELECT t FROM claims:s-(associated>:e)-:t
-	           ACCUM t.@date_List += s.rx_fill_date;
-	
-	claims = SELECT t FROM patients:s-(<associated:e)-:t
-	         WHERE t.@visited == false
-	         ACCUM FOREACH dt in s.@date_List do
-	                 CASE WHEN datetime_diff(dt, t.rx_fill_date) BETWEEN 0 AND 2592000 THEN
-	                   t.@is_Referred_Claim = true
-	                 END
-	               END
-	         HAVING t.@is_Referred_Claim == true;
-	
-	prescribers = SELECT t FROM claims-(submitted_by>:e)-:t
-	              POST-ACCUM INSERT INTO referral VALUES(inputPrescriber, t, 1);
-	print start_set;
-	
-}
\ No newline at end of file
diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/ex2_main_query.gsql b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/ex2_main_query.gsql
deleted file mode 100644
index 2e54eab..0000000
--- a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/ex2_main_query.gsql
+++ /dev/null
@@ -1,6 +0,0 @@
-CREATE OR REPLACE QUERY ex2_main_query(/* Parameters here */) FOR GRAPH MyGraph { 
-	
-	all_prescribers = select s from Prescriber:s accum ex2_create_referral_edge(s);
-  
-  print all_prescribers;
-}
\ No newline at end of file
diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_claims.gsql b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_claims.gsql
deleted file mode 100644
index 643166d..0000000
--- a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_claims.gsql
+++ /dev/null
@@ -1,11 +0,0 @@
-CREATE OR REPLACE QUERY get_claims(vertex<Prescriber> inputPrescriber) FOR GRAPH MyGraph { 
-  
-	ListAccum<EDGE> @@list;
-	
-	start_set = {inputPrescriber};
-	
-	claims = SELECT t FROM start_set:s-(<submitted_by:e)-:t
-	         ACCUM @@list += e;
-	
-	print claims, @@list;
-}
\ No newline at end of file
diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_claims_of_prescriber.gsql b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_claims_of_prescriber.gsql
new file mode 100644
index 0000000..f3037c3
--- /dev/null
+++ b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_claims_of_prescriber.gsql
@@ -0,0 +1,23 @@
+CREATE QUERY get_claims_of_prescriber(VERTEX<Prescriber> input_prescriber)
+  FOR GRAPH MyGraph {
+/*
+    List every Claim submitted by one Prescriber.
+
+    Sample inputs:
+    input_prescriber: pre6 | pre30 | pre13
+
+    Pattern walked: Prescriber -(<submitted_by)- Claim
+    Output: the Claim vertex set plus each submitted_by edge that was followed.
+*/
+
+    ListAccum<EDGE> @@submitted_by_list; // one entry per traversed edge
+
+    seed = {input_prescriber};
+
+    // submitted_by is followed against its direction: Prescriber back to Claim
+    claims = SELECT clm
+        FROM seed:pre -(<submitted_by:sb)- :clm
+        ACCUM @@submitted_by_list += sb;
+
+    PRINT claims, @@submitted_by_list;
+}
\ No newline at end of file
diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_common_patients.gsql b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_common_patients.gsql
new file mode 100644
index 0000000..6dd416e
--- /dev/null
+++ b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_common_patients.gsql
@@ -0,0 +1,64 @@
+CREATE QUERY get_common_patients(VERTEX<Prescriber> prescriber1,
+  VERTEX<Prescriber> prescriber2) FOR GRAPH MyGraph {
+/*
+    Find the Patients shared by two Prescribers, together with the Claims
+    and edges that link each shared Patient back to either Prescriber.
+
+    Sample inputs:
+    prescriber1: pre6 | pre30 | pre13
+    prescriber2: pre6 | pre30 | pre13
+
+    Pattern walked from each Prescriber:
+    Prescriber -(<submitted_by)- Claim -(associated>)- Patient
+
+    Pass 1 (prescriber1): flag its Claims, then the Patients on those Claims.
+    Pass 2 (prescriber2): flag its Claims, then keep only the Patients
+    on those Claims that pass 1 already flagged.
+    Finally walk back from the shared Patients to the flagged Claims and
+    their Prescribers, collecting the edges on the way.
+*/
+
+    OrAccum @visited; // set on Claims of either input and on prescriber1's Patients
+    SetAccum<EDGE> @@edge_set;
+
+    first_seed = {prescriber1};
+    second_seed = {prescriber2};
+
+    // Step 1 - Claims submitted by the first prescriber, flagged as visited
+    first_claims = SELECT c
+        FROM first_seed:p -(<submitted_by:sb)- Claim:c
+        ACCUM c.@visited += TRUE;
+
+    // Step 2 - Patients on those claims, flagged as visited
+    first_patients = SELECT pt
+        FROM first_claims:c -(associated>:a)- Patient:pt
+        ACCUM pt.@visited += TRUE;
+
+    // Step 3 - Claims submitted by the second prescriber, flagged as visited
+    second_claims = SELECT c
+        FROM second_seed:p -(<submitted_by:sb)- Claim:c
+        ACCUM c.@visited += TRUE;
+
+    // Step 4 - Patients of the second prescriber that step 2 already flagged
+    common_patients = SELECT pt
+        FROM second_claims:c -(associated>:a)- Patient:pt
+        WHERE pt.@visited == TRUE;
+    PRINT common_patients;
+
+    /* Step 5 - Flagged claims of the common patients, i.e. claims submitted
+    by either input prescriber. Collect the associated edges for printing. */
+    claims = SELECT c
+        FROM common_patients:pt -(<associated:a)- Claim:c
+        WHERE c.@visited == TRUE
+        ACCUM @@edge_set += a;
+    PRINT claims;
+
+    /* Step 6 - Prescribers behind those claims. Collect the submitted_by
+    edges (claims - prescribers) and print prescribers and edges. */
+    pres = SELECT p
+        FROM claims:c -(submitted_by>:sb)- Prescriber:p
+        ACCUM @@edge_set += sb;
+
+    PRINT pres;
+    PRINT @@edge_set;
+}
\ No newline at end of file
diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_joint_prescribers.gsql b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_joint_prescribers.gsql
new file mode 100644
index 0000000..e38b9b3
--- /dev/null
+++ b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_joint_prescribers.gsql
@@ -0,0 +1,43 @@
+CREATE QUERY get_joint_prescribers(VERTEX<Prescriber> input_prescriber)
+  FOR GRAPH MyGraph {
+/*
+    Find the other Prescribers whose Claims involve a given Prescriber's Patients
+
+    Sample inputs:
+    input_prescriber: pre6 | pre30 | pre13
+
+    Pattern walked:
+    Prescriber -(<submitted_by)- Claim -(associated>)- Patient -(<associated)-
+      Claim -(submitted_by>)- Prescriber
+
+    Hop 1: input_prescriber to its Claims, which are flagged as visited
+    Hop 2: those Claims to their Patients
+    Hop 3: the Patients to their Claims that are not flagged
+    Hop 4: those Claims to the Prescribers that submitted them
+*/
+
+    OrAccum<BOOL> @visited; // TRUE on the input prescriber's own claims
+    ListAccum<EDGE> @@edge_list; // every edge followed on any hop
+
+    seed = {input_prescriber};
+
+    own_claims = SELECT c
+        FROM seed:p -(<submitted_by:ed)- :c
+        ACCUM @@edge_list += ed
+        POST-ACCUM c.@visited = TRUE;
+
+    shared_patients = SELECT pt
+        FROM own_claims:c -(associated>:ed)- :pt
+        ACCUM @@edge_list += ed;
+
+    other_claims = SELECT c
+        FROM shared_patients:pt -(<associated:ed)- :c
+        WHERE c.@visited == FALSE // skip the input prescriber's own claims
+        ACCUM @@edge_list += ed;
+
+    prescribers = SELECT p
+        FROM other_claims -(submitted_by>:ed)- :p
+        ACCUM @@edge_list += ed;
+
+    PRINT prescribers, @@edge_list;
+}
\ No newline at end of file
diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_k_hop_neighbors.gsql b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_k_hop_neighbors.gsql
index 175e2c9..6340390 100644
--- a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_k_hop_neighbors.gsql
+++ b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_k_hop_neighbors.gsql
@@ -1,16 +1,32 @@
-CREATE OR REPLACE QUERY get_k_hop_neighbor(int k, vertex input) FOR GRAPH MyGraph { 
-	
-	OrAccum<BOOL> @visited;
-	ListAccum<EDGE> @@edgeList;
-	
-  start = {input};
-	
-	WHILE start.size() > 0 limit k DO
-	  start = SELECT t from start-(:e)-:t
-	          WHERE t.@visited == false
-	          ACCUM @@edgeList += e
-	          POST-ACCUM t.@visited = true;
-	END;
-	
-	print @@edgeList;
+CREATE QUERY get_k_hop_neighbors(INT k, VERTEX input) FOR GRAPH MyGraph {
+/*
+    Get all the vertices within k hops of a source vertex
+
+    Sample inputs:
+    k: any number > 0
+    input: (Claim, 9921) | (SubSpecialty, Cardiology) | (Prescriber, pre78)
+
+    Starting with the "input",
+    (1) Mark the input as visited so the traversal never walks back to it
+    (2) Traverse to all connected vertices that have not been visited
+    (3) Accumulate the edges, then each newly reached vertex exactly once
+    (4) Mark the vertices as visited and repeat the traversal up to k times
+*/
+
+    OrAccum<BOOL> @visited;
+    ListAccum<VERTEX> @@vertex_list;
+    ListAccum<EDGE> @@edge_list;
+
+    start = {input};
+    start = SELECT s FROM start:s POST-ACCUM s.@visited = TRUE; // source is hop 0
+
+    WHILE start.size() > 0 LIMIT k DO // stops when no new vertices remain or after k hops
+        start = SELECT t
+            FROM start-(:e)-:t // visit all connected vertices
+            WHERE t.@visited == FALSE // vertex must be new
+            ACCUM @@edge_list += e // one entry per traversed edge
+            POST-ACCUM t.@visited = TRUE, @@vertex_list += t; // runs once per vertex
+    END;
+
+    PRINT @@vertex_list, @@edge_list;
 }
\ No newline at end of file
diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_patients.gsql b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_patients.gsql
deleted file mode 100644
index a048c95..0000000
--- a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_patients.gsql
+++ /dev/null
@@ -1,14 +0,0 @@
-CREATE OR REPLACE QUERY get_patients(vertex<Prescriber> inputPrescriber) FOR GRAPH MyGraph { 
-  
-	ListAccum<EDGE> @@list;
-	
-	start_set = {inputPrescriber};
-	
-	claims = SELECT t FROM start_set:s-(<submitted_by:e)-:t
-	         ACCUM @@list += e;
-	
-	patients = SELECT t FROM claims:s-(associated>:e)-:t
-	           ACCUM @@list +=e;
-	
-	print claims, @@list;
-}
\ No newline at end of file
diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_patients_of_prescriber.gsql b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_patients_of_prescriber.gsql
new file mode 100644
index 0000000..a6a7477
--- /dev/null
+++ b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_patients_of_prescriber.gsql
@@ -0,0 +1,29 @@
+CREATE QUERY get_patients_of_prescriber(VERTEX<Prescriber> input_prescriber)
+  FOR GRAPH MyGraph {
+/*
+    List the Patients reached through the Claims of one Prescriber.
+
+    Sample inputs:
+    input_prescriber: pre6 | pre30 | pre13
+
+    Pattern walked:
+    Prescriber -(<submitted_by)- Claim -(associated>)- Patient
+    Hop 1: input_prescriber to the Claims it submitted
+    Hop 2: those Claims to their associated Patients
+    Output: the Patient vertex set and every edge followed on both hops
+*/
+
+    ListAccum<EDGE> @@edge_list;
+
+    seed = {input_prescriber};
+
+    claims = SELECT c
+        FROM seed:p -(<submitted_by:ed)- :c
+        ACCUM @@edge_list += ed;
+
+    patients = SELECT pt
+        FROM claims:c -(associated>:ed)- :pt
+        ACCUM @@edge_list += ed;
+
+    PRINT patients, @@edge_list;
+}
\ No newline at end of file
diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_prescribers.gsql b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_prescribers.gsql
deleted file mode 100644
index 47a8847..0000000
--- a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_prescribers.gsql
+++ /dev/null
@@ -1,22 +0,0 @@
-CREATE OR REPLACE QUERY get_prescribers(vertex<Prescriber> inputPrescriber) FOR GRAPH MyGraph { 
-  ListAccum<EDGE> @@list;
-	OrAccum<BOOL> @visited;
-	
-	start_set = {inputPrescriber};
-	
-	claims = SELECT t FROM start_set:s-(<submitted_by:e)-:t
-	         ACCUM @@list += e
-	         POST-ACCUM t.@visited = true;
-	
-	patients = SELECT t FROM claims:s-(associated>:e)-:t
-	           ACCUM @@list +=e;
-	
-	claims = SELECT t FROM patients:s-(<associated:e)-:t
-	         WHERE t.@visited == false
-	         ACCUM @@list +=e;
-	
-	prescribers = SELECT t FROM claims-(submitted_by>:e)-:t
-	              ACCUM @@list +=e;
-	
-	print prescribers, @@list;
-}
\ No newline at end of file
diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_referral_community.gsql b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_referral_community.gsql
new file mode 100644
index 0000000..4abf375
--- /dev/null
+++ b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/get_referral_community.gsql
@@ -0,0 +1,31 @@
+CREATE QUERY get_referral_community(VERTEX<Prescriber> input_prescriber)
+  FOR GRAPH MyGraph {
+/*
+    Get the Prescribers in the same referral community as that of a
+    given Prescriber
+
+    Sample inputs:
+    input_prescriber: pre6 | pre30 | pre13
+
+    Starting with the input_prescriber,
+    (1) Read its communityId
+    (2) Keep every Prescriber with that communityId, even without outgoing referrals
+    (3) Collect the referral edges between kept Prescribers, then print both
+*/
+
+    ListAccum<EDGE> @@edge_list;
+    SumAccum<INT> @@cid; // communityId of the single input vertex
+
+    start = {input_prescriber};
+    start = SELECT s FROM start:s POST-ACCUM @@cid += s.communityId;
+
+    start = {Prescriber.*};
+    start = SELECT s FROM start:s WHERE s.communityId == @@cid; // all members
+
+    // separate pass so members without an outgoing referral stay in "start"
+    linked = SELECT s
+        FROM start:s-(referral>:e)-:t
+        WHERE t.communityId == @@cid // both endpoints inside the community
+        ACCUM @@edge_list += e;
+
+    PRINT start, @@edge_list;
+}
\ No newline at end of file
diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/infer_all_referrals.gsql b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/infer_all_referrals.gsql
new file mode 100644
index 0000000..26c9370
--- /dev/null
+++ b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/infer_all_referrals.gsql
@@ -0,0 +1,16 @@
+CREATE QUERY infer_all_referrals() FOR GRAPH MyGraph {
+/*
+    Run the infer_referrals query once for every Prescriber vertex, so that
+    referral edges are inferred for the whole graph in one call.
+
+    (1) Select all Prescriber vertices
+    (2) Call infer_referrals with each selected Prescriber as its input
+    (3) Print the Prescriber vertices that were processed
+*/
+
+    all_prescribers = SELECT pre
+        FROM Prescriber:pre
+        ACCUM infer_referrals(pre); // one sub-query call per prescriber
+
+    PRINT all_prescribers;
+}
\ No newline at end of file
diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/infer_referrals.gsql b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/infer_referrals.gsql
new file mode 100644
index 0000000..cdc205e
--- /dev/null
+++ b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/infer_referrals.gsql
@@ -0,0 +1,50 @@
+CREATE QUERY infer_referrals(VERTEX<Prescriber> input_prescriber)
+  FOR GRAPH MyGraph {
+/*
+    Insert referral edges from a given Prescriber to the Prescribers of
+    other Claims that share a Patient and fall inside a 30-day window.
+
+    Sample inputs:
+    input_prescriber: pre6 | pre30 | pre13
+
+    Pattern walked:
+    Prescriber -(<submitted_by)- Claim -(associated>)- Patient
+      -(<associated)- Claim -(submitted_by>)- Prescriber
+    (1) Flag the input prescriber's Claims as visited
+    (2) Copy each flagged Claim's rx_fill_date onto its Patient
+    (3) From those Patients keep the unflagged Claims whose rx_fill_date is
+      0 to 2592000 seconds (30 days) before one of the copied dates
+    (4) Insert a referral edge from the input to each kept Claim's Prescriber
+    NOTE(review): the edge points from the later claim's prescriber to the
+      earlier one - confirm this is the intended referral direction.
+*/
+
+    OrAccum<BOOL> @visited, @is_referred_claim;
+    ListAccum<DATETIME> @date_list; // fill dates of the input's claims, per patient
+
+    start = {input_prescriber};
+
+    own_claims = SELECT c
+        FROM start:p -(<submitted_by:ed)- :c
+        POST-ACCUM c.@visited = TRUE;
+
+    patients = SELECT pt
+        FROM own_claims:c -(associated>:ed)- :pt
+        ACCUM pt.@date_list += c.rx_fill_date;
+
+    referred_claims = SELECT c
+        FROM patients:pt -(<associated:ed)- :c
+        WHERE c.@visited == FALSE
+        ACCUM FOREACH own_date IN pt.@date_list DO
+            IF datetime_diff(own_date, c.rx_fill_date)
+              BETWEEN 0 AND 2592000 THEN // 2592000 s = 30 * 24 * 60 * 60
+                c.@is_referred_claim += TRUE
+            END
+        END
+        HAVING c.@is_referred_claim == TRUE;
+
+    referred_prescribers = SELECT p
+        FROM referred_claims -(submitted_by>:ed)- :p
+        POST-ACCUM INSERT INTO referral VALUES(input_prescriber, p, 1);
+    PRINT start;
+}
\ No newline at end of file
diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/tg_louvain.gsql b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/tg_louvain.gsql
new file mode 100644
index 0000000..386b655
--- /dev/null
+++ b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/tg_louvain.gsql
@@ -0,0 +1,228 @@
+CREATE QUERY tg_louvain(SET<STRING> v_type, SET<STRING> e_type, STRING wt_attr = "weight", INT max_iter = 10, 
+  STRING result_attr = "cid", STRING file_path = "", BOOL print_info = FALSE)  SYNTAX V1 {
+
+    /*
+    Louvain community detection algorithm.
+    Add the keyword DISTRIBUTED for a cluster environment.
+
+    Sample inputs:
+    v_type: Prescriber
+    e_type: referral, reverse_referral
+    wt_attr: num_patients
+    result_attr: communityId
+
+    Parameters:
+    v_type: vertex types to traverse
+    e_type: edge types to traverse
+    wt_attr: name of the edge attribute holding the weight (NOTE(review): no unweighted/empty-string branch is visible here)
+    The weight type is hardcoded to INT in this query. FLOAT or DOUBLE can be supported by changing every
+    `e.getAttr(wt_attr, "INT")` to `e.getAttr(wt_attr, "FLOAT")` or `e.getAttr(wt_attr, "DOUBLE")`.
+    * Note: when there is a weight attribute mismatch, there may not be an explicit error message.
+    All print results showing 0 is an indication that there might be a weight attribute mismatch.
+
+    max_iter: maximum number of iterations of the louvain optimization loop
+    result_attr: attribute name to assign community id results to; use empty string to skip
+    file_path: file path to write CSV output to; use empty string to skip
+    print_info: print louvain execution info
+    */
+
+    TYPEDEF TUPLE <FLOAT deltaQ, FLOAT weight, VERTEX cc> move; # candidate move: gain, negated community weight, target center
+    SumAccum<FLOAT> @sum_ac; # sum of the degrees of all the vertices in community C of the vertex
+    ListAccum<VERTEX> @cc_list; # the community center; read through get(0)
+    SumAccum<FLOAT> @sum_weight; # total weight incident to this vertex
+    SumAccum<FLOAT> @sum_cc_weight; # total weight incident to the cc vertex
+    MapAccum<VERTEX,SumAccum<FLOAT>> @A_map; # A[c]: sum of the edge weights for the edges in community c
+    MaxAccum<move> @max_best_move; # highest dQ, then highest -(community weight), then highest cc
+    ListAccum<VERTEX> @cm_list;  # community member list
+    SumAccum<FLOAT> @@sum_m; # total edge weight
+    SumAccum<INT> @sum_outdegree;   # helper variable for outdegree calculation
+    SumAccum<INT> @@sum_cc_change; # number of community changes made in the current pass
+    MapAccum<INT, SumAccum<INT>> @@community_map; # community id -> member count, filled only when print_info is TRUE
+    MapAccum<INT, SumAccum<INT>> @@community_size_count; # declared but never referenced in this query
+    FILE f(file_path); # written to only when file_path is not the empty string
+
+    // initialize: total edge weight, per-vertex weight and per-vertex edge count
+    Start = {v_type};
+    Start = SELECT s 
+            FROM Start:s -(e_type:e)- :t
+            ACCUM
+                @@sum_m += e.getAttr(wt_attr, "INT")*0.5, // half per traversal of an edge
+                s.@sum_weight += e.getAttr(wt_attr, "INT")*1.0,
+                s.@sum_cc_weight += e.getAttr(wt_attr, "INT")*1.0,
+                s.@sum_outdegree += 1
+            // mark @cc only for vertices with more than 1 neighbors
+            // and only the marked vertices will participate in the actual louvain algorithm
+            // the unmarked vertices will be resolved by the vertex following heuristic
+            POST-ACCUM
+                IF s.@sum_outdegree > 1 THEN 
+                    s.@cc_list += s 
+                END;
+    IF print_info THEN
+        PRINT Start.size() AS AllVertexCount;
+    END;
+
+    // special @cc update in the first iteration
+    Start = SELECT t 
+            FROM Start:s -(e_type:e)- :t
+            WHERE s.@sum_outdegree > 1 AND t.@sum_outdegree > 1
+            ACCUM
+                t.@max_best_move += move(e.getAttr(wt_attr, "INT")*1.0 + @@sum_m*t.@sum_weight * 
+                (t.@sum_weight - s.@sum_weight), -s.@sum_cc_weight, s.@cc_list.get(0))
+            POST-ACCUM
+                IF t.@max_best_move.deltaQ > 0 THEN // only move when the gain is positive
+                    IF -t.@max_best_move.weight < t.@sum_cc_weight THEN // target community weight is smaller
+                        t.@cc_list.clear(),
+                        t.@cc_list += t.@max_best_move.cc,
+                        t.@sum_cc_weight = -t.@max_best_move.weight,
+                        @@sum_cc_change += 1
+                    ELSE
+                        IF -t.@max_best_move.weight == t.@sum_cc_weight AND getvid(t) < getvid(t.@max_best_move.cc)  THEN // tie broken on internal vertex id
+                            t.@cc_list.clear(),
+                            t.@cc_list += t.@max_best_move.cc,
+                            t.@sum_cc_weight = -t.@max_best_move.weight,
+                            @@sum_cc_change += 1
+                        END
+                    END
+                END;
+    IF print_info THEN
+        PRINT @@sum_cc_change AS InitChangeCount;
+    END;
+
+    // main loop: repeats while the previous pass changed something, at most max_iter times
+    WHILE @@sum_cc_change > 0 LIMIT max_iter DO
+        // initialize for iteration
+        @@sum_cc_change = 0;
+        Start = SELECT s 
+                FROM Start:s
+                WHERE s.@sum_outdegree > 1
+                POST-ACCUM
+                    s.@sum_ac = 0,
+                    s.@cm_list.clear(),
+                    s.@A_map.clear();
+
+        Start = SELECT s // register each vertex in the member list of its community center
+                FROM Start:s
+                ACCUM
+                    FOREACH v IN s.@cc_list DO
+                        CASE WHEN getvid(v) != -1 THEN 
+                            v.@cm_list += s 
+                        END
+                    END;
+
+        Start = SELECT s // per vertex: edge weight towards each neighbouring community center
+                FROM Start:s -(e_type:e)- :t
+                WHERE t.@sum_outdegree > 1
+                ACCUM 
+                    s.@A_map += (t.@cc_list.get(0) -> e.getAttr(wt_attr, "INT")*1.0);
+
+        Start = SELECT s // add each member's weight to its community center's @sum_ac
+                FROM Start:s
+                ACCUM
+                    FOREACH v IN s.@cc_list DO
+                        CASE WHEN getvid(v) != -1 THEN 
+                            v.@sum_ac += s.@sum_weight 
+                        END
+                    END;
+
+        Start = SELECT s // copy the center's @sum_ac back onto every listed member
+                FROM Start:s
+                ACCUM
+                    FOREACH v IN s.@cm_list DO
+                        CASE WHEN getvid(v) != -1 THEN 
+                            v.@sum_ac = s.@sum_ac 
+                        END
+                    END;
+
+        // compute the best candidate move (@max_best_move) for every vertex
+        Start = SELECT s 
+                FROM Start:s -(e_type:e)- :t
+                WHERE t.@sum_outdegree > 1
+                ACCUM
+                    INT A_s = 0,
+                    IF s.@A_map.containsKey(s) THEN 
+                        A_s = s.@A_map.get(s) 
+                    END,
+                    s.@max_best_move += move(s.@A_map.get(t.@cc_list.get(0)) - A_s + 
+                    1/@@sum_m*s.@sum_weight*(s.@sum_ac-t.@sum_ac), -t.@sum_cc_weight, t.@cc_list.get(0))
+                POST-ACCUM
+                    IF s.@max_best_move.deltaQ > 0 THEN
+                        IF -s.@max_best_move.weight < s.@sum_cc_weight THEN   // smallest best_move weight < current weight
+                            s.@cc_list.clear(),
+                            s.@cc_list += s.@max_best_move.cc,
+                            s.@sum_cc_weight = -s.@max_best_move.weight,
+                            @@sum_cc_change += 1
+                        ELSE
+                            IF -s.@max_best_move.weight == s.@sum_cc_weight AND getvid(s.@cc_list.get(0)) < getvid(s.@max_best_move.cc)  THEN // tie broken on internal vertex id
+                                s.@cc_list.clear(),
+                                s.@cc_list += s.@max_best_move.cc,
+                                s.@sum_cc_weight = -s.@max_best_move.weight,
+                                @@sum_cc_change += 1
+                            END
+                        END
+                    END;
+        IF print_info THEN
+            PRINT @@sum_cc_change AS IterChangeCount;
+        END;
+    END;
+
+    // process node with outdegree=1
+    // follow the vertex to its neighbor's community
+    // if the neighbor also has outdegree=1, mark the two vertices as one community
+    Start = {v_type};
+    Start = SELECT s 
+            FROM Start:s -(e_type:e)- :t
+            WHERE s.@sum_outdegree == 1 AND t.@sum_outdegree != 1
+            ACCUM 
+                s.@cc_list += t.@cc_list.get(0);
+    IF print_info THEN
+        PRINT Start.size() AS VertexFollowedToCommunity;
+    END;
+
+    Start = {v_type};
+    Start = SELECT s 
+            FROM Start:s -(e_type:e)- :t
+            WHERE s.@sum_outdegree == 1 AND t.@sum_outdegree == 1
+            ACCUM
+                IF getvid(s) <= getvid(t) THEN // the vertex with the lower internal id becomes the center
+                    s.@cc_list += s
+                ELSE
+                    s.@cc_list += t
+                END;
+    IF print_info THEN
+        PRINT Start.size() AS VertexFollowedToVertex;
+    END;
+
+    // process node with outdegree=0
+    // assign each of them to a community containing only itself
+    Start = {v_type};
+    Start = SELECT s 
+            FROM Start:s
+            WHERE s.@sum_outdegree == 0
+            ACCUM 
+                s.@cc_list += s;
+    IF print_info THEN
+        PRINT Start.size() AS VertexAssignedToItself;
+    END;
+
+    // save result: the community id is the internal id (getvid) of the community center
+    Start = {v_type};
+    Start = SELECT s 
+            FROM Start:s
+            POST-ACCUM
+                IF result_attr != "" THEN 
+                    s.setAttr(result_attr, getvid(s.@cc_list.get(0))) 
+                END,
+                IF file_path != "" THEN 
+                    f.println(s, getvid(s.@cc_list.get(0))) 
+                END;
+
+    // print result statistic
+    IF print_info THEN
+        Start = SELECT s 
+                FROM Start:s
+                WHERE s.@cc_list.size() > 0
+                POST-ACCUM
+                    @@community_map += (getvid(s.@cc_list.get(0)) -> 1);
+        PRINT @@community_map.size() AS FinalCommunityCount;
+    END;
+}
\ No newline at end of file
diff --git a/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/tg_page_rank.gsql b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/tg_page_rank.gsql
new file mode 100644
index 0000000..837779b
--- /dev/null
+++ b/Healthcare-Referral-networks-Hub-PageRank-Community-Detection/db_scripts/queries/tg_page_rank.gsql
@@ -0,0 +1,80 @@
+CREATE QUERY tg_pagerank (STRING v_type, STRING e_type,
+ FLOAT max_change=0.001, INT max_iter=25, FLOAT damping=0.85, INT top_k = 100,
+ BOOL print_accum = TRUE, STRING result_attr =  "", STRING file_path = "",
+ BOOL display_edges = FALSE) SYNTAX V1 {
+
+/*
+ Compute the pageRank score for each vertex in the GRAPH
+
+ Sample inputs:
+ v_type: Prescriber
+ e_type: referral
+ result_attr: pageRank
+
+ In each iteration, compute a score for each vertex:
+     score = (1-damping) + damping*sum(received scores FROM its neighbors).
+ The pageRank algorithm stops when either of the following is true:
+ a) it reaches max_iter iterations;
+ b) the max score change for any vertex compared to the last iteration <= max_change.
+ v_type: vertex type to traverse           print_accum: print JSON output
+ e_type: edge type to traverse             result_attr: attr to store the FLOAT score to ("" skips)
+ max_iter: max #iterations                 file_path: file to write CSV output to ("" skips)
+ top_k: #top scores to output              display_edges: output edges for visualization
+ max_change: max allowed change between iterations to achieve convergence
+ damping: importance of traversal vs. random teleport
+
+ This query supports only taking in a single edge type for the time being (8/13/2020).
+*/
+TYPEDEF TUPLE<VERTEX Vertex_ID, FLOAT score> Vertex_Score;
+HeapAccum<Vertex_Score>(top_k, score DESC) @@top_scores_heap; # keeps the top_k highest scores
+MaxAccum<FLOAT> @@max_diff = 9999;    # max score change in an iteration
+SumAccum<FLOAT> @sum_recvd_score = 0; # sum of scores each vertex receives FROM neighbors
+SumAccum<FLOAT> @sum_score = 1;           # initial score for every vertex is 1.
+SetAccum<EDGE> @@edge_set;             # set of all edges, if display is needed
+FILE f (file_path);
+
+# PageRank iterations
+Start = {v_type};                     # Start with all vertices of specified type(s)
+WHILE @@max_diff > max_change 
+    LIMIT max_iter DO
+        @@max_diff = 0; # reset before measuring this iteration's largest change
+    V = SELECT s
+	FROM Start:s -(e_type:e)- v_type:t
+	ACCUM 
+            t.@sum_recvd_score += s.@sum_score/(s.outdegree(e_type)) 
+	POST-ACCUM 
+            s.@sum_score = (1.0-damping) + damping * s.@sum_recvd_score,
+	    s.@sum_recvd_score = 0,
+	    @@max_diff += abs(s.@sum_score - s.@sum_score'); # the trailing ' reads the score from before this update
+END; # END WHILE loop
+
+# Output
+IF file_path != "" THEN
+    f.println("Vertex_ID", "PageRank"); # CSV header row
+END;
+V = SELECT s 
+    FROM Start:s
+    POST-ACCUM 
+        IF result_attr != "" THEN 
+            s.setAttr(result_attr, s.@sum_score) 
+        END,
+   
+	IF file_path != "" THEN 
+            f.println(s, s.@sum_score) 
+        END,
+   
+	IF print_accum THEN 
+            @@top_scores_heap += Vertex_Score(s, s.@sum_score) 
+        END;
+
+IF print_accum THEN
+    PRINT @@top_scores_heap;
+    IF display_edges THEN # edges are only printed when print_accum is TRUE as well
+        PRINT Start[Start.@sum_score];
+	Start = SELECT s
+	        FROM Start:s -(e_type:e)- v_type:t
+	        ACCUM @@edge_set += e;
+        PRINT @@edge_set;
+    END;
+END;
+}
\ No newline at end of file