From 95ad4cfcacf9b0d5b89b9dbff801c83c1339e179 Mon Sep 17 00:00:00 2001 From: Arthur Peters Date: Tue, 22 Feb 2022 13:36:04 -0600 Subject: [PATCH] Projected views in katana.local [KAT-2411] (#918) * Adds iterators to DynamicBitset because I'm sick of not having them. * Cleans up the PropertyGraph::MakeProjectedGraph method to accept type IDs and allow null selection. * Add Graph.project to python to create a projected graph view. --- libgalois/include/katana/DynamicBitset.h | 14 +++ libgraph/include/katana/PropertyGraph.h | 17 ++- libgraph/src/PropertyGraph.cpp | 54 +++++---- libgraph/test/projection.cpp | 10 +- .../transformation-view-optional-topology.cpp | 9 +- libkatana_python_native/src/PropertyGraph.cpp | 40 +++++++ libsupport/include/katana/DynamicBitsetSlow.h | 105 ++++++++++++++++++ libsupport/test/CMakeLists.txt | 3 +- libsupport/test/dynamic-bitset.cpp | 55 +++++++++ .../betweenness_centrality_cli.cpp | 14 +-- lonestar/analytics/cpu/bfs/bfs_cli.cpp | 14 +-- lonestar/analytics/cpu/cdlp/cdlp_cli.cpp | 16 +-- .../connected_components_cli.cpp | 14 +-- .../analytics/cpu/jaccard/jaccard_cli.cpp | 14 +-- lonestar/analytics/cpu/k-core/kcore_cli.cpp | 14 +-- .../cpu/k-shortest-paths/ksssp_cli.cpp | 14 +-- .../analytics/cpu/k-truss/k_truss_cli.cpp | 14 +-- .../leiden_clustering_cli.cpp | 14 +-- .../local_clustering_coefficient_cli.cpp | 14 +-- .../louvain_clustering_cli.cpp | 14 +-- .../analytics/cpu/pagerank/pagerank-cli.cpp | 14 +-- lonestar/analytics/cpu/sssp/sssp_cli.cpp | 14 +-- .../subgraph_extraction_cli.cpp | 14 +-- .../triangle_counting_cli.cpp | 14 +-- .../include/Lonestar/BoilerPlate.h | 7 +- lonestar/liblonestar/src/BoilerPlate.cpp | 26 +++++ python/test/test_property_graph.py | 28 +++++ 27 files changed, 370 insertions(+), 210 deletions(-) create mode 100644 libsupport/test/dynamic-bitset.cpp diff --git a/libgalois/include/katana/DynamicBitset.h b/libgalois/include/katana/DynamicBitset.h index 754aa79276..bee5e8e777 100644 --- 
a/libgalois/include/katana/DynamicBitset.h +++ b/libgalois/include/katana/DynamicBitset.h @@ -34,6 +34,7 @@ #include #include "katana/AtomicWrapper.h" +#include "katana/DynamicBitsetSlow.h" #include "katana/Galois.h" #include "katana/PODVector.h" #include "katana/config.h" @@ -45,6 +46,7 @@ namespace katana { class KATANA_EXPORT DynamicBitset { public: // types using TItem = katana::CopyableAtomic; + using iterator = DynamicBitsetIterator; private: // variables katana::PODVector bitvec_; @@ -92,6 +94,18 @@ class KATANA_EXPORT DynamicBitset { */ auto& get_vec() { return bitvec_; } + iterator begin() const { + iterator bit0{this, 0, 0}; + if (test(0)) { + // If bit 0 is set then we have the right iterator + return bit0; + } + // Otherwise, increment to find the first set bit. + return ++bit0; + } + + iterator end() const { return {this, bitvec_.size(), 0}; } + /** * Resizes the bitset. * diff --git a/libgraph/include/katana/PropertyGraph.h b/libgraph/include/katana/PropertyGraph.h index 610b5f3ec5..753322e5c8 100644 --- a/libgraph/include/katana/PropertyGraph.h +++ b/libgraph/include/katana/PropertyGraph.h @@ -279,12 +279,23 @@ class KATANA_EXPORT PropertyGraph { const katana::RDGManifest& rdg_manifest, const katana::RDGLoadOptions& opts, katana::TxnContext* txn_ctx); - /// Make a projected graph from a property graph. Shares state with - /// the original graph. - static std::unique_ptr MakeProjectedGraph( + [[deprecated]] static std::unique_ptr MakeProjectedGraph( const PropertyGraph& pg, const std::vector& node_types, const std::vector& edge_types); + /// Make a projected graph from a property graph. Shares state with + /// the original graph. + static Result> MakeProjectedGraph( + const PropertyGraph& pg, + std::optional> node_types, + std::optional> edge_types); + + /// Make a projected graph from a property graph. Shares state with + /// the original graph. 
+ static Result> MakeProjectedGraph( + const PropertyGraph& pg, std::optional node_types, + std::optional edge_types); + /// \return A copy of this with the same set of properties. The copy shares no /// state with this. Result> Copy( diff --git a/libgraph/src/PropertyGraph.cpp b/libgraph/src/PropertyGraph.cpp index 70872bc363..128d60bba3 100644 --- a/libgraph/src/PropertyGraph.cpp +++ b/libgraph/src/PropertyGraph.cpp @@ -351,6 +351,36 @@ std::unique_ptr katana::PropertyGraph::MakeProjectedGraph( const PropertyGraph& pg, const std::vector& node_types, const std::vector& edge_types) { + auto ret = MakeProjectedGraph( + pg, node_types.empty() ? std::nullopt : std::make_optional(node_types), + edge_types.empty() ? std::nullopt : std::make_optional(edge_types)); + KATANA_LOG_VASSERT(ret.has_value(), "{}", ret.error()); + return std::move(ret.value()); +} + +katana::Result> +katana::PropertyGraph::MakeProjectedGraph( + const PropertyGraph& pg, std::optional> node_types, + std::optional> edge_types) { + std::optional node_type_ids; + if (node_types) { + node_type_ids = KATANA_CHECKED( + pg.GetNodeTypeManager().GetEntityTypeIDs(node_types.value())); + } + std::optional edge_type_ids; + if (edge_types) { + edge_type_ids = KATANA_CHECKED( + pg.GetEdgeTypeManager().GetEntityTypeIDs(edge_types.value())); + } + return MakeProjectedGraph(pg, node_type_ids, edge_type_ids); +} + +/// Make a projected graph from a property graph. Shares state with +/// the original graph. 
+katana::Result> +katana::PropertyGraph::MakeProjectedGraph( + const PropertyGraph& pg, std::optional node_types, + std::optional edge_types) { const auto& topology = pg.topology(); if (topology.empty()) { return std::make_unique(); @@ -366,7 +396,7 @@ katana::PropertyGraph::MakeProjectedGraph( NUMAArray original_to_projected_nodes_mapping; original_to_projected_nodes_mapping.allocateInterleaved(topology.NumNodes()); - if (node_types.empty()) { + if (!node_types) { num_new_nodes = topology.NumNodes(); // set all nodes katana::do_all(katana::iterate(topology.Nodes()), [&](auto src) { @@ -378,21 +408,14 @@ katana::PropertyGraph::MakeProjectedGraph( original_to_projected_nodes_mapping.begin(), original_to_projected_nodes_mapping.end(), Node{0}); - std::set node_entity_type_ids; - - for (auto node_type : node_types) { - auto entity_type_id = pg.GetNodeEntityTypeID(node_type); - node_entity_type_ids.insert(entity_type_id); - } - katana::GAccumulator accum_num_new_nodes; katana::do_all(katana::iterate(topology.Nodes()), [&](auto src) { - for (auto type : node_entity_type_ids) { + for (auto type : node_types.value()) { if (pg.DoesNodeHaveType(src, type)) { accum_num_new_nodes += 1; bitset_nodes.set(src); - // this sets the correspondign entry in the array to 1 + // this sets the corresponding entry in the array to 1 // will perform a prefix sum on this array later on original_to_projected_nodes_mapping[src] = 1; return; @@ -444,7 +467,7 @@ katana::PropertyGraph::MakeProjectedGraph( // initializes the edge-index array to all zeros katana::ParallelSTL::fill(out_indices.begin(), out_indices.end(), Edge{0}); - if (edge_types.empty()) { + if (!edge_types) { katana::GAccumulator accum_num_new_edges; // set all edges incident to projected nodes katana::do_all( @@ -464,13 +487,6 @@ katana::PropertyGraph::MakeProjectedGraph( num_new_edges = accum_num_new_edges.reduce(); } else { - std::set edge_entity_type_ids; - - for (auto edge_type : edge_types) { - auto entity_type_id = 
pg.GetEdgeEntityTypeID(edge_type); - edge_entity_type_ids.insert(entity_type_id); - } - katana::GAccumulator accum_num_new_edges; katana::do_all( @@ -481,7 +497,7 @@ katana::PropertyGraph::MakeProjectedGraph( for (Edge e : topology.OutEdges(old_src)) { auto dest = topology.OutEdgeDst(e); if (bitset_nodes.test(dest)) { - for (auto type : edge_entity_type_ids) { + for (auto type : edge_types.value()) { if (pg.DoesEdgeHaveTypeFromTopoIndex(e, type)) { accum_num_new_edges += 1; bitset_edges.set(e); diff --git a/libgraph/test/projection.cpp b/libgraph/test/projection.cpp index 46e2b310ee..1d5479f98e 100644 --- a/libgraph/test/projection.cpp +++ b/libgraph/test/projection.cpp @@ -68,8 +68,14 @@ main(int argc, char** argv) { std::vector edge_types; SplitString(edgeTypes, &edge_types); - auto pg_view = katana::PropertyGraph::MakeProjectedGraph( - full_graph, node_types, edge_types); + auto pg_view_res = katana::PropertyGraph::MakeProjectedGraph( + full_graph, + node_types.empty() ? std::nullopt : std::make_optional(node_types), + edge_types.empty() ? std::nullopt : std::make_optional(edge_types)); + if (!pg_view_res) { + KATANA_LOG_FATAL("Failed to construct projection: {}", pg_view_res.error()); + } + auto pg_view = std::move(pg_view_res.value()); katana::analytics::TemporaryPropertyGuard temp_node_property{ full_graph.NodeMutablePropertyView()}; diff --git a/libgraph/test/transformation-view-optional-topology.cpp b/libgraph/test/transformation-view-optional-topology.cpp index f7fde75f71..bf36d09260 100644 --- a/libgraph/test/transformation-view-optional-topology.cpp +++ b/libgraph/test/transformation-view-optional-topology.cpp @@ -83,8 +83,13 @@ main(int argc, char** argv) { std::vector edge_types; SplitString(edgeTypes, &edge_types); - auto pg_view = - katana::PropertyGraph::MakeProjectedGraph(pg, node_types, edge_types); + auto pg_view_res = katana::PropertyGraph::MakeProjectedGraph( + pg, node_types.empty() ? 
std::nullopt : std::make_optional(node_types), + edge_types.empty() ? std::nullopt : std::make_optional(edge_types)); + if (!pg_view_res) { + KATANA_LOG_FATAL("Failed to construct projection: {}", pg_view_res.error()); + } + auto pg_view = std::move(pg_view_res.value()); TestOptionalTopologyGenerationEdgeShuffleTopology(*pg_view); TestOptionalTopologyGenerationShuffleTopology(*pg_view); diff --git a/libkatana_python_native/src/PropertyGraph.cpp b/libkatana_python_native/src/PropertyGraph.cpp index 4bb7fd78e8..4429a47502 100644 --- a/libkatana_python_native/src/PropertyGraph.cpp +++ b/libkatana_python_native/src/PropertyGraph.cpp @@ -205,6 +205,46 @@ DefPropertyGraph(py::module& m) { katana::DefWithNumba<&PropertyGraph::NumNodes>(cls, "num_nodes"); katana::DefWithNumba<&PropertyGraph::NumEdges>(cls, "num_edges"); + cls.def( + "project", + [](PropertyGraph& self, py::object node_types, + py::object edge_types) -> Result> { + std::optional node_type_ids; + if (!node_types.is_none()) { + node_type_ids = katana::SetOfEntityTypeIDs(); + node_type_ids->resize(self.GetNodeTypeManager().GetNumEntityTypes()); + for (auto& t : node_types) { + node_type_ids->set(py::cast(t).type_id); + } + } + std::optional edge_type_ids; + if (!edge_types.is_none()) { + edge_type_ids = katana::SetOfEntityTypeIDs(); + edge_type_ids->resize(self.GetEdgeTypeManager().GetNumEntityTypes()); + for (auto& t : edge_types) { + edge_type_ids->set(py::cast(t).type_id); + } + } + + py::gil_scoped_release + guard; // graph projection may copy or load data. + // is_none is safe without the GIL because it is just a pointer compare. + return KATANA_CHECKED(PropertyGraph::MakeProjectedGraph( + self, node_type_ids, edge_type_ids)); + }, + py::arg("node_types") = py::none(), py::arg("edge_types") = py::none(), + R"""( + Get a projected view of the graph which only contains nodes or edges of + specific types. 
+ + :type node_types: Optional[Iterable[EntityType]] + :param node_types: A set of node types to include in the projected graph, + or ``None`` to keep all nodes. + :type edge_types: Optional[Iterable[EntityType]] + :param edge_types: A set of edge types to include in the projected graph, + or ``None`` to keep all edges on the selected nodes. + )"""); + // GetLocalNodeID(NodeHandle) -> LocalNodeID - local node ID cls.def( "get_local_node_id", diff --git a/libsupport/include/katana/DynamicBitsetSlow.h b/libsupport/include/katana/DynamicBitsetSlow.h index 9a7f1787bf..0878b8f876 100644 --- a/libsupport/include/katana/DynamicBitsetSlow.h +++ b/libsupport/include/katana/DynamicBitsetSlow.h @@ -14,6 +14,97 @@ namespace katana { +/// An iterator over dynamic bitsets. +/// +/// Increment is an optimized linear search over the bitset, so iteration is +/// O(N) where N is the size of the bitset, not O(|S|) where |S| is the number +/// of set bits in the bit set. + +// TODO(amp): This is a template so it can be used for both DynamicBitsetSlow +// and DynamicBitset. This can be made a simple class once DynamicBitsetSlow +// is removed. +template +class KATANA_EXPORT DynamicBitsetIterator + : public std::iterator< + std::forward_iterator_tag, uint64_t, int64_t, const uint64_t*, + uint64_t> { + const DynamicBitsetType* underlying_; + uint64_t array_index_; + uint8_t bit_offset_; + +public: + DynamicBitsetIterator( + const DynamicBitsetType* underlying, uint64_t array_index, + uint8_t bit_offset) + : underlying_(underlying), + array_index_(array_index), + bit_offset_(bit_offset) {} + + DynamicBitsetIterator& operator++() { + // Step forward one to the bit we want to examine first. 
+ bit_offset_++; + if (bit_offset_ > DynamicBitsetType::kNumBitsInUint64) { + bit_offset_ = 0; + array_index_++; + } + + const auto& bitvec = underlying_->get_vec(); + const size_t size = underlying_->size(); + + // Used only to make sure we stop on the last real used bit in cases where + // the number of bits is not a multiple of kNumBitsInUint64. + uint64_t current_bit_index = **this; + + // The following code is optimized to make the search process fast for + // sparse bitsets. Its performance for densely filled bitsets should be + // good too. + + // Iterate forward word by word + for (; array_index_ < bitvec.size(); array_index_++) { + uint64_t word = bitvec[array_index_].load(std::memory_order_relaxed); + // For each word we check if it is non-zero (that is it contains a 1 bit) + if (word != 0) { + // Iterate over the bits in the word + uint64_t bit_mask = uint64_t{1} << bit_offset_; + for (; bit_offset_ < DynamicBitsetType::kNumBitsInUint64 && + current_bit_index < size; + bit_offset_++, current_bit_index++, bit_mask <<= 1) { + // Check if the bit is set. If it is we have reached where we need to be + if ((word & bit_mask) != 0) { + return *this; + } + } + // Reset bit_offset_ here so that we start from our last bit_offset_ and + // only reset when we roll over to the next word. 
+ bit_offset_ = 0; + } + } + bit_offset_ = 0; + array_index_ = bitvec.size(); + return *this; + } + + DynamicBitsetIterator operator++(int) { + auto r = *this; + ++(*this); + return r; + } + + reference operator*() const { + return array_index_ * DynamicBitsetType::kNumBitsInUint64 + bit_offset_; + } + + bool operator==(const DynamicBitsetIterator& other) { + return underlying_ == other.underlying_ && + array_index_ == other.array_index_ && + bit_offset_ == other.bit_offset_; + } + + bool operator!=(const DynamicBitsetIterator& other) { + return !(*this == other); + } +}; + //TODO(emcginnis): Remove this class entirely when DynamicBitset is available to libsupport /** * Concurrent, thread safe, serial implementation of a dynamically allocated bitset @@ -24,6 +115,8 @@ class KATANA_EXPORT DynamicBitsetSlow { size_t num_bits_{0}; public: + using iterator = DynamicBitsetIterator; + static constexpr uint32_t kNumBitsInUint64 = sizeof(uint64_t) * CHAR_BIT; explicit DynamicBitsetSlow( @@ -75,6 +168,18 @@ class KATANA_EXPORT DynamicBitsetSlow { */ auto& get_vec() { return bitvec_; } + iterator begin() const { + iterator bit0{this, 0, 0}; + if (test(0)) { + // If bit 0 is set then we have the right iterator + return bit0; + } + // Otherwise, increment to find the first set bit. + return ++bit0; + } + + iterator end() const { return {this, bitvec_.size(), 0}; } + /** * Resizes the bitset. 
* diff --git a/libsupport/test/CMakeLists.txt b/libsupport/test/CMakeLists.txt index 8b11eab2b3..8b15425c15 100644 --- a/libsupport/test/CMakeLists.txt +++ b/libsupport/test/CMakeLists.txt @@ -16,11 +16,12 @@ function(add_unit_test name) ) endfunction() -add_unit_test(arrow) add_unit_test(array-from-scalars) +add_unit_test(arrow) add_unit_test(bitmath) add_unit_test(cache) add_unit_test(disjoint_range_iterator) +add_unit_test(dynamic-bitset) add_unit_test(env) add_unit_test(experimental) add_unit_test(logging) diff --git a/libsupport/test/dynamic-bitset.cpp b/libsupport/test/dynamic-bitset.cpp new file mode 100644 index 0000000000..f12b10719b --- /dev/null +++ b/libsupport/test/dynamic-bitset.cpp @@ -0,0 +1,55 @@ +#include "katana/DynamicBitsetSlow.h" + +int +main() { + katana::DynamicBitsetSlow bs1; + bs1.resize(100); + // Test some simple sets, resets, and tests + for (int i : {40, 3, 5, 10}) { + KATANA_LOG_ASSERT(!bs1.test(i)); + bs1.set(i); + KATANA_LOG_ASSERT(bs1.test(i)); + + katana::DynamicBitsetSlow bs2; + bs2.resize(i + 1); + KATANA_LOG_ASSERT(!bs2.test(i)); + bs2.set(i); + KATANA_LOG_ASSERT(bs2.test(i)); + bs2.reset(i); + KATANA_LOG_ASSERT(!bs2.test(i)); + } + + KATANA_LOG_ASSERT(!bs1.test(2)); + KATANA_LOG_ASSERT(!bs1.test(11)); + + auto b = bs1.begin(); + auto e = bs1.end(); + auto e1 = ++(++(++(++bs1.begin()))); + KATANA_LOG_ASSERT(e == e1); + KATANA_LOG_ASSERT(b != e1); + + // Test the iterator + int count = 0; + for (auto i : bs1) { + KATANA_LOG_VASSERT(bs1.test(i), "{} not set", i); + count++; + } + + KATANA_LOG_ASSERT(count == 4); + + std::vector ones; + ones.resize(count); + std::copy(bs1.begin(), bs1.end(), ones.begin()); + + KATANA_LOG_ASSERT(ones[0] == 3); + KATANA_LOG_ASSERT(ones[1] == 5); + KATANA_LOG_ASSERT(ones[2] == 10); + KATANA_LOG_ASSERT(ones[3] == 40); + + // Test the global reset because it's easy + KATANA_LOG_ASSERT(bs1.test(10)); + bs1.reset(); + KATANA_LOG_ASSERT(!bs1.test(10)); + + return 0; +} diff --git 
a/lonestar/analytics/cpu/betweennesscentrality/betweenness_centrality_cli.cpp b/lonestar/analytics/cpu/betweennesscentrality/betweenness_centrality_cli.cpp index 365454e7a6..2f769a979e 100644 --- a/lonestar/analytics/cpu/betweennesscentrality/betweenness_centrality_cli.cpp +++ b/lonestar/analytics/cpu/betweennesscentrality/betweenness_centrality_cli.cpp @@ -103,18 +103,8 @@ main(int argc, char** argv) { std::cout << "Read " << pg->topology().NumNodes() << " nodes, " << pg->topology().NumEdges() << " edges\n"; - std::vector vec_node_types; - if (node_types != "") { - katana::analytics::SplitStringByComma(node_types, &vec_node_types); - } - - std::vector vec_edge_types; - if (edge_types != "") { - katana::analytics::SplitStringByComma(edge_types, &vec_edge_types); - } - - auto pg_projected_view = katana::PropertyGraph::MakeProjectedGraph( - *pg.get(), vec_node_types, vec_edge_types); + std::unique_ptr pg_projected_view = + ProjectPropertyGraphForArguments(pg); std::cout << "Projected graph has: " << pg_projected_view->topology().NumNodes() << " nodes, " diff --git a/lonestar/analytics/cpu/bfs/bfs_cli.cpp b/lonestar/analytics/cpu/bfs/bfs_cli.cpp index 630fa347bd..eaa244ef43 100644 --- a/lonestar/analytics/cpu/bfs/bfs_cli.cpp +++ b/lonestar/analytics/cpu/bfs/bfs_cli.cpp @@ -138,18 +138,8 @@ main(int argc, char** argv) { std::cout << "Running " << AlgorithmName(algo) << "\n"; - std::vector vec_node_types; - if (node_types != "") { - katana::analytics::SplitStringByComma(node_types, &vec_node_types); - } - - std::vector vec_edge_types; - if (edge_types != "") { - katana::analytics::SplitStringByComma(edge_types, &vec_edge_types); - } - - auto pg_projected_view = katana::PropertyGraph::MakeProjectedGraph( - *pg.get(), vec_node_types, vec_edge_types); + std::unique_ptr pg_projected_view = + ProjectPropertyGraphForArguments(pg); std::cout << "Projected graph has: " << pg_projected_view->topology().NumNodes() << " nodes, " diff --git 
a/lonestar/analytics/cpu/cdlp/cdlp_cli.cpp b/lonestar/analytics/cpu/cdlp/cdlp_cli.cpp index 4f82b31581..7477e3c4ac 100644 --- a/lonestar/analytics/cpu/cdlp/cdlp_cli.cpp +++ b/lonestar/analytics/cpu/cdlp/cdlp_cli.cpp @@ -50,7 +50,7 @@ static cll::opt algo( /// TODO (Yasin): Asynchronous Algorithm will be implemented later after Synchronous /// is done for both shared and distributed versions. - + clEnumValN( CdlpPlan::kAsynchronous, "Asynchronous", "Asynchronous algorithm")*/), @@ -97,18 +97,8 @@ main(int argc, char** argv) { std::cout << "Running " << AlgorithmName(algo) << " algorithm\n"; - std::vector vec_node_types; - if (node_types != "") { - katana::analytics::SplitStringByComma(node_types, &vec_node_types); - } - - std::vector vec_edge_types; - if (edge_types != "") { - katana::analytics::SplitStringByComma(edge_types, &vec_edge_types); - } - - auto pg_projected_view = katana::PropertyGraph::MakeProjectedGraph( - *pg.get(), vec_node_types, vec_edge_types); + std::unique_ptr pg_projected_view = + ProjectPropertyGraphForArguments(pg); std::cout << "Projected graph has: " << pg_projected_view->topology().NumNodes() << " nodes, " diff --git a/lonestar/analytics/cpu/connected-components/connected_components_cli.cpp b/lonestar/analytics/cpu/connected-components/connected_components_cli.cpp index aac57715e7..0f822114b4 100644 --- a/lonestar/analytics/cpu/connected-components/connected_components_cli.cpp +++ b/lonestar/analytics/cpu/connected-components/connected_components_cli.cpp @@ -143,18 +143,8 @@ main(int argc, char** argv) { std::cout << "Read " << pg->topology().NumNodes() << " nodes, " << pg->topology().NumEdges() << " edges\n"; - std::vector vec_node_types; - if (node_types != "") { - katana::analytics::SplitStringByComma(node_types, &vec_node_types); - } - - std::vector vec_edge_types; - if (edge_types != "") { - katana::analytics::SplitStringByComma(edge_types, &vec_edge_types); - } - - auto pg_projected_view = katana::PropertyGraph::MakeProjectedGraph( - 
*pg.get(), vec_node_types, vec_edge_types); + std::unique_ptr pg_projected_view = + ProjectPropertyGraphForArguments(pg); std::cout << "Projected graph has: " << pg_projected_view->topology().NumNodes() << " nodes, " diff --git a/lonestar/analytics/cpu/jaccard/jaccard_cli.cpp b/lonestar/analytics/cpu/jaccard/jaccard_cli.cpp index 8af370c2a1..0743d67464 100644 --- a/lonestar/analytics/cpu/jaccard/jaccard_cli.cpp +++ b/lonestar/analytics/cpu/jaccard/jaccard_cli.cpp @@ -70,18 +70,8 @@ main(int argc, char** argv) { std::cout << "Read " << pg->topology().NumNodes() << " nodes, " << pg->topology().NumEdges() << " edges\n"; - std::vector vec_node_types; - if (node_types != "") { - katana::analytics::SplitStringByComma(node_types, &vec_node_types); - } - - std::vector vec_edge_types; - if (edge_types != "") { - katana::analytics::SplitStringByComma(edge_types, &vec_edge_types); - } - - auto pg_projected_view = katana::PropertyGraph::MakeProjectedGraph( - *pg.get(), vec_node_types, vec_edge_types); + std::unique_ptr pg_projected_view = + ProjectPropertyGraphForArguments(pg); std::cout << "Projected graph has: " << pg_projected_view->topology().NumNodes() << " nodes, " diff --git a/lonestar/analytics/cpu/k-core/kcore_cli.cpp b/lonestar/analytics/cpu/k-core/kcore_cli.cpp index e70b1333b7..e14ff07ca0 100644 --- a/lonestar/analytics/cpu/k-core/kcore_cli.cpp +++ b/lonestar/analytics/cpu/k-core/kcore_cli.cpp @@ -95,18 +95,8 @@ main(int argc, char** argv) { std::cout << "Running " << AlgorithmName(algo) << "\n"; - std::vector vec_node_types; - if (node_types != "") { - katana::analytics::SplitStringByComma(node_types, &vec_node_types); - } - - std::vector vec_edge_types; - if (edge_types != "") { - katana::analytics::SplitStringByComma(edge_types, &vec_edge_types); - } - - auto pg_projected_view = katana::PropertyGraph::MakeProjectedGraph( - *pg.get(), vec_node_types, vec_edge_types); + std::unique_ptr pg_projected_view = + ProjectPropertyGraphForArguments(pg); std::cout << 
"Projected graph has: " << pg_projected_view->topology().NumNodes() << " nodes, " diff --git a/lonestar/analytics/cpu/k-shortest-paths/ksssp_cli.cpp b/lonestar/analytics/cpu/k-shortest-paths/ksssp_cli.cpp index 58fc7a4802..c76bb4157a 100644 --- a/lonestar/analytics/cpu/k-shortest-paths/ksssp_cli.cpp +++ b/lonestar/analytics/cpu/k-shortest-paths/ksssp_cli.cpp @@ -123,18 +123,8 @@ main(int argc, char** argv) { std::cout << "Read " << pg->topology().NumNodes() << " nodes, " << pg->topology().NumEdges() << " edges\n"; - std::vector vec_node_types; - if (node_types != "") { - katana::analytics::SplitStringByComma(node_types, &vec_node_types); - } - - std::vector vec_edge_types; - if (edge_types != "") { - katana::analytics::SplitStringByComma(edge_types, &vec_edge_types); - } - - auto pg_projected_view = katana::PropertyGraph::MakeProjectedGraph( - *pg.get(), vec_node_types, vec_edge_types); + std::unique_ptr pg_projected_view = + ProjectPropertyGraphForArguments(pg); std::cout << "Projected graph has: " << pg_projected_view->topology().NumNodes() << " nodes, " diff --git a/lonestar/analytics/cpu/k-truss/k_truss_cli.cpp b/lonestar/analytics/cpu/k-truss/k_truss_cli.cpp index 61eca46e27..0a2badda32 100644 --- a/lonestar/analytics/cpu/k-truss/k_truss_cli.cpp +++ b/lonestar/analytics/cpu/k-truss/k_truss_cli.cpp @@ -95,18 +95,8 @@ main(int argc, char** argv) { std::cout << "Running " << AlgorithmName(algo) << "\n"; - std::vector vec_node_types; - if (node_types != "") { - katana::analytics::SplitStringByComma(node_types, &vec_node_types); - } - - std::vector vec_edge_types; - if (edge_types != "") { - katana::analytics::SplitStringByComma(edge_types, &vec_edge_types); - } - - auto pg_projected_view = katana::PropertyGraph::MakeProjectedGraph( - *pg.get(), vec_node_types, vec_edge_types); + std::unique_ptr pg_projected_view = + ProjectPropertyGraphForArguments(pg); std::cout << "Projected graph has: " << pg_projected_view->topology().NumNodes() << " nodes, " diff --git 
a/lonestar/analytics/cpu/leiden_clustering/leiden_clustering_cli.cpp b/lonestar/analytics/cpu/leiden_clustering/leiden_clustering_cli.cpp index 7c30ad31ee..c5c030082a 100644 --- a/lonestar/analytics/cpu/leiden_clustering/leiden_clustering_cli.cpp +++ b/lonestar/analytics/cpu/leiden_clustering/leiden_clustering_cli.cpp @@ -116,18 +116,8 @@ main(int argc, char** argv) { std::cout << "Running " << AlgorithmName(algo) << " algorithm\n"; - std::vector vec_node_types; - if (node_types != "") { - katana::analytics::SplitStringByComma(node_types, &vec_node_types); - } - - std::vector vec_edge_types; - if (edge_types != "") { - katana::analytics::SplitStringByComma(edge_types, &vec_edge_types); - } - - auto pg_projected_view = katana::PropertyGraph::MakeProjectedGraph( - *pg.get(), vec_node_types, vec_edge_types); + std::unique_ptr pg_projected_view = + ProjectPropertyGraphForArguments(pg); std::cout << "Projected graph has: " << pg_projected_view->topology().NumNodes() << " nodes, " diff --git a/lonestar/analytics/cpu/local_clustering_coefficient/local_clustering_coefficient_cli.cpp b/lonestar/analytics/cpu/local_clustering_coefficient/local_clustering_coefficient_cli.cpp index f4ba7a670b..b1f8cd26d5 100644 --- a/lonestar/analytics/cpu/local_clustering_coefficient/local_clustering_coefficient_cli.cpp +++ b/lonestar/analytics/cpu/local_clustering_coefficient/local_clustering_coefficient_cli.cpp @@ -69,18 +69,8 @@ main(int argc, char** argv) { std::cout << "Read " << pg->topology().NumNodes() << " nodes, " << pg->topology().NumEdges() << " edges\n"; - std::vector vec_node_types; - if (node_types != "") { - katana::analytics::SplitStringByComma(node_types, &vec_node_types); - } - - std::vector vec_edge_types; - if (edge_types != "") { - katana::analytics::SplitStringByComma(edge_types, &vec_edge_types); - } - - auto pg_projected_view = katana::PropertyGraph::MakeProjectedGraph( - *pg.get(), vec_node_types, vec_edge_types); + std::unique_ptr pg_projected_view = + 
ProjectPropertyGraphForArguments(pg); std::cout << "Projected graph has: " << pg_projected_view->topology().NumNodes() << " nodes, " diff --git a/lonestar/analytics/cpu/louvain_clustering/louvain_clustering_cli.cpp b/lonestar/analytics/cpu/louvain_clustering/louvain_clustering_cli.cpp index e1ba4a9d19..101391b183 100644 --- a/lonestar/analytics/cpu/louvain_clustering/louvain_clustering_cli.cpp +++ b/lonestar/analytics/cpu/louvain_clustering/louvain_clustering_cli.cpp @@ -107,18 +107,8 @@ main(int argc, char** argv) { std::cout << "Running " << AlgorithmName(algo) << " algorithm\n"; - std::vector vec_node_types; - if (node_types != "") { - katana::analytics::SplitStringByComma(node_types, &vec_node_types); - } - - std::vector vec_edge_types; - if (edge_types != "") { - katana::analytics::SplitStringByComma(edge_types, &vec_edge_types); - } - - auto pg_projected_view = katana::PropertyGraph::MakeProjectedGraph( - *pg.get(), vec_node_types, vec_edge_types); + std::unique_ptr pg_projected_view = + ProjectPropertyGraphForArguments(pg); std::cout << "Projected graph has: " << pg_projected_view->topology().NumNodes() << " nodes, " diff --git a/lonestar/analytics/cpu/pagerank/pagerank-cli.cpp b/lonestar/analytics/cpu/pagerank/pagerank-cli.cpp index 2ebd655355..17e93d2204 100644 --- a/lonestar/analytics/cpu/pagerank/pagerank-cli.cpp +++ b/lonestar/analytics/cpu/pagerank/pagerank-cli.cpp @@ -70,18 +70,8 @@ main(int argc, char** argv) { std::cout << "Read " << pg->topology().NumNodes() << " nodes, " << pg->topology().NumEdges() << " edges\n"; - std::vector vec_node_types; - if (node_types != "") { - katana::analytics::SplitStringByComma(node_types, &vec_node_types); - } - - std::vector vec_edge_types; - if (edge_types != "") { - katana::analytics::SplitStringByComma(edge_types, &vec_edge_types); - } - - auto pg_projected_view = katana::PropertyGraph::MakeProjectedGraph( - *pg.get(), vec_node_types, vec_edge_types); + std::unique_ptr pg_projected_view = + 
ProjectPropertyGraphForArguments(pg); std::cout << "Projected graph has: " << pg_projected_view->topology().NumNodes() << " nodes, " diff --git a/lonestar/analytics/cpu/sssp/sssp_cli.cpp b/lonestar/analytics/cpu/sssp/sssp_cli.cpp index 50ad8133cc..bbc8f97392 100644 --- a/lonestar/analytics/cpu/sssp/sssp_cli.cpp +++ b/lonestar/analytics/cpu/sssp/sssp_cli.cpp @@ -167,18 +167,8 @@ main(int argc, char** argv) { std::cout << "Read " << pg->topology().NumNodes() << " nodes, " << pg->topology().NumEdges() << " edges\n"; - std::vector vec_node_types; - if (node_types != "") { - katana::analytics::SplitStringByComma(node_types, &vec_node_types); - } - - std::vector vec_edge_types; - if (edge_types != "") { - katana::analytics::SplitStringByComma(edge_types, &vec_edge_types); - } - - auto pg_projected_view = katana::PropertyGraph::MakeProjectedGraph( - *pg.get(), vec_node_types, vec_edge_types); + std::unique_ptr pg_projected_view = + ProjectPropertyGraphForArguments(pg); std::cout << "Projected graph has: " << pg_projected_view->topology().NumNodes() << " nodes, " diff --git a/lonestar/analytics/cpu/subgraph_extraction/subgraph_extraction_cli.cpp b/lonestar/analytics/cpu/subgraph_extraction/subgraph_extraction_cli.cpp index 06a892cdfe..5d5b821a4a 100644 --- a/lonestar/analytics/cpu/subgraph_extraction/subgraph_extraction_cli.cpp +++ b/lonestar/analytics/cpu/subgraph_extraction/subgraph_extraction_cli.cpp @@ -65,18 +65,8 @@ main(int argc, char** argv) { std::cout << "Read " << pg->topology().NumNodes() << " nodes, " << pg->topology().NumEdges() << " edges\n"; - std::vector vec_node_types; - if (node_types != "") { - katana::analytics::SplitStringByComma(node_types, &vec_node_types); - } - - std::vector vec_edge_types; - if (edge_types != "") { - katana::analytics::SplitStringByComma(edge_types, &vec_edge_types); - } - - auto pg_projected_view = katana::PropertyGraph::MakeProjectedGraph( - *pg.get(), vec_node_types, vec_edge_types); + std::unique_ptr pg_projected_view = + 
ProjectPropertyGraphForArguments(pg); std::cout << "Projected graph has: " << pg_projected_view->topology().NumNodes() << " nodes, " diff --git a/lonestar/analytics/cpu/triangle-counting/triangle_counting_cli.cpp b/lonestar/analytics/cpu/triangle-counting/triangle_counting_cli.cpp index 02424117aa..e1da7d8882 100644 --- a/lonestar/analytics/cpu/triangle-counting/triangle_counting_cli.cpp +++ b/lonestar/analytics/cpu/triangle-counting/triangle_counting_cli.cpp @@ -70,18 +70,8 @@ main(int argc, char** argv) { std::cout << "Read " << pg->topology().NumNodes() << " nodes, " << pg->topology().NumEdges() << " edges\n"; - std::vector vec_node_types; - if (node_types != "") { - katana::analytics::SplitStringByComma(node_types, &vec_node_types); - } - - std::vector vec_edge_types; - if (edge_types != "") { - katana::analytics::SplitStringByComma(edge_types, &vec_edge_types); - } - - auto pg_projected_view = katana::PropertyGraph::MakeProjectedGraph( - *pg.get(), vec_node_types, vec_edge_types); + std::unique_ptr pg_projected_view = + ProjectPropertyGraphForArguments(pg); std::cout << "Projected graph has: " << pg_projected_view->topology().NumNodes() << " nodes, " diff --git a/lonestar/liblonestar/include/Lonestar/BoilerPlate.h b/lonestar/liblonestar/include/Lonestar/BoilerPlate.h index 690e77abc6..fa43f7c0e7 100644 --- a/lonestar/liblonestar/include/Lonestar/BoilerPlate.h +++ b/lonestar/liblonestar/include/Lonestar/BoilerPlate.h @@ -17,8 +17,8 @@ * Documentation, or loss or inaccuracy of data of any kind. 
*/ -#ifndef LONESTAR_BOILERPLATE_H -#define LONESTAR_BOILERPLATE_H +#ifndef KATANA_LONESTAR_LIBLONESTAR_LONESTAR_BOILERPLATE_H_ +#define KATANA_LONESTAR_LIBLONESTAR_LONESTAR_BOILERPLATE_H_ #include "Lonestar/Utils.h" #include "katana/Galois.h" @@ -45,4 +45,7 @@ std::unique_ptr LonestarStart( int argc, char** argv, const char* app, const char* desc, const char* url, llvm::cl::opt* input); std::unique_ptr LonestarStart(int argc, char** argv); + +std::unique_ptr ProjectPropertyGraphForArguments( + const std::unique_ptr& pg); #endif diff --git a/lonestar/liblonestar/src/BoilerPlate.cpp b/lonestar/liblonestar/src/BoilerPlate.cpp index d352961d30..b00fd78732 100644 --- a/lonestar/liblonestar/src/BoilerPlate.cpp +++ b/lonestar/liblonestar/src/BoilerPlate.cpp @@ -124,3 +124,29 @@ LonestarStart( katana::ReportParam("(NULL)", "Hostname", name); return shared_mem_sys; } + +std::unique_ptr +ProjectPropertyGraphForArguments( + const std::unique_ptr& pg) { + std::vector vec_node_types; + if (node_types != "") { + katana::analytics::SplitStringByComma(node_types, &vec_node_types); + } + + std::vector vec_edge_types; + if (edge_types != "") { + katana::analytics::SplitStringByComma(edge_types, &vec_edge_types); + } + + auto pg_view_res = katana::PropertyGraph::MakeProjectedGraph( + *pg.get(), + vec_node_types.empty() ? std::nullopt + : std::make_optional(vec_node_types), + vec_edge_types.empty() ? 
std::nullopt + : std::make_optional(vec_edge_types)); + if (!pg_view_res) { + KATANA_LOG_FATAL("Failed to construct projection: {}", pg_view_res.error()); + } + auto pg_projected_view = std::move(pg_view_res.value()); + return pg_projected_view; +} diff --git a/python/test/test_property_graph.py b/python/test/test_property_graph.py index 2aa9a0bff2..00d706b446 100644 --- a/python/test/test_property_graph.py +++ b/python/test/test_property_graph.py @@ -325,3 +325,31 @@ def test_types(graph): "IS_PART_OF": 9, } assert graph.edge_types.is_subtype_of(0, 1) is True + + +def test_projected(graph): + projected_graph = graph.project([]) + assert projected_graph.num_nodes() == 0 + assert projected_graph.num_edges() == 0 + + projected_graph = graph.project([graph.node_types.atomic_types["Person"]]) + assert projected_graph.num_nodes() == 45 + assert projected_graph.num_edges() == 58 + + projected_graph = graph.project([graph.node_types.atomic_types["Person"], graph.node_types.atomic_types["Message"]]) + assert projected_graph.num_nodes() == 45 + 3928 + assert projected_graph.num_edges() == 4752 + + projected_graph = graph.project([graph.node_types.atomic_types["Person"]], edge_types=[]) + assert projected_graph.num_nodes() == 45 + assert projected_graph.num_edges() == 0 + + projected_graph = graph.project( + [graph.node_types.atomic_types["Message"]], [graph.edge_types.atomic_types["REPLY_OF"]] + ) + assert projected_graph.num_nodes() == 3928 + assert projected_graph.num_edges() == 371 + + projected_graph = graph.project(edge_types=[graph.edge_types.atomic_types["REPLY_OF"]]) + assert projected_graph.num_nodes() == 29946 + assert projected_graph.num_edges() == 371