Skip to content

Commit

Permalink
chore(examples): add glob example
Browse files Browse the repository at this point in the history
  • Loading branch information
j-mendez committed Dec 28, 2023
1 parent 054bd30 commit 28c883e
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 5 deletions.
12 changes: 11 additions & 1 deletion examples/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -101,4 +101,14 @@ required-features = ["spider/sync", "spider/encoding"]
[[example]]
name = "advanced_configuration"
path = "advanced_configuration.rs"
required-features = ["spider/sync"]
required-features = ["spider/sync"]

[[example]]
name = "url_glob"
path = "url_glob.rs"
required-features = ["spider/sync", "spider/glob"]

[[example]]
name = "url_glob_subdomains"
path = "url_glob_subdomains.rs"
required-features = ["spider/sync", "spider/glob"]
11 changes: 9 additions & 2 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,14 @@ Use different encodings for the page. [Encoding](./encoding.rs).

- `cargo run --example encoding --features encoding`


Use advanced configuration re-use. [Advanced Configuration](./advanced_configuration.rs).

- `cargo run --example advanced_configuration`
- `cargo run --example advanced_configuration`

Use URL globbing for a domain. [URL Globbing](./url_glob.rs).

- `cargo run --example url_glob --features glob`

Use URL globbing for a domain and subdomains. [URL Globbing Subdomains](./url_glob_subdomains.rs).

- `cargo run --example url_glob_subdomains --features glob`
4 changes: 2 additions & 2 deletions examples/advanced_configuration.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
//! `cargo run --example advanced_configuration`
extern crate spider;

use spider::{tokio, website::Website, configuration::Configuration};
use std::{time::Instant, io::Error};
use spider::{configuration::Configuration, tokio, website::Website};
use std::{io::Error, time::Instant};

const CAPACITY: usize = 5;
const CRAWL_LIST: [&str; CAPACITY] = [
Expand Down
34 changes: 34 additions & 0 deletions examples/url_glob.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
//! `cargo run --example url_glob --features glob`
extern crate spider;

use spider::tokio;
use spider::website::Website;
use std::time::Instant;

/// Crawls a brace-pattern URL on `rsseau.fr` and reports what was collected.
///
/// Before crawling, `https://rsseau.fr/resume` is pushed onto the
/// configuration's `blacklist_url` list. After the crawl, every collected link
/// is printed, followed by the elapsed crawl time and the number of links.
#[tokio::main]
async fn main() {
    // NOTE(review): the `{a,b,c}` group is presumably expanded by spider's
    // `glob` feature into one start URL per alternative — confirm in the
    // spider docs.
    let mut website: Website = Website::new(
        "https://rsseau.fr/blog/{lazy-load-components,gnu-parallel,migrate-from-jekyll-to-gatsby}",
    );

    // Install a default-constructed blacklist, then add the single URL to it.
    let blacklist = website
        .configuration
        .blacklist_url
        .insert(Default::default());
    blacklist.push("https://rsseau.fr/resume".into());

    // Time only the crawl itself, not the setup above or the printing below.
    let crawl_started = Instant::now();
    website.crawl().await;
    let elapsed = crawl_started.elapsed();

    let links = website.get_links();
    let total_pages = links.len();

    for visited in links.iter() {
        println!("- {:?}", visited.as_ref());
    }

    println!(
        "Time elapsed in website.crawl() is: {:?} for total pages: {:?}",
        elapsed, total_pages
    );
}
30 changes: 30 additions & 0 deletions examples/url_glob_subdomains.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
//! `cargo run --example url_glob_subdomains --features glob`
extern crate spider;

use spider::tokio;
use spider::website::Website;
use std::time::Instant;

/// Crawls `https://{www,docs}.a11ywatch.com` with subdomains enabled and
/// reports what was collected.
///
/// The website is configured via `with_subdomains(true)` and finalized with
/// `build()`; a failed build panics through `unwrap()`. After the crawl, every
/// collected link is printed, followed by the elapsed crawl time and the
/// number of links.
#[tokio::main]
async fn main() {
    // NOTE(review): the `{www,docs}` group is presumably expanded by spider's
    // `glob` feature into one start URL per host — confirm in the spider docs.
    let mut website: Website = Website::new("https://{www,docs}.a11ywatch.com")
        .with_subdomains(true)
        .build()
        .unwrap();

    // Time only the crawl itself, not the setup above or the printing below.
    let crawl_started = Instant::now();
    website.crawl().await;
    let elapsed = crawl_started.elapsed();

    let links = website.get_links();
    let total_pages = links.len();

    for visited in links.iter() {
        println!("- {:?}", visited.as_ref());
    }

    println!(
        "Time elapsed in website.crawl() is: {:?} for total pages: {:?}",
        elapsed, total_pages
    );
}

0 comments on commit 28c883e

Please sign in to comment.