From a21a1c6ed64cc69ce33fcdf06a52d1e491c69499 Mon Sep 17 00:00:00 2001 From: j-mendez Date: Sun, 16 Feb 2025 17:11:40 -0500 Subject: [PATCH] chore(website): fix page empty status --- Cargo.lock | 14 +++++++------- spider/Cargo.toml | 2 +- spider/src/website.rs | 5 ++++- spider_chrome/Cargo.toml | 2 +- spider_cli/Cargo.toml | 2 +- spider_firewall/Cargo.toml | 2 +- spider_transformations/Cargo.toml | 2 +- spider_utils/Cargo.toml | 2 +- spider_worker/Cargo.toml | 2 +- 9 files changed, 18 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 63d3dc19a..f56638f0e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5589,7 +5589,7 @@ dependencies = [ [[package]] name = "spider" -version = "2.28.1" +version = "2.28.2" dependencies = [ "ahash", "aho-corasick", @@ -5655,7 +5655,7 @@ dependencies = [ [[package]] name = "spider_chrome" -version = "2.28.1" +version = "2.28.2" dependencies = [ "adblock", "aho-corasick", @@ -5746,7 +5746,7 @@ dependencies = [ [[package]] name = "spider_cli" -version = "2.28.1" +version = "2.28.2" dependencies = [ "clap", "env_logger", @@ -5771,7 +5771,7 @@ dependencies = [ [[package]] name = "spider_firewall" -version = "2.28.1" +version = "2.28.2" dependencies = [ "phf 0.11.3", "phf_codegen 0.11.3", @@ -5798,7 +5798,7 @@ dependencies = [ [[package]] name = "spider_transformations" -version = "2.28.1" +version = "2.28.2" dependencies = [ "aho-corasick", "fast_html2md", @@ -5821,7 +5821,7 @@ dependencies = [ [[package]] name = "spider_utils" -version = "2.28.1" +version = "2.28.2" dependencies = [ "hashbrown 0.15.2", "indexmap 1.9.3", @@ -5838,7 +5838,7 @@ dependencies = [ [[package]] name = "spider_worker" -version = "2.28.1" +version = "2.28.2" dependencies = [ "env_logger", "lazy_static", diff --git a/spider/Cargo.toml b/spider/Cargo.toml index 2a7994794..c5561a7b7 100644 --- a/spider/Cargo.toml +++ b/spider/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider" -version = "2.28.1" +version = "2.28.2" authors = [ "j-mendez " ] diff --git a/spider/src/website.rs b/spider/src/website.rs index 5b10a4165..7830b21d6 100644 --- a/spider/src/website.rs +++ b/spider/src/website.rs @@ -2425,10 +2425,13 @@ impl Website { let links = if !page_links.is_empty() { page_links } else { - self.status = CrawlStatus::Empty; Default::default() }; + if page.is_empty() { + self.status = CrawlStatus::Empty; + } + self.initial_status_code = page.status_code; if page.status_code == reqwest::StatusCode::FORBIDDEN { diff --git a/spider_chrome/Cargo.toml b/spider_chrome/Cargo.toml index d67a6f353..bff1db820 100644 --- a/spider_chrome/Cargo.toml +++ b/spider_chrome/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_chrome" -version = "2.28.1" +version = "2.28.2" rust-version = "1.70" authors = [ "j-mendez " diff --git a/spider_cli/Cargo.toml b/spider_cli/Cargo.toml index cd87cfba0..4bbf9ebd9 100644 --- a/spider_cli/Cargo.toml +++ b/spider_cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_cli" -version = "2.28.1" +version = "2.28.2" authors = [ "j-mendez " ] diff --git a/spider_firewall/Cargo.toml b/spider_firewall/Cargo.toml index fc623713b..5a02e9d19 100644 --- a/spider_firewall/Cargo.toml +++ b/spider_firewall/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_firewall" -version = "2.28.1" +version = "2.28.2" authors = [ "j-mendez " ] diff --git a/spider_transformations/Cargo.toml b/spider_transformations/Cargo.toml index 49a6ebd89..9575db97d 100644 --- a/spider_transformations/Cargo.toml +++ b/spider_transformations/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_transformations" -version = "2.28.1" +version = "2.28.2" authors = [ "j-mendez " ] diff --git a/spider_utils/Cargo.toml b/spider_utils/Cargo.toml index ed458c77b..3a4c6804e 100644 --- a/spider_utils/Cargo.toml +++ b/spider_utils/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_utils" -version = "2.28.1" +version = "2.28.2" authors = [ "j-mendez " ] diff --git a/spider_worker/Cargo.toml b/spider_worker/Cargo.toml index 1d3ccd75c..4f5c59987 100644 --- a/spider_worker/Cargo.toml +++ b/spider_worker/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_worker" -version = "2.28.1" +version = "2.28.2" authors = [ "j-mendez " ]