Skip to content

Commit

Permalink
fix CRAN issues for gcc 13 (#36)
Browse files Browse the repository at this point in the history
fix maxp crash
  • Loading branch information
lixun910 authored Jul 3, 2023
1 parent 1c14030 commit 94c17da
Show file tree
Hide file tree
Showing 15 changed files with 99 additions and 56 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@
^_pkgdown\.yml$
^docs$
^pkgdown$
^Dockerfile$
11 changes: 7 additions & 4 deletions .github/workflows/build-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@ jobs:
config:
- {os: macOS-latest, r: 'release'}
- {os: windows-latest, r: 'release'}
- {os: ubuntu-latest, r: 'release'}
#- {os: windows-latest, r: '3.6', rspm: "https://packagemanager.rstudio.com/cran/latest"}
#- {os: ubuntu-18.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/bionic/latest", http-user-agent: "R/4.0.0 (ubuntu-18.04) R (4.0.0 x86_64-pc-linux-gnu x86_64 linux-gnu) on GitHub Actions" }
- {os: ubuntu-18.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/bionic/latest"}
#- {os: ubuntu-18.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/bionic/latest"}
#- {os: ubuntu-18.04, r: 'oldrel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/bionic/latest"}
#- {os: ubuntu-18.04, r: '3.6', rspm: "https://packagemanager.rstudio.com/cran/__linux__/bionic/latest"}
#- {os: ubuntu-18.04, r: '3.5', rspm: "https://packagemanager.rstudio.com/cran/__linux__/bionic/latest"}
Expand All @@ -42,16 +43,18 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
with:
submodules: recursive

- uses: r-lib/actions/setup-r@v1
- uses: r-lib/actions/setup-r@v2
with:
rtools-version: '42'
r-version: ${{ matrix.config.r }}
http-user-agent: ${{ matrix.config.http-user-agent }}
use-public-rspm: true

- uses: r-lib/actions/setup-pandoc@v1
- uses: r-lib/actions/setup-pandoc@v2

- name: Query dependencies
run: |
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/lint-project.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ jobs:
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
with:
submodules: recursive

- uses: r-lib/actions/setup-r@v1
- uses: r-lib/actions/setup-r@v2

- name: Install lintr
run: install.packages("lintr")
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/pkgdown.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ jobs:
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
with:
submodules: recursive

- uses: r-lib/actions/setup-r@v1
- uses: r-lib/actions/setup-r@v2

- uses: r-lib/actions/setup-pandoc@v1
- uses: r-lib/actions/setup-pandoc@v2

- name: Query dependencies
run: |
Expand Down
4 changes: 3 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@
"geodaweight.h": "c",
"filesystem": "cpp",
"numbers": "cpp",
"semaphore": "cpp"
"semaphore": "cpp",
"__bits": "cpp",
"__verbose_abort": "cpp"
}
}
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: rgeoda
Type: Package
Title: R Library for Spatial Data Analysis
Version: 0.0.9
Date: 2022-04-09
Version: 0.0.10-4
Date: 2023-07-01
Authors@R:
c(person(given = "Xun", family = "Li", email="[email protected]", role=c("aut","cre")),
person(given = "Luc", family = "Anselin", email="[email protected]", role="aut"))
Expand Down Expand Up @@ -39,4 +39,4 @@ Encoding: UTF-8
Suggests:
wkb,
sp
SystemRequirements: C++14
SystemRequirements: C++17
7 changes: 7 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
FROM rocker/r-base

ENV DEBIAN_FRONTEND noninteractive

RUN apt-get update && apt-get install -y git libssl-dev libgeos-dev libgeos++-dev gdal-bin libproj-dev libgdal-dev libudunits2-dev

RUN install2.r --error proxy Rcpp wk sp digest sf BH wkb TinyTex
43 changes: 30 additions & 13 deletions R/clustering.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,26 @@
#' similar values for features of interest.
#' @param k The number of clusters
#' @param w An instance of Weight class
#' @param df A data frame with selected variables only. E.g. guerry[c("Crm_prs", "Crm_prp", "Litercy")]
#' @param df A data frame with selected variables only.
#' E.g. guerry[c("Crm_prs", "Crm_prp", "Litercy")]
#' @param bound_variable (optional) A data frame with selected bound variable
#' @param min_bound (optional) A minimum bound value that applies to all clusters
#' @param scale_method One of the scaling methods {'raw', 'standardize', 'demean', 'mad', 'range_standardize', 'range_adjust'} to apply on input data. Default is 'standardize' (Z-score normalization).
#' @param distance_method (optional) The distance method used to compute the distance betwen observation i and j. Defaults to "euclidean". Options are "euclidean" and "manhattan"
#' @param random_seed (int,optional) The seed for random number generator. Defaults to 123456789.
#' @param cpu_threads (optional) The number of cpu threads used for parallel computation
#' @param rdist (optional) The distance matrix (lower triangular matrix, column wise storage)
#' @return A names list with names "Clusters", "Total sum of squares", "Within-cluster sum of squares", "Total within-cluster sum of squares", and "The ratio of between to total sum of squares".
#' @param min_bound (optional) A minimum bound value that applies to all
#' clusters
#' @param scale_method One of the scaling methods {'raw', 'standardize',
#' 'demean', 'mad', 'range_standardize', 'range_adjust'} to apply on input data.
#' Default is 'standardize' (Z-score normalization).
#' @param distance_method (optional) The distance method used to compute the
#' distance betwen observation i and j. Defaults to "euclidean". Options are
#' "euclidean" and "manhattan"
#' @param random_seed (int,optional) The seed for random number generator.
#' Defaults to 123456789.
#' @param cpu_threads (optional) The number of cpu threads used for parallel
#' computation
#' @param rdist (optional) The distance matrix (lower triangular matrix,
#' column wise storage)
#' @return A names list with names "Clusters", "Total sum of squares",
#' "Within-cluster sum of squares", "Total within-cluster sum of squares",
#' and "The ratio of between to total sum of squares".
#' @examples
#' library(sf)
#' guerry_path <- system.file("extdata", "Guerry.shp", package = "rgeoda")
Expand All @@ -22,12 +33,15 @@
#' guerry_clusters <- skater(4, queen_w, data)
#' guerry_clusters
#' @export
skater <- function(k, w, df, bound_variable=data.frame(), min_bound=0, scale_method="standardize", distance_method="euclidean", random_seed=123456789, cpu_threads=6, rdist=numeric()) {
skater <- function(k, w, df, bound_variable=data.frame(), min_bound=0,
scale_method="standardize", distance_method="euclidean",
random_seed=123456789, cpu_threads=6, rdist=numeric()) {
if (w$num_obs < 1) {
stop("The weights is not valid.")
}
if (k <1 && k > w$num_obs) {
stop("The number of clusters should be a positive integer number, which is less than the number of observations.")
stop("The number of clusters should be a positive integer number, which is
less than the number of observations.")
}
if (inherits(df, "data.frame") == FALSE) {
stop("The input data needs to be a data.frame.")
Expand All @@ -43,9 +57,11 @@ skater <- function(k, w, df, bound_variable=data.frame(), min_bound=0, scale_met
stop("The data.frame is empty.")
}

scale_methods <- c('raw', 'standardize', 'demean', 'mad', 'range_standardize', 'range_adjust')
scale_methods <- c('raw', 'standardize', 'demean', 'mad', 'range_standardize',
'range_adjust')
if (!(scale_method %in% scale_methods)) {
stop("The scale_method has to be one of {'raw', 'standardize', 'demean', 'mad', 'range_standardize', 'range_adjust'}")
stop("The scale_method has to be one of {'raw', 'standardize', 'demean',
'mad', 'range_standardize', 'range_adjust'}")
}

if (distance_method != "euclidean" && distance_method != "manhattan") {
Expand All @@ -56,7 +72,8 @@ skater <- function(k, w, df, bound_variable=data.frame(), min_bound=0, scale_met
if (length(bound_variable) > 0) {
bound_values <- bound_variable[[1]]
}
return(p_skater(k, w$GetPointer(), df, n_vars, scale_method, distance_method, bound_values, min_bound, random_seed, cpu_threads, rdist))
return(p_skater(k, w$GetPointer(), df, n_vars, scale_method, distance_method,
bound_values, min_bound, random_seed, cpu_threads, rdist))
}


Expand Down
4 changes: 2 additions & 2 deletions R/sf_geoda.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Create a random string (internally used)
# The input is a positive number, indicating the number of items to choose from.
random_string <- function(n = 5000) {
a <- do.call(paste0, replicate(5, sample(LETTERS, n, TRUE), FALSE))
paste0(a, sprintf("%04d", sample(9999, n, TRUE)), sample(LETTERS, n, TRUE))
a <- do.call(paste0, replicate(10, sample(LETTERS, n, TRUE), FALSE))
return(a)
}


Expand Down
27 changes: 19 additions & 8 deletions man/skater.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/Makevars
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ PKG_CPPFLAGS=\
PKG_LIBS=\
-pthread

CXX_STD=CXX14
CXX_STD=CXX17

CPP_SRC_FILES = \
$(RGEODALIB)/libgeoda.cpp \
Expand Down
2 changes: 1 addition & 1 deletion src/Makevars.win
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ PKG_LIBS=\
-L$(RWINLIB)/lib$(R_ARCH) \
-pthread

CXX_STD = CXX14
CXX_STD = CXX17

CPP_SRC_FILES = \
$(RGEODALIB)/libgeoda.cpp \
Expand Down
2 changes: 0 additions & 2 deletions src/rcpp_clustering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,8 @@ Rcpp::List p_skater(int k, SEXP xp_w, Rcpp::List& data, int n_vars, std::string
int num_obs = w->GetNumObs();
double** dist_matrix = rdist_matrix(num_obs, rdist);

Rcout << "aaa" << dist_matrix;
std::vector<std::vector<int> > cluster_ids = gda_skater(k, w, raw_data, scale_method, distance_method, raw_bound, min_bound, seed, cpu_threads, dist_matrix);

Rcout << "after gda_skater";
if (dist_matrix) {
for (int i = 1; i < num_obs; i++) {
free(dist_matrix[i]);
Expand Down
34 changes: 19 additions & 15 deletions tests/testthat/test-clustering.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,14 @@ testthat::test_that("schc", {
testthat::expect_equal(clusters[[5]], 0.2147711255)
})

# NOTE!!!!!!!!!
# The results are computed using Boost library 1.58.0.
# To pass the following test cases
# , please install BH package version==1.58.0
# NOTE
# The previous results are computed using Boost library 1.58.0.
# The new results are computed using Boost library 1.81.0.1
# The differences are caused by the different implementation of
# boost::unordered_map: he keys in boost::unordered_map are not ordered and
# have different orders in the two Boost versions. This involves a different
# mechanism of randomness in max-p algorithm when picking which area or region
# to process.

testthat::test_that("azp_greedy", {
library(sf)
Expand All @@ -78,7 +82,7 @@ testthat::test_that("azp_greedy", {

azp_clusters <- azp_greedy(5, queen_w, data)

testthat::expect_equal(azp_clusters[[5]], 0.3598541)
testthat::expect_equal(azp_clusters[[5]], 0.36, tolerance = 1e-3)

bound_variable <- guerry["Pop1831"]
min_bound <- 3236.67 # 10% of Pop1831
Expand All @@ -87,7 +91,7 @@ testthat::test_that("azp_greedy", {
bound_variable = bound_variable,
min_bound = min_bound)

testthat::expect_equal(azp_clusters[[5]], 0.3980921835)
testthat::expect_equal(azp_clusters[[5]], 0.417, tolerance = 1e-3)

})

Expand All @@ -101,7 +105,7 @@ testthat::test_that("azp_sa", {

azp_clusters <- azp_sa(5, queen_w, data, cooling_rate = 0.85, sa_maxit = 1)

testthat::expect_equal(azp_clusters[[5]], 0.4211363)
testthat::expect_equal(azp_clusters[[5]], 0.359, tolerance = 1e-3)
})

testthat::test_that("azp_tabu", {
Expand Down Expand Up @@ -129,9 +133,9 @@ testthat::test_that("maxp_greedy", {
bound_vals <- guerry["Pop1831"]
min_bound <- 3236.67 # 10% of Pop1831

#clusters <- maxp_greedy(queen_w, data, bound_vals, min_bound)
clusters <- maxp_greedy(queen_w, data, bound_vals, min_bound)

#testthat::expect_equal(clusters[[5]], 0.4499671068)
testthat::expect_equal(clusters[[5]], 0.484, tolerance = 1e-3)
})

testthat::test_that("maxp_sa", {
Expand All @@ -145,10 +149,10 @@ testthat::test_that("maxp_sa", {
bound_vals <- guerry["Pop1831"]
min_bound <- 3236.67 # 10% of Pop1831

#clusters <- maxp_sa(queen_w, data, bound_vals, min_bound,
# cooling_rate = 0.85, sa_maxit = 1)
clusters <- maxp_sa(queen_w, data, bound_vals, min_bound,
cooling_rate = 0.85, sa_maxit = 1)

#testthat::expect_equal(clusters[[5]], 0.4585352223)
testthat::expect_equal(clusters[[5]], 0.496, tolerance = 1e-3)
})

testthat::test_that("maxp_tabu", {
Expand All @@ -163,9 +167,9 @@ testthat::test_that("maxp_tabu", {
min_bound <- 3236.67 # 10% of Pop1831


#clusters <- maxp_tabu(queen_w, data, bound_vals, min_bound,
# tabu_length = 10, conv_tabu = 10)
clusters <- maxp_tabu(queen_w, data, bound_vals, min_bound,
tabu_length = 10, conv_tabu = 10)

#testthat::expect_equal(clusters[[5]], 0.4893668149)
testthat::expect_equal(clusters[[5]], 0.478, tolerance = 1e-3)

})

0 comments on commit 94c17da

Please sign in to comment.