Skip to content

Commit

Permalink
Merge pull request osmlab#164 from maps-osm/dev_merge_602
Browse files Browse the repository at this point in the history
Bringing in atlas checks 6.0.1 and 6.0.2
  • Loading branch information
Daniel Baah (V) authored and GitHub Enterprise committed Mar 6, 2020
2 parents 9ca666a + 7251349 commit 92ac405
Show file tree
Hide file tree
Showing 82 changed files with 3,927 additions and 989 deletions.
50 changes: 49 additions & 1 deletion config/configuration.json
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,21 @@
"tags": "building"
}
},
"LineCrossingWaterBodyCheck": {},
"LineCrossingWaterBodyCheck": {
"highway.minimum": "path",
"highways.exclude": [
"bus_guideway"
],
"lineItems.offending": "railway->rail,narrow_gauge,preserved,subway,disused,monorail,tram,light_rail,funicular,construction,miniature",
"lineItems.non_offending": "waterway->*|boundary->*|landuse->*|bridge->yes,viaduct,aqueduct,boardwalk,covered,low_water_crossing,movable,suspension|tunnel->yes,culvert,building_passage|embankment->yes|location->underwater,underground|power->line,minor_line|man_made->pier,breakwater,embankment,groyne,dyke,pipeline|route->ferry|highway->proposed,construction|ice_road->yes|ford->yes|winter_road->yes|snowmobile->yes|ski->yes",
"buildings.flag": true,
"challenge": {
"description": "Certain OSM features should not cross waterbodies.",
"blurb": "Edit features overlapping the waterbody so they either validly overlap or do not overlap.",
"instruction": "Open your favorite editor and edit the features overlapping the waterbody so they either validly overlap or do not overlap.",
"difficulty": "EASY"
}
},
"LongSegmentCheck": {
"length.minimum.kilometers": 10.0,
"challenge": {
Expand Down Expand Up @@ -799,6 +813,7 @@
"SinkIslandCheck": {
"tree.size": 50,
"minimum.highway.type": "service",
"filter.pedestrian.network": false,
"challenge": {
"description": "Tasks that identify islands of roads where it is impossible to get out. The simplest is a one-way that dead-ends; that would be a one-edge island.",
"blurb": "Identify islands of roads.",
Expand Down Expand Up @@ -940,5 +955,38 @@
"difficulty": "MEDIUM",
"defaultPriority": "MEDIUM"
}
},
"AtGradeSignPostCheck": {
"connected.highway.types": {
"primary": ["trunk","primary","secondary"],
"trunk": ["primary"],
"secondary": ["primary"]
},
"challenge":{
"description":"This task contains at-grade intersections that are not part of destination_sign relations.",
"blurb":"Add/Fix destination_sign relations to at-grade intersections",
"instruction":"Open your favorite editor and check the instruction for the task and add a destination_sign relation or destination_sign tag to the at-grade intersection.",
"difficulty":"NORMAL",
"defaultPriority":"MEDIUM"
}
},
"OceanBleedingCheck": {
"ocean": {
"valid": "natural->strait,channel,fjord,sound,bay|harbour->*&harbour->!no|estuary->*&estuary->!no|bay->*&bay->!no|place->sea|seamark:type->harbour,harbour_basin,sea_area|water->bay,cove,harbour|waterway->artificial,dock",
"invalid": "man_made->breakwater,pier|natural->beach,marsh,swamp|water->marsh|wetland->bog,fen,mangrove,marsh,saltern,saltmarsh,string_bog,swamp,wet_meadow|landuse->*"
},
"highway": {
"minimum": "path",
"exclude": [
"bus_guideway"
]
},
"lineItems.offending": "railway->rail,narrow_gauge,preserved,subway,disused,monorail,tram,light_rail,funicular,construction,miniature",
"challenge": {
"description": "Certain OSM features should not bleed into (intersect) oceans.",
"blurb": "Edit features overlapping the ocean feature so they either validly intersect or do not overlap.",
"instruction": "Open your favorite editor and edit the features overlapping the ocean so they either validly overlap or do not overlap.",
"difficulty": "EASY"
}
}
}
4 changes: 2 additions & 2 deletions dependencies.gradle
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
project.ext.versions = [
checkstyle: '8.18',
jacoco: '0.8.3',
atlas: '6.0.1',
atlas: '6.1.0',
commons:'2.6',
atlas_generator: '5.0.0',
atlas_generator: '5.0.1',
atlas_checkstyle: '5.6.9',
postgis: '2.1.7.2',
postgres: '42.2.6',
Expand Down
2 changes: 2 additions & 0 deletions docs/available_checks.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ This document is a list of tables with a description and link to documentation f
| [ShadowDetectionCheck](checks/shadowDetectionCheck.md) | The purpose of this check is to identify floating buildings. |
| [SpikyBuildingCheck](checks/spikyBuildingCheck.md) | The purpose of this check is to identify buildings with extremely sharp angles in their geometry. |
| [WaterbodyAndIslandSizeCheck](checks/waterbodyAndIslandSizeCheck.md) | The purpose of this check is to identify waterbodies and islands which are either too small or too large in size. |
| [OceanBleedingCheck](checks/oceanBleedingCheck.md) | The purpose of this check is to identify streets, buildings, and railways that bleed into (intersect) an ocean feature. |

## Highways
| Check Name | Check Description |
Expand All @@ -27,6 +28,7 @@ This document is a list of tables with a description and link to documentation f
| [SinkIslandCheck](tutorials/tutorial3-SinkIslandCheck.md) | The purpose of this check is to identify whether a network of car-navigable Edges can be exited. |
| [SnakeRoadCheck](checks/snakeRoadCheck.md) | The purpose of the SnakeRoad check is to identify roads that should be split into two or more roads. |
| [InvalidPiersCheck](checks/invalidPiersCheck.md) | The purpose of this check is to identify piers(OSM Ways with man_made=pier tag) that are ingested in Atlas as edges with linear or polygonal geometry without an area=yes tag |
| [AtGradeSignPostCheck](checks/atGradeSignPostCheck.md) | The purpose of this check is to identify at-grade intersections that are not part of destination sign relations. |

## Nodes
| Check Name | Check Description |
Expand Down
31 changes: 31 additions & 0 deletions docs/checks/atGradeSignPostCheck.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# AtGradeSignPostCheck

#### Description

This check identifies at-grade intersections that are not modeled into destination_sign relations.
An at-grade intersection is an intersection with two or more edges at the same level with valid
highway classifications. The highway classifications of the in and out edges of an at-grade intersection are specified in the configuration file and are as follows:
1. If the in edge is of type primary, the intersection is valid if there are at least two out edges, at the same z level as the in edge, that are either trunk, primary or secondary.
2. If the in edge is of type trunk or secondary, the intersection is valid if there are at least two out edges, at the same z level as the in edge, that are primary.

#### Live Examples

1. Node [id:393673917](https://www.openstreetmap.org/node/393673917) forms an at-grade intersection
with ways [202447272](https://www.openstreetmap.org/way/202447272),
[34370252](https://www.openstreetmap.org/way/34370252) and
[41234996](https://www.openstreetmap.org/way/41234996) but is not a member of a "destination_sign" relation.
2. Node [id:5351792253](https://www.openstreetmap.org/node/5351792253) forms an at-grade intersection with its exit road
[554569602](https://www.openstreetmap.org/way/554569602) but is not a member of a "destination_sign" relation.

#### Code Review

The check ensures that the Atlas object being evaluated is a [Node](https://github.com/osmlab/atlas/blob/dev/src/main/java/org/openstreetmap/atlas/geography/atlas/items/Node.java)
with a minimum node valence of 3. The node is a valid candidate for the check if it forms an intersection with edges at the same z-level with highway classifications specified in the configuration.
Once the node is evaluated to be a valid at-grade intersection, check if it is a member of any destination_sign relations.
If the node is not part of a destination_sign relation, then flag the node along with the intersecting edges. Since a node can be part of multiple destination_sign
relations, check if all the valid intersecting edges are members of destination_sign relations. If there are edges missing destination_sign
relations, flag the node and the edges. If all the intersecting edges are members of relations, check if all the relations have destination tags.
If the relations are missing destination_sign tag, flag it. If the node is part of a roundabout, flag all the roundabout edges when flagging the node and its intersecting edges.

To learn more about the code, please look at the comments in the source code for the check.
[AtGradeSignPostCheck.java](../../src/main/java/org/openstreetmap/atlas/checks/validation/intersections/AtGradeSignPostCheck.java)
14 changes: 14 additions & 0 deletions docs/checks/oceanBleedingCheck.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Ocean Bleeding Check

This check aims to flag streets, railways, and buildings that bleed into (intersect) ocean features. Intersection includes any geometrical interaction between the ocean feature and the land feature. The definition of streets and railways can be changed in the configuration for the check ("lineItems.offending" for railways, "highway.minimum" and "highway.exclude" for streets). Additionally, tags that should be considered when validating/invalidating an ocean feature are configurable.

#### Live Examples

1. Building [id:355294262](https://www.openstreetmap.org/way/355294262#map=19/22.36138/114.09546&layers=C) extends into ocean feature [id:243872591](https://www.openstreetmap.org/way/243872591#map=16/22.3630/114.0932&layers=C) invalidly.
2. Street [id:327223335](https://www.openstreetmap.org/way/327223335#map=17/25.21095/55.24491&layers=C) extends into ocean feature [id:87287185](https://www.openstreetmap.org/way/87287185#map=17/25.21143/55.24443&layers=C) invalidly.

#### Code Review

The check starts off by validating certain waterbodies (Atlas Areas or LineItems) as being ocean features. Then it collects all valid buildings, streets, and railways that intersect the given ocean feature. A single flag is created which includes all intersecting land features for the ocean feature. The check repeats this process for every Area and LineItem in the supplied atlas.

Please see source code for OceanBleedingCheck here: [OceanBleedingCheck](../../src/main/java/org/openstreetmap/atlas/checks/validation/intersections/OceanBleedingCheck.java)
5 changes: 3 additions & 2 deletions docs/cluster.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ To get the job running against a different version of Spark could be very challe
If you see an exception while running in your environment, like `ClassNotFoundException`, `ClassCastException`, thrown from a standard library, such as Guava, Slf4j,
then you are hitting the dependency conflict problem. To solve it, try to figure out the name of the conflicted library, then update dependency configuration in `build.gradle` file to force a working version.

#### large country support
#### Large country support

By default atlas-check runs jobs by country. This means one worker node will have to be able to load all the data for one country in memory.
This is not a good way to distribute workload and could make it very challenging to run large countries like USA, RUS, or CHN. Before a better solution gets implemented, you'll have to allocate large memories if you need to check against an enormous country.
This is not a good way to distribute workload and could make it very challenging to run large countries like USA, RUS, or CHN.
For running large countries see [Sharded Checks](shardedchecks.md).
58 changes: 58 additions & 0 deletions docs/shardedchecks.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Sharded Atlas Checks

Sharded Atlas Checks is an implementation of the Atlas Checks framework that seeks to provide a memory efficient way to run large data sets. This is achieved using a "sharded" input schema and parallel processing.

Normally when running Atlas Checks the size of input data is capped on a per country level by the amount of memory available. This means that even in a distributed environment enormous amounts of RAM are required to run countries like USA, RUS, and CHN. This implementation circumvents that cap by processing a country as multiple small sections. Each section is loaded as an individual process. Each process runs checks on the given section and returns a set of flags. Flags from all sections of a country are combined in the output.

While the normal Atlas Checks job can run on either PBF or Atlas files, this job is restricted to specialized atlas files. These files should be in a countrified and sharded format. These files can be produced by using [Atlas Generator](https://github.com/osmlab/atlas-generator).

## How To Run

Sharded Checks is designed to be run in a Spark [cluster](cluster.md) for best performance, but can also be run as a standalone Java execution. The following details the parameters required for running the job in either environment, and parameters that are unique to Sharded Checks.

#### Main Class
The main class of this job is org.openstreetmap.atlas.checks.distributed.ShardedIntegrityChecksSparkJob.

#### Input Data Path
Input Atlas files should be organized by country and sub-region (shard). The root of the input folder should contain sub-folders that are named by ISO3 country codes. Each sub-folder should contain atlas files named with the schema `<iso>_<zoom>-<x>-<y>.atlas`. An example of the full structure would be:
```text
- root
| - USA
| - SGP
| - SGP_10-806-508.atlas
| - SGP_11-1614-1016.atlas
| - ...
```

#### Expansion Distance
To maintain geographic completeness and avoid edge effects while running subsections of countries, each process is allowed to expand the area of a country that is loaded up to a set amount. The distance given in this parameter defines that expansion as shards within the given distance (in kilometers).

#### Sharding Schema
In order to load geographically connected shards together the job requires a definition of the sharding schema used for the input Atlas files. This can be supplied in 2 ways. A dynamic sharding definition can be supplied by placing a sharding.txt file in the input path. Alternatively, a schema can be provided using the `sharding` parameter. For more on this see the [sharding package](https://github.com/osmlab/atlas/tree/dev/src/main/java/org/openstreetmap/atlas/geography/sharding) in Atlas.

#### In Memory Atlas Type
By default Sharded Checks uses a [Dynamic Atlas](https://github.com/osmlab/atlas/tree/dev/src/main/java/org/openstreetmap/atlas/geography/atlas/dynamic). It is also possible to use a [Multi Atlas](https://github.com/osmlab/atlas/tree/dev/src/main/java/org/openstreetmap/atlas/geography/atlas/multi) to load Atlas files. This can be done by setting the `multiAtlas` parameter to `true`. It has been found that a Multi Atlas is more performant in non-distributed environments.

#### Spark Storage
By default Spark uses memory before disk when storing an RDD. In environments with large amounts of available memory this works well. In memory limited environments this can bog down when trying to process large data sets. In these scenarios it is more efficient to save everything directly to disk. This can be done by setting the `sparkStorageDiskOnly` parameter to `true`.

#### Shared Arguments
The following are brief descriptions of the parameters that Sharded Atlas Checks shares with the normal job:

| Parameter | Description |
|---|---|
| Output Path | Local or remote folder to save outputs to |
| Output Formats | Comma separated list of output types (flags,geojson,metrics,tippecanoe); MapRoulette output is not available in Sharded Checks |
| Countries | Comma separated list of ISO3 country codes of countries to run |
| Configuration File | Comma separated list of resource URIs for checks configuration json files |
| Master | Spark cluster master URL (just `local` for local environments) |

## Limitations

#### Limited Input
Currently Sharded Checks only supports sharded Atlas files as an input data source. Eventually this may be expanded to include PBF files.

#### Large Relations
There is a known issue with large relations being flagged incorrectly. This can occur when a relation spans many shards and even at maximum expansion it cannot all be loaded.


3 changes: 2 additions & 1 deletion gradle.properties
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
group=org.openstreetmap.atlas
version=6.0.0.0.APPLE

version=6.0.2-SNAPSHOT

# a trick to make our distribution package follow the same naming convention.
releaseBuild=false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ public class CheckResourceLoader
private final MultiMap<String, String> countryGroups = new MultiMap<>();
private final Boolean enabledByDefault;
private final String enabledKeyTemplate;
private static final String COUNTRY_WHITELIST_TEMPLATE = "%s."
+ BaseCheck.PARAMETER_WHITELIST_COUNTRIES;
private static final String COUNTRY_BLACKLIST_TEMPLATE = "%s."
+ BaseCheck.PARAMETER_BLACKLIST_COUNTRIES;
private final Set<String> packages;
private final Optional<List<String>> checkWhiteList;
private final Optional<List<String>> checkBlackList;
Expand Down Expand Up @@ -181,9 +185,8 @@ public <T extends Check> Set<T> loadChecks()
/**
 * Loads the set of checks that apply to a single country. The country-specific
 * {@link Configuration} is resolved first and is used both to decide which check classes are
 * enabled and to construct the checks.
 *
 * @param country
 *            the country to load checks for
 * @param <T>
 *            the type of check being loaded
 * @return the checks enabled for the given country
 */
public <T extends Check> Set<T> loadChecksForCountry(final String country)
{
final Configuration countryConfiguration = this.getConfigurationForCountry(country);
return loadChecks(
checkClass -> this.isEnabledByConfiguration(countryConfiguration, checkClass),
countryConfiguration);
return loadChecks(checkClass -> this.isEnabledByConfiguration(countryConfiguration,
checkClass, country), countryConfiguration);
}

public <T extends Check> Set<T> loadChecksUsingConstructors(
Expand Down Expand Up @@ -310,4 +313,20 @@ private boolean isEnabledByConfiguration(final Configuration configuration,
final String key = String.format(this.enabledKeyTemplate, checkClass.getSimpleName());
return configuration.get(key, this.enabledByDefault).value();
}

/**
 * Decides whether a check should run for a given country. A check runs only when it is
 * enabled by configuration AND the country passes the check's country filters:
 * <ul>
 * <li>if a country whitelist is configured (non-empty), the country must be in it;</li>
 * <li>otherwise the country must not be in the country blacklist.</li>
 * </ul>
 * Both lists are read from the configuration under keys built from
 * {@code COUNTRY_WHITELIST_TEMPLATE} and {@code COUNTRY_BLACKLIST_TEMPLATE} with the check's
 * simple class name, and default to empty lists when absent.
 *
 * @param configuration
 *            the {@link Configuration} to read the enabled flag and country filters from
 * @param checkClass
 *            the check class being evaluated
 * @param country
 *            the country the checks are being loaded for
 * @return {@code true} if the check is enabled and permitted for the country
 */
private boolean isEnabledByConfiguration(final Configuration configuration,
        final Class checkClass, final String country)
{
    // The enabled flag must gate everything. Previously the expression
    // "enabled && whitelist.isEmpty() ? a : b" parsed as "(enabled && whitelist.isEmpty()) ? a : b",
    // so a disabled check with the country in its whitelist was still reported as enabled.
    if (!this.isEnabledByConfiguration(configuration, checkClass))
    {
        return false;
    }

    final List<String> countryWhitelist = configuration
            .get(String.format(COUNTRY_WHITELIST_TEMPLATE, checkClass.getSimpleName()),
                    Collections.emptyList())
            .value();
    final List<String> countryBlacklist = configuration
            .get(String.format(COUNTRY_BLACKLIST_TEMPLATE, checkClass.getSimpleName()),
                    Collections.emptyList())
            .value();

    // A non-empty whitelist takes precedence over the blacklist.
    return countryWhitelist.isEmpty() ? !countryBlacklist.contains(country)
            : countryWhitelist.contains(country);
}
}
Loading

0 comments on commit 92ac405

Please sign in to comment.