Skip to content

Commit

Permalink
Simplify WikipediaApp.getLabelForWikidata() by using ListUtil.processInBatches()
Browse files Browse the repository at this point in the history
  • Loading branch information
floscher committed May 23, 2018
1 parent 05e8b25 commit ab54c73
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 35 deletions.
63 changes: 32 additions & 31 deletions src/org/wikipedia/WikipediaApp.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@
import java.util.Objects;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.BiConsumer;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Collector;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
Expand All @@ -37,8 +37,13 @@
import org.openstreetmap.josm.tools.Utils;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.wikipedia.api.wikidata_action.ApiQueryClient;
import org.wikipedia.api.wikidata_action.WikidataActionApiUrl;
import org.wikipedia.api.wikidata_action.json.CheckEntityExistsResult;
import org.wikipedia.api.wikidata_action.json.SerializationSchema;
import org.wikipedia.data.WikidataEntry;
import org.wikipedia.data.WikipediaEntry;
import org.wikipedia.tools.ListUtil;
import org.wikipedia.tools.RegexUtil;
import org.wikipedia.tools.XPath;

Expand Down Expand Up @@ -394,47 +399,43 @@ public static String getLabelForWikidata(String wikidataId, Locale locale, Strin
}

static List<WikidataEntry> getLabelForWikidata(List<? extends WikipediaEntry> entries, Locale locale, String ... preferredLanguage) {
if (entries.size() > 50) {
return partitionList(entries, 50).stream()
.flatMap(chunk -> getLabelForWikidata(chunk, locale, preferredLanguage).stream())
.collect(Collectors.toList());
} else if (entries.isEmpty()) {
return Collections.emptyList();
final Collection<String> languages = new ArrayList<>();
if (locale != null) {
languages.add(getMediawikiLocale(locale));
languages.add(getMediawikiLocale(new Locale(locale.getLanguage())));
}
try {
final String url = "https://www.wikidata.org/w/api.php" +
languages.addAll(Arrays.asList(preferredLanguage));
languages.add("en");
languages.add(null);

final List<WikidataEntry> result = new ArrayList<>(entries.size());
ListUtil.processInBatches(entries, 50, batch -> {
try {
final String url = "https://www.wikidata.org/w/api.php" +
"?action=wbgetentities" +
"&props=labels|descriptions" +
"&ids=" + entries.stream().map(x -> x.article).collect(Collectors.joining("|")) +
"&format=xml";
final Collection<String> languages = new ArrayList<>();
if (locale != null) {
languages.add(getMediawikiLocale(locale));
languages.add(getMediawikiLocale(new Locale(locale.getLanguage())));
}
languages.addAll(Arrays.asList(preferredLanguage));
languages.add("en");
languages.add(null);
final List<WikidataEntry> r = new ArrayList<>(entries.size());
try (final InputStream in = connect(url).getContent()) {
final Document xml = newDocumentBuilder().parse(in);
for (final WikipediaEntry entry : entries) {
final Node entity = X_PATH.evaluateNode("//entity[@id='" + entry.article + "']", xml);
if (entity == null) {
continue;
}
r.add(new WikidataEntry(
try (InputStream in = connect(url).getContent()) {
final Document xml = newDocumentBuilder().parse(in);
for (final WikipediaEntry entry : entries) {
final Node entity = X_PATH.evaluateNode("//entity[@id='" + entry.article + "']", xml);
if (entity == null) {
continue;
}
result.add(new WikidataEntry(
entry.article,
getFirstField(languages, "label", entity),
entry.coordinate,
getFirstField(languages, "description", entity)
));
));
}
}
} catch (Exception ex) {
throw new RuntimeException(ex);
}
return r;
} catch (Exception ex) {
throw new RuntimeException(ex);
}
});
return result;
}

private static String getFirstField(Collection<String> languages, String field, Node entity) {
Expand Down
6 changes: 3 additions & 3 deletions src/org/wikipedia/gui/WikipediaToggleDialog.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import javax.swing.AbstractAction;
import javax.swing.DefaultListCellRenderer;
import javax.swing.DefaultListModel;
Expand Down Expand Up @@ -51,6 +50,7 @@
import org.wikipedia.WikipediaApp;
import org.wikipedia.actions.FetchWikidataAction;
import org.wikipedia.data.WikipediaEntry;
import org.wikipedia.tools.ListUtil;

public class WikipediaToggleDialog extends ToggleDialog implements ActiveLayerChangeListener, DataSetListenerAdapter.Listener {

Expand Down Expand Up @@ -187,8 +187,8 @@ protected Void doInBackground() throws Exception {
final List<WikipediaEntry> entries = getEntries();
entries.sort(null);
publish(entries.toArray(new WikipediaEntry[entries.size()]));
WikipediaApp.partitionList(entries, 20).forEach(chunk -> {
WikipediaApp.forLanguage(chunk.get(0).lang).updateWIWOSMStatus(chunk);
ListUtil.processInBatches(entries, 20, batch -> {
WikipediaApp.forLanguage(batch.get(0).lang).updateWIWOSMStatus(batch);
list.repaint();
});
return null;
Expand Down
8 changes: 7 additions & 1 deletion src/org/wikipedia/tools/ListUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ private ListUtil() {
// Private constructor to avoid instantiation
}

/**
 * A {@link BiConsumer} that accepts two integers and does nothing with them.
 */
private static final BiConsumer<Integer, Integer> EMPTY_BICONSUMER = (a, b) -> {};

/**
* Splits the given list {@code fullList} into batches of a size of {@code maxBatchSize} or less and each batch is
* then consumed by the given {@link Consumer} {@code processBatch}.
Expand All @@ -27,5 +29,9 @@ public static <T> void processInBatches(final List<T> fullList, int maxBatchSize
}
}

/**
 * Convenience overload that delegates to the four-argument {@code processInBatches} method,
 * passing {@link #EMPTY_BICONSUMER} (a no-op) as the fourth argument.
 *
 * @param fullList the list that is handed to the four-argument overload for batching
 * @param maxBatchSize the maximum batch size that is handed to the four-argument overload
 * @param processBatch the consumer that is handed to the four-argument overload to process each batch
 * @param <T> the element type of the list
 */
public static <T> void processInBatches(final List<T> fullList, int maxBatchSize, final Consumer<List<T>> processBatch) {
processInBatches(fullList, maxBatchSize, processBatch, EMPTY_BICONSUMER);
}


}
}

0 comments on commit ab54c73

Please sign in to comment.