Skip to content

Commit

Permalink
Merge pull request #433 from onc-healthit/LANTERN-769-ehealthline-web…
Browse files Browse the repository at this point in the history
…scraper

LANTERN-769: ehealthline webscraper
  • Loading branch information
vishnu-mettles authored Nov 18, 2024
2 parents 52471df + 25be11f commit 0e2a69b
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ var pointclickURL = "https://fhir.pointclickcare.com/"
var nextgenPracticeURL = "https://www.nextgen.com/api/practice-search"
var aspmdURL = "https://fhirapi.asp.md:3030/aspmd/fhirserver/fhir_aspmd.asp"
var axeiumURL = "https://apifhir.axeium.net:8443/reference-server/"
var ehealthlineURL = "http://ehealthline.com/dev/pdf/FHIR%20API%20Endpoints.htm"

var bundleQuerierArray = [30]string{"https://ac-fhir.harrisambulatory.com/endpoints/r4", "https://dynamicfhirpresentation.dynamicfhirsandbox.com/fhir/r4/endpoints",
"https://ct-fhir.harrisambulatory.com/Endpoints/R4", "https://kantime.com/wp-content/uploads/2024/03/fhir-base-urls.json",
Expand Down Expand Up @@ -552,6 +553,8 @@ func QueryCHPLEndpointList(chplURL string, fileToWriteTo string) {
AxeiumeWebscraper(axeiumURL, fileToWriteTo)
} else if contains(bundleQuerierArray, chplURL) {
BundleQuerierParser(chplURL, fileToWriteTo)
} else if URLsEqual(chplURL, ehealthlineURL) {
EhealthlineWebscraper(ehealthlineURL, fileToWriteTo)
} else {
log.Warnf("Handler is required for url %s", chplURL)
}
Expand Down
40 changes: 40 additions & 0 deletions endpointmanager/pkg/chplendpointquerier/ehealthlinewebscraper.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package chplendpointquerier

import (
"regexp"
"strings"

"github.com/PuerkitoBio/goquery"
"github.com/onc-healthit/lantern-back-end/endpointmanager/pkg/helpers"
log "github.com/sirupsen/logrus"
)

// EhealthlineWebscraper scrapes the page at CHPLURL for endpoint URLs and
// writes them to fileToWriteTo via WriteCHPLFile.
//
// The page is loaded with helpers.ChromedpQueryEndpointList. Every table cell
// whose text contains "https" is a candidate; a candidate is discarded when its
// text contains an IPv4 address (optionally followed by a port). The URL is
// taken from the text of the <span> element(s) inside the cell, with
// surrounding whitespace trimmed. Cells that yield no URL text are skipped so
// that blank entries never reach the output file.
//
// A failure to load the page or to write the file is reported with log.Fatal.
// NOTE(review): with logrus, Fatal exits the process after logging — confirm
// that is the intended behavior for callers of this scraper.
func EhealthlineWebscraper(CHPLURL string, fileToWriteTo string) {
	var lanternEntryList []LanternEntry
	var endpointEntryList EndpointList

	// Matches a dotted-quad IPv4 address with an optional ":port" suffix.
	ipPattern := `\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(:\d{1,5})?\b`
	ipRegex := regexp.MustCompile(ipPattern)

	doc, err := helpers.ChromedpQueryEndpointList(CHPLURL, "")
	if err != nil {
		log.Fatal(err)
	}

	doc.Find("td:contains('https')").Each(func(index int, item *goquery.Selection) {
		// Cells that reference a raw IP address are not listed as endpoints.
		if ipRegex.MatchString(item.Text()) {
			return
		}

		// A cell without a <span> (or with an empty one) produces no URL;
		// skip it instead of recording an entry with an empty URL.
		entryURL := strings.TrimSpace(item.Find("span").Text())
		if entryURL == "" {
			return
		}

		var entry LanternEntry
		entry.URL = entryURL
		lanternEntryList = append(lanternEntryList, entry)
	})

	endpointEntryList.Endpoints = lanternEntryList

	err = WriteCHPLFile(endpointEntryList, fileToWriteTo)
	if err != nil {
		log.Fatal(err)
	}
}

0 comments on commit 0e2a69b

Please sign in to comment.