Skip to content

Commit

Permalink
Merge branch 'main' into LANTERN-771-772-773-webscrapers-and-csvparsers
Browse files Browse the repository at this point in the history
  • Loading branch information
rishi-salunkhe-mettle authored Nov 18, 2024
2 parents ddd3835 + 546a149 commit 1801ee3
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 8 deletions.
14 changes: 8 additions & 6 deletions endpointmanager/pkg/chplendpointquerier/bundleparser.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,14 @@ func BundleToLanternFormat(bundle []byte) []LanternEntry {

for _, bundleEntry := range structBundle.Entries {
if strings.EqualFold(strings.TrimSpace(bundleEntry.Resource.ResourceType), "Organization") {
addressMapArr := bundleEntry.Resource.Address.([]interface{})
for _, address := range addressMapArr {
addressMap := address.(map[string]interface{})
postalCode, ok := addressMap["postalCode"].(string)
if ok {
organizationZip[bundleEntry.Resource.OrgId] = postalCode
if bundleEntry.Resource.Address != nil {
addressMapArr := bundleEntry.Resource.Address.([]interface{})
for _, address := range addressMapArr {
addressMap := address.(map[string]interface{})
postalCode, ok := addressMap["postalCode"].(string)
if ok {
organizationZip[bundleEntry.Resource.OrgId] = postalCode
}
}
}
}
Expand Down
11 changes: 10 additions & 1 deletion endpointmanager/pkg/chplendpointquerier/chplendpointquerier.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ var firelyURL = "https://docs.fire.ly/projects/Firely-Server/en/latest/_static/g
var azurewebsitesURL = "https://sfp-proxy9794.azurewebsites.net/fhir/base-url"
var viewmymedURL = "https://portal.viewmymed.com/fhir/Endpoint"
var imedemrURL = "https://icom.imedemr.com/icom50/html/emr/mvc/pages/fhir_endpoints.php?format=csv"
var imedemrURL2 = "https://icom.imedemr.com/icom50/html/emr/mvc/pages/fhir_endpoints.php"
var moyaeURL = "https://documenter.getpostman.com/view/15917486/UyxojQMd#a24aa40c-fe15-478e-a555-3c2cb10d56c9"
var myheloURL = "https://www.myhelo.com/api/"
var nextechURL = "https://www.nextech.com/hubfs/Nextech%20FHIR%20Base%20URL.csv"
Expand Down Expand Up @@ -192,6 +193,7 @@ var pointclickURL = "https://fhir.pointclickcare.com/"
var nextgenPracticeURL = "https://www.nextgen.com/api/practice-search"
var aspmdURL = "https://fhirapi.asp.md:3030/aspmd/fhirserver/fhir_aspmd.asp"
var axeiumURL = "https://apifhir.axeium.net:8443/reference-server/"
var ehealthlineURL = "http://ehealthline.com/dev/pdf/FHIR%20API%20Endpoints.htm"

var bundleQuerierArray = [30]string{"https://ac-fhir.harrisambulatory.com/endpoints/r4", "https://dynamicfhirpresentation.dynamicfhirsandbox.com/fhir/r4/endpoints",
"https://ct-fhir.harrisambulatory.com/Endpoints/R4", "https://kantime.com/wp-content/uploads/2024/03/fhir-base-urls.json",
Expand All @@ -207,6 +209,7 @@ var mdlandURL = "https://api-fhir-proxy-2.mdland.net/"
var abeoURL = "https://www.crystalpm.com/FHIRServiceURLs.csv"
var nextechURL2 = "https://www.nextech.com/developers-portal"
var icareURL = "https://www.icare.com/endpoints.csv"
var ezemrxURL = "https://www.ezemrx.com/fhir"

func contains(arr [30]string, str string) bool {
for _, v := range arr {
Expand Down Expand Up @@ -424,6 +427,8 @@ func QueryCHPLEndpointList(chplURL string, fileToWriteTo string) {
BundleQuerierParser(chplURL, fileToWriteTo)
} else if URLsEqual(chplURL, imedemrURL) {
ImedemrWebscraper("https://icom.imedemr.com/icom50/html/emr/mvc/pages/fhir_endpoints.php", fileToWriteTo)
} else if URLsEqual(chplURL, imedemrURL2) {
ImedemrWebscraper(chplURL, fileToWriteTo)
} else if URLsEqual(chplURL, moyaeURL) {
MoyaeURLWebscraper(chplURL, fileToWriteTo)
} else if URLsEqual(chplURL, myheloURL) {
Expand Down Expand Up @@ -493,7 +498,7 @@ func QueryCHPLEndpointList(chplURL string, fileToWriteTo string) {
} else if URLsEqual(chplURL, fhirjunoURL) {
BundleQuerierParser(fhirjunoURL, fileToWriteTo)
} else if URLsEqual(chplURL, veradigmURL) {
BundleQuerierParser("https://open.platform.veradigm.com/fhirendpoints/download/R4", fileToWriteTo)
BundleQuerierParser("https://open.platform.veradigm.com/fhirendpoints/download/R4?endpointFilter=All", fileToWriteTo)
} else if URLsEqual(chplURL, meldrxURL) {
BundleQuerierParser(meldrxURL, fileToWriteTo)
} else if URLsEqual(chplURL, emr4MDURL) {
Expand Down Expand Up @@ -556,6 +561,8 @@ func QueryCHPLEndpointList(chplURL string, fileToWriteTo string) {
AspMDeWebscraper(aspmdURL, fileToWriteTo)
} else if URLsEqual(chplURL, axeiumURL) {
AxeiumeWebscraper(axeiumURL, fileToWriteTo)
} else if URLsEqual(chplURL, ezemrxURL) {
EzemrxWebscraper(chplURL, fileToWriteTo)
} else if contains(bundleQuerierArray, chplURL) {
BundleQuerierParser(chplURL, fileToWriteTo)
} else if URLsEqual(chplURL, ontadaURL) {
Expand All @@ -568,6 +575,8 @@ func QueryCHPLEndpointList(chplURL string, fileToWriteTo string) {
CSVParser("https://www.nextech.com/hubfs/Nextech%20FHIR%20Base%20URL.csv", fileToWriteTo, "./Nextech FHIR Base URL.csv", -1, 0, true, 1, 0)
} else if URLsEqual(icareURL, chplURL) {
CSVParser(chplURL, fileToWriteTo, "./endpoints.csv", -1, 0, true, 1, 0)
} else if URLsEqual(chplURL, ehealthlineURL) {
EhealthlineWebscraper(ehealthlineURL, fileToWriteTo)
} else {
log.Warnf("Handler is required for url %s", chplURL)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ func TestWebScrapers(t *testing.T) {
scraperFunc: MdlandWebscraper,
url: "https://api.mdland.com/Mdland%20SMART%20on%20FHIR%20OAuth%202.0%20Guide.htm",
fileName: "MDLAND_EndpointSources.json",
scraperFunc: EzemrxWebscraper,
url: "https://www.ezemrx.com/fhir",
fileName: "ezEMRx_Inc_EndpointSources.json",
},
}

Expand Down
40 changes: 40 additions & 0 deletions endpointmanager/pkg/chplendpointquerier/ehealthlinewebscraper.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package chplendpointquerier

import (
"regexp"
"strings"

"github.com/PuerkitoBio/goquery"
"github.com/onc-healthit/lantern-back-end/endpointmanager/pkg/helpers"
log "github.com/sirupsen/logrus"
)

func EhealthlineWebscraper(CHPLURL string, fileToWriteTo string) {

var lanternEntryList []LanternEntry
var endpointEntryList EndpointList

ipPattern := `\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(:\d{1,5})?\b`
ipRegex := regexp.MustCompile(ipPattern)

doc, err := helpers.ChromedpQueryEndpointList(CHPLURL, "")
if err != nil {
log.Fatal(err)
}

doc.Find("td:contains('https')").Each(func(index int, item *goquery.Selection) {
if !ipRegex.MatchString(item.Text()) {
var entry LanternEntry
entry.URL = strings.TrimSpace(item.Find("span").Text())
lanternEntryList = append(lanternEntryList, entry)
}
})

endpointEntryList.Endpoints = lanternEntryList

err = WriteCHPLFile(endpointEntryList, fileToWriteTo)
if err != nil {
log.Fatal(err)
}

}
38 changes: 38 additions & 0 deletions endpointmanager/pkg/chplendpointquerier/ezemrxwebscraper.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package chplendpointquerier

import (
"strings"

"github.com/onc-healthit/lantern-back-end/endpointmanager/pkg/helpers"
log "github.com/sirupsen/logrus"
)

// EzemrxWebscraper loads the page at CHPLURL through
// helpers.ChromedpQueryEndpointList, extracts a single endpoint URL from the
// element with id "comp-lb6njyhb", and writes it to fileToWriteTo with
// WriteCHPLFile.
//
// The URL is taken from the second line of the text of the first <span>
// inside the first <p> of that element. If that text has no second line, or
// the second line is blank, the function reports the problem through
// log.Fatalf instead of panicking on an out-of-range index. Errors from
// loading the page or writing the file are reported through log.Fatal.
func EzemrxWebscraper(CHPLURL string, fileToWriteTo string) {

	var lanternEntryList []LanternEntry
	var endpointEntryList EndpointList
	var entry LanternEntry

	// NOTE(review): the second argument is presumably the selector the
	// headless browser waits for before returning the document; confirm
	// against helpers.ChromedpQueryEndpointList.
	doc, err := helpers.ChromedpQueryEndpointList(CHPLURL, "#comp-lb6njyhb")
	if err != nil {
		log.Fatal(err)
	}

	divElem := doc.Find("#comp-lb6njyhb").First()
	pElem := divElem.Find("p").First()
	spanElem := pElem.Find("span").First()

	parts := strings.Split(spanElem.Text(), "\n")

	// strings.Split returns a single element when the text contains no
	// newline (including when the span was not found and the text is empty),
	// so parts[1] must be guarded.
	if len(parts) < 2 {
		log.Fatalf("unexpected page layout at %s: endpoint URL line not found in #comp-lb6njyhb", CHPLURL)
	}

	entry.URL = strings.TrimSpace(parts[1])
	if entry.URL == "" {
		log.Fatalf("unexpected page layout at %s: endpoint URL line in #comp-lb6njyhb is empty", CHPLURL)
	}

	lanternEntryList = append(lanternEntryList, entry)

	endpointEntryList.Endpoints = lanternEntryList

	err = WriteCHPLFile(endpointEntryList, fileToWriteTo)
	if err != nil {
		log.Fatal(err)
	}

}
3 changes: 2 additions & 1 deletion endpointmanager/pkg/chplendpointquerier/imedemrwebscraper.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ func ImedemrWebscraper(CHPLURL string, fileToWriteTo string) {

tableElem.Find("tbody").Each(func(index int, tbodyElem *goquery.Selection) {
tbodyElem.Find("tr").Each(func(trIndex int, trElem *goquery.Selection) {
if trIndex >= 1 {
// LANTERN-783: Changed the condition to include the first row of table
if trIndex >= 0 {
tdElem := trElem.Find("td")
org := tdElem.Eq(0)
URL := tdElem.Eq(1)
Expand Down

0 comments on commit 1801ee3

Please sign in to comment.