diff --git a/docs/api/openapi.yaml b/docs/api/openapi.yaml index c92c681..390af27 100644 --- a/docs/api/openapi.yaml +++ b/docs/api/openapi.yaml @@ -188,7 +188,18 @@ paths: `edm:isShownBy` or `edm:hasView` properties, whose URL when MD5-hashed matches the path parameter `{webResourceHash}`. If no such web resource is found, responds with status code 404. - 1. Starts proxying the web resource from the provider's site to the client. 1. If the web resource is detected to be an HTML document, stops proxying + 1. Removes all but the following headers from the request before forwarding + it to the provider's site: + * `accept-encoding` + * `accept-language` + * `accept` + * `if-match` + * `if-modified-since` + * `referer` + * `user-agent` + 1. Starts proxying the web resource from the provider's site to the client. + + 1. If the web resource is detected to be an HTML document, stops proxying and respond with status code 302 to redirect the client to the web page. 1. If the web resource is any other media type, preserve only these upstream response headers, and remove the rest: diff --git a/src/lib/constants.js b/src/lib/constants.js index b94207d..5f65d00 100644 --- a/src/lib/constants.js +++ b/src/lib/constants.js @@ -12,13 +12,20 @@ export const EUROPEANA_APIS = { } export const HTTP_HEADERS = { + ACCEPT: 'accept', + ACCEPT_ENCODING: 'accept-encoding', + ACCEPT_LANGUAGE: 'accept-language', ACCEPT_RANGES: 'accept-ranges', CACHE_CONTROL: 'cache-control', CONTENT_DISPOSITION: 'content-disposition', CONTENT_LENGTH: 'content-length', CONTENT_TYPE: 'content-type', ETAG: 'etag', + IF_MATCH: 'if-match', + IF_MODIFIED_SINCE: 'if-modified-since', LAST_MODIFIED: 'last-modified', LINK: 'link', + REFERER: 'referer', + USER_AGENT: 'user-agent', X_EUROPEANA_WEB_RESOURCE: 'x-europeana-web-resource' } diff --git a/src/middlewares/web-resource-proxy.js b/src/middlewares/web-resource-proxy.js index 8cdfc8b..641b910 100644 --- a/src/middlewares/web-resource-proxy.js +++ b/src/middlewares/web-resource-proxy.js @@ -3,7 +3,17 @@ import mime from 'mime-types' import { CONTENT_DISPOSITIONS, CONTENT_TYPES, HTTP_HEADERS } from '../lib/constants.js' -const headersToProxy = [ +const requestHeadersToProxy = [ + HTTP_HEADERS.ACCEPT_ENCODING, + HTTP_HEADERS.ACCEPT_LANGUAGE, + HTTP_HEADERS.ACCEPT, + HTTP_HEADERS.IF_MATCH, + HTTP_HEADERS.IF_MODIFIED_SINCE, + HTTP_HEADERS.REFERER, + HTTP_HEADERS.USER_AGENT +] + +const responseHeadersToProxy = [ HTTP_HEADERS.ACCEPT_RANGES, HTTP_HEADERS.CACHE_CONTROL, HTTP_HEADERS.CONTENT_LENGTH, @@ -27,18 +37,24 @@ const contentDisposition = ({ contentType, req } = {}) => { } const filterReqHeaders = (req) => { - delete req.headers.cookie - delete req.headers.origin + // Delete any request headers we don't want to proxy. + for (const header in req.headers) { + if (!requestHeadersToProxy.includes(header)) { + delete req.headers[header] + } + } } -const normaliseProxyResHeaders = (proxyRes) => { - // Delete any headers we don't want to proxy. +const filterProxyResHeaders = (proxyRes) => { + // Delete any response headers we don't want to proxy. for (const header in proxyRes.headers) { - if (!headersToProxy.includes(header)) { + if (!responseHeadersToProxy.includes(header)) { delete proxyRes.headers[header] } } +} +const normaliseProxyResHeaders = (proxyRes) => { // Default content-type to application/octet-stream, if not present if (!proxyRes.headers[HTTP_HEADERS.CONTENT_TYPE]) { proxyRes.headers[HTTP_HEADERS.CONTENT_TYPE] = CONTENT_TYPES.APPLICATION_OCTET_STREAM @@ -74,6 +90,7 @@ const onProxyReq = (webResourceId, next) => (proxyReq, req, res) => { const onProxyRes = (webResourceId, next) => (proxyRes, req, res) => { try { + filterProxyResHeaders(proxyRes) normaliseProxyResHeaders(proxyRes) setCustomResHeaders(webResourceId, res)