From 00fc61a17b7b6cc5de7bc6d43bf57ae68ffe7590 Mon Sep 17 00:00:00 2001
From: Michael Genson <71845777+michael-genson@users.noreply.github.com>
Date: Thu, 26 Sep 2024 15:07:41 +0000
Subject: [PATCH 01/15] renamed create routes

---
 .../community-guide/bulk-url-import.md        |  4 ++--
 .../getting-started/installation/security.md  |  4 ++--
 frontend/lib/api/user/recipes/recipe.ts       |  8 ++++----
 frontend/pages/g/_groupSlug/r/create/zip.vue  |  2 +-
 mealie/routes/recipe/recipe_crud_routes.py    |  8 ++++----
 tests/utils/api_routes/__init__.py            | 16 ++++++++--------
 6 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/docs/docs/documentation/community-guide/bulk-url-import.md b/docs/docs/documentation/community-guide/bulk-url-import.md
index 7361c91ec6f..5498290d168 100644
--- a/docs/docs/documentation/community-guide/bulk-url-import.md
+++ b/docs/docs/documentation/community-guide/bulk-url-import.md
@@ -23,7 +23,7 @@ function import_from_file () {
   do
     echo $line
     curl -X 'POST' \
-      "$3/api/recipes/create-url" \
+      "$3/api/recipes/create/url" \
       -H "Authorization: Bearer $2" \
       -H 'accept: application/json' \
       -H 'Content-Type: application/json' \
@@ -81,7 +81,7 @@ def import_from_file(input_file, token, mealie_url):
         data = {
             'url': line
         }
-        response = requests.post(mealie_url + "/api/recipes/create-url", headers=headers, json=data)
+        response = requests.post(mealie_url + "/api/recipes/create/url", headers=headers, json=data)
         print(response.text)

 input_file="list"
diff --git a/docs/docs/documentation/getting-started/installation/security.md b/docs/docs/documentation/getting-started/installation/security.md
index e83ec86b658..29f6b19da28 100644
--- a/docs/docs/documentation/getting-started/installation/security.md
+++ b/docs/docs/documentation/getting-started/installation/security.md
@@ -18,7 +18,7 @@ Use your best judgement when deciding what to do.

 By default, the API is **not** rate limited. This leaves Mealie open to a potential **Denial of Service Attack**. While it's possible to perform a **Denial of Service Attack** on any endpoint, there are a few key endpoints that are more vulnerable than others.

-- `/api/recipes/create-url`
+- `/api/recipes/create/url`
 - `/api/recipes/{id}/image`

 These endpoints are used to scrape data based off a user provided URL. It is possible for a malicious user to issue multiple requests to download an arbitrarily large external file (e.g a Debian ISO) and sufficiently saturate a CPU assigned to the container. While we do implement some protections against this by chunking the response, and using a timeout strategy, it's still possible to overload the CPU if an attacker issues multiple requests concurrently.
@@ -33,7 +33,7 @@ If you'd like to mitigate this risk, we suggest that you rate limit the API in general.

 ## Server Side Request Forgery

-- `/api/recipes/create-url`
+- `/api/recipes/create/url`
 - `/api/recipes/{id}/image`

 Given the nature of these APIs it's possible to perform a **Server Side Request Forgery** attack. This is where a malicious user can issue a request to an internal network resource, and potentially exfiltrate data. We _do_ perform some checks to mitigate access to resources within your network but at the end of the day, users of Mealie are allowed to trigger HTTP requests on **your server**.
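For readers adapting their import scripts to the renamed endpoint, a minimal sketch of the new call, with a small delay between requests in the spirit of the rate-limiting advice above (the `MEALIE_URL` and `TOKEN` values are placeholders, not part of the patch):

```python
import time

import requests

MEALIE_URL = "https://mealie.example.com"  # assumption: your instance URL
TOKEN = "your-api-token"  # assumption: a valid Mealie API token


def import_urls(urls: list[str]) -> None:
    headers = {"Authorization": f"Bearer {TOKEN}", "Content-Type": "application/json"}
    for url in urls:
        # POST to the renamed endpoint; the old /api/recipes/create-url path is gone.
        resp = requests.post(f"{MEALIE_URL}/api/recipes/create/url", headers=headers, json={"url": url})
        print(resp.status_code, resp.text)  # 201 and the new recipe's slug on success
        time.sleep(1)  # pause between requests; scraping is CPU-heavy on the server


import_urls(["https://example.com/some-recipe"])
```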
diff --git a/frontend/lib/api/user/recipes/recipe.ts b/frontend/lib/api/user/recipes/recipe.ts
index 8e3b673b6de..201f5a83fdb 100644
--- a/frontend/lib/api/user/recipes/recipe.ts
+++ b/frontend/lib/api/user/recipes/recipe.ts
@@ -32,10 +32,10 @@ const routes = {
   recipesCreate: `${prefix}/recipes/create`,
   recipesBase: `${prefix}/recipes`,
   recipesTestScrapeUrl: `${prefix}/recipes/test-scrape-url`,
-  recipesCreateUrl: `${prefix}/recipes/create-url`,
-  recipesCreateUrlBulk: `${prefix}/recipes/create-url/bulk`,
-  recipesCreateFromZip: `${prefix}/recipes/create-from-zip`,
-  recipesCreateFromImage: `${prefix}/recipes/create-from-image`,
+  recipesCreateUrl: `${prefix}/recipes/create/url`,
+  recipesCreateUrlBulk: `${prefix}/recipes/create/url/bulk`,
+  recipesCreateFromZip: `${prefix}/recipes/create/zip`,
+  recipesCreateFromImage: `${prefix}/recipes/create/image`,
   recipesCategory: `${prefix}/recipes/category`,
   recipesParseIngredient: `${prefix}/parser/ingredient`,
   recipesParseIngredients: `${prefix}/parser/ingredients`,
diff --git a/frontend/pages/g/_groupSlug/r/create/zip.vue b/frontend/pages/g/_groupSlug/r/create/zip.vue
index e0c57ea79a7..308436ac368 100644
--- a/frontend/pages/g/_groupSlug/r/create/zip.vue
+++ b/frontend/pages/g/_groupSlug/r/create/zip.vue
@@ -67,7 +67,7 @@ export default defineComponent({
       const formData = new FormData();
       formData.append(newRecipeZipFileName, newRecipeZip.value);

-      const { response } = await api.upload.file("/api/recipes/create-from-zip", formData);
+      const { response } = await api.upload.file("/api/recipes/create/zip", formData);
       handleResponse(response);
     }

diff --git a/mealie/routes/recipe/recipe_crud_routes.py b/mealie/routes/recipe/recipe_crud_routes.py
index aa3fe8c8222..3abdf3805c9 100644
--- a/mealie/routes/recipe/recipe_crud_routes.py
+++ b/mealie/routes/recipe/recipe_crud_routes.py
@@ -201,7 +201,7 @@ def handle_exceptions(self, ex: Exception) -> None:
     # =======================================================================
     # URL Scraping Operations

-    @router.post("/create-url", status_code=201, response_model=str)
+    @router.post("/create/url", status_code=201, response_model=str)
     async def parse_recipe_url(self, req: ScrapeRecipe):
         """Takes in a URL and attempts to scrape data and load it into the database"""
         try:
@@ -233,7 +233,7 @@ async def parse_recipe_url(self, req: ScrapeRecipe):

         return new_recipe.slug

-    @router.post("/create-url/bulk", status_code=202)
+    @router.post("/create/url/bulk", status_code=202)
     def parse_recipe_url_bulk(self, bulk: CreateRecipeByUrlBulk, bg_tasks: BackgroundTasks):
         """Takes in a URL and attempts to scrape data and load it into the database"""
         bulk_scraper = RecipeBulkScraperService(self.service, self.repos, self.group, self.translator)
@@ -266,7 +266,7 @@ async def test_parse_recipe_url(self, data: ScrapeRecipeTest):
     # ==================================================================================================================
     # Other Create Operations

-    @router.post("/create-from-zip", status_code=201)
+    @router.post("/create/zip", status_code=201)
     def create_recipe_from_zip(self, archive: UploadFile = File(...)):
         """Create recipe from archive"""
         with get_temporary_zip_path() as temp_path:
@@ -280,7 +280,7 @@ def create_recipe_from_zip(self, archive: UploadFile = File(...)):

         return recipe.slug

-    @router.post("/create-from-image", status_code=201)
+    @router.post("/create/image", status_code=201)
     async def create_recipe_from_image(
         self,
         images: list[UploadFile] = File(...),
diff --git a/tests/utils/api_routes/__init__.py b/tests/utils/api_routes/__init__.py
index dc096b387e7..7b05d849c52 100644
--- a/tests/utils/api_routes/__init__.py
+++ b/tests/utils/api_routes/__init__.py
@@ -147,14 +147,14 @@
 """`/api/recipes/bulk-actions/settings`"""
 recipes_bulk_actions_tag = "/api/recipes/bulk-actions/tag"
 """`/api/recipes/bulk-actions/tag`"""
-recipes_create_from_image = "/api/recipes/create-from-image"
-"""`/api/recipes/create-from-image`"""
-recipes_create_from_zip = "/api/recipes/create-from-zip"
-"""`/api/recipes/create-from-zip`"""
-recipes_create_url = "/api/recipes/create-url"
-"""`/api/recipes/create-url`"""
-recipes_create_url_bulk = "/api/recipes/create-url/bulk"
-"""`/api/recipes/create-url/bulk`"""
+recipes_create_image = "/api/recipes/create/image"
+"""`/api/recipes/create/image`"""
+recipes_create_url = "/api/recipes/create/url"
+"""`/api/recipes/create/url`"""
+recipes_create_url_bulk = "/api/recipes/create/url/bulk"
+"""`/api/recipes/create/url/bulk`"""
+recipes_create_zip = "/api/recipes/create/zip"
+"""`/api/recipes/create/zip`"""
 recipes_exports = "/api/recipes/exports"
 """`/api/recipes/exports`"""
 recipes_test_scrape_url = "/api/recipes/test-scrape-url"
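A quick sanity check of the renames, as one might exercise them from the test utilities above (a sketch; the `api_client` and `unique_user` fixtures are assumptions based on how the surrounding Mealie test suite is structured):

```python
from tests.utils import api_routes


def test_renamed_create_routes(api_client, unique_user):
    # the renamed constants point at the new slash-style paths
    assert api_routes.recipes_create_url == "/api/recipes/create/url"
    assert api_routes.recipes_create_zip == "/api/recipes/create/zip"

    # the old dash-style path no longer exists, so it should 404
    response = api_client.post(
        "/api/recipes/create-url",
        json={"url": "https://example.com/some-recipe"},
        headers=unique_user.token,
    )
    assert response.status_code == 404
```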
From b1977c30196c4f62c8a4b0fd73741c2c4a697565 Mon Sep 17 00:00:00 2001
From: Michael Genson <71845777+michael-genson@users.noreply.github.com>
Date: Thu, 26 Sep 2024 15:29:19 +0000
Subject: [PATCH 02/15] refactor/add route for directly providing HTML

---
 mealie/routes/recipe/recipe_crud_routes.py     | 23 ++++++++++++++++---
 mealie/schema/recipe/__init__.py               |  3 ++-
 mealie/schema/recipe/recipe_scraper.py         |  5 ++++
 .../services/scraper/recipe_bulk_scraper.py    |  4 ++--
 mealie/services/scraper/recipe_scraper.py      |  4 ++--
 mealie/services/scraper/scraper.py             |  9 +++++---
 mealie/services/scraper/scraper_strategies.py  |  4 ++--
 tests/unit_tests/test_recipe_parser.py         |  2 +-
 8 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/mealie/routes/recipe/recipe_crud_routes.py b/mealie/routes/recipe/recipe_crud_routes.py
index 3abdf3805c9..cdf05989083 100644
--- a/mealie/routes/recipe/recipe_crud_routes.py
+++ b/mealie/routes/recipe/recipe_crud_routes.py
@@ -40,7 +40,7 @@
 from mealie.routes._base.routers import MealieCrudRoute, UserAPIRouter
 from mealie.schema.cookbook.cookbook import ReadCookBook
 from mealie.schema.make_dependable import make_dependable
-from mealie.schema.recipe import Recipe, RecipeImageTypes, ScrapeRecipe
+from mealie.schema.recipe import Recipe, RecipeImageTypes, ScrapeRecipe, ScrapeRecipeHTML
 from mealie.schema.recipe.recipe import (
     CreateRecipe,
     CreateRecipeByUrlBulk,
@@ -73,7 +73,7 @@
 from mealie.services.recipe.template_service import TemplateService
 from mealie.services.scraper.recipe_bulk_scraper import RecipeBulkScraperService
 from mealie.services.scraper.scraped_extras import ScraperContext
-from mealie.services.scraper.scraper import create_from_url
+from mealie.services.scraper.scraper import create_from_html
 from mealie.services.scraper.scraper_strategies import (
     ForceTimeoutException,
     RecipeScraperOpenAI,
@@ -201,11 +201,28 @@ def handle_exceptions(self, ex: Exception) -> None:
     # =======================================================================
     # URL Scraping Operations

+    @router.post("/create/html", status_code=201)
+    async def create_recipe_from_html(self, req: ScrapeRecipeHTML):
+        """Takes in HTML and attempts to scrape data and load it into the database"""
+
+        return await self._create_recipe_from_web(req)
+
     @router.post("/create/url", status_code=201, response_model=str)
     async def parse_recipe_url(self, req: ScrapeRecipe):
         """Takes in a URL and attempts to scrape data and load it into the database"""
+
+        return await self._create_recipe_from_web(req)
+
+    async def _create_recipe_from_web(self, req: ScrapeRecipe | ScrapeRecipeHTML):
+        if isinstance(req, ScrapeRecipeHTML):
+            html = req.html
+            url = ""
+        else:
+            html = None
+            url = req.url
+
         try:
-            recipe, extras = await create_from_url(req.url, self.translator)
+            recipe, extras = await create_from_html(url, self.translator, html)
         except ForceTimeoutException as e:
             raise HTTPException(
                 status_code=408, detail=ErrorResponse.respond(message="Recipe Scraping Timed Out")
diff --git a/mealie/schema/recipe/__init__.py b/mealie/schema/recipe/__init__.py
index bf9580df9ef..59687567e82 100644
--- a/mealie/schema/recipe/__init__.py
+++ b/mealie/schema/recipe/__init__.py
@@ -71,7 +71,7 @@
 )
 from .recipe_notes import RecipeNote
 from .recipe_nutrition import Nutrition
-from .recipe_scraper import ScrapeRecipe, ScrapeRecipeTest
+from .recipe_scraper import ScrapeRecipe, ScrapeRecipeHTML, ScrapeRecipeTest
 from .recipe_settings import RecipeSettings
 from .recipe_share_token import RecipeShareToken, RecipeShareTokenCreate, RecipeShareTokenSave, RecipeShareTokenSummary
 from .recipe_step import IngredientReferences, RecipeStep
@@ -157,6 +157,7 @@
     "RecipeTool",
     "RecipeToolPagination",
     "ScrapeRecipe",
+    "ScrapeRecipeHTML",
     "ScrapeRecipeTest",
     "AssignCategories",
     "AssignSettings",
diff --git a/mealie/schema/recipe/recipe_scraper.py b/mealie/schema/recipe/recipe_scraper.py
index 8b4bc22ce9a..70d237176af 100644
--- a/mealie/schema/recipe/recipe_scraper.py
+++ b/mealie/schema/recipe/recipe_scraper.py
@@ -19,3 +19,8 @@ class ScrapeRecipe(MealieModel):
         },
     }
     )
+
+
+class ScrapeRecipeHTML(MealieModel):
+    html: str
+    include_tags: bool = False
diff --git a/mealie/services/scraper/recipe_bulk_scraper.py b/mealie/services/scraper/recipe_bulk_scraper.py
index 606284fa1c8..22ae44afb99 100644
--- a/mealie/services/scraper/recipe_bulk_scraper.py
+++ b/mealie/services/scraper/recipe_bulk_scraper.py
@@ -15,7 +15,7 @@
 from mealie.schema.user.user import GroupInDB
 from mealie.services._base_service import BaseService
 from mealie.services.recipe.recipe_service import RecipeService
-from mealie.services.scraper.scraper import create_from_url
+from mealie.services.scraper.scraper import create_from_html


 class RecipeBulkScraperService(BaseService):
@@ -85,7 +85,7 @@ async def scrape(self, urls: CreateRecipeByUrlBulk) -> None:
         async def _do(url: str) -> Recipe | None:
             async with sem:
                 try:
-                    recipe, _ = await create_from_url(url, self.translator)
+                    recipe, _ = await create_from_html(url, self.translator)
                     return recipe
                 except Exception as e:
                     self.service.logger.error(f"failed to scrape url during bulk url import {url}")
diff --git a/mealie/services/scraper/recipe_scraper.py b/mealie/services/scraper/recipe_scraper.py
index 2e05dd10aa5..b04126a3c88 100644
--- a/mealie/services/scraper/recipe_scraper.py
+++ b/mealie/services/scraper/recipe_scraper.py
@@ -32,12 +32,12 @@ def __init__(self, translator: Translator, scrapers: list[type[ABCScraperStrategy]]
         self.scrapers = scrapers
         self.translator = translator

-    async def scrape(self, url: str) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
+    async def scrape(self, url: str, html: str | None = None) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
         """
         Scrapes a recipe from the web.
         """

-        raw_html = await safe_scrape_html(url)
+        raw_html = html or await safe_scrape_html(url)
         for scraper_type in self.scrapers:
             scraper = scraper_type(url, self.translator, raw_html=raw_html)
             result = await scraper.parse()
diff --git a/mealie/services/scraper/scraper.py b/mealie/services/scraper/scraper.py
index a6e307ca820..6e0cd30e1fe 100644
--- a/mealie/services/scraper/scraper.py
+++ b/mealie/services/scraper/scraper.py
@@ -20,18 +20,21 @@ class ParserErrors(str, Enum):
     CONNECTION_ERROR = "CONNECTION_ERROR"


-async def create_from_url(url: str, translator: Translator) -> tuple[Recipe, ScrapedExtras | None]:
+async def create_from_html(
+    url: str, translator: Translator, html: str | None = None
+) -> tuple[Recipe, ScrapedExtras | None]:
     """Main entry point for generating a recipe from a URL. Pass in a URL and
-    a Recipe object will be returned if successful.
+    a Recipe object will be returned if successful. Optionally pass in the HTML to skip fetching it.

     Args:
         url (str): a valid string representing a URL
+        html (str | None): optional HTML string to skip network request. Defaults to None.

     Returns:
         Recipe: Recipe Object
     """
     scraper = RecipeScraper(translator)
-    new_recipe, extras = await scraper.scrape(url)
+    new_recipe, extras = await scraper.scrape(url, html)

     if not new_recipe:
         raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
diff --git a/mealie/services/scraper/scraper_strategies.py b/mealie/services/scraper/scraper_strategies.py
index 517672b2db9..95ae6dce1c2 100644
--- a/mealie/services/scraper/scraper_strategies.py
+++ b/mealie/services/scraper/scraper_strategies.py
@@ -192,7 +192,7 @@ def get_instructions() -> list[RecipeStep]:
         total_time=try_get_default(None, "totalTime", None, cleaner.clean_time, translator=self.translator),
         prep_time=try_get_default(None, "prepTime", None, cleaner.clean_time, translator=self.translator),
         perform_time=cook_time,
-        org_url=url,
+        org_url=url or try_get_default(None, "url", None, cleaner.clean_string),
     )

     return recipe, extras
@@ -340,7 +340,7 @@ def og_fields(properties: list[tuple[str, str]], field_name: str) -> list[str]:
         "recipeIngredient": ["Could not detect ingredients"],
         "recipeInstructions": [{"text": "Could not detect instructions"}],
         "slug": slugify(og_field(properties, "og:title")),
-        "orgURL": self.url,
+        "orgURL": self.url or og_field(properties, "og:url"),
         "categories": [],
         "tags": og_fields(properties, "og:article:tag"),
         "dateAdded": None,
diff --git a/tests/unit_tests/test_recipe_parser.py b/tests/unit_tests/test_recipe_parser.py
index e7515d73d98..7d98b1323b9 100644
--- a/tests/unit_tests/test_recipe_parser.py
+++ b/tests/unit_tests/test_recipe_parser.py
@@ -20,7 +20,7 @@
 @pytest.mark.asyncio
 async def test_recipe_parser(recipe_test_data: RecipeSiteTestCase):
     translator = local_provider()
-    recipe, _ = await scraper.create_from_url(recipe_test_data.url, translator)
+    recipe, _ = await scraper.create_from_html(recipe_test_data.url, translator)

     assert recipe.slug == recipe_test_data.expected_slug
     assert len(recipe.recipe_instructions or []) == recipe_test_data.num_steps
From 6bf48d6a5b3a3fed84c7171b1c6051557c9161b8 Mon Sep 17 00:00:00 2001
From: Michael Genson <71845777+michael-genson@users.noreply.github.com>
Date: Thu, 26 Sep 2024 15:52:16 +0000
Subject: [PATCH 03/15] allow passing JSON string to html route

---
 mealie/routes/recipe/recipe_crud_routes.py    | 17 ++++++++++-------
 mealie/schema/recipe/__init__.py              |  5 +++--
 mealie/schema/recipe/recipe_scraper.py        | 13 ++++++++-----
 mealie/services/scraper/scraper_strategies.py | 17 ++++++++++++-----
 4 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/mealie/routes/recipe/recipe_crud_routes.py b/mealie/routes/recipe/recipe_crud_routes.py
index cdf05989083..fdc76ff9f7c 100644
--- a/mealie/routes/recipe/recipe_crud_routes.py
+++ b/mealie/routes/recipe/recipe_crud_routes.py
@@ -40,7 +40,7 @@
 from mealie.routes._base.routers import MealieCrudRoute, UserAPIRouter
 from mealie.schema.cookbook.cookbook import ReadCookBook
 from mealie.schema.make_dependable import make_dependable
-from mealie.schema.recipe import Recipe, RecipeImageTypes, ScrapeRecipe, ScrapeRecipeHTML
+from mealie.schema.recipe import Recipe, RecipeImageTypes, ScrapeRecipe, ScrapeRecipeData
 from mealie.schema.recipe.recipe import (
     CreateRecipe,
     CreateRecipeByUrlBulk,
@@ -201,9 +201,12 @@ def handle_exceptions(self, ex: Exception) -> None:
     # =======================================================================
     # URL Scraping Operations

-    @router.post("/create/html", status_code=201)
-    async def create_recipe_from_html(self, req: ScrapeRecipeHTML):
-        """Takes in HTML and attempts to scrape data and load it into the database"""
+    @router.post("/create/html-or-json", status_code=201)
+    async def create_recipe_from_html_or_json(self, req: ScrapeRecipeData):
+        """Takes in raw HTML or a https://schema.org/Recipe object as a JSON string and parses it like a URL"""
+
+        if req.data.startswith("{"):
+            req.data = RecipeScraperPackage.ld_json_to_html(req.data)

         return await self._create_recipe_from_web(req)

@@ -213,9 +216,9 @@ async def parse_recipe_url(self, req: ScrapeRecipe):

         return await self._create_recipe_from_web(req)

-    async def _create_recipe_from_web(self, req: ScrapeRecipe | ScrapeRecipeHTML):
-        if isinstance(req, ScrapeRecipeHTML):
-            html = req.html
+    async def _create_recipe_from_web(self, req: ScrapeRecipe | ScrapeRecipeData):
+        if isinstance(req, ScrapeRecipeData):
+            html = req.data
             url = ""
         else:
             html = None
             url = req.url
diff --git a/mealie/schema/recipe/__init__.py b/mealie/schema/recipe/__init__.py
index 59687567e82..4fe6861ef2e 100644
--- a/mealie/schema/recipe/__init__.py
+++ b/mealie/schema/recipe/__init__.py
@@ -71,7 +71,7 @@
 )
 from .recipe_notes import RecipeNote
 from .recipe_nutrition import Nutrition
-from .recipe_scraper import ScrapeRecipe, ScrapeRecipeHTML, ScrapeRecipeTest
+from .recipe_scraper import ScrapeRecipe, ScrapeRecipeBase, ScrapeRecipeData, ScrapeRecipeTest
 from .recipe_settings import RecipeSettings
 from .recipe_share_token import RecipeShareToken, RecipeShareTokenCreate, RecipeShareTokenSave, RecipeShareTokenSummary
 from .recipe_step import IngredientReferences, RecipeStep
@@ -157,7 +157,8 @@
     "RecipeTool",
     "RecipeToolPagination",
     "ScrapeRecipe",
-    "ScrapeRecipeHTML",
+    "ScrapeRecipeBase",
+    "ScrapeRecipeData",
     "ScrapeRecipeTest",
     "AssignCategories",
     "AssignSettings",
diff --git a/mealie/schema/recipe/recipe_scraper.py b/mealie/schema/recipe/recipe_scraper.py
index 70d237176af..2f1efcf0571 100644
--- a/mealie/schema/recipe/recipe_scraper.py
+++ b/mealie/schema/recipe/recipe_scraper.py
@@ -8,9 +8,12 @@ class ScrapeRecipeTest(MealieModel):
     use_openai: bool = Field(False, alias="useOpenAI")


-class ScrapeRecipe(MealieModel):
-    url: str
+class ScrapeRecipeBase(MealieModel):
     include_tags: bool = False
+
+
+class ScrapeRecipe(ScrapeRecipeBase):
+    url: str
     model_config = ConfigDict(
         json_schema_extra={
             "example": {
@@ -21,6 +24,6 @@ class ScrapeRecipe(MealieModel):
     )


-class ScrapeRecipeHTML(MealieModel):
-    html: str
-    include_tags: bool = False
+class ScrapeRecipeData(ScrapeRecipeBase):
+    data: str
+    """HTML data or JSON string of a https://schema.org/Recipe object"""
diff --git a/mealie/services/scraper/scraper_strategies.py b/mealie/services/scraper/scraper_strategies.py
index 95ae6dce1c2..7f0de46215a 100644
--- a/mealie/services/scraper/scraper_strategies.py
+++ b/mealie/services/scraper/scraper_strategies.py
@@ -119,6 +119,14 @@


 class RecipeScraperPackage(ABCScraperStrategy):
+    @staticmethod
+    def ld_json_to_html(ld_json: str) -> str:
+        return (
+            "<!DOCTYPE html><html><head>"
+            f'<script type="application/ld+json">{ld_json}</script>'
+            "</head><body></body></html>"
+        )
+
     async def get_html(self, url: str) -> str:
         return self.raw_html or await safe_scrape_html(url)

@@ -300,11 +308,10 @@ async def get_html(self, url: str) -> str:
             prompt = service.get_prompt("recipes.scrape-recipe")

             response_json = await service.get_response(prompt, text, force_json_response=True)
-            return (
-                "<!DOCTYPE html><html><head>"
-                f'<script type="application/ld+json">{response_json}</script>'
-                "</head><body></body></html>"
-            )
+            if not response_json:
+                raise Exception("OpenAI did not return any data")
+
+            return self.ld_json_to_html(response_json)
         except Exception:
             self.logger.exception(f"OpenAI was unable to extract a recipe from {url}")
             return ""
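The `startswith("{")` check above is what routes a JSON payload through `ld_json_to_html` before the ordinary HTML scrapers run. A standalone sketch of the same idea, showing the wrapper document the scraper ends up parsing (the sample payload is illustrative):

```python
import json


def ld_json_to_html(ld_json: str) -> str:
    # Mirror of the new static method: wrap the JSON string in a minimal page
    # so downstream HTML scrapers can treat it like a fetched web page.
    return (
        "<!DOCTYPE html><html><head>"
        f'<script type="application/ld+json">{ld_json}</script>'
        "</head><body></body></html>"
    )


payload = json.dumps({"@context": "https://schema.org", "@type": "Recipe", "name": "Pancakes"})
if payload.startswith("{"):  # same heuristic the new route uses to detect JSON
    payload = ld_json_to_html(payload)
print(payload)  # a one-line HTML document with the recipe embedded as ld+json
```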
" + f'' + "" + ) + async def get_html(self, url: str) -> str: return self.raw_html or await safe_scrape_html(url) @@ -300,11 +308,10 @@ async def get_html(self, url: str) -> str: prompt = service.get_prompt("recipes.scrape-recipe") response_json = await service.get_response(prompt, text, force_json_response=True) - return ( - "" - f'' - "" - ) + if not response_json: + raise Exception("OpenAI did not return any data") + + return self.ld_json_to_html(response_json) except Exception: self.logger.exception(f"OpenAI was unable to extract a recipe from {url}") return "" From 288fe407a6559444202d669a05e8bc550de5c8e6 Mon Sep 17 00:00:00 2001 From: Michael Genson <71845777+michael-genson@users.noreply.github.com> Date: Thu, 26 Sep 2024 16:09:16 +0000 Subject: [PATCH 04/15] add dummy URL for recipe scrapers --- mealie/services/scraper/scraper_strategies.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mealie/services/scraper/scraper_strategies.py b/mealie/services/scraper/scraper_strategies.py index 7f0de46215a..77c326d3ead 100644 --- a/mealie/services/scraper/scraper_strategies.py +++ b/mealie/services/scraper/scraper_strategies.py @@ -209,7 +209,8 @@ async def scrape_url(self) -> SchemaScraperFactory.SchemaScraper | Any | None: recipe_html = await self.get_html(self.url) try: - scraped_schema = scrape_html(recipe_html, org_url=self.url, supported_only=False) + # scrape_html requires a URL, but we might not have one, so we default to a dummy URL + scraped_schema = scrape_html(recipe_html, org_url=self.url or "https://example.com", supported_only=False) except (NoSchemaFoundInWildMode, AttributeError): self.logger.error(f"Recipe Scraper was unable to extract a recipe from {self.url}") return None From 3f6e8162e1129f91d6acee93d12681808910e7ba Mon Sep 17 00:00:00 2001 From: Michael Genson <71845777+michael-genson@users.noreply.github.com> Date: Thu, 26 Sep 2024 16:12:40 +0000 Subject: [PATCH 05/15] dev:generate --- frontend/lib/api/types/recipe.ts | 7 +++++++ tests/utils/api_routes/__init__.py | 2 ++ 2 files changed, 9 insertions(+) diff --git a/frontend/lib/api/types/recipe.ts b/frontend/lib/api/types/recipe.ts index f8620c4b3ee..3125b6bced0 100644 --- a/frontend/lib/api/types/recipe.ts +++ b/frontend/lib/api/types/recipe.ts @@ -472,8 +472,15 @@ export interface SaveIngredientUnit { groupId: string; } export interface ScrapeRecipe { + includeTags?: boolean; url: string; +} +export interface ScrapeRecipeBase { + includeTags?: boolean; +} +export interface ScrapeRecipeData { includeTags?: boolean; + data: string; } export interface ScrapeRecipeTest { url: string; diff --git a/tests/utils/api_routes/__init__.py b/tests/utils/api_routes/__init__.py index 7b05d849c52..d91e72a26e7 100644 --- a/tests/utils/api_routes/__init__.py +++ b/tests/utils/api_routes/__init__.py @@ -147,6 +147,8 @@ """`/api/recipes/bulk-actions/settings`""" recipes_bulk_actions_tag = "/api/recipes/bulk-actions/tag" """`/api/recipes/bulk-actions/tag`""" +recipes_create_html_or_json = "/api/recipes/create/html-or-json" +"""`/api/recipes/create/html-or-json`""" recipes_create_image = "/api/recipes/create/image" """`/api/recipes/create/image`""" recipes_create_url = "/api/recipes/create/url" From f6dc3eb5544be43232c69ba23f15b83cbba4bbca Mon Sep 17 00:00:00 2001 From: Michael Genson <71845777+michael-genson@users.noreply.github.com> Date: Thu, 26 Sep 2024 18:37:40 +0000 Subject: [PATCH 06/15] add html/json page --- .../components/global/RecipeJsonEditor.vue | 6 +- frontend/lang/messages/en-US.json | 6 + 
From f6dc3eb5544be43232c69ba23f15b83cbba4bbca Mon Sep 17 00:00:00 2001
From: Michael Genson <71845777+michael-genson@users.noreply.github.com>
Date: Thu, 26 Sep 2024 18:37:40 +0000
Subject: [PATCH 06/15] add html/json page

---
 .../components/global/RecipeJsonEditor.vue    |   6 +-
 frontend/lang/messages/en-US.json             |   6 +
 frontend/lib/api/user/recipes/recipe.ts       |   5 +
 frontend/lib/icons/icons.ts                   |   4 +-
 frontend/pages/g/_groupSlug/r/create.vue      |   5 +
 frontend/pages/g/_groupSlug/r/create/html.vue | 167 ++++++++++++++++++
 frontend/pages/g/_groupSlug/r/create/url.vue  |  10 +-
 7 files changed, 200 insertions(+), 3 deletions(-)
 create mode 100644 frontend/pages/g/_groupSlug/r/create/html.vue

diff --git a/frontend/components/global/RecipeJsonEditor.vue b/frontend/components/global/RecipeJsonEditor.vue
index 99067f8e4ae..1e5c580a6cb 100644
--- a/frontend/components/global/RecipeJsonEditor.vue
+++ b/frontend/components/global/RecipeJsonEditor.vue
@@ -1,7 +1,7 @@
[The Vue template markup in this and the remaining hunks of this patch was lost during extraction; only the translatable text survives. The recoverable changes follow.]

In frontend/pages/g/_groupSlug/r/create/url.vue, the bulk-importer hint keeps its first paragraph and gains a pointer to the new raw-data importer (the surrounding `<p>`/`<br />` tags are reconstructed from the diff line structure):

     <p>{{ $t('recipe.scrape-recipe-description') }}</p>
-    <p>{{ $t('recipe.scrape-recipe-have-a-lot-of-recipes') }} {{ $t('recipe.scrape-recipe-suggest-bulk-importer') }}.</p>
+    <p>
+      {{ $t('recipe.scrape-recipe-have-a-lot-of-recipes') }}
+      {{ $t('recipe.scrape-recipe-suggest-bulk-importer') }}.
+      <br />
+      {{ $t('recipe.scrape-recipe-have-raw-html-or-json-data') }}
+      {{ $t('recipe.scrape-recipe-you-can-import-from-raw-data-directly') }}.
+    </p>

The new frontend/pages/g/_groupSlug/r/create/html.vue page (167 lines) includes a description block referencing the expected JSON format:

+      {{ $tc("recipe.import-from-html-or-json-description") }}
+      {{ $tc("recipe.json-import-format-description-colon") }}
+      https://schema.org/Recipe

A further hunk removes and re-adds this same description block; its surrounding markup was likewise lost. [The excerpt ends here; patches 07/15 onward are not included.]
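For reference, what the new create/html page submits boils down to a single POST against the backend route added earlier in the series. An equivalent request from outside the frontend (the URL and token are placeholders):

```python
import requests

MEALIE_URL = "https://mealie.example.com"  # assumption: your instance URL
TOKEN = "your-api-token"  # assumption: a valid Mealie API token

# Either raw page HTML or a schema.org/Recipe JSON string goes in "data";
# the backend detects JSON by the leading "{" and wraps it as ld+json HTML.
html_or_json = '{"@context": "https://schema.org", "@type": "Recipe", "name": "Pancakes"}'

resp = requests.post(
    f"{MEALIE_URL}/api/recipes/create/html-or-json",
    headers={"Authorization": f"Bearer {TOKEN}"},
    json={"data": html_or_json, "includeTags": True},
)
print(resp.status_code, resp.json())  # 201 and the new recipe's slug on success
```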