Skip to content

Commit

Permalink
Rerun
Browse files Browse the repository at this point in the history
  • Loading branch information
salgo60 committed Nov 12, 2024
1 parent 389ba4e commit 7fc65fe
Showing 1 changed file with 114 additions and 164 deletions.
278 changes: 114 additions & 164 deletions Notebook/SWERIKS check.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Last run: 2024-11-12 10:09:02.380817\n"
"Last run: 2024-11-12 10:54:07.030201\n"
]
}
],
Expand Down Expand Up @@ -120,7 +120,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 9,
"id": "a5832d52-5f28-4dab-b2aa-487c421c2a3a",
"metadata": {},
"outputs": [],
Expand All @@ -135,19 +135,18 @@
" response = requests.get(base_url)\n",
" if response.status_code == 200:\n",
" NrValid += 1\n",
" return True, None # Success, no error message\n",
" else:\n",
" NrnotValid += 1\n",
" print(f\"WD {wd} - {base_url}\")\n",
" return False, f\"WD {wd} - {base_url} - Status Code: {response.status_code}\"\n",
" except requests.exceptions.RequestException as e:\n",
" NrnotValid += 1\n",
" print(f\"WD {wd} - {base_url}\")\n",
" print(f\"Error: {e}\")\n",
" return False, f\"WD {wd} - {base_url} - Error: {e}\""
" return False, f\"WD {wd} - {base_url} - Error: {e}\"\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 10,
"id": "c8917366-acbc-4c7e-a159-e4d858b27b91",
"metadata": {},
"outputs": [],
Expand All @@ -160,156 +159,15 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"id": "1b97e742-71be-4274-8bfc-a2d6b6112f95",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Processing records: 12%|██▎ | 713/6177 [03:05<28:31, 3.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"WD Q4934552 - https://swerik-project.github.io/person-catalog/i-PCZrYEHwPaEeNTZphEsWTv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Processing records: 15%|██▉ | 905/6177 [04:05<28:28, 3.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"WD Q4957371 - https://swerik-project.github.io/person-catalog/i-31gPpUoSm7zqzQckVmfPGy\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Processing records: 16%|███▏ | 995/6177 [04:33<27:36, 3.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"WD Q4970175 - https://swerik-project.github.io/person-catalog/i-UX4D3JJdrTjFBf2zyfHx5t\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Processing records: 19%|███▌ | 1159/6177 [05:25<25:59, 3.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"WD Q4976825 - https://swerik-project.github.io/person-catalog/i-NvxzaU2RSok83zCskNAuhg\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Processing records: 38%|███████▏ | 2352/6177 [11:45<19:55, 3.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"WD Q97971262 - https://swerik-project.github.io/person-catalog/i-RH6VCPhyxs9yYcfXJzPxYT\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Processing records: 38%|███████▎ | 2359/6177 [11:46<18:29, 3.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"WD Q97971276 - https://swerik-project.github.io/person-catalog/i-Cdgsqn4Ts9WMwbjXcE4537\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Processing records: 38%|███████▎ | 2377/6177 [11:52<19:45, 3.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"WD Q98271639 - https://swerik-project.github.io/person-catalog/i-x1CuoKmRHYgQr9i2kh3B5\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Processing records: 39%|███████▎ | 2388/6177 [11:55<19:42, 3.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"WD Q98538839 - https://swerik-project.github.io/person-catalog/i-TUyWWYGDFXW92GhiG3CLwF\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Processing records: 42%|████████ | 2612/6177 [13:06<15:19, 3.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"WD Q98937434 - https://swerik-project.github.io/person-catalog/i-EzcxskgMAVbnq8hM2F2km9\n",
"WD Q98937482 - https://swerik-project.github.io/person-catalog/i-HYFwSCrwnemwyJTLMcyqvN\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Processing records: 59%|███████████▏ | 3625/6177 [18:31<11:41, 3.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"WD Q117223085 - https://swerik-project.github.io/person-catalog/i-EQM2NLR1fbN9izUQhjTRGR\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Processing records: 60%|███████████▍ | 3710/6177 [18:58<13:09, 3.13it/s]"
"Processing records: 100%|███████████████████| 6177/6177 [31:35<00:00, 3.26it/s]\n"
]
}
],
Expand All @@ -324,45 +182,137 @@
" swerik = result[\"swerik\"][\"value\"]\n",
" wdurl = result[\"wd\"][\"value\"]\n",
" wd = str(wdurl).replace(\"http://www.wikidata.org/entity/\",\"\")\n",
" try:\n",
" success, error_message = checkurl(wd, swerik)\n",
" if not success and error_message:\n",
" errors.append(error_message)\n",
" except Exception as e:\n",
" # Store the error details in the list\n",
" errors.append((wd, swerik, str(e)))\n",
" success, error_message = checkurl(wd, swerik)\n",
" if not success and error_message:\n",
" errors.append(error_message)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 14,
"id": "b9d7be57-8e8e-4670-a031-bf7124906aac",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of valid URLs: 6110\n",
"Number of invalid URLs: 68\n",
"\n",
"Errors encountered:\n",
"WD Q4934552 - https://swerik-project.github.io/person-catalog/i-PCZrYEHwPaEeNTZphEsWTv - Status Code: 404\n",
"WD Q4957371 - https://swerik-project.github.io/person-catalog/i-31gPpUoSm7zqzQckVmfPGy - Status Code: 404\n",
"WD Q4970175 - https://swerik-project.github.io/person-catalog/i-UX4D3JJdrTjFBf2zyfHx5t - Status Code: 404\n",
"WD Q4976825 - https://swerik-project.github.io/person-catalog/i-NvxzaU2RSok83zCskNAuhg - Status Code: 404\n",
"WD Q97971262 - https://swerik-project.github.io/person-catalog/i-RH6VCPhyxs9yYcfXJzPxYT - Status Code: 404\n",
"WD Q97971276 - https://swerik-project.github.io/person-catalog/i-Cdgsqn4Ts9WMwbjXcE4537 - Status Code: 404\n",
"WD Q98271639 - https://swerik-project.github.io/person-catalog/i-x1CuoKmRHYgQr9i2kh3B5 - Status Code: 404\n",
"WD Q98538839 - https://swerik-project.github.io/person-catalog/i-TUyWWYGDFXW92GhiG3CLwF - Status Code: 404\n",
"WD Q98937434 - https://swerik-project.github.io/person-catalog/i-EzcxskgMAVbnq8hM2F2km9 - Status Code: 404\n",
"WD Q98937482 - https://swerik-project.github.io/person-catalog/i-HYFwSCrwnemwyJTLMcyqvN - Status Code: 404\n",
"WD Q117223085 - https://swerik-project.github.io/person-catalog/i-EQM2NLR1fbN9izUQhjTRGR - Status Code: 404\n",
"WD Q5802544 - https://swerik-project.github.io/person-catalog/i-EtThq89KCE79SrwT9ppHwa - Status Code: 404\n",
"WD Q6001491 - https://swerik-project.github.io/person-catalog/i-S3CBCc7cXNPRWXt4kT1Nn - Status Code: 404\n",
"WD Q6196285 - https://swerik-project.github.io/person-catalog/i-GSjyw1eeZNrEr8Uk3Wy79K - Status Code: 404\n",
"WD Q117289330 - https://swerik-project.github.io/person-catalog/i-W4ytnPuPTvRtJf3k6ST5af - Status Code: 404\n",
"WD Q116162237 - https://swerik-project.github.io/person-catalog/i-EZYMWS6pSZNPSxi4996Lpc - Status Code: 404\n",
"WD Q116916 - https://swerik-project.github.io/person-catalog/i-soGG7WvpfsE45txj7YR3j - Status Code: 404\n",
"WD Q16650562 - https://swerik-project.github.io/person-catalog/i-6F9rS1XcW3FrTADBP2ew1K - Status Code: 404\n",
"WD Q18274740 - https://swerik-project.github.io/person-catalog/i-UFUisxxPnKCE3asVJtR1C6 - Status Code: 404\n",
"WD Q26202 - https://swerik-project.github.io/person-catalog/i-Xvpu7KtsFhUijgkbtWpCVM - Status Code: 404\n",
"WD Q2694124 - https://swerik-project.github.io/person-catalog/i-GVHobKxNYcHjgVszvkcndc - Status Code: 404\n",
"WD Q38773508 - https://swerik-project.github.io/person-catalog/i-RE5rAQ194rSt7bN8ZGzmSk - Status Code: 404\n",
"WD Q4569362 - https://swerik-project.github.io/person-catalog/i-65tDQ1Kb8spvfcwmsyYib7 - Status Code: 404\n",
"WD Q4992085 - https://swerik-project.github.io/person-catalog/i-9FLyBDaVeYA1bbxCdRsmNS - Status Code: 404\n",
"WD Q52924 - https://swerik-project.github.io/person-catalog/i-SQxvy2ue6orrTGivt4nDBE - Status Code: 404\n",
"WD Q52925 - https://swerik-project.github.io/person-catalog/i-5MnwqH2UtehSx7EbLGDQMA - Status Code: 404\n",
"WD Q52926 - https://swerik-project.github.io/person-catalog/i-3be6RBChcyBubPmFEyzLuZ - Status Code: 404\n",
"WD Q52927 - https://swerik-project.github.io/person-catalog/i-NBgv74Z6fFc4kgB87Q5s3i - Status Code: 404\n",
"WD Q5499466 - https://swerik-project.github.io/person-catalog/i-68qKbvhEHER4C2TRzVi2T9 - Status Code: 404\n",
"WD Q5547623 - https://swerik-project.github.io/person-catalog/i-VoSy23Ve5KQBpG4mSX9qdp - Status Code: 404\n",
"WD Q5553946 - https://swerik-project.github.io/person-catalog/i-Q58Ze7TxTyB6TSL3tKfeoK - Status Code: 404\n",
"WD Q5585712 - https://swerik-project.github.io/person-catalog/i-JWaAdR37r5gFr2SYf1P6zG - Status Code: 404\n",
"WD Q5585717 - https://swerik-project.github.io/person-catalog/i-ELDAUPfRgFPiVy5G37SrBn - Status Code: 404\n",
"WD Q5605987 - https://swerik-project.github.io/person-catalog/i-K6cT1SiaPMfvDc7UQducRL - Status Code: 404\n",
"WD Q5615448 - https://swerik-project.github.io/person-catalog/i-JtSCjZhbn7kKtLncnKjNjs - Status Code: 404\n",
"WD Q5620967 - https://swerik-project.github.io/person-catalog/i-34QhxUMskSZttM6WAtP9fu - Status Code: 404\n",
"WD Q5715090 - https://swerik-project.github.io/person-catalog/i-693RFVRzxr1MXjzetwbKzY - Status Code: 404\n",
"WD Q5724152 - https://swerik-project.github.io/person-catalog/i-21sS3832F96xjNFhsY9x2i - Status Code: 404\n",
"WD Q5773319 - https://swerik-project.github.io/person-catalog/i-ADcRhddZxegj2BX4Abux5i - Status Code: 404\n",
"WD Q5779321 - https://swerik-project.github.io/person-catalog/i-6R7CFRqLrZfQGAGpRxRZmq - Status Code: 404\n",
"WD Q5779581 - https://swerik-project.github.io/person-catalog/i-BibwVxLqqeX5rUkp4qZsoT - Status Code: 404\n",
"WD Q5779691 - https://swerik-project.github.io/person-catalog/i-AvyNgUr5vHb4YSPYHYNoDf - Status Code: 404\n",
"WD Q5891553 - https://swerik-project.github.io/person-catalog/i-M8wzDjdnp3v1kx7mCnhrnz - Status Code: 404\n",
"WD Q5930843 - https://swerik-project.github.io/person-catalog/i-PPwk8GX9Ac1MMgY78vBnxU - Status Code: 404\n",
"WD Q5931248 - https://swerik-project.github.io/person-catalog/i-8q84CfWpoFkjGhrjKmh5nV - Status Code: 404\n",
"WD Q5973676 - https://swerik-project.github.io/person-catalog/i-W5KTkCsx6UQycN1fck4krq - Status Code: 404\n",
"WD Q6015512 - https://swerik-project.github.io/person-catalog/i-657md5LkCsjE6B2F6cMUFR - Status Code: 404\n",
"WD Q6026925 - https://swerik-project.github.io/person-catalog/i-UMxTFnyXFG1sA9nuaawcTn - Status Code: 404\n",
"WD Q6043619 - https://swerik-project.github.io/person-catalog/i-EPD5BJ5xvWKMLybqidZ7xr - Status Code: 404\n",
"WD Q6045631 - https://swerik-project.github.io/person-catalog/i-XA4KxPbJJcoEq2kHZmGBg8 - Status Code: 404\n",
"WD Q6054405 - https://swerik-project.github.io/person-catalog/i-KYcNGH8TDrzXp5RkXfVxcZ - Status Code: 404\n",
"WD Q6070153 - https://swerik-project.github.io/person-catalog/i-SoPKUW6bamDYhSJ8r5kbfm - Status Code: 404\n",
"WD Q6083505 - https://swerik-project.github.io/person-catalog/i-Kz6LFDnXFaN9pxampQmtys - Status Code: 404\n",
"WD Q6151281 - https://swerik-project.github.io/person-catalog/i-EQ1EaRBTJC4gvBVqD6F6QS - Status Code: 404\n",
"WD Q6186524 - https://swerik-project.github.io/person-catalog/i-UVmEkRLtur2TYHixo3YS36 - Status Code: 404\n",
"WD Q6228284 - https://swerik-project.github.io/person-catalog/i-NEzajeS8oXAKC4PXqeHZNT - Status Code: 404\n",
"WD Q6244276 - https://swerik-project.github.io/person-catalog/i-7eaDwLCH46J5Z48Agp4bDd - Status Code: 404\n",
"WD Q6257688 - https://swerik-project.github.io/person-catalog/i-YSxWozeNBai9QXW24ThZk2 - Status Code: 404\n",
"WD Q792307 - https://swerik-project.github.io/person-catalog/i-6FzAA1fd4V1GWFU8UEjDM9 - Status Code: 404\n",
"WD Q97104614 - https://swerik-project.github.io/person-catalog/i-F9yiexrfiaMq7XRkN2UQtm - Status Code: 404\n",
"WD Q96758042 - https://swerik-project.github.io/person-catalog/i-Li7xEjG4CU6Q9Kayu1A6JD - Status Code: 404\n",
"WD Q97386321 - https://swerik-project.github.io/person-catalog/i-JDzNUwA9QaroyEei8swjky - Status Code: 404\n",
"WD Q97824066 - https://swerik-project.github.io/person-catalog/i-4o1RM4T3EmDZc7uLvsoLiC - Status Code: 404\n",
"WD Q3352340 - https://swerik-project.github.io/person-catalog/i-Y7HHuSEZsgc8ayVQEsVKs9 - Status Code: 404\n",
"WD Q6015181 - https://swerik-project.github.io/person-catalog/i-F5Lo79KEGCBu1choqsfsAZ - Status Code: 404\n",
"WD Q60971016 - https://swerik-project.github.io/person-catalog/i-8RqA5Vq57Dp8X1YMfWXXz1 - Status Code: 404\n",
"WD Q47067977 - https://swerik-project.github.io/person-catalog/i-BCDpWeGcyN6FUwwXHRDSyd - Status Code: 404\n",
"WD Q19976148 - https://swerik-project.github.io/person-catalog/i-F8n5AiCeSxhtfcXwu7PkYD - Status Code: 404\n"
]
}
],
"source": [
"# Print the results\n",
"print(f\"Number of valid URLs: {NrValid}\")\n",
"print(f\"Number of invalid URLs: {NrnotValid}\")\n",
"\n",
"if errors:\n",
"if len(errors) > 0:\n",
" print(\"\\nErrors encountered:\")\n",
" for wd, swerik, error_msg in errors:\n",
" print(f\"Error with wd: {wd}, swerik: {swerik} - {error_msg}\")\n",
" for e in errors:\n",
" print(f\"{e}\")\n",
"else:\n",
" print(\"\\nAll records processed without errors.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 10,
"id": "125dabca-1841-4ebc-857e-730d07ab8b34",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"End run: 2024-11-12 10:40:53.645775\n",
"Time elapsed (hh:mm:ss.ms) 0:31:51.265143\n"
]
}
],
"source": [
"print(\"End run: \", datetime.now())\n",
"print('Time elapsed (hh:mm:ss.ms) {}'.format(datetime.now() - start_time))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b0bf4d3d-a1d3-4dee-aa14-6ced53e26e3e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down

0 comments on commit 7fc65fe

Please sign in to comment.