-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpptConvertToTxt.py
67 lines (56 loc) · 3.14 KB
/
pptConvertToTxt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from pptx import Presentation
import os
# Root directory that is searched (recursively, via os.walk below) for .pptx files
root_directory_path = './training'
# Every converted .txt file is written into the single 'all' sub-directory of the root
output_directory_path = os.path.join(root_directory_path, 'all')
# Create the output directory up front; exist_ok=True makes re-runs harmless
os.makedirs(output_directory_path, exist_ok=True)
# Function to extract the text of a slide, skipping shapes whose name is excluded
def extract_text_from_slide(slide, excluded_names=('Content Placeholder 14', 'Text Placeholder 1')):
    """Return the text of the slide's shapes, joined with newlines.

    Args:
        slide: Object exposing a ``shapes`` iterable; each shape has a
            ``name`` and, when it carries text, a ``text`` attribute.
        excluded_names: Collection of shape names whose text is left out.
            The default keeps the script's existing behaviour: it skips
            'Content Placeholder 14' and 'Text Placeholder 1' (the latter is
            the shape ``find_games_date`` reads). Pass a tuple/list/set, not
            a bare string, so the check is a name match rather than a
            substring match.

    Returns:
        The texts of all remaining text-bearing shapes, in shape order,
        separated by "\n". An empty string if no shape qualifies.
    """
    return "\n".join(
        shape.text
        for shape in slide.shapes
        # Shapes without a ``text`` attribute (e.g. pictures) are skipped.
        if hasattr(shape, "text") and shape.name not in excluded_names
    )
# Function to look up the game's date: the first 'Text Placeholder 1' text in the deck
def find_games_date(prs):
    """Return the text of the first 'Text Placeholder 1' shape in *prs*.

    Slides are scanned in order, and shapes in order within each slide. A
    shape only counts when it is named 'Text Placeholder 1' and has a
    ``text`` attribute.

    Args:
        prs: Object exposing a ``slides`` iterable whose items expose a
            ``shapes`` iterable (a python-pptx presentation).

    Returns:
        The matching shape's text, or "Unknown Date" when no shape matches.
    """
    candidate_texts = (
        shape.text
        for slide in prs.slides
        for shape in slide.shapes
        if shape.name == 'Text Placeholder 1' and hasattr(shape, "text")
    )
    # The generator is lazy, so scanning stops at the first match.
    return next(candidate_texts, "Unknown Date")
# Walk the whole tree under the root directory and convert every PowerPoint
# deck found into a text file in the output directory.
for root, _dirs, files in os.walk(root_directory_path):
    for file in files:
        # Only .pptx decks are converted. PowerPoint's "~$<name>.pptx" lock
        # files also end in .pptx but are not valid packages, so opening them
        # with Presentation() would abort the whole run -- skip them.
        if not file.endswith('.pptx') or file.startswith('~$'):
            continue

        file_path = os.path.join(root, file)
        prs = Presentation(file_path)

        # Find the game's date; it heads the output file.
        game_date = find_games_date(prs)
        sections = [f"Game Date: {game_date}"]

        # Only slides 1, 3 and 5 are exported (python-pptx indexes from 0).
        for i in (0, 2, 4):
            if i >= len(prs.slides):
                continue  # Deck is too short to have this slide

            slide_text = extract_text_from_slide(prs.slides[i])
            if i == 2:
                # Slide 3: questions are assumed to be one per line; keep at
                # most the first 10 lines.
                slide_text = '\n'.join(slide_text.split('\n')[:10])
            elif i == 4:
                # Slide 5 is labelled as the bonus round.
                slide_text = "Bonus Round\n" + slide_text
            sections.append(slide_text)

        # Every section (the date line included) is followed by a blank line.
        text_to_save = "\n\n".join(sections) + "\n\n"

        # The output file reuses the deck's base name with a .txt extension.
        # NOTE(review): decks with the same file name in different
        # sub-directories map to the same output file and overwrite each other.
        output_file_path = os.path.join(output_directory_path, os.path.splitext(file)[0] + '.txt')

        # Write as UTF-8 explicitly: slide text commonly contains characters
        # (smart quotes, dashes, ...) that a non-UTF-8 locale default encoding
        # cannot represent, which would raise UnicodeEncodeError.
        with open(output_file_path, 'w', encoding='utf-8') as text_file:
            text_file.write(text_to_save)

print("Conversion completed.")