summaryrefslogtreecommitdiff
path: root/docs/usage-manual/export-usage-manual.py
diff options
context:
space:
mode:
authorMarc <marcll@vt.edu>2021-11-19 23:48:03 -0500
committermormj <34754695+mormj@users.noreply.github.com>2021-11-21 16:21:26 -0500
commit5d7430508a4c98baa53531442196023a602419a2 (patch)
treea2646f41ac8d232911ad4e24ac44f314a7601e8f /docs/usage-manual/export-usage-manual.py
parent1fb203afd0dc3ab5681210c94bfbefe26c8e22f5 (diff)
Docs: updated usage manual export
Diffstat (limited to 'docs/usage-manual/export-usage-manual.py')
-rw-r--r--docs/usage-manual/export-usage-manual.py20
1 files changed, 14 insertions, 6 deletions
diff --git a/docs/usage-manual/export-usage-manual.py b/docs/usage-manual/export-usage-manual.py
index 02d3fb620a..74f3d4d30c 100644
--- a/docs/usage-manual/export-usage-manual.py
+++ b/docs/usage-manual/export-usage-manual.py
@@ -1,12 +1,17 @@
-from selenium import webdriver
+# Setup: pip install selenium
+# I also had to run: pip install --upgrade requests
+# Download geckodriver from https://github.com/mozilla/geckodriver/releases and put it in your Downloads directory
+# Make it executable: sudo chmod +x geckodriver
+# Add its directory to PATH: export PATH=$PATH:/home/marc/Downloads (or wherever you put it)
+
+from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
-import HTMLParser
+from html.parser import HTMLParser
import os
# Settings
-pages_to_save = ['GNURadioCompanion',
- 'Handling Flowgraphs',
+pages_to_save = ['Handling Flowgraphs',
'Types of Blocks',
'Metadata Information',
'Stream Tags',
@@ -22,8 +27,10 @@ pages_to_save = ['GNURadioCompanion',
'Polyphase Filterbanks']
# set up web driver
-driver = webdriver.Firefox()
+driver = webdriver.Firefox('/home/marc/Downloads/geckodriver') # dir that contains geckodriver
+print("STARTING")
for page_name in pages_to_save:
+ print("Processing", page_name)
driver.get("https://wiki.gnuradio.org/index.php/Special:Export")
# fill in text box
@@ -44,10 +51,11 @@ for page_name in pages_to_save:
cropped_html = raw_html[start_index:]
# save text to file
- h = HTMLParser.HTMLParser()
+ h = HTMLParser()
cropped_html_text = h.unescape(cropped_html) # makes it so stuff like &gt shows up as a greater than sign
text_file = open("(exported from wiki) " + page_name + ".txt", "w")
text_file.write(cropped_html_text)
text_file.close()
driver.close()
+print("DONE")