I recently purchased a copy of ‘Modern Python Cookbook’, but I found that the code listings in the epub file were indented, which caused a problem when reading on my tablet. (I had reverted to the epub because the PDF version froze in the Bookari ereader software.)
import codecs
import os
from textwrap import dedent

from bs4 import BeautifulSoup
# Text encoding passed to codecs.open() when the HTML files are opened.
ENCODING = 'utf8'
# NOTE(review): these statements use `filepath` as if it were a parameter, so
# they presumably form the body of a function whose `def` line is not visible
# in this excerpt -- TODO restore the header from the original script.
soup = load_soup(filepath)
# `find_all` is the bs4 spelling of the legacy `findAll` alias.
code = soup.find_all('pre')
for c in code:
    # Dedent twice to cater for 'blank' lines with spaces.
    # Assigning to `.string` replaces the tag's contents with the dedented text.
    c.string = dedent(dedent(c.text))
# NOTE(review): orphaned fragment. This is presumably the body of load_soup(),
# which is called elsewhere in this script but whose `def` line is not visible
# in this excerpt; the statements inside the `with` block are missing as well.
# TODO restore both from the original script.
# Opens `filepath` through codecs.open() using the module-level ENCODING.
with codecs.open(filepath, encoding = ENCODING) as f:
# Opens `filepath` in write mode using the module-level ENCODING.
# NOTE(review): presumably writes `soup` back to that file, but the statements
# inside the `with` block (and the original indentation) are not visible in
# this excerpt -- TODO restore them from the original script.
def save_soup(filepath, soup):
with codecs.open(filepath, mode = 'w', encoding = ENCODING) as f:
if __name__ == "__main__":
    # Folder that is scanned for '.html' files (presumably the unpacked epub).
    FOLDER = r'ebook\OEBPS'
    html_files = [fn for fn in os.listdir(FOLDER) if fn.endswith('.html')]
    total_files = len(html_files)
    # Count from 1 so the progress message reads "1/N" .. "N/N".
    for position, file_name in enumerate(html_files, 1):
        # The parenthesised single-argument form prints the same text on
        # Python 2 and is also valid syntax on Python 3.
        print('Processing file %s (%s/%s)' % (file_name, position, total_files))
        # NOTE(review): the call that actually processes `file_name` is not
        # visible in this excerpt -- TODO restore it from the original script.