sbcgen

sitegen.py at tip
Login

File sitegen.py from the latest check-in


#!/usr/bin/env python3

# Static site generator
import os
import sys
import datetime
import glob
import shutil
import re
import yaml
import html
import frontmatter
import markdown as md
from jinja2 import Environment, FileSystemLoader
import datetime as dt


def main(args):
    """sitegen: builds a basic static website from a folder of
    markdown files. Compiles markdown files, copies other files
    as is to the build folder.

    Usage: sitegen [all/blog/static/sitemap]

    If run without options will rebuild everything (same as 'all').
    Otherwise can be set to generate only the blog, only the static pages, or
    all pages.
    Sitemap is only built with build all, or build sitemap
"""
    # Map each sub-command to the builder it triggers.
    commands = {
        'all': build_all,
        'blog': build_blog,
        'static': build_static,
        'sitemap': build_sitemap,
    }

    # args is sys.argv-style: args[0] is the program name, args[1] the
    # optional sub-command. No sub-command means a full rebuild.
    command = args[1] if len(args) > 1 else 'all'
    builder = commands.get(command)
    if builder is None:
        # Unknown sub-command: show the usage text. This is checked before
        # the configuration is loaded so help works without _config.yaml.
        print(main.__doc__)
        return

    builder(site_data())


# Full rebuild: run the blog builder, then the static-page builder, then
# the sitemap builder, each with the same site configuration.
def build_all(site):
    for step in (build_blog, build_static, build_sitemap):
        step(site)

# Build the blog portion of the site: collect every post, then generate the
# homepage, the archive, the individual post pages and the feed.
# Everything is regenerated on each run; slower, but simple.
def build_blog(site):
    print("Generating blog posts...")

    # One dict per post, in the order get_blog_files returns the files:
    #   title    - 'title' value from the front matter
    #   template - 'layout' value from the front matter
    #   link     - file name without extension, minus its first 11 characters
    #              (the "yyyy-mm-dd-" prefix)
    #   date     - date parsed from the first 10 characters of the file name
    #   content  - markdown body with site variables substituted
    posts = []
    for source in get_blog_files(site):
        stem = os.path.splitext(os.path.basename(source))[0]
        parsed = frontmatter.load(source)
        published = datetime.date.fromisoformat(stem[:10])
        posts.append({
            'title': parsed['title'],
            'template': parsed['layout'],
            'link': stem[11:],
            'date': published,
            'content': site_vars(site, parsed.content),
        })

    # Homepage, archive, per-post pages, then the feed
    build_index(site, posts)
    build_archive(site, posts)
    build_posts(site, posts)
    build_feed(site, posts)


# Render each blog post to <destination>/<link>/index.html using the
# template named by the post's 'template' value (plus '.html').
#
# site:  configuration dict; 'templates' and 'destination' are read here.
# posts: list of post dicts; the keys read here are 'template', 'link',
#        'title', 'date' and 'content' (markdown text).
#
# A post whose template cannot be loaded is reported and skipped, so one
# bad layout does not abort the remaining posts or get rendered with a
# template left over from the previous post.
def build_posts(site, posts):
    # Local import: only the loader classes are imported at module level.
    from jinja2 import TemplateError

    env = Environment(
        loader=FileSystemLoader(site['templates'])
    )
    env.filters['date_format'] = date_format

    for post in posts:
        template_name = post['template'] + '.html'
        try:
            template = env.get_template(template_name)
        except TemplateError as err:
            print(f"Problem with the template '{template_name}' "
                  f"for post '{post['link']}': {err}")
            continue

        # Render before touching the filesystem so a rendering failure does
        # not leave an empty index.html behind.
        page = template.render(
            title=post['title'],
            content=md.markdown(
                post['content'],
                extensions=['smarty', 'tables', 'footnotes', 'codehilite'],
            ),
            date=post['date'],
        )

        # Write post.html
        path = site['destination'] + '/' + post['link']
        os.makedirs(path, exist_ok=True)
        # Write UTF-8 explicitly instead of depending on the platform's
        # default encoding.
        with open(path + '/index.html', 'w', encoding='utf-8') as f:
            f.write(page)


# Create the site index page (homepage) at <destination>/index.html.
# - The first post's text becomes the main content.
# - The first 5 posts are passed to the template as a short index.
# - The markdown body of _index.md (read from the current working
#   directory) is rendered as additional content below that.
#
# site:  configuration dict; 'templates' and 'destination' are read here.
# posts: list of post dicts; may be empty, in which case the main content
#        is rendered as an empty string.
def build_index(site, posts):
    env = Environment(
        loader=FileSystemLoader(site['templates'])
    )
    env.filters['date_format'] = date_format
    template = env.get_template('index.html')

    # Get index content past the list of posts
    index = frontmatter.load('_index.md')

    extensions = ['smarty', 'tables', 'footnotes', 'codehilite']
    # With no posts there is nothing to feature; use an empty main post
    # rather than failing on posts[0].
    if posts:
        main_post = md.markdown(posts[0]['content'], extensions=extensions)
    else:
        main_post = ''

    # Render first so a template error does not leave an empty file behind.
    page = template.render(
        main_post=main_post,
        posts=posts[:5],
        the_rest=md.markdown(index.content, extensions=extensions),
    )

    # Create index page
    path = site['destination']
    os.makedirs(path, exist_ok=True)
    # Write UTF-8 explicitly instead of depending on the platform's
    # default encoding.
    with open(path + '/index.html', 'w', encoding='utf-8') as f:
        f.write(page)


# Build the site archive page at <destination>/archive/index.html.
# The full list of blog posts is handed to the 'archive.html' template;
# only blog posts are passed, no static pages.
#
# site:  configuration dict; 'templates' and 'destination' are read here.
# posts: list of post dicts, passed to the template unchanged.
def build_archive(site, posts):
    env = Environment(
        loader=FileSystemLoader(site['templates'])
    )
    env.filters['date_format'] = date_format
    template = env.get_template('archive.html')

    # Render first so a template error does not leave an empty file behind.
    page = template.render(posts=posts)

    # Create archive page
    path = site['destination'] + '/archive'
    os.makedirs(path, exist_ok=True)
    # Write UTF-8 explicitly instead of depending on the platform's
    # default encoding.
    with open(path + '/index.html', 'w', encoding='utf-8') as f:
        f.write(page)


# Make the Atom feed at <destination>/feed.xml.
#
# site:  configuration dict; 'templates' and 'destination' are read here.
# posts: list of post dicts; only the first 10 in the list are included
#        (build_blog passes them sorted by file name in reverse order).
#
# Each feed entry carries the post's title, link and date, plus its
# markdown content rendered to HTML and then HTML-escaped so it can be
# embedded as text in the XML.
def build_feed(site, posts):
    entries = 10
    env = Environment(
        loader=FileSystemLoader(site['templates'])
    )
    env.filters['date_format'] = date_format
    template = env.get_template('feed.xml')
    output = site['destination'] + '/feed.xml'
    # Feed timestamp: current local time with UTC offset, whole seconds.
    date = datetime.datetime.now().astimezone().replace(microsecond=0).isoformat()

    # Render content and escape
    feed_entries = [
        dict(
            title=post['title'],
            link=post['link'],
            date=post['date'],
            content=html.escape(
                md.markdown(
                    post['content'],
                    extensions=['smarty', 'tables', 'footnotes', 'codehilite'],
                ),
                quote=True,
            ),
        )
        for post in posts[:entries]
    ]

    # Render first so a template error does not leave an empty file behind.
    feed = template.render(date=date, posts=feed_entries)
    # Write UTF-8 explicitly instead of depending on the platform's
    # default encoding.
    with open(output, 'w', encoding='utf-8') as f:
        f.write(feed)


# Build static (non-date-based) portion of the site.
# Markdown files are rendered with the template named by their 'layout'
# front-matter value and written as <name>.html; every other file is copied
# as-is. In both cases the file keeps its relative folder under
# site['destination'].
#
# site: configuration dict; 'templates' and 'destination' are read here.
def build_static(site):
    print("Generating static site files...")
    env = Environment(
        loader=FileSystemLoader(site['templates'])
    )
    env.filters['date_format'] = date_format

    for file in get_static_files(site):
        # get_static_files globs '**/*.*', which also matches directories
        # whose name contains a dot; those cannot be rendered or copied as
        # files, and their contents are listed separately anyway.
        if os.path.isdir(file):
            continue

        path, filename = os.path.split(file)
        name, extension = os.path.splitext(filename)
        out_folder = os.path.join(site['destination'], path)

        if extension == '.md':
            page = frontmatter.load(file)
            template = env.get_template(page['layout'] + '.html')
            # Render before creating the output so a failure does not
            # leave an empty file behind.
            rendered = template.render(
                title=page['title'],
                content=md.markdown(
                    page.content,
                    extensions=['smarty', 'tables', 'footnotes', 'codehilite'],
                ),
            )
            os.makedirs(out_folder, exist_ok=True)
            output = os.path.join(out_folder, name + '.html')
            # Write UTF-8 explicitly instead of depending on the platform's
            # default encoding.
            with open(output, 'w', encoding='utf-8') as f:
                f.write(rendered)
        else:
            os.makedirs(out_folder, exist_ok=True)
            shutil.copy(file, os.path.join(out_folder, filename))


# Build the site map at <destination>/sitemap.xml.
# Lists every blog post plus every static page that is rendered from
# markdown to html. The file lists are gathered again here instead of
# reusing the blog/static builders, which is less modular than desired.
#
# site: configuration dict; 'templates' and 'destination' are read here,
#       and the whole dict is passed to the template as 'site'.
def build_sitemap(site):
    print("Generating sitemap...")
    blog_files = get_blog_files(site)
    static_files = get_static_files(site)
    env = Environment(
        loader=FileSystemLoader(site['templates'])
    )
    env.filters['date_format'] = date_format
    template = env.get_template('sitemap.xml')
    output = site['destination'] + '/sitemap.xml'

    # Set up blog posts portion of site map
    # NOTE(review): 'link' here is the full file stem, including the
    # "yyyy-mm-dd-" prefix, while build_blog strips that prefix (name[11:])
    # for the folder each post is written to. Confirm the sitemap template
    # accounts for this; otherwise the post URLs will not match.
    posts = []
    for file in blog_files:
        name = os.path.splitext(os.path.basename(file))[0]
        posts.append(dict(
            link=name,
            date=datetime.date.fromisoformat(name[:10]),
        ))

    # Set up static portion of site map, only portions that render from
    # markdown to html
    pages = []
    for file in static_files:
        path, filename = os.path.split(file)
        name, extension = os.path.splitext(filename)
        if extension != '.md':
            continue
        if name == 'index':
            # An index page is linked by its folder only.
            pages.append(dict(link=path))
        else:
            pages.append(dict(link=os.path.join(path, name + '.html')))

    # Render first so a template error does not leave an empty file behind.
    sitemap = template.render(site=site, posts=posts, pages=pages)

    # 'sitegen sitemap' can run on a clean tree, so make sure the
    # destination folder exists before writing into it.
    os.makedirs(os.path.dirname(output), exist_ok=True)
    # Write UTF-8 explicitly instead of depending on the platform's
    # default encoding.
    with open(output, 'w', encoding='utf-8') as f:
        f.write(sitemap)


# Helper functions follow
# Load site configuration: parse _config.yaml from the current working
# directory with the safe YAML loader and return the result.
def site_data():
    # The context manager closes the file even if reading or parsing fails.
    with open('_config.yaml', 'r', encoding='utf-8') as f:
        site = yaml.load(f.read(), Loader=yaml.SafeLoader)
    return site


# Collect the blog post files and return them in reverse name order.
# Files are expected to be named yyyy-mm-dd-post-slug.md and kept in one
# flat folder, so reverse name order is also newest-first.
# site['blog'] is used as a plain prefix in front of '*.md'.
def get_blog_files(site):
    pattern = site['blog'] + '*.md'
    matches = glob.glob(pattern, recursive=True)
    return sorted(matches, reverse=True)


# Get remainder of static files to render or copy as needed.
# Searches the current working directory recursively for names containing
# a dot, then drops every path that starts with '_' (top-level files or
# folders such as _config.yaml) and every match that is not a regular file
# (the pattern also matches directories with a dot in their name).
#
# site: unused; kept so the signature matches get_blog_files.
#
# Returns a list, so callers can iterate over the result more than once.
def get_static_files(site):
    return [
        path
        for path in glob.glob('**/*.*', recursive=True)
        if not path.startswith('_') and os.path.isfile(path)
    ]


# Template filter that formats a date/datetime value with strftime.
# The default pattern gives "DD Month YYYY".
def date_format(value, format="%d %B %Y"):
    formatted = value.strftime(format)
    return formatted


# Replace site variables: every literal "{{ site.url }}" in content is
# replaced by site['url'].
#
# Plain string replacement is used on purpose. With a regular expression
# the '.' in the placeholder would match any character, and backslashes in
# the URL would be interpreted as escape sequences in the replacement.
def site_vars(site, content):
    return content.replace('{{ site.url }}', site['url'])

# Script entry point: hand the raw command-line arguments to main()
if __name__ == '__main__':
    main(sys.argv)