#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
A single-file Python CGI script for effortless sharing of other single-file
scripts. If you're viewing a "Useful Hacks" list on my website, this is the
code behind it.
Simply put your desired description into each file's docstring (for shell
scripts, the description is the leading block of comment lines, starting at
the shebang and ending just before the first non-comment line) and drop the
files into a folder along with this script. Currently supports
Bourne-compatible shell scripts and Python scripts. Other languages are
under consideration.
Non-obvious Features:
- Hyperlinks URLs and obfuscates e-mail addresses in script descriptions.
- Configurable license name hyperlinking
Warnings:
- The HTML templating is a quick hackjob. I'm not kidding.
- Don't forget to remove the template bits specific to my site.
TODO:
- Switch to a proper templating solution? (No longer a single-file script)
- Add caching eventually (current run time for my site, 0.1 seconds)
"""
# Module metadata. PythonScriptEntry extracts __appname__, __version__ and
# __license__ from the Python scripts it lists by regex, which only matches
# a single-line, quoted string assignment that starts at the beginning of a
# line -- keep these assignments in exactly that shape.
__appname__ = "Lazybones Script Lister"
__author__ = "Stephan Sokolow (deitarion/SSokolow)"
__version__ = "0.3"
__license__ = "GNU GPL 2.0 or later"
import cgi, os, parser, re, time, token, urllib
# License shown for scripts that do not declare one themselves.
DEFAULT_LICENSE = "GNU GPL 2.0 or newer"

# Maps a compiled pattern to the URL describing that license. Group 2 of each
# pattern must be the license name: it is the group the substitution in
# ScriptEntry.__init__ refers to. The pattern is a raw string so that "\b"
# is a regex word boundary and not a literal backspace character.
LICENSES = {
    re.compile(r"(^|\b)((GNU )?GPL v?2(\.0)?)", re.IGNORECASE):
        "http://www.gnu.org/licenses/gpl-2.0.html",
}

# The CGI header section has to be terminated by an empty line; without it
# the first line of the body is parsed as a (malformed) header.
PAGE_HEADER = """Content-Type: text/html; charset=utf-8

Useful Hacks @ ssokolow.com
"""
PAGE_FOOTER = """
"""

#TODO: Make use of this regex to sanitize input before using it in HTML/XML.
#(Should also be sanitizing 0xD800-0xDFFF, 0xFFFE-0xFFFF, and 0x110000, but
# that has to wait until I've added support for parsing and honoring encoding
# declarations)
# Matches the C0 control characters that XML 1.0 forbids. TAB (0x09),
# LF (0x0A) and CR (0x0D) are legal and therefore deliberately not matched.
control_char_re = re.compile(r'[\x00-\x08\x0B\x0C\x0E-\x1F]')

# Runs of characters that may not appear in an HTML anchor name.
bad_anchor_char_re = re.compile(r'[^A-Za-z0-9\-_:.]+')

# http(s)/ftp(s) URLs, allowing balanced "(...)" groups inside the URL.
hyperlinkable_url_re = re.compile(r"""((?:ht|f)tps?://[^\s()]+(?:\([^\s()]+\))*[^\s()]*)""", re.IGNORECASE | re.UNICODE)

# Characters which terminate an e-mail address in running text.
_bc = r"""!@#$%^&*()=+{}[\]|\;:'"/?>,<\s"""
# local-part "@" domain; the final character may not be a dot, so a
# sentence-ending period is not swallowed into the address.
email_address_re = re.compile(r"""(?P<email>[^%s]+@[^%s]+\.[^%s]*[^.%s])""" % (_bc, _bc, _bc, _bc), re.UNICODE)
del _bc
class ScriptEntry(object):
    """Metadata record and renderer for one listed script.

    This is an abstract base class. Subclasses provide ``shabang_re`` and
    ``extensions`` (which the listing code uses to pick a class for a file)
    and implement ``_do_init`` to fill in the format-specific metadata.
    """

    # Default value of every metadata key; copied for each instance.
    _metadata = {
        'name': '',
        'filepath': '',
        'filename': '',
        'filesize': 0,
        'filetime': 0,
        'language': '',
        'description': '',
        'anchor': '',
        'license': DEFAULT_LICENSE,
        'version': '',
    }
    shabang_re = None
    license_re = None
    extensions = []
    anchors = []  # Static: shared by all instances to keep anchors unique.

    def __lt__(self, other):
        """Order entries case-insensitively by name (used by sorting)."""
        return self.metadata['name'].lower() < other.metadata['name'].lower()

    def __cmp__(self, other):
        """Make ScriptEntry objects case-insensitive sortable by name."""
        mine = self.metadata['name'].lower()
        theirs = other.metadata['name'].lower()
        # Same result as cmp(mine, theirs) without needing the builtin.
        return (mine > theirs) - (mine < theirs)

    def __init__(self, filename):
        """Collect and pre-format all metadata for the script at ``filename``.

        Raises whatever ``os.stat`` or the subclass's ``_do_init`` raise for
        unreadable files.
        """
        self.metadata = self._metadata.copy()
        stat_result = os.stat(filename)

        # Store all the metadata that isn't format-specific.
        meta = self.metadata
        meta['filepath'] = os.path.normpath(filename)
        meta['filename'] = os.path.basename(meta['filepath'])
        meta['filesize'] = stat_result.st_size
        meta['filetime'] = stat_result.st_mtime
        meta['anchor'] = self._unique_anchor(meta['filename'])

        # Make sure that the filename will be used as a fallback program name.
        meta['name'] = meta['filename']

        # Actually extract the metadata.
        self._do_init()

        # Allow controlled truncation of module docstrings.
        for marker in ('--snip--', '--clip--'):
            separator = '\n%s\n' % marker
            if separator in meta['description']:
                meta['description'] = meta['description'].split(separator, 1)[0] + '\n[...]'

        # Add various pretty-printed and escaped values to the metadata dict.
        meta.update({
            'fname_q': urllib.quote_plus(meta['filename']),
            'fsize_p': formatFileSize(meta['filesize']),
            'desc_e': self._xml_escape(meta['description']),
            'mtime': time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(meta['filetime'])),
        })

        # Hyperlink all the URLs in the description. The text is already
        # escaped, so the URL is safe to place inside the href attribute.
        meta['desc_e'] = hyperlinkable_url_re.sub(r'<a href="\1">\1</a>', meta['desc_e'])

        # Add some spam protection to any e-mail addresses
        meta['desc_e'] = email_address_re.sub(spamProtectEmail, meta['desc_e'])

        # Hyperlink any licenses we can. Group 2 of each LICENSES pattern is
        # the license name that becomes the link text.
        license_text = self._xml_escape(meta['license'])
        meta['license_h'] = license_text
        for regex, url in LICENSES.items():
            if regex.search(license_text):
                meta['license_h'] = regex.sub(
                    lambda match, url=url: '<a href="%s">%s</a>' % (
                        self._xml_escape(url), match.group(2)),
                    license_text)

    def _unique_anchor(self, filename):
        """Derive an anchor name from ``filename`` and register it.

        The result starts with a letter and differs from every anchor handed
        out so far; a numeric suffix is appended on collision.
        """
        anchor = bad_anchor_char_re.sub('_', filename).lower()
        if not anchor[0].isalpha():
            anchor = 'a' + anchor

        # Ensure no duplicate anchors
        if anchor in self.anchors:
            count = 0
            while ('%s%d' % (anchor, count)) in self.anchors:
                count += 1
            anchor = '%s%d' % (anchor, count)
        self.anchors.append(anchor)
        return anchor

    def _do_init(self):
        """Code to actually extract format-specific metadata goes here."""
        raise NotImplementedError("Cannot instantiate abstract class")

    def _xml_escape(self, instr):
        """Perform basic XML escaping on the provided string.

        ``&`` is replaced first so the entities produced for the other
        characters are not escaped a second time. Double quotes are escaped
        too, which keeps the result safe inside attribute values.
        """
        return (instr.replace('&', '&amp;')
                     .replace('<', '&lt;')
                     .replace('>', '&gt;')
                     .replace('"', '&quot;'))

    def render(self):
        """Return this entry's listing fragment as a string.

        NOTE(review): the template below contains no markup and never uses
        the 'name', 'anchor' or 'fname_q' values computed in __init__ --
        confirm whether HTML was lost from these literals.
        """
        output = """
- Size: %(fsize_p)s
""" % self.metadata
        if self.metadata['version']:
            output += '\n- Version: %(version)s\n\n' % self.metadata
        output += """- License: %(license_h)s
- Language: %(language)s
- Last Modified: %(mtime)s
%(desc_e)s
""" % self.metadata
        return output
class PythonScriptEntry(ScriptEntry):
    """ScriptEntry for Python scripts.

    The description is the module docstring; name, version and license come
    from module-level ``__appname__``, ``__version__`` and ``__license__``
    string assignments.
    """

    shabang_re = re.compile('^#!(/usr(/local)?)?/bin/(env )?python')
    extensions = ['.py']

    # Template for a single-line `NAME = "value"` assignment at the start of
    # a line. 'delim' captures the opening quote(s) so the closing quote(s)
    # must be identical; 'value' is the text in between.
    _variable_re = r"""^%s\s*=\s*(?P<delim>'{1,3}|\"{1,3})(?P<value>.+?)(?P=delim)\s*$"""
    _metadata_regexes = {
        'license': re.compile(_variable_re % '__license__', re.MULTILINE),
        'name': re.compile(_variable_re % '__appname__', re.MULTILINE),
        'version': re.compile(_variable_re % '__version__', re.MULTILINE),
    }

    def _do_init(self):
        """Fill in language, name/version/license and description."""
        meta = self.metadata
        meta['language'] = 'Python'

        # Load the file and extract all metadata but the description.
        with open(meta['filepath'], 'rU') as handle:
            filecontents = handle.read()
        for key, regex in self._metadata_regexes.items():
            match_obj = regex.search(filecontents)
            if match_obj:
                meta[key] = match_obj.group('value')

        # Parse out the module docstring as the description. A script that
        # cannot be parsed must not take the whole listing down, so any
        # parse failure is reported in place of the description.
        try:
            # `or ''`: a module without a docstring yields None, but the
            # description is treated as a string by ScriptEntry.__init__.
            meta['description'] = self._get_docstring(filecontents) or ''
        except Exception:
            meta['description'] = "ERROR: Unable to parse file."

    def _get_docstring(self, tup):
        """Return the module docstring, or None if there is none.

        ``tup`` is normally the module's source text, which is parsed with
        the ``ast`` module so that only a genuine module docstring (and not
        merely the first string literal in the file) is returned, without
        its surrounding quotes. For backward compatibility a nested tuple
        in the ``parser`` module's ``totuple()`` layout is still accepted
        and searched for its first STRING token.

        Raises SyntaxError (or whatever ``ast.parse`` raises) for source
        that does not parse.
        """
        import ast

        if not isinstance(tup, tuple):
            return ast.get_docstring(ast.parse(tup))

        if tup[0] == token.STRING:
            return tup[1]
        for value in tup:
            if isinstance(value, tuple):
                val = self._get_docstring(value)
                if val:
                    return val
        return None
class ShellScriptEntry(ScriptEntry):
    """ScriptEntry for Bourne-compatible shell scripts.

    The description is the block of comment lines at the top of the file;
    the license is taken from a "Licensed/Released under the ... license"
    comment inside that block, if there is one.
    """

    shabang_re = re.compile('^#!(/usr(/local)?)?/bin/(env )?(ba|k)?sh$')
    extensions = ['.sh']

    # The 'license' group captures the license name between the
    # "... under the/a" lead-in and an optional trailing " license."
    _license_re = re.compile(r"""^#\s*(Licensed|Released) under (the|a) (?P<license>.+?)(\slicense)?\.?\s*$""", re.M | re.I)

    def _do_init(self):
        """Fill in language, description and (if declared) license."""
        meta = self.metadata
        meta['language'] = 'Bourne Shell Script'

        # Extract the comment block header as the description if present:
        # every leading line that starts with '#', stopping at the first
        # line that does not.
        lines = []
        with open(meta['filepath']) as handle:
            for line in handle:
                line = line.strip()
                if not line.startswith('#'):
                    break
                lines.append(line)
        meta['description'] = '\n'.join(lines)

        # Extract the license info if present
        match_obj = self._license_re.search(meta['description'])
        if match_obj:
            meta['license'] = match_obj.group('license')
entryClasses = [PythonScriptEntry, ShellScriptEntry]
def spamProtectEmail(match_obj):
    """Regex substitution callback that obfuscates a matched e-mail address.

    Intended as the replacement argument when substituting with
    email_address_re: the whole match is returned with "@" spelled out as
    " at " and every "." as " dot ", which gives addresses in docstrings
    some degree of spam protection.
    XXX: Should I add some randomness to the obfuscation approach?"""
    obfuscated = match_obj.group(0)
    for symbol, spelled_out in (('@', ' at '), ('.', ' dot ')):
        obfuscated = obfuscated.replace(symbol, spelled_out)
    return obfuscated
def formatFileSize(size, unit='', precision=0):
    """Take a size in bits or bytes and return it all prettied
    up and rounded to whichever unit gives the smallest number.

    size      -- the number to format; may be negative.
    unit      -- optional fixed unit, case-insensitive. Accepted are the
                 displayed labels (bytes, KiB, MiB, GiB, TiB, PiB, EiB,
                 ZiB, YiB) and the short forms B, KB, MB, ... YB. An empty
                 or unrecognised unit selects the unit automatically.
    precision -- number of decimals to display.

    Returns a string such as '12 KiB' or '1.5 MiB'.

    In the event that the given value is in bits, the user will have to
    use result = result[:-1] + 'b' to make it appear correct.

    Will calculate using integers unless precision is != 0.
    Will display using integers unless precision is > 0."""
    # Each unit's position in the list is crucial.
    # units[2] = 'MiB' and size / 1024**2 = size in MiB
    units = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']

    # Case-insensitive lookup table: displayed label and short form -> index.
    unit_indexes = {'b': 0, 'byte': 0}
    for idx, label in enumerate(units):
        unit_indexes[label.lower()] = idx
        if idx:
            unit_indexes[label[0].lower() + 'b'] = idx

    if precision:
        size = float(size)
    use_true_division = isinstance(size, float)

    def scale_down(value, divisor):
        # Integers stay integers (floor division) so that precision=0
        # really calculates with integers; floats divide exactly.
        if use_true_division:
            return value / divisor
        return value // divisor

    # Did the calling function specify a valid unit of measurement?
    unit_idx = unit_indexes.get(unit.lower()) if unit else None
    if unit_idx is not None:
        size = scale_down(size, 1024 ** unit_idx)
    else:  # If not, find the unit index by iteration.
        unit_idx = 0
        # ">=" so that exactly 1024 of a unit rolls over into the next one.
        while abs(size) >= 1024 and unit_idx < (len(units) - 1):
            size = scale_down(size, 1024)
            unit_idx += 1

    # A negative precision only requests float calculation; display with
    # zero decimals in that case.
    return '%.*f %s' % (max(precision, 0), size, units[unit_idx])
def list_content(path='.'):
    """Generate an HTML listing of the available files, complete with metadata

    Every regular file directly inside ``path`` is offered to the classes in
    entryClasses, in order; the first class whose extension list contains the
    file's extension, or whose shabang_re matches the file's first line,
    handles it. Files no class accepts are left out. The page is written to
    standard output.
    """
    scripts, categories, path = [], [], os.path.abspath(path)
    for name in os.listdir(path):
        fpath = os.path.join(path, name)
        if os.path.isdir(fpath):
            continue  #TODO: Support categories.
        if not os.path.isfile(fpath):
            continue  # e.g. a dangling symlink; it cannot be read anyway.

        ext = os.path.splitext(name)[1]
        first_line = None  # Read lazily, and only once per file.
        for ec in entryClasses:
            if ext in ec.extensions:
                accepted = True
            else:
                if first_line is None:
                    with open(fpath) as handle:
                        first_line = handle.readline()
                accepted = bool(ec.shabang_re.match(first_line))
            if accepted:
                # Use the joined path so listing works for any ``path``,
                # and stop at the first match so no file is listed twice.
                scripts.append(ec(fpath))
                break

    # Case-insensitive by name, independent of how entries compare.
    scripts.sort(key=lambda entry: entry.metadata['name'].lower())

    print(PAGE_HEADER)
    print("")
    if categories:
        print("Categories\n")  #TODO: Add this to the table of contents.
    for entry in scripts:
        print(entry.render())
    print(PAGE_FOOTER)
if __name__ == '__main__':
    # CGI entry point: without a "get" parameter the listing is shown;
    # with one, the named file is sent back as plain text.
    form = cgi.FieldStorage()
    # getfirst() also copes with a repeated "get" parameter.
    requested = form.getfirst('get')
    if requested is None:
        list_content()
    else:
        # Resolve symlinks and ".." before checking, and compare against the
        # base directory *with* a trailing separator: a bare prefix test
        # would also accept a sibling such as "<cwd>-private/secret".
        base_dir = os.path.join(os.path.realpath(os.getcwd()), '')
        target = os.path.realpath(requested)
        if not target.startswith(base_dir) or not os.path.isfile(target):
            print(PAGE_HEADER)
            print("Unfortunately, you have requested an invalid file. Please try again.\n")
            print(PAGE_FOOTER)
        else:
            print("Content-Type: text/plain")
            print("")
            # Serve exactly the path that was validated above.
            with open(target) as handle:
                print(handle.read())