Emacs rules.
我用 Emacs 写 Wiki、做计划,也用它做笔记。当然了,偶尔也用它玩俄罗斯方块。最近用 Notes Mode 做笔记时遇到一个麻烦:Notes Mode 不支持生成中文的 PDF 文件。可 Mac 上明明有 XeTeX 这么方便的工具可用,不能看中文实在不爽。于是我对生成 PDF 的脚本 parsenotes 做了些简单修改,以满足中文需求。下面就是我修改后的 parsenotes(你也可以点击这里下载)。
#!/usr/bin/env python
""" Convert a notes-mode file to pdf (via latex, etc.)
@todo: support for \epsfig """
""" Character codes: \0 = backslash \1 = open brace \2 = close brace \3 = _ """
import sys, re, os, shutil, os.path
# LaTeX header.  The string is %-formatted before use: the first %s slot is
# the PDF title and the second is the PDF author, and every literal LaTeX
# percent sign is therefore written as "%%".  Each LaTeX comment has to sit
# on its own line, otherwise it comments out the commands that follow it.
HEADER = r"""
\documentclass{article}
\usepackage{fontspec}
\setromanfont{STSong}
\XeTeXlinebreaklocale "zh"
\XeTeXlinebreakskip = 0pt plus 1pt
\usepackage{fullpage}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{alltt}
\usepackage{epsfig}
\usepackage[dvipdfm, pagebackref, colorlinks=true, pdftitle={%s}, pdfauthor={%s}, bookmarks=true, bookmarksopen=false, pdfpagemode=UseOutlines]{hyperref}

\setlength{\parskip}{1ex}
\setlength{\parindent}{0ex}
\setlength{\topsep}{0.3em}
\setlength{\partopsep}{0em}

%% This is used to reduce spacing in bulleted lists.
\def\nogap{ \setlength{\itemsep}{0em} \setlength{\parskip}{0em}}

%% Declare some symbols that x-emacs believes exists.
\DeclareTextSymbol{\textbackslash}{T1}{92}
\newcommand{\nsubset}{\not\subset}
\renewcommand{\textflorin}{\textit{f}}
\newcommand{\setB}{{\mathord{\mathbb B}}}
\newcommand{\setC}{{\mathord{\mathbb C}}}
\newcommand{\setN}{{\mathord{\mathbb N}}}
\newcommand{\setQ}{{\mathord{\mathbb Q}}}
\newcommand{\setR}{{\mathord{\mathbb R}}}
\newcommand{\setZ}{{\mathord{\mathbb Z}}}
\newcommand{\coloncolon}{\mathrel{::}}
\newcommand{\lsemantics}{\mathopen{\lbrack\mkern-3mu\lbrack}}
\newcommand{\rsemantics}{\mathclose{\rbrack\mkern-3mu\rbrack}}
\newcommand{\lcata}{\mathopen{(\mkern-3mu\mid}}
\newcommand{\rcata}{\mathopen{\mid\mkern-3mu)}}

\begin{document}
"""

# LaTeX footer, appended after the converted notes.
FOOTER = r"""
\end{document}
"""

DEBUG = 0
def dolists(notes):
    """
    Convert notes-style lists to LaTeX enum/itemize lists.

    A list item is a line whose first non-blank text is a bullet: an
    asterisk, a hyphen, or a number followed by a period.  Asterisk and
    hyphen bullets open an itemize environment, numbered bullets open an
    enumerate environment.  A deeper indentation opens a nested list; a
    shallower one closes lists until the levels agree.  Lines between
    the begin/end markers of an alltt environment are copied untouched.

    @param notes: The (partially converted) notes text.
    @return: The text with list environments inserted.  Every input
        line is re-emitted with a trailing newline.
    """
    _BULLET_RE = re.compile(r'^([ \t]*)([*]|[-]|\d+\.)(.*)')
    end_list = '\\vspace{-1ex}\\end{%s}\n\n'

    indent = [-1]       # indentation of each open list; -1 is a sentinel
    bullets = []        # environment name of each open list
    out = []
    verbatim = False

    for line in notes.split('\n'):
        # Skip verbatim areas..
        if '\\begin{alltt}' in line:
            verbatim = True
        if '\\end{alltt}' in line:
            verbatim = False
        if verbatim:
            out.append(line + '\n')
            continue

        m = _BULLET_RE.match(line)
        if m:
            # NOTE(review): a tab counts as a single column here, exactly
            # as in the text this was recovered from -- confirm whether a
            # wider tab stop was intended.
            spaces = len(m.group(1).replace('\t', ' '))
            bullet = m.group(2)

            # Start a new (sub)list.
            if spaces > indent[-1]:
                if bullet in ('*', '-'):
                    bullets.append('itemize')
                else:
                    bullets.append('enumerate')
                out.append('\n\\vspace{-1ex}\\begin{%s}\\nogap\n'
                           % bullets[-1])
                indent.append(spaces)

            # End one or more sublists.
            while spaces < indent[-1]:
                out.append(end_list % bullets.pop())
                indent.pop()

            # List item.
            out.append((' ' * spaces) + '\\item{}' + m.group(3) + '\n')
        else:
            # A non-item line ends every list that is indented at least
            # as far as the line itself; deeper-indented text is treated
            # as a continuation of the current item.
            m = re.match('([ \t]*)', line)
            spaces = len(m.group(1).replace('\t', ' '))
            while spaces <= indent[-1]:
                out.append(end_list % bullets.pop())
                indent.pop()
            out.append(line + '\n')

    # Close lists that are still open when the input ends on an item
    # (or on an indented continuation line) instead of a blank line.
    while bullets:
        out.append(end_list % bullets.pop())
        indent.pop()

    return ''.join(out)
def dotimestamps(notes):
    """
    Convert notes-style timestamps to headings.  I put a timestamp at
    the beginning of each day's class, so this puts each day on its
    own page.  The timestamp can optionally be followed (on the same
    line) by a title for that day's lecture.

    A timestamp line looks like "[02/16/05 11:30 AM] optional title".

    @param notes: The notes text.
    @return: The text with every timestamp line replaced by a page
        break, the optional centred title, a rule and the spelled-out
        date.  Every input line is re-emitted with a trailing newline.
    @raise ValueError: If a timestamp holds an impossible date (raised
        by time.strptime).
    """
    import time
    _TIMESTAMP_RE = re.compile(
        r'^\[(\d\d/\d\d/\d\d) \d\d:\d\d [AP]M\](.*)$')

    just_did_timestamp = 0
    out = []
    for line in notes.split('\n'):
        m = _TIMESTAMP_RE.match(line)
        if m is None:
            if just_did_timestamp and line.strip() != '':
                just_did_timestamp = 0
                # The first text after a timestamp gets extra space,
                # unless it is a heading line (">", ">>" or ">>>").
                if not re.match(r'>>?>?\s', line):
                    out.append('\\vspace{2em}\n')
            out.append(line + '\n')
            continue

        (date, text) = m.groups()
        when = time.strptime(date, '%m/%d/%y')
        # Take the day of the month from the parsed struct instead of the
        # "%e" directive, which not every platform's strftime supports.
        datestr = '%s %d, %s' % (time.strftime('%A, %B', when),
                                 when.tm_mday,
                                 time.strftime('%Y', when))
        out.append('\\newpage\n')
        if text:
            out.append('\\begin{centering}\\LARGE %s\\\\' % text)
            out.append('\\end{centering}\\vspace{4mm}\n')
        out.append('\\hrule\\hrule\\hrule\n')
        out.append('\\begin{raggedleft}\\Large \\it \\hfill %s' % datestr)
        out.append('\\\\ \\end{raggedleft}\n\n\n')
        just_did_timestamp = 1
    return ''.join(out)


# The converter refers to this function by its underscored name as well.
do_timestamps = dotimestamps
# Verbatim regions: runs of consecutive lines that all start with the same
# marker character (#, %, ! or *), optionally followed by text.  Each regex
# captures the whole run, leading newline included, as group 1.
_VERB1_RE_A = re.compile(r'((\n[#]([ \t][^\n]*)?)+)', re.MULTILINE)
_VERB2_RE_A = re.compile(r'((\n[%]([ \t][^\n]*)?)+)', re.MULTILINE)
_VERB3_RE_A = re.compile(r'((\n[!]([ \t][^\n]*)?)+)', re.MULTILINE)
_VERB4_RE_A = re.compile(r'((\n[*]([ \t][^\n]*)?)+)', re.MULTILINE)

# Substitution templates for the regexes above.  They are handed to
# re.sub, which interprets backslash escapes in the replacement, so every
# LaTeX backslash is doubled; only "\1" is a real group reference.
_VERB1_SUB_A = r"""

\\begin{tabular}{||l}
\\begin{minipage}{0.9\\textwidth}
\\begin{alltt}\\small\1
\\end{alltt}
\\end{minipage}
\\end{tabular}

"""
_VERB2_SUB_A = _VERB3_SUB_A = r"""

\\begin{tabular}{|l}
\\begin{minipage}{0.9\\textwidth}
\\begin{alltt}\\rmfamily\1
\\end{alltt}
\\end{minipage}
\\end{tabular}

"""
_VERB4_SUB_A = r"""

\\begin{tabular}{|l|}
\\hline
\\begin{minipage}{0.9\\textwidth}
\\begin{alltt}\\bfseries\\rmfamily\\itshape\\large\1
\\end{alltt}
\\end{minipage}\\\\
\\hline
\\end{tabular}

"""
# Subscript or superscript operand.  \1=open brace, \2=close brace,
# \0=backslash (the control characters that stand in for the user's own
# braces and backslashes during conversion).  An operand is a braced
# group, a backslash command, or a single ordinary character.
# NOTE(review): the single-character class was recovered from damaged
# text; the excluded set (braces, caret, underscore, backslash) should be
# confirmed against the upstream parsenotes script.
_SCRIPT = r'(%s|%s|%s)' % ((r'%s[^%s]+%s' % ('\1', '\1\2', '\2')),
                           '\0[a-zA-Z0-9]+',
                           r'[^{}^_\\]')

# Placeholder line standing for figure number N; it is replaced by an
# \epsfig command that steps out of the surrounding alltt environment.
# The template goes through re.sub, so LaTeX backslashes are doubled and
# only "\1" is a group reference.
_FIGURE_RE = re.compile(r'^::FIGURE::(\d+)::FIGURE::$', re.MULTILINE)
_FIGURE_SUB = r"""\\end{alltt}
\\epsfig{file=figure\1.eps}
\\begin{alltt}"""
def notes2latex(notes, **headervars):
    """
    Convert a notes file to a LaTeX file.

    While converting, the user's own backslashes, braces and escaped
    underscores are held as the control characters chr(0), chr(1),
    chr(2) and chr(3), so they cannot be confused with the LaTeX markup
    generated here.  They are turned into printable LaTeX at the end.

    @type notes: C{string}
    @param notes: Contents of the notes file.
    @param headervars: Optional C{title} and C{author} strings for the
        PDF metadata slots of C{HEADER}.
    @return: The complete LaTeX document (header, body and footer).
    """
    # Change backslashes to \0s, so we can tell our backslashes from theirs.
    notes = re.sub(r'\\', '\0', notes)
    notes = re.sub(r'\{', '\1', notes)
    notes = re.sub(r'\}', '\2', notes)
    notes = re.sub('\0_', '\3', notes)

    # In the case of expressions like {\'o} (for o with an accent), we
    # want to keep the {, }, and \ characters; change them back, to
    # prevent them from being rendered literally.
    notes = re.sub("\1\0(['\"c`]? ?\\w)\2", r'{\\\1}', notes)

    # Timestamps.
    notes = dotimestamps(notes)

    # Headings.
    H1_RE = re.compile(r'^> (.*)$', re.MULTILINE)
    notes = H1_RE.sub('\n' + r'\\section{\1}' + '\n', notes)
    H2_RE = re.compile(r'^>> (.*)$', re.MULTILINE)
    notes = H2_RE.sub('\n' + r'\\subsection{\1}' + '\n', notes)
    H3_RE = re.compile(r'^>>> (.*)$', re.MULTILINE)
    notes = H3_RE.sub('\n' + r'\\subsubsection*{\1}' + '\n', notes)

    # Emphasis.  Non-greedy, so that two emphasised phrases on one line
    # stay two phrases instead of merging into a single long one.
    notes = re.sub(r'!!(.*?)!!', r" \\emph{\1} ", notes)

    # Some characters need to be in math mode.
    # (do this before verb, since verb introduces |'s)
    notes = re.sub(r'([<>|])', r'\\(\1\\)', notes)

    notes = _VERB1_RE_A.sub(_VERB1_SUB_A, notes)
    notes = _VERB2_RE_A.sub(_VERB2_SUB_A, notes)
    notes = _VERB3_RE_A.sub(_VERB3_SUB_A, notes)
    notes = _VERB4_RE_A.sub(_VERB4_SUB_A, notes)

    # Get rid of the verbatim markers.
    VERB_RE_B = re.compile(r'^[#%!*][ \t]?', re.MULTILINE)
    notes = VERB_RE_B.sub('', notes)

    # Handle sub & super scripts.  Run these regexps repeatedly, since
    # sub & superscripts might be nested.
    MATH_RE_A = re.compile(r'\^(%s)' % _SCRIPT)
    MATH_RE_B = re.compile(r'_(%s)' % _SCRIPT)
    while 1:
        notes2 = notes
        # Replace _ and ^ with \sb and \sp
        notes2 = MATH_RE_A.sub(r"\\(\\sp{\\text{\1}}\\)", notes2)
        notes2 = MATH_RE_B.sub(r"\\(\\sb{\\text{\1}}\\)", notes2)
        # Get rid of excess { and }s
        notes2 = re.sub("\\\\text{\1([^\1\2{}]*)\2}",
                        r'\\text{\1}', notes2)
        if notes2 == notes:
            break
        notes = notes2

    # Put appropriate elements in math mode.
    notes = re.sub('\0([a-zA-Z0-9]+)', r"\\(\\\1\\)", notes)

    # Get rid of any remaining ^s and _s (from nested use)
    notes = re.sub(r'\^', r'\\textasciicircum', notes)
    notes = re.sub(r'_', r'\\_', notes)

    # Some characters need to be backslashed
    notes = re.sub('([#$&%])', r'\\\1', notes)

    # Some commands are *not* supposed to be in math mode.
    notes = re.sub((r'(\\text(?!width)[a-zA-Z0-9]+|' +
                    r'\\l(?!\w)|\\o(?!\w))'),
                   r'\\textrm{\1}', notes)

    # Is this necessary?
    notes = re.sub('~', r'{\\textasciitilde}', notes)

    # If they backslashed braces, then unbackslash them.
    notes = re.sub('\0\1', '\1', notes)
    notes = re.sub('\0\2', '\2', notes)

    # Change any remaining backslashes to textbackslash, etc.
    notes = re.sub('\0', r'{\\textbackslash}', notes)
    notes = re.sub('\1', r'\\{', notes)
    notes = re.sub('\2', r'\\}', notes)
    notes = re.sub('\3', r'\\_', notes)

    # Handle figures
    notes = _FIGURE_RE.sub(_FIGURE_SUB, notes)
    notes = re.sub(r'\\begin{alltt}\n\\end{alltt}\n', '', notes)

    # Handle lists.
    notes = dolists(notes)

    # Get rid of 2 consecutive blank lines..
    notes = re.sub(r'\n([ \t]*\n)+', '\n\n', notes)

    # Fill in header variables.  HEADER's first slot is pdftitle and its
    # second is pdfauthor, so the title has to come first.
    author = headervars.get('author', 'Edward Loper')
    title = headervars.get('title', '')
    header = HEADER % (title, author)

    return header + notes + FOOTER
def tree2ps(treestr, outfile):
    """
    Render a bracketed tree description to a PostScript file.

    @param treestr: The tree, written with square brackets.
    @param outfile: Name of the PostScript file to write.
    @raise ImportError: If neither nltk nor tree2image provides the
        renderer this function needs.
    """
    # Use round braces.
    tree_str = treestr.replace('[', '(').replace(']', ')')

    # Undo some of our earlier changes.. :-/
    tree_str = tree_str.strip()
    tree_str = tree_str.replace('\\_', '_')

    # NOTE(review): the text this was recovered from lists the tree2image
    # code and then the NLTK code as two alternative versions, both as
    # live statements, so the NLTK output would have been written last.
    # NLTK is therefore tried first and tree2image is the fallback;
    # confirm which renderer was meant to be active.
    try:
        # THE NLTK VERSION:
        from nltk.tree import parse_treebank
        import nltk.draw.tree
    except ImportError:
        # THE TREE2IMAGE VERSION:
        import tree2image
        tree = tree2image.parse_treebank_tree(tree_str, '()', 1)
        tree2image.tree2ps(outfile, tree, ('times', 9))
        return

    # Hack to make tree sizes more reasonable:
    nltk.draw.tree.TreeView.YSPACING = 8
    nltk.draw.tree.TreeView.XSPACING = 4

    tree = parse_treebank(tree_str)
    nltk.draw.tree.print_tree(tree, outfile, 8)
# Running count of the figures attempted so far; it names the
# figureN.eps files and the ::FIGURE::N::FIGURE:: placeholders.
_FIGURE_NUMBER = 0


def _run_tool(argv, stdin_text=None):
    """Run an external program without a shell; return True on exit status 0."""
    import subprocess
    try:
        if stdin_text is None:
            return subprocess.call(argv) == 0
        proc = subprocess.Popen(argv, stdin=subprocess.PIPE,
                                universal_newlines=True)
        proc.communicate(stdin_text)
        return proc.returncode == 0
    except OSError:
        # The program is not installed or cannot be started.
        return False


def dofigures(str, dir, type, showoriginal=None):
    """
    Render the figures embedded in "# " verbatim lines to EPS files.

    Only lines starting with "# " are inspected.  A figure starts on
    such a line whose text begins with "[" (trees), "digraph " (graphs)
    or "plot " and ends with ";" (plots), and it is complete once its
    braces balance.  Each complete figure is rendered to figureN.eps
    in the current working directory and a "# ::FIGURE::N::FIGURE::"
    placeholder line is added to the output.

    @param str: The notes text.
    @param dir: Directory that receives the intermediate .fig files of
        graphs.
    @param type: 'trees', 'graphs' or 'plots'.
    @param showoriginal: Whether to keep the figure's source lines in
        the output.  By default they are kept for trees only.
    @return: The notes text with placeholders inserted.  Every input
        line that is kept is re-emitted with a trailing newline.
    @raise ValueError: If type is not one of the three known kinds.
    """
    global _FIGURE_NUMBER

    if type == 'trees':
        braces = '[]'
    elif type == 'graphs':
        braces = '{}'
    elif type == 'plots':
        braces = ';;'   # (no braces; plots are 1-line)
    else:
        raise ValueError('bad type')

    # By default, show originals for trees but not graphs or plots.
    show_original = showoriginal
    if show_original is None:
        show_original = (type == 'trees')

    brace_count = 0
    fig = ''
    out = ''

    sys.stdout.write('Converting %s' % type)
    for line in str.split('\n'):
        # Skip non-verbatim areas..
        if line[:2] != '# ':
            out += line + '\n'
            brace_count = 0
            fig = ''
            continue

        stripline = line[2:].strip()
        starts_figure = (
            (type == 'trees' and stripline[:1] == braces[0]) or
            (type == 'graphs' and stripline[:8] == 'digraph ') or
            (type == 'plots' and stripline[:5] == 'plot ' and
             stripline.endswith(';')))
        if fig or starts_figure:
            fig += stripline + '\n'
            if show_original:
                out += line + '\n'
        else:
            out += line + '\n'

        if not fig:
            continue
        brace_count += (stripline.count(braces[0]) -
                        stripline.count(braces[1]))
        if brace_count != 0 or stripline[-1:] != braces[1]:
            continue

        # We have a figure!
        sys.stdout.write('.')
        sys.stdout.flush()
        _FIGURE_NUMBER += 1
        epsname = 'figure%d.eps' % _FIGURE_NUMBER

        if type == 'trees':
            try:
                tree2ps(fig, epsname)
                rendered = True
            except Exception:
                rendered = False
            if not rendered:
                print('BAD TREE:\n' + fig)
        elif type == 'graphs':
            figname = 'figure%d.fig' % _FIGURE_NUMBER
            figpath = os.path.join(dir, figname)
            figfile = open(figpath, 'w')
            try:
                figfile.write(fig)
            finally:
                figfile.close()
            rendered = _run_tool(['dot', figpath, '-Tps', '-o', epsname])
            if not rendered:
                print('BAD DOT GRAPH:\n' + fig)
        else:
            # Plots.  The script is fed to gnuplot on stdin rather than
            # through a quoted shell command, so plot commands may
            # contain quote characters (e.g. plot 'data.txt';).
            script = ('set term postscript eps;' +
                      ' set size 0.5,0.5;' +
                      (' set output "%s";' % epsname) +
                      fig)
            rendered = _run_tool(['gnuplot'], script)
            if not rendered:
                print('BAD PLOT:\n' + fig)

        if rendered:
            out += '# ::FIGURE::%d::FIGURE::\n' % _FIGURE_NUMBER

        # Start afresh after every attempt, successful or not, so a bad
        # figure cannot swallow the verbatim lines that follow it.
        brace_count = 0
        fig = ''
    print('')
    return out


# The PDF driver refers to this function by its underscored name as well.
do_figures = dofigures
def latex2pdf(notes_str, outfile, trees=0, graphs=0, plots=0):
    """
    Convert notes text to a PDF file by running xelatex.

    All work happens in a fresh temporary directory, which is removed
    afterwards; the working directory is restored even on failure.

    @param notes_str: Contents of the notes file.
    @param outfile: Name of the PDF file to write.  A relative name is
        taken relative to the directory that was current on entry.
    @param trees: If true, render embedded trees as figures.
    @param graphs: If true, render embedded dot graphs as figures.
    @param plots: If true, render embedded gnuplot plots as figures.
    @raise ValueError: If xelatex exits with a non-zero status.
    """
    import tempfile

    olddir = os.path.abspath('.')

    # Make a temp directory.  mkdtemp creates it atomically, which closes
    # the window between choosing a name and creating the directory.
    tempdir = tempfile.mkdtemp()

    try:
        os.chdir(tempdir)

        # Special handling: figures.  Do this *before* we do other
        # latex conversions, because the text within the figure should
        # be literal..
        if trees:
            notes_str = dofigures(notes_str, tempdir, 'trees')
        if graphs:
            notes_str = dofigures(notes_str, tempdir, 'graphs')
        if plots:
            notes_str = dofigures(notes_str, tempdir, 'plots')

        # Convert the notes file to latex.
        latex_str = notes2latex(notes_str, title=outfile.replace('.pdf', ''))

        # Write the latex file
        texfile = open("file.tex", 'w')
        try:
            texfile.write(latex_str)
        finally:
            texfile.close()

        # Run latex twice (for bookmarks & x-refs)
        command = 'xelatex file.tex'
        for _ in range(2):
            print(command)
            if os.system(command) != 0:
                # Show the generated source to help locate the error.
                os.system('less file.tex')
                raise ValueError('Warning: latex failed')

        # Read the pdf.  It is binary data, so no text mode here.
        pdffile = open("file.pdf", 'rb')
        try:
            pdf_str = pdffile.read()
        finally:
            pdffile.close()

        # Write the output.
        os.chdir(olddir)
        pdfout = open(outfile, 'wb')
        try:
            pdfout.write(pdf_str)
        finally:
            pdfout.close()
    finally:
        os.chdir(olddir)
        shutil.rmtree(tempdir)
def usage():
    """
    Print the command-line synopsis.

    Returns normally instead of exiting, so the caller decides how to
    stop.
    """
    print("usage: parsenotes [-trees] [-graphs] [-plots] file.notes")
def main():
    """
    Command-line entry point: parsenotes [options] file.notes

    Recognised options are -trees (or -tree), -graphs (or -graph) and
    -plots (or -plot).  Exactly one input file is expected and its name
    must end in ".notes"; the PDF is written next to it under the same
    base name.  Any other command line prints the usage message.
    """
    trees = graphs = plots = 0
    infile = None
    for arg in sys.argv[1:]:
        if arg[:1] == '-':
            if arg in ('-trees', '-tree'):
                trees = 1
            elif arg in ('-graphs', '-graph'):
                graphs = 1
            elif arg in ('-plots', '-plot'):
                plots = 1
            else:
                return usage()
        elif infile is None:
            infile = arg
        else:
            return usage()
    if infile is None:
        return usage()

    outfile = re.sub(r'\.notes$', '.pdf', infile)
    if infile == outfile:
        # Stop here: going on would overwrite the input file with the PDF.
        print('Expected a notes file')
        return usage()

    notesfile = open(infile, 'r')
    try:
        notes = '\n' + notesfile.read() + '\n'
    finally:
        notesfile.close()

    print('%s --> %s' % (infile, outfile))
    latex2pdf(notes, outfile, trees, graphs, plots)


if __name__ == '__main__':
    main()




0 comments ↓
There are no comments yet... Kick things off by filling out the form below.
Leave a Comment