XeTeX 与 Emacs Notes Mode

Emacs rules.

我用 Emacs 写 Wiki、做计划,也用它作笔记。当然了,偶尔也用它玩俄罗斯方块。最近用 Notes Mode 作笔记时遇到一个麻烦:Notes Mode 不支持生成中文的 PDF 文件。可 Mac 上明明有 XeTeX 这么方便的工具可用,不能看中文实在不爽。于是我把生成 PDF 的脚本文件 parsenotes 作了些简单修改,以满足中文需求。下面就是我修改后的 parsenotes(你也可以点击这里下载)。

#!/usr/bin/env python

""" Convert a notes-mode file to pdf (via latex, etc.)

@todo: support for \epsfig """

""" Character codes: \0 = backslash \1 = open brace \2 = close brace \3 = _ """

import sys, re, os, shutil, os.path

# LaTeX header.  It is filled in with the ``%`` operator: the two ``%s``
# slots receive, in order, the PDF title and the PDF author, and every
# literal percent sign is therefore written ``%%``.  Each ``%%`` comment must
# stay on a line of its own, otherwise LaTeX would ignore the definitions
# that follow it on the same line.
HEADER = r"""
\documentclass{article}
\usepackage{fontspec}
\setromanfont{STSong}
\XeTeXlinebreaklocale "zh"
\XeTeXlinebreakskip = 0pt plus 1pt
\usepackage{fullpage}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{alltt}
\usepackage{epsfig}
\usepackage[dvipdfm, pagebackref, colorlinks=true,
            pdftitle={%s}, pdfauthor={%s},
            bookmarks=true, bookmarksopen=false,
            pdfpagemode=UseOutlines]{hyperref}

\setlength{\parskip}{1ex}
\setlength{\parindent}{0ex}
\setlength{\topsep}{0.3em}
\setlength{\partopsep}{0em}

%% This is used to reduce spacing in bulleted lists.
\def\nogap{
\setlength{\itemsep}{0em}
\setlength{\parskip}{0em}}

%% Declare some symbols that x-emacs believes exists.
\DeclareTextSymbol{\textbackslash}{T1}{92}
\newcommand{\nsubset}{\not\subset}
\renewcommand{\textflorin}{\textit{f}}
\newcommand{\setB}{{\mathord{\mathbb B}}}
\newcommand{\setC}{{\mathord{\mathbb C}}}
\newcommand{\setN}{{\mathord{\mathbb N}}}
\newcommand{\setQ}{{\mathord{\mathbb Q}}}
\newcommand{\setR}{{\mathord{\mathbb R}}}
\newcommand{\setZ}{{\mathord{\mathbb Z}}}
\newcommand{\coloncolon}{\mathrel{::}}
\newcommand{\lsemantics}{\mathopen{\lbrack\mkern-3mu\lbrack}}
\newcommand{\rsemantics}{\mathclose{\rbrack\mkern-3mu\rbrack}}
\newcommand{\lcata}{\mathopen{(\mkern-3mu\mid}}
\newcommand{\rcata}{\mathopen{\mid\mkern-3mu)}}

\begin{document}
"""

# LaTeX footer, appended after the converted notes.
FOOTER = r"""
\end{document}
"""

# Debug flag (not read by any code in this part of the script).
DEBUG = 0

def dolists(notes):
    """
    Convert notes-style lists to LaTeX enumerate/itemize lists.

    A list item is a line whose first non-blank text is C{*}, C{-} or a
    number followed by a period.  C{*} and C{-} open an C{itemize}
    environment, numbers open an C{enumerate} environment.  A deeper
    indentation opens a nested list; a shallower one closes the lists it
    leaves.  A non-item line closes every list that is indented at least
    as far as that line.  Tabs count as eight columns.  Lines between
    C{\\\\begin{alltt}} and C{\\\\end{alltt}} are copied through untouched.

    @type notes: C{string}
    @param notes: The (partially converted) notes text.
    @rtype: C{string}
    @return: The text with list markup inserted; every input line is
        emitted followed by a newline.
    """
    bullet_re = re.compile(r'^([ \t]*)([*]|[-]|\d+\.)(.*)')
    leading_ws_re = re.compile(r'([ \t]*)')

    indent = [-1]       # Indentation of each open list; -1 is a sentinel.
    bullets = []        # Environment name of each open list.
    out = []
    verbatim = False

    def width(whitespace):
        return len(whitespace.replace('\t', ' ' * 8))

    def close_list():
        out.append('\\vspace{-1ex}\\end{%s}\n\n' % bullets.pop())
        indent.pop()

    for line in notes.split('\n'):
        # Skip verbatim areas..
        if '\\begin{alltt}' in line:
            verbatim = True
        if '\\end{alltt}' in line:
            verbatim = False
        if verbatim:
            out.append(line + '\n')
            continue

        m = bullet_re.match(line)
        if m:
            spaces = width(m.group(1))
            bullet = m.group(2)

            # Start a new (sub)list.
            if spaces > indent[-1]:
                if bullet in ('*', '-'):
                    bullets.append('itemize')
                else:
                    bullets.append('enumerate')
                out.append('\n\\vspace{-1ex}\\begin{%s}\\nogap\n'
                           % bullets[-1])
                indent.append(spaces)

            # End one or more sublists.
            while spaces < indent[-1]:
                close_list()

            # List item.
            out.append((' ' * spaces) + '\\item{}' + m.group(3) + '\n')
        else:
            # A plain line ends every list indented at least this far.
            spaces = width(leading_ws_re.match(line).group(1))
            while spaces <= indent[-1]:
                close_list()
            out.append(line + '\n')

    # Input that ends on a list item (no trailing newline) would otherwise
    # leave its environments open and produce invalid LaTeX.
    while bullets:
        close_list()

    return ''.join(out)

def dotimestamps(notes):
    """
    Convert notes-style timestamps to headings.

    A timestamp is a line of the form C{[MM/DD/YY HH:MM AM]} (or C{PM}),
    optionally followed on the same line by a title.  Each timestamp
    starts a new page, shows the title (if any) centered, and prints the
    spelled-out date right-aligned under a rule.  The first non-blank line
    after a timestamp is preceded by extra vertical space unless it is a
    C{>}, C{>>} or C{>>>} heading line.  All other lines are copied
    through unchanged.

    @type notes: C{string}
    @param notes: The (partially converted) notes text.
    @rtype: C{string}
    @return: The text with timestamps replaced by LaTeX markup; every
        input line is emitted followed by a newline.
    @raise ValueError: If a timestamp's date is not a valid calendar date
        (raised by C{time.strptime}).
    """
    import time
    timestamp_re = re.compile(
        r'^\[(\d\d/\d\d/\d\d) \d\d:\d\d [AP]M\](.*)$')
    heading_re = re.compile(r'>>?>?\s')

    just_did_timestamp = False
    out = []
    for line in notes.split('\n'):
        m = timestamp_re.match(line)
        if m is None:
            if just_did_timestamp and line.strip() != '':
                just_did_timestamp = False
                if not heading_re.match(line):
                    # The backslash must be doubled: '\v' is a vertical tab.
                    out.append('\\vspace{2em}\n')
            out.append(line + '\n')
            continue

        date, text = m.groups()
        when = time.strptime(date, '%m/%d/%y')
        # Same text as the '%A, %B %e, %Y' format, but built without the
        # non-portable %e (space-padded day of month) directive.
        datestr = '%s %2d, %s' % (time.strftime('%A, %B', when),
                                  when.tm_mday,
                                  time.strftime('%Y', when))
        out.append('\\newpage\n')
        if text:
            out.append('\\begin{centering}\\LARGE %s\\\\' % text)
            out.append('\\end{centering}\\vspace{4mm}\n')
        out.append('\\hrule\\hrule\\hrule\n')
        out.append('\\begin{raggedleft}\\Large \\it \\hfill %s' % datestr)
        out.append('\\\\ \\end{raggedleft}\n\n\n')
        just_did_timestamp = True
    return ''.join(out)


# The converter calls this function by its underscored spelling.
do_timestamps = dotimestamps

# Verbatim regions: one or more consecutive lines that start with the same
# marker character (# % ! or *), optionally followed by a space or tab and
# the text.  Group 1 captures the whole run, including the leading newline
# of each line.
_VERB1_RE_A = re.compile(r'((\n[#]([ \t][^\n]*)?)+)', re.MULTILINE)
_VERB2_RE_A = re.compile(r'((\n[%]([ \t][^\n]*)?)+)', re.MULTILINE)
_VERB3_RE_A = re.compile(r'((\n[!]([ \t][^\n]*)?)+)', re.MULTILINE)
_VERB4_RE_A = re.compile(r'((\n[*]([ \t][^\n]*)?)+)', re.MULTILINE)

# Replacement templates for the patterns above.  They are passed to
# re.sub(), so every LaTeX backslash is doubled (a bare "\b" would be
# turned into a backspace) and "\1" inserts the captured run.
_VERB1_SUB_A = r"""

\\begin{tabular}{||l}
\\begin{minipage}{0.9\\textwidth}
\\begin{alltt}\\small\1
\\end{alltt}
\\end{minipage}
\\end{tabular}

"""
_VERB2_SUB_A = _VERB3_SUB_A = r"""

\\begin{tabular}{|l}
\\begin{minipage}{0.9\\textwidth}
\\begin{alltt}\\rmfamily\1
\\end{alltt}
\\end{minipage}
\\end{tabular}

"""
_VERB4_SUB_A = r"""

\\begin{tabular}{|l|}
\\hline
\\begin{minipage}{0.9\\textwidth}
\\begin{alltt}\\bfseries\\rmfamily\\itshape\\large\1
\\end{alltt}
\\end{minipage}\\\\
\\hline
\\end{tabular}

"""

# Subscript or superscript.  \1=open brace, \2=close brace, \0=backslash
# (control characters that stand in for the user's own characters).
# Alternatives: a braced group, a backslash command, or any single
# character other than { } ^ and a backslash.
_SCRIPT = r'(%s|%s|%s)' % ((r'%s[^%s]+%s' % ('\1', '\1\2', '\2')),
                           '\0[a-zA-Z0-9]+', r'[^{}^\\]')

# Figure placeholder lines, and the markup that replaces them: the
# surrounding alltt block is closed, the figure included, and a new alltt
# block opened.
_FIGURE_RE = re.compile(r'^::FIGURE::(\d+)::FIGURE::$', re.MULTILINE)
_FIGURE_SUB = r"""\\end{alltt}
\\epsfig{file=figure\1.eps}
\\begin{alltt}"""

def notes2latex(notes, **headervars):
    """
    Convert a notes file to a LaTeX file.

    The user's own backslashes, braces and backslash-underscore pairs are
    first swapped for control characters, so that the markup generated by
    the steps below can be told apart from the user's text; whatever is
    left of them at the end is emitted as literal characters.

    @type notes: C{string}
    @param notes: The contents of the notes file.
    @param headervars: Optional C{author} (default C{'Edward Loper'}) and
        C{title} (default empty) used for the PDF metadata in the header.
    @rtype: C{string}
    @return: A complete LaTeX document (header, body, footer).
    """
    # Change backslashes to \0s, so we can tell our backslashes from theirs.
    notes = re.sub(r'\\', '\0', notes)
    notes = re.sub(r'\{', '\1', notes)
    notes = re.sub(r'\}', '\2', notes)
    notes = re.sub('\0_', '\3', notes)

    # TODO: support for \epsfig (see the module docstring).
    #notes = re.sub('\0epsfig\1([^\2]*file=)([^\2]+)\2',
    #               r'\\epsfig{\1%s/\2}' % os.curdir, notes)

    # In the case of expressions like {\'o} (for o with an accent), we
    # want to keep the {, }, and \ characters; change them back, to
    # prevent them from being rendered literally.
    notes = re.sub('\1\0([\'"c`]? ?\\w)\2', r'{\\\1}', notes)

    # Timestamps.
    notes = dotimestamps(notes)

    # Headings.
    h1_re = re.compile(r'^> (.*)$', re.MULTILINE)
    notes = h1_re.sub('\n' + r'\\section{\1}' + '\n', notes)
    h2_re = re.compile(r'^>> (.*)$', re.MULTILINE)
    notes = h2_re.sub('\n' + r'\\subsection{\1}' + '\n', notes)
    h3_re = re.compile(r'^>>> (.*)$', re.MULTILINE)
    notes = h3_re.sub('\n' + r'\\subsubsection*{\1}' + '\n', notes)

    # Emphasis.  Non-greedy, so that two !!..!! spans on the same line
    # are not merged into a single one.
    notes = re.sub(r'!!(.*?)!!', r" \\emph{\1} ", notes)

    # Some characters need to be in math mode.
    # (do this before verb, since verb introduces |'s)
    notes = re.sub(r'([<>|])', r'\\(\1\\)', notes)

    notes = _VERB1_RE_A.sub(_VERB1_SUB_A, notes)
    notes = _VERB2_RE_A.sub(_VERB2_SUB_A, notes)
    notes = _VERB3_RE_A.sub(_VERB3_SUB_A, notes)
    notes = _VERB4_RE_A.sub(_VERB4_SUB_A, notes)

    # Get rid of the verbatim markers.
    verb_re_b = re.compile(r'^[#%!*][ \t]?', re.MULTILINE)
    notes = verb_re_b.sub('', notes)

    # Handle sub & super scripts.  Run these regexps repeatedly, since
    # sub & superscripts might be nested.
    math_re_a = re.compile(r'\^(%s)' % _SCRIPT)
    math_re_b = re.compile(r'_(%s)' % _SCRIPT)
    while True:
        notes2 = notes

        # Replace _ and ^ with \sb and \sp
        notes2 = math_re_a.sub(r"\\(\\sp{\\text{\1}}\\)", notes2)
        notes2 = math_re_b.sub(r"\\(\\sb{\\text{\1}}\\)", notes2)

        # Get rid of excess { and }s
        notes2 = re.sub("\\\\text{\1([^\1\2{}]*)\2}",
                        r'\\text{\1}', notes2)

        if notes2 == notes:
            break
        notes = notes2

    # Put appropriate elements in math mode.
    notes = re.sub('\0([a-zA-Z0-9]+)', r"\\(\\\1\\)", notes)

    # Get rid of any remaining ^s and _s (from nested use)
    notes = re.sub(r'\^', r'\\textasciicircum', notes)
    notes = re.sub(r'_', r'\\_', notes)

    # Some characters need to be backslashed
    notes = re.sub('([#$&%])', r'\\\1', notes)

    # Some commands are *not* supposed to be in math mode.
    notes = re.sub((r'(\\text(?!width)[a-zA-Z0-9]+|' +
                    r'\\l(?!\w)|\\o(?!\w))'),
                   r'\\textrm{\1}', notes)

    # Is this necessary?
    notes = re.sub('~', r'{\\textasciitilde}', notes)

    # If they backslashed braces, then unbackslash them.
    notes = re.sub('\0\1', '\1', notes)
    notes = re.sub('\0\2', '\2', notes)

    # Change any remaining backslashes to textbackslash, etc.
    notes = re.sub('\0', r'{\\textbackslash}', notes)
    notes = re.sub('\1', r'\\{', notes)
    notes = re.sub('\2', r'\\}', notes)
    notes = re.sub('\3', r'\\_', notes)

    # Handle figures; drop the empty alltt blocks this leaves behind.
    notes = _FIGURE_RE.sub(_FIGURE_SUB, notes)
    notes = re.sub(r'\\begin{alltt}\n\\end{alltt}\n', '', notes)

    # Handle lists.
    notes = dolists(notes)

    # Collapse runs of blank lines into a single blank line.
    notes = re.sub(r'\n([ \t]*\n)+', '\n\n', notes)

    # Fill in header variables.  HEADER's slots are pdftitle first and
    # pdfauthor second, so the title must be passed first.
    author = headervars.get('author', 'Edward Loper')
    title = headervars.get('title', '')
    header = HEADER % (title, author)

    return header + notes + FOOTER

def tree2ps(tree_str, outfile):
    """
    Render a bracketed tree to a PostScript file using C{tree2image}.

    @type tree_str: C{string}
    @param tree_str: The tree text, written with square brackets, as
        collected from the notes.
    @param outfile: Destination, passed through to C{tree2image.tree2ps}.
    """
    # The notes use square braces; the parser is given round ones.
    tree_str = tree_str.replace('[', '(')
    tree_str = tree_str.replace(']', ')')

    # Undo some of our earlier changes.. :-/
    tree_str = tree_str.strip()
    tree_str = tree_str.replace('\\_', '_')

    # THE TREE2IMAGE VERSION:
    import tree2image
    tree = tree2image.parse_treebank_tree(tree_str, '()', 1)
    tree2image.tree2ps(outfile, tree, ('times', 9))

    # THE NLTK VERSION (disabled alternative):
    #from nltk.tree import parse_treebank
    #import nltk.draw.tree
    ## Hack to make tree sizes more reasonable:
    #nltk.draw.tree.TreeView._Y_SPACING = 8
    #nltk.draw.tree.TreeView._X_SPACING = 4
    ##
    #tree = parse_treebank(tree_str)
    #nltk.draw.tree.print_tree(tree, outfile, 8)

# Number of the most recently generated figure; figures are written to
# figure<N>.eps and referenced from the notes by that number.
_FIGURE_NUMBER = 0


def dofigures(str, dir, type, show_original=None):
    """
    Replace figure descriptions found in verbatim (C{# }) lines by figure
    placeholders, generating one EPS file per figure.

    Three kinds of figure are recognised, selected by C{type}:
    C{'trees'} (text starting with C{[}, rendered by C{tree2ps}),
    C{'graphs'} (text starting with C{digraph }, rendered by C{dot}) and
    C{'plots'} (a single C{plot ...;} line, rendered by C{gnuplot}).  A
    tree or graph ends on the line where its brackets balance.  Output
    files are named relative to the current directory, so the caller is
    expected to have changed into C{dir} beforehand.

    The parameter names shadow builtins; they are kept as they are for
    compatibility with existing callers.

    @type str: C{string}
    @param str: The notes text.
    @param dir: Directory in which helper input files are written.
    @param type: C{'trees'}, C{'graphs'} or C{'plots'}.
    @param show_original: Whether to keep the figure's source lines in
        the output.  Defaults to true for trees and false otherwise.
    @rtype: C{string}
    @return: The text with a C{# ::FIGURE::N::FIGURE::} line added after
        each successfully rendered figure.
    @raise ValueError: If C{type} is not one of the three known kinds.
    """
    global _FIGURE_NUMBER

    delimiters = {'trees': '[]',
                  'graphs': '{}',
                  'plots': ';;'}    # (no braces; plots are 1-line)
    if type not in delimiters:
        raise ValueError('bad type')
    braces = delimiters[type]

    # By default, show originals for trees but not graphs or plots.
    if show_original is None:
        show_original = (type == 'trees')

    brace_count = 0
    fig = ''
    out = []

    sys.stdout.write('Converting %s' % type)
    for line in str.split('\n'):

        # Skip non-verbatim areas..
        if line[:2] != '# ':
            out.append(line + '\n')
            brace_count = 0
            fig = ''
            continue

        stripline = line[2:].strip()

        starts_figure = (
            (type == 'trees' and stripline[:1] == braces[0]) or
            (type == 'graphs' and stripline[:8] == 'digraph ') or
            (type == 'plots' and stripline[:5] == 'plot ' and
             stripline[-1] == ';'))
        if fig or starts_figure:
            fig += stripline + '\n'
            if show_original:
                out.append(line + '\n')
        else:
            out.append(line + '\n')

        if not fig:
            continue

        brace_count += (stripline.count(braces[0]) -
                        stripline.count(braces[1]))
        if brace_count != 0 or stripline[-1:] != braces[1]:
            continue

        # We have a figure!
        sys.stdout.write('.')
        sys.stdout.flush()
        _FIGURE_NUMBER += 1
        epsname = 'figure%d.eps' % _FIGURE_NUMBER
        error = None
        if type == 'trees':
            try:
                tree2ps(fig, epsname)
            except Exception:
                error = 'BAD TREE:\n'
        elif type == 'graphs':
            figname = 'figure%d.fig' % _FIGURE_NUMBER
            figfile = open(os.path.join(dir, figname), 'w')
            try:
                figfile.write(fig)
            finally:
                figfile.close()
            if os.system('dot %s -Tps -o %s' % (figname, epsname)) != 0:
                error = 'BAD DOT GRAPH:\n'
        else:
            # Hand the script to gnuplot in a file, as is done for dot,
            # rather than splicing it into a quoted shell string (which
            # breaks as soon as the plot command contains a quote).
            gpname = 'figure%d.gp' % _FIGURE_NUMBER
            gpfile = open(os.path.join(dir, gpname), 'w')
            try:
                gpfile.write('set term postscript eps;' +
                             ' set size 0.5,0.5;' +
                             (' set output "%s";' % epsname) +
                             fig)
            finally:
                gpfile.close()
            if os.system('gnuplot %s' % gpname) != 0:
                error = 'BAD PLOT:\n'

        if error is None:
            out.append('# ::FIGURE::%d::FIGURE::\n' % _FIGURE_NUMBER)
        else:
            sys.stdout.write(error + fig + '\n')
        # Reset on failure too, so that a bad figure is not glued onto
        # the next one.
        brace_count = 0
        fig = ''

    sys.stdout.write('\n')
    return ''.join(out)


# The converter calls this function by its underscored spelling.
do_figures = dofigures

def latex2pdf(notes_str, outfile, trees=0, graphs=0, plots=0):
    """
    Convert notes text to a PDF file by running C{xelatex} in a scratch
    directory.

    @type notes_str: C{string}
    @param notes_str: The contents of the notes file.
    @param outfile: Name of the PDF file to write, resolved against the
        directory that was current when the function was called.  Its
        name without C{.pdf} is used as the document title.
    @param trees: If true, render tree figures first.
    @param graphs: If true, render dot graphs first.
    @param plots: If true, render gnuplot plots first.
    @raise ValueError: If C{xelatex} exits with a non-zero status.
    """
    import tempfile

    olddir = os.path.abspath('.')

    # Make a temp directory.  mkdtemp creates it atomically, unlike the
    # name-then-mkdir sequence that os.tempnam() required.
    tempdir = tempfile.mkdtemp()

    try:
        os.chdir(tempdir)

        # Special handling: figures.  Do this *before* we do other
        # latex conversions, because the text within the figure should
        # be literal..
        if trees:
            notes_str = dofigures(notes_str, tempdir, 'trees')
        if graphs:
            notes_str = dofigures(notes_str, tempdir, 'graphs')
        if plots:
            notes_str = dofigures(notes_str, tempdir, 'plots')

        # Convert the notes file to latex.
        latex_str = notes2latex(notes_str, title=outfile.replace('.pdf', ''))

        # Write the latex file
        texfile = open('file.tex', 'w')
        try:
            texfile.write(latex_str)
        finally:
            texfile.close()

        # Run xelatex twice (for bookmarks & x-refs).  It writes file.pdf
        # itself, so the former latex/dvips/ps2pdf/dvipdfm steps are not
        # needed.
        command = 'xelatex file.tex'
        for _ in range(2):
            print(command)
            if os.system(command) != 0:
                os.system('less file.tex')
                raise ValueError('Warning: latex failed')

        # Read the pdf.  A PDF is binary data, so use binary mode.
        pdffile = open('file.pdf', 'rb')
        try:
            pdf_str = pdffile.read()
        finally:
            pdffile.close()

        # Write the output (back in the caller's directory, so that a
        # relative outfile lands where the caller expects).
        os.chdir(olddir)
        target = open(outfile, 'wb')
        try:
            target.write(pdf_str)
        finally:
            target.close()

    finally:
        os.chdir(olddir)
        shutil.rmtree(tempdir)

def usage():
    """
    Print a one-line usage message listing every option that C{main}
    accepts.  Returns C{None}; the process is not terminated.
    """
    print("usage: parsenotes [-trees] [-graphs] [-plots] file.notes")
    #sys.exit(-1)

def main():
    """
    Command-line entry point: parse C{sys.argv}, read the notes file and
    convert it to a PDF named after it (C{.notes} replaced by C{.pdf}).

    Accepted options are C{-trees}/C{-tree}, C{-graphs}/C{-graph} and
    C{-plots}/C{-plot}.  Exactly one file name is expected; anything
    else prints the usage message.
    """
    trees = graphs = plots = 0
    infile = None
    for arg in sys.argv[1:]:
        if arg[:1] == '-':
            if arg in ('-trees', '-tree'):
                trees = 1
            elif arg in ('-graphs', '-graph'):
                graphs = 1
            elif arg in ('-plots', '-plot'):
                plots = 1
            else:
                return usage()
        elif infile is None:
            infile = arg
        else:
            return usage()
    if infile is None:
        return usage()

    # The dot is escaped so that only a real ".notes" suffix is replaced.
    outfile = re.sub(r'\.notes$', '.pdf', infile)
    if infile == outfile:
        # Stop here: carrying on would write the PDF over the input file.
        print('Expected a notes file')
        return None

    notes_file = open(infile, 'r')
    try:
        notes = '\n' + notes_file.read() + '\n'
    finally:
        notes_file.close()
    print('%s --> %s' % (infile, outfile))
    latex2pdf(notes, outfile, trees, graphs, plots)

# Run the converter only when executed as a script, not when imported.
if __name__ == '__main__': main()

0 comments ↓

There are no comments yet...Kick things off by filling out the form below.

Leave a Comment