Annotation of doc/help/texxml2latex.pl, revision 1.11

1.1       bowersj2    1: #!/usr/bin/perl
                      2: 
1.2       bowersj2    3: # The LearningOnline Network with CAPA
                      4: # Converts a texxml file into a single tex file
                      5: #
                      6: # Copyright Michigan State University Board of Trustees
                      7: #
                      8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
                      9: #
                     10: # LON-CAPA is free software; you can redistribute it and/or modify
                     11: # it under the terms of the GNU General Public License as published by
                     12: # the Free Software Foundation; either version 2 of the License, or
                     13: # (at your option) any later version.
                     14: #
                     15: # LON-CAPA is distributed in the hope that it will be useful,
                     16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
                     17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     18: # GNU General Public License for more details.
                     19: #
                     20: # You should have received a copy of the GNU General Public License
                     21: # along with LON-CAPA; if not, write to the Free Software
                     22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
                     23: #
                     24: # /home/httpd/html/adm/gpl.txt
                     25: #
                     26: # http://www.lon-capa.org/
                     27: #
                     28: # 7-16-2002 Jeremy Bowers
                     29: 
1.1       bowersj2   30: use strict;
                     31: use HTML::TokeParser;
                     32: use GDBM_File;
1.5       bowersj2   33: use File::Temp;
1.1       bowersj2   34: 
                     35: # The texxml document is read from the file named by the first command-line argument
                     36: my $p = HTML::TokeParser->new( $ARGV[0] );
1.4       albertel   37: my $dirprefix = "../../loncom/html/adm/help/tex/";  # directory of the LaTeX fragments, header and footer
1.1       bowersj2   38: 
1.10      albertel   39: my $include_filenames = ($ARGV[1] eq '--with-filenames');  # true only if the second argument is exactly --with-filenames
1.5       bowersj2   40: # Make myself a temp dir for processing POD
                     41: my $tmpdir = File::Temp::tempdir('loncapahelpgenXXXXXXX', TMPDIR => 1);  # TMPDIR => 1: create it under the system temp directory
                     42: 
1.1       bowersj2   43: # Print the header
                     44: open (LATEX_FILE, $dirprefix . "Latex_Header.tex");  # NOTE(review): result of open is not checked here
                     45: print <LATEX_FILE>;  # list context: copies the whole header file to standard output
                     46: 
1.11    ! albertel   47: sub escape_latex {   # Takes one string; returns a copy with LaTeX-special characters escaped or replaced.
        !            48:     my ($string)=@_;
        !            49:     $string=~s/\\/\\ensuremath{\\backslash}/g;   # every backslash -> \ensuremath{\backslash}; done before the passes below add backslashes of their own
        !            50:     $string=~s/([^\\]|^)\%/$1\\\%/g;   # % at string start or after a non-backslash character -> \%
        !            51:     $string=~s/([^\\]|^)\$/$1\\\$/g;   # same rule for $ -> \$
        !            52:     $string=~s/([^\\])\_/$1\\_/g;   # _ after a non-backslash character -> \_ ; NOTE(review): no ^ alternative, so a leading _ is not matched here
        !            53:     $string=~s/\$\$/\$\\\$/g;   # any "$$" still present -> "$\$" (the second of an adjacent pair is skipped by the pass above)
        !            54:     $string=~s/\_\_/\_\\\_/g;   # any "__" still present -> "_\_"
        !            55:     $string=~s/\#\#/\#\\\#/g;   # "##" -> "#\#"; the remaining # is handled further down
        !            56:     $string=~s/([^\\]|^)(\~|\^)/$1\\$2\\strut /g;   # ~ or ^ not preceded by a backslash -> \~\strut / \^\strut (with trailing space)
        !            57:     $string=~s/(>|<)/\\ensuremath\{$1\}/g; #more or less: < and > are wrapped in \ensuremath{...}
        !            58: #    $string=&Apache::lonprintout::character_chart($string);
        !            59:     # any & or # leftover should be safe to just escape
        !            60:     $string=~s/([^\\]|^)\&/$1\\\&/g;   # & not preceded by a backslash -> \&
        !            61:     $string=~s/([^\\]|^)\#/$1\\\#/g;   # # not preceded by a backslash -> \#
        !            62:     $string=~s/\|/\$\\mid\$/g;   # | -> $\mid$ ; done last, so these $ signs are not escaped again
        !            63:     return $string;
        !            64: }
        !            65: 
1.1       bowersj2   66: while (my $token = $p->get_token())  # visit every token of the texxml input in document order
                     67: {
                     68:     my $type = $token->[0];
1.5       bowersj2   69:     if ($type eq 'S') {  # only start tags are acted on; all other tokens are ignored
1.1       bowersj2   70: 	my $tag = $token->[1];
                     71: 	my $attr = $token->[2];  # hash ref of the tag's attributes
1.5       bowersj2   72: 	if ($tag eq 'section') {
1.1       bowersj2   73: 	    my $title = $attr->{'name'};
1.11    ! albertel   74: 	    print "\\section{".&escape_latex($title)."}\n\n";
1.1       bowersj2   75: 	}
                     76: 
1.5       bowersj2   77: 	if ($tag eq 'subsection') {
1.1       bowersj2   78: 	    my $title = $attr->{'name'};
1.11    ! albertel   79: 	    print "\\subsection{".&escape_latex($title)."}\n\n";  # "." concatenates; a bare "&" here would be a bitwise string AND
1.1       bowersj2   80: 	}
                     81: 
1.5       bowersj2   82: 	if ($tag eq 'subsubsection') {
1.1       bowersj2   83: 	    my $title = $attr->{'name'};
1.11    ! albertel   84: 	    print "\\subsubsection{".&escape_latex($title)."}\n\n";
1.1       bowersj2   85: 	}
                     86: 
1.5       bowersj2   87: 	if ($tag eq 'file') {  # copy a LaTeX fragment from $dirprefix into the output
1.1       bowersj2   88: 	    my $file = $attr->{'name'};
1.9       bowersj2   89: 	    open (LATEX_FILE, $dirprefix . $file) or 
                     90: 		($! = 1, die "Can't find LaTeX file $dirprefix/$file; terminating build.");  # $! = 1 so that die exits with status 1
1.10      albertel   91: 	    if ($include_filenames) {
1.11    ! albertel   92: 		print "\\textrm{File: \\bf ".&escape_latex($file)."}\\\\\n";
1.10      albertel   93: 	    }
1.1       bowersj2   94: 	    print <LATEX_FILE>;
1.3       bowersj2   95: 	    print "\n\n";
1.1       bowersj2   96: 	}
                     97: 
1.5       bowersj2   98: 	if ($tag eq 'tex') {  # literal LaTeX from the content attribute, set off by blank lines
1.3       bowersj2   99: 	    print "\n\n";
1.1       bowersj2  100: 	    print $attr->{'content'};
1.3       bowersj2  101: 	    print "\n\n";
1.1       bowersj2  102: 	}
1.5       bowersj2  103: 
                    104: 	if ($tag eq 'pod') {  # run pod2latex on a copy of the file and include the filtered result
                    105: 	    my $file = $attr->{'file'};
1.8       bowersj2  106: 	    my $section = $attr->{'section'};
1.5       bowersj2  107: 	    if (!defined($section)) { $section = ''; }
1.6       bowersj2  108: 	    else { 
1.8       bowersj2  109: 		$section = "-section '$section'";  # becomes a pod2latex command-line option
1.6       bowersj2  110: 	    }
1.8       bowersj2  111: 	    my $h1level = $attr->{'h1level'};
                    112: 	    if (!defined($h1level)) { $h1level = '2'; }  # default heading depth
1.5       bowersj2  113: 	    $file = '../../loncom/' . $file;
1.8       bowersj2  114: 	    my $filename = substr($file, rindex($file, '/') + 1);  # part after the last '/'
                    115: 	    system ("cp $file $tmpdir\n");
1.9       bowersj2  116: 	    my $latexFile;
                    117: 	    if (index($filename, '.') == -1) {
                    118: 		# pod2latex *insists* that either the extension of the
                    119: 		# file be .pl|.pm|.pod or that it be executable. Some
                    120: 		# extension-less files like "lonsql" are none-of-the-above.
                    121: 		system ("cd $tmpdir; mv $filename $filename.pm");
                    122: 		$filename .= ".pm";
                    123: 		print STDERR $filename . "\n";
                    124: 	    }
1.8       bowersj2  125: 	    system ("cd $tmpdir; pod2latex -h1level $h1level $section $filename\n");
1.9       bowersj2  126: 	    $latexFile = substr($filename, 0, rindex($filename, '.')) . '.tex';  # same name with the extension swapped for .tex
                    127: 	    open LATEX_FILE, $tmpdir . '/' . $latexFile or
                    128: 		($! = 1, die "Latex file $latexFile not found while trying to use pod2latex, ".
                    129: 		 "terminating build");
1.7       bowersj2  130: 	    # pod2latex inserts \labels and \indexs for every section,
                    131: 	    # which is horrible because the section names tend to get
                    132: 	    # reused a lot. This filters those out, so we need to
                    133: 	    # create our own indexes.
                    134: 	    for (<LATEX_FILE>) {
1.8       bowersj2  135: 		$_ =~ s/\\([^{]*)(section|paragraph)(\*?)\{([^\\]+)\\label\{[^\\]+\}\\index\{([^\\]+)\}\}/\\$1$2$3\{$4\}/g;  # keep the heading command and title, drop \label and \index
1.7       bowersj2  136: 		print $_;
                    137: 	    }
1.5       bowersj2  138: 	    print "\n\n";
                    139: 	}
1.1       bowersj2  140:     }
                    141: }
                    142: 
                    143: # Print out the footer.
                    144: open (LATEX_FILE, $dirprefix . "Latex_Footer.tex");  # NOTE(review): result of open is not checked here
                    145: print <LATEX_FILE>;  # list context: copies the whole footer file to standard output
1.5       bowersj2  146: 
                    147: # Remove the temp directory
                    148: system ("rm -rf $tmpdir");  # $tmpdir is the File::Temp directory created at startup
1.8       bowersj2  149: 
                    150: __END__
                    151: 
                    152: =pod
                    153: 
                    154: =head1 NAME
                    155: 
                    156: texxml2latex.pl - core script that drives the help file assembly
                    157:   applications
                    158: 
                    159: =head1 SYNOPSIS
                    160: 
                    161: LON-CAPA's help system is based on assembling various pieces into
                    162: LaTeX files for conversion into printed documents. The various pieces
                    163: can also be used as online help.
                    164: 
                    165: =head1 OVERVIEW
                    166: 
                    167: X<help system, overview>LON-CAPA's help system is based on the idea of
                    168: assembling various pieces as needed to create documents for printing,
                    169: and using these various pieces for online help. LaTeX is the primary
                    170: language of the help system, because we can easily convert it to HTML,
                    171: and it makes the nicest printed documents.
                    172: 
                    173: The scripts for the help system are stored in /doc/help in the CVS
                    174: repository.
                    175: 
                    176: =head2 Data Sources
                    177: 
                    178: The help system can draw from the following sources to create help
                    179: documents:
                    180: 
                    181: =over 4
                    182: 
                    183: =item * B<LaTeX fragments>: LaTeX fragments stored in
                    184: C</loncom/html/adm/help/tex> in the CVS repository (which end up in
                    185: C</home/httpd/html/adm/help/tex>). A "LaTeX fragment" is a file that
                    186: contains LaTeX-style markup, but is not a complete LaTeX file with
                    187: header and footer.
                    188: 
                    189: =item * B<perl POD documentation>: POD documentation may be extracted
                    190: from perl modules used in LON-CAPA, using the syntax described in
                    191: podselect's man page.
                    192: 
                    193: =back
                    194: 
                    195: =head2 Online Help
                    196: 
                    197: The online aspect of the help system is covered in the documentation
                    198: for loncommon.pm; see L<Apache::loncommon>, look for
                    199: C<help_open_topic>.
                    200: 
                    201: Online help can only come from LaTeX fragments.
                    202: 
                    203: Access to the printed documents is partially provided online by
                    204: rendering the help files structure in a way that allows the user to
                    205: click through to the underlying help files; see 
                    206: L<http://msu.loncapa.org/adm/help/author.manual.access.hlp> for an
                    207: example. It's not very good, but it's marginally better than nothing.
                    208: 
                    209: =head2 Offline Documents 
                    210: 
                    211: Offline documents are generated from XML documents which tell a
                    212: rendering script how to assemble the various LaTeX fragments into a
                    213: single LaTeX file, which is then rendered into PostScript and PDF
                    214: files, suitable for download and printing. 
                    215: 
                    216: =head1 texxml And Rendering texxml
                    217: 
                    218: =head2 texxml 
                    219: 
                    220: X<texxml>
                    221: texxml is a little XML file format used to specify to the texxml2*.pl
                    222: scripts how to assemble the input sources into LaTeX documents. texxml
                    223: files end in the .texxml extension, and there is one texxml file per
                    224: final rendered document.
                    225: 
                    226: The texxml format is as follows: There is a root <texxml> element,
                    227: with no attributes and the following children:
                    228: 
                    229: =over 4
                    230: 
                    231: =item * B<title>: The B<name> attribute of this tag is used as the
                    232:    title of the document in texxml2index.pl; it is ignored in 
                    233:    texxml2latex.pl. If you don't intend to offer online-access
                    234:    to the rendered documents this may be skipped.
                    235: 
                    236: =item * B<section>, B<subsection>, and B<subsubsection>: These create
                    237:    the corresponding environments in the output file. The B<name>
                    238:    attribute is used to determine the name of the section.
                    239: 
                    240: =item * B<file>: The C<name> attribute specifies a LaTeX fragment by
                    241:    filename. The file is assumed to be located in the
                    242:    C<loncom/html/adm/help/tex/> directory in the CVS repository. The
                    243:    C<.tex> is required.
                    244: 
                    245: =item * B<tex>: The contents of the B<content> attribute are directly
                    246:    inserted into the rendered LaTeX file, followed by a paragraph
                    247:    break. This is generally used for little connective paragraphs in
                    248:    the documentation that don't make sense in the online help. See
                    249:    C<author.manual.texxml> for several example usages.
                    250: 
                    251: =item * B<pod>: The B<file> attribute specifies a file to draw the POD
                    252:    documentation out of. The B<section> attribute is a section
                    253:    specification matching the format specified in the man page of
                    254:    podselect. By default, all POD will be included. The file is
                    255:    assumed to be relative to the C<loncom> directory in the CVS
                    256:    repository; you are allowed to escape from that with .. if
                    257:    necessary. The B<h1level> attribute can be used to change 
                    258:    the default depth of the headings; by default, this is set to 2,
                    259:    which makes =head1 a "subsection". Setting this higher can allow
                    260:    you to bundle several related pod files together; see 
                    261:    developer.manual.texxml for examples.
                    262: 
                    263: =back
                    264: 
                    265: texxml2latex.pl will automatically include C<Latex_Header.tex> at the
                    266: beginning and C<Latex_Footer.tex> at the end, to make a complete
                    267: LaTeX document.
                    268: 
1.9       bowersj2  269: =head2 Rendering texxml 
1.8       bowersj2  270: 
1.9       bowersj2  271: =head3 render.texxml.pl 
1.8       bowersj2  272: 
1.9       bowersj2  273: X<texxml, rendering>X<render.texxml.pl>The C<render.texxml.pl> script
                    274: takes a .texxml file, and produces PostScript and PDF files. The LaTeX
                    275: files will be given access to .eps files in the
                    276: C</loncom/html/adm/help/eps/> directory while rendering. Call it as
                    277: follows, from the C<doc/help> directory:
1.8       bowersj2  278: 
                    279:  perl render.texxml.pl -- author.manual.texxml
                    280: 
                    281: substituting the appropriate texxml file.
                    282: 
1.9       bowersj2  283: =head3 texxml2latex.pl 
1.8       bowersj2  284: 
1.9       bowersj2  285: X<texxml2latex.pl>texxml2latex.pl is a perl script that takes texxml in and assembles
1.8       bowersj2  286: the final LaTeX file, outputting it on stdout. Invoke it as follows:
                    287: 
                    288:  perl texxml2latex.pl author.manual.texxml
                    289: 
                    290: Note that error handling is minimal: if the script can not open a .tex
                    291: file named by a B<file> tag, or pod2latex's output for a B<pod> tag, it
                    292: terminates the build. Generally, this means the file either could not be
                    293: found, or the current user does not have sufficient permissions to read it.
                    294: 
1.9       bowersj2  295: =head3 texxml2index.pl 
1.8       bowersj2  296: 
1.9       bowersj2  297: X<texxml2index.pl>texxml2index.pl is a perl script that takes texxml in and assembles a
1.8       bowersj2  298: file that can be used online to access all the .tex files that are
                    299: specified in the .texxml file. For an example of how this looks
                    300: online, see
                    301: C<http://msu.loncapa.org/adm/help/author.manual.access.hlp>.
                    302: 
                    303: =head2 texxml support
                    304: 
                    305: There are a couple of scripts that you may find useful for creating
                    306: texxml-based help:
                    307: 
1.9       bowersj2  308: =head3 latexSplitter.py 
1.8       bowersj2  309: 
1.9       bowersj2  310: X<latexSplitter.py>latexSplitter.py is a Python script that helps you separate a
1.8       bowersj2  311: monolithic .tex file into the small pieces LON-CAPA's help system
                    312: expects. Invoke it like this:
                    313: 
                    314:  python latexSplitter.py monolithic.tex
                    315: 
                    316: where C<monolithic.tex> is the .tex file you want to split into
                    317: pieces. This requires Python 2.1 or greater (2.0 may work); on many
                    318: modern RedHat installs this is installed by default under the
                    319: executable name C<python2>.
                    320: 
                    321: Use the program by highlighting the desired section, give it a file
                    322: name in the textbox near the bottom, and hit the bottom button. The
                    323: program will remove that text from the textbox, and create a file in
                    324: the C<loncom/html/adm/help/tex/> directory containing that LaTeX. For
                    325: consistency, you should use underscores rather than spaces in the
                    326: filename, and note there are a few naming conventions for the .tex
                    327: files, which you can see just by listing the
                    328: C<loncom/html/adm/help/tex/> directory.
                    329: 
                    330: The idea behind this program is that if you are writing a big document
                    331: from scratch, you can use a "real" program like LyX to create the .tex
                    332: file, then easily split it with this program.
                    333: 
1.9       bowersj2  334: =head3 simpleEdit.py 
1.8       bowersj2  335: 
1.9       bowersj2  336: X<simpleEdit.py>simpleEdit.py is a python script that takes a .texxml file and shows
1.8       bowersj2  337: all the tex files that went into it in sequence, allowing you to "edit"
                    338: the entire document as one entity. Note this is intended for simple
                    339: typo corrections and such in context, not major modification of the
                    340: document. Invoke it with 
                    341: 
                    342:  python simpleEdit.py author.manual.texxml
                    343: 
                    344: Make your changes, and hit the "Save" button to save them.
                    345: 
                    346: =head2 texxml LaTeX Feature Support
                    347: 
                    348: =head3 Cross-referencing
                    349: 
                    350: LaTeX has a cross-referencing system built around labeling points in
                    351: the document with \label, and referencing those labels with \ref. In a
                    352: complete LaTeX document, there's no problem because all \refs and
                    353: \labels are present. However, for the online help, \ref'ing something
                    354: that is not in the current LaTeX fragment causes a TTH error when it
                    355: can't find the crossreference.
                    356: 
                    357: The solution is to do the cross-references for TTH. When LON-CAPA is
                    358: installed, the C<rebuildLabelHash.pl>X<rebuildLabelHash.pl> script
                    359: is executed, which extracts all the labels from the LaTeX fragments
                    360: and stores them in the C<fragmentLabels.gdbm>X<fragmentLabels.gdbm> hash. 
                    361: The C<lonhelp.pm> handler then replaces \refs with appropriate
                    362: HTML to provide a link to the referenced help file while online. Thus,
                    363: you can freely use references, even in online help.
                    364: 
                    365: =head3 Indexing
                    366: 
                    367: LaTeX has a popular index making package called MakeIndex. LON-CAPA's
                    368: help system supports this, so you can create indices using the \index
                    369: LaTeX command. In perl POD files, use the X command. Note that in both
1.9       bowersj2  370: cases the index text is not included in the render, so the index must 
                    371: be included in addition to the indexed text, and need not match the 
                    372: indexed text precisely.
1.8       bowersj2  373: 
                    374: =head1 Writing POD: Style
                    375: 
                    376: Adopting a little bit from everybody who has included POD in their
                    377: documents to date, the help system is going to expect the following
                    378: format for POD documentation.
                    379: 
                    380: The POD should start with a C<=head1> with the title C<NAME> (in caps
                    381: as shown). The following paragraph should extremely briefly describe
                    382: what the module does and contains. Example:
                    383: 
                    384:  =head1 NAME
                    385: 
                    386:  Apache::lonflunkstudent - provides interface to set all
                    387:    student assessments point score to 0
                    388: 
                    389: Next should be a C<head1> titled C<SYNOPSIS> which contains a
                    390: paragraph or two description of the module.
                    391: 
                    392:  =head1 SYNOPSIS
                    393: 
                    394:  lonflunkstudent provides a handler to select a student and set all
                    395:  assignment values to zero, thereby flunking the student.
                    396: 
                    397:  Routines for setting all assessments to some value are provided by
                    398:  this module, as well as some useful student taunting routines.
                    399: 
                    400: Optionally, an C<OVERVIEW> section can be included. This can then be
                    401: extracted by the help system for the LON-CAPA subsystems overview
                    402: chapter. The overview should be a relatively high-level, but still
                    403: technical, overview of the module, sufficient to give the reader
                    404: enough context to understand what the module does, what it might be
                    405: useful for in other contexts, and what is going on in the code when it
                    406: is read.
                    407: 
                    408: The remainder should be formatted as appropriate for the file, such
                    409: that discarding the NAME, SYNOPSIS, and OVERVIEW sections provides a
1.9       bowersj2  410: useful API overview of the module. This may be anything from an 
                    411: elaborate discussion of the data structures, algorithms, and design 
                    412: principles that went into the module, to a simple listing of 
                    413: what functions exist, how to call them, and what they return, as
                    414: appropriate.
1.8       bowersj2  415: 
                    416: Routines that are private to the module should B<not> be documented;
                    417: document them in perl comments, or, as is the style of the time, not
                    418: at all, as is appropriate.
                    419: 
                    420: Method and function names should be bolded when being
1.9       bowersj2  421: documented. 
                    422: 
                    423: Literal strings such as filenames should be enclosed in
1.8       bowersj2  424: the C command, like this: C</home/httpd/lonTabs/>. 
1.9       bowersj2  425: 
                    426: Indexing can be done with the X command in perldoc, and should be used 
                    427: as appropriate. Do not include X commands in the headings; the output 
                    428: from pod2latex screws up some regexes in texxml2latex.pl.
1.8       bowersj2  429: 
                    430: =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>
500 Internal Server Error

Internal Server Error

The server encountered an internal error or misconfiguration and was unable to complete your request.

Please contact the server administrator at root@localhost to inform them of the time this error occurred, and the actions you performed just before this error.

More information about this error may be available in the server error log.