version 1.16, 2008/11/20 13:04:24
|
version 1.23, 2019/02/23 20:05:50
|
Line 1
|
Line 1
|
# The LearningOnline Network |
# The LearningOnline Network |
# entity -> tex. |
# entity -> tex. |
# |
# |
# entities.pm,v 1.15 2008/11/20 08:00:00 raeburn |
# $Id$ |
# |
# |
# Copyright Michigan State University Board of Trustees |
# Copyright Michigan State University Board of Trustees |
# |
# |
Line 26
|
Line 26
|
# |
# |
# |
# |
|
|
=pod |
|
|
|
=head1 NAME |
|
|
|
Apache::entities.pm |
|
|
|
=head1 SYNOPSIS |
|
|
|
This file contains a table driven entity-->latex converter. |
|
|
|
This is part of the LearningOnline Network with CAPA project |
|
described at http://www.lon-capa.org. |
|
|
|
=head1 OVERVIEW |
|
|
|
|
|
Assumptions: |
|
The number of entities in a resource is small compared with the |
|
number of possible entities that might be translated. |
|
Therefore the strategy is to match a general entity pattern |
|
&.+; over and over, pull out the match, look it up in an entity -> tex hash |
|
and do the replacement. |
|
|
|
In order to simplify the hash, the following reductions are done: |
|
&#\d+; have the &# and ; stripped and are converted to an int. |
|
&#x.+; have the &#x and ; stripped and are converted to an int as a hex |
|
value. |
|
All others have the & and ; stripped. |
|
|
|
|
|
The hash: Add new conversions here; leave off the leading & and the trailing ; |
|
all numeric entities need only appear as their decimal versions |
|
(e.g. 1234 is sufficient; no need for 0x4d2 as well). |
|
|
|
This entity table is mercilessly cribbed from the HTML pocket reference |
|
table starting at pg 82. In most cases the LaTeX equivalent codes come from |
|
the original massive regular expression replacements originally by |
|
A. Sakharuk in lonprintout.pm |
|
|
|
I also want to acknowledge |
|
ISO Character entities and their LaTeX equivalents by |
|
Vidar Bronken Gundersen, and Rune Mathisen |
|
http://www.bitjungle.com/isoent-ref.pdf |
|
|
|
|
|
Note numerical entities are essentially unicode character codes. |
|
|
|
|
|
=head1 SUBROUTINES |
|
|
|
=over |
|
|
|
=item entity_to_utf8() |
|
|
|
|
|
Convert a numerical entity (that does not exist in our hash) |
|
to its UTF-8 equivalent representation. |
|
This allows us to support, to some extent, any entity for which |
|
dvipdf can find a glyph (given that LaTeX is now UTF-8 clean). |
|
|
|
Parameters: |
|
unicode - The unicode for the character. This is assumed to |
|
be a decimal value |
|
Returns: |
|
The UTF-8 equivalent of the value. |
|
|
|
=item entity_to_latex() |
|
|
|
Convert an entity to the corresponding LaTeX if possible. |
|
If not possible, and the entity is numeric, |
|
the entity is treated like a Unicode character and converted |
|
to UTF-8 which should display as long as dvipdf can find the |
|
appropriate glyph. |
|
|
|
The entity is assumed to have already had the |
|
&# ; or & ; removed |
|
|
|
Parameters: |
|
entity - Name of entity to convert. |
|
Returns: |
|
One of the following: |
|
- Latex string that produces the entity. |
|
- UTF-8 equivalent of a numeric entity for which we don't have a latex string. |
|
- ' ' for text entities for which there's no latex equivalent. |
|
|
|
|
|
=item replace_entities() |
|
|
|
Convert all the entities in a string. |
|
We locate all the entities, pass them into entity_to_latex and |
|
replace occurrences in the input string. |
|
The assumption is that there are few entities in any string/document |
|
so this looping is not too bad. The advantage of looping vs. regexping is |
|
that we now can use lookup tables for the translation in entity_to_latex above. |
|
|
|
Parameters: |
|
input - Input string/document |
|
Returns |
|
input with entities replaced by latexable stuff (UTF-8 encodings or |
|
latex control strings to produce the entity). |
|
|
|
=back |
|
|
|
=cut |
|
|
|
package Apache::entities; |
package Apache::entities; |
use strict; |
use strict; |
|
|
Line 192 use strict;
|
Line 87 use strict;
|
|
|
The general punctuation set |
The general punctuation set |
|
|
=item (8472-8501) |
=item (8462-8501) |
|
|
Letter like symbols |
Letter like symbols |
|
|
Line 742 my %entities = (
|
Line 637 my %entities = (
|
'euro' => '\texteuro', |
'euro' => '\texteuro', |
|
|
# Letter like symbols |
# Letter like symbols |
|
|
|
8462 => '\ensuremath{h}', |
|
'planckh' => '\ensuremath{h}', |
|
8463 => '\ensuremath{\hbar}', |
|
'hbar' => '\ensuremath{\hbar}', |
8472 => '\ensuremath{\wp}', |
8472 => '\ensuremath{\wp}', |
'weierp' => '\ensuremath{\wp}', |
'weierp' => '\ensuremath{\wp}', |
8465 => '\ensuremath{\Im}', |
8465 => '\ensuremath{\Im}', |
Line 751 my %entities = (
|
Line 650 my %entities = (
|
'real' => '\ensuremath{\Re}', |
'real' => '\ensuremath{\Re}', |
8482 => '\texttrademark', |
8482 => '\texttrademark', |
'trade' => '\texttrademark', |
'trade' => '\texttrademark', |
|
8496 => '\ensuremath{\mathcal{E}}', |
|
'expectation' => '\ensuremath{\mathcal{E}}', |
8501 => '\ensuremath{\aleph}', |
8501 => '\ensuremath{\aleph}', |
'alefsym'=> '\ensuremath{\aleph}', |
'alefsym'=> '\ensuremath{\aleph}', |
|
|
# Arrows and then some (harpoons from Hon Kie). |
# Arrows and then some (harpoons from Hon Kie). |
|
|
8592 => '\textleftarrow', |
8592 => '\ensuremath{\leftarrow}', |
'larr' => '\textleftarrow', |
'larr' => '\ensuremath{\leftarrow}', |
8593 => '\textuparrow', |
8593 => '\ensuremath{\uparrow}', |
'uarr' => '\textuparrow', |
'uarr' => '\ensuremath{\uparrow}', |
8594 => '\textrightarrow', |
8594 => '\ensuremath{\rightarrow}', |
'rarr' => '\textrightarrow', |
'rarr' => '\ensuremath{\rightarrow}', |
8595 => '\textdownarrow', |
'rightarrow' => '\ensuremath{\rightarrow}', |
'darr' => '\textdownarrow', |
8595 => '\ensuremath{\downarrow}', |
|
'darr' => '\ensuremath{\downarrow}', |
8596 => '\ensuremath{\leftrightarrow}', |
8596 => '\ensuremath{\leftrightarrow}', |
'harr' => '\ensuremath{\leftrightarrow}', |
'harr' => '\ensuremath{\leftrightarrow}', |
8598 => '\ensuremath{\nwarrow}', |
8598 => '\ensuremath{\nwarrow}', |
Line 893 my %entities = (
|
Line 795 my %entities = (
|
'cong' => '\ensuremath{\cong}', |
'cong' => '\ensuremath{\cong}', |
8773 => '\ensuremath{\cong}', |
8773 => '\ensuremath{\cong}', |
8775 => '\ensuremath{\ncong}', |
8775 => '\ensuremath{\ncong}', |
|
8776 => '\ensuremath{\approx}', |
|
'approx' => '\ensuremath{\approx}', |
8778 => '\ensuremath{\approxeq}', |
8778 => '\ensuremath{\approxeq}', |
|
'approxeq' => '\ensuremath{\approxeq}', |
8784 => '\ensuremath{\doteq}', |
8784 => '\ensuremath{\doteq}', |
8785 => '\ensuremath{\doteqdot}', |
8785 => '\ensuremath{\doteqdot}', |
8786 => '\ensuremath{\fallingdotseq}', |
8786 => '\ensuremath{\fallingdotseq}', |
Line 1164 sub replace_entities {
|
Line 1069 sub replace_entities {
|
1; |
1; |
|
|
__END__ |
__END__ |
|
|
|
=pod |
|
|
|
=head1 NAME |
|
|
|
Apache::entities.pm |
|
|
|
=head1 SYNOPSIS |
|
|
|
This file contains a table driven entity-->latex converter. |
|
|
|
This is part of the LearningOnline Network with CAPA project |
|
described at http://www.lon-capa.org. |
|
|
|
=head1 OVERVIEW |
|
|
|
|
|
Assumptions: |
|
The number of entities in a resource is small compared with the |
|
number of possible entities that might be translated. |
|
Therefore the strategy is to match a general entity pattern |
|
&.+; over and over, pull out the match, look it up in an entity -> tex hash |
|
and do the replacement. |
|
|
|
In order to simplify the hash, the following reductions are done: |
|
&#\d+; have the &# and ; stripped and are converted to an int. |
|
&#x.+; have the &#x and ; stripped and are converted to an int as a hex |
|
value. |
|
All others have the & and ; stripped. |
|
|
|
|
|
The hash: Add new conversions here; leave off the leading & and the trailing ; |
|
all numeric entities need only appear as their decimal versions |
|
(e.g. 1234 is sufficient; no need for 0x4d2 as well). |
|
|
|
This entity table is mercilessly cribbed from the HTML pocket reference |
|
table starting at pg 82. In most cases the LaTeX equivalent codes come from |
|
the original massive regular expression replacements originally by |
|
A. Sakharuk in lonprintout.pm |
|
|
|
I also want to acknowledge |
|
ISO Character entities and their LaTeX equivalents by |
|
Vidar Bronken Gundersen, and Rune Mathisen |
|
http://www.bitjungle.com/isoent-ref.pdf |
|
|
|
|
|
Note numerical entities are essentially unicode character codes. |
|
|
|
|
|
=head1 SUBROUTINES |
|
|
|
=over |
|
|
|
=item entity_to_utf8() |
|
|
|
|
|
Convert a numerical entity (that does not exist in our hash) |
|
to its UTF-8 equivalent representation. |
|
This allows us to support, to some extent, any entity for which |
|
dvipdf can find a glyph (given that LaTeX is now UTF-8 clean). |
|
|
|
Parameters: |
|
unicode - The unicode for the character. This is assumed to |
|
be a decimal value |
|
Returns: |
|
The UTF-8 equivalent of the value. |
|
|
|
=item entity_to_latex() |
|
|
|
Convert an entity to the corresponding LaTeX if possible. |
|
If not possible, and the entity is numeric, |
|
the entity is treated like a Unicode character and converted |
|
to UTF-8 which should display as long as dvipdf can find the |
|
appropriate glyph. |
|
|
|
The entity is assumed to have already had the |
|
&# ; or & ; removed |
|
|
|
Parameters: |
|
entity - Name of entity to convert. |
|
Returns: |
|
One of the following: |
|
- Latex string that produces the entity. |
|
- UTF-8 equivalent of a numeric entity for which we don't have a latex string. |
|
- ' ' for text entities for which there's no latex equivalent. |
|
|
|
|
|
=item replace_entities() |
|
|
|
Convert all the entities in a string. |
|
We locate all the entities, pass them into entity_to_latex and |
|
replace occurrences in the input string. |
|
The assumption is that there are few entities in any string/document |
|
so this looping is not too bad. The advantage of looping vs. regexping is |
|
that we now can use lookup tables for the translation in entity_to_latex above. |
|
|
|
Parameters: |
|
input - Input string/document |
|
Returns |
|
input with entities replaced by latexable stuff (UTF-8 encodings or |
|
latex control strings to produce the entity). |
|
|
|
=back |
|
|
|
=cut |