9.22 babel.sty

  9.22.1 Character Set

<..configure html4 babel..>
 % Top-level babel chunk: expands, in this order, the four sub-chunks
 % "babel macros", "babel char set", "babel accents" and "u mlaut".
 <.babel macros.>
 <.babel char set.>
 <.babel accents.>
 <.u mlaut.>
-_-_-

9.22.1 Character Set

<..babel char set..>
 % \a:temp{#1}{#2}{#3} registers one babel language:
 %   #1  babel language name, compared with \languagename
 %   #2  charset name (may be empty)
 %   #3  value for the xml:lang attribute (may be empty)
 % When the option `charset=#2' is active, \a:charset is set to that string.
 % When #1 is the current language, \A:charset becomes `charset=#2' (it is
 % left empty for an empty #2) and, for a non-empty #3, @HTML is configured
 % to emit xml:lang="#3" whenever \iflanguage{#1} holds.
 \def\a:temp#1#2#3{%
    \ifOption{charset=#2}{\def\a:charset{charset=#2}}{}%
    \def\:tempa{#1}\ifx \languagename\:tempa
       \def\A:charset{#2}\ifx \A:charset\empty\else
          \def\A:charset{charset=#2}%
       \fi
       \if !#3!\else
          \Configure{@HTML}{%
             \iflanguage{#1}{xml:lang="#3" }{}%
          }
       \fi
    \fi}
 \a:temp{afrikaans}{iso-8859-1}{af}
 \a:temp{arabic}{iso-8859-6}{ar}
 % austrian is German as used in Austria, so it takes the same ISO 639-1
 % code `de' as german, germanb and ngerman below.
 \a:temp{austrian}{iso-8859-1}{de}
 \a:temp{brazilian}{iso-8859-1}{pt}
 \a:temp{brazil}{iso-8859-1}{pt}
 \a:temp{catalan}{iso-8859-1}{ca}
 \a:temp{croatian}{iso-8859-2}{hr}
 \a:temp{czech}{iso-8859-2}{cs}
 \a:temp{danish}{iso-8859-1}{da}
 \a:temp{dutch}{iso-8859-1}{nl}
 \a:temp{esperanto}{iso-8859-3}{eo}
 % NOTE(review): iso-8859-5 is the Latin/Cyrillic part of ISO 8859; confirm
 % that it is really the charset intended for estonian.
 \a:temp{estonian}{iso-8859-5}{et}
 \a:temp{finnish}{iso-8859-1}{fi}
 \a:temp{francais}{iso-8859-1}{fr}
 \a:temp{frenchb}{iso-8859-1}{fr}
 \a:temp{french}{iso-8859-1}{fr}
 \a:temp{galician}{iso-8859-1}{gl}
 \a:temp{germanb}{iso-8859-1}{de}
 \a:temp{german}{iso-8859-1}{de}
 \a:temp{greek}{iso-8859-7}{el}
 \a:temp{hebrew}{iso-8859-8}{he}
 \a:temp{hungarian}{iso-8859-2}{hu}
 \a:temp{ngerman}{iso-8859-1}{de}
 \a:temp{norsk}{iso-8859-1}{no}
 \a:temp{nynorsk}{iso-8859-1}{no}
 \a:temp{polish}{iso-8859-2}{pl}
 \a:temp{polski}{iso-8859-2}{pl}
 \a:temp{polutonikogreek}{iso-8859-7}{el}
 \a:temp{portuges}{iso-8859-1}{pt}
 \a:temp{portuguese}{iso-8859-1}{pt}
 \a:temp{romanian}{iso-8859-2}{ro}
 \a:temp{scottish}{iso-8859-1}{gd}
 \a:temp{slovak}{iso-8859-2}{sk}
 \a:temp{slovene}{iso-8859-2}{sl}
 \a:temp{spanish}{iso-8859-1}{es}
 \a:temp{swedish}{iso-8859-1}{sv}
 \a:temp{turkish}{iso-8859-9}{tr}
 \a:temp{ukrainian}{iso-8859-5}{uk}
 % uppersorbian has an empty third argument, so no xml:lang is configured.
 \a:temp{uppersorbian}{iso-8859-2}{}
 \a:temp{welsh}{iso-8859-1}{cy}
 <.set babel charset.>
-_-_-

<..configure html4 german..>+
 % Adds the shared "quotedblbase" chunk to the german configuration.
 <.quotedblbase.>
-_-_-

<..configure html4 ngerman..>+
 % Adds the shared "quotedblbase" chunk to the ngerman configuration.
 <.quotedblbase.>
-_-_-

<..quotedblbase..>
 % Low quotation marks, written through \ht:special as the hexadecimal
 % character references 201E (quotedblbase) and 201A (quotesinglbase).
 % {35} presumably stands for the character with code 35, `#' -- confirm.
 \Configure{quotedblbase}{\leavevmode\ht:special{t4ht@+&{35}x201E;}x}
 \Configure{quotesinglbase}{\leavevmode\ht:special{t4ht@+&{35}x201A;}x}
-_-_-

<..configure html4 austrian..>
 % \A:charset for the austrian configuration: charset=iso-8859-1.
 \def\A:charset{charset=iso-8859-1}
-_-_-

<..configure html4 catalan..>
 % \A:charset for the catalan configuration: charset=iso-8859-1.
 \def\A:charset{charset=iso-8859-1}
-_-_-

<..configure html4 croatian..>
 % \A:charset for the croatian configuration: charset=iso-8859-2.
 % Unless the option new-accents is set (tested with \:CheckOption and
 % \if:Option), the chunk "old iso-8859-2 accents" is included as well.
 \def\A:charset{charset=iso-8859-2}
 \:CheckOption{new-accents}     \if:Option \else
    <.old iso-8859-2 accents.>
 \fi
-_-_-

<..configure html4 latin2..>
 % \A:charset for the latin2 configuration: charset=iso-8859-2.
 \def\A:charset{charset=iso-8859-2}
-_-_-

<..configure html4 czech..>
 % \A:charset for the czech configuration: charset=iso-8859-2.
 \def\A:charset{charset=iso-8859-2}
-_-_-

<..configure html4 danish..>
 % \A:charset for the danish configuration: charset=iso-8859-1.
 \def\A:charset{charset=iso-8859-1}
-_-_-

<..configure html4 dutch..>
 % \A:charset for the dutch configuration: charset=iso-8859-1.
 \def\A:charset{charset=iso-8859-1}
-_-_-

<..configure html4 english..>
 % \A:charset for the english configuration: charset=iso-8859-1.
 \def\A:charset{charset=iso-8859-1}
-_-_-

<..configure html4 esperant..>
 % \A:charset for the esperant configuration: charset=iso-8859-3.
 \def\A:charset{charset=iso-8859-3}
-_-_-

<..configure html4 estonian..>
 % \A:charset for the estonian configuration: charset=iso-8859-5.
 % NOTE(review): iso-8859-5 is the Latin/Cyrillic part of ISO 8859; confirm
 % that it is really the charset intended for estonian.
 \def\A:charset{charset=iso-8859-5}
-_-_-

<..configure html4 finnish..>
 % \A:charset for the finnish configuration: charset=iso-8859-1.
 \def\A:charset{charset=iso-8859-1}
-_-_-

<..configure html4 francais..>
 % \A:charset for the francais configuration: charset=iso-8859-1.
 \def\A:charset{charset=iso-8859-1}
-_-_-

<..configure html4 frenchb..>
 % \A:charset for the frenchb configuration: charset=iso-8859-1.
 % The @TITLE configuration resets frenchb-thinspace to two empty
 % insertions (presumably to keep span markup out of the title -- confirm).
 \def\A:charset{charset=iso-8859-1}
 \Configure{@TITLE}{\Configure{frenchb-thinspace}{}{}}
-_-_-

<..configure html4 frenchb..>+
 % system-nbsp is bracketed by two \ht:special instructions,
 % t4ht@[unhskip before and t4ht@]unhskip after.  What these instructions
 % do is defined outside this section.
 \Configure{system-nbsp}
    {\ht:special{t4ht@[unhskip}}
    {\ht:special{t4ht@]unhskip}}
-_-_-

Patch submitted by Michal Hoftich:

The HTML entity &nbsp; is inserted before a colon. This causes a validation error in some XML environments where this entity is not defined, for example in EPUB 3. The macro \:nbsp should be used instead. A patch for the file tex4ht-html4.tex is provided in the attachment.

<..configure html4 frenchb..>+
 % Allocates the box register \tmp:bx when it still equals \:UnDef.
 % frenchb-nbsp, opening part: issues the t4ht@?unhskip special, opens
 % <span class="frenchb-nbsp">, inserts \:nbsp and starts setting the
 % following material into \tmp:bx.  Closing part: ends the box and closes
 % the span.  The box is not used again in this chunk (presumably the
 % original space is meant to be discarded -- confirm).
 % The CSS rule sets the span to 75% font size.
 \ifx \tmp:bx\:UnDef \csname newbox\endcsname \tmp:bx \fi
 \Configure{frenchb-nbsp}
    {\ht:special{t4ht@?unhskip}\HCode{<span class="frenchb-nbsp">}%
     \:nbsp\setbox\tmp:bx=\hbox\bgroup}
    {\egroup \HCode{</span>}}
 \Css{.frenchb-nbsp{font-size:75\%;}}
-_-_-

<..configure html4 frenchb..>+
 % frenchb-thinspace, opening part: issues the t4ht@?unhskip special, opens
 % <span class="frenchb-thinspace">, inserts \:nbsp and starts setting the
 % following material into \tmp:bx.  Closing part: ends the box and closes
 % the span.  This chunk does not allocate \tmp:bx itself.
 % The CSS rule sets the span to 75% font size.
 \Configure{frenchb-thinspace}
    {\ht:special{t4ht@?unhskip}\HCode{<span class="frenchb-thinspace">}%
     \:nbsp\setbox\tmp:bx=\hbox\bgroup}
    {\egroup \HCode{</span>}}
 \Css{.frenchb-thinspace{font-size:75\%;}}
-_-_-

French babel provides some macros that set text as superscript. They do not use the standard superscript macros, so these superscripts do not show up in the HTML output.

<..configure html4 frenchb..>+
 % Declares the two-part configuration frenchup and sets it to
 % <sup class="french"> ... </sup>; the CSS rule raises the text and
 % reduces it to 70% font size.
 % \fup is redefined to wrap its argument in \a:frenchup ... \b:frenchup.
 \NewConfigure{frenchup}{2}
 \Configure{frenchup}{\HCode{<sup class="french">}}{\HCode{</sup>}}
 \Css{sup.french{
 vertical-align:super;
 font-size:70\%;
 }}
 \renewcommand\fup[1]{\a:frenchup#1\b:frenchup}
-_-_-

<..configure html4 galician..>
 % \A:charset for the galician configuration: charset=iso-8859-1.
 \def\A:charset{charset=iso-8859-1}
-_-_-

<..configure html4 germanb..>
 % \A:charset for the germanb configuration: charset=iso-8859-1.
 \def\A:charset{charset=iso-8859-1}
-_-_-

<..configure html4 greek..>
 % \A:charset for the greek configuration: charset=iso-8859-7.
 % Three options select further material; when an option is absent a note
 % naming it is written with \Log:Note instead:
 %   charset=iso-8859-7  includes the chunk "greek ldf iso-8859-7"
 %   sgreek              includes "greek ldf sgreek"
 %   oldgreek            includes "greek ldf Greek Old Face"
 % The sgreek and oldgreek chunks are skipped when new-accents is set.
 \def\A:charset{charset=iso-8859-7}
 \ifOption{charset=iso-8859-7}
    {<.greek ldf iso-8859-7.>}
    {\Log:Note{for iso-8859-7 accents
           use the command line option ‘charset=iso-8859-7’}}
 \ifOption{sgreek}
   {\:CheckOption{new-accents}     \if:Option \else
        <.greek ldf sgreek.>
    \fi}
   {\Log:Note{for Sgreek font
           use the command line option ‘sgreek’}}
 \ifOption{oldgreek}
   {\:CheckOption{new-accents}     \if:Option \else
        <.greek ldf Greek Old Face.>
    \fi}
   {\Log:Note{for Greek Old Face font
           use the command line option ‘oldgreek’}}
-_-_-

<..greek ldf sgreek..>
 % Greek accent handling for the Sgreek font.
 %  - \A:charset is set to charset=iso-8859-1.
 %  - htf classes 254 and 252 are wrapped in <font face="Sgreek">; class 252
 %    also gets class="small-caps", styled by the htf-css rule.
 %  - accents, first argument: emits a hexadecimal character reference built
 %    from parameter 2 ({35} and {59} presumably stand for the characters
 %    with codes 35 `#' and 59 `;' -- confirm).
 %  - accents, second argument: when the macro named
 %    (current encoding):(parameter 1) exists it is applied to parameter 3,
 %    otherwise a \Picture is produced.
 %  - \LGR:acute, \LGR:grave, \LGR:circ and \LGR:uml emit a fixed \HChar
 %    code when the \ifx test finds \i; \LGR:gobble keeps \ifx and drops the
 %    two tokens that follow it, so the test sees what comes after them.
 %    Otherwise the argument is followed by one accent character in the
 %    Sgreek font.  \LGR:tilde has no special case for \i.
 \def\A:charset{charset=iso-8859-1}
 \Configure{htf}{254}{+}{<font \Hnewline
    face="}{}{}{}{}{Sgreek">}{</font>}
 \Configure{htf}{252}{+}{<font \Hnewline
    face="}{}{}{}{}{Sgreek" class="small-caps">}{</font>}
 \Configure{htf-css}{252}{.small-caps{font-variant: small-caps; }}
 \Configure{accents}
    {\ht:special{t4ht@+\string&{35}x#2{59}}x}
    {\expandafter \ifx \csname \@curr@enc :#1\endcsname\relax
        \Picture+{ \a:@Picture{#1}}#2{#3}\EndPicture
     \else
        \csname \@curr@enc :#1\endcsname {#3}%
     \fi
    }
 \def\LGR:acute#1{%
   \LGR:gobble\ifx #1\i \HChar{237}%
   \else #1\HCode{<font face="Sgreek">/</font>}\fi }
 \def\LGR:grave#1{%
   \LGR:gobble\ifx #1\i \HChar{236}%
   \else #1\HCode{<font face="Sgreek">\string\</font>}\fi}
 \def\LGR:circ#1{%
   \LGR:gobble\ifx #1\i \HChar{238}%
   \else #1\HCode{<font face="Sgreek">=</font>}\fi}
 \def\LGR:tilde#1{#1\HCode{<font face="Sgreek">@</font>}}
 \def\LGR:uml#1{%
   \LGR:gobble\ifx #1\i \HChar{239}%
   \else #1\HCode{<font face="Sgreek">+</font>}\fi}
 \def\LGR:gobble#1#2#3{#1}
-_-_-

The accented `\'\i' arrives in the form of `\' \use@text@encoding \@curr@enc \i'.

<..greek ldf Greek Old Face..>
 % Greek accent handling for the `Greek Old Face' font.
 %  - \A:charset is set to charset=iso-8859-1.
 %  - htf classes 254 and 252 are wrapped in <font face="Greek Old Face">;
 %    class 252 also gets class="small-caps", styled by the htf-css rule.
 %  - accents: same two-part setup as for Sgreek -- a hexadecimal character
 %    reference built from parameter 2, and a dispatch to the macro named
 %    (current encoding):(parameter 1), with a \Picture as fallback.
 %  - \LGR:acute, \LGR:grave, \LGR:circ, \LGR:uml:
 %      * the \LGR:gobble\ifx test selects a fixed \HChar code for \i;
 %      * otherwise \:temp receives what \:gobbleII leaves of the argument
 %        (presumably the bare base letter -- confirm) and each \LGR:ch
 %        compares it with one letter; on a match the given glyph code is
 %        emitted in the font and \:temp is cleared;
 %      * if \:temp is still non-empty afterwards, the argument is emitted
 %        followed by a stand-alone accent character.
 %    In the tables the grave codes are the acute codes plus 3 (i 133/136,
 %    w 232/235, u 218/221, o 210/213, h 187/190, a 163/166, e 154/157).
 %  - \LGR:ch{letter}{code}: the letter must be the first argument; a number
 %    there can never equal \:temp, so that entry would be dead.
 \def\A:charset{charset=iso-8859-1}
 \Configure{htf}{254}{+}{<font \Hnewline
    face="}{}{}{}{}{Greek Old Face">}{</font>}
 \Configure{htf}{252}{+}{<font \Hnewline
    face="}{}{}{}{}{Greek Old Face" class="small-caps">}{</font>}
 \Configure{htf-css}{252}{.small-caps{font-variant: small-caps; }}
 \Configure{accents}
    {\ht:special{t4ht@+\string&{35}x#2{59}}x}
    {\expandafter \ifx \csname \@curr@enc :#1\endcsname\relax
        \Picture+{ \a:@Picture{#1}}#2{#3}\EndPicture
     \else
        \csname \@curr@enc :#1\endcsname {#3}%
     \fi
    }
 \def\LGR:acute#1{%
   \LGR:gobble\ifx #1\i \HChar{237}%
   \else
      \edef\:temp{\:gobbleII#1\empty\empty}%
      \LGR:ch{i}{133}\LGR:ch{w}{232}\LGR:ch{u}{218}\LGR:ch{o}{210}%
      \LGR:ch{h}{187}\LGR:ch{a}{163}\LGR:ch{e}{154}%
      \ifx \:temp\empty\else #1\HChar{-180}\fi
   \fi }
 \def\LGR:grave#1{%
   \LGR:gobble\ifx #1\i \HChar{236}%
   \else
      \edef\:temp{\:gobbleII#1\empty\empty}%
      \LGR:ch{i}{136}\LGR:ch{w}{235}\LGR:ch{u}{221}\LGR:ch{o}{213}%
      \LGR:ch{h}{190}\LGR:ch{a}{166}\LGR:ch{e}{157}%
      \ifx \:temp\empty\else #1‘\fi
   \fi }
 \def\LGR:circ#1{%
   \LGR:gobble\ifx #1\i \HChar{238}%
   \else
      \edef\:temp{\:gobbleII#1\empty\empty}%
      \LGR:ch{i}{139}\LGR:ch{w}{238}\LGR:ch{u}{224}\LGR:ch{o}{253}%
      \LGR:ch{h}{193}\LGR:ch{a}{169}\LGR:ch{e}{252}%
      \ifx \:temp\empty\else #1\string^\fi
   \fi}
 \def\LGR:tilde#1{#1}
 \def\LGR:uml#1{%
   \LGR:gobble\ifx #1\i \HChar{239}%
   \else
      \edef\:temp{\:gobbleII#1\empty\empty}%
      \LGR:ch{u}{227}\LGR:ch{i}{142}%
      \ifx \:temp\empty\else  #1\HChar{168}\fi
   \fi}
 \def\LGR:gobble#1#2#3{#1}
 \def\LGR:ch#1#2{\def\:tempa{#1}\ifx \:temp\:tempa
    \HCode{<font face="Greek Old Face">}\HChar{#2}\HCode{</font>}%
    \let\:temp=\empty \fi }
-_-_-

<..configure html4 hebrew..>
 % \A:charset for the hebrew configuration: charset=iso-8859-8.
 \def\A:charset{charset=iso-8859-8}
-_-_-

<..configure html4 ngermanb..>
 % \A:charset for the ngermanb configuration: charset=iso-8859-1.
 \def\A:charset{charset=iso-8859-1}
-_-_-

<..configure html4 norsk..>
 % \A:charset for the norsk configuration: charset=iso-8859-1.
 \def\A:charset{charset=iso-8859-1}
-_-_-

<..configure html4 polish..>
 % \A:charset for the polish configuration: charset=iso-8859-2.
 \def\A:charset{charset=iso-8859-2}
-_-_-

<..configure html4 polski..>
 % \A:charset for the polski configuration: charset=iso-8859-2.
 \def\A:charset{charset=iso-8859-2}
-_-_-

<..configure html4 portuges..>
 % \A:charset for the portuges configuration: charset=iso-8859-1.
 \def\A:charset{charset=iso-8859-1}
-_-_-

<..configure html4 scottish..>
 % \A:charset for the scottish configuration: charset=iso-8859-1.
 \def\A:charset{charset=iso-8859-1}
-_-_-

<..configure html4 slovak..>
 % \A:charset for the slovak configuration: charset=iso-8859-2.
 % Unless the option new-accents is set (tested with \:CheckOption and
 % \if:Option), the chunk "old iso-8859-2 accents" is included as well.
 \def\A:charset{charset=iso-8859-2}
 \:CheckOption{new-accents}     \if:Option \else
    <.old iso-8859-2 accents.>
 \fi
-_-_-

<..configure html4 slovene..>
 % \A:charset for the slovene configuration: charset=iso-8859-2.
 % Unless the option new-accents is set (tested with \:CheckOption and
 % \if:Option), the chunk "old iso-8859-2 accents" is included as well.
 \def\A:charset{charset=iso-8859-2}
 \:CheckOption{new-accents}     \if:Option \else
    <.old iso-8859-2 accents.>
 \fi
-_-_-

<..configure html4 spanish..>
 % \A:charset for the spanish configuration: charset=iso-8859-1.
 % es@accents is configured with \es:accents as its first part and an empty
 % second part.  \es:accents takes everything up to \b:es@accents as its
 % argument and emits it as a hexadecimal character reference.
 % The chunk "spanish configs" is included at the end.
 \def\A:charset{charset=iso-8859-1}
 \Configure{es@accents}{\es:accents}{}
 \def\es:accents#1\b:es@accents{\HCode{&\#x#1;}}
 <.spanish configs.>
-_-_-

<..spanish configs..>
 % The guillemets are emitted as the character references 00AB (left)
 % and 00BB (right).
 \Configure{guillemotleft}{\HCode{&\#x00AB;}}
 \Configure{guillemotright}{\HCode{&\#x00BB;}}
-_-_-

<..spanish configs..>+
 % Hooks named after the Spanish shorthands:
 %   "a, "o          character references 00AA / 00BA inside a span of
 %                   class underline
 %   "e, "A, "O, "E  the plain letter inside <sup class="underline">
 %   'i              character reference 00ED
 % The span elements carry an explicit class attribute, as the sup elements
 % do, and the 'i hook name uses the ASCII apostrophe.
 \Configure{spanish"a}{\HCode{<span class="underline">&\#x00AA;</span>}}
 \Configure{spanish"o}{\HCode{<span class="underline">&\#x00BA;</span>}}
 \Configure{spanish"e}{\HCode{<sup class="underline">e</sup>}}
 \Configure{spanish"A}{\HCode{<sup class="underline">A</sup>}}
 \Configure{spanish"O}{\HCode{<sup class="underline">O</sup>}}
 \Configure{spanish"E}{\HCode{<sup class="underline">E</sup>}}
 \Configure{spanish'i}{\HCode{&\#x00ED;}}
-_-_-

<..spanish configs..>+
 % es@accent entry for the acute accent: the first argument is the string
 % form of \OT1 followed by the accent command \' (ASCII apostrophe, like
 % the \" and \~ entries); the second is the chunk "acute codes".
 \Configure{es@accent}
   {\string\OT1\string\'}
   {<.acute codes.>}
-_-_-

<..spanish configs..>+
 % es@accent entry for the diaeresis: the first argument is the string form
 % of \OT1 followed by \"; the second is the chunk "diaeresis codes"
 % followed by the pair {}{34} (34 is presumably the character code of the
 % double quote -- confirm).
 \Configure{es@accent}
   {\string\OT1\string\"}
   {<.diaeresis codes.>{}{34}}
-_-_-

<..spanish configs..>+
 % es@accent entry for the tilde: the first argument is the string form of
 % \OT1 followed by \~; the second is the chunk "tilde codes".
 \Configure{es@accent}
   {\string\OT1\string\~}
   {<.tilde codes.>}
-_-_-

<..configure html4 swedish..>
 % \A:charset for the swedish configuration: charset=iso-8859-1.
 \def\A:charset{charset=iso-8859-1}
-_-_-

<..configure html4 turkish..>
 % \A:charset for the turkish configuration: charset=iso-8859-9.
 \def\A:charset{charset=iso-8859-9}
-_-_-

<..configure html4 ukraineb..>
 % \A:charset for the ukraineb configuration: charset=iso-8859-5.
 \def\A:charset{charset=iso-8859-5}
-_-_-

<..configure html4 usorbian..>
 % \A:charset for the usorbian configuration: charset=iso-8859-2.
 \def\A:charset{charset=iso-8859-2}
-_-_-

<..configure html4 welsh..>
 % \A:charset for the welsh configuration: charset=iso-8859-1.
 \def\A:charset{charset=iso-8859-1}
-_-_-