7.14 Languages

  7.14.1 Babel
  7.14.2 Umlaut
  7.14.3 Russian and Cyrillic

7.14.1 Babel

<..babel.def..>
 % Configuration chunk for babel.def.  It begins with the quotedblbase
 % chunk; a later chunk of the same name, marked with `+', adds more code.
 <.quotedblbase.>
-_-_-

<..babel macros..>
 % The babel macros chunk consists solely of the babel.def chunk.
 <.babel.def.>
-_-_-

<..config tex4ht..>+
 % Declare the `charset' configuration hook and seed it from the options.
 % \Configure{charset}{text} stores its argument in \A:charset, the macro
 % that the language-specific chunks of this section define and test.
 \NewConfigure{charset}[1]{\def\A:charset{#1}}
 % \:temp looks for `charset=value,' in its input: #2 is the text between
 % `charset=' and the next comma.  The `,charset=,' sentinel appended in the
 % call below guarantees a match, with #2 empty when \Preamble itself
 % carries no charset option.
 \def\:temp#1charset=#2,#3<.par del.>{%
    \if !#2!% true when #2 is empty: only the sentinel matched
      \Log:Note{for alternative charset, use
           the command line option ‘charset=...’}
    \else \Configure{charset}{charset=#2}\fi}
 \expandafter\:temp\Preamble ,charset=,<.par del.>
-_-_-

<..configure html4 CJK..>
 % Initial charset for CJK documents.  The four chunks that follow register
 % \Configure{CJK.enc} entries whose code redefines \A:charset for
 % individual encoding names.
 \def\A:charset{charset=utf-8}
 <.simplified chinese (mainland).>
 <.traditional chinese (taiwanese).>
 <.japanese charset.>
 <.korean charset.>
 % CJKbold hook: the opening and closing code are the HTML b tags.
 \Configure{CJKbold}{\HCode{<b>}}{\HCode{</b>}}
-_-_-

The following are from CJK.enc

% CEF:          CEFX    C80  
%               CEFY    C81

<..traditional chinese (taiwanese)..>
 % Each Bg5 encoding name listed here is paired with code that sets
 % \A:charset to charset=big5 (presumably executed when that CJK.enc
 % encoding is in use -- confirm against the CJK.enc hook definition).
 \Configure{CJK.enc}{Bg5}{\def\A:charset{charset=big5}}
 \Configure{CJK.enc}{Bg5pmC}{\def\A:charset{charset=big5}}
 \Configure{CJK.enc}{Bg5+}{\def\A:charset{charset=big5}}
-_-_-

GB2312 (1980) has been superseded by GBK (circa 1993?) and GB18030 (2000).

<..simplified chinese (mainland)..>
 % Each encoding name listed here is paired with code that sets \A:charset
 % to charset=gbk.  The base `GB' name is listed next to its variants, in
 % the same way the Bg5, JIS and KS chunks list their base names; without
 % it a plain GB document would keep the utf-8 default.
 \Configure{CJK.enc}{GB}{\def\A:charset{charset=gbk}}
 \Configure{CJK.enc}{GBpmC}{\def\A:charset{charset=gbk}}
 \Configure{CJK.enc}{GBK}{\def\A:charset{charset=gbk}}
 \Configure{CJK.enc}{GBt}{\def\A:charset{charset=gbk}}
 \Configure{CJK.enc}{GBtpmC}{\def\A:charset{charset=gbk}}
 % NOTE(review): CNS1-7 looks like the CNS 11643 (Taiwanese) planes --
 % confirm that gbk is really the intended charset for it.
 \Configure{CJK.enc}{CNS1-7}{\def\A:charset{charset=gbk}}
-_-_-

<..japanese charset..>
 % Each encoding name listed here is paired with code that sets \A:charset
 % to charset=iso-2022-jp.
 % NOTE(review): SJIS shares iso-2022-jp with the JIS entries -- confirm
 % this is intended rather than a Shift-JIS charset label.
 \Configure{CJK.enc}{JIS}{\def\A:charset{charset=iso-2022-jp}}
 \Configure{CJK.enc}{JISpmC}{\def\A:charset{charset=iso-2022-jp}}
 \Configure{CJK.enc}{JISdnp}{\def\A:charset{charset=iso-2022-jp}}
 \Configure{CJK.enc}{JISwn}{\def\A:charset{charset=iso-2022-jp}}
 \Configure{CJK.enc}{JIS2}{\def\A:charset{charset=iso-2022-jp}}
 \Configure{CJK.enc}{JIS2dnp}{\def\A:charset{charset=iso-2022-jp}}
 \Configure{CJK.enc}{SJIS}{\def\A:charset{charset=iso-2022-jp}}
-_-_-

<..korean charset..>
 % Each KS encoding name listed here is paired with code that sets
 % \A:charset to charset=iso-2022-kr.
 \Configure{CJK.enc}{KS}{\def\A:charset{charset=iso-2022-kr}}
 \Configure{CJK.enc}{KSpmC}{\def\A:charset{charset=iso-2022-kr}}
 \Configure{CJK.enc}{KSHL}{\def\A:charset{charset=iso-2022-kr}}
-_-_-

<..configure html4 hebtex..>
 % hebtex configuration: charset iso-8859-8, the four arabtext hooks, and
 % the vowel-mark hooks, whose content depends on the `nikud' option.
 \def\A:charset{charset=iso-8859-8}
 % arabtext hooks, in order:
 %  1. In horizontal mode no wrapper is opened: \end:arabtex is made empty
 %     and \IgnorePar is issued.  Otherwise a right-aligned div holding a
 %     one-cell table is opened and \end:arabtex is defined to close it.
 %     In both cases \start:arab is globally set to \relax.
 %  2. Runs \end:arabtex.
 %  3. If \start:arab is still \relax it is globally reset to \empty and
 %     nothing is emitted; on later calls a br tag is issued through \Tg.
 %  4. Empty.
 \Configure{arabtext}
    {\ifhmode \let\end:arabtex=\empty \IgnorePar
     \else
          \HCode{<div style="text-align:right"><table class="arabtex"><tr><td\Hnewline
               style="white-space:nowrap; text-align:right">}%
          \def\end:arabtex{\HCode{</td></tr></table></div>}}%
     \fi
     \global\let\start:arab=\relax}
    {\end:arabtex}
    {\ifx \start:arab\relax \global\let\start:arab=\empty
     \else \expandafter\Tg<br />\fi}
    {}
 % With the nikud option the vowel-mark hooks emit characters by code via
 % \char; qibbus, rdot, shindot and sindot emit only an HTML comment in
 % both branches.  Without the option a log note is written and every hook
 % emits an HTML comment carrying the mark's name.
 % NOTE(review): cholem and segol both map to \char3 in the nikud branch --
 % confirm against the font table.
 \:CheckOption{nikud}     \if:Option
    \Configure{chireq}{\char5 }
    \Configure{cholem}{\char3 }
    \Configure{chpatach}{\char1\char7 }
    \Configure{chqames}{\char2\char7 }
    \Configure{chsegol}{\char3\char7 }
    \Configure{dagesh}{\char46 }
    \Configure{meteg}{\char44 }
    \Configure{patachf}{\char1 }
    \Configure{patach}{\char1 }
    \Configure{qameschat}{\char6\char7 }
    \Configure{qames}{\char6 }
    \Configure{qibbus}{\HCode{<!--qibbus-->}}
    \Configure{rdot}{\HCode{<!--rdot-->}}
    \Configure{segol}{\char3 }
    \Configure{sere}{\char9 }
    \Configure{shindot}{\HCode{<!--shindot-->}}
    \Configure{shwa}{\char7 }
    \Configure{sindot}{\HCode{<!--sindot-->}}
 \else
    \Log:Note{for hebrew vowels, use
         the command line option ‘nikud’}
    \Configure{chireq}{\HCode{<!--chireq-->}}
    \Configure{cholem}{\HCode{<!--cholem-->}}
    \Configure{chpatach}{\HCode{<!--chpatach-->}}
    \Configure{chqames}{\HCode{<!--chqames-->}}
    \Configure{chsegol}{\HCode{<!--chsegol-->}}
    \Configure{dagesh}{\HCode{<!--dagesh-->}}
    \Configure{meteg}{\HCode{<!--meteg-->}}
    \Configure{patachf}{\HCode{<!--patachf-->}}
    \Configure{patach}{\HCode{<!--patach-->}}
    \Configure{qameschat}{\HCode{<!--qameschat-->}}
    \Configure{qames}{\HCode{<!--qames-->}}
    \Configure{qibbus}{\HCode{<!--qibbus-->}}
    \Configure{rdot}{\HCode{<!--rdot-->}}
    \Configure{segol}{\HCode{<!--segol-->}}
    \Configure{sere}{\HCode{<!--sere-->}}
    \Configure{shindot}{\HCode{<!--shindot-->}}
    \Configure{shwa}{\HCode{<!--shwa-->}}
    \Configure{sindot}{\HCode{<!--sindot-->}}
 \fi
-_-_-

<..configure html4 abidir..>
 % RL hooks (six arguments), selected by the `pic-RL' option.
 % With pic-RL: the first hook opens an element of class pic-RL -- a div
 % when entered in vertical mode (after \IgnorePar\EndP), a span otherwise,
 % the choice being kept in \RL:tag -- and then starts \Picture*{}; the
 % second hook ends the picture and closes that element.  The CSS rule
 % right-aligns the div form.
 % Without pic-RL: a log note is written, the content is wrapped in a span
 % of class RL, and the fourth hook emits a br tag.
 \:CheckOption{pic-RL}\if:Option
    \Configure{RL}
       {\ifvmode \IgnorePar\EndP \def\RL:tag{div}\else \def\RL:tag{span}\fi
        \HCode{<\RL:tag\space class="pic-RL">}\Picture*{}}
       {\EndPicture\HCode{</\RL:tag>}}
       {}  {} {} {}
    \Css{div.pic-RL{text-align:right;}}
 \else
    \Log:Note{for pictorial RL, use
              the command line option ‘pic-RL’}
    \Configure{RL}
       {\HCode{<span class="RL">}} {\HCode{</span>}}
       {}  {\HCode{<br />}} {} {}
 
 \fi
-_-_-

<..configure html4 romanian..>
 % Romanian: charset iso-8859-2.  The old iso-8859-2 accents chunk is
 % included only in the \else branch of the new-accents option test.
 \def\A:charset{charset=iso-8859-2}
 \:CheckOption{new-accents}     \if:Option \else
    <.old iso-8859-2 accents.>
 \fi
-_-_-

7.14.2 Umlaut

<..configure html4 german..>
 % In the \else branch of the new-accents option test, configure the
 % accent macro \grmn@OTumlaut: first the code table (the diaeresis codes
 % chunk followed by the pair {}{34}), then the two handlers, which hand
 % their arguments to \a:accents and \b:accents under the name `uml'.
 \:CheckOption{new-accents}
 \if:Option
 \else
    \Configure{accent}\grmn@OTumlaut\grmn@OTumlaut
       {<.diaeresis codes.>{}{34}}
       {\a:accents{uml}{#1}}
       {\b:accents{uml}{#1}{#2}}
 \fi
-_-_-

<..configure html4 ngerman..>
 % Same accent setup as the german chunk: in the \else branch of the
 % new-accents option test, \grmn@OTumlaut gets the diaeresis code table
 % (closed by the pair {}{34}) and the two `uml' handlers.
 \:CheckOption{new-accents}
 \if:Option
 \else
    \Configure{accent}\grmn@OTumlaut\grmn@OTumlaut
       {<.diaeresis codes.>{}{34}}
       {\a:accents{uml}{#1}}
       {\b:accents{uml}{#1}{#2}}
 \fi
-_-_-

<..babel.def..>+
 % Addition to the babel.def chunk: in the \else branch of the new-accents
 % option test, configure the accent macro \lower@umlaut with the diaeresis
 % code table (closed by the pair {}{34}) and the two `uml' handlers.
 \:CheckOption{new-accents}
 \if:Option
 \else
    \Configure{accent}\lower@umlaut\lower@umlaut
       {<.diaeresis codes.>{}{34}}
       {\a:accents{uml}{#1}}
       {\b:accents{uml}{#1}{#2}}
 \fi
-_-_-

<..u mlaut..>
 % Accent configuration for \bbl@umlauta, unconditional (no new-accents
 % test), with the same code table and `uml' handlers as the other umlaut
 % chunks.
 % NOTE(review): the second macro is \bbl@umlaute, whereas the german,
 % ngerman and babel.def chunks repeat the first macro; also, no reference
 % to a chunk named `u mlaut' is visible in this section -- confirm both
 % are intentional.
 \Configure{accent}\bbl@umlauta
    \bbl@umlaute{<.diaeresis codes.>{}{34}}
    {\a:accents{uml}{#1}}     {\b:accents{uml}{#1}{#2}}
-_-_-

7.14.3 Russian and Cyrillic

<..configure html4 t2benc..>
 % Leave \A:charset alone when it already reads charset=koi8-r; any other
 % value is replaced by charset=iso-8859-5.  \:temp is the scratch macro
 % holding the comparison text.
 \def\:temp{charset=koi8-r}%
 \ifx \A:charset\:temp
 \else
    \def\A:charset{charset=iso-8859-5}
 \fi
-_-_-

<..configure html4 koi8-r..>
 % Select koi8-r; the t2benc, russianb and babel charset chunks test for
 % exactly this text before falling back to iso-8859-5.
 \def\A:charset{charset=koi8-r}
-_-_-

<..set babel charset..>
 % Unless \A:charset already reads charset=koi8-r, call \a:temp with the
 % arguments russian, iso-8859-5 and ru (\a:temp is defined outside this
 % chunk).  \:temp is the scratch macro holding the comparison text.
 \def\:temp{charset=koi8-r}%
 \ifx \A:charset\:temp
 \else
    \a:temp{russian}{iso-8859-5}{ru}
 \fi
-_-_-

<..configure html4 russianb..>
 % russianb: declare the one-argument dtd-lang hook and set it to RU.
 \NewConfigure{dtd-lang}{1}  \Configure{dtd-lang}{RU}
 % Keep koi8-r when already selected; otherwise use iso-8859-5.
 \def\:temp{charset=koi8-r}\ifx \A:charset\:temp \else
    \def\A:charset{charset=iso-8859-5}
 \fi
 % The russian accent chunk is included only when \@begindocumenthook
 % differs from \:UnDef and the new-accents option test takes its \else
 % branch.  The commented-out lines would further restrict it to
 % \languagename being `russian'.
 \ifx \@begindocumenthook\:UnDef\else
    \:CheckOption{new-accents}     \if:Option \else
 %      \def\:temp{russian}\ifx \languagename\:temp
          <.russian.>
 %      \fi
 \fi\fi
-_-_-

We also had \append:def\@begindocumenthook{\HLet\"|=\ddot} in babel. It sends russian and brazil into an infinite loop. Why was it inserted?

ERROR: The non-ASCII characters might translate wrongly in the 4ht file (e.g., ^^e5 for 00EB below). They also do not show in the documentation, since a cmtt10 font is in use and it doesn’t cover those symbols. The problem will probably vanish once the new accent approach kicks in.

<..russian..>
 % Configure the \" accent (paired with \ddot) for Russian.
 % The code table is the diaeresis codes chunk followed by entries that
 % pair a base, written as \@use@text@encoding \@curr@enc plus a letter,
 % with a four-character code, and it is closed by the pair {}{34}.
 % First handler: passes #1 to \a:accents under the name `uml'.
 % Second handler: when #2 is the single character > or <, an empty \HCode
 % is issued; otherwise #1 and #2 go to \b:accents under the name `uml'.
 % NOTE(review): the {00EB} right after the diaeresis codes chunk has no
 % visible base, the last entry before {}{34} has an empty base, and both
 % \HCode arguments are empty.  The preceding ERROR paragraph says
 % non-ASCII characters do not show in the documentation, so characters
 % may be missing at these places -- confirm against the original source.
 \Configure{accent}\"\ddot{<.diaeresis codes.>{00EB}%
            {\@use@text@encoding \@curr@enc A}{00C4}%
            {\@use@text@encoding \@curr@enc E}{00CB}%
            {\@use@text@encoding \@curr@enc I}{00CF}%
            {\@use@text@encoding \@curr@enc O}{00D6}%
            {\@use@text@encoding \@curr@enc U}{00DC}%
            {\@use@text@encoding \@curr@enc Y}{0178}%
            {\@use@text@encoding \@curr@enc a}{00E4}%
            {\@use@text@encoding \@curr@enc e}{00EB}%
            {\@use@text@encoding \@curr@enc i}{00EF}%
            {\@use@text@encoding \@curr@enc \i}{00EF}%
            {\@use@text@encoding \@curr@enc o}{00F6}%
            {\@use@text@encoding \@curr@enc u}{00FC}%
            {\@use@text@encoding \@curr@enc y}{00FF}%
            {\@use@text@encoding \@curr@enc }{00EB}%
            {}{34}}
    {\a:accents{uml}{#1}}
    {\def\:temp{>}\def\:tempa{#2}\ifx \:temp\:tempa\HCode{}%
     \else \def\:temp{<}\ifx \:temp\:tempa\HCode{}%
     \else \b:accents{uml}{#1}{#2}\fi\fi}
-_-_-