Semantics of Dvi Code

Chapter 3
Semantics of Dvi Code

   3.1 Scanning the Characters
   3.2 Spaces
      Line Breaks
      Move Horizontally
      Move Vertically
      Typeset Indirect Characters
      Utilities
      Out of Place Spaces
   3.3 Rulers
   3.4 Command Characters
      Fonts
   3.5 Specials: Extensions to dvi Primitives

[example of a dvi file]

3.1 Scanning the Characters

<..process ch according to the case ..>
/* Dispatch on the current dvi byte `ch`.  ch_1 starts as a copy of ch;
   the <process indirect character ch> chunk may replace it with a
   character code read from the file.  Codes below 132 typeset a character
   and advance x_val; codes 133..136 typeset without advancing; all other
   opcodes are handled by the <process dvi op 'ch'> cases. */
{       register int ch_1;
    ch_1 = ch;
    <.auto quit halign .>
    <.process indirect character ch .>
    <.get back sub/sup before ch .>
    <.trace dvi char block .>
    <.span char block .>
    <.pre accent symbol .>
    if( ch < 132 )  {
       /* typeset and move: advance by the value the insertion returns */
       x_val += math_class_on? <.set-ch-class(ch-1).>
                             : insert_ch(ch_1);
       if(  max_x_val < x_val ) max_x_val = x_val;
    } else switch( ch ) {
       case 133: case 134: case 135: case 136: {
            /* typeset and do not move: x_val stays put, but max_x_val
               is still extended to cover the character */
            INTEGER w;
          w = math_class_on?  <.set-ch-class(ch-1).> : insert_ch(ch_1);
          max_x_val = ( x_val + w > max_x_val )?  x_val + w : max_x_val;
          break;
       }
       <.process dvi op ’ch’.>
    }
}
-_-_-

Characters are inserted into the html file as they are encountered. Spaces are inserted when ‘x_val’ is larger than ‘max_x_val’.

<..vars ..>+
/* Current and furthest-reached positions used to decide on spaces and line
   breaks.  -10000 is the sentinel that move_x, try_new_line and the rule
   code compare against / assign to mean "max_x_val not set". */
static long int x_val = 0, max_x_val = -10000,
max_y_val = 0, prev_y_val = 0;
-_-_-

3.2 Spaces

Line Breaks

Conditionally start new line.

<..header functions ..>+
static void try_new_line( ARG_I(void) );
-_-_-

<..functions ..>+
/* Conditionally start a new output line.  The vertical distance v since
   prev_y_val is compared against thresholds derived from dy, which is
   <sign of ex> * <size of ex>, or 0.0 when no font is current
   (cur_fnt == -1). */
static void try_new_line(MYVOID)
{        long int  v;
          double    dy;
    dy =  (cur_fnt == -1)? 0.0 : (<.sign of ex .> * <.size of ex .>) ;
    v = y_val - prev_y_val;
    if( !text_on && (y_val > max_y_val) ){
      /* no text on the line yet and y is beyond max_y_val:
         break on the smaller dy/2.5 threshold and reset max_x_val */
      if( v > dy/2.5 ){
         <.cond insert new line .>  max_x_val = -10000;
         /* recorded y values are forced to 0 when stack_n is 0 */
         prev_y_val = max_y_val  = stack_n? y_val : 0;
      }
    }else{
       /* otherwise require a full dy before breaking */
       if( v > dy ){ <.cond insert new line .>  max_x_val = x_val;
                     prev_y_val = stack_n? y_val : 0;
       /* moved up by more than dy/1.4: only re-base prev_y_val, no break */
       }else if( v < -(dy / 1.4) ) prev_y_val = stack_n? y_val : 0;
}  }
-_-_-

The 2.5 divisor provides a line break before b, but not before 5 and a, in ${A’}_{5_{a_b}}$.

<..sign of ex ..>
(<.size of ex .> < 0? -1 : 1)
-_-_-

‘<size of ex>’ is negative in hclassic font of hebrew, and probably also in other right-to-left fonts.

In the default font, one ex is about 4.5pt. We take distances greater than 1.7ex as the distance between lines, and distances smaller than (1.7/1.3)ex as the distance from the base line to a superscript. My TeX inserts superscripts before subscripts into the dvi files, no matter what order they have in the input (otherwise, TeX obeys the input order in the output).

Move Horizontally

<..process dvi op ’ch’..>
/* Horizontal move with an explicit operand.  The first three cases are
   empty and fall through to the shared body of the 4-byte case. */
case <.mv hor 1-byte .>: {;}
case <.mv hor 2-byte .>: {;}
case <.mv hor 3-byte .>: {;}
case <.mv hor 4-byte .>: {
    try_new_line();
    /* argument to get_int is 1..4: the opcode's offset from the 1-byte
       variant plus one (presumably the operand width in bytes) */
    (void) move_x((INTEGER) get_int(ch - <.mv hor 1-byte .> + 1 ));
    break; }
-_-_-

<..process dvi op ’ch’..>+
/* Move right by the stored distance dx_1 (no new-line check here). */
case <.mv hor dist dx.1 .>: {
    (void) move_x( dx_1 ); break; }
/* Read a new distance, move by it, and remember it in dx_1.  The empty
   cases fall through to the shared body. */
case <.dx.1 store and mv hor 1-byte .>: {;}
case <.dx.1 store and mv hor 2-byte .>: {;}
case <.dx.1 store and mv hor 3-byte .>: {;}
case <.dx.1 store and mv hor 4-byte .>: {
    try_new_line();
    /* move_x returns its argument, so dx_1 receives the value just read */
    dx_1 = move_x((INTEGER) get_int(ch - <.mv hor dist dx.1 .> ));
    break; }
-_-_-

<..process dvi op ’ch’..>+
/* Move right by the stored distance dx_2 (no new-line check here). */
case <.mv hor dist dx.2 .>: {
    (void) move_x( dx_2 ); break; }
/* Read a new distance, move by it, and remember it in dx_2.  The empty
   cases fall through to the shared body. */
case <.dx.2 store and mv hor 1-byte .>: {;}
case <.dx.2 store and mv hor 2-byte .>: {;}
case <.dx.2 store and mv hor 3-byte .>: {;}
case <.dx.2 store and mv hor 4-byte .>: {
    try_new_line();
    /* move_x returns its argument, so dx_2 receives the value just read */
    dx_2 = move_x((INTEGER) get_int(ch - <.mv hor dist dx.2 .> ));
    break; }
-_-_-

<..vars ..>+
/* Horizontal distances saved by the "store and mv hor" ops and replayed by
   the "mv hor dist dx.1 / dx.2" ops. */
static INTEGER dx_1 = 0, dx_2 = 0;
-_-_-

<..header functions ..>+
static INTEGER move_x( ARG_I(register INTEGER) );
-_-_-

<..functions ..>+

/* Advance x_val by d and emit spacing when appropriate.
   Returns d unchanged, so callers can store the distance (see dx_1, dx_2).
   i and dx are scratch variables used by the included chunks. */
static INTEGER move_x( d )
       register INTEGER  d
;{    register long     i, dx;
    x_val += d;
    if( (x_val > max_x_val) && x_val ){
      /* max_x_val unset: measure the gap from the position before this move */
      if( max_x_val == -10000) max_x_val = x_val - d;
      <.insert space .>
    } else    if( d && text_on  && (x_val != max_x_val) ){
       /* non-zero move inside text that did not pass max_x_val */
       <.space for llap .>
    }
    return  d;
}
-_-_-

x_val == max_x_val typically occurs in a push after [hbox], to compensate for the loss of space due to a pop.

The \llap creates problems for spaces, e.g., in the section number at section heads. The same might happen for \hrule. The following provides a solution.

<..space for llap ..>
/* Emit at most one blank for a move of d that did not pass max_x_val.
   Uses i, dx and d from the enclosing move_x. */
if( !ignore_spaces ){
    /* d / word_sp, plus 0.5, truncated */
    i =  (INTEGER) ( (double) (dx = d) / word_sp + 0.5 );
    if( i<0 ) i=0;
    /* a zero count still yields a blank when the raw distance exceeds 99999 */
    if( !i ) i = dx>99999L;
    if( i ){ put_char(’ ’); }
}
-_-_-

Originally, we had also ‘(max_x_val == -10000)’ in the else part.

Move Vertically

<..process dvi op ’ch’..>+
/* Vertical move with an explicit operand; the empty cases fall through to
   the shared body of the 4-byte case. */
case <.mv ver 1-byte .>: {;}
case <.mv ver 2-byte .>: {;}
case <.mv ver 3-byte .>: {;}
case <.mv ver 4-byte .>: {
/* argument to get_int is 1..4 (opcode offset from the 1-byte variant + 1) */
(void) move_y( (INTEGER) get_int(ch - <.mv ver 1-byte .> + 1 ));
break; }
-_-_-

<..process dvi op ’ch’..>+
/* Move down by the stored distance dy_1. */
case <.mv ver dist dy.1 .>: { (void) move_y( dy_1 );   break; }
/* Read a new distance, move by it, and remember it in dy_1; the empty
   cases fall through. */
case <.dy.1 store and mv ver 1-byte .>: {;}
case <.dy.1 store and mv ver 2-byte .>: {;}
case <.dy.1 store and mv ver 3-byte .>: {;}
case <.dy.1 store and mv ver 4-byte .>: {
   /* move_y returns its argument */
   dy_1 = move_y( (INTEGER) get_int(ch - <.mv ver dist dy.1 .> ));
   break; }
-_-_-

<..process dvi op ’ch’..>+
/* Move down by the stored distance dy_2. */
case <.mv ver dist dy.2 .>: { (void) move_y( dy_2 );   break; }
/* Read a new distance, move by it, and remember it in dy_2; the empty
   cases fall through. */
case <.dy.2 store and mv ver 1-byte .>: {;}
case <.dy.2 store and mv ver 2-byte .>: {;}
case <.dy.2 store and mv ver 3-byte .>: {;}
case <.dy.2 store and mv ver 4-byte .>: {
   /* move_y returns its argument */
   dy_2 = move_y( (INTEGER) get_int(ch - <.mv ver dist dy.2 .> ));
   break; }
-_-_-

<..vars ..>+
/* Vertical distances saved by the "store and mv ver" ops and replayed by
   the "mv ver dist dy.1 / dy.2" ops. */
static INTEGER dy_1 = 0, dy_2 = 0;
/* Current vertical position; updated only through move_y in this section. */
static long int y_val = 0;
-_-_-

<..header functions ..>+
static INTEGER move_y( ARG_I(register INTEGER) );
-_-_-

<..functions ..>+

/* Advance y_val by d and return d unchanged, so callers can store the
   distance (see dy_1, dy_2). */
static INTEGER move_y( d ) register INTEGER d
;{  y_val += d;
    <.trace v spaces .>
    return  d;
}
-_-_-

Typeset Indirect Characters

Read the character to be typeset, insert the character, and either move the cursor (ops 128–131) or don’t (ops 133–136).

<..process indirect character ch ..>
/* For opcodes 128..136 other than the rule-and-move op, the character code
   follows the opcode in the file: read it into ch_1. */
if( (ch > 127) && (ch < 137) && (ch != <.insert rule + move op .>) ){
/* argument is 1..4: 128..131 map to 1..4, and so do 133..136 once the
   (ch>132) term shifts them down by one */
ch_1 = (int) get_unt( (ch - (ch>132)) % 4 +1);
}
-_-_-

NOTE. Didn’t take care yet of character codes beyond c[1] (e.g., unicode).

[more]

Utilities

Spaces are inserted when ‘x_val’ is larger than ‘max_x_val’.

<..insert space ..>
/* Convert the gap dx = x_val - max_x_val into a count of blanks: divide by
   word_sp inside text (text_on) or by margin_sp otherwise, add 0.5 and
   truncate.  Uses i and dx from the enclosing function. */
i =  (INTEGER) (  (double) (dx = x_val - max_x_val)
             /         (text_on? word_sp : margin_sp)
             +         0.5 );
<.try word space if i=0 .>
if( i<0 ) i=0;
if( i==0 ){ <.missing space .> }
if( i ){ <.trace h spaces .> }
if( !ignore_spaces ){
   <.end text accent .>
    /* printing a blank also switches text_on to TRUE */
    while( i-- ) { text_on=TRUE;  put_char(’ ’); }
/* spaces are being ignored: remember the count instead of printing */
} else { recover_spaces = (int) i; }
max_x_val = x_val;
-_-_-

<..try word space if i=0 ..>
/* Fallback when the first estimate gave zero blanks: recompute the count
   from dx using word_sp as the divisor. */
if( i==0 ){
    i =  (INTEGER) (  (double) dx
             /         word_sp
             +         0.5 );
}
-_-_-

<..recover ignored space ..>
/* Print the blanks that were counted while spaces were being ignored. */
while( recover_spaces-- ){ text_on=TRUE; put_char(’ ’); }
/* the post-decrement leaves the counter at -1 after the loop; reset it */
recover_spaces = 0;
-_-_-

<..ignore spaces ..>
/* Enter a region where computed spaces are not printed (nestable counter). */
ignore_spaces++;
-_-_-

<..end ignore spaces ..>
/* Leave one level of the ignore-spaces region. */
ignore_spaces--;
-_-_-

<..unhskip vars ..>
/* Locals for the conditional-unhskip scan. */
U_CHAR *unhskip_mark;   /* string read from the special; matched against later marks */
long retract_addr;      /* dvi file offset to seek back to */
BOOL unhskip;           /* loop flag of the scan */
/* saved cur_fnt, current dvi byte, and nesting depth of '[' / ']' marks */
int cr_fnt, ch, unskip_depth;
-_-_-

In the case of unhskip we don’t want to ignore embedded font changes.

<..ignore chs ..>
/* With an argument left in the special (special_n != 0): run the
   conditional-unhskip scan using that argument as the mark.
   Without one: just bump the ignore_chs counter. */
if( special_n ){
                   <.unhskip vars .>
    cr_fnt = cur_fnt;                 /* restored after the scan */
    unskip_depth = 0;
    unhskip_mark = get_str( (int) special_n );  special_n=0;
    retract_addr = ftell(dvi_file);   /* position the scan may seek back to */
    <.conditional unhskip .>
    cur_fnt = cr_fnt;
    free((void *)  unhskip_mark);
} else { ignore_chs++;; }
-_-_-

<..conditional unhskip ..>
/* Read dvi bytes until a hook clears unhskip; bytes below 128 are simply
   consumed, opcodes >= 128 are handled by the included case chunks.
   Afterwards <scan condition> inspects the byte that follows. */
unhskip = TRUE;
while( unhskip ){
   if( (ch = get_char()) >= 128 ) {
   switch( ch ){
     <.ignore font def on preview pass .>
     <.ignore on preview pass .>
     <.ignore indirect chars on preview pass .>
     <.push/pop for unhskip .>
     <.hooks for conditional unhskip .>
     <.fonts and default ignored on preview pass .>
   }
} }
<.scan condition .>
-_-_-

<..scan condition ..>
/* Look at the next dvi byte.  If it is a tex4ht special whose code is '@'
   and whose text is '?' followed by unhskip_mark, keep the file position
   (the skipped material stays skipped).  In every other case seek back to
   retract_addr.  The do/while(FALSE) runs once; `break` is the early exit
   that bypasses the seek. */
do{
                                long int i;
                                char *mark;
                                BOOL matched;
   ch = get_char();
   if(
       ( ch==<.special 1 .>) ||  ( ch==<.special 2 .>) ||
       ( ch==<.special 3 .>) ||  ( ch==<.special 4 .>)
     )
   {
      if( tex4ht_special( &ch, &i ) ){
          mark = get_str( (int) i );
          matched = (ch=='@') && ( *mark=='?') && eq_str(mark+1,unhskip_mark);
          /* get_str results are released with free() elsewhere in this
             file (unhskip_mark, eoln_str, space_str); release this one too
             instead of leaking it on both paths */
          free((void *)  mark);
          if( matched ){ break; }
      }
   }
   (IGNORED) fseek(dvi_file, (long) retract_addr, <.abs file addr .>);
} while(FALSE);
-_-_-

<..ignore indirect chars on preview pass ..>
/* Opcodes that carry a character code after them: read and discard it. */
case 128: case 129: case 130: case 131: case 133:
case 134: case 135: case 136: {
/* same 1..4 argument computation as in <process indirect character ch> */
(void) get_unt( (ch-(ch>132)) % 4 +1);
break;
}
-_-_-

<..push/pop for unhskip ..>
/* Save/retrieve-location ops are consumed with no action during the scan. */
case <.sv loc op .>:
case <.retrieve loc op .>: { break; }
-_-_-

<..hooks for conditional unhskip ..>
/* Specials met during the unhskip scan.  A tex4ht special with code '@'
   whose text after the first character equals unhskip_mark adjusts the
   nesting depth: '[' opens a level, ']' closes one.  unhskip is set to
   !(--unskip_depth) on ']', i.e. it stays TRUE only when the depth has
   just returned to 0.  Other specials go to <ignore non-t4ht special>. */
case <.special 1 .>:  case <.special 2 .>:
case <.special 3 .>:  case <.special 4 .>: {  long int i;
   if( tex4ht_special( &ch, &i ) ){    char *mark;
     mark = get_str( (int) i );
     if( i ){
       if( (ch=='@') && eq_str(mark+1,unhskip_mark) ){
          switch( *mark ){
            case '[': { unskip_depth++; break; }
            case ']': {
                 unhskip = !(--unskip_depth);
                 break;
              }
             default: { ; }
     }  }  }
     /* get_str results are released with free() elsewhere in this file;
        release this one too instead of leaking it on every special */
     free((void *)  mark);
   }else{ <.ignore non-t4ht special .>  }
   break;
}
-_-_-

<..end ignore chs ..>
/* With an argument: consume the remaining special_n bytes of the special.
   Without one: undo one level of ignore_chs. */
if( special_n ){
while( special_n-- > 0 ){ (void) get_char(); }
} else { ignore_chs--; }
-_-_-

<..cond ignore chs ..>
/* Consume and discard the remaining special_n bytes. */
while( special_n-- > 0 ){ (void) get_char(); }
-_-_-

<..get eoln str ..>
/* Replace eoln_str: free the previous string, then take the special's
   argument as the new one, or clear it when there is no argument. */
if( eoln_str ){ free((void *) eoln_str); }
if( special_n ){
eoln_str = get_str( (int) special_n ); special_n=0;
} else { eoln_str = (char *) 0; }
-_-_-

<..insert eoln ch ..>
/* Output eoln_str if one is set, otherwise the character ch itself. */
if( eoln_str ){ print_f(eoln_str); }
else { (IGNORED) put_4ht_ch( ch, cur_o_file ); }
/* pending ignored spaces are dropped here */
recover_spaces = 0;
-_-_-

<..vars ..>+
static U_CHAR *eoln_str = (char *)0;
-_-_-

<..get space str ..>
/* Replace space_str: free the previous string, then take the special's
   argument as the new one, or clear it when there is no argument. */
if( space_str ){ free((void *) space_str); }
if( special_n ){
space_str = get_str( (int) special_n ); special_n=0;
} else { space_str = (char *) 0; }
-_-_-

<..insert space ch ..>
/* Output space_str if one is set, otherwise the character ch itself. */
if( space_str ){ print_f(space_str); }
else { (IGNORED) put_4ht_ch( ch, cur_o_file ); }
-_-_-

<..vars ..>+
static U_CHAR *space_str = (char *)0;
-_-_-

<..vars ..>+
/* ignore_chs / ignore_spaces: counters bumped and dropped by the
   ignore ... / end ignore ... chunks.  recover_spaces: number of blanks
   computed while ignore_spaces was non-zero. */
static int ignore_chs=0, ignore_spaces=0, recover_spaces=0;
-_-_-

Out of Place Spaces

<..missing space ..>
          /* Look ahead in the dvi file, without consuming it, at the ops
             that follow a move which produced zero blanks.  The file
             position is saved first and restored at the end. */
          long  curr_pos;
          BOOL  done;
          int ch, cr_fnt;
curr_pos = ftell(dvi_file);
done = FALSE;
while( !done ){
    ch = get_char();
    switch( ch ){
      <.h-move get space info .>
      <.h-move skip font loading .>
      <.case: skip font def .>
      /* save/retrieve-location ops are skipped over */
      case <.sv loc op .>:
      case <.retrieve loc op .>: { break; }
      default: {
         /* anything that is not a font-selection op in the
            <font 0>..<font 63> range ends the look-ahead */
         if( (ch < <.font 0 .>) || (ch > <.font 63 .>)   ){
            done = TRUE;
         } else {
            /* a font selection: re-estimate the blank count with that
               font; the loop keeps scanning afterwards */
            <.check alternative space .>
}  } }  }
(IGNORED) fseek(dvi_file, curr_pos, <.abs file addr .>);
-_-_-

<..check alternative space ..>
          /* Re-estimate the blank count i using the word space of the font
             selected by ch.  This local word_sp hides any outer word_sp
             for the rest of the chunk, including inside the included
             <try word space if i=0>. */
          double word_sp;
cr_fnt = ch - <.font 0 .>;
cr_fnt = search_font_tbl( cr_fnt );
word_sp = design_size_to_pt( font_tbl[cr_fnt].word_sp )
              * (double) font_tbl[cr_fnt].scale;
i =  (INTEGER) (  (double) dx
             /         (text_on? word_sp : margin_sp)
             +         0.5 );
<.try word space if i=0 .>
/* at most one blank is produced by this path */
if( i>0 ){ i =1; }
-_-_-

<..h-move get space info-WAIT ..>
/* Font selection with an explicit font number of 1..4 bytes: read the
   number and map it through search_font_tbl into cr_fnt.
   The original chunk declared a local `int ch` here, which hid the switch
   operand and was read uninitialized when computing n; the enclosing ch
   (the opcode being dispatched on) is what the computation needs. */
case  <.font 1-byte .>:
case <.font 2-bytes .>:
case <.font 3-bytes .>:
case     <.font int .>: {
                               INTEGER n;
    /* n is 1..4: the opcode's offset from the 1-byte variant plus one */
    n = ch - <.font 1-byte .> + 1;
    /* the 4-byte form is read with get_int, shorter ones with get_unt */
    cr_fnt = (int)  ((n==4)? get_int(4) : get_unt((int) n));
    cr_fnt = search_font_tbl( cr_fnt );
    break;
}
-_-_-

Removed ‘if( !i ) i = dx>99999L;’ after ‘if( i<0 ) i=0;’, and inserted ‘<missing space>’ instead. It created problems for cases like

\documentclass{article}

\begin{document}
$\mathcal{ABCDEFGHIJKLMNOPQRSTUVWXYTJZ}$
\end{document}

It gives ‘dx=121058’ on ‘word_sp=500255.625000’. A 0.24 ratio with a large space.
But that is problematic for:
\documentclass{article}

\renewcommand{\rmdefault}{ptm}
\immediate\write16{........\the\textwidth}
\setlength\textwidth{478.00812pt}%

\begin{document}

The first is stylistic - there is inconsistent indenting, the
constants are given non-meaningful names, and are not shared between
\texttt{func} and

\end{document}

The end is disassembled into
006066: Y0
006067: PUSH
006068: FNT_DEF1: 16
006070:           checksum         : -538297224
006074:           scale            : 655360
006078:           design           : 655360
006082:           name             : cmtt10
006090: FONT_16
006091: Char:     func
006095: RIGHT3:   163840
006099: FONT_15
006100: Char:     and
006103: POP
006104: Y0

We have ‘dx=163840’ with ‘word_sp=344061.25’ for font 16 and ‘word_sp=163840’ for font 15. The space of 163840 seems to fit the font that follows.
TeX is cheating here, it should have placed the font change before the space.

3.3 Rulers

<..process dvi op ’ch’..>+
/* Rule ops: rule_x(TRUE) advances x_val by the rule width afterwards,
   rule_x(FALSE) leaves it in place. */
case <.insert rule + move op .>: {
(void) rule_x( TRUE ); break;
}
case <.insert rule + nomove op .>: {
(void) rule_x( FALSE ); break;
}
-_-_-

<..vars ..>+
/* Set to TRUE whenever a blank or rule character is printed; selects
   word_sp (TRUE) versus margin_sp (FALSE) as the divisor when gaps are
   converted into blanks. */
static BOOL text_on = FALSE;
-_-_-

Originally, we didn’t have ‘text_on’ in the computation of i within ‘rule_x’ and ‘move_x’. This caused a problem when we have a line of the form ‘<math stuff><regular stuff>’, because changes to the variable (WHICH VARIABLE? x_val?) are lost once we get out of the math stuff. The following example had the problem even after the introduction of text_on, and got fixed with the segment <try word space if i=0>.

\documentclass[twocolumn]{article}

\def\chem#1{\ensuremath {\mathrm {#1}}}
\def\un#1{\ensuremath {\unskip \,\mathrm {#1}}}

\setlength{\textwidth}{180mm}

\begin{document}

The bands we have identified as linear chains come in pairs, the
states are parity doublets. The structure of the intrinsic shapes
has been discussed in many works as given in the introduction
(sect.~xx). We compare the moments of inertia of the
bands in \chem{{\HCode{}}^{14}C} with the moments of inertia of other
molecular bands in light nuclei in table~xx. The
proposed bands in \chem{{\HCode{}}^{14}C} have very large values of
$\hbar^2/2\theta\approx 120$\un{keV}, consistent with the concept
of chain states.

\end{document}

<..header functions ..>+
static void rule_x( ARG_I(BOOL) );
-_-_-

<..functions ..>+

/* Handle a rule op: read its two 4-byte dimensions (first into up, then
   into right) and dispatch on the output mode.
   tag: when TRUE, x_val is advanced by right in every branch visible here;
   the ch_map_flag branch lives in another chunk and is not shown. */
static  void rule_x( tag )
       BOOL  tag
;{    long i, right, up;
    up = (INTEGER) get_int(4);
    right = (INTEGER) get_int(4);
    if( ch_map_flag ){ <.ruler into ch map .> }
    else if( pos_dvi ){
       <.pos dvi x rule .>
       if( tag ) x_val += right;
    } else if( (up>0) && (right>0) ) {
       /* only rules with both dimensions positive are typeset */
       <.typset positive rule x .>
       if( tag ) x_val += right;
    } else {
       <.don’t typset rule x .>
       if( tag ) x_val += right;
}  }
-_-_-

<..vars ..>+
static U_CHAR rule_ch = ’_’;
static BOOL <.radical-line-off .> = FALSE;
-_-_-

<..ruler ch ..>
/* Set the character used to draw rules: none when the special has no
   argument, otherwise the last byte of the argument (earlier bytes are
   read and overwritten). */
if( !special_n ){ rule_ch = ’\0’; }
else { while( special_n-- > 0 ){ rule_ch = get_char(); }
}
-_-_-

<..write to lg file with loc stamp ..>
           /* Copy the rest of the special to the log file, then append a
              line "<offset> <name>" for the current output file, where
              offset is its current ftell position.  The offset is printed
              as a long (ftell's return type) rather than being truncated
              through int. */
           struct files_rec *p;
while( special_n-- > 0 ) (void)  putc( get_char(), log_file );
/* find the record of the current output file to obtain its name */
for( p = opened_files; p != (struct files_rec*) 0;  p = p->next ){
    if( p->file == cur_o_file) {
         (IGNORED) fprintf(log_file, "%ld %s\n",
                (long) ftell(cur_o_file), p->name);
         break;
}  }
-_-_-

<..typset positive rule x ..>
/* Render a horizontal rule as a run of rule_ch characters.  Acts when the
   rule's right end (x_val + right) is non-zero and either extends past
   max_x_val or no text is on the line and characters are not ignored. */
if( (x_val + right)  &&
         (    ((x_val + right) > max_x_val)
           || ( !text_on && !ignore_chs )
         )
){
    /* measure from the rule's start when max_x_val is unset or already
       beyond the rule's end */
    if( (max_x_val == -10000) || ((x_val + right) <= max_x_val) )
    {  max_x_val = x_val;  }
    /* length beyond max_x_val, in units of word_sp or margin_sp */
    i =  (INTEGER) (  (double) (x_val + right - max_x_val)
                    /         (text_on? word_sp : margin_sp)
                    +         0.5 );
    <.try word size if i=0 .>
    if( i && !text_on )  try_new_line();
    <.trace x rules .>
    /* text_on is set even when no character is printed for the rule */
    while( i-- ) { text_on=TRUE;
       if( rule_ch && !<.radical-line-off .> ){ put_char(rule_ch); }
    }
    <.end trace x rules .>
    max_x_val = x_val + right;
}
-_-_-

<..try word size if i=0 ..>
/* Fallback when the first estimate gave zero characters: recompute the
   count with word_sp as the divisor. */
if( i==0 ){
    i =  (INTEGER) (  (double) (x_val + right - max_x_val)
                    /         word_sp
                    +         0.5 );
}
-_-_-

<..radical-line-off ..>
rule_ch_off
-_-_-

<..don’t typset rule x ..>
<.trace x rules .>
<.end trace x rules .>
-_-_-

3.4 Command Characters

Fonts

<..process dvi op ’ch’..>+
<.case: skip font def .>
-_-_-

<..case: skip font def ..>
/* Skip over a font definition.  The 4-, 3- and 2-byte cases fall through
   on purpose, each consuming one extra leading byte (presumably of the
   wider font number).  Uses i and ch from the enclosing scope. */
case <.def 4 byte font .>:   (void) get_char();
case <.def 3 byte font .>:   (void) get_char();
case <.def 2 byte font .>:   (void) get_char();
case <.def 1 byte font .>: {
   /* read 14 bytes, keeping only the last one in ch */
   for( i=0; i<14; i++ ){ ch = get_char(); }
   /* then skip (ch + next byte) further bytes -- presumably the two
      length bytes of the font name followed by the name itself */
   for( i=ch + get_char(); i>0; i--) (void) get_char();
   break;
}
-_-_-