Chapter 2
Structure of Dvi Files

   2.1 Background
      Example
   2.2 Scan Postamble
      No Op’s at the End of File
      Find Start of Postamble
      Find Stack Size and Number of Pages
      Font Definitions
   2.3 Scan Preamble
   2.4 Scan Pages
   2.5 Dvi Trace
      Setting the Delimiters
      Using the Delimiters

2.1 Background

DVI driver standards

Dvi files consist of three parts.

+-----------+ 
| preamble  | 
+-----------+ 
|   pages   | 
+-----------+ 
| postamble | 
+-----------+ 

Example

000000: PRE       version          : 2 
000002:           numerator        : 25400000 
000006:           denominator      : 473628672 
000010:           magnification    : 1000 
000014:           job name ( 27)   : TeX output 1995.02.22:0104 
000042: BOP       page number      : 1 
000047:                0       0       0 
000059:                0       0       0 
000071:                0       0       0 
000083:           prev page offset : -00001 
000087: PUSH 
000088: DOWN3:    -917504 
000092: POP 
000093: DOWN4:    42152922 
000098: PUSH 
000099: DOWN4:    -41497562 
000104: PUSH 
000105: FNT_DEF1: 29 
000107:           checksum         : -538297224 
000111:           scale            : 655360 
000115:           design           : 655360 
000119:           name             : cmtt10 
000127: FONT_29 
000128: Char:     <HTML><TITLE>try.htex</TITLE><BODY> 
000163: POP 
000164: Y3:       786432 
000168: PUSH 
000169: PUSH 
000170: RIGHT3:   1310720 
000174: Char:     <P> 
000177: POP 
000178: RIGHT3:   2342903 
000182: FNT_DEF1: 12 
000184:           checksum         : 555887770 
000188:           scale            : 655360 
000192:           design           : 655360 
000196:           name             : cmsy10 
000204: FONT_12 
000205: Char:     ff         /* ligature (non-printing) */ 
000206: W3:       145632 
000210: Char:     0x 0 
000211: W0 
000212: Char:     A 
000213: PUSH 
000214: DOWN3:    -237825 
000218: Char:     ffi        /* ligature (non-printing) */ 
000219: POP 
000220: RIGHT3:   704510 
000224: FONT_29 
000225: Char:     A 
000226: RIGHT3:   344061 
000230: Char:     B 
000231: POP 
000232: Y0 
000233: PUSH 
000234: Char:     </BODY></HTML> 
000248: POP 
000249: POP 
000250: DOWN3:    1572864 
000254: EOP 
000255: POST      last page offset : 000042 
000260:           numerator        : 25400000 
000264:           denominator      : 473628672 
000268:           magnification    : 1000 
000272:           max page height  : 43725786 
000276:           max page width   : 30785863 
000280:           stack size needed: 3 
000282:           number of pages  : 1 
000284: FNT_DEF1: 29 
000286:           checksum         : -538297224 
000290:           scale            : 655360 
000294:           design           : 655360 
000298:           name             : cmtt10 
000306: FNT_DEF1: 12 
000308:           checksum         : 555887770 
000312:           scale            : 655360 
000316:           design           : 655360 
000320:           name             : cmsy10 
000328: POSTPOST  postamble offset : 000255 
000333:           version          : 2 
000334: TRAILER 
000335: TRAILER 
000336: TRAILER 
000337: TRAILER 
000338: TRAILER 
000339: TRAILER 

2.2 Scan Postamble

The postamble of a dvi file has the following structure. It should be consistent with the preamble.

no_ops ???      >= 0 bytes     NOPS, I think they are allowed here... 
postamble_marker   1 ubyte     POST 
last_page_offset   4 sbytes 
numerator          4 ubytes 
denominator        4 ubytes 
magnification      4 ubytes 
max_page_height    4 ubytes 
max_page_width     4 ubytes 
max_stack          2 ubytes 
total_pages        2 ubytes    number of pages in file 
... FONT DEFINITIONS ... 
POST_POST          1 byte 
postamble_offset   4 sbytes    offset in file where postamble starts 
version_id         1 ubyte 
trailer         >= 4 ubytes    TRAILER 
<EOF> 

No Op’s at the End of File

Verify the eof op and the version number at the end of the postamble. There should be at least four trailing characters, and normally there are no more than seven. They act as a signature and pad the file to a length that is divisible by four, for machines that pack four bytes into a word.

<..scan postamble..>
 /* Walk backwards from the end of the file, one byte per iteration, for
    as long as the byte read equals the eof (trailer) op.  On exit `ch'
    holds the first byte that is not a trailer, `file_len' is the offset
    last passed to fseek, and `i' is the number of bytes examined (the
    trailers plus that final byte). */
 i=0;
 do{
   i++; file_len -= 1;
   (IGNORED) fseek(dvi_file, file_len, <.abs file addr.>);
 }   while( (ch=get_char()) == <.eof op.> );
 eof_op_n = file_len;
 /* The byte in front of the trailers must be one of the two known
    version ids.
    NOTE(review): `i' also counts the non-trailer byte, so `i<4' only
    rejects files with fewer than three trailers, not fewer than four --
    confirm whether this leniency is intended. */
 if( (i<4)
     ||
     ((ch != <.version dvi.>) && (ch != <.version xdv.>))
   )  bad_dvi;
 /* kept so that the version byte of the preamble can be compared with it */
 version_id = ch;
 -_-_-

<..vars..>+
 /* version byte read from the end of the file while scanning the
    postamble; the preamble scan requires the same value */
 static int version_id;
 -_-_-

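The function ‘fseek’ provides random access to the file; its return value is discarded with ‘(IGNORED)’. The two chunks below supply its third argument, which selects whether the offset is absolute or relative to the current position.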

<..abs file addr..>
 /* fseek origin argument for an absolute offset (0 is assumed to equal
    SEEK_SET -- TODO confirm for the target platforms) */
 0-_-_-

<..relative file addr..>
 /* fseek origin argument for an offset relative to the current position
    (1 is assumed to equal SEEK_CUR -- TODO confirm for the target
    platforms) */
 1-_-_-

Find Start of Postamble

<..scan postamble..>+
 file_len -= 5;
 (IGNORED) fseek(dvi_file, file_len, <.abs file addr.>);
 if( get_char() != <.end-postamble op.> )  bad_dvi;
 eof_op_n -= begin_postamble = get_unt(4);
 (IGNORED) fseek(dvi_file, begin_postamble, <.abs file addr.>);
 -_-_-

Find Stack Size and Number of Pages

<..scan postamble..>+
 /* The postamble offset must point at the begin-postamble op. */
 if( get_char() != <.begin-postamble op.> )  bad_dvi;
 /* Skip 16 bytes from the current position: last page offset,
    numerator, denominator and magnification, four bytes each. */
 (IGNORED) fseek(dvi_file, 16L, <.relative file addr.>);
 <.max page dimensions.>
 /* Two-byte stack size; a value below 1 is rejected as a bad file. */
 if( (stack_len = (int) get_unt(2)) < 1)     bad_dvi;
 <.stack = m-alloc....>
 /* Two-byte page count; it is counted down as the pages are scanned. */
 unread_pages = (int) get_unt(2);
 -_-_-

<..main’s vars..>+
 /* number of pages still to be scanned; initialised from the page count
    stored in the postamble */
 int unread_pages;
 -_-_-

<..vars..>+
 /* stack size read from the postamble (checked there to be at least 1) */
 static int  stack_len;
 -_-_-

Font Definitions

<..scan postamble..>+
 {      <.scan fonts vars.>
        /* reported through err_i(14) once all font entries are read;
           presumably set by the font-entry chunk -- verify there */
        BOOL missing_fonts;
 #ifndef KPATHSEA
        <.fls var.>
    <.get dir of tex4ht.fls.>
    <.open old cache file.>
 #endif
    <.init scan fonts vars.>
    missing_fonts = FALSE;
    <.check env font variables.>
    <.start loading fonts.>
    <.load unicode.4hf.>
    /* Every op read up to the end-postamble op is handed to the
       font-entry chunk. */
    while( (ch =  get_char()) != <.end-postamble op.> ){
      <.scan font entry.>
    }
    <.end loading fonts.>
    if( missing_fonts ) err_i(14);
    /* The cache-file handling is compiled only when KPATHSEA is not
       defined. */
 #ifndef KPATHSEA
    <.close old cache file.>
    <.set new cache file.>
 #endif
    <.free html font memory.>
 }
 -_-_-

2.3 Scan Preamble

The preamble of a dvi file has the following structure.

no_ops          >= 0 bytes     NOP, nops before the preamble 
preamble_marker    1 ubyte     PRE 
version_id         1 ubyte 
numerator          4 ubytes 
denominator        4 ubytes 
magnification      4 ubytes 
id_len             1 ubyte     length of identification string 
id_string     id_len ubytes    identification string 

<..scan preamble..>
 (IGNORED) fseek(dvi_file, 0L, <.abs file addr.>);
 ch = get_noop();
 if( ch != <.start of preamble op.> )   bad_dvi;
 if( ((int) get_char()) != version_id ) bad_dvi;
 (void) get_unt(4);     numerator   = (INTEGER) get_unt(4);
 (void) get_unt(4);     denominator = (INTEGER) get_unt(4);
 magnification = (void) get_unt(4);
 for( i= get_char(); i>0; i-- ) ch = get_char();
 -_-_-

<..version dvi..>
 /* version id accepted for ordinary dvi files */
 2 -_-_-

The following is from xetex.

<..version xdv..>
 /* version id accepted for xdv files (the value comes from xetex) */
 5
 -_-_-

2.4 Scan Pages

The pages in a dvi file have the following structure.

no_ops          >= 0 bytes     NOP 
begin_of_page      1 ubyte     BOP 
page_nr            4 sbytes    page number 
                  36 bytes 
prev_page_offset   4 sbytes    offset in file where previous 
                                             page starts, -1 for none 
... PAGE DATA ... 
end_of_page        1 ubyte     EOP 

<..scan pages..>
 {
   dis_pages = unread_pages;
   while( unread_pages-- ){
     (IGNORED) printf("[%d", dis_pages - unread_pages);
     <.initial values for new page.>
     if( get_noop() != <.start page op.> )  bad_dvi;
     for( i = 1; i<45; i++ )
       if( get_char() == EOF )   bad_dvi;
     while( (ch = get_char()) != <.end page op.> ){
       <.process ch according to the case.>
     }
     <.dump ch-map at end of page.>
     (IGNORED) printf("]%c",unread_pages % 10 == 0? ’\n’ :  ’);
     put_char(’\n’);
 } }
 -_-_-

The new line at the end of each page is important, but problematic for verbatim environments that are expressed by ‘<pre>’ elements.

<..initial values for new page..>
 x_val = dx_1 = dx_2 = 0;  max_x_val = -10000; temp
 y_val = max_y_val = prev_y_val = dy_1 = dy_2 = 0;
 -_-_-

[example of a dvi file]

2.5 Dvi Trace

Good for post processing by plug-ins of ‘tex4ht.c’. That is, we set [hints] for the postprocessors.

<..dvi trace..>
 /* special_n is decremented once for each byte taken from the special.
    More than one byte left: the first byte decides.  A ’%’ followed by
    more than two further bytes defines new delimiters; anything else is
    consumed unused. */
 if( special_n>1 ) {
    special_n--;
    if (  get_char() == ’%’ ) {
       if( special_n>2 ) { <.delimiters for trace dvi.> }
       else { <.consume unused specials.> }
    } else { <.consume unused specials.> }
 /* Exactly one byte left: it selects a trace counter.  An upper-case
    letter increments the counter, the matching lower-case letter
    decrements the same counter; other bytes are ignored. */
 } else if( special_n ) {
   special_n--;
   switch ( get_char() ){
      case ’P’: { trace_dvi_P++; break; }
      case ’C’: { trace_dvi_C++; break; }
      case ’V’: { trace_dvi_V++; break; }
      case ’H’: { trace_dvi_H++; break; }
      case ’R’: { trace_dvi_R++; break; }
      case ’p’: { trace_dvi_P--; break; }
      case ’c’: { trace_dvi_C--; break; }
      case ’v’: { trace_dvi_V--; break; }
      case ’h’: { trace_dvi_H--; break; }
      case ’r’: { trace_dvi_R--; break; }
       default: { ; }
 } }
 -_-_-

Options ’h’ and ’v’ are not in use.

<..css for chars..>
 if( span_on && (default_font != font_tbl[cur_fnt].num) ){
   if( !ch_map_flag && start_span ){
     if( span_name_on ){
        <.open output file.>
        if( span_open[0] )  if( *span_open[0] )
            (IGNORED) fprintf(cur_o_file, "%s", span_open[0]);
        if( span_name[0] )  if( *span_name[0] )
            (IGNORED) fprintf(cur_o_file,
                span_name[0], font_tbl[cur_fnt].family_name);
        if( span_size[0] )  if( *span_size[0] )
            (IGNORED) fprintf(cur_o_file,
                span_size[0], font_tbl[cur_fnt].font_size);
        if( span_mag[0] )
          if( *span_mag[0]  && (font_tbl[cur_fnt].mag != 100))
            (IGNORED) fprintf(cur_o_file,
                        span_mag[0], font_tbl[cur_fnt].mag);
        if( span_ch[0] )  if( *span_ch[0] )
            (IGNORED) fprintf(cur_o_file, "%s", span_ch[0]);
     }
     start_span = FALSE;
   }
 }
 -_-_-

<..vars..>+
 /* in_span_ch: a run of characters that gets span markup is in progress;
    start_span: set when such a run begins and cleared once the opening
    markup has been handled */
 static BOOL start_span = FALSE, in_span_ch = FALSE;
 -_-_-

<..span char block..>
 /* Open a run: spans are enabled, no run is active, neither ignore_chs
    nor in_accenting is set, the current font is not the default one, and
    the op is below 137 but is not the insert-rule-and-move op. */
 if( span_on && !in_span_ch  && !ignore_chs && !in_accenting
             && (default_font != font_tbl[cur_fnt].num) ){
   if(  (ch < 137) && (ch != <.insert rule + move op.>) ){
     in_span_ch = TRUE; start_span = TRUE;
 } }
 else if ( in_span_ch ){
   /* Close the run when spans were switched off, or when the op is the
      insert-rule-and-move op, lies above 136 and below the first
      horizontal-move op, or lies above the last dx.2 op. */
   if( !span_on ||
      (ch == <.insert rule + move op.>) ||
      ((136 < ch) && (ch < <.mv hor 1-byte.>)) ||
      (ch > <.dx.2 store and mv hor 4-byte.>)
   ){
     in_span_ch = FALSE;
     /* NOTE(review): end_span[0] is dereferenced without a null check,
        whereas the opening strings are tested for null before use --
        confirm that end_span[0] is always set. */
     if( *end_span[0] ){
        <.open output file.>
        (IGNORED) fprintf(cur_o_file, "%s", end_span[0]);
     }
 } }
 -_-_-

<..end accented span..>
 /* Close a span that is still open, writing the closing string.
    NOTE(review): in_span_ch is cleared only when end_span[0] is
    non-empty, while the span char block clears it unconditionally --
    confirm that the difference is intended. */
 if( span_on && in_span_ch ){
    if( *end_span[0] ){
        in_span_ch = FALSE;
        <.open output file.>
        (IGNORED) fprintf(cur_o_file, "%s", end_span[0]);
 }  }
 -_-_-

<..end css for chars..>
 
 -_-_-

<..trace dvi char block..>
 if( trace_dvi_C && !in_trace_char ){
    if( (ch < 137) && (ch != <.insert rule + move op.>) ){
      in_trace_char = TRUE; block_start = TRUE;
 } }
 else if ( in_trace_char ){
   if( !trace_dvi_C || (ch > 136) || (ch == <.insert rule + move op.>) ){
    in_trace_char = FALSE;
 } }
 -_-_-

Setting the Delimiters

The special is assumed to offer a pattern of the form ‘type-ch del-ch ..... del-ch ....’.

<..delimiters for trace dvi..>
      U_CHAR  type, ch, *p, *q, *pp=0, *qq=0, pre[256], post[256];
 special_n -= 2;   type = get_char();  ch = get_char();
 p = pre;
 while( special_n-- > 0 ) {
   if ( (*(p++)=get_char() ) == ch ) { p--; break; }
 }
 *p = ’\0’;
 p = post;
 while( special_n-- > 0 ) { *(p++)=get_char(); }    *p=’\0’;
 -_-_-

<..delimiters for trace dvi..>+
 /* Make allocated copies of both parts, each sized to its string plus
    the terminator; `pre' and `post' themselves are local buffers. */
 p = m_alloc(char, 1 + (int) strlen((char *) pre));
 (IGNORED) strcpy((char *) p, (char *) pre );
 q = m_alloc(char, 1 + (int) strlen((char *) post));
 (IGNORED) strcpy((char *) q, (char *) post );
 -_-_-

<..delimiters for trace dvi..>+
 switch ( type ){
    case ’P’: {
      pp = trace_dvi_del_P;      trace_dvi_del_P = p;
      qq = end_trace_dvi_del_P;  end_trace_dvi_del_P = q;
      break; }
    case ’C’: {
      pp = trace_dvi_del_C;      trace_dvi_del_C = p;
      qq = end_trace_dvi_del_C;  end_trace_dvi_del_C = q;
      break; }
    case ’V’: {
      pp = trace_dvi_del_V;      trace_dvi_del_V = p;
      qq = end_trace_dvi_del_V;  end_trace_dvi_del_V = q;
      break; }
    case ’H’: {
      pp = trace_dvi_del_H;      trace_dvi_del_H = p;
      qq = end_trace_dvi_del_H;  end_trace_dvi_del_H = q;
      break; }
    case ’R’: {
      pp = trace_dvi_del_R;      trace_dvi_del_R = p;
      qq = end_trace_dvi_del_R;  end_trace_dvi_del_R = q;
      break; }
    case ’p’: {
      pp = trace_dvi_del_p;      trace_dvi_del_p = p;
      qq = end_trace_dvi_del_p;  end_trace_dvi_del_p = q;
      break; }
    case ’c’: {
      pp = trace_dvi_del_c;      trace_dvi_del_c = p;
      qq = end_trace_dvi_del_c;  end_trace_dvi_del_c = q;
      break; }
    case ’v’: {
      pp = trace_dvi_del_v;      trace_dvi_del_v = p;
      qq = end_trace_dvi_del_v;  end_trace_dvi_del_v = q;
      break; }
    case ’h’: {
      pp = trace_dvi_del_h;      trace_dvi_del_h = p;
      qq = end_trace_dvi_del_h;  end_trace_dvi_del_h = q;
      break; }
    case ’r’: {
      pp = trace_dvi_del_r;      trace_dvi_del_r = p;
      qq = end_trace_dvi_del_r;  end_trace_dvi_del_r = q;
      break; }
   default: { ; }
 }
 free((void *)  pp);
 free((void *)  qq);
 -_-_-

<..vars..>+
 /* in_trace_char: a traced character block is in progress;
    block_start: the next traced character is the first of its block */
 static BOOL in_trace_char = FALSE, block_start = FALSE;
 /* trace counters, raised and lowered by one-letter specials; tracing of
    a kind is active while its counter is non-zero */
 static int trace_dvi_P = 0, trace_dvi_C = 0,
      trace_dvi_H = 0, trace_dvi_R = 0, trace_dvi_V = 0;
 /* delimiter strings, one opening/closing pair per type letter; the
    upper-case and lower-case letters have separate pairs */
 static U_CHAR *trace_dvi_del_P,  *end_trace_dvi_del_P,
      *trace_dvi_del_p,  *end_trace_dvi_del_p,
      *trace_dvi_del_C,  *end_trace_dvi_del_C,
      *trace_dvi_del_c,  *end_trace_dvi_del_c,
      *trace_dvi_del_H,  *end_trace_dvi_del_H,
      *trace_dvi_del_h,  *end_trace_dvi_del_h,
      *trace_dvi_del_R,  *end_trace_dvi_del_R,
      *trace_dvi_del_r,  *end_trace_dvi_del_r,
      *trace_dvi_del_V,  *end_trace_dvi_del_V,
      *trace_dvi_del_v,  *end_trace_dvi_del_v;
 /* push_depth: current nesting of pushes; push_id: running id given to
    each push; push_st: id recorded for each of the first 256 levels */
 static int push_depth=0, push_id=0, push_st[256];
 -_-_-

<..cmd line trace groups..>
 trace_dvi_P++;
 if( !(   *trace_dvi_del_P || *end_trace_dvi_del_P
       || *trace_dvi_del_p || *end_trace_dvi_del_p
      )
  ){
    trace_dvi_del_P =
             (char *)  r_alloc((void *) trace_dvi_del_P,
                               (size_t) 4);
    (IGNORED) strcpy((char *) trace_dvi_del_P, "[G " );
    end_trace_dvi_del_P =
             (char *)  r_alloc((void *) end_trace_dvi_del_P,
                               (size_t) 2);
    (IGNORED) strcpy((char *) end_trace_dvi_del_P, "]" );
    trace_dvi_del_p =
             (char *)  r_alloc((void *) trace_dvi_del_p,
                               (size_t) 5);
    (IGNORED) strcpy((char *) trace_dvi_del_p, "[/G " );
    end_trace_dvi_del_p =
             (char *)  r_alloc((void *) end_trace_dvi_del_p,
                               (size_t) 2);
    (IGNORED) strcpy((char *) end_trace_dvi_del_p, "]" );
 }
 -_-_-

<..init traces..>
 /* Give every opening/closing delimiter pair its initial value through
    set_del, which makes each pointer refer to an allocated empty
    string. */
 set_del( &trace_dvi_del_P, &end_trace_dvi_del_P);
 set_del( &trace_dvi_del_p, &end_trace_dvi_del_p);
 set_del( &trace_dvi_del_C, &end_trace_dvi_del_C);
 set_del( &trace_dvi_del_c, &end_trace_dvi_del_c);
 set_del( &trace_dvi_del_H, &end_trace_dvi_del_H);
 set_del( &trace_dvi_del_h, &end_trace_dvi_del_h);
 set_del( &trace_dvi_del_R, &end_trace_dvi_del_R);
 set_del( &trace_dvi_del_r, &end_trace_dvi_del_r);
 set_del( &trace_dvi_del_V, &end_trace_dvi_del_V);
 set_del( &trace_dvi_del_v, &end_trace_dvi_del_v);
 -_-_-

<..header functions..>+
 static void set_del( ARG_II(char **, U_CHAR **) );
 -_-_-

<..functions..>+
 
 static  void set_del( del, end_del )
      U_CHAR ** del;
      U_CHAR ** end_del
 ;{
   *del = m_alloc(char, 1);       **del = ’\0’;
   *end_del = m_alloc(char, 1);   **end_del = ’\0’;
 }
 -_-_-

Using the Delimiters

<..trace h spaces..>
 if( trace_dvi_H && !ch_map_flag ){
    if( *trace_dvi_del_H != ’\0’ ){
       (IGNORED) fprintf(cur_o_file, "%s%d", trace_dvi_del_H, (int) dx);
    }
    (IGNORED) fprintf(cur_o_file, "%s", end_trace_dvi_del_H);
 }
 -_-_-

<..trace v spaces..>
 if( trace_dvi_V && !ch_map_flag ){
    if( *trace_dvi_del_V != ’\0’ ){
      (IGNORED) fprintf(cur_o_file, "%s%d", trace_dvi_del_V, d);
    }
    (IGNORED) fprintf(cur_o_file, "%s", end_trace_dvi_del_V);
 }
 -_-_-

<..trace x rules..>
 if( trace_dvi_R && !ch_map_flag ){
    if( *trace_dvi_del_R != ’\0’ ){
       (IGNORED) fprintf(cur_o_file, "%s%d %d",
          trace_dvi_del_R, (int) x_val, (int) y_val);
    }
    (IGNORED) fprintf(cur_o_file, "%s", end_trace_dvi_del_R);
 }
 -_-_-

<..end trace x rules..>
 if( trace_dvi_R && !ch_map_flag ){
    if( *trace_dvi_del_r != ’\0’ ){
       (IGNORED) fprintf(cur_o_file, "%s%d %d",
          trace_dvi_del_R, (int) right, (int) up);
    }
    (IGNORED) fprintf(cur_o_file, "%s", end_trace_dvi_del_r);
 }
 -_-_-

<..trace dvi push..>
 if( push_depth<256 ) { push_st[push_depth] = push_id++; }
 if( trace_dvi_P && !ch_map_flag ){
    <.open output file.>
    if( *trace_dvi_del_P != ’\0’ ){
       (IGNORED) fprintf(cur_o_file, "%s%d %d",
          trace_dvi_del_P, push_depth,
          push_st[(push_depth<256)? push_depth:256]);
    }
    (IGNORED) fprintf(cur_o_file, "%s", end_trace_dvi_del_P);
 }
 push_depth++;
 -_-_-

<..trace dvi pop..>
 push_depth--;
 if( trace_dvi_P && !ch_map_flag ){
    <.open output file.>
    if( *trace_dvi_del_p != ’\0’ ){
       (IGNORED) fprintf(cur_o_file, "%s%d %d",
          trace_dvi_del_p,  push_depth,
          push_st[(push_depth<256)? push_depth:256]);
    }
    (IGNORED) fprintf(cur_o_file, "%s", end_trace_dvi_del_p);
 }
 -_-_-

<..trace dvi char..>
 if( trace_dvi_C ){
    if( !ch_map_flag ){
      <.open output file.>
      if( *trace_dvi_del_C != ’\0’ ){
         (IGNORED) fprintf(cur_o_file,
             block_start? "%s%s %d B" : "%s%s %d",
             trace_dvi_del_C, font_tbl[cur_fnt].name, ch);
      }
      (IGNORED) fprintf(cur_o_file,"%s", end_trace_dvi_del_C);
    }
    block_start = FALSE;
 }
 -_-_-

<..end trace dvi char..>
 if( trace_dvi_C && !ch_map_flag ){
    <.open output file.>
    (IGNORED) fprintf(cur_o_file, "%s%s",
          trace_dvi_del_c, end_trace_dvi_del_c);
 }
 -_-_-