<..header functions..>+
/* Forward declaration of put_4ht_ch (defined in the functions chunk).
   ARG_II is a project macro not shown here; presumably it supplies the
   parameter types only to compilers that accept prototypes -- TODO confirm. */
static INTEGER put_4ht_ch( ARG_II(int,FILE *) );
-_-_-
<..functions..>+
/* Write ch to htFile, collecting "&...;" character references in
   uni_code so that a complete reference can be processed as a unit.

   - An ampersand first flushes any pending partial reference.  It is then
     written directly when put_4ht_off is nonzero; otherwise it opens a
     new pending reference bound to htFile.
   - While a reference is pending: a semicolon completes it; decimal and
     hexadecimal digits, the hash sign and the letters x/X are appended;
     any other character, or a buffer with a single free slot left,
     flushes the pending text and writes ch after it.
   - With nothing pending, ch is written directly.

   Returns the putc result for the direct writes made in this body, and
   ch itself when the character was buffered or completed a reference. */
static INTEGER put_4ht_ch(ch,htFile) int ch ; FILE* htFile
;{
   int c = ch;

   if( ch == ’&’ ){
      <.flush incomplete unicode.>
      if( !put_4ht_off ){
         uni_code[0] = ’&’;
         uni_code_p = 1;
         put_4ht_file = htFile;
      } else {
         c = putc( ch, htFile );
      }
   }
   else if( !uni_code_p ){
      c = putc( ch, htFile );
   }
   else if( ch == ’;’ ){
      <.process unicode.>
      uni_code_p = 0;
   }
   else {
      /* characters that may legally continue a numeric reference */
      int ref_char =    ((ch>=’0’) && (ch<=’9’))
                     || ((ch>=’a’) && (ch<=’f’))
                     || ((ch>=’A’) && (ch<=’F’))
                     || (ch==’#’)
                     || (ch==’x’)
                     || (ch==’X’);
      if( ref_char && ((uni_code_p+1) != MAX_UNI_CODE) ){
         uni_code[ uni_code_p++ ] = ch;
      } else {
         <.flush incomplete unicode.>
         c = putc( ch, htFile );
      }
   }
   return c;
}
-_-_-
<..vars..>+
/* Destination for the characters held in uni_code; set when a reference
   is opened and cleared again by flush_uni. */
static FILE* put_4ht_file = (FILE *) 0;
/* Nonzero: an ampersand is written straight through instead of opening a
   buffered reference. */
static int put_4ht_off = 1;
/* Characters of the reference being collected, starting with the ampersand. */
static char uni_code[MAX_UNI_CODE];
/* Number of characters currently held in uni_code (0 = nothing pending). */
static short uni_code_p = 0;
-_-_-
<..defines..>+
/* Size of the uni_code buffer and of the uni_10 scratch arrays. */
#define MAX_UNI_CODE 20
-_-_-
<..on/off unicode..>
special_n--;
switch ( code = get_char() ){
case ’+’: { put_4ht_off++; <.flush incomplete unicode.> break; }
case ’-’: { if( put_4ht_off>0 ){ put_4ht_off--; }
else { warn_i_str(52, "@u-"); }
break; }
}
-_-_-
<..flush incomplete unicode..>
/* Write out unchanged whatever part of a reference has been collected. */
flush_uni();
-_-_-
<..header functions..>+
/* Forward declaration of flush_uni.  ARG_I is a project macro not shown
   here; presumably the one-argument counterpart of ARG_II -- TODO confirm. */
static void flush_uni( ARG_I(void) );
-_-_-
<..functions..>+
/* Write the uni_code_p characters collected in uni_code to put_4ht_file,
   then mark the buffer empty and forget the file. */
static void flush_uni( MYVOID )
{
   int k = 0;

   while( k < uni_code_p ){
      (IGNORED) putc( uni_code[k], put_4ht_file );
      k++;
   }
   put_4ht_file = (FILE *) 0;
   uni_code_p = 0;
}
-_-_-
<..load unicode.4hf..>
/* Load the table file "unicode.4hf".  When the included search chunk
   yields an open file, the tables are allocated and filled and reference
   processing is enabled (put_4ht_off = 0); otherwise it stays disabled
   (put_4ht_off = 1) and the table capacities are set to zero. */
{  U_CHAR name[256];
   FILE* file;
   (IGNORED) sprintf(name, "%s.4hf", "unicode");
   <.search file in htf locations.>
   if( file ){
      <.unicode.4hf vars.>
      <.mem for charset.>
      <.read unicode.4hf.>
      /* file is local to this block, so this is the last point at which
         the handle can be released */
      (IGNORED) fclose(file);
      put_4ht_off = 0;
   } else{ put_4ht_off = 1; <.no mem for charset.> }
}
-_-_-
<..unicode.4hf vars..>
/* Working variables for reading unicode.4hf. */
/* chr: last character read; delimiter: first character of the line, used
   as field separator; delimiter_n: delimiters seen so far on the line;
   line_no: current line number for warnings. */
int chr, delimiter, delimiter_n, line_no, digit, i, j;
/* in: field texts of the current line, each NUL-terminated; in_p: next
   free position in that buffer; start[k]: where field k begins (the scan
   chunk sets k = 1..3). */
U_CHAR in[512], *in_p, * start[4], *p;
/* char_on: TRUE while inside a field; err: the current line is malformed. */
BOOL char_on, err;
/* Code parsed from the hexadecimal reference in the first field. */
int value;
-_-_-
<..read unicode.4hf..>
/* Read unicode.4hf, one line per iteration.  A line whose first character
   is printable ASCII (33..126) is an entry; that character is the field
   delimiter.  An entry needs 8 delimiters (4 fields).  Entries whose first
   field begins with a question mark are skipped silently.  Otherwise the
   first field must be a hexadecimal reference; its value keys the entry,
   which is stored as type-less when the fourth field is empty and as typed
   otherwise.  Malformed lines are reported by the error chunk. */
err = FALSE;
line_no = 0;
while( TRUE ){
   line_no++;
   chr = (int) getc(file);
   if( chr == EOF ){ break; }
   if( (chr>32) && (chr<127) ){
      <.scan 4hf fields.>
      if( delimiter_n == 8 ){
         if( *in != ’?’ ) {
            if( <.not hexa unicode?.> ){ err = TRUE; }
            else {
               <.value = hex into int.>
               /* a bad hex digit leaves value meaningless, so neither
                  kind of entry may be stored for such a line */
               if( !err ){
                  if( start[3] == (in_p-1) ){
                     <.store type-less 4hf endtry.>
                  } else {
                     <.store typed 4hf endtry.>
                  }
               }
            }
         }
      }
      else { err = TRUE; }
      <.error 4hf fields.>
   }
   /* discard whatever is left of the line */
   while( (chr != EOF) && (chr!=’\n’) ){
      chr = (int) getc(file);
   }
   if( chr == EOF ){ break; }
}
-_-_-
<..not hexa unicode?..>
(*in != ’&’)
|| (*(in+1) != ’#’)
|| ( (*(in+2) != ’x’) && (*(in+2) != ’X’))
|| (*(start[1] - 2) != ’;’)
-_-_-
<..scan 4hf fields..>
/* Split the rest of the line into delimiter-bracketed fields.  The
   character already read is the delimiter.  Field texts are copied into
   the buffer in, each closed with a NUL; start[k] records where field k
   begins.  Scanning stops at end of line, end of file, after the 8th
   delimiter, or when the buffer is full.  In the last case fewer than 8
   delimiters have been counted, so the caller reports the line as
   malformed. */
delimiter = chr;
delimiter_n = 1;
char_on = TRUE;
in_p = in;
while( TRUE ) {
   chr = (int) getc(file);
   if( (chr == EOF) || (chr==’\n’) ){ break; }
   /* never write past the end of the line buffer */
   if( in_p == in + sizeof(in)/sizeof(in[0]) ){ break; }
   if( chr == delimiter ){
      if( char_on ){ *(in_p++) = ’\0’; }
      else{ start[ delimiter_n/2 ] = in_p; }
      char_on = !char_on;
      delimiter_n++;
   } else if (char_on ) {
      *(in_p++) = chr;
   }
   if( delimiter_n==8 ){ break; }
}
-_-_-
<..process 4hf fields..>
-_-_-
<..error 4hf fields..>
/* On a malformed line: issue warning 48 with the line number, echo the
   fields collected so far with the line's delimiter around each of them,
   and clear the error flag for the next line. */
if( err ){
   warn_i_int(48,line_no);
   (IGNORED) printf( "%c", delimiter );
   p = in;
   while( p != in_p ){
      if( *p != ’\0’ ){
         (IGNORED) printf( "%c", *p );
      } else {
         /* end of a field: close it, and open the next unless it was the
            last stored byte */
         (IGNORED) printf("%c", delimiter);
         if( p != in_p-1 ){ (IGNORED) printf(" %c", delimiter); }
      }
      p++;
   }
   (IGNORED) printf( "\n" );
   err = FALSE;
}
-_-_-
<..types..>+
/* One type-less unicode.4hf entry: the code parsed from the reference in
   the first field, and a private copy of the replacement text. */
struct charset_rec{ int ch;
char* str; };
-_-_-
<..vars..>+
/* charset: table of type-less entries, kept in ascending order of ch by
   the insertion code; charset_n: entries in use; max_charset_n: allocated
   capacity. */
static int charset_n = 0, max_charset_n;
static struct charset_rec *charset;
-_-_-
<..mem for charset..>
/* Start the type-less table with room for 256 entries. */
max_charset_n = 256;
charset = m_alloc(struct charset_rec, 256);
-_-_-
<..no mem for charset..>
/* No unicode.4hf file was opened: record that the table has no capacity. */
max_charset_n = 0;
-_-_-
<..value = hex into int..>
value = 0;
for( p=in+3; *p!=’;’; p++){
digit = (int) *p;
if( (digit>=’0’) && (digit<=’9’) ){ digit -= ’0’; }
else if( (digit>=’A’) && (digit<=’F’) ){ digit -= BASE_A; }
else if( (digit>=’a’) && (digit<=’f’) ){ digit -= BASE_a; }
else { digit=0; err = TRUE; }
value = 16*value + digit;
}
-_-_-
<..defines..>+
/* Offsets subtracted from a hex letter to obtain its value 10..15:
   55 is the ASCII code of A minus 10, 87 that of a minus 10. */
#define BASE_A 55
#define BASE_a 87
-_-_-
<..store type-less 4hf endtry..>
/* Insert (value, copy of field 2) into the sorted charset table, or
   replace the text of an existing entry with the same value.  The table
   is searched from its end, so input already in ascending order appends
   without any shifting of real entries. */
<.mem for new 4ht entry.>
/* start[3] - start[2] is the length of field 2 including its NUL */
p = m_alloc(char, (int) (start[3] - start[2]) );
(IGNORED) strcpy((char *) p, (char *) start[2] );
i = charset_n;
while( i-- > 0 ){
if( charset[i].ch == value ){
/* same code already present: drop the old text and reuse slot i */
free((void *) charset[i].str);
break;
} else {
/* insertion point: just after the first smaller entry met from the end,
   or slot 0 when every entry is larger */
if( (charset[i].ch < value)
|| ((charset[i].ch > value) && (i==0)) ){
if( charset[i].ch < value ){ i++; }
charset_n++;
/* shift entries up by one to open slot i.
   NOTE(review): the first pass copies the not-yet-used slot charset_n-1
   into slot charset_n, i.e. one more slot than strictly needed. */
for( j=charset_n; j>i; j-- ){
charset[j].ch = charset[j-1].ch;
charset[j].str = charset[j-1].str;
}
break;
} }
}
/* the loop ends without a break only for an empty table */
if(i == -1){ i = charset_n; }
if( i==charset_n ){ charset_n++; }
charset[i].str = p;
charset[i].ch = value;
-_-_-
The above backward search allows sorted files to be loaded in linear time.
<..mem for new 4ht entry..>
/* Grow the charset table by 10 slots once a single unused slot is left. */
if( (charset_n+1) == max_charset_n){
max_charset_n += 10;
charset = (struct charset_rec *) r_alloc((void *) charset,
(size_t) ((max_charset_n) * sizeof(struct charset_rec) ));
}
-_-_-
<..process unicode..>
/* A semicolon has closed the reference held in uni_code[0..uni_code_p).
   Anything that is not a numeric reference with at least one valid digit
   is written out unchanged, followed by the semicolon.  A numeric
   reference (decimal, or hexadecimal after x/X) is converted to value and
   looked up in the 4hf table.

   Only the first uni_code_p characters of uni_code are meaningful; the
   rest is left over from earlier references, so every index is checked
   against uni_code_p before it is read. */
if( (uni_code_p < 2) || (uni_code[1] != ’#’) ){
   <.flush incomplete unicode.>
   (IGNORED) putc( ch, htFile );
}
else{
   int i, base, value, digit;
   if( (uni_code_p > 2)
       && ((uni_code[2] == ’x’) || (uni_code[2] == ’X’)) ){
      base =16;  i=3;
   } else { base=10; i=2; }
   /* a reference without any digit is not a valid one */
   value = (i < uni_code_p)? 0 : -1;
   for( ; i<uni_code_p; i++ ){
      digit = uni_code[i];
      if( (digit>=’0’) && (digit<=’9’) ){ digit -= ’0’; }
      else if( (digit>=’A’) && (digit<=’F’) ){ digit -= BASE_A; }
      else if( (digit>=’a’) && (digit<=’f’) ){ digit -= BASE_a; }
      else { value = -1; break; }
      if( digit >= base ){ value=-1; break; }
      /* reject codes above 0x7FFFFFFF instead of overflowing value */
      if( value > (0x7FFFFFFF - digit) / base ){ value=-1; break; }
      value = value*base + digit;
   }
   if( value<0 ){
      <.flush incomplete unicode.>
      (IGNORED) putc( ch, htFile );
   } else {
      <.search 4hf table.>
   }
}
-_-_-
<..search 4hf table..>
int bottom, mid, top;
BOOL found=FALSE;
bottom = 0; top = charset_n;
while( !found ){
mid = (bottom + top) / 2;
if( value == charset[mid].ch ){
<.put 4hf replacement.>
found = TRUE;
} else if( value < charset[mid].ch ){
if( bottom == top ){ break; }
top = mid;
}
else {
if ( bottom < mid ){ bottom = mid; }
else if ( bottom<top ){ bottom++; }
else{ break; }
}
}
if( ! found ){
if( u10 || utf8 ){ <.hex uni to base 10 or utf8.> }
<.flush incomplete unicode.>
if( !utf8 ){ (IGNORED) putc( ch, htFile ); }
}
-_-_-
<..put 4hf replacement..>
/* Write the replacement text of charset[mid] to htFile.  In the text a
   doubled backslash stands for one backslash, and decimal digits between
   two backslashes stand for the character with that code.  With u10 set,
   hexadecimal references inside the text are rewritten in decimal by the
   included chunk.  A backslash at the very end of the text, or an escape
   without its closing backslash, ends the output at the string terminator
   instead of reading beyond it. */
{  U_CHAR *p;
   p = charset[mid].str;
   while( *p != ’\0’ ){
      if( *p==’\\’ ){
         p++;
         if( *p==’\\’ ){
            (IGNORED) putc( ’\\’, htFile );
         } else if( *p==’\0’ ){
            break;                         /* dangling backslash */
         } else {
            int i;
            i = *p - ’0’;
            while( (*(++p) != ’\\’) && (*p != ’\0’) ){
               i = 10*i + *p - ’0’;
            }
            (IGNORED) putc( i, htFile );
            if( *p==’\0’ ){ break; }       /* escape was never closed */
         }
      }
      else {
         (IGNORED) putc( *p, htFile );
         if ( (*p==’&’) && u10 ){ <.u10 for 4hf replacement.> }
      }
      p++;
   }
}
-_-_-
The utf8 option doesn’t apply to unicode symbols coming from 4hf fonts.
<..vars..>+
/* TRUE: hexadecimal character references are rewritten in decimal. */
static BOOL u10 = FALSE;
-_-_-
<..get unicode entity representations..>
/* Select the output form for references from the text two characters past
   p: "10" turns on u10; the included chunk handles the utf8 choice; any
   other text goes to bad_arg (a project macro not shown here). */
if( eq_str(p+2, "10") ){ u10 = TRUE; }
<.else get utf8 unicode encoding.>
else{ bad_arg;}
-_-_-
<..hex uni to base 10 or utf8..>
/* For a hexadecimal reference held in uni_code, compute its value in dec
   and rewrite the buffer in decimal (u10) or as UTF-8 bytes (otherwise).
   Decimal references are left untouched.  The digits are assumed to be
   valid hex digits already. */
short n;
long dec;
int ch;
int nibble;
char uni_10[MAX_UNI_CODE];
if( (uni_code[2] == ’x’) || (uni_code[2] == ’X’) ) {
   dec = 0;
   n = 3;
   while( n<uni_code_p ){
      ch = uni_code[n];
      if( ch <= ’9’ ){ nibble = ch-’0’; }
      else if( ch <= ’Z’ ){ nibble = 10 + (ch-’A’); }
      else { nibble = 10 + (ch-’a’); }
      dec = 16*dec + nibble;
      n++;
   }
   if( u10 ){ <.dec to u10.> }
   else { <.uni in utf8.> }
}
-_-_-
<..dec to u10..>
/* Replace the hexadecimal digits in uni_code by the decimal digits of
   dec, keeping the two-character prefix.  The digits are produced in
   reverse in uni_10 and copied back in order.  A 16-digit hexadecimal
   value can need more decimal digits than uni_code has room for, so the
   copy stops at the end of the buffer. */
if( dec == 0 ){
   uni_code_p = 3;  uni_code[2] = ’0’;
} else {
   n = 0;
   while( dec > 0 ){  uni_10[ n++ ] = dec % 10 + ’0’;   dec /= 10;  }
   uni_code_p = 2;
   while( (n>0) && (uni_code_p < MAX_UNI_CODE) ){
      uni_code[ uni_code_p++ ] = uni_10[ --n ];
   }
}
-_-_-
<..u10 for 4hf replacement..>
/* Runs right after an ampersand of the replacement text has been written.
   If a hash follows, it is written too; if x or X comes next, the hex
   digits up to a semicolon are converted and written in decimal.  p is
   then left on the character before the semicolon, so the enclosing loop
   writes the semicolon itself.  If the digits do not end in a semicolon,
   p stays on the hash and the enclosing loop writes the rest unchanged. */
if ( *(p+1) == ’#’ ){
p++;
(IGNORED) putc( ’#’, htFile );
if ( (*(p+1) == ’x’) || (*(p+1) == ’X’) ){
int value, digit;
U_CHAR *q;
/* q scans ahead of p, starting at the first character after x/X */
q = p+2;
value = 0;
digit = *(q++);
while( digit!=0 ){
if( (digit>=’0’) && (digit<=’9’) ){
value = value*16 + digit - ’0’;
}
else if( (digit>=’A’) && (digit<=’F’) ){
value = value*16 + digit - ’A’+10;
}
else if( (digit>=’a’) && (digit<=’f’) ){
value = value*16 + digit - ’a’+10; }
else {
if( digit == ’;’ ){
<.display value in u10.>
/* q is one past the semicolon; step p back to just before it */
p=q-2;
}
break;
}
digit = *(q++);
}
} }
-_-_-
<..display value in u10..>
char uni_10[MAX_UNI_CODE];
int n;
n = 0;
while( value>0 ){
uni_10[ n++ ] = value % 10 + ’0’;
value /= 10;
}
while( n>0 ){
(IGNORED) putc( uni_10[--n], htFile );
}
-_-_-
<..dec to utf8..>
/* Replace the contents of uni_code by the UTF-8 byte sequence for dec:
   one byte below 0x80, otherwise 2 to 6 bytes for values up to
   0x7FFFFFFF.  Each byte after the first carries six bits of the value
   under the marker 0x80; the first byte carries the remaining high bits
   under a length-dependent marker.  Larger values are left to the else
   branch supplied by the including chunk. */
if( dec < 0x80 ){
   uni_code[0] = dec;  uni_code_p = 1;
}
else if( dec <= 0x7FFFFFFF ){
   long bits = dec;
   int lead, k;
   if( dec < 0x800 ){           uni_code_p = 2;  lead = 0xC0; }
   else if( dec < 0x10000 ){    uni_code_p = 3;  lead = 0xE0; }
   else if( dec < 0x200000 ){   uni_code_p = 4;  lead = 0xF0; }
   else if( dec < 0x4000000 ){  uni_code_p = 5;  lead = 0xF8; }
   else {                       uni_code_p = 6;  lead = 0xFC; }
   /* fill the trailing bytes from last to first, six bits at a time */
   for( k = uni_code_p - 1; k > 0; k-- ){
      uni_code[k] = (bits & 0x3F) | 0x80;
      bits >>= 6;
   }
   uni_code[0] = bits | lead;
}
-_-_-
<..vars..>+
/* TRUE: hexadecimal references not found in the 4hf table are written as
   UTF-8 bytes. */
static BOOL utf8 = FALSE;
-_-_-
<..else get utf8 unicode encoding..>
/* The text "tf8" two characters past p turns on UTF-8 output. */
else if( eq_str(p+2, "tf8") ){ utf8 = TRUE; }
-_-_-
<..uni in utf8..>
/* Encode dec as UTF-8.  A value beyond the range the encoder handles
   falls back to a decimal reference: the two-character prefix is kept and
   the decimal digits follow.  Such a value can need more digits than
   uni_code has room for, so the copy stops at the end of the buffer. */
<.dec to utf8.>
else {
   n = 0;
   while( dec > 0 ){  uni_10[ n++ ] = dec % 10 + ’0’;   dec /= 10;  }
   uni_code_p = 2;
   while( (n>0) && (uni_code_p < MAX_UNI_CODE) ){
      uni_code[ uni_code_p++ ] = uni_10[ --n ];
   }
}
-_-_-
<..store typed 4hf endtry..>
/* Insert (value, copy of field 2, type1, type2) into the sorted htf_4hf
   table, or overwrite an existing entry with the same value.  The included
   memory chunk grows the table when needed and allocates p for the text.
   The search runs from the end of the table, as for type-less entries. */
<.mem for new htf-4hf entry.>
(IGNORED) strcpy((char *) p, (char *) start[2] );
i = htf_4hf_n;
while( i-- > 0 ){
if( htf_4hf[i].ch == value ){
/* same code already present: drop the old text and reuse slot i */
free((void *) htf_4hf[i].str);
break;
} else {
/* insertion point: just after the first smaller entry met from the end,
   or slot 0 when every entry is larger */
if( (htf_4hf[i].ch < value)
|| ((htf_4hf[i].ch > value) && (i==0)) ){
if( htf_4hf[i].ch < value ){ i++; }
htf_4hf_n++;
/* shift entries up by one to open slot i */
for( j=htf_4hf_n; j>i; j-- ){
htf_4hf[j].ch = htf_4hf[j-1].ch;
htf_4hf[j].str = htf_4hf[j-1].str;
htf_4hf[j].type1 = htf_4hf[j-1].type1;
htf_4hf[j].type2 = htf_4hf[j-1].type2;
}
break;
} } }
/* the loop ends without a break only for an empty table */
if(i == -1){ i = htf_4hf_n; }
if(i == htf_4hf_n){ htf_4hf_n++; }
htf_4hf[i].str = p;
htf_4hf[i].ch = value;
/* the two chunks below reuse value and p as scratch variables */
<.htf_4hf[i].type1 = ....>
<.htf_4hf[i].type2 = ....>
-_-_-
<..end loading fonts..>+
for( i = 0; i<htf_4hf_n; i++){
free((void *) htf_4hf[i].str);
}
free((void *) htf_4hf);
-_-_-
<..mem for new htf-4hf entry..>
/* Grow the htf_4hf table by 10 slots once a single unused slot is left,
   then allocate p for a copy of field 2 (start[3] - start[2] is its
   length including the NUL). */
if( (htf_4hf_n+1) == max_htf_4hf_n){
max_htf_4hf_n += 10;
htf_4hf = (struct htf_4hf_rec *) r_alloc((void *) htf_4hf,
(size_t) ((max_htf_4hf_n) * sizeof(struct htf_4hf_rec) ));
}
p = m_alloc(char, (int) (start[3] - start[2]) );
-_-_-
<..htf_4hf[i].type1 = .....>
value = 0;
p = start[1];
while( *p != ’\0’ ){
if( (*p < ’0’) || (*p > ’9’) ) break;
value = value * 10 + *p - ’0’;
p++;
}
htf_4hf[i].type1 = value;
-_-_-
<..htf_4hf[i].type2 = .....>
/* type2 of entry i: the leading decimal digits of field 3 (0 when the
   field does not start with a digit).  The chunk carries the type2 name
   under which the typed-entry code includes it. */
value = 0;
p = start[3];
while( *p != ’\0’ ){
   if( (*p < ’0’) || (*p > ’9’) ) break;
   value = value * 10 + *p - ’0’;
   p++;
}
htf_4hf[i].type2 = value;
-_-_-
<..types..>+
/* One typed unicode.4hf entry: the code from the first field, the numbers
   parsed from fields 1 and 3 (type1, type2), and a private copy of the
   replacement text. */
struct htf_4hf_rec { int ch, type1, type2;
char* str; };
-_-_-
<..vars..>+
/* htf_4hf: table of typed entries, kept in ascending order of ch by the
   insertion code; htf_4hf_n: entries in use; max_htf_4hf_n: allocated
   capacity. */
static int htf_4hf_n = 0, max_htf_4hf_n;
static struct htf_4hf_rec *htf_4hf;
-_-_-
<..mem for charset..>+
/* Start the typed table with room for 256 entries. */
max_htf_4hf_n = 256;
htf_4hf = m_alloc(struct htf_4hf_rec, 256);
-_-_-
<..no mem for charset..>+
/* No unicode.4hf file was opened: the typed table has no capacity. */
max_htf_4hf_n = 0;
-_-_-
<..propagate 4hf info into htf..>
/* When str is a hexadecimal reference (ampersand, hash, x or X at its
   start and a semicolon as its last character), convert its digits to
   value and, if they were all valid, run the included table searches. */
if(    (str[0] == ’&’)
    && (str[1] == ’#’)
    && ( (str[2] == ’x’) || (str[2] == ’X’) )
    && (str[ strlen((char *) str) - 1 ] == ’;’)
){
   char* p;
   int value = 0;
   BOOL err = FALSE;
   p = str+3;
   while( *p != ’;’ ){
      int digit = (int) *p;
      if( (digit>=’0’) && (digit<=’9’) ){ digit -= ’0’; }
      else if( (digit>=’A’) && (digit<=’F’) ){ digit -= BASE_A; }
      else if( (digit>=’a’) && (digit<=’f’) ){ digit -= BASE_a; }
      else { err = TRUE; digit = 0; }
      value = 16*value + digit;
      p++;
   }
   if( !err ){
      <.search 4hf replacement in htf-4hf.>
      <.search 4hf replacement in charset.>
   }
}
-_-_-
<..htf replacement from htf-4hf..>
/* Entry mid matches the code: when its type1 equals ch1, ch1 becomes the
   entry's type2 and the entry's text overwrites str.
   NOTE(review): strcpy is unbounded and the size of str is not visible
   here -- confirm it can hold the longest replacement text. */
if( htf_4hf[mid].type1 == ch1 ){
ch1 = htf_4hf[mid].type2;
(IGNORED) strcpy((char *) str, (char *) htf_4hf[mid].str );
}
-_-_-
<..htf replacement from charset..>
/* NOTE(review): this chunk does not look usable as written.  struct
   charset_rec, as declared in this file, has only the members ch and str,
   so charset[mid].type1 and charset[mid].type2 do not exist, and
   charset.str applies a member access to a pointer.  No include of this
   chunk name appears in the part of the file reviewed; confirm whether it
   is dead text before relying on it. */
if( charset[mid].type1 == ch1 ){
ch1 = charset[mid].type2;
(IGNORED) strcpy((char *) str, (char *) charset.str );
}
-_-_-
<..search 4hf replacement in htf-4hf..>
/* Look value up in the sorted htf_4hf table and apply the included
   replacement on a hit.  The search works on the half-open range
   [bottom, top), so the probe index mid is always below htf_4hf_n and an
   empty table is never read.  The variables declared here stay in scope
   for the chunk included after this one. */
int bottom, mid, top;
BOOL found=FALSE;
bottom = 0;  top = htf_4hf_n;
while( bottom < top ){
   mid = bottom + (top - bottom) / 2;
   if( value == htf_4hf[mid].ch ){
      <.htf replacement from htf-4hf.>
      found = TRUE;
      break;
   }
   if( value < htf_4hf[mid].ch ){ top = mid; }
   else { bottom = mid + 1; }
}
-_-_-
<..htf replacement from charset..>+
/* Binary search for value in the charset table; it runs only while found
   is still FALSE and reuses bottom, mid, top and found from an earlier
   declaration.
   NOTE(review): the chunk is named as a continuation of "htf replacement
   from charset", whereas the propagate code includes a chunk called
   "search 4hf replacement in charset"; the chunk "htf into 4hf" included
   below is not defined in the part of the file reviewed.  Confirm the
   intended names.
   NOTE(review): once bottom has reached top, mid equals top, which can be
   charset_n -- one past the last stored entry. */
bottom = 0; top = charset_n;
while( !found ){
mid = (bottom + top) / 2;
if( value == charset[mid].ch ){
<.htf into 4hf.>
found = TRUE;
} else if( value < charset[mid].ch ){
if( bottom == top ){ break; }
top = mid;
}
else {
if ( bottom < mid ){ bottom = mid; }
else if ( bottom<top ){ bottom++; }
else{ break; }
}
}
-_-_-