/* MIFwriter.c -- MIF output support for WWW
 * $Id: SGMLmain.c,v 1.3 93/01/06 18:40:27 connolly Exp Locker: connolly $
 */

/*
 * NOTE(review): this copy of the file appears to have been passed through an
 * HTML tag stripper: wherever a '<' was followed by a letter or '/', the text
 * up to and including the next '>' is gone.  MIF markup inside format strings
 * is therefore missing, and in several places the lost span also swallowed
 * code up to the '>' of a following "->".  The lost text cannot be recovered
 * from this copy, so the affected statements are reproduced exactly as found
 * and tagged:
 *
 *   @@DAMAGED(text)  only string contents seem to be missing; still parses.
 *   @@DAMAGED(code)  the lost span ran into code; does not parse as written.
 *
 * Restore every tagged statement from the original source before building.
 * All other statements are believed intact.
 */

/* implements ... */
#include "MIFwriter.h"

/* uses ... */
#include "SGML.h"
#include "HTParse.h"
#include "HTMLdtd.h"
/* NOTE(review): the two header names below were blank in this copy; these are
 * the standard headers for the stdio and string calls made in this file.
 * Confirm against the original. */
#include <stdio.h>
#include <string.h>

#include "object.h"
#include "debug.h"

/* One open element on the writer's element stack. */
typedef struct{
  char gi[SGML_NAMELEN + 1];    /* generic identifier (tag name) */
  int content;                  /* content model returned by start_tag */
}Element;

/* Writer instance: output stream, open-element stack and output state. */
typedef struct{
  FILE* out;
  Element stack[SGML_TAGLVL];
  int literal;
  int taglvl;                   /* number of entries in use in stack[] */
  int needspace;
  int empty; /* current paragraph is empty */
  enum { MIFFile, VariableFormats, VariableDef,
         TextFlow, ParaLine, Font }state;
}MIF;

/* Set state and literal flag of writer m; when p is nonzero also mark the
 * current paragraph empty and clear needspace. */
#define STATE(m, s, l, p) ((m)->state=(s),(m)->literal=(l), \
                           (p) ? ((m)->empty=1,(m)->needspace=0) : 0)

static HMStartTagProc start_tag;
static HMEndTagProc end_tag;
static HMDataProc data;
static HMFileWriterProc MIFwriter_new;
static HMDeleteProc MIFwriter_dt;

static VOID marker PARAMS((MIF* m, CONST HMBinding *attributes, int nattrs));

HMDoc_Class MIFwriter = {MIFwriter_new, 0, MIFwriter_dt,
                         start_tag, end_tag, data, html_entity_text};

/* Character code written by data() for input bytes 160..255, indexed by
 * (byte - 160); the comments give the glyph name for each input code. */
static int FrameEncoding[] = {
  /* 160 /space -> */           0x20,
  /* 161 /exclamdown -> */      0xc1,
  /* 162 /cent -> */            0xa2,
  /* 163 /sterling -> */        0xa3,
  /* 164 /currency -> */        0xdb,
  /* 165 /yen -> */             0xb4,
  /* 166 /brokenbar -> */       0x00,
  /* 167 /section -> */         0xa4,
  /* 168 /dieresis -> */        0xac,
  /* 169 /copyright -> */       0xa9,
  /* 170 /ordfeminine -> */     0xbb,
  /* 171 /guillemotleft -> */   0xc7,
  /* 172 /logicalnot -> */      0xc2,
  /* 173 /hyphen -> */          0x2d,
  /* 174 /registered -> */      0xa8,
  /* 175 /macron -> */          0xf8,
  /* 176 /degree -> */          0x00,
  /* 177 /plusminus -> */       0x00,
  /* 178 /twosuperior -> */     0x00,
  /* 179 /threesuperior -> */   0x00,
  /* 180 /acute -> */           0xab,
  /* 181 /mu -> */              0x00,
  /* 182 /paragraph -> */       0xa6,
  /* 183 /periodcentered -> */  0xe1,
  /* 184 /cedilla -> */         0xfc,
  /* 185 /onesuperior -> */     0x00,
  /* 186 /ordmasculine -> */    0xbc,
  /* 187 /guillemotright -> */  0xc8,
  /* 188 /onequarter -> */      0x00,
  /* 189 /onehalf -> */         0x00,
  /* 190 /threequarters -> */   0x00,
  /* 191 /questiondown -> */    0xc0,
  /* 192 /Agrave -> */          0xcb,
  /* 193 /Aacute -> */          0xe7,
  /* 194 /Acircumflex -> */     0xe5,
  /* 195 /Atilde -> */          0xcc,
  /* 196 /Adieresis -> */       0x80,
  /* 197 /Aring -> */           0x81,
  /* 198 /AE -> */              0xae,
  /* 199 /Ccedilla -> */        0x82,
  /* 200 /Egrave -> */          0xe9,
  /* 201 /Eacute -> */          0x83,
  /* 202 /Ecircumflex -> */     0xe6,
  /* 203 /Edieresis -> */       0xe8,
  /* 204 /Igrave -> */          0xed,
  /* 205 /Iacute -> */          0xea,
  /* 206 /Icircumflex -> */     0xeb,
  /* 207 /Idieresis -> */       0xec,
  /* 208 /Eth -> */             0x00,
  /* 209 /Ntilde -> */          0x84,
  /* 210 /Ograve -> */          0xf1,
  /* 211 /Oacute -> */          0xee,
  /* 212 /Ocircumflex -> */     0xef,
  /* 213 /Otilde -> */          0xcd,
  /* 214 /Odieresis -> */       0x85,
  /* 215 /multiply -> */        0x00,
  /* 216 /Oslash -> */          0xaf,
  /* 217 /Ugrave -> */          0xf4,
  /* 218 /Uacute -> */          0xf2,
  /* 219 /Ucircumflex -> */     0xf3,
  /* 220 /Udieresis -> */       0x86,
  /* 221 /Yacute -> */          0x00,
  /* 222 /Thorn -> */           0x00,
  /* 223 /germandbls -> */      0xa7,
  /* 224 /agrave -> */          0x88,
  /* 225 /aacute -> */          0x87,
  /* 226 /acircumflex -> */     0x89,
  /* 227 /atilde -> */          0x8b,
  /* 228 /adieresis -> */       0x8a,
  /* 229 /aring -> */           0x8c,
  /* 230 /ae -> */              0xbe,
  /* 231 /ccedilla -> */        0x8d,
  /* 232 /egrave -> */          0x8f,
  /* 233 /eacute -> */          0x8e,
  /* 234 /ecircumflex -> */     0x90,
  /* 235 /edieresis -> */       0x91,
  /* 236 /igrave -> */          0x93,
  /* 237 /iacute -> */          0x92,
  /* 238 /icircumflex -> */     0x94,
  /* 239 /idieresis -> */       0x95,
  /* 240 /eth -> */             0x00,
  /* 241 /ntilde -> */          0x96,
  /* 242 /ograve -> */          0x98,
  /* 243 /oacute -> */          0x97,
  /* 244 /ocircumflex -> */     0x99,
  /* 245 /otilde -> */          0x9b,
  /* 246 /odieresis -> */       0x9a,
  /* 247 /divide -> */          0x00,
  /* 248 /oslash -> */          0xbf,
  /* 249 /ugrave -> */          0x9d,
  /* 250 /uacute -> */          0x9c,
  /* 251 /ucircumflex -> */     0x9e,
  /* 252 /udieresis -> */       0x9f,
  /* 253 /yacute -> */          0x00,
  /* 254 /thorn -> */           0x00,
  /* 255 /ydieresis -> */       0xd8,
};


/* mifwriter constructor
 *
 * Allocates a writer that sends its output to fp, seeds the element stack
 * with a single "HTML" entry, enters the MIFFile state and writes the file's
 * first line.  Returns the new writer as an HMDoc*.
 */
static HMDoc*
MIFwriter_new(fp)
     FILE* fp;
{
  MIF* m = NEW(MIF, 1);

  m->out = fp;
  m->taglvl = 1;
  strcpy(m->stack[0].gi, "HTML"); /* @@ fake tag minimization */
  /* end_tag() reads .content of every stacked element, including this one,
   * and nothing visible here shows NEW() clearing the allocation -- so give
   * the seed entry a defined content model. */
  m->stack[0].content = SGML_MIXED;
  STATE(m, MIFFile, 0, 1);

  /* @@DAMAGED(text) -- possibly: text may be missing before the '#' */
  fprintf(m->out, " # Generated by html2mif\n" );
  return (HMDoc*)m;
}


/* mifwriter destructor: releases the writer object itself.  The output
 * stream is not touched here. */
static VOID
MIFwriter_dt(this)
     HMDoc* this;
{
  FREE(this);
}


/* Character-data callback.
 *
 *   document  the writer (a MIF*)
 *   chars     the data; need not be NUL-terminated
 *   nchars    number of bytes at chars
 *
 * A lone newline is dropped when the writer is not in literal mode and the
 * current paragraph is still empty.  Data arriving in the VariableFormats
 * state is skipped.  Otherwise the bytes are written to the output with
 * MIF string escapes; bytes with the high bit set go through FrameEncoding.
 */
static VOID
data(document, chars, nchars)
     HMDoc* document;
     CONST char* chars;
     int nchars;
{
  MIF* m = (MIF*)document;
  Element* e = &m->stack[m->taglvl - 1];
  CONST char* p;

  /* @@DAMAGED(text) */
  debug(("\n", m->empty, chars[0], nchars));

  if(chars[0] == '\n' && nchars <2 && m->literal == 0 && m->empty)
    return;

  switch(m->state){
  case MIFFile:
    start_tag((HMDoc*)m, "BODY", 0, 0);
    /* @@DAMAGED(code) */
    fprintf(m->out, " \n" " out, " \n" " gi);
    STATE(m, ParaLine, 0, 1);
    break;
  case VariableFormats: /* in element content. Skip data */
    return;
  case VariableDef: /* nothing */
    break;
  default:
    /* @@DAMAGED(code) -- the header of the loop over p is not present in
     * this copy; the statements below are that loop's body */
    fprintf(m->out, " empty = 0;
    if(*p & 0x80){
      int i = (*p & 0xFF) - 160;
      /* Bytes 0x80..0x9F give a negative index, so the lower bound must be
       * checked too or the table is read out of bounds. */
      if(i >= 0 && i < 96) /* in ISOlat1 encoding? */
        /* was printf(): the escape went to stdout, not to the MIF stream */
        fprintf(m->out, "\\x%02x ", FrameEncoding[i]);
    }else switch(*p){
    case '\n':
      if(m->literal)
        /* @@DAMAGED(code) */
        fprintf(m->out, "'>\n" " \n" " > # End ParaLine\n" " needspace){
        fprintf(m->out, " ");
        m->needspace = 0;
      }
      break;
    case '\r': /* nothing */
      break;
    case '\t':
      fprintf(m->out, "\\t");
      m->needspace = 0;
      break;
    case '>':
      fprintf(m->out, "\\>");
      m->needspace = 1;
      break;
    case '\'':
      fprintf(m->out, "\\q");
      m->needspace = 1;
      break;
    case '`':
      fprintf(m->out, "\\Q");
      m->needspace = 1;
      break;
    case '\\':
      fprintf(m->out, "\\\\");
      m->needspace = 1;
      break;
    case ' ':
      if(m->literal){
        /* @@DAMAGED(code) */
        fprintf(m->out, "'>\n" " \n" " needspace = 0;
        fprintf(m->out, " ");
      }
      break;
    default:
      m->needspace = 1;
      fprintf(m->out, "%c", *p);
    }
  }
  fprintf(m->out, "'>\n");
}


#if 0
/* save this for insets */
static VOID
entity(document, name)
     HMDoc* document;
     CONST char* name;
{
  MIF* m = (MIF*)document;

  /*@@ same prep work as data */
  /* @@DAMAGED(text) */
  fprintf(m->out, " \n", name);
  m->needspace = 1;
}
#endif


/* Write a marker for an anchor element.
 *
 *   m           the writer
 *   attributes  the element's attribute bindings
 *   nattrs      number of entries in attributes
 *
 * Looks for HREF and NAME among the attributes.  With an HREF, the reference
 * is split by HTParse into access scheme, host/path and anchor, and one of
 * four forms is written depending on which parts are non-empty; the three
 * strings returned by HTParse are freed here.  Without an HREF but with a
 * NAME, a marker carrying the name is written.  With neither, nothing is
 * written.
 */
static VOID
marker(m, attributes, nattrs)
     MIF* m;
     CONST HMBinding *attributes;
     int nattrs;
{
  int i;
  char* name = 0;
  char* href = 0;

  for(i = 0; i < nattrs; i++){
    if(!strcmp(attributes[i].name, "NAME"))
      name = attributes[i].value;
    else if(!strcmp(attributes[i].name, "HREF"))
      href = attributes[i].value;
  }

  if(href){
    char* anchor = HTParse(href, "", PARSE_ANCHOR);
    char* scheme = HTParse(href, "", PARSE_ACCESS);
    char* path = HTParse(href, "", PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);

    /* @@DAMAGED(text) -- this and the four format strings below */
    fprintf(m->out, " \n");

    if(scheme && *scheme)
      fprintf(m->out, " \n", scheme, path, anchor);
    else if(path && path[0] && path[1]){ /*@@ in case of just "/" */
      if(anchor && *anchor)
        fprintf(m->out, " \n", path, anchor);
      else
        fprintf(m->out, " \n", path);
    }else
      fprintf(m->out, " \n", anchor);

    fprintf(m->out, " > #End of Marker\n");

    free(scheme);
    free(path);
    free(anchor);
  }
  else if (name){
    /* @@DAMAGED(text) */
    fprintf(m->out, " \n" " \n" " > #End of Marker\n", name);
  }
}


/* Start-tag callback.
 *
 *   document    the writer (a MIF*)
 *   gi          generic identifier of the element being opened
 *   attributes  attribute bindings (passed on to marker() for <A>)
 *   nattrs      number of entries in attributes
 *
 * Pushes the element on the stack, classifies it as paragraph-level
 * (taglevel ParaLine), character-level (taglevel Font) or neither, and then
 * runs the state machine until some branch returns.  Branches that merely
 * change state "break" and go round the loop again in the new state.
 *
 * Returns the content model recorded for the element, or SGML_EMPTY (after
 * popping the element again) when the tag is consumed or ignored.
 */
static int
start_tag(document, gi, attributes, nattrs)
     HMDoc* document;
     CONST char* gi;
     CONST HMBinding attributes[];
     int nattrs;
{
  MIF* m = (MIF*)document;
  Element* e;
  int taglevel = -1;

  m->needspace = 0;

  /* stack[] has SGML_TAGLVL slots; refuse to push past the end. */
  if(m->taglvl >= SGML_TAGLVL){
    debug(("'%s': element stack full (%d levels), tag ignored\n",
           gi, m->taglvl));
    return SGML_EMPTY;
  }
  e = &m->stack[m->taglvl++];

  /* gi[] holds SGML_NAMELEN characters plus the terminator; bound the copy
   * and terminate explicitly, since strncpy does not when it truncates. */
  strncpy(e->gi, gi, SGML_NAMELEN);
  e->gi[SGML_NAMELEN] = '\0';
  debug(("stacking '%s'\n", gi));

  if(!strcmp(gi, "H1")
     || !strcmp(gi, "H2")
     || !strcmp(gi, "H3")
     || !strcmp(gi, "H4")
     || !strcmp(gi, "H5")
     || !strcmp(gi, "H6")
     || !strcmp(gi, "PRE")
     || !strcmp(gi, "XMP")
     || !strcmp(gi, "LISTING")
     || !strcmp(gi, "ADDRESS")
     || !strcmp(gi, "BLOCKQUOTE")
     || !strcmp(gi, "UL")
     || !strcmp(gi, "OL")
     || !strcmp(gi, "MENU")
     || !strcmp(gi, "DIR")
     || !strcmp(gi, "DL")
     )
    taglevel = ParaLine;
  else if(!strcmp(gi, "A")
          || !strcmp(gi, "EM")
          || !strcmp(gi, "TT")
          || !strcmp(gi, "STRONG")
          || !strcmp(gi, "B")
          || !strcmp(gi, "I")
          || !strcmp(gi, "U")
          || !strcmp(gi, "CODE")
          || !strcmp(gi, "SAMP")
          || !strcmp(gi, "KBD")
          || !strcmp(gi, "KEY")
          || !strcmp(gi, "VAR")
          || !strcmp(gi, "DFN")
          || !strcmp(gi, "CITE"))
    taglevel = Font;

  while(1){
    switch(m->state){
    case MIFFile:
      if(!strcmp(gi, "BODY")){
        /* @@DAMAGED(code) */
        fprintf(m->out, "content = SGML_MIXED;
      }
      else if(!strcmp(gi, "HEAD")){
        return e->content = SGML_ELEMENT;
      }
      else if(!strcmp(gi, "TITLE")){
        /* @@DAMAGED(code) */
        fprintf(m->out, "\n" " content = SGML_RCDATA; /*@@ CDATA? */
      }
      else if(!strcmp(gi, "ISINDEX")){
        /* @@DAMAGED(text) */
        fprintf(m->out, "\n" " \n" " >\n" );
        STATE(m, VariableFormats, 0, 1);
        m->taglvl--;
        return SGML_EMPTY;
      }
      else if(taglevel == ParaLine || taglevel == Font){
        /* The implied BODY needs a stack slot of its own.  Without one the
         * nested call would be refused, the state would stay MIFFile and
         * this loop would never terminate -- so drop the tag instead. */
        if(m->taglvl >= SGML_TAGLVL){
          debug(("'%s': no room on element stack for implied BODY\n", gi));
          m->taglvl--;
          return SGML_EMPTY;
        }
        start_tag((HMDoc*)m, "BODY", 0, 0);
      }
      else{
        debug(("'%s' out of context in state %d", gi, m->state));
        m->taglvl--;
        return SGML_EMPTY;
      }
      break;

    case VariableFormats:
      if(!strcmp(gi, "TITLE")){
        /* @@DAMAGED(code) */
        fprintf(m->out, " \n" " content = SGML_RCDATA; /*@@ CDATA? */
      }
      else if(!strcmp(gi, "ISINDEX")){
        /* @@DAMAGED(text) */
        fprintf(m->out, " \n" " \n" " >\n" );
        m->taglvl--;
        return SGML_EMPTY;
      }
      else{
        fprintf(m->out, " > #End of VariableFormats\n");
        STATE(m, MIFFile, 0, 1);
      }
      break;

    case TextFlow:
      if(!strcmp(gi, "PRE")){
        /* @@DAMAGED(code) */
        fprintf(m->out, " \n" " content = SGML_MIXED;
      }
      else if(!strcmp(gi, "XMP") || !strcmp(gi, "LISTING")){
        /* @@DAMAGED(code) */
        fprintf(m->out, " \n" " content = SGML_RCDATA;
      }
      else if(taglevel == ParaLine){
        /* @@DAMAGED(code) */
        fprintf(m->out, " \n" " content = SGML_MIXED;
      }
      else if(taglevel == Font){
        debug(("%s: transition from TextFlow to BODY ParaLine", gi));
        /* @@DAMAGED(code) */
        fprintf(m->out, " \n" " state));
        m->taglvl--;
        return SGML_EMPTY;
      }
      break;

    case ParaLine:
      if(!strcmp(gi, "A")){
        /* @@DAMAGED(text) */
        fprintf(m->out, " \n" " >\n", gi);
        marker(m, attributes, nattrs);
        STATE(m, Font, m->literal, 0);
        return e->content = SGML_MIXED;
      }
      else if(taglevel == Font){
        /* @@DAMAGED(text) */
        fprintf(m->out, " \n" " >\n" , gi);
        STATE(m, Font, m->literal, 0);
        return e->content = SGML_MIXED;
      }
      else if(!strcmp(gi, "P")){
        m->taglvl--;
        if(!m->empty)
          fprintf(m->out, " > # End ParaLine\n" " > # End Para\n");
        STATE(m, TextFlow, 0, 1);
        return SGML_EMPTY;
      }
      else if(!strcmp(gi, "DT") || !strcmp(gi, "LI")){
        m->taglvl--;
        if(!m->empty)
          /* @@DAMAGED(code) */
          fprintf(m->out, " > # End ParaLine\n" " > # End Para\n" " empty = 1;
        m->needspace = 0;
        return SGML_EMPTY;
      }
      else if(!strcmp(gi, "DD")){
        /* @@DAMAGED(text) -- possibly */
        fprintf(m->out, " \n");
        m->taglvl--;
        return SGML_EMPTY;
      }
      /* This was "taglevel = ParaLine": an assignment, hence always true.
       * It made the else branch below unreachable and reclassified every
       * otherwise-unhandled tag as paragraph-level for the next pass of
       * the loop. */
      else if(taglevel == ParaLine){
        debug(("'%s' start tag: back to TextFlow state\n", gi));
        fprintf(m->out, " > # End of ParaLine\n" " > # End of Para\n" );
        STATE(m, TextFlow, 0, 1);
      }
      else{
        debug(("'%s' out of context in state %d", gi, m->state));
        m->taglvl--;
        return SGML_EMPTY;
      }
      break;

    default:
      debug(("state %d unexpected (<%s>)\n", m->state, gi));
      m->taglvl--;
      return SGML_EMPTY;
    }
  }
}


/* End-tag callback.
 *
 *   document  the writer (a MIF*)
 *   gi        generic identifier of the element being closed
 *
 * Searches the stack from the top for the first entry that either has
 * content model SGML_RCDATA or SGML_CDATA, or whose name equals gi.  If no
 * entry qualifies the tag is reported through debug() and ignored.
 * Otherwise entries are popped down to and including that one; each pop
 * writes the closing text for the current output state and moves to the
 * enclosing state.
 */
static VOID
end_tag(document, gi)
     HMDoc* document;
     CONST char* gi;
{
  MIF* m = (MIF*)document;
  int i;

  for(i = m->taglvl - 1; i>=0; i--){
    /* @@DAMAGED(text) */
    debug(("found . stack has %s\n", gi, m->stack[i].gi));

    if(m->stack[i].content == SGML_RCDATA
       || m->stack[i].content == SGML_CDATA
       || !strcmp(gi, m->stack[i].gi))
      break;
  }

  if(i < 0){
    debug(("Parse error: '%s' end tag with no such element open.\n", gi));
    return;
  }

  while(m->taglvl > i){
    m->taglvl--;

    switch(m->state){
    case VariableDef:
      fprintf(m->out, " > #End of VariableFormat\n");
      STATE(m, VariableFormats, 0, 1);
      break;
    case VariableFormats:
      fprintf(m->out, "> #End of VariableFormats\n");
      STATE(m, MIFFile, 0, 1);
      break;
    case TextFlow:
      fprintf(m->out, "> # End of TextFlow\n");
      STATE(m, MIFFile, 0, 1);
      break;
    case ParaLine:
      fprintf(m->out, " > # End of ParaLine\n" " > # End of Para\n");
      STATE(m, TextFlow, 0, 1);
      break;
    case Font:
      /* @@DAMAGED(text) -- possibly */
      fprintf(m->out, " \n" " > # End of Font\n");
      STATE(m, ParaLine, m->literal, 0);
      break;
    default:
      debug(("'%s' end tag unexpected in state %d.", gi, m->state));
    }
  }
}