/*                                                                  HTMLGen.c
**      HTML GENERATOR
**
**      (c) COPYRIGHT MIT 1995.
**      Please first read the full copyright statement in the file COPYRIGH.
**      @(#) $Id: HTMLGen.c,v 2.43 1998/05/04 19:36:56 frystyk Exp $
**
**      This version of the HTML object sends HTML markup to the output stream.
**
** Bugs:        Line wrapping is not done at all.
**              All data handled as PCDATA.
**              Should convert old XMP, LISTING and PLAINTEXT to PRE.
**
**      It is not obvious to me right now whether the HEAD should be generated
**      from the incoming data or the anchor.  Currently it is from the former
**      which is cleanest. TBL
**
** HISTORY:
**       8 Jul 94  FM   Insulate free() from _free structure element.
**
*/

/* Library include files */
#include "wwwsys.h"
#include "HTUtils.h"
#include "HTMLPDTD.h"
#include "HTStruct.h"
#include "HTFormat.h"
#include "HTMLGen.h"                                   /* Implemented here */

#define BUFFER_SIZE     80      /* Line buffer attempts to make neat breaks */
#define MAX_CLEANNESS   10

/* Shorthands for calling the methods of the downstream target stream */
#define PUT_CHAR(c)     (*me->target->isa->put_character)(me->target, c)
#define PUT_STR(s)      (*me->target->isa->put_string)(me->target, s)
#define PUT_BLOCK(s,l)  (*me->target->isa->put_block)(me->target, s, l)

/*              HTML Generator Object
*/
struct _HTStream {
    const HTStreamClass *       isa;
    HTStream *                  target;
};

struct _HTStructured {
    const HTStructuredClass *   isa;
    HTStream *                  target;         /* Where the markup is sent */
    const SGML_dtd *            dtd;            /* Tag and entity names */
    BOOL                        seven_bit;      /* restrict output */

    char                        buffer[BUFFER_SIZE+1];  /* Pending line */
    char *                      write_pointer;  /* Next free slot in buffer */

    /* line_break[n] is the latest break candidate of cleanness n, or NULL */
    char *                      line_break [MAX_CLEANNESS+1];

    /* Highest cleanness with a recorded candidate; 0 means "no candidate" */
    int                         cleanness;

    /* Set when a full buffer had to be written out without a newline */
    BOOL                        overflowed;

    /* delete_line_break_char[n]: drop the char at line_break[n] on a break */
    BOOL                        delete_line_break_char[MAX_CLEANNESS+1];

    /* Non-zero suppresses the automatic breaks at spaces; counts PRE depth */
    char                        preformatted;
};

/*                      OUTPUT FUNCTIONS
**
**      These function output the finished SGML stream doing the
**      line wrap
*/

/*      Flush Buffer
**      ------------
*/

/*
**      Forget every recorded break candidate.
*/
PRIVATE void flush_breaks (HTStructured * me)
{
    int i;
    for (i = 0; i <= MAX_CLEANNESS; i++) {
        me->line_break[i] = NULL;
    }
}

/*
**      Write the pending buffer contents to the target as one block, then
**      reset the write pointer, the break candidates and the cleanness.
**      Always returns HT_OK.
*/
PRIVATE int HTMLGen_flush (HTStructured * me)
{
    PUT_BLOCK(me->buffer, me->write_pointer - me->buffer);
    me->write_pointer = me->buffer;
    flush_breaks(me);
    me->cleanness = 0;
    return HT_OK;
}

/*      Weighted optional line break
**
**      We keep track of all the breaks for when we chop the line
**
**      new_cleanness   weight of this candidate; used as an index into
**                      line_break[] and delete_line_break_char[]
**      dlbc            YES: the candidate is the character just written
**                      (normally a space) and it is dropped when the line
**                      is broken here.  NO: the candidate is the gap at the
**                      current write position and nothing is dropped.
*/
PRIVATE void allow_break (HTStructured * me, int new_cleanness, BOOL dlbc)
{
    /*
    ** A deleting break refers to the character before the write pointer.
    ** If the buffer is empty (for instance it was just written out because
    ** it was full) that character is no longer in the buffer and
    ** write_pointer - 1 would point in front of the array, so there is
    ** nothing that can be recorded.
    */
    if (dlbc && me->write_pointer <= me->buffer)
        return;

    me->line_break[new_cleanness] =
        dlbc ? me->write_pointer - 1            /* Point to space */
             : me->write_pointer;               /* point to gap */
    me->delete_line_break_char[new_cleanness] = dlbc;
    if (new_cleanness >= me->cleanness)
        me->cleanness = new_cleanness;
}

/*      Character handling
**      ------------------
**
**      The tricky bits are the line break handling. This attempts
**      to synchronise line breaks on sentence or phrase ends. This
**      is important if one stores SGML files in a line-oriented code
**      repository, so that if a small change is made, line ends don't
**      shift in a ripple-through to apparently change a large part of the
**      file. We give extra "cleanness" to spaces appearing directly
**      after periods (full stops), [semi]colons and commas.
**      This should make the source files easier to read and modify
**      by hand, too, though this is not a primary design consideration.
TBL */ PRIVATE char delims[] = ",;:."; /* @@ english bias */ PRIVATE int HTMLGen_output_character (HTStructured * me, char c) { *me->write_pointer++ = c; if (c=='\n') { /* Newlines */ if (me->preformatted) { HTMLGen_flush(me); return HT_OK; } else { me->write_pointer[-1] = c = ' '; /* Treat same as space */ } } /* Figure our whether we can break at this point */ if ((!me->preformatted && c==' ')) { int new_cleanness = 1; if (me->write_pointer > (me->buffer + 1)) { char * p; p = strchr(delims, me->write_pointer[-2]); if (p) new_cleanness = p - delims + 4; } allow_break(me, new_cleanness, YES); } /* Flush buffer out when full, or whenever the line is over the nominal maximum and we can break at all */ if (me->write_pointer >= me->buffer + BUFFER_SIZE-1 || (me->overflowed && me->cleanness)) { if (me->cleanness) { char line_break_char = me->line_break[me->cleanness][0]; char * saved = me->line_break[me->cleanness]; if (me->delete_line_break_char[me->cleanness]) saved++; me->line_break[me->cleanness][0] = '\n'; PUT_BLOCK(me->buffer, me->line_break[me->cleanness]-me->buffer+1); me->line_break[me->cleanness][0] = line_break_char; { /* move next line in */ char * p=saved; char *q; for(q=me->buffer; p < me->write_pointer; ) *q++ = *p++; } me->cleanness = 0; /* Now we have to check whether ther are any perfectly good breaks ** which weren't good enough for the last line but may be ** good enough for the next */ { int i; for(i=0; i <= MAX_CLEANNESS; i++) { if (me->line_break[i] > saved) { me->line_break[i] = me->line_break[i] - (saved-me->buffer); me->cleanness = i; } else { me->line_break[i] = NULL; } } } me->write_pointer = me->write_pointer - (saved-me->buffer); me->overflowed = NO; } else { /* No break- just output with no newline */ PUT_BLOCK(me->buffer, me->write_pointer - me->buffer); me->write_pointer = me->buffer; flush_breaks(me); me->overflowed = YES; } } return HT_OK; } /* String handling ** --------------- */ PRIVATE int HTMLGen_output_string (HTStructured * me, 
const char* s) { while (*s) HTMLGen_output_character(me, *s++); return HT_OK; } /* INPUT FUNCTIONS ** ** These take data from the structured stream. In the input ** stream, entities are in raw form. The seven_bit flag controls ** whether the ISO Latin-1 charactrs are represented in SGML entity ** form. This is only recommended for viewing on older non-latin-1 ** capable equipment, or for mailing for example. ** ** Bug: assumes local encoding is ISO! */ PRIVATE int HTMLGen_put_character (HTStructured * me, char c) { if (c=='&') HTMLGen_output_string(me, "&"); else if (c=='<') HTMLGen_output_string(me, "<"); else if (me->seven_bit && ((unsigned char)c > 127)) { char temp[8]; sprintf(temp, "&%d;", c); HTMLGen_output_string(me, temp); } else HTMLGen_output_character(me, c); return HT_OK; } PRIVATE int HTMLGen_put_string (HTStructured * me, const char* s) { while (*s) HTMLGen_put_character(me, *s++); return HT_OK; } PRIVATE int HTMLGen_write (HTStructured * me, const char* b, int l) { while (l-- > 0) HTMLGen_put_character(me, *b++); return HT_OK; } /* Start Element ** ------------- ** ** Within the opening tag, there may be spaces ** and the line may be broken at these spaces. */ PRIVATE void HTMLGen_start_element ( HTStructured * me, int element_number, const BOOL* present, const char ** value) { int i; HTTag * tag = &me->dtd->tags[element_number]; /* Control line breaks allowed within tag! 
*/ int was_preformatted = me->preformatted; /* save state */ me->preformatted = 1; /* Can break between attributes */ HTMLGen_output_character(me, '<'); HTMLGen_output_string(me, tag->name); if (present) for (i=0; i< tag->number_of_attributes; i++) { if (present[i]) { HTMLGen_output_character(me, ' '); allow_break(me, 1, YES); HTMLGen_output_string(me, tag->attributes[i].name); if (value[i]) { HTMLGen_output_string(me, "=\""); HTMLGen_output_string(me, value[i]); HTMLGen_output_character(me, '"'); } } } me->preformatted = was_preformatted; /* Restore state */ /* Nested PRE is no more a problem! */ if (element_number == HTML_PRE) me->preformatted++; HTMLGen_output_character(me, '>'); /* Here is a funny one. In PRE, newlines are significant, except of course for one after the
which is ignored. This means that we MUST put in a dummy one after theto protect any real newline within the pre section. However, *within* a PRE section, although we can break after (for example) emphasis start tags, it will probably confuse some parsers so we won't.*/ if (element_number == HTML_PRE) { HTMLGen_output_character(me, '\n'); } else if (!me->preformatted && tag->contents != SGML_EMPTY) { /* can break after element start */ allow_break(me, 3, NO); } } /* End Element ** ----------- ** ** The rules for insertring CR LF into SGML are weird, strict, and ** nonintitive. ** See comment also about PRE above. */ PRIVATE void HTMLGen_end_element (HTStructured * me, int element_number) { if (element_number == HTML_PRE) { HTMLGen_output_character(me, '\n'); } else if (!me->preformatted) { /* can break before element end */ allow_break(me, 1, NO); } HTMLGen_output_string(me, ""); HTMLGen_output_string(me, me->dtd->tags[element_number].name); HTMLGen_output_character(me, '>'); /* NO break after. 
TBL 940501 */ if (element_number == HTML_PRE && me->preformatted) me->preformatted--; } /* Expanding entities ** ------------------ ** */ PRIVATE void HTMLGen_put_entity (HTStructured * me, int entity_number) { HTMLGen_output_character(me, '&'); HTMLGen_output_string(me, me->dtd->entity_names[entity_number]); HTMLGen_output_character(me, ';'); } /* Free an object ** -------------- ** */ PRIVATE int HTMLGen_free (HTStructured * me) { HTMLGen_flush(me); PUT_CHAR('\n'); (*me->target->isa->_free)(me->target); HT_FREE(me); return HT_OK; } PRIVATE int PlainToHTML_free (HTStructured * me) { HTMLGen_end_element(me, HTML_PRE); HTMLGen_end_element(me, HTML_BODY); HTMLGen_end_element(me, HTML_HTML); HTMLGen_free(me); return HT_OK; } PRIVATE int HTMLGen_abort (HTStructured * me, HTList * e) { HTMLGen_free(me); return HT_ERROR; } PRIVATE int PlainToHTML_abort (HTStructured * me, HTList * e) { PlainToHTML_free(me); return HT_ERROR; } /* Structured Object Class ** ----------------------- */ PRIVATE const HTStructuredClass HTMLGeneration = /* As opposed to print etc */ { "text/html", HTMLGen_flush, HTMLGen_free, HTMLGen_abort, HTMLGen_put_character, HTMLGen_put_string, HTMLGen_write, HTMLGen_start_element, HTMLGen_end_element, HTMLGen_put_entity }; /* Subclass-specific Methods ** ------------------------- */ PUBLIC HTStructured* HTMLGenerator (HTRequest * request, void * param, HTFormat input_format, HTFormat output_format, HTStream * output_stream) { HTStructured* me; if ((me = (HTStructured *) HT_CALLOC(1, sizeof(HTStructured))) == NULL) HT_OUTOFMEM("HTMLGenerator"); me->isa = &HTMLGeneration; me->dtd = &HTMLP_dtd; if ((me->target = HTStreamStack(WWW_HTML, output_format, output_stream, request, YES)) == NULL) { if (STREAM_TRACE) HTTrace("HTMLGen..... 
Can't convert to media type\n"); HT_FREE(me); me->target = HTErrorStream(); } me->write_pointer = me->buffer; flush_breaks(me); return me; } /* Stream Object Class ** ------------------- ** ** This object just converts a plain text stream into HTML ** It is officially a structured stream but only the stream bits exist. ** This is just the easiest way of typecasting all the routines. */ PRIVATE const HTStructuredClass PlainToHTMLConversion = { "plaintexttoHTML", HTMLGen_flush, PlainToHTML_free, /* HTMLGen_free, Henrik 03/03-94 */ PlainToHTML_abort, HTMLGen_put_character, HTMLGen_put_string, HTMLGen_write, NULL, /* Structured stuff */ NULL, NULL }; /* HTConverter from plain text to HTML Stream ** ------------------------------------------ ** ** Changed by henrik 03/03-94, so no more core dumps etc. (I hope!!!) */ PUBLIC HTStream* HTPlainToHTML (HTRequest * request, void * param, HTFormat input_format, HTFormat output_format, HTStream * output_stream) { BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */ const char *value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */ HTStructured* me; if ((me = (HTStructured *) HT_CALLOC(1,sizeof(*me))) == NULL) HT_OUTOFMEM("PlainToHTML"); memset((void *) present, '\0', MAX_ATTRIBUTES); memset((void *) value, '\0', MAX_ATTRIBUTES*sizeof(char *)); me->isa = (HTStructuredClass*) &PlainToHTMLConversion; me->dtd = &HTMLP_dtd; me->target = output_stream; me->write_pointer = me->buffer; flush_breaks(me); if (me->target) { HTMLGen_start_element(me, HTML_HTML, present, value); HTMLGen_start_element(me, HTML_BODY, present, value); HTMLGen_start_element(me, HTML_PRE, present, value); } return (HTStream*) me; } /* A safe version for making 7-bit restricted HTML ** Beware that thsi makes it horrible for the Scandinavians ** to actually read it. 
**      ehh - not horrible - THIS REALLY PISSES THEM OFF - Henrik ;-)
*/

/*
**      Builds the converter with HTPlainToHTML(), passing all arguments
**      through unchanged, and then switches on its seven_bit flag.
**      Returns the stream obtained from HTPlainToHTML().
*/
PUBLIC HTStream* HTPlainTo7BitHTML (HTRequest * request,
                                    void *      param,
                                    HTFormat    input_format,
                                    HTFormat    output_format,
                                    HTStream *  output_stream)
{
    HTStructured * generator =
        (HTStructured *) HTPlainToHTML(request, param, input_format,
                                       output_format, output_stream);

    generator->seven_bit = YES;
    return (HTStream *) generator;
}