From: Lukas Hägele Date: Sun, 20 Oct 2024 14:46:27 +0000 (+0200) Subject: initial attempt to finalize the markdown parser (not yet finished) X-Git-Url: https://git.lhaegele.de/?a=commitdiff_plain;h=0ea019c83d4157c32917dfd789ba559ee27a399d;p=recipes.git initial attempt to finalize the markdown parser (not yet finished) --- diff --git a/src/html.c b/src/html.c index 2e295ac..5a59ca2 100644 --- a/src/html.c +++ b/src/html.c @@ -55,7 +55,7 @@ typedef struct html_element struct html_element* Next; struct html_element* Child; - u8 ElementName; + html_element_name ElementName; union { html_attribute* Attribute; str Content; @@ -84,15 +84,6 @@ typedef struct html_meta* Meta; } html; -static void -appendElement(html_element* Sentinel, html_element* Element) -{ - /* todo: create sentinel in html_create() (inline and get rid of this function) */ - Element->Prev = Sentinel->Prev; - Sentinel->Prev->Next = Element; - Element->Next = Sentinel; -} - static html_element* html_createElement(html_element_name Name, arena* Arena) { @@ -113,6 +104,23 @@ html_createElement(html_element_name Name, arena* Arena) return Element; } +static html_meta* +createMeta(arena* Arena) +{ + html_meta* Meta = ARENA_PUSH_STRUCT(Arena, html_meta); + { + html_tag* Sentinel = ARENA_PUSH_STRUCT(Arena, html_tag); + { + Sentinel->Next = Sentinel; + Sentinel->Prev = Sentinel; + + Meta->Tags = Sentinel; + } + } + + return Meta; +} + static inline void addAttribute(html_element* Element, str Key, str Child, arena* Arena) { @@ -125,10 +133,20 @@ addAttribute(html_element* Element, str Key, str Child, arena* Arena) Element->Attribute = Attribute; } +static void +appendElement(html_element* Sentinel, html_element* Element) +{ + /* todo: create sentinel in html_create() (inline and get rid of this function) */ + Element->Prev = Sentinel->Prev; + Element->Next = Sentinel; + Sentinel->Prev->Next = Element; + Sentinel->Prev = Element; +} + static inline void -html_appendValue(html_element* Target, html_element* Child) +html_appendChild(html_element* Target, html_element* Child) { - ASSERT(!"not yet implemented"); + appendElement(Target->Child, Child); } static inline void @@ -140,7 +158,7 @@ html_appendContent(html_element* Target, str ContentStr, arena* Arena) Content->Content = ContentStr; } - Target->Child = Content; + appendElement(Target->Child, Content); } static html* @@ -152,33 +170,29 @@ html_createDefault(arena* Arena) { Html->Root = Root; - html_element* Doctype = html_createElement(HTML_ELEMENT_NAME_content, Arena); - { - html_appendContent(Doctype, STR_LITERAL(""), Arena); - html_appendValue(Root, Doctype); - } + html_appendContent(Root, STR_LITERAL(""), Arena); html_element* HtmlElement = html_createElement(HTML_ELEMENT_NAME_html, Arena); { - addAttribute(Root, STR_LITERAL("lang"), STR_LITERAL("'de-DE'"), Arena); - html_appendValue(Root, HtmlElement); + addAttribute(HtmlElement, STR_LITERAL("lang"), STR_LITERAL("'de-DE'"), Arena); + html_appendChild(Root, HtmlElement); } html_element* Head = html_createElement(HTML_ELEMENT_NAME_head, Arena); { - html_appendValue(Root, Head); + html_appendChild(Root, Head); html_element* MetaCharset = html_createElement(HTML_ELEMENT_NAME_meta, Arena); { addAttribute(MetaCharset, STR_LITERAL("charset"), STR_LITERAL("'utf-8'"), Arena); - html_appendValue(Head, MetaCharset); + html_appendChild(Head, MetaCharset); } html_element* MetaViewport = html_createElement(HTML_ELEMENT_NAME_meta, Arena); { addAttribute(MetaViewport, STR_LITERAL("name"), STR_LITERAL("'viewport'"), Arena); addAttribute(MetaViewport, STR_LITERAL("content"), STR_LITERAL("'width=device-width, initial-scale=1'"), Arena); - html_appendValue(Head, MetaViewport); + html_appendChild(Head, MetaViewport); } /* todo: add later? */ @@ -188,7 +202,7 @@ html_createDefault(arena* Arena) /* todo: use static "nullstring"? */ html_appendContent(Title, STR_LITERAL(""), Arena); - html_appendValue(Head, Title); + html_appendChild(Head, Title); } /* todo: add style? */ @@ -196,104 +210,58 @@ html_createDefault(arena* Arena) html_element* Body = html_createElement(HTML_ELEMENT_NAME_body, Arena); { - html_appendValue(Root, Body); + html_appendChild(Root, Body); html_element* Header = html_createElement(HTML_ELEMENT_NAME_header, Arena); { - html_appendValue(Body, Header); + html_appendChild(Body, Header); html_element* Heading = html_createElement(HTML_ELEMENT_NAME_h1, Arena); { html_appendContent(Heading, STR_LITERAL("Meine Rezeptsammlung"), Arena); - html_appendValue(Header, Heading); + html_appendChild(Header, Heading); } } html_element* Main = html_createElement(HTML_ELEMENT_NAME_main, Arena); { - html_appendValue(Body, Main); + html_appendChild(Body, Main); html_element* Article = html_createElement(HTML_ELEMENT_NAME_article, Arena); { Html->Article = Article; - html_appendValue(Main, Article); + html_appendChild(Main, Article); } } } } - html_meta* Meta = ARENA_PUSH_STRUCT(Arena, html_meta); + html_meta* Meta = createMeta(Arena); { Html->Meta = Meta; - - html_tag* Sentinel = ARENA_PUSH_STRUCT(Arena, html_tag); - { - Sentinel->Next = Sentinel; - Sentinel->Prev = Sentinel; - - Meta->Tags = Sentinel; - } } return Html; } -static inline html_element* -createContent(str ContentStr, arena* Arena) -{ - ContentStr = str_stripHead(ContentStr); - - /* todo: use html_appendContent() */ - html_element* Content = ARENA_PUSH_STRUCT(Arena, html_element); - { - Content->ElementName = HTML_ELEMENT_NAME_content; - Content->Content = ContentStr; - } - - return Content; -} - static inline void -insertTag(html_tag* Sentinel, str Stripped, arena* Arena) +appendTag(html_tag* Sentinel, str Stripped, arena* Arena) { - for (html_tag* Tag = Sentinel->Next; - (Tag != Sentinel) && (Tag->Next != Sentinel); - Tag = Tag->Next) - { - str Candidate = Tag->Content; - str Reference = Tag->Next->Content; - - s32 Result = str_compare(Candidate, Reference); - if (0); - else if (Result < 0) - { - html_tag* Next = Tag->Next; - - Tag->Next = Next; - Tag->Prev = Next->Prev; + html_tag* Tag = ARENA_PUSH_STRUCT(Arena, html_tag); + Tag->Content = Stripped; - Next->Prev->Next = Tag; - Next->Prev = Tag; + Tag->Prev = Sentinel->Prev; + Tag->Next = Sentinel; - break; - } - else if (Result == 0) - { - /* tag already exists. stop insertion */ - break; - } - else if (Result > 0) - { - /* continue insertion */ - continue; - } - } + Sentinel->Prev->Next = Tag; + Sentinel->Prev = Tag; } -static void -parseHeading(str Line, html_element* Html, arena* Arena) +static html_element* +parseHeading(str Line, html_element* Article, arena* Arena) { - html_element* Heading = ARENA_PUSH_STRUCT(Arena, html_element); + /* actual element name will be set below */ + html_element* Heading = html_createElement(HTML_ELEMENT_NAME_invalid, Arena); /* identify element name (depth level) */ u8 Depth = 0u; @@ -322,23 +290,22 @@ parseHeading(str Line, html_element* Html, arena* Arena) HeadingContent.Base += Depth; HeadingContent.Length -= Depth; HeadingContent.Capacity -= Depth; + + HeadingContent = str_stripHead(HeadingContent); } - Heading->Child = createContent(HeadingContent, Arena); + html_appendContent(Heading, HeadingContent, Arena); } - /* todo: check if this is actually the sentinel! */ - html_element* Sentinel = Html; - appendElement(Sentinel, Heading); + html_appendChild(Article, Heading); + + return Article; } -static void -parseListItem(str Line, html_element* Html, arena* Arena) +static html_element* +parseListItem(str Line, html_element* PrevElement, arena* Arena) { - html_element* ListItem = ARENA_PUSH_STRUCT(Arena, html_element); - { - ListItem->ElementName = HTML_ELEMENT_NAME_li; - } + html_element* ListItem = html_createElement(HTML_ELEMENT_NAME_li, Arena); /* process content */ { @@ -347,65 +314,74 @@ parseListItem(str Line, html_element* Html, arena* Arena) ListItemContent.Base += 1u; ListItemContent.Length -= 1u; ListItemContent.Capacity -= 1u; + + ListItemContent = str_stripHead(ListItemContent); } - ListItem->Child = createContent(ListItemContent, Arena); + html_appendContent(ListItem, ListItemContent, Arena); } /* append to unordered list */ + html_element* List; + if (PrevElement->ElementName == HTML_ELEMENT_NAME_ul) { - html_element* List = 0; - html_element* PrevElement = Html->Prev; - if (PrevElement->ElementName == HTML_ELEMENT_NAME_ul) - { - List = PrevElement; - } - else - { - /* todo: allocate sentinel? */ - List = ARENA_PUSH_STRUCT(Arena, html_element); - List->ElementName = HTML_ELEMENT_NAME_ul; - } + List = PrevElement; + html_appendChild(List, ListItem); + } + else + { + ASSERT(PrevElement->ElementName == HTML_ELEMENT_NAME_article); - html_element* ListItemSentinel = List->Child; - appendElement(ListItemSentinel, ListItem); + List = html_createElement(HTML_ELEMENT_NAME_ul, Arena); + html_appendChild(List, ListItem); + html_appendChild(PrevElement, List); } + + return List; } -static void -parseParagraph(str Line, html_element* Html, arena* Arena) +static html_element* +parseParagraph(str Line, html_element* PrevElement, arena* Arena) +//parseParagraph(str Line, b32 PreviousLineWasEmpty, html_element* PrevElement, arena* Arena) { - /* todo: use html_appendContent? */ - html_element* ParagraphItem = ARENA_PUSH_STRUCT(Arena, html_element); + if (PrevElement->ElementName == HTML_ELEMENT_NAME_ul) { - ParagraphItem->ElementName = HTML_ELEMENT_NAME_content; - ParagraphItem->Child = createContent(Line, Arena); - } + html_element* Sentinel = PrevElement->Child; + html_element* ListItem = Sentinel->Prev; - /* append to paragraph */ + html_appendContent(ListItem, Line, Arena); + } + else { - html_element* Paragraph = 0; - html_element* PrevElement = Html->Prev; + ASSERT(!"not yet implemented"); + } - if ((str_isWhitespaceOnly(Line)) || - (PrevElement->ElementName != HTML_ELEMENT_NAME_p)) - { - Paragraph = ARENA_PUSH_STRUCT(Arena, html_element); - Paragraph->ElementName = HTML_ELEMENT_NAME_p; - } - else - { - Paragraph = PrevElement; - } + return PrevElement; +#if 0 + html_element* Paragraph = 0; - html_element* ParagraphSentinel = Paragraph->Child; - appendElement(ParagraphSentinel, ParagraphItem); + if ((PrevElement->ElementName != HTML_ELEMENT_NAME_p) || + (PreviousLineWasEmpty != 0)) + { + Paragraph = ARENA_PUSH_STRUCT(Arena, html_element); + Paragraph->ElementName = HTML_ELEMENT_NAME_p; + html_appendChild(PrevElement, Paragraph); + } + else + { + Paragraph = PrevElement; } + + html_appendContent(Paragraph, Line, Arena); + + return Paragraph; +#endif } static void parseLine(str Line, html_element* Html, arena* Arena) { + ASSERT(!"not yet implemented"); } static html* @@ -416,6 +392,10 @@ html_parseMarkdown(str Source, arena* Arena) b32 Frontmatter = 0; b32 ExpectShebang = 0; + // todo: remove + //b32 PreviousLineWasEmpty = 0; + + html_element* PrevElement = Html->Article; for (str Line = str_getLine(Source); str_isValid(Line); Line = str_getLine(Source)) @@ -424,24 +404,39 @@ html_parseMarkdown(str Source, arena* Arena) if (!Frontmatter) { - if (0); - else if (str_startsWith(Stripped, STR_LITERAL("---"))) - { - Frontmatter = 1; - ExpectShebang = 1; - } - else if (str_startsWith(Stripped, STR_LITERAL("#"))) - { - parseHeading(Stripped, Html->Root, Arena); - } - else if (str_startsWith(Line, STR_LITERAL("-")) || - str_startsWith(Line, STR_LITERAL("*"))) + if (Stripped.Length > 0) { - parseListItem(Stripped, Html->Root, Arena); + if (0); + else if (str_startsWith(Stripped, STR_LITERAL("---"))) + { + Frontmatter = 1; + ExpectShebang = 1; + //PreviousLineWasEmpty = 0; + } + else if (str_startsWith(Stripped, STR_LITERAL("#"))) + { + /* todo: remove return value? */ + PrevElement = parseHeading(Stripped, Html->Article, Arena); + //PreviousLineWasEmpty = 0; + } + else if (str_startsWith(Line, STR_LITERAL("-")) || + str_startsWith(Line, STR_LITERAL("*"))) + { + PrevElement = parseListItem(Stripped, PrevElement, Arena); + //PreviousLineWasEmpty = 0; + } + else if (Stripped.Length > 0) + { + /* todo: check if `PreviousLineWasEmpty` is really necessary */ + PrevElement = parseParagraph(Stripped, PrevElement, Arena); + //PrevElement = parseParagraph(Stripped, PreviousLineWasEmpty, PrevElement, Arena); + //PreviousLineWasEmpty = 0; + } } else { - parseParagraph(Stripped, Html->Root, Arena); + //PreviousLineWasEmpty = 1; + PrevElement = Html->Article; } } else @@ -464,15 +459,20 @@ html_parseMarkdown(str Source, arena* Arena) { Frontmatter = 0; } - else + else if (Stripped.Length > 0) { html_tag* Sentinel = Html->Meta->Tags; - insertTag(Sentinel, Stripped, Arena); + appendTag(Sentinel, Stripped, Arena); + } + else + { + /* skip empty line */ } } } - str_advance(Source, Line.Length); + u32 LengthWithNewline = Line.Length + 1; + Source = str_advance(Source, LengthWithNewline); } return Html; diff --git a/src/main.c b/src/main.c index 45a68f8..ea7aa3a 100644 --- a/src/main.c +++ b/src/main.c @@ -35,6 +35,7 @@ #include #include #include + #include typedef struct node @@ -67,10 +68,10 @@ generateHtmlFile(str Filename, html* Html, arena* Arena) { u32 Error = 0; - char Path[MAX_PATH] = {0}; - str_toCString(Path, sizeof(Path), Filename); + char Name[MAX_PATH] = {0}; + str_toCString(Name, sizeof(Name), Filename); - int FileDescriptor = open(Path, O_WRONLY|O_CREAT); + int FileDescriptor = open(Name, O_WRONLY|O_CREAT); if (FileDescriptor == -1) { perror("open"); @@ -138,6 +139,7 @@ main(int ArgumentCount, char** Arguments) while ((Entry = readdir(Directory)) != NULL) { str Name = str_fromCString(&Context.MainArena, Entry->d_name); + if (str_equals(Name, STR_LITERAL(".") ) || str_equals(Name, STR_LITERAL("..")) || str_equals(Name, STR_LITERAL(".template.md.tmp"))) @@ -168,7 +170,16 @@ main(int ArgumentCount, char** Arguments) str FileStr; { char Path[MAX_PATH] = {0}; - str_toCString(Path, sizeof(Path), Recipe->Name); + { + arena TempArena = Context.MainArena; + str_unbounded uPath = str_startUnbounded(&TempArena); + { + str_appendCString(&uPath.Str, Context.SourceDir); + str_append(&uPath.Str, Recipe->Name); + } + str PathStr = str_endUnbounded(uPath); + str_toCString(Path, sizeof(Path), PathStr); + } int File = open(Path, O_RDONLY); if (File == -1) diff --git a/src/util.h b/src/util.h index 8b26040..ba64c36 100644 --- a/src/util.h +++ b/src/util.h @@ -103,6 +103,26 @@ str_append(str* Target, str Source) Target->Length += Source.Length; } +static inline u64 +str_cStringLength(char* CStr) +{ + char* Start = CStr; + while (*++CStr != '\0'); + u64 Length = CStr - Start; + + return Length; +} + +static inline void +str_appendCString(str* Target, char* CStr) +{ + str Source = {0}; + Source.Base = (u8*)CStr; + Source.Length = str_cStringLength(CStr); + + str_append(Target, Source); +} + static inline str str_getLine(str Str) { @@ -117,6 +137,7 @@ str_getLine(str Str) if (Str.Base[i] == '\n') { Line.Length = i; + Line.Capacity = i; break; } } @@ -125,12 +146,16 @@ str_getLine(str Str) return Line; } -static inline void +static inline str str_advance(str Str, memory_size Increment) { - ASSERT( (Str.Length + Increment) < Str.Capacity ); + ASSERT(Increment <= Str.Capacity); + + Str.Base += Increment; + Str.Capacity -= Increment; + Str.Length -= Increment; - Str.Length += Increment; + return Str; } static inline b32 @@ -181,6 +206,7 @@ str_stripTail(str Str) if (isWhitespace(Candidate)) { Str.Length--; + Str.Capacity--; } else { @@ -344,17 +370,15 @@ str_toCString(char* Destination, memory_size DestinationSize, str Source) static inline str str_fromCString(arena* Arena, char* CStr) { - char* Start = CStr; - - while (*++CStr != '\0'); - u64 Length = CStr - Start; + u64 Length = str_cStringLength(CStr); u8* Target = ARENA_PUSH_ARRAY(Arena, u8, Length); for (u64 i = 0u; i < Length; i++) { - Target[i] = Start[i]; + Target[i] = CStr[i]; } str Result = { .Base = Target, .Length = Length, .Capacity = Length }; return Result; } +