Markdown para HTML em Zig — Tutorial Passo a Passo

Neste tutorial, vamos construir um conversor de Markdown para HTML em Zig. O programa lê texto em formato Markdown e gera HTML válido. Este projeto é excelente para praticar parsing de texto, máquinas de estado e geração de saída estruturada.

O Que Vamos Construir

Nosso conversor vai suportar:

Headings (# a ######)
Parágrafos (linhas consecutivas são unidas em um único parágrafo; uma linha em branco encerra o parágrafo)
Negrito (**texto**) e itálico (*texto*)
Links ([texto](url))
Listas não-ordenadas (- item ou * item)
Blocos de código delimitados por ``` (com linguagem opcional, ex.: ```zig)
Código inline (`código`)
Linhas horizontais (---)

Pré-requisitos

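Zig 0.13 ou 0.14 instalado (guia de instalação) — o código usa APIs da biblioteca padrão, como std.io.getStdOut() e ArrayList(u8).init(allocator), que foram alteradas a partir do Zig 0.15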
Conhecimentos básicos de Markdown e HTML

Passo 1: Estrutura do Projeto

mkdir markdown-to-html
cd markdown-to-html
zig init

Passo 2: Tokenizador de Linhas

Primeiro, classificamos cada linha do Markdown por seu tipo.

const std = @import("std");
const mem = std.mem;
const ArrayList = std.ArrayList;
const Allocator = std.mem.Allocator;

/// Block-level category assigned to a single Markdown line.
const TipoBloco = enum {
    heading1, // "# " prefix
    heading2, // "## " prefix
    heading3, // "### " prefix
    heading4, // "#### " prefix
    heading5, // "##### " prefix
    heading6, // "###### " prefix
    paragrafo, // any line that matches no other category
    lista_nao_ordenada, // "- " or "* " prefix
    bloco_codigo, // fence line starting with ```
    linha_horizontal, // horizontal rule such as "---"
    vazio, // blank line
};

/// A single Markdown line together with its block classification.
const LinhaMarkdown = struct {
    tipo: TipoBloco,
    conteudo: []const u8, // content without the marker (e.g. without the "# ")
    raw: []const u8,      // original, unmodified line
};

/// Classifies a single Markdown line by its block type.
///
/// Leading spaces and a trailing carriage return (left over when CRLF input is
/// split on '\n') are ignored for classification. The returned `conteudo` is a
/// sub-slice of `linha` with the block marker removed; `raw` is `linha` itself.
/// Lines that match no marker are classified as `.paragrafo`.
fn classificarLinha(linha: []const u8) LinhaMarkdown {
    // Strip the "\r" of CRLF line endings first so it never leaks into the
    // content (e.g. into the language name of a code fence).
    const sem_cr = mem.trimRight(u8, linha, "\r");
    const trimmed = mem.trimLeft(u8, sem_cr, " ");

    if (trimmed.len == 0) {
        return .{ .tipo = .vazio, .conteudo = "", .raw = linha };
    }

    // Code fence; whatever follows the backticks is kept as the content.
    if (mem.startsWith(u8, trimmed, "```")) {
        return .{ .tipo = .bloco_codigo, .conteudo = trimmed[3..], .raw = linha };
    }

    // Horizontal rule: only hyphens and spaces, with at least three hyphens.
    // Counting the hyphens keeps lines such as "- -" or an empty list item
    // ("-" followed by spaces) from being mistaken for a rule.
    {
        var hifens: usize = 0;
        var so_hifens_e_espacos = true;
        for (trimmed) |c| {
            if (c == '-') {
                hifens += 1;
            } else if (c != ' ') {
                so_hifens_e_espacos = false;
                break;
            }
        }
        if (so_hifens_e_espacos and hifens >= 3) {
            return .{ .tipo = .linha_horizontal, .conteudo = "", .raw = linha };
        }
    }

    // Headings: checked from the longest prefix to the shortest so that
    // "## " is never matched as "# ".
    if (mem.startsWith(u8, trimmed, "###### ")) return .{ .tipo = .heading6, .conteudo = trimmed[7..], .raw = linha };
    if (mem.startsWith(u8, trimmed, "##### ")) return .{ .tipo = .heading5, .conteudo = trimmed[6..], .raw = linha };
    if (mem.startsWith(u8, trimmed, "#### ")) return .{ .tipo = .heading4, .conteudo = trimmed[5..], .raw = linha };
    if (mem.startsWith(u8, trimmed, "### ")) return .{ .tipo = .heading3, .conteudo = trimmed[4..], .raw = linha };
    if (mem.startsWith(u8, trimmed, "## ")) return .{ .tipo = .heading2, .conteudo = trimmed[3..], .raw = linha };
    if (mem.startsWith(u8, trimmed, "# ")) return .{ .tipo = .heading1, .conteudo = trimmed[2..], .raw = linha };

    // Unordered list items
    if (mem.startsWith(u8, trimmed, "- ") or mem.startsWith(u8, trimmed, "* ")) {
        return .{ .tipo = .lista_nao_ordenada, .conteudo = trimmed[2..], .raw = linha };
    }

    return .{ .tipo = .paragrafo, .conteudo = trimmed, .raw = linha };
}

Passo 3: Processamento Inline

Processamos formatação inline (negrito, itálico, código, links) dentro do conteúdo.

/// Converts inline Markdown formatting in `texto` to HTML and writes it to `writer`.
///
/// Recognizes **bold**, *italic*, `code` and [link](url). Bold, italic and link
/// text are processed recursively, so formatting may nest. A marker that has no
/// closing delimiter is emitted as literal text. Literal text, the contents of
/// inline code and link URLs are all HTML-escaped.
fn processarInline(texto: []const u8, writer: anytype) !void {
    var i: usize = 0;

    while (i < texto.len) {
        // Bold: **text**
        if (i + 1 < texto.len and texto[i] == '*' and texto[i + 1] == '*') {
            if (encontrarFechamento2(texto[i + 2 ..], "**")) |fim| {
                try writer.writeAll("<strong>");
                try processarInline(texto[i + 2 .. i + 2 + fim], writer);
                try writer.writeAll("</strong>");
                i += fim + 4; // content plus the two 2-byte delimiters
                continue;
            }
        }

        // Italic: *text* (a single '*' that is not the start of "**")
        if (texto[i] == '*' and (i + 1 >= texto.len or texto[i + 1] != '*')) {
            if (encontrarFechamento1(texto[i + 1 ..], '*')) |fim| {
                try writer.writeAll("<em>");
                try processarInline(texto[i + 1 .. i + 1 + fim], writer);
                try writer.writeAll("</em>");
                i += fim + 2; // content plus the two 1-byte delimiters
                continue;
            }
        }

        // Inline code: `code`. The content is not parsed as Markdown, but it
        // must still be escaped so that "<" or "&" cannot produce markup.
        if (texto[i] == '`') {
            if (encontrarFechamento1(texto[i + 1 ..], '`')) |fim| {
                try writer.writeAll("<code>");
                try escreverHtmlEscapado(texto[i + 1 .. i + 1 + fim], writer);
                try writer.writeAll("</code>");
                i += fim + 2;
                continue;
            }
        }

        // Link: [text](url). The URL goes inside a double-quoted attribute, so
        // it is escaped to keep a '"' from terminating the attribute early.
        if (texto[i] == '[') {
            if (parsearLink(texto[i..])) |link| {
                try writer.writeAll("<a href=\"");
                try escreverHtmlEscapado(link.url, writer);
                try writer.writeAll("\">");
                try processarInline(link.texto, writer);
                try writer.writeAll("</a>");
                i += link.consumido;
                continue;
            }
        }

        // Plain character: emit it with HTML special characters escaped.
        try escreverHtmlEscapado(texto[i .. i + 1], writer);
        i += 1;
    }
}

/// Writes `texto` to `writer`, replacing the characters that are special in
/// HTML text and double-quoted attribute values (`<`, `>`, `&`, `"`) with
/// their entities. Every other byte is written unchanged.
fn escreverHtmlEscapado(texto: []const u8, writer: anytype) !void {
    for (texto) |c| {
        switch (c) {
            '<' => try writer.writeAll("&lt;"),
            '>' => try writer.writeAll("&gt;"),
            '&' => try writer.writeAll("&amp;"),
            '"' => try writer.writeAll("&quot;"),
            else => try writer.writeByte(c),
        }
    }
}

/// Returns the index of the first byte in `texto` equal to `delim`,
/// or null when `delim` does not occur.
fn encontrarFechamento1(texto: []const u8, delim: u8) ?usize {
    return mem.indexOfScalar(u8, texto, delim);
}

/// Returns the index at which the multi-byte delimiter `delim` first occurs
/// in `texto`, or null when it does not occur (including when `texto` is
/// shorter than `delim`).
fn encontrarFechamento2(texto: []const u8, delim: []const u8) ?usize {
    return mem.indexOf(u8, texto, delim);
}

/// Result of parsing a Markdown link of the form [text](url).
const LinkInfo = struct {
    texto: []const u8, // slice between '[' and ']'
    url: []const u8, // slice between '(' and ')'
    consumido: usize, // number of input bytes the whole link occupies
};

/// Tries to parse a Markdown link `[text](url)` located at the very start of
/// `texto`. Returns the text and URL slices plus the number of bytes the link
/// occupies, or null when `texto` does not begin with a complete link.
fn parsearLink(texto: []const u8) ?LinkInfo {
    // The shortest possible link is "[]()", i.e. four bytes.
    if (texto.len < 4) return null;
    if (texto[0] != '[') return null;

    // Absolute index of the ']' that ends the link text.
    const fecha_colchete = 1 + (encontrarFechamento1(texto[1..], ']') orelse return null);

    // The '(' must come immediately after the ']'.
    const abre_paren = fecha_colchete + 1;
    if (abre_paren >= texto.len) return null;
    if (texto[abre_paren] != '(') return null;

    // Absolute index of the ')' that ends the URL.
    const inicio_url = abre_paren + 1;
    const fecha_paren = inicio_url + (encontrarFechamento1(texto[inicio_url..], ')') orelse return null);

    return LinkInfo{
        .texto = texto[1..fecha_colchete],
        .url = texto[inicio_url..fecha_paren],
        .consumido = fecha_paren + 1,
    };
}

Passo 4: Conversor Principal

/// Converts a complete Markdown document to HTML.
///
/// The input is split on '\n' and every line is classified with
/// `classificarLinha`. Three flags track which block is currently open (list,
/// fenced code block, paragraph) so that the matching closing tag is written
/// when the block ends or the input runs out.
///
/// Returns a slice allocated with `allocator`; the caller owns it and must
/// free it. On error the partially built buffer is released.
fn converterMarkdown(allocator: Allocator, markdown: []const u8) ![]const u8 {
    var html = ArrayList(u8).init(allocator);
    errdefer html.deinit();
    const writer = html.writer();

    // Split into lines
    var linhas = mem.splitScalar(u8, markdown, '\n');

    var em_lista = false;
    var em_bloco_codigo = false;
    var em_paragrafo = false;

    while (linhas.next()) |linha_raw| {
        const classificada = classificarLinha(linha_raw);

        // A fence line toggles the code block on or off.
        if (classificada.tipo == .bloco_codigo) {
            if (em_bloco_codigo) {
                try writer.writeAll("</code></pre>\n");
                em_bloco_codigo = false;
            } else {
                // Opening a code block ends any open paragraph or list.
                if (em_paragrafo) {
                    try writer.writeAll("</p>\n");
                    em_paragrafo = false;
                }
                if (em_lista) {
                    try writer.writeAll("</ul>\n");
                    em_lista = false;
                }
                const lang = mem.trim(u8, classificada.conteudo, " ");
                if (lang.len > 0) {
                    // The language name comes straight from the input and lands
                    // inside a double-quoted attribute, so it must be escaped.
                    try writer.writeAll("<pre><code class=\"language-");
                    for (lang) |c| {
                        switch (c) {
                            '<' => try writer.writeAll("&lt;"),
                            '>' => try writer.writeAll("&gt;"),
                            '&' => try writer.writeAll("&amp;"),
                            '"' => try writer.writeAll("&quot;"),
                            else => try writer.writeByte(c),
                        }
                    }
                    try writer.writeAll("\">");
                } else {
                    try writer.writeAll("<pre><code>");
                }
                em_bloco_codigo = true;
            }
            continue;
        }

        // Inside a code block lines are copied verbatim (no Markdown
        // processing), only HTML-escaped.
        if (em_bloco_codigo) {
            for (linha_raw) |c| {
                switch (c) {
                    '<' => try writer.writeAll("&lt;"),
                    '>' => try writer.writeAll("&gt;"),
                    '&' => try writer.writeAll("&amp;"),
                    else => try writer.writeByte(c),
                }
            }
            try writer.writeAll("\n");
            continue;
        }

        // Close the list when the current line is not a list item.
        if (em_lista and classificada.tipo != .lista_nao_ordenada) {
            try writer.writeAll("</ul>\n");
            em_lista = false;
        }

        // Close the paragraph when the current line is not paragraph text.
        if (em_paragrafo and classificada.tipo != .paragrafo) {
            try writer.writeAll("</p>\n");
            em_paragrafo = false;
        }

        switch (classificada.tipo) {
            .heading1, .heading2, .heading3, .heading4, .heading5, .heading6 => {
                const n: u8 = switch (classificada.tipo) {
                    .heading1 => 1,
                    .heading2 => 2,
                    .heading3 => 3,
                    .heading4 => 4,
                    .heading5 => 5,
                    .heading6 => 6,
                    else => unreachable,
                };
                try writer.print("<h{d}>", .{n});
                try processarInline(classificada.conteudo, writer);
                try writer.print("</h{d}>\n", .{n});
            },
            .lista_nao_ordenada => {
                if (!em_lista) {
                    try writer.writeAll("<ul>\n");
                    em_lista = true;
                }
                try writer.writeAll("<li>");
                try processarInline(classificada.conteudo, writer);
                try writer.writeAll("</li>\n");
            },
            .linha_horizontal => {
                try writer.writeAll("<hr />\n");
            },
            .paragrafo => {
                if (!em_paragrafo) {
                    try writer.writeAll("<p>");
                    em_paragrafo = true;
                } else {
                    // Consecutive paragraph lines are joined with one space.
                    try writer.writeAll(" ");
                }
                try processarInline(classificada.conteudo, writer);
            },
            .vazio => {
                // Blank lines only end blocks, which was handled above.
            },
            .bloco_codigo => unreachable, // handled before the switch
        }
    }

    // Close whatever is still open at end of input.
    if (em_paragrafo) try writer.writeAll("</p>\n");
    if (em_lista) try writer.writeAll("</ul>\n");
    if (em_bloco_codigo) try writer.writeAll("</code></pre>\n");

    return html.toOwnedSlice();
}

Passo 5: Interface CLI

/// Interactive command-line front end for the converter.
///
/// Prints the menu once, then reads an option per iteration from stdin:
/// 1 converts a single line typed by the user, 2 converts a file, 3 converts
/// a built-in demo document and 4 (or end of input) exits. Any other
/// non-empty option is reported as invalid.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    const stdout = std.io.getStdOut().writer();
    const stdin = std.io.getStdIn().reader();

    try stdout.print(
        \\
        \\  ==========================================
        \\    MARKDOWN PARA HTML - Zig
        \\  ==========================================
        \\
        \\  [1] Converter texto inline
        \\  [2] Converter arquivo
        \\  [3] Demo com exemplo
        \\  [4] Sair
        \\
    , .{});

    // Shared input buffer; each read overwrites the previous line.
    var buf: [256]u8 = undefined;

    while (true) {
        try stdout.print("\n  Opcao: ", .{});
        // A read error (e.g. an over-long line) retries; end of input exits.
        const opcao_raw = stdin.readUntilDelimiterOrEof(&buf, '\n') catch continue orelse break;
        const opcao = mem.trim(u8, opcao_raw, " \t\r\n");

        if (mem.eql(u8, opcao, "4")) break;

        if (mem.eql(u8, opcao, "1")) {
            // Option 1 is advertised in the menu: convert one line of Markdown
            // typed directly at the prompt.
            try stdout.print("\n  Texto Markdown: ", .{});
            const texto_raw = stdin.readUntilDelimiterOrEof(&buf, '\n') catch continue orelse continue;
            const texto = mem.trim(u8, texto_raw, " \t\r\n");

            const html = try converterMarkdown(allocator, texto);
            defer allocator.free(html);

            try stdout.print("\n  --- HTML ---\n{s}\n", .{html});
        } else if (mem.eql(u8, opcao, "3")) {
            const demo =
                \\# Titulo Principal
                \\
                \\Este e um **paragrafo** com *italico* e `codigo`.
                \\
                \\## Links e Listas
                \\
                \\Visite [Zig Brasil](https://zigbrasil.dev).
                \\
                \\- Primeiro item
                \\- Segundo item com **negrito**
                \\- Terceiro item
                \\
                \\---
                \\
                \\### Bloco de Codigo
                \\
                \\```zig
                \\const std = @import("std");
                \\pub fn main() void {}
                \\```
            ;

            try stdout.print("\n  --- Markdown ---\n{s}\n", .{demo});

            const html = try converterMarkdown(allocator, demo);
            defer allocator.free(html);

            try stdout.print("\n  --- HTML ---\n{s}\n", .{html});
        } else if (mem.eql(u8, opcao, "2")) {
            try stdout.print("\n  Arquivo Markdown: ", .{});
            const path_raw = stdin.readUntilDelimiterOrEof(&buf, '\n') catch continue orelse continue;
            const path = mem.trim(u8, path_raw, " \t\r\n");

            // Files larger than 1 MiB are rejected by readFileAlloc.
            const conteudo = std.fs.cwd().readFileAlloc(allocator, path, 1024 * 1024) catch |err| {
                try stdout.print("  Erro ao ler arquivo: {any}\n", .{err});
                continue;
            };
            defer allocator.free(conteudo);

            const html = try converterMarkdown(allocator, conteudo);
            defer allocator.free(html);

            try stdout.print("\n{s}\n", .{html});
        } else if (opcao.len > 0) {
            // Tell the user instead of silently re-prompting.
            try stdout.print("  Opcao invalida: {s}\n", .{opcao});
        }
    }

    try stdout.print("\n  Ate logo!\n", .{});
}

Testes

test "heading h1" {
    const testing = std.testing;

    // A "# " line must be rendered as an <h1> element.
    const saida = try converterMarkdown(testing.allocator, "# Titulo");
    defer testing.allocator.free(saida);

    const achou = mem.indexOf(u8, saida, "<h1>Titulo</h1>") != null;
    try testing.expect(achou);
}

test "paragrafo simples" {
    const testing = std.testing;

    // Plain text must be wrapped in a single <p> element.
    const saida = try converterMarkdown(testing.allocator, "Texto simples");
    defer testing.allocator.free(saida);

    const achou = mem.indexOf(u8, saida, "<p>Texto simples</p>") != null;
    try testing.expect(achou);
}

test "negrito" {
    const testing = std.testing;

    // Text between "**" pairs must become a <strong> element.
    const saida = try converterMarkdown(testing.allocator, "Texto **negrito** aqui");
    defer testing.allocator.free(saida);

    const achou = mem.indexOf(u8, saida, "<strong>negrito</strong>") != null;
    try testing.expect(achou);
}

test "link" {
    const testing = std.testing;

    // [text](url) must become an anchor whose href is the URL.
    const saida = try converterMarkdown(testing.allocator, "[Zig](https://zig.dev)");
    defer testing.allocator.free(saida);

    const achou = mem.indexOf(u8, saida, "<a href=\"https://zig.dev\">Zig</a>") != null;
    try testing.expect(achou);
}

test "lista nao ordenada" {
    const testing = std.testing;

    // Consecutive "- " lines must produce a <ul> containing <li> items.
    const saida = try converterMarkdown(testing.allocator, "- item1\n- item2");
    defer testing.allocator.free(saida);

    const tem_ul = mem.indexOf(u8, saida, "<ul>") != null;
    try testing.expect(tem_ul);
    const tem_li = mem.indexOf(u8, saida, "<li>item1</li>") != null;
    try testing.expect(tem_li);
}

test "classificar linha heading" {
    const testing = std.testing;

    // "## " must classify as a level-2 heading with the marker stripped.
    const resultado = classificarLinha("## Subtitulo");
    try testing.expectEqual(TipoBloco.heading2, resultado.tipo);
    try testing.expectEqualStrings("Subtitulo", resultado.conteudo);
}

test "parsear link" {
    const testing = std.testing;

    // A well-formed link must yield its text and URL slices.
    const analisado = parsearLink("[texto](url)").?;
    try testing.expectEqualStrings("texto", analisado.texto);
    try testing.expectEqualStrings("url", analisado.url);
}

Compilando e Executando

zig build test
zig build run

Conceitos Aprendidos

Parsing linha a linha com classificação de tipos
Máquina de estados para blocos (código, listas, parágrafos)
Processamento inline recursivo (negrito dentro de links, etc.)
Escaping de caracteres HTML especiais
ArrayList como buffer de escrita dinâmico
Writer interface para geração de saída

Próximos Passos

Explore o módulo std.mem para mais funções de manipulação de texto
Veja como servir o HTML gerado com o Servidor HTTP
Construa o próximo projeto: Analisador de Logs

Markdown para HTML em Zig — Tutorial Passo a Passo

Markdown para HTML em Zig — Tutorial Passo a Passo

O Que Vamos Construir

Pré-requisitos

Passo 1: Estrutura do Projeto

Passo 2: Tokenizador de Linhas

Passo 3: Processamento Inline

Passo 4: Conversor Principal

Passo 5: Interface CLI

Testes

Compilando e Executando

Conceitos Aprendidos

Próximos Passos

Explore Mais

Continue aprendendo Zig

Markdown para HTML em Zig — Tutorial Passo a Passo

O Que Vamos Construir

Pré-requisitos

Passo 1: Estrutura do Projeto

Passo 2: Tokenizador de Linhas

Passo 3: Processamento Inline

Passo 4: Conversor Principal

Passo 5: Interface CLI

Testes

Compilando e Executando

Conceitos Aprendidos

Próximos Passos

Explore Mais

Artigos relacionados

Cron Parser em Zig — Tutorial Passo a Passo

INI Parser em Zig — Tutorial Passo a Passo

Parser de Configuração JSON em Zig — Tutorial Passo a Passo

Continue aprendendo Zig