Migrating to Markdown Pt3 From Domino To Markdown

Domino, and in particular BlogSphere V3 by the wonderful Declan Lynch, has been my blogging platform for years now and has served me well and faithfully, but all good things come to an end. As part of the constant fiddling with new stuff that LDC do, I have moved my blogging platform to Markdown (the wretch Ben Poole got me started on it) on the Statamic platform. But what about the years of blog entries that are happily snuggled down in my NSF file?

Java to the rescue. I have written a little agent that takes all the blog entries and exports them to Markdown format regardless of whether they are HTML or rich text, glues the existing comments onto the end of each blog post, exports quick images and emoticons (while changing their references in the blog posts), and makes a redirects file so all your old external links still work.

Just copy the code below into a Java agent and set ‘baseExportDir’ to wherever you want the site to export to. When you run the agent you will end up with a bunch of Markdown files representing the blog entries, a redirects.txt file containing the 301 redirects, whose contents you can paste into your root directory .htaccess file so all your old links work, a “page” directory with the emoticons in it, and a “blog” directory with all the quick images in it.

As always, yell if there is something missing or wrong

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.text.Normalizer;
import java.util.Calendar;
import java.util.Date;
import java.util.Enumeration;
import java.util.Vector;
import java.util.regex.Pattern;
import lotus.domino.AgentBase;
import lotus.domino.AgentContext;
import lotus.domino.Database;
import lotus.domino.DateTime;
import lotus.domino.Document;
import lotus.domino.DocumentCollection;
import lotus.domino.EmbeddedObject;
import lotus.domino.NotesException;
import lotus.domino.RichTextItem;
import lotus.domino.Session;
import lotus.domino.View;
public class JavaAgent extends AgentBase {
    public void NotesMain() {
        try {
            Session session = getSession();
            AgentContext agentContext = session.getAgentContext();
            Database db = agentContext.getCurrentDatabase();
            String baseExportDir = "C:\markdownexport\";
            // ****** start document export ******
            View content = db.getView("vw_Content_Blogs");
            File theDir = new File(baseExportDir);
            if (!theDir.exists())
                theDir.mkdir();
            // create a file to store all the 301 redirections for existing blog
            // entires
            File redirectfile = new File(baseExportDir + "redirects.txt");
            Writer redirect = new BufferedWriter(new FileWriter(redirectfile));
            Document doc = content.getFirstDocument();
            while (doc != null) {
                Writer output = null;
                if (doc.getItemValueString("FORM").equals("content_BlogEntry")) {
                    String filename = "";
                    Vector dM = doc.getItemValue("EntryDate");
                    DateTime dt = (DateTime) dM.elementAt(0);
                    System.out.println(dt.getLocalTime());
                    Date date = dt.toJavaDate();
                    Calendar cal = Calendar.getInstance();
                    cal.setTime(date);
                    // create file name in the correct format for statamic
                    filename = Integer.toString(cal.get(Calendar.YEAR)) + "-" + String.format("%02d", cal.get(Calendar.MONTH) + 1) + "-" + String.format("%02d", Integer.valueOf(cal.get(Calendar.DAY_OF_MONTH)));
                    filename = filename + "-" + doc.getItemValueString("EntryTitle").trim().replaceAll(" ", "-").replaceAll("\\", "-").replaceAll("/", "-").replaceAll(":", "-").replaceAll("\?", "-").replaceAll(""", "-");
                    File file = new File(baseExportDir + filename + ".md");
                    redirectfile.getParentFile().mkdirs();
                    // add redirect to redirect file
                    redirect.write("Redirect 301 /d6plinks/" + doc.getItemValueString("PermaLink") + " /blog/" + filename);
                    redirect.write("rn");
                    output = new BufferedWriter(new FileWriter(file));
                    output.write("---");
                    output.write("rn");
                    output.write("title: '" + doc.getItemValueString("EntryTitle").trim() + "'");
                    output.write("rn");
                    if (doc.getItemValueString("EntryStatus").trim().equals("Published")) {
                        output.write("status: live");
                        output.write("rn");
                    } else {
                        output.write("status: draft");
                        output.write("rn");
                    }
                    // end of meta
                    output.write("---");
                    output.write("rn");
                    output.write("rn");
                    String body = "";
                    // get the body text and clean it up for UTF-8 standard
                    if (doc.getItemValueString("EntryHTML").trim().length() > 1) {
                        body = doc.getItemValueString("EntryHTML").trim();
                    } else {
                        body = doc.getItemValueString("EntryRICH").trim();
                    }
                    body = Normalizer.normalize(body, Normalizer.Form.NFD).replaceAll("\p{InCombiningDiacriticalMarks}+", ""); // .replaceAll("[^\p{ASCII}]",
                    // "");
                    // smart single quotes and apostrophe
                    body = removeMSRubbish(body);
                    output.write(body.replaceAll("http.*?\$File", "/assets/img/blog"));
                    output.write("rn");
                    output.write("rn");
                    // get all the old comments and add them to the bottom of
                    // the blog
                    DocumentCollection responses = doc.getResponses();
                    if (responses.getCount() > 0) {
                        output.write("Old Comments");
                        output.write("rn");
                        output.write("------------");
                        output.write("rn");
                        output.write("rn");
                        Document rdoc = responses.getFirstDocument();
                        while (rdoc != null) {
                            // setting as h5 in markup
                            output.write("##### " + rdoc.getItemValueString("nameAuthor") + "(" + rdoc.getCreated().toString() + ")");
                            output.write("rn");
                            String comment = Normalizer.normalize(rdoc.getItemValueString("body").replaceAll("http.*?\$File", "/assets/img/page"), Normalizer.Form.NFD).replaceAll("\p{InCombiningDiacriticalMarks}+", "").replaceAll("[^\p{ASCII}]", "");
                            comment = removeMSRubbish(comment);
                            output.write(comment);
                            output.write("rn");
                            output.write("rn");
                            rdoc = responses.getNextDocument(rdoc);
                        }
                    }
                    output.close();
                }
                doc = content.getNextDocument(doc);
            }
            redirect.close();
            // ****** end document export ******
            // ****** start export quick images, the html references
            // '/assets/img/blog' but im just exporting them to 'blog'******
            String imagesExport = "blog\";
            View view = db.getView("lkp_QuickImages");
            doc = view.getFirstDocument();
            File theImagesDir = new File(baseExportDir + imagesExport);
            if (!theImagesDir.exists())
                theImagesDir.mkdir();
            boolean saveFlag = false;
            while (doc != null) {
                RichTextItem body = (RichTextItem) doc.getFirstItem("ImageFile");
                Vector v = body.getEmbeddedObjects();
                Enumeration e = v.elements();
                while (e.hasMoreElements()) {
                    EmbeddedObject eo = (EmbeddedObject) e.nextElement();
                    if (eo.getType() == EmbeddedObject.EMBED_ATTACHMENT) {
                        eo.extractFile(baseExportDir + imagesExport + eo.getSource());
                    }
                }
                doc = view.getNextDocument(doc);
            }
            // ****** end export quick images ******
            // ****** start export emoticon images, the html references
            // '/assets/img/page' but im just exporting them to 'page'******
            imagesExport = "page\";
            view = db.getView("lkp_Emoticons_Web");
            doc = view.getFirstDocument();
            theImagesDir = new File(baseExportDir + imagesExport);
            if (!theImagesDir.exists())
                theImagesDir.mkdir();
            while (doc != null) {
                System.out.println(doc.getItemValueString("EmoticonName"));
                Vector v = session.evaluate("@AttachmentNames", doc);
                System.out.println("emoticon:" + v.firstElement().toString());
                EmbeddedObject eo = doc.getAttachment(v.firstElement().toString());
                eo.extractFile(baseExportDir + imagesExport + v.firstElement().toString());
                doc = view.getNextDocument(doc);
            }
            // ****** end export images ******
        } catch (NotesException e) {
            System.out.println(e.id + " " + e.text);
            e.printStackTrace();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
    public String removeMSRubbish(String body) {
        body = body.replaceAll("[u2018|u2019|u201A]", "'");
        // smart double quotes
        body = body.replaceAll("[u201C|u201D|u201E]", """);
        // ellipsis
        body = body.replaceAll("u2026", "...");
        // dashes
        body = body.replaceAll("[u2013|u2014]", "-");
        // circumflex
        body = body.replaceAll("u02C6", "^");
        // open angle bracket
        body = body.replaceAll("u2039", "<");
        // close angle bracket
        body = body.replaceAll("u203A", ">");
        // spaces
        body = body.replaceAll("[u02DC|u00A0]", " ");
        return body;
    }
}

Leave a Reply

Your email address will not be published. Required fields are marked *