So, only
one post after admitting my dislike for implementing
FSAs, what do I do but go out and volunteer to implement one in one of my
comments.
If you are too lazy to follow the links, I don't blame you. But the gist of it is that
gus complained about the lack of syntax highlighting in all the code I have been posting lately.
I can't blame him. I have been a huge
Larry Osterman fan since the
Channel 9 post about the completion of his
twentieth year at Microsoft. But the one complaint (well, not really a complaint, but a suggestion for improvement) I would make for his blog is
syntax highlighting.
So now I have written a small, quick and dirty MFC app to turn code snippets into syntax highlighted HTML. I am sure others have done this before me, but I thought it would be more interesting to write the app than to keep
Googling until I found one. Being a quick and dirty app, I implemented the source-to-HTML conversion functions as members of the main window class. Let's have a look at the definition of the class, syntax highlighted with my new tool:
///
/// <summary>
/// This class models the main window of the Snippet2Html application
/// </summary>
/// <author>Jim Barnett</author>
/// <date>3/18/2005</date>
///
class CMainWindow : public CDialog
{
public:
// Constructs the dialog; pParent defaults to no parent window.
CMainWindow(CWnd* pParent = 0);
// Message handlers (bodies are not part of this listing; judging by the
// names they serve the "copy to clipboard" and "htmlify" buttons).
afx_msg void OnBnClickedCopyToClipboard();
afx_msg void OnBnClickedHtmlify();
// States of the scanner that HtmlifyCode() runs over the snippet.
// NONE: not inside a comment or literal.
static const int NONE = 0;
// SLASH: a single '/' was just seen while in NONE.
static const int SLASH = 1;
// SLASHSLASH: inside a "//" comment; left again at the next '\n'.
static const int SLASHSLASH = 2;
// SLASHSTAR: inside a "/*" comment.
static const int SLASHSTAR = 3;
// SLASHSTARSTAR: a '*' was seen inside a "/*" comment; a '/' in this
// state ends the comment.
static const int SLASHSTARSTAR = 4;
// LITERAL / LITERALESCAPE: string literal, and string literal right after
// a backslash. IgnoreReserved() suppresses highlighting in both.
static const int LITERAL = 5;
static const int LITERALESCAPE = 6;
// CHARLITERAL / CHARLITERALESCAPE: character literal (entered on a '\''
// seen in NONE), and character literal right after a backslash.
static const int CHARLITERAL = 7;
static const int CHARLITERALESCAPE = 8;
// HTML fragments; the values are defined outside this listing.
// COMMENTHTML is inserted in front of a comment's opening "//" or "/*",
// ENDCOMMENT is appended where the comment ends.
static const CString COMMENTHTML;
static const CString ENDCOMMENT;
// NOTE(review): SPECIALHTML / ENDSPECIAL are not referenced by the code
// shown here; presumably HandleReservedWord() wraps reserved words in
// them - confirm against its definition.
static const CString ENDSPECIAL;
static const CString SPECIALHTML;
// INITHTML is prepended to, and ENDHTML appended to, the generated HTML.
static const CString INITHTML;
static const CString ENDHTML;
// Dialog template resource identifier.
enum { IDD = IDD_SNIPPET2HTML_DIALOG };
protected:
// Returns true, after calling HandleReservedWord() and advancing nPos by
// its return value, when a reserved word or directive starts at nPos of
// m_sCode and nState permits highlighting; otherwise returns false and
// leaves nPos and sHtmlified untouched.
bool CheckForReservedWord(int nState, int& nPos, CString& sHtmlified) const;
virtual void DoDataExchange(CDataExchange* pDX);
// Defined outside this listing. Its return value is added to the scan
// position by CheckForReservedWord().
int HandleReservedWord(int nPos, CString sToken, CString& sCode) const;
// Replaces the snippet in m_sCode with its HTML rendering and updates the
// dialog controls.
void HtmlifyCode();
// True for the comment and string literal states, in which reserved words
// must not be highlighted.
bool IgnoreReserved(int nState) const;
// True when sToken equals one of the entries of m_sReservedOrDirective.
bool IsSpecial(CString sToken) const;
virtual BOOL OnInitDialog();
afx_msg void OnPaint();
afx_msg HCURSOR OnQueryDragIcon();
// Accelerator table and icon handles (their use is not shown here).
HACCEL m_hAccel;
HICON m_hIcon;
// The snippet text: source code before HtmlifyCode(), HTML afterwards.
CString m_sCode;
// Reserved words and preprocessor directives searched by IsSpecial().
static CStringArray m_sReservedOrDirective;
// When false, HtmlifyCode() follows every "<br>" with "\r\n" so the HTML
// stays readable; when true the output is a single line.
BOOL m_fWebForm;
DECLARE_MESSAGE_MAP()
};
Not terribly interesting by itself, so let's move on to the main function of interest and its helpers:
void CMainWindow::HtmlifyCode()
{
/*
* Comment to prove multiline comments do not always fail :)
*/
VERIFY(UpdateData(true));
CString sHtmlified = _T("");
TCHAR chCurrent;
int nState = NONE;
int nCurPos = 0;
CString sToken;
for (int i = 0; i < m_sCode.GetLength(); ++i)
{
chCurrent = m_sCode.GetAt(i);
switch (chCurrent)
{
case _T(' '):
sHtmlified += _T(" ");
if (nState == SLASH) nState = NONE;
break;
case _T('\t'):
sHtmlified += _T(" ");
if (nState == SLASH) nState = NONE;
break;
case _T('\n'):
if (nState == SLASHSLASH)
{
sHtmlified += ENDCOMMENT;
nState = NONE;
}
else if (nState == SLASH) nState = NONE;
sHtmlified += _T("<br>");
if (!m_fWebForm) sHtmlified += _T("\r\n");
break;
case _T('/'):
switch (nState)
{
case NONE:
sHtmlified += chCurrent;
nState = SLASH;
break;
case SLASH:
nState = SLASHSLASH;
sHtmlified += chCurrent;
sHtmlified.Insert(sHtmlified.GetLength()-2, COMMENTHTML);
break;
case SLASHSTARSTAR:
nState = NONE;
sHtmlified += chCurrent;
sHtmlified += ENDCOMMENT;
break;
default:
sHtmlified += chCurrent;
break;
}
break;
case _T('*'):
switch (nState)
{
case SLASH:
nState = SLASHSTAR;
sHtmlified += chCurrent;
sHtmlified.Insert(sHtmlified.GetLength()-2, COMMENTHTML);
break;
case SLASHSTAR:
nState = SLASHSTARSTAR;
sHtmlified += chCurrent;
break;
default:
sHtmlified += chCurrent;
break;
}
break;
case _T('\r'):
break; // eat it
case _T('"'):
switch (nState)
{
case LITERAL:
nState = NONE;
break;
case LITERALESCAPE:
nState = LITERAL;
break;
}
if (nState == SLASH) nState = NONE;
sHtmlified += chCurrent;
break;
case _T('\''):
switch (nState)
{
case CHARLITERAL:
nState = NONE;
break;
case NONE:
nState = CHARLITERAL;
break;
case CHARLITERALESCAPE:
nState = CHARLITERAL;
break;
}
if (nState == SLASH) nState = NONE;
sHtmlified += chCurrent;
break;
case _T('<'):
if (nState == SLASH) nState = NONE;
sHtmlified += _T("<");
break;
case _T('>'):
if (nState == SLASH) nState = NONE;
sHtmlified += _T(">");
break;
case _T('&'):
if (nState == SLASH) nState = NONE;
sHtmlified += _T("&");
break;
case _T('\\'):
switch (nState)
{
case LITERAL:
nState = LITERALESCAPE;
break;
case CHARLITERAL:
nState = CHARLITERALESCAPE;
break;
case LITERALESCAPE:
nState = LITERAL;
break;
case CHARLITERALESCAPE:
nState = CHARLITERAL;
break;
}
if (nState == SLASH) nState = NONE;
sHtmlified += chCurrent;
break;
default:
if (!CheckForReservedWord(nState, i, sHtmlified))
sHtmlified += chCurrent;
if (nState == SLASH) nState = NONE;
else if (nState == CHARLITERALESCAPE) nState = CHARLITERAL;
break;
}
}
sHtmlified = INITHTML + sHtmlified;
sHtmlified += ENDHTML;
m_sCode = sHtmlified;
UpdateData(false);
}
///
/// <summary>
/// Tells whether reserved word highlighting is suppressed in a scanner state.
/// </summary>
/// <param name="nState">Current scanner state.</param>
/// <returns>
/// true for the comment states and the string literal states, false for
/// every other state.
/// </returns>
///
bool CMainWindow::IgnoreReserved(int nState) const
{
    switch (nState)
    {
    case SLASHSLASH:
    case SLASHSTAR:
    case SLASHSTARSTAR:
    case LITERAL:
    case LITERALESCAPE:
        return true;

    default:
        return false;
    }
}
///
/// <summary>
/// Looks a token up in the list of reserved words and directives.
/// </summary>
/// <param name="sToken">Token to look for.</param>
/// <returns>
/// true when sToken equals an entry of m_sReservedOrDirective.
/// </returns>
///
bool CMainWindow::IsSpecial(CString sToken) const
{
    // Walk the list until a match is found or the list is exhausted.
    INT_PTR nIndex = 0;
    while (nIndex < m_sReservedOrDirective.GetCount())
    {
        if (sToken == m_sReservedOrDirective[nIndex])
        {
            return true;
        }
        ++nIndex;
    }
    return false;
}
void CMainWindow::HtmlifyCode()
{
/*
* Comment to prove multiline comments do not always fail :)
*/
VERIFY(UpdateData(true));
CString sHtmlified = _T("");
TCHAR chCurrent;
int nState = NONE;
int nCurPos = 0;
CString sToken;
for (int i = 0; i < m_sCode.GetLength(); ++i)
{
chCurrent = m_sCode.GetAt(i);
switch (chCurrent)
{
case _T(' '):
sHtmlified += _T(" ");
if (nState == SLASH) nState = NONE;
break;
case _T('\t'):
sHtmlified += _T(" ");
if (nState == SLASH) nState = NONE;
break;
case _T('\n'):
if (nState == SLASHSLASH)
{
sHtmlified += ENDCOMMENT;
nState = NONE;
}
else if (nState == SLASH) nState = NONE;
sHtmlified += _T("<br>");
if (!m_fWebForm) sHtmlified += _T("\r\n");
break;
case _T('/'):
switch (nState)
{
case NONE:
sHtmlified += chCurrent;
nState = SLASH;
break;
case SLASH:
nState = SLASHSLASH;
sHtmlified += chCurrent;
sHtmlified.Insert(sHtmlified.GetLength()-2, COMMENTHTML);
break;
case SLASHSTARSTAR:
nState = NONE;
sHtmlified += chCurrent;
sHtmlified += ENDCOMMENT;
break;
default:
sHtmlified += chCurrent;
break;
}
break;
case _T('*'):
switch (nState)
{
case SLASH:
nState = SLASHSTAR;
sHtmlified += chCurrent;
sHtmlified.Insert(sHtmlified.GetLength()-2, COMMENTHTML);
break;
case SLASHSTAR:
nState = SLASHSTARSTAR;
sHtmlified += chCurrent;
break;
default:
sHtmlified += chCurrent;
break;
}
break;
case _T('\r'):
break; // eat it
case _T('"'):
switch (nState)
{
case LITERAL:
nState = NONE;
break;
case LITERALESCAPE:
nState = LITERAL;
break;
}
if (nState == SLASH) nState = NONE;
sHtmlified += chCurrent;
break;
case _T('\''):
switch (nState)
{
case CHARLITERAL:
nState = NONE;
break;
case NONE:
nState = CHARLITERAL;
break;
case CHARLITERALESCAPE:
nState = CHARLITERAL;
break;
}
if (nState == SLASH) nState = NONE;
sHtmlified += chCurrent;
break;
case _T('<'):
if (nState == SLASH) nState = NONE;
sHtmlified += _T("<");
break;
case _T('>'):
if (nState == SLASH) nState = NONE;
sHtmlified += _T(">");
break;
case _T('&'):
if (nState == SLASH) nState = NONE;
sHtmlified += _T("&");
break;
case _T('\\'):
switch (nState)
{
case LITERAL:
nState = LITERALESCAPE;
break;
case CHARLITERAL:
nState = CHARLITERALESCAPE;
break;
case LITERALESCAPE:
nState = LITERAL;
break;
case CHARLITERALESCAPE:
nState = CHARLITERAL;
break;
}
if (nState == SLASH) nState = NONE;
sHtmlified += chCurrent;
break;
default:
if (!CheckForReservedWord(nState, i, sHtmlified))
sHtmlified += chCurrent;
if (nState == SLASH) nState = NONE;
else if (nState == CHARLITERALESCAPE) nState = CHARLITERAL;
break;
}
}
sHtmlified = INITHTML + sHtmlified;
sHtmlified += ENDHTML;
m_sCode = sHtmlified;
UpdateData(false);
}
///
/// <summary>
/// Tells whether reserved word highlighting is suppressed in a scanner state.
/// </summary>
/// <param name="nState">Current scanner state.</param>
/// <returns>
/// true for the comment states and the string literal states, false for
/// every other state.
/// </returns>
///
bool CMainWindow::IgnoreReserved(int nState) const
{
    switch (nState)
    {
    case SLASHSLASH:
    case SLASHSTAR:
    case SLASHSTARSTAR:
    case LITERAL:
    case LITERALESCAPE:
        return true;

    default:
        return false;
    }
}
///
/// <summary>
/// Looks a token up in the list of reserved words and directives.
/// </summary>
/// <param name="sToken">Token to look for.</param>
/// <returns>
/// true when sToken equals an entry of m_sReservedOrDirective.
/// </returns>
///
bool CMainWindow::IsSpecial(CString sToken) const
{
    // Walk the list until a match is found or the list is exhausted.
    INT_PTR nIndex = 0;
    while (nIndex < m_sReservedOrDirective.GetCount())
    {
        if (sToken == m_sReservedOrDirective[nIndex])
        {
            return true;
        }
        ++nIndex;
    }
    return false;
}
///
/// <summary>
/// Highlights the reserved word or directive that starts at nPos, if any.
/// </summary>
/// <param name="nState">Current scanner state of HtmlifyCode().</param>
/// <param name="nPos">
/// Position in m_sCode to inspect. On success it is advanced by the value
/// HandleReservedWord() returns; otherwise it is left unchanged.
/// </param>
/// <param name="sHtmlified">
/// HTML produced so far; handed to HandleReservedWord() on success.
/// </param>
/// <returns>
/// true when a reserved word was found and handled, false when the caller
/// still has to emit the character at nPos itself.
/// </returns>
///
bool CMainWindow::CheckForReservedWord(int nState, int& nPos,
                                       CString& sHtmlified) const
{
    // Nothing is highlighted inside comments or string literals, so the
    // tokenizing below can be skipped for them altogether.
    if (IgnoreReserved(nState))
        return false;

    // Single definition of the delimiter set, shared by the token-start
    // test and the tokenizer. '\r' is part of it so that a word ending a
    // CRLF-terminated line ("else\r\n") is not tokenized as "else\r".
    const CString sDelimiters =
        _T(" \r\n\t+-\\*~<>!=&;:.{}[]()|^%\\/?',=");

    // A word can only start at the beginning of the text or directly after
    // a delimiter; anything else is the inside of a longer identifier.
    if ((nPos > 0) && (sDelimiters.Find(m_sCode.GetAt(nPos - 1)) == -1))
        return false;

    // Tokenize() skips leading delimiters and advances its position
    // argument, so it works on a copy of nPos.
    int nScanPos = nPos;
    const CString sToken = m_sCode.Tokenize(sDelimiters, nScanPos);
    if (sToken.IsEmpty())
        return false;

    // Because delimiters are skipped, the token may start after nPos; it
    // only counts when it begins exactly at nPos.
    if (m_sCode.Mid(nPos, sToken.GetLength()) != sToken)
        return false;

    if (!IsSpecial(sToken))
        return false;

    nPos += HandleReservedWord(nPos, sToken, sHtmlified);
    return true;
}
I guess you could say I cheated a little since I only used the automaton to do the highlighting of comments and wrote some helper functions to deal with the keywords/preprocessor directives. One of the main shortcomings of my simple approach is that context-sensitive preprocessor directives are not supported (such as the
#pragma once
directive, particularly the "once" token). You could criticize the memory usage of this algorithm since I used another CString to insert the HTML rather than just inserting the HTML right into m_sCode. I think that would have been messier, and it would have been an unnecessary performance hack.
A lot of these web form interfaces don't let you have decently formatted HTML without screwing up your original intention. They tend to interpret <br>\n as two lines, which kept double spacing my code. The m_fWebForm boolean variable indicates whether you want one big nasty line of HTML (true) or decently formatted, somewhat human-readable HTML (false). It took a fair amount of work to figure out what truly caused the double-spaced code. This task proved to be much more of a learning experience than I intended, but I guess that could be a good thing.
I am already planning on some improvements. It would be really easy to add highlighting to string and char literals. If I modified it to read values from a file for some variables that are currently hard-coded, this app could be made language agnostic. I may do that just for the fun of it, but I am a self-admitted C++ elitist.
So, that is it. Is this any better, gus?
0 Comments:
Post a Comment
<< Home