Cdf546b601bf29a7eb4ca777544d11cd

Procedural-style code to strip HTML comments. No attempt was made to make this code more object-oriented or pattern-based so that it would be more readable/maintainable. ;)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
public static class HtmlHelper {
    /// <summary>
    /// Removes comment-like markup from an HTML fragment.
    /// </summary>
    /// <remarks>
    /// A "comment" here is anything that starts with <c>&lt;!</c> and runs up to and
    /// including the next <c>&gt;</c> (so <c>&lt;!-- x --&gt;</c>, <c>&lt;! x &gt;</c> and
    /// <c>&lt;!--&gt;</c> are all removed). Text inside a tag (a <c>&lt;</c> immediately
    /// followed by an English letter) is copied verbatim, so comment-like text inside
    /// attribute values is preserved. Quoted attribute values are tracked so that a
    /// <c>&gt;</c> inside quotes does not end the tag prematurely.
    /// </remarks>
    /// <param name="html">The markup to clean. Must not be null.</param>
    /// <returns>The markup with comments removed; the same instance when it contains no <c>&lt;!</c>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="html"/> is null.</exception>
    public static string StripHtmlComments(string html) {
        if (html == null) {
            throw new ArgumentNullException("html");
        }

        // Fast path: without "<!" there is nothing that could be a comment.
        if (html.IndexOf("<!", StringComparison.Ordinal) < 0) {
            return html;
        }

        // The result can never be longer than the input.
        var cleanedHtml = new char[html.Length];
        int cleanCount = 0;

        bool inHtmlComment = false;
        bool inHtmlTag = false;
        // Inside a tag: true while the last non-whitespace character was '=',
        // i.e. a quote seen now opens an attribute value.
        bool valueMayStart = false;
        // Inside a tag: the quote character delimiting the current attribute
        // value, or '\0' when not inside a quoted value.
        char openQuote = '\0';

        for (int i = 0; i < html.Length; i++) {
            char current = html[i];

            if (inHtmlComment) {
                // Drop everything up to and including the closing '>'.
                if (current == '>') {
                    inHtmlComment = false;
                }
                continue;
            }

            if (inHtmlTag) {
                if (openQuote != '\0') {
                    // Inside a quoted value only the matching quote is significant;
                    // '>' and "<!" are plain text here.
                    if (current == openQuote) {
                        openQuote = '\0';
                    }
                }
                else if (current == '>') {
                    inHtmlTag = false;
                }
                else if (valueMayStart && (current == '"' || current == '\'')) {
                    openQuote = current;
                    valueMayStart = false;
                }
                else if (current == '=') {
                    valueMayStart = true;
                }
                else if (!char.IsWhiteSpace(current)) {
                    // Any other character starts an unquoted value or a new name;
                    // quotes appearing later in it are literal.
                    valueMayStart = false;
                }
            }
            else if (current == '<' && i + 1 < html.Length) {
                char nextChar = html[i + 1];
                if (nextChar == '!') {
                    // Start of a comment: skip the '<' itself as well.
                    inHtmlComment = true;
                    continue;
                }
                if (IsEnglishLetter(nextChar)) {
                    inHtmlTag = true;
                    valueMayStart = false;
                }
            }

            cleanedHtml[cleanCount++] = current;
        }

        return new String(cleanedHtml, 0, cleanCount);
    }

    /// <summary>Returns true when the character is an ASCII letter (a-z or A-Z).</summary>
    private static bool IsEnglishLetter(char nextChar) {
        return ('a' <= nextChar && nextChar <= 'z') || ('A' <= nextChar && nextChar <= 'Z');
    }

}

//Unit Tests
[TestMethod]
public void NullStringReturnsThrowsArgumentNullException() {
    // A null argument must be rejected with ArgumentNullException.
    bool rejected = false;

    try {
        HtmlHelper.StripHtmlComments(null);
    }
    catch (ArgumentNullException) {
        rejected = true;
    }

    // Reaching this point without the exception means the guard is missing.
    if (!rejected) {
        Assert.Fail();
    }
}

[TestMethod]
public void EmptyStringReturnsEmpty() {
    // Empty input has nothing to strip and must come back empty.
    string actual = HtmlHelper.StripHtmlComments(string.Empty);

    Assert.AreEqual(string.Empty, actual);
}

[TestMethod]
public void StringWithoutCommentReturnsSameString() {
    // Ordinary tags and a trailing '!' must not be mistaken for a comment.
    string input = "This has <strong>No Comments</strong>!";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual(input, actual);
}

[TestMethod]
public void StringWithOnlyCommentReturnsEmptyString() {
    // The comment here is closed by a bare '>' rather than "-->".
    string input = "<!-- this go bye bye>";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual(string.Empty, actual);
}

[TestMethod]
public void Html_WithNonDashDashComment_ReturnsEmptyString() {
    // "<!" without the dashes is still expected to be removed.
    string input = "<! this go bye bye>";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual(string.Empty, actual);
}

[TestMethod]
public void StringWithTwoConsecutiveCommentsReturnsEmptyString() {
    // Two comments back to back, with nothing in between, leave no output.
    string input = "<!-- this go bye bye><!-- another comment>";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual(string.Empty, actual);
}

[TestMethod]
public void CommentWithStringBeforeReturnsString() {
    // Text preceding a comment must survive untouched.
    string input = "Hello<!-- this go bye bye -->";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual("Hello", actual);
}

[TestMethod]
public void CommentWithStringAfterReturnsString() {
    // Text following a comment must survive untouched.
    string input = "<!-- this go bye bye -->World";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual("World", actual);
}

[TestMethod]
public void Html_WithAngleBracketsButNotHtml_NotSripped() {
    // Angle brackets around non-markup characters are not a comment.
    string input = "<$)*(@&$(@*>";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual(input, actual);
}

[TestMethod]
public void Html_WithCommentInterleavedWithText_RendersText() {
    // Both a regular comment and the degenerate "<!-->" form are removed;
    // the surrounding whitespace is kept, hence the double spaces.
    string input = "Hello <!-- this go bye bye --> World <!--> This is fun";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual("Hello  World  This is fun", actual);
}

[TestMethod]
public void Html_WithHtmlTags_DoesNotStripHtml() {
    // Real tags stay; only the trailing comment is dropped.
    string input = "<strong>Hello</strong><!this go bye bye>";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual("<strong>Hello</strong>", actual);
}

[TestMethod]
public void Html_WithCommentInAttribute_DoesNotStripAttributeValue() {
    // Comment-like text inside a double-quoted attribute value is content, not a comment.
    string input = "<img alt=\"<!-- This should remain -->\" />";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual(input, actual);
}

[TestMethod]
public void Html_WithCommentInSingleQuotedAttribute_DoesNotStripAttributeValue() {
    // Same as the double-quoted case, but with single quotes around the value.
    string input = "<img alt=\'<!-- This should remain -->\' />";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual(input, actual);
}

[TestMethod]
public void Html_WithCommentInNonQuotedAttribute_DoesNotStripAttributeValue() {
    // Comment-like text used as an unquoted attribute value must also be left alone.
    string input = "<p title=<!--Thisshouldremain-->Test</p>";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual(input, actual);
}

[TestMethod]
public void Html_WithCommentBetweenNonTagButLooksLikeTag_DoesStripComment() {
    // "<ç123" does not begin with an English letter, so it is not treated as a tag
    // and the comment inside the would-be attribute is removed.
    string input = @"<ç123 title=""<!bc def>"">";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual(@"<ç123 title="""">", actual);
}

Refactorings

No refactoring yet !

4d72203c38dd5f3e3d2d446b5888e8a7

Elij

November 11, 2008, November 11, 2008 04:52, permalink

1 rating. Login to rate!

I haven't tested this, but if it doesn't work it should just be a matter of reviewing the regex.

1
2
3
4
5
6
7
8
9
10
11
12
public static class HtmlHelper {
    // Matches one HTML comment:
    //   - "<!-->" and "<!--->" (abruptly closed, empty comments), or
    //   - "<!--" followed by the shortest possible run of characters up to "-->".
    // The lazy quantifier stops at the FIRST "-->" so text after a comment is never
    // swallowed. Singleline lets '.' match newlines so multi-line comments are
    // removed. No surrounding spaces are required. Built once and reused across calls.
    private static readonly System.Text.RegularExpressions.Regex CommentPattern =
        new System.Text.RegularExpressions.Regex(
            "<!--(?:-?>|.*?-->)",
            System.Text.RegularExpressions.RegexOptions.Singleline);

    /// <summary>
    /// Removes standard HTML comments (<c>&lt;!-- ... --&gt;</c>) from the given markup.
    /// </summary>
    /// <param name="html">The markup to clean. Must not be null.</param>
    /// <returns>The markup with every comment replaced by the empty string.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="html"/> is null.</exception>
    public static string StripHtmlComments(string html) {
        if (html == null) {
            throw new ArgumentNullException("html");
        }

        return CommentPattern.Replace(html, string.Empty);
    }
}
72f36daa501cf8f5bb861210edd9232d

Moonshield

November 11, 2008, November 11, 2008 05:14, permalink

No rating. Login to rate!

I ran the unit tests against the regex version: 7/15 passed. If you still want to do it with a loop, I've cleaned up your code a little. I can take another look tomorrow too. I'm too tired to think tonight :)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
public static class HtmlHelper
{
    // Scanner position relative to the markup being read.
    private enum ScanState
    {
        Text,         // outside any tag or comment
        Comment,      // after "<!" and before the closing '>'
        Tag,          // inside "<letter ... >" but not inside a quoted value
        QuotedValue   // inside a quoted attribute value of a tag
    }

    /// <summary>
    /// Removes comment-like markup from an HTML fragment.
    /// </summary>
    /// <remarks>
    /// A "comment" is anything from <c>&lt;!</c> up to and including the next <c>&gt;</c>.
    /// Text inside a tag (a <c>&lt;</c> immediately followed by an English letter) is
    /// copied verbatim, so comment-like text inside attribute values is preserved.
    /// Quoted attribute values are tracked so that a <c>&gt;</c> inside quotes does not
    /// end the tag prematurely.
    /// </remarks>
    /// <param name="pHtml">The markup to clean. Must not be null.</param>
    /// <returns>The markup with comments removed.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="pHtml"/> is null.</exception>
    public static string StripHtmlComments(string pHtml)
    {
        if (pHtml == null)
        {
            throw new ArgumentNullException("pHtml");
        }

        // Fast path: without "<!" there is nothing that could be a comment.
        if (pHtml.IndexOf("<!", StringComparison.Ordinal) < 0)
        {
            return pHtml;
        }

        // The result can never be longer than the input.
        StringBuilder cleaned = new StringBuilder(pHtml.Length);
        ScanState state = ScanState.Text;
        // In Tag state: true while the last non-whitespace character was '=',
        // meaning a quote seen now opens an attribute value.
        bool valueExpected = false;
        // In QuotedValue state: the quote character that closes the value.
        char quote = '\0';

        for (int index = 0; index < pHtml.Length; index++)
        {
            char current = pHtml[index];

            switch (state)
            {
                case ScanState.Comment:
                    // Drop everything up to and including the closing '>'.
                    if (current == '>')
                    {
                        state = ScanState.Text;
                    }
                    continue;

                case ScanState.QuotedValue:
                    // Only the matching quote is significant here; '>' and "<!" are plain text.
                    if (current == quote)
                    {
                        state = ScanState.Tag;
                    }
                    break;

                case ScanState.Tag:
                    if (current == '>')
                    {
                        state = ScanState.Text;
                    }
                    else if (valueExpected && (current == '"' || current == '\''))
                    {
                        quote = current;
                        valueExpected = false;
                        state = ScanState.QuotedValue;
                    }
                    else if (current == '=')
                    {
                        valueExpected = true;
                    }
                    else if (!char.IsWhiteSpace(current))
                    {
                        // Start of an unquoted value or a new name; later quotes are literal.
                        valueExpected = false;
                    }
                    break;

                default:
                    if (current == '<' && index + 1 < pHtml.Length)
                    {
                        char next = pHtml[index + 1];
                        if (next == '!')
                        {
                            // Start of a comment: skip the '<' itself as well.
                            state = ScanState.Comment;
                            continue;
                        }
                        if (next.IsEnglishLetter())
                        {
                            state = ScanState.Tag;
                            valueExpected = false;
                        }
                    }
                    break;
            }

            cleaned.Append(current);
        }

        return cleaned.ToString();
    }

    /// <summary>Returns true when the character is an ASCII letter (a-z or A-Z).</summary>
    private static bool IsEnglishLetter(this char nextChar)
    {
        return ('a' <= nextChar && nextChar <= 'z') || ('A' <= nextChar && nextChar <= 'Z');
    }
}

Your refactoring





Format Copy from initial code

or Cancel