Cdf546b601bf29a7eb4ca777544d11cd

Procedural-style code to strip HTML comments. No attempt was made to make this code more OO/pattern-based so that it would be more readable/maintainable. ;)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
/// <summary>
/// Utility routines for cleaning HTML markup.
/// </summary>
public static class HtmlHelper {
    /// <summary>
    /// Removes every <c>&lt;! ... &gt;</c> span from <paramref name="html"/> in one pass,
    /// except those that occur while the scanner is inside a tag whose name starts
    /// with an English letter (so comment-like text in attribute values is kept).
    /// A stripped span ends at the first <c>&gt;</c> that follows its <c>&lt;!</c>.
    /// </summary>
    /// <param name="html">The markup to clean; must not be null.</param>
    /// <returns>
    /// The markup without the stripped spans. When the input contains no
    /// <c>&lt;!</c> at all, the very same string instance is returned.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="html"/> is null.</exception>
    public static string StripHtmlComments(string html) {
        if (html == null) {
            throw new ArgumentNullException("html");
        }

        // Fast path: no "<!" means there is nothing that could be stripped.
        if (html.IndexOf("<!", StringComparison.Ordinal) < 0) {
            return html;
        }

        // The result can never be longer than the input, so one buffer suffices.
        var buffer = new char[html.Length];
        int written = 0;
        bool skippingComment = false;
        bool insideTag = false;

        for (int pos = 0; pos < html.Length; pos++) {
            char c = html[pos];

            if (skippingComment) {
                // Drop everything up to and including the closing '>'.
                if (c == '>') {
                    skippingComment = false;
                }
                continue;
            }

            if (insideTag) {
                // Tag content is copied verbatim; the first '>' closes the tag.
                if (c == '>') {
                    insideTag = false;
                }
            }
            else if (c == '<' && pos + 1 < html.Length) {
                char following = html[pos + 1];

                if (following == '!') {
                    // Start of a comment: the '<' itself is not copied either.
                    skippingComment = true;
                    continue;
                }

                if (IsEnglishLetter(following)) {
                    insideTag = true;
                }
            }

            buffer[written++] = c;
        }

        return new String(buffer, 0, written);
    }

    // True only for the ASCII letters a-z and A-Z.
    private static bool IsEnglishLetter(char candidate) {
        bool isLower = candidate >= 'a' && candidate <= 'z';
        bool isUpper = candidate >= 'A' && candidate <= 'Z';
        return isLower || isUpper;
    }
}

//Unit Tests
// A null argument must be rejected with ArgumentNullException.
[TestMethod]
public void NullStringReturnsThrowsArgumentNullException() {
    try {
        HtmlHelper.StripHtmlComments(null);
    }
    catch (ArgumentNullException) {
        // Expected outcome: nothing more to verify.
        return;
    }

    // Reached only when no exception was raised at all.
    Assert.Fail();
}

// An empty input has nothing to strip and comes back empty.
[TestMethod]
public void EmptyStringReturnsEmpty() {
    string actual = HtmlHelper.StripHtmlComments(string.Empty);

    Assert.AreEqual(string.Empty, actual);
}

// Markup that contains ordinary tags but no comment is returned unchanged.
[TestMethod]
public void StringWithoutCommentReturnsSameString() {
    string input = "This has <strong>No Comments</strong>!";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual(input, actual);
}

// A "<!--" span that is closed by a bare '>' is removed entirely.
[TestMethod]
public void StringWithOnlyCommentReturnsEmptyString() {
    string input = "<!-- this go bye bye>";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual(string.Empty, actual);
}

// A "<! ... >" span without the "--" dashes is treated as a comment too.
[TestMethod]
public void Html_WithNonDashDashComment_ReturnsEmptyString() {
    string input = "<! this go bye bye>";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual(string.Empty, actual);
}

// Two comments placed back to back are both removed.
[TestMethod]
public void StringWithTwoConsecutiveCommentsReturnsEmptyString() {
    string input = "<!-- this go bye bye><!-- another comment>";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual(string.Empty, actual);
}

// Text in front of a trailing comment survives the stripping.
[TestMethod]
public void CommentWithStringBeforeReturnsString() {
    string input = "Hello<!-- this go bye bye -->";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual("Hello", actual);
}

// Text that follows a leading comment survives the stripping.
[TestMethod]
public void CommentWithStringAfterReturnsString() {
    string input = "<!-- this go bye bye -->World";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual("World", actual);
}

// Angle brackets that wrap non-HTML content (no "<!") are left alone.
[TestMethod]
public void Html_WithAngleBracketsButNotHtml_NotSripped() {
    string input = "<$)*(@&$(@*>";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual(input, actual);
}

// Comments scattered between words are removed; the surrounding text and its spaces remain.
[TestMethod]
public void Html_WithCommentInterleavedWithText_RendersText() {
    string input = "Hello <!-- this go bye bye --> World <!--> This is fun";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual("Hello  World  This is fun", actual);
}

// Regular tags are preserved while the comment after them is removed.
[TestMethod]
public void Html_WithHtmlTags_DoesNotStripHtml() {
    string input = "<strong>Hello</strong><!this go bye bye>";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual("<strong>Hello</strong>", actual);
}

// Comment-like text inside a double-quoted attribute value must be kept.
[TestMethod]
public void Html_WithCommentInAttribute_DoesNotStripAttributeValue() {
    string input = "<img alt=\"<!-- This should remain -->\" />";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual(input, actual);
}

// Comment-like text inside a single-quoted attribute value must be kept.
[TestMethod]
public void Html_WithCommentInSingleQuotedAttribute_DoesNotStripAttributeValue() {
    string input = "<img alt=\'<!-- This should remain -->\' />";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual(input, actual);
}

// Comment-like text used as an unquoted attribute value must be kept.
[TestMethod]
public void Html_WithCommentInNonQuotedAttribute_DoesNotStripAttributeValue() {
    string input = "<p title=<!--Thisshouldremain-->Test</p>";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual(input, actual);
}

// "<ç123 ...>" is not a real tag (its name does not start with an English letter),
// so the comment inside its pseudo-attribute is expected to be stripped.
[TestMethod]
public void Html_WithCommentBetweenNonTagButLooksLikeTag_DoesStripComment() {
    string input = @"<ç123 title=""<!bc def>"">";

    string actual = HtmlHelper.StripHtmlComments(input);

    Assert.AreEqual(@"<ç123 title="""">", actual);
}

Refactorings

No refactoring yet !

4d72203c38dd5f3e3d2d446b5888e8a7

Elij

November 11, 2008, November 11, 2008 04:52, permalink

1 rating. Login to rate!

Haven't tested this -- but if it doesn't work, it should just be a matter of revising the regex.

1
2
3
4
5
6
7
8
9
10
11
12
/// <summary>
/// Regex-based helper for removing HTML comments from markup.
/// </summary>
public static class HtmlHelper {
    // Matches "<!-- " ... " -->" (the pattern requires the space after the opener
    // and before the closer, and refuses to run across another "<!-- ").
    //  - Singleline lets '.' match newlines, so comments spanning lines are found.
    //  - The lazy "*?" stops at the first " -->" instead of over-matching to a later one.
    // Built once and reused; Regex instances are safe to share for matching.
    private static readonly System.Text.RegularExpressions.Regex CommentPattern =
        new System.Text.RegularExpressions.Regex(
            "((<!-- )((?!<!-- ).)*?( -->))",
            System.Text.RegularExpressions.RegexOptions.Singleline);

    /// <summary>
    /// Removes every <c>&lt;!-- ... --&gt;</c> comment matched by the pattern above
    /// from <paramref name="html"/>.
    /// </summary>
    /// <param name="html">The markup to clean; must not be null.</param>
    /// <returns>The markup with all matched comments replaced by nothing.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="html"/> is null.</exception>
    public static string StripHtmlComments(string html) {
        if (html == null) {
            throw new ArgumentNullException("html");
        }

        return CommentPattern.Replace(html, string.Empty);
    }
}
72f36daa501cf8f5bb861210edd9232d

Moonshield

November 11, 2008, November 11, 2008 05:14, permalink

No rating. Login to rate!

I ran the unit tests against the regex: 7/15 passed. If you still want to do it with a loop, I've cleaned up your code a little. I can take a look tomorrow too. I'm too tired to think tonight :)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
/// <summary>
/// Loop-based helper for removing HTML comments from markup.
/// </summary>
public static class HtmlHelper
{
    /// <summary>
    /// Removes every <c>&lt;! ... &gt;</c> span from <paramref name="pHtml"/>, except
    /// those found while scanning inside a tag whose name starts with an English
    /// letter. A stripped span ends at the first <c>&gt;</c> after its <c>&lt;!</c>.
    /// </summary>
    /// <param name="pHtml">The markup to clean; must not be null.</param>
    /// <returns>
    /// The cleaned markup. Input without any <c>&lt;!</c> is returned as-is,
    /// without being copied.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="pHtml"/> is null.</exception>
    public static string StripHtmlComments(string pHtml)
    {
        if (pHtml == null)
        {
            throw new ArgumentNullException("pHtml");
        }

        // Nothing that looks like a comment: skip the scan and the copy entirely.
        if (pHtml.IndexOf("<!", StringComparison.Ordinal) < 0)
        {
            return pHtml;
        }

        // The output is never longer than the input, so size the builder up front.
        StringBuilder cleaned = new StringBuilder(pHtml.Length);
        bool inComment = false;
        bool inTag = false;

        for (int index = 0; index < pHtml.Length; index++)
        {
            char current = pHtml[index];

            if (inComment)
            {
                // Discard the comment body and its closing '>'.
                if (current == '>')
                {
                    inComment = false;
                }
                continue;
            }

            if (inTag)
            {
                // Tag text is kept verbatim; the first '>' ends the tag.
                if (current == '>')
                {
                    inTag = false;
                }
            }
            else if (current == '<' && index + 1 < pHtml.Length)
            {
                char next = pHtml[index + 1];

                if (next == '!')
                {
                    // Comment opener: the '<' is dropped along with the rest.
                    inComment = true;
                    continue;
                }

                if (IsEnglishLetter(next))
                {
                    inTag = true;
                }
            }

            cleaned.Append(current);
        }

        return cleaned.ToString();
    }

    // True only for the ASCII letters a-z and A-Z.
    private static bool IsEnglishLetter(char candidate)
    {
        return ('a' <= candidate && candidate <= 'z') || ('A' <= candidate && candidate <= 'Z');
    }
}
51d623f33f8b83095db84ff35e15dbe8

Jeff Atwood

November 16, 2009, November 16, 2009 04:25, permalink

No rating. Login to rate!

That "invalid HTML tag" case is really tough, a real edge condition. If you discard that one, it's pretty easy. I'd recommend doing a first pass to remove all invalid HTML tags. Like... er... <ç123>.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
    /// <summary>
    /// Removes <c>&lt;! ... &gt;</c> spans from <paramref name="html"/> with a single
    /// regular-expression replace.
    /// </summary>
    /// <param name="html">The markup to clean; must not be null.</param>
    /// <returns>
    /// The markup without the matched spans; the original string when it
    /// contains no <c>&lt;!</c> at all.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="html"/> is null.</exception>
    public static string StripHtmlComments(string html)
    {
        if (html == null)
        {
            throw new ArgumentNullException("html");
        }

        // The lookbehind skips any "<!...>" that directly follows =, =' or =",
        // i.e. one that sits at the start of an attribute value. The span itself
        // needs at least one character between "<!" and the closing '>'.
        const string commentPattern = "(?<!='|=\"|=)<![^>]+>";

        // Only run the regex when the input could contain a match at all.
        bool mayContainComment = html.Contains("<!");

        return mayContainComment
            ? Regex.Replace(html, commentPattern, "")
            : html;
    }
7d2549fc8b7b2ab5449cef8f80741508

asgddhd

February 3, 2010, February 03, 2010 01:50, permalink

No rating. Login to rate!

Desktop Security 2010 is the new antivirus software which is created according to the up-to-date requirements of computer protection. The main aim of this product is to make your computer usage efficient and secure. Multilevel protection of the Desktop Security 2010 antivirus will provide your data integrity and safety, and master components of the program will prevent computer from infection and hack attacks. Special attention is devoted to the network and internet connections security because it is the most probable way of virus intrusion.
1. For me it became a rescuer after a serious virus infection of my pc. Kaspersky antivirus didn't cure viruses, and NOD antivirus did not even detect them. Desktop Security 2010 detected all the viruses very quickly and suggested to cure them. On the whole for now it has been working at my pc for a couple of months as the main antivirus, I don't have any problems with viruses, just sometimes while surfing the web I get messages about attempting intrusions which my Desktop Security 2010 blocks. I am very satisfied with its work.
2. Earlier I did not consider anything except for NOD antivirus. But recently I've been recovering my data for hours after virus intrusion to my computer, and this case happened several times. I understood that I needed to look for something new because my NOD antivirus became too choosy. Kaspersky antivirus is not a variant for me - each computer with a core duo processor hardly works with it. Dr Web is rather old-fashioned. I heard good recommendations about Desktop Security 2010 - and installed it, and it even cured a few viruses after NOD antivirus. Now I just never remind of my problems with viruses.
3. I think that it is one of the best anti-viruses, at least for me - it provides user-friendly menu, easy settings, it doesn't slow down my pc and its visual environment looks nice. It's a pretty good antivirus.
download Desktop Security 2010
virus Desktop Security 2010
antivirus Desktop Security 2010
trojan Desktop Security 2010
remove Desktop Security 2010
buy Desktop Security 2010
cheap Desktop Security 2010

Your refactoring





Format Copy from initial code

or Cancel