<%@ Page Language="vb" %>
<%@ Import Namespace="System" %>
<%@ Import Namespace="System.Text.RegularExpressions" %>
<%@ Import Namespace="System.Text" %>
<%@ Import Namespace="System.Net" %>
<%@ Import Namespace="System.IO" %>
<script language="VB" runat="server">
' Page-level state shared between the click handler and the regex match callback.
' report: accumulates one line per rewritten anchor (appended in MatchHandler, rendered in scrapeButton_Click).
Private report As New StringBuilder()
' webPage: the raw text of the downloaded page; assigned in scrapeButton_Click and read by MatchHandler
' to work out the row/column of each match.
Private webPage As String
' countOfMatches: number of anchors rewritten so far; incremented once per MatchHandler call.
Private countOfMatches As Int32
' Click handler for the "Scrape..." button.
'
' Downloads the page named in urlTextBox (via GrabUrl), rewrites every relative
' anchor href through MatchHandler, and renders into resultLabel:
'   1. a report of the anchors that were found and fixed, and
'   2. the HTML-encoded text of the rewritten page.
'
' Parameters:
'   sender - the control that raised the event (unused).
'   e      - event arguments (unused).
'
' Notes on the link pattern (compiled with IgnorePatternWhitespace, so the
' literal spaces and the (?# ... ) groups inside the pattern are ignored):
'   * The negative lookaheads skip hrefs that are already absolute. Both
'     http:// and https:// are excluded; excluding only http:// would cause
'     https links (including the ones this page itself produces) to be
'     prefixed a second time.
'   * mailto: links are skipped as well.
'   * The href value is captured in the named group 'foundAnchor', which
'     MatchHandler reads.
'
' The user-supplied URL is HTML-encoded before being echoed back, because it is
' untrusted input written into the page markup.
Private Sub scrapeButton_Click( ByVal sender As System.Object, ByVal e As System.EventArgs )
    webPage = GrabUrl()

    Dim myDelegate As New MatchEvaluator( AddressOf MatchHandler )
    Dim linksExpression As New Regex( _
        "\<a (?# Find the opening ANCHOR tag )" & _
        ".+? (?# followed, minimally by everything up to the href attribute ) " & _
        "href=['""] (?# up to the opening Href attribute ) " & _
        "(?!https?\:\/\/) (?# assert that the next sequence is not http:// or https://) " & _
        "(?!mailto\:) (?# ...or mailto:) " & _
        "(?<foundAnchor>[^'"">]+?) (?# now, match everything up to the next ' or "" into a group named 'foundAnchor') " & _
        "[^>]*? (?# followed, minimally by everything up to the closing tag ) " & _
        "\> (?# then the end of the opening ANCHOR tag)", _
        RegexOptions.Multiline Or _
        RegexOptions.IgnoreCase Or _
        RegexOptions.IgnorePatternWhitespace _
    )

    ' Replace must run before the report is rendered: MatchHandler fills
    ' report and countOfMatches as a side effect of each match.
    Dim newWebPage As String = linksExpression.Replace( webPage, myDelegate )

    resultLabel.Text = "<h2>Report Result for " & Server.HtmlEncode( urlTextBox.Text ) & "</h2>" & _
        "<b>Found and fixed the following " & countOfMatches.ToString() & " anchors...</b><br><br>" & _
        report.ToString().Replace( Environment.NewLine, "<br>" )
    resultLabel.Text &= "<h2>Fixed Page</h2>" & Server.HtmlEncode( newWebPage )
End Sub
' MatchEvaluator callback: invoked once for every anchor tag matched by the
' link pattern in scrapeButton_Click.
'
' Side effects:
'   * appends one HTML-encoded line describing the match (with its row and
'     column in webPage) to the page-level report builder;
'   * increments countOfMatches.
'
' Parameters:
'   m - the match for one opening anchor tag; its 'foundAnchor' group holds
'       the relative href value.
'
' Returns:
'   The text of the anchor tag with the href value replaced by an absolute
'   URL under https://www.4guysfromrolla.com/ (a single leading "/" on the
'   original href is dropped so the result does not contain "//").
Private Function MatchHandler( ByVal m As Match ) As String
    Dim anchorGroup As Group = m.Groups( "foundAnchor" )
    Dim link As String = anchorGroup.Value

    ' A right-to-left search for "^" (multiline) starting at the match position
    ' finds the line starts at or before the match: the first hit is the start
    ' of the current line, and the number of hits is the line number.
    Dim rToL As New Regex( "^", RegexOptions.Multiline Or RegexOptions.RightToLeft )
    Dim col, row As Int32
    Dim lineBegin As Int32 = rToL.Match( webPage, m.Index ).Index
    row = rToL.Matches( webPage, m.Index ).Count
    col = m.Index - lineBegin

    report.AppendFormat( _
        "Link <b>{0}</b>, fixed at row: {1}, col: {2}{3}", _
        Server.HtmlEncode( m.Value ), _
        row, _
        col, _
        Environment.NewLine _
    )

    Dim newLink As String
    If link.StartsWith( "/" ) Then
        newLink = link.Substring( 1 )
    Else
        newLink = link
    End If

    countOfMatches += 1

    ' Splice the new URL in at the exact position of the captured href value.
    ' A plain String.Replace on the tag text would also rewrite any other
    ' occurrence of the same text inside the tag (for example href="a" would
    ' corrupt the "a" of "<a" itself).
    Dim tagText As String = m.Value
    Dim offsetInTag As Int32 = anchorGroup.Index - m.Index
    Return tagText.Substring( 0, offsetInTag ) & _
        "https://www.4guysfromrolla.com/" & newLink & _
        tagText.Substring( offsetInTag + anchorGroup.Length )
End Function
' Downloads the document at the address typed into urlTextBox and returns its
' full text.
'
' Returns:
'   The response body read to the end as a string.
'
' Throws:
'   UriFormatException - the text is not a valid absolute URI.
'   ArgumentException  - the URI is not http or https. WebClient would
'                        otherwise happily open file:// URIs and local paths,
'                        letting a visitor read files from the web server.
'   WebException       - the download itself failed.
'
' The reader, stream and client are released in a Finally block so they are
' cleaned up even when the download or read throws.
Private Function GrabUrl() As String
    Dim target As New Uri( urlTextBox.Text.Trim() )
    If target.Scheme <> Uri.UriSchemeHttp AndAlso target.Scheme <> Uri.UriSchemeHttps Then
        Throw New ArgumentException( "Only http and https URLs can be scraped." )
    End If

    Dim wc As New WebClient()
    Dim s As Stream = Nothing
    Dim sr As StreamReader = Nothing
    Try
        s = wc.OpenRead( target.AbsoluteUri )
        sr = New StreamReader( s )
        GrabUrl = sr.ReadToEnd()
    Finally
        ' Close in reverse order of creation; each may be Nothing if an
        ' earlier step failed.
        If Not sr Is Nothing Then sr.Close()
        If Not s Is Nothing Then s.Close()
        wc.Dispose()
    End Try
End Function
</script>
<form id="Form1" method="post" runat="server">
<P>
<asp:Label id="Label1" runat="server">Enter a Url: </asp:Label>
<asp:TextBox id="urlTextBox" runat="server" Width="336px">https://www.4guysfromrolla.com/</asp:TextBox>
<asp:Button OnClick="scrapeButton_Click" id="scrapeButton" runat="server" Text="Scrape..."></asp:Button></P>
<HR width="100%" SIZE="1">
<P>
<asp:Label id="resultLabel" runat="server"></asp:Label></P>
</form>
|