Web page downloader that supports redirects

.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;

up vote
5
down vote

favorite

I wrote a class that downloads the HTML of a page from the internet. This class also manages redirects. This works well but I'm not very satisfied about time management for slower servers. In fact, if the website is slow, the method GetHtml will return an empty string, so how can I optimize this class, to prevent such a situation from happening?

CODE

using System;
using System.IO;
using System.Net;
using System.Net.Http;

namespace SWP.Helpers
{
    /// <summary>
    /// Download html from an Internet page, following HTTP redirects manually.
    /// </summary>
    public class NetworkHelper
    {
        /// <summary>
        /// Store the url of the last page that was downloaded successfully.
        /// </summary>
        private Uri _storedUrl = null;

        /// <summary>
        /// Store the html of the last page that was downloaded successfully.
        /// </summary>
        private string _storedData = "";

        /// <summary>
        /// Get the html structure of a site.
        /// </summary>
        /// <param name="url">Represents the URL of the page where to download the data.</param>
        /// <returns>Return a string that contains the html of the site.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
        public string GetHtml(Uri url)
        {
            // Without this guard a null url matches the initial (null) cache key
            // and silently yields an empty string.
            if (url == null)
            {
                throw new ArgumentNullException(nameof(url));
            }

            // Same url as the last successful download: reuse the cached html
            // instead of opening another connection.
            if (url == _storedUrl)
            {
                return _storedData;
            }

            try
            {
                // NOTE(review): the handler declaration was lost when this page was
                // extracted; it is reconstructed from the surviving
                // "DecompressionMethods.GZip" fragment and from the manual redirect
                // handling below (which needs auto-redirect off) - confirm.
                HttpClientHandler handler = new HttpClientHandler
                {
                    AllowAutoRedirect = false,
                    AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip
                };

                // Disposing the client also disposes the handler it owns.
                using (HttpClient httpClient = new HttpClient(handler))
                using (HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Get, url))
                using (HttpResponseMessage response = httpClient.SendAsync(request).Result)
                {
                    int statusCode = (int)response.StatusCode;
                    Uri redirectUri = response.Headers.Location;

                    // A 3xx status is only followed when the server supplied a target;
                    // e.g. 304 carries no Location header and is read as a normal response.
                    // NOTE(review): there is no limit on the number of redirects followed.
                    if (statusCode >= 300 && statusCode <= 399 && redirectUri != null)
                    {
                        // Resolve relative targets against the request url; this also
                        // handles targets that do not start with '/'.
                        if (!redirectUri.IsAbsoluteUri)
                        {
                            redirectUri = new Uri(request.RequestUri, redirectUri);
                        }

                        return GetHtml(redirectUri);
                    }

                    string html = response.Content.ReadAsStringAsync().Result;

                    // Update the cache only after the download succeeded, so a failed
                    // or timed-out request can never make a retry return stale/empty data.
                    _storedUrl = url;
                    _storedData = html;
                    return html;
                }
            }
            catch (WebException)
            {
                // Rethrown unchanged so the caller decides how to handle it.
                throw;
            }
        }
    }
}

as you can see, the logic is really simple, and the method is well commented. Are there any problems I haven't taken into account?

edited Jun 10 at 16:47

Daniel

4,1132836

asked Jun 10 at 11:33

Charanoglu

333

Any reason why blocking calls are being made and that the function is not async?
– Nkosi
Jun 10 at 13:56

HttpClientHandler already has the ability to follow redirection responses out of the box with AllowAutoRedirect property
– Nkosi
Jun 10 at 13:59

add a comment |

up vote
5
down vote

favorite

CODE

using System;
using System.IO;
using System.Net;
using System.Net.Http;

namespace SWP.Helpers
{
    /// <summary>
    /// Download html from an Internet page, following HTTP redirects manually.
    /// </summary>
    public class NetworkHelper
    {
        /// <summary>
        /// Store the url of the last page that was downloaded successfully.
        /// </summary>
        private Uri _storedUrl = null;

        /// <summary>
        /// Store the html of the last page that was downloaded successfully.
        /// </summary>
        private string _storedData = "";

        /// <summary>
        /// Get the html structure of a site.
        /// </summary>
        /// <param name="url">Represents the URL of the page where to download the data.</param>
        /// <returns>Return a string that contains the html of the site.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
        public string GetHtml(Uri url)
        {
            // Without this guard a null url matches the initial (null) cache key
            // and silently yields an empty string.
            if (url == null)
            {
                throw new ArgumentNullException(nameof(url));
            }

            // Same url as the last successful download: reuse the cached html
            // instead of opening another connection.
            if (url == _storedUrl)
            {
                return _storedData;
            }

            try
            {
                // NOTE(review): the handler declaration was lost when this page was
                // extracted; it is reconstructed from the surviving
                // "DecompressionMethods.GZip" fragment and from the manual redirect
                // handling below (which needs auto-redirect off) - confirm.
                HttpClientHandler handler = new HttpClientHandler
                {
                    AllowAutoRedirect = false,
                    AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip
                };

                // Disposing the client also disposes the handler it owns.
                using (HttpClient httpClient = new HttpClient(handler))
                using (HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Get, url))
                using (HttpResponseMessage response = httpClient.SendAsync(request).Result)
                {
                    int statusCode = (int)response.StatusCode;
                    Uri redirectUri = response.Headers.Location;

                    // A 3xx status is only followed when the server supplied a target;
                    // e.g. 304 carries no Location header and is read as a normal response.
                    // NOTE(review): there is no limit on the number of redirects followed.
                    if (statusCode >= 300 && statusCode <= 399 && redirectUri != null)
                    {
                        // Resolve relative targets against the request url; this also
                        // handles targets that do not start with '/'.
                        if (!redirectUri.IsAbsoluteUri)
                        {
                            redirectUri = new Uri(request.RequestUri, redirectUri);
                        }

                        return GetHtml(redirectUri);
                    }

                    string html = response.Content.ReadAsStringAsync().Result;

                    // Update the cache only after the download succeeded, so a failed
                    // or timed-out request can never make a retry return stale/empty data.
                    _storedUrl = url;
                    _storedData = html;
                    return html;
                }
            }
            catch (WebException)
            {
                // Rethrown unchanged so the caller decides how to handle it.
                throw;
            }
        }
    }
}

as you can see, the logic is really simple, and the method is well commented. Are there any problems I haven't taken into account?

edited Jun 10 at 16:47

Daniel

4,1132836

asked Jun 10 at 11:33

Charanoglu

333

Any reason why blocking calls are being made and that the function is not async?
– Nkosi
Jun 10 at 13:56

HttpClientHandler already has the ability to follow redirection responses out of the box with AllowAutoRedirect property
– Nkosi
Jun 10 at 13:59

add a comment |

up vote
5
down vote

favorite

CODE

using System;
using System.IO;
using System.Net;
using System.Net.Http;

namespace SWP.Helpers
{
    /// <summary>
    /// Download html from an Internet page, following HTTP redirects manually.
    /// </summary>
    public class NetworkHelper
    {
        /// <summary>
        /// Store the url of the last page that was downloaded successfully.
        /// </summary>
        private Uri _storedUrl = null;

        /// <summary>
        /// Store the html of the last page that was downloaded successfully.
        /// </summary>
        private string _storedData = "";

        /// <summary>
        /// Get the html structure of a site.
        /// </summary>
        /// <param name="url">Represents the URL of the page where to download the data.</param>
        /// <returns>Return a string that contains the html of the site.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
        public string GetHtml(Uri url)
        {
            // Without this guard a null url matches the initial (null) cache key
            // and silently yields an empty string.
            if (url == null)
            {
                throw new ArgumentNullException(nameof(url));
            }

            // Same url as the last successful download: reuse the cached html
            // instead of opening another connection.
            if (url == _storedUrl)
            {
                return _storedData;
            }

            try
            {
                // NOTE(review): the handler declaration was lost when this page was
                // extracted; it is reconstructed from the surviving
                // "DecompressionMethods.GZip" fragment and from the manual redirect
                // handling below (which needs auto-redirect off) - confirm.
                HttpClientHandler handler = new HttpClientHandler
                {
                    AllowAutoRedirect = false,
                    AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip
                };

                // Disposing the client also disposes the handler it owns.
                using (HttpClient httpClient = new HttpClient(handler))
                using (HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Get, url))
                using (HttpResponseMessage response = httpClient.SendAsync(request).Result)
                {
                    int statusCode = (int)response.StatusCode;
                    Uri redirectUri = response.Headers.Location;

                    // A 3xx status is only followed when the server supplied a target;
                    // e.g. 304 carries no Location header and is read as a normal response.
                    // NOTE(review): there is no limit on the number of redirects followed.
                    if (statusCode >= 300 && statusCode <= 399 && redirectUri != null)
                    {
                        // Resolve relative targets against the request url; this also
                        // handles targets that do not start with '/'.
                        if (!redirectUri.IsAbsoluteUri)
                        {
                            redirectUri = new Uri(request.RequestUri, redirectUri);
                        }

                        return GetHtml(redirectUri);
                    }

                    string html = response.Content.ReadAsStringAsync().Result;

                    // Update the cache only after the download succeeded, so a failed
                    // or timed-out request can never make a retry return stale/empty data.
                    _storedUrl = url;
                    _storedData = html;
                    return html;
                }
            }
            catch (WebException)
            {
                // Rethrown unchanged so the caller decides how to handle it.
                throw;
            }
        }
    }
}

as you can see, the logic is really simple, and the method is well commented. Are there any problems I haven't taken into account?

edited Jun 10 at 16:47

Daniel

4,1132836

asked Jun 10 at 11:33

Charanoglu

333

CODE

using System;
using System.IO;
using System.Net;
using System.Net.Http;

namespace SWP.Helpers
{
    /// <summary>
    /// Download html from an Internet page, following HTTP redirects manually.
    /// </summary>
    public class NetworkHelper
    {
        /// <summary>
        /// Store the url of the last page that was downloaded successfully.
        /// </summary>
        private Uri _storedUrl = null;

        /// <summary>
        /// Store the html of the last page that was downloaded successfully.
        /// </summary>
        private string _storedData = "";

        /// <summary>
        /// Get the html structure of a site.
        /// </summary>
        /// <param name="url">Represents the URL of the page where to download the data.</param>
        /// <returns>Return a string that contains the html of the site.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
        public string GetHtml(Uri url)
        {
            // Without this guard a null url matches the initial (null) cache key
            // and silently yields an empty string.
            if (url == null)
            {
                throw new ArgumentNullException(nameof(url));
            }

            // Same url as the last successful download: reuse the cached html
            // instead of opening another connection.
            if (url == _storedUrl)
            {
                return _storedData;
            }

            try
            {
                // NOTE(review): the handler declaration was lost when this page was
                // extracted; it is reconstructed from the surviving
                // "DecompressionMethods.GZip" fragment and from the manual redirect
                // handling below (which needs auto-redirect off) - confirm.
                HttpClientHandler handler = new HttpClientHandler
                {
                    AllowAutoRedirect = false,
                    AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip
                };

                // Disposing the client also disposes the handler it owns.
                using (HttpClient httpClient = new HttpClient(handler))
                using (HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Get, url))
                using (HttpResponseMessage response = httpClient.SendAsync(request).Result)
                {
                    int statusCode = (int)response.StatusCode;
                    Uri redirectUri = response.Headers.Location;

                    // A 3xx status is only followed when the server supplied a target;
                    // e.g. 304 carries no Location header and is read as a normal response.
                    // NOTE(review): there is no limit on the number of redirects followed.
                    if (statusCode >= 300 && statusCode <= 399 && redirectUri != null)
                    {
                        // Resolve relative targets against the request url; this also
                        // handles targets that do not start with '/'.
                        if (!redirectUri.IsAbsoluteUri)
                        {
                            redirectUri = new Uri(request.RequestUri, redirectUri);
                        }

                        return GetHtml(redirectUri);
                    }

                    string html = response.Content.ReadAsStringAsync().Result;

                    // Update the cache only after the download succeeded, so a failed
                    // or timed-out request can never make a retry return stale/empty data.
                    _storedUrl = url;
                    _storedData = html;
                    return html;
                }
            }
            catch (WebException)
            {
                // Rethrown unchanged so the caller decides how to handle it.
                throw;
            }
        }
    }
}

as you can see, the logic is really simple, and the method is well commented. Are there any problems I haven't taken into account?

edited Jun 10 at 16:47

Daniel

4,1132836

asked Jun 10 at 11:33

Charanoglu

333

edited Jun 10 at 16:47

Daniel

4,1132836

edited Jun 10 at 16:47

Daniel

4,1132836

edited Jun 10 at 16:47

Daniel

4,1132836

asked Jun 10 at 11:33

Charanoglu

333

asked Jun 10 at 11:33

Charanoglu

333

asked Jun 10 at 11:33

Charanoglu

333

Any reason why blocking calls are being made and that the function is not async?
– Nkosi
Jun 10 at 13:56

HttpClientHandler already has the ability to follow redirection responses out of the box with AllowAutoRedirect property
– Nkosi
Jun 10 at 13:59

add a comment |

Any reason why blocking calls are being made and that the function is not async?
– Nkosi
Jun 10 at 13:56

HttpClientHandler already has the ability to follow redirection responses out of the box with AllowAutoRedirect property
– Nkosi
Jun 10 at 13:59

Any reason why blocking calls are being made and that the function is not async?
– Nkosi
Jun 10 at 13:56

HttpClientHandler already has the ability to follow redirection responses out of the box with AllowAutoRedirect property
– Nkosi
Jun 10 at 13:59

add a comment |

1 Answer
1

active

oldest

votes

up vote
6
down vote

accepted

HttpClient and associated classes provide most of the desired behavior.

HttpClientHandler already has the ability to follow redirection responses out of the box with AllowAutoRedirect property. You can also specify how many redirections to allow.

That can be extracted out into a function

/// <summary>
/// Builds the message handler used by the shared HttpClient: automatic
/// redirects capped at five hops and transparent response decompression,
/// each enabled only when the running framework reports support for it.
/// </summary>
/// <returns>The configured handler.</returns>
static HttpMessageHandler CreateHandler()
{
    var handler = new HttpClientHandler();

    // if the framework supports redirect configuration
    // set max redirect to the desired amount. The default is 50
    if (handler.SupportsRedirectConfiguration)
    {
        handler.AllowAutoRedirect = true;
        handler.MaxAutomaticRedirections = 5;
    }

    // if the framework supports automatic decompression
    // set automatic decompression
    // NOTE(review): the tail of this statement was lost in extraction; Deflate
    // is assumed to have followed GZip - confirm.
    if (handler.SupportsAutomaticDecompression)
    {
        handler.AutomaticDecompression = System.Net.DecompressionMethods.GZip |
                                         System.Net.DecompressionMethods.Deflate;
    }

    return handler;
}

Creating multiple instances of HttpClient can cause an exhaustion of available sockets, so it is suggested to try keeping one instance for the duration of the application.

Specify a short time out duration for slower responses to help with time management.

// Single lazily-created client for the whole application; creating a client
// per request can exhaust the available sockets. The 3 second timeout is only
// an example value and should be tuned for the servers being contacted.
static readonly Lazy<HttpClient> httpClient = new Lazy<HttpClient>(() =>
{
    var handler = CreateHandler();
    return new HttpClient(handler)
    {
        Timeout = TimeSpan.FromSeconds(3)
    };
});

Timeout can be adjusted to specific needs.

Cancellation tokens can also be used to help with time management.

Seeing as any exception is just being rethrown, then there is really no need for the try/catch. The exception would be handled by the caller.

HttpClient uses an asynchronous API. Its dependents should also be asynchronous all the way through and avoid blocking calls like .Result or .Wait to avoid deadlocks.

The refactored helper now looks like this

/// <summary>
/// Download html from an Internet page.
/// </summary>
public class NetworkHelper
{
    // Single lazily-created client shared by every instance; creating a client
    // per request can exhaust the available sockets. The 3 second timeout is
    // only an example value and should be tuned for the servers being contacted.
    static readonly Lazy<HttpClient> httpClient = new Lazy<HttpClient>(() =>
    {
        var handler = CreateHandler();
        return new HttpClient(handler)
        {
            Timeout = TimeSpan.FromSeconds(3)
        };
    });

    /// <summary>
    /// Builds the handler for the shared client: automatic redirects capped at
    /// five hops and transparent decompression, each enabled only when the
    /// running framework reports support for it.
    /// </summary>
    static HttpMessageHandler CreateHandler()
    {
        var handler = new HttpClientHandler();

        // if the framework supports redirect configuration
        // set max redirect to the desired amount; the default is 50
        if (handler.SupportsRedirectConfiguration)
        {
            handler.AllowAutoRedirect = true;
            handler.MaxAutomaticRedirections = 5;
        }

        // if the framework supports automatic decompression
        // set automatic decompression
        // NOTE(review): the tail of this statement was lost in extraction;
        // Deflate is assumed to have followed GZip - confirm.
        if (handler.SupportsAutomaticDecompression)
        {
            handler.AutomaticDecompression = System.Net.DecompressionMethods.GZip |
                                             System.Net.DecompressionMethods.Deflate;
        }

        return handler;
    }

    /// <summary>
    /// Get the html structure of a site.
    /// </summary>
    /// <param name="url">Represents the URL of the page where to download the data.</param>
    /// <param name="cancellationToken">Optional token used to abandon the request early.</param>
    /// <returns>Return a string that contains the html of the site.</returns>
    public async Task<string> GetHtmlAsync(Uri url, CancellationToken cancellationToken = default(CancellationToken))
    {
        // The status code is not inspected: the body of an error response is
        // returned as-is, exactly like a successful one.
        // ConfigureAwait(false): this helper never needs the caller's context.
        using (var response = await httpClient.Value.GetAsync(url, cancellationToken).ConfigureAwait(false))
        {
            return await response.Content.ReadAsStringAsync().ConfigureAwait(false);
        }
    }
}

And used

//...

// Await the download; blocking with .Result or .Wait() risks a deadlock.
var client = new NetworkHelper();
var html = await client.GetHtmlAsync(uri);

edited Jun 11 at 14:27

answered Jun 10 at 14:52

Nkosi

1,868619

1

I didn't know this CancellationToken cancellationToken = default(CancellationToken) was possible with the CancellationToken as a parameter :-o
– t3chb0t
Jun 10 at 15:05

@Nkosi thanks for the answer, I'll test this in my library. Do you think that 3 seconds are enough for a slow server?
– Charanoglu
Jun 10 at 16:55

@Charanoglu it was just meant as an example. You can test it with different durations till you get the one you are most comfortable with.
– Nkosi
Jun 10 at 16:59

@Nkosi thanks for clarifying this, just a question: to download the data should I do something like this: new NetworkHelper().GetHtmlAsync(coachLink).Result?
– Charanoglu
Jun 10 at 18:22

@Charanoglu that would again be mixing async and blocking calls that can cause deadlock. Instead use var html = await new NetworkHelper().GetHtmlAsync(coachLink);. Otherwise you need to use an API that is not asynchronous.
– Nkosi
Jun 10 at 18:24

add a comment |

Your Answer

// When the editor module is in use, load MathJax editing support and hook it
// into every Markdown editor that gets created.
StackExchange.ifUsing("editor", function () {
    return StackExchange.using("mathjaxEditing", function () {
        StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
            StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
        });
    });
}, "mathjax-editing");

// When the editor module is in use, load the external editor and then
// initialise code-snippet support.
StackExchange.ifUsing("editor", function () {
    StackExchange.using("externalEditor", function () {
        StackExchange.using("snippets", function () {
            StackExchange.snippets.init();
        });
    });
}, "code-snippets");

// On page ready: set up the tag renderer, then create the answer editor
// (after the snippets module has loaded, when snippets are enabled).
StackExchange.ready(function() {
    var channelOptions = {
        tags: "".split(" "),
        id: "196"
    };
    initTagRenderer("".split(" "), "".split(" "), channelOptions);

    StackExchange.using("externalEditor", function() {
        // Have to fire editor after snippets, if snippets enabled
        if (StackExchange.settings.snippets.snippetsEnabled) {
            StackExchange.using("snippets", function() {
                createEditor();
            });
        }
        else {
            createEditor();
        }
    });

    // Configures the answer editor for this question page.
    function createEditor() {
        StackExchange.prepareEditor({
            heartbeatType: 'answer',
            convertImagesToLinks: false,
            noModals: false,
            showLowRepImageUploadWarning: true,
            reputationToPostImages: null,
            bindNavPrevention: true,
            postfix: "",
            onDemand: true,
            discardSelector: ".discard-answer"
            ,immediatelyShowMarkdownHelp:true
        });
    }
});

draft saved

draft discarded

// On page ready: wire up the post-as-guest login flow, returning to the
// new-answer anchor of this question afterwards.
StackExchange.ready(
    function () {
        StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f196226%2fweb-page-downloader-that-supports-redirects%23new-answer', 'question_page');
    }
);

Post as a guest

Name

1 Answer
1

active

oldest

votes

1 Answer
1

active

oldest

votes

up vote
6
down vote

accepted

HttpClient and associated classes provide most of the desired behavior.

HttpClientHandler already has the ability to follow redirection responses out of the box with AllowAutoRedirect property. You can also specify how many redirections to allow.

That can be extracted out into a function

/// <summary>
/// Builds the message handler used by the shared HttpClient: automatic
/// redirects capped at five hops and transparent response decompression,
/// each enabled only when the running framework reports support for it.
/// </summary>
/// <returns>The configured handler.</returns>
static HttpMessageHandler CreateHandler()
{
    var handler = new HttpClientHandler();

    // if the framework supports redirect configuration
    // set max redirect to the desired amount. The default is 50
    if (handler.SupportsRedirectConfiguration)
    {
        handler.AllowAutoRedirect = true;
        handler.MaxAutomaticRedirections = 5;
    }

    // if the framework supports automatic decompression
    // set automatic decompression
    // NOTE(review): the tail of this statement was lost in extraction; Deflate
    // is assumed to have followed GZip - confirm.
    if (handler.SupportsAutomaticDecompression)
    {
        handler.AutomaticDecompression = System.Net.DecompressionMethods.GZip |
                                         System.Net.DecompressionMethods.Deflate;
    }

    return handler;
}

Creating multiple instances of HttpClient can cause an exhaustion of available sockets, so it is suggested to try keeping one instance for the duration of the application.

Specify a short time out duration for slower responses to help with time management.

// Single lazily-created client for the whole application; creating a client
// per request can exhaust the available sockets. The 3 second timeout is only
// an example value and should be tuned for the servers being contacted.
static readonly Lazy<HttpClient> httpClient = new Lazy<HttpClient>(() =>
{
    var handler = CreateHandler();
    return new HttpClient(handler)
    {
        Timeout = TimeSpan.FromSeconds(3)
    };
});

Timeout can be adjusted to specific needs.

Cancellation tokens can also be used to help with time management.

Seeing as any exception is just being rethrown, then there is really no need for the try/catch. The exception would be handled by the caller.

HttpClient uses an asynchronous API. Its dependents should also be asynchronous all the way through and avoid blocking calls like .Result or .Wait to avoid deadlocks.

The refactored helper now looks like this

/// <summary>
/// Download html from an Internet page.
/// </summary>
public class NetworkHelper
{
    // Single lazily-created client shared by every instance; creating a client
    // per request can exhaust the available sockets. The 3 second timeout is
    // only an example value and should be tuned for the servers being contacted.
    static readonly Lazy<HttpClient> httpClient = new Lazy<HttpClient>(() =>
    {
        var handler = CreateHandler();
        return new HttpClient(handler)
        {
            Timeout = TimeSpan.FromSeconds(3)
        };
    });

    /// <summary>
    /// Builds the handler for the shared client: automatic redirects capped at
    /// five hops and transparent decompression, each enabled only when the
    /// running framework reports support for it.
    /// </summary>
    static HttpMessageHandler CreateHandler()
    {
        var handler = new HttpClientHandler();

        // if the framework supports redirect configuration
        // set max redirect to the desired amount; the default is 50
        if (handler.SupportsRedirectConfiguration)
        {
            handler.AllowAutoRedirect = true;
            handler.MaxAutomaticRedirections = 5;
        }

        // if the framework supports automatic decompression
        // set automatic decompression
        // NOTE(review): the tail of this statement was lost in extraction;
        // Deflate is assumed to have followed GZip - confirm.
        if (handler.SupportsAutomaticDecompression)
        {
            handler.AutomaticDecompression = System.Net.DecompressionMethods.GZip |
                                             System.Net.DecompressionMethods.Deflate;
        }

        return handler;
    }

    /// <summary>
    /// Get the html structure of a site.
    /// </summary>
    /// <param name="url">Represents the URL of the page where to download the data.</param>
    /// <param name="cancellationToken">Optional token used to abandon the request early.</param>
    /// <returns>Return a string that contains the html of the site.</returns>
    public async Task<string> GetHtmlAsync(Uri url, CancellationToken cancellationToken = default(CancellationToken))
    {
        // The status code is not inspected: the body of an error response is
        // returned as-is, exactly like a successful one.
        // ConfigureAwait(false): this helper never needs the caller's context.
        using (var response = await httpClient.Value.GetAsync(url, cancellationToken).ConfigureAwait(false))
        {
            return await response.Content.ReadAsStringAsync().ConfigureAwait(false);
        }
    }
}

And used

//...

// Await the download; blocking with .Result or .Wait() risks a deadlock.
var client = new NetworkHelper();
var html = await client.GetHtmlAsync(uri);

edited Jun 11 at 14:27

answered Jun 10 at 14:52

Nkosi

1,868619

1

I didn't know this CancellationToken cancellationToken = default(CancellationToken) was possible with the CancellationToken as a parameter :-o
– t3chb0t
Jun 10 at 15:05

@Nkosi thanks for the answer, I'll test this in my library. Do you think that 3 seconds are enough for a slow server?
– Charanoglu
Jun 10 at 16:55

@Charanoglu it was just meant as an example. You can test it with different durations till you get the one you are most comfortable with.
– Nkosi
Jun 10 at 16:59

@Nkosi thanks for clarifying this, just a question: to download the data should I do something like this: new NetworkHelper().GetHtmlAsync(coachLink).Result?
– Charanoglu
Jun 10 at 18:22

@Charanoglu that would again be mixing async and blocking calls that can cause deadlock. Instead use var html = await new NetworkHelper().GetHtmlAsync(coachLink);. Otherwise you need to use an API that is not asynchronous.
– Nkosi
Jun 10 at 18:24

add a comment |

up vote
6
down vote

accepted

HttpClient and associated classes provide most of the desired behavior.

HttpClientHandler already has the ability to follow redirection responses out of the box with AllowAutoRedirect property. You can also specify how many redirections to allow.

That can be extracted out into a function

/// <summary>
/// Builds the message handler used by the shared HttpClient: automatic
/// redirects capped at five hops and transparent response decompression,
/// each enabled only when the running framework reports support for it.
/// </summary>
/// <returns>The configured handler.</returns>
static HttpMessageHandler CreateHandler()
{
    var handler = new HttpClientHandler();

    // if the framework supports redirect configuration
    // set max redirect to the desired amount. The default is 50
    if (handler.SupportsRedirectConfiguration)
    {
        handler.AllowAutoRedirect = true;
        handler.MaxAutomaticRedirections = 5;
    }

    // if the framework supports automatic decompression
    // set automatic decompression
    // NOTE(review): the tail of this statement was lost in extraction; Deflate
    // is assumed to have followed GZip - confirm.
    if (handler.SupportsAutomaticDecompression)
    {
        handler.AutomaticDecompression = System.Net.DecompressionMethods.GZip |
                                         System.Net.DecompressionMethods.Deflate;
    }

    return handler;
}

Creating multiple instances of HttpClient can cause an exhaustion of available sockets, so it is suggested to try keeping one instance for the duration of the application.

Specify a short time out duration for slower responses to help with time management.

// Single lazily-created client for the whole application; creating a client
// per request can exhaust the available sockets. The 3 second timeout is only
// an example value and should be tuned for the servers being contacted.
static readonly Lazy<HttpClient> httpClient = new Lazy<HttpClient>(() =>
{
    var handler = CreateHandler();
    return new HttpClient(handler)
    {
        Timeout = TimeSpan.FromSeconds(3)
    };
});

Timeout can be adjusted to specific needs.

Cancellation tokens can also be used to help with time management.

Seeing as any exception is just being rethrown, then there is really no need for the try/catch. The exception would be handled by the caller.

HttpClient uses an asynchronous API. Its dependents should also be asynchronous all the way through and avoid blocking calls like .Result or .Wait to avoid deadlocks.

The refactored helper now looks like this

/// <summary>
/// Download html from an Internet page.
/// </summary>
public class NetworkHelper
{
    // Single lazily-created client shared by every instance; creating a client
    // per request can exhaust the available sockets. The 3 second timeout is
    // only an example value and should be tuned for the servers being contacted.
    static readonly Lazy<HttpClient> httpClient = new Lazy<HttpClient>(() =>
    {
        var handler = CreateHandler();
        return new HttpClient(handler)
        {
            Timeout = TimeSpan.FromSeconds(3)
        };
    });

    /// <summary>
    /// Builds the handler for the shared client: automatic redirects capped at
    /// five hops and transparent decompression, each enabled only when the
    /// running framework reports support for it.
    /// </summary>
    static HttpMessageHandler CreateHandler()
    {
        var handler = new HttpClientHandler();

        // if the framework supports redirect configuration
        // set max redirect to the desired amount; the default is 50
        if (handler.SupportsRedirectConfiguration)
        {
            handler.AllowAutoRedirect = true;
            handler.MaxAutomaticRedirections = 5;
        }

        // if the framework supports automatic decompression
        // set automatic decompression
        // NOTE(review): the tail of this statement was lost in extraction;
        // Deflate is assumed to have followed GZip - confirm.
        if (handler.SupportsAutomaticDecompression)
        {
            handler.AutomaticDecompression = System.Net.DecompressionMethods.GZip |
                                             System.Net.DecompressionMethods.Deflate;
        }

        return handler;
    }

    /// <summary>
    /// Get the html structure of a site.
    /// </summary>
    /// <param name="url">Represents the URL of the page where to download the data.</param>
    /// <param name="cancellationToken">Optional token used to abandon the request early.</param>
    /// <returns>Return a string that contains the html of the site.</returns>
    public async Task<string> GetHtmlAsync(Uri url, CancellationToken cancellationToken = default(CancellationToken))
    {
        // The status code is not inspected: the body of an error response is
        // returned as-is, exactly like a successful one.
        // ConfigureAwait(false): this helper never needs the caller's context.
        using (var response = await httpClient.Value.GetAsync(url, cancellationToken).ConfigureAwait(false))
        {
            return await response.Content.ReadAsStringAsync().ConfigureAwait(false);
        }
    }
}

And used

//...

// Await the download; blocking with .Result or .Wait() risks a deadlock.
var client = new NetworkHelper();
var html = await client.GetHtmlAsync(uri);

edited Jun 11 at 14:27

answered Jun 10 at 14:52

Nkosi

1,868619

1

I didn't know this CancellationToken cancellationToken = default(CancellationToken) was possible with the CancellationToken as a parameter :-o
– t3chb0t
Jun 10 at 15:05

@Nkosi thanks for the answer, I'll test this in my library. Do you think that 3 seconds are enough for a slow server?
– Charanoglu
Jun 10 at 16:55

@Charanoglu it was just meant as an example. You can test it with different durations till you get the one you are most comfortable with.
– Nkosi
Jun 10 at 16:59

@Nkosi thanks for clarifying this, just a question: to download the data should I do something like this: new NetworkHelper().GetHtmlAsync(coachLink).Result?
– Charanoglu
Jun 10 at 18:22

@Charanoglu that would again be mixing async and blocking calls that can cause deadlock. Instead use var html = await new NetworkHelper().GetHtmlAsync(coachLink);. Otherwise you need to use an API that is not asynchronous.
– Nkosi
Jun 10 at 18:24

add a comment |

up vote
6
down vote

accepted

HttpClient and associated classes provide most of the desired behavior.

HttpClientHandler already has the ability to follow redirection responses out of the box with AllowAutoRedirect property. You can also specify how many redirections to allow.

That can be extracted out into a function

/// <summary>
/// Builds the message handler used by the shared HttpClient: automatic
/// redirects capped at five hops and transparent response decompression,
/// each enabled only when the running framework reports support for it.
/// </summary>
/// <returns>The configured handler.</returns>
static HttpMessageHandler CreateHandler()
{
    var handler = new HttpClientHandler();

    // if the framework supports redirect configuration
    // set max redirect to the desired amount. The default is 50
    if (handler.SupportsRedirectConfiguration)
    {
        handler.AllowAutoRedirect = true;
        handler.MaxAutomaticRedirections = 5;
    }

    // if the framework supports automatic decompression
    // set automatic decompression
    // NOTE(review): the tail of this statement was lost in extraction; Deflate
    // is assumed to have followed GZip - confirm.
    if (handler.SupportsAutomaticDecompression)
    {
        handler.AutomaticDecompression = System.Net.DecompressionMethods.GZip |
                                         System.Net.DecompressionMethods.Deflate;
    }

    return handler;
}

Creating multiple instances of HttpClient can cause an exhaustion of available sockets, so it is suggested to try keeping one instance for the duration of the application.

Specify a short time out duration for slower responses to help with time management.

// Single lazily-created client for the whole application; creating a client
// per request can exhaust the available sockets. The 3 second timeout is only
// an example value and should be tuned for the servers being contacted.
static readonly Lazy<HttpClient> httpClient = new Lazy<HttpClient>(() =>
{
    var handler = CreateHandler();
    return new HttpClient(handler)
    {
        Timeout = TimeSpan.FromSeconds(3)
    };
});

Timeout can be adjusted to specific needs.

Cancellation tokens can also be used to help with time management.

Seeing as any exception is just being rethrown, then there is really no need for the try/catch. The exception would be handled by the caller.

HttpClient uses an asynchronous API. Its dependents should also be asynchronous all the way through and avoid blocking calls like .Result or .Wait to avoid deadlocks.

The refactored helper now looks like this

/// <summary>
/// Download html from an Internet page.
/// </summary>
public class NetworkHelper
{
    // Single lazily-created client shared by every instance; creating a client
    // per request can exhaust the available sockets. The 3 second timeout is
    // only an example value and should be tuned for the servers being contacted.
    static readonly Lazy<HttpClient> httpClient = new Lazy<HttpClient>(() =>
    {
        var handler = CreateHandler();
        return new HttpClient(handler)
        {
            Timeout = TimeSpan.FromSeconds(3)
        };
    });

    /// <summary>
    /// Builds the handler for the shared client: automatic redirects capped at
    /// five hops and transparent decompression, each enabled only when the
    /// running framework reports support for it.
    /// </summary>
    static HttpMessageHandler CreateHandler()
    {
        var handler = new HttpClientHandler();

        // if the framework supports redirect configuration
        // set max redirect to the desired amount; the default is 50
        if (handler.SupportsRedirectConfiguration)
        {
            handler.AllowAutoRedirect = true;
            handler.MaxAutomaticRedirections = 5;
        }

        // if the framework supports automatic decompression
        // set automatic decompression
        // NOTE(review): the tail of this statement was lost in extraction;
        // Deflate is assumed to have followed GZip - confirm.
        if (handler.SupportsAutomaticDecompression)
        {
            handler.AutomaticDecompression = System.Net.DecompressionMethods.GZip |
                                             System.Net.DecompressionMethods.Deflate;
        }

        return handler;
    }

    /// <summary>
    /// Get the html structure of a site.
    /// </summary>
    /// <param name="url">Represents the URL of the page where to download the data.</param>
    /// <param name="cancellationToken">Optional token used to abandon the request early.</param>
    /// <returns>Return a string that contains the html of the site.</returns>
    public async Task<string> GetHtmlAsync(Uri url, CancellationToken cancellationToken = default(CancellationToken))
    {
        // The status code is not inspected: the body of an error response is
        // returned as-is, exactly like a successful one.
        // ConfigureAwait(false): this helper never needs the caller's context.
        using (var response = await httpClient.Value.GetAsync(url, cancellationToken).ConfigureAwait(false))
        {
            return await response.Content.ReadAsStringAsync().ConfigureAwait(false);
        }
    }
}

And used

//...

// Await the download; blocking with .Result or .Wait() risks a deadlock.
var client = new NetworkHelper();
var html = await client.GetHtmlAsync(uri);

edited Jun 11 at 14:27

answered Jun 10 at 14:52

Nkosi

1,868619

HttpClient and associated classes provide most of the desired behavior.

HttpClientHandler already has the ability to follow redirection responses out of the box with AllowAutoRedirect property. You can also specify how many redirections to allow.

That can be extracted out into a function

/// <summary>
/// Builds the message handler used by the shared HttpClient: automatic
/// redirects capped at five hops and transparent response decompression,
/// each enabled only when the running framework reports support for it.
/// </summary>
/// <returns>The configured handler.</returns>
static HttpMessageHandler CreateHandler()
{
    var handler = new HttpClientHandler();

    // if the framework supports redirect configuration
    // set max redirect to the desired amount. The default is 50
    if (handler.SupportsRedirectConfiguration)
    {
        handler.AllowAutoRedirect = true;
        handler.MaxAutomaticRedirections = 5;
    }

    // if the framework supports automatic decompression
    // set automatic decompression
    // NOTE(review): the tail of this statement was lost in extraction; Deflate
    // is assumed to have followed GZip - confirm.
    if (handler.SupportsAutomaticDecompression)
    {
        handler.AutomaticDecompression = System.Net.DecompressionMethods.GZip |
                                         System.Net.DecompressionMethods.Deflate;
    }

    return handler;
}

Creating multiple instances of HttpClient can cause an exhaustion of available sockets, so it is suggested to try keeping one instance for the duration of the application.

Specify a short time out duration for slower responses to help with time management.

// Single lazily-created client for the whole application; creating a client
// per request can exhaust the available sockets. The 3 second timeout is only
// an example value and should be tuned for the servers being contacted.
static readonly Lazy<HttpClient> httpClient = new Lazy<HttpClient>(() =>
{
    var handler = CreateHandler();
    return new HttpClient(handler)
    {
        Timeout = TimeSpan.FromSeconds(3)
    };
});

Timeout can be adjusted to specific needs.

Cancellation tokens can also be used to help with time management.

Seeing as any exception is just being rethrown, then there is really no need for the try/catch. The exception would be handled by the caller.

HttpClient uses an asynchronous API. Its dependents should also be asynchronous all the way through and avoid blocking calls like .Result or .Wait to avoid deadlocks.

The refactored helper now looks like this

/// <summary>
/// Download html from an Internet page.
/// </summary>
public class NetworkHelper
{
    // Single lazily-created client shared by every instance; creating a client
    // per request can exhaust the available sockets. The 3 second timeout is
    // only an example value and should be tuned for the servers being contacted.
    static readonly Lazy<HttpClient> httpClient = new Lazy<HttpClient>(() =>
    {
        var handler = CreateHandler();
        return new HttpClient(handler)
        {
            Timeout = TimeSpan.FromSeconds(3)
        };
    });

    /// <summary>
    /// Builds the handler for the shared client: automatic redirects capped at
    /// five hops and transparent decompression, each enabled only when the
    /// running framework reports support for it.
    /// </summary>
    static HttpMessageHandler CreateHandler()
    {
        var handler = new HttpClientHandler();

        // if the framework supports redirect configuration
        // set max redirect to the desired amount; the default is 50
        if (handler.SupportsRedirectConfiguration)
        {
            handler.AllowAutoRedirect = true;
            handler.MaxAutomaticRedirections = 5;
        }

        // if the framework supports automatic decompression
        // set automatic decompression
        // NOTE(review): the tail of this statement was lost in extraction;
        // Deflate is assumed to have followed GZip - confirm.
        if (handler.SupportsAutomaticDecompression)
        {
            handler.AutomaticDecompression = System.Net.DecompressionMethods.GZip |
                                             System.Net.DecompressionMethods.Deflate;
        }

        return handler;
    }

    /// <summary>
    /// Get the html structure of a site.
    /// </summary>
    /// <param name="url">Represents the URL of the page where to download the data.</param>
    /// <param name="cancellationToken">Optional token used to abandon the request early.</param>
    /// <returns>Return a string that contains the html of the site.</returns>
    public async Task<string> GetHtmlAsync(Uri url, CancellationToken cancellationToken = default(CancellationToken))
    {
        // The status code is not inspected: the body of an error response is
        // returned as-is, exactly like a successful one.
        // ConfigureAwait(false): this helper never needs the caller's context.
        using (var response = await httpClient.Value.GetAsync(url, cancellationToken).ConfigureAwait(false))
        {
            return await response.Content.ReadAsStringAsync().ConfigureAwait(false);
        }
    }
}

And used

//...

// Await the download; blocking with .Result or .Wait() risks a deadlock.
var client = new NetworkHelper();
var html = await client.GetHtmlAsync(uri);

edited Jun 11 at 14:27

answered Jun 10 at 14:52

Nkosi

1,868619

edited Jun 11 at 14:27

answered Jun 10 at 14:52

Nkosi

1,868619

answered Jun 10 at 14:52

Nkosi

1,868619

answered Jun 10 at 14:52

Nkosi

1,868619

1

I didn't know this CancellationToken cancellationToken = default(CancellationToken) was possible with the CancellationToken as a parameter :-o
– t3chb0t
Jun 10 at 15:05

@Nkosi thanks for the answer, I'll test this in my library. Do you think that 3 seconds are enough for a slow server?
– Charanoglu
Jun 10 at 16:55

@Charanoglu it was just meant as an example. You can test it with different durations till you get the one you are most comfortable with.
– Nkosi
Jun 10 at 16:59

@Nkosi thanks for clarifying this, just a question: to download the data should I do something like this: new NetworkHelper().GetHtmlAsync(coachLink).Result?
– Charanoglu
Jun 10 at 18:22

@Charanoglu that would again be mixing async and blocking calls that can cause deadlock. Instead use var html = await new NetworkHelper().GetHtmlAsync(coachLink);. Otherwise you need to use an API that is not asynchronous.
– Nkosi
Jun 10 at 18:24

add a comment |

1

I didn't know this CancellationToken cancellationToken = default(CancellationToken) was possible with the CancellationToken as a parameter :-o
– t3chb0t
Jun 10 at 15:05

@Nkosi thanks for the answer, I'll test this in my library. Do you think that 3 seconds are enough for a slow server?
– Charanoglu
Jun 10 at 16:55

@Charanoglu it was just meant as an example. You can test it with different durations till you get the one you are most comfortable with.
– Nkosi
Jun 10 at 16:59

@Nkosi thanks for clarifying this, just a question: to download the data should I do something like this: new NetworkHelper().GetHtmlAsync(coachLink).Result?
– Charanoglu
Jun 10 at 18:22

@Charanoglu that would again be mixing async and blocking calls that can cause deadlock. Instead use var html = await new NetworkHelper().GetHtmlAsync(coachLink);. Otherwise you need to use an API that is not asynchronous.
– Nkosi
Jun 10 at 18:24

I didn't know this CancellationToken cancellationToken = default(CancellationToken) was possible with the CancellationToken as a parameter :-o
– t3chb0t
Jun 10 at 15:05

@Nkosi thanks for the answer, I'll test this in my library. Do you think that 3 seconds are enough for a slow server?
– Charanoglu
Jun 10 at 16:55

@Charanoglu it was just meant as an example. You can test it with different durations till you get the one you are most comfortable with.
– Nkosi
Jun 10 at 16:59

@Nkosi thanks for clarifying this, just a question: to download the data should I do something like this: new NetworkHelper().GetHtmlAsync(coachLink).Result?
– Charanoglu
Jun 10 at 18:22

@Charanoglu that would again be mixing async and blocking calls that can cause deadlock. Instead use var html = await new NetworkHelper().GetHtmlAsync(coachLink);. Otherwise you need to use an API that is not asynchronous.
– Nkosi
Jun 10 at 18:24

add a comment |

draft saved

draft discarded

draft saved

draft discarded

Post as a guest

Name

搜尋此網誌

trjhtr