“F# Weekly” under the hood

Under the F# Weekly news preparation lies a simple F# script.

This script uses Twitterizer2 – one of the simplest Twitter client libraries for .NET. The source code is available on GitHub, and binaries are available through NuGet.

Script logic is relatively simple. First of all, collect a list of queries for Twitter.

    let tweets = ["#fsharp";"#fsharpx";"@dsyme";"#websharper";"#fsharpweekly"]

Then make a call to the Twitter Search API for each query, concatenate the results for the last week, and sort all tweets by creation date.

                |> List.map (getTweets (DateTime.Now - TimeSpan.FromDays(7.0)))
                |> List.concat
                |> List.sortBy (fun t -> t.CreatedDate)

Then keep only ‘en’ (English) tweets and filter out tweets without links, as well as retweets, so that only the first occurrence of each unique link remains.

                |> List.filter (fun t -> t.Language = "en")
                |> filterUniqLinks

The source code below also contains a console printing function for verifying the results and an HTML printing function for further manual review of the results.

Feel free to use it in your own social research.

#r "Twitterizer2.dll"

open Twitterizer
open Twitterizer.Entities
open System
open System.Net
open System.Text.RegularExpressions;

/// Runs a Twitter search for `query` and gathers the results of every page,
/// restricted to tweets created since `sinceDate`.
///
/// Pages are requested one after another, starting at page 1 with 100 results
/// per page, until a request does not succeed or a page comes back empty.
/// Returns all collected results as a single list.
let getTweets (sinceDate:DateTime) query =
    let rec collect pageNum =
        let options = SearchOptions(NumberPerPage = 100, SinceDate = sinceDate, PageNumber = pageNum)
        // Pages are numbered from 1, so page N covers results (N-1)*size .. N*size.
        printfn "Loading %d-%d" ((pageNum-1)*options.NumberPerPage) (pageNum*options.NumberPerPage)
        let result = TwitterSearch.Search(query, options)
        // `||` short-circuits, so ResponseObject is only inspected after a successful request.
        if result.Result <> RequestResult.Success || result.ResponseObject.Count = 0 then
            List.empty
        else
            // Later pages are placed in front of the current page; the single
            // List.rev below then reverses the combined list once.
            result.ResponseObject |> List.ofSeq |> List.append (collect (pageNum+1))
    // This line must sit at the indentation of `let rec collect`: it is the
    // result of getTweets, not a statement inside the body of `collect`.
    collect 1 |> List.rev

/// Case-insensitive pattern used to find links inside tweet text.
/// Matches both `http://` and `https://` URLs: the scheme, then a run of
/// word/dot characters (the host), then an optional run of path and query
/// characters. The `&` in the second character class was previously written
/// as the HTML entity `&amp;`, an artifact of HTML encoding.
let urlRegexp = Regex("https?://([\\w+?\\.\\w+])+([a-zA-Z0-9\\~\\!\\@\\#\\$\\%\\^\\&\\*\\(\\)_\\-\\=\\+\\\\\\/\\?\\.\\:\\;\\'\\,]*)?", RegexOptions.IgnoreCase)

/// Keeps only the tweets that introduce at least one link not seen in an
/// earlier tweet of the list; tweets without any link are dropped.
/// The relative order of the kept tweets is unchanged.
let filterUniqLinks (tweets: TwitterSearchResult list) =
    // Every URL encountered so far, shared across the whole traversal.
    let seenUrls = System.Collections.Generic.HashSet<string>()
    let introducesNewLink (tweet: TwitterSearchResult) =
        // Materialise the list of unseen URLs before recording any of them,
        // so the check for this tweet is made against earlier tweets only.
        let freshUrls =
            urlRegexp.Matches(tweet.Text)
            |> Seq.cast<Match>
            |> Seq.map (fun m -> m.Value)
            |> Seq.filter (fun url -> not (seenUrls.Contains url))
            |> List.ofSeq
        for url in freshUrls do
            seenUrls.Add url |> ignore
        not (List.isEmpty freshUrls)
    // List.filter visits the tweets front to back, so the first tweet that
    // carries a given link is the one that survives.
    tweets |> List.filter introducesNewLink

/// Writes one console line per tweet: the author's screen name (right-aligned
/// to 15 characters), the short creation date and the tweet text.
let printTweets (tweets: TwitterSearchResult list) =
    for tweet in tweets do
        let created = tweet.CreatedDate.ToShortDateString()
        printfn "%15s : %s : %s" tweet.FromUserScreenName created tweet.Text

// Collect the last seven days of tweets for every query, order them by
// creation date, keep the English ones and drop tweets that bring no new link.
let tweets =
    let since = DateTime.Now - TimeSpan.FromDays(7.0)
    let queries = ["#fsharp";"#fsharpx";"@dsyme";"#websharper";"#fsharpweekly"]
    queries
    |> List.collect (getTweets since)
    |> List.sortBy (fun t -> t.CreatedDate)
    |> List.filter (fun t -> t.Language = "en")
    |> filterUniqLinks

// Console report for a quick check of the result.
printfn "Tweets count : %d" (List.length tweets)
tweets |> printTweets

/// Renders `tweets` as an HTML page for manual review and writes it to `filename`.
///
/// Each tweet becomes a small table containing a countdown number, a "Remove"
/// link (handled by the inline `remove` script, which deletes the table from
/// the page), the author's avatar linking to their Twitter profile, the tweet
/// text with its URLs turned into anchors, and the creation time.
let printTweetsInHtml filename (tweets: TwitterSearchResult list) =
    // Wrap every URL in an anchor in a single regex pass. Replacing URL by URL
    // with String.Replace would re-wrap a link that occurs twice, or that is a
    // prefix of another link, inside markup that was already generated.
    // NOTE(review): the tweet text is inserted without HTML escaping — confirm
    // whether the search API already returns entity-encoded text before adding it.
    let formatTweet (text:string) =
        let wrapInAnchor (m: Match) =
            sprintf "<a href=\"%s\" target=\"_blank\">%s</a>" m.Value m.Value
        urlRegexp.Replace(text, MatchEvaluator wrapInAnchor)
    let total = List.length tweets
    let rows =
        tweets
        |> List.mapi (fun i t ->
            // Rows are numbered downwards: the first tweet gets the highest number.
            let rowId = total - i
            let text = formatTweet t.Text
            sprintf "<table id=\"%d\"><tr><td rowspan=\"2\" width=\"30\">%d</td><td rowspan=\"2\" width=\"80\"><a href=\"javascript:remove('%d')\">Remove</a></td><td rowspan=\"2\"><a href=\"https://twitter.com/%s\" target=\"_blank\"><img src=\"%s\"/></a></td><td><b>%s</b></td></tr><tr><td>Created : %s <br></td></tr></table>"
                     rowId rowId rowId t.FromUserScreenName t.ProfileImageLocation text (t.CreatedDate.ToString()))
        // Every row is preceded by "&nbsp;", exactly as the former string fold produced.
        |> List.map (fun row -> "&nbsp;" + row)
        |> String.concat ""
    let html = sprintf "<html><head><script>function remove(id){return (elem=document.getElementById(id)).parentNode.removeChild(elem);}</script></head><body>%s</body></html>" rows
    System.IO.File.WriteAllText(filename, html)

// Write the review page for the collected tweets to disk.
tweets |> printTweetsInHtml "d:\\tweets.html"

4 thoughts on ““F# Weekly” under the hood”

  1. Does your blog have a contact page? I’m having a tough time locating it but, I’d like to send you an email.

    I’ve got some suggestions for your blog you might be interested in hearing. Either way, great site and I look forward to seeing it develop over time.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s