suite 'Unfluff', ->
  _ = require('lodash')
  extractor = require("../src/unfluff")

  # Normalises extracted text so it can be compared with a fixture's expected
  # text.
  #
  # text           - string produced by the extractor.
  # origTextLength - number of leading characters to keep.
  #
  # Every "\n\n" becomes a single space, then every pair of spaces becomes a
  # single space (one pass only, so a run of three spaces still leaves two).
  # Returns the first `origTextLength` characters of the result.
  cleanTestingText = (text, origTextLength) ->
    collapsed = text.replace(/\n\n/g, " ").replace(/\ \ /g, " ")
    # Exclusive range on purpose: the inclusive form [0..n-1] compiles to
    # slice(0, (n-1)+1 || 9e9), which returns the WHOLE string when n is 0.
    collapsed[0...origTextLength]

  # Prepares a fixture's expected text for comparison: each occurrence of two
  # consecutive newlines is turned into one space. Nothing else is altered.
  cleanOrigText = (text) ->
    paragraphBreak = /\n\n/g
    text.replace paragraphBreak, " "

  # Runs both the eager extractor and the lazy extractor over one HTML fixture
  # and asserts the requested fields against the fixture's expected values.
  #
  # site   - fixture name; reads ./fixtures/test_<site>.html and
  #          ./fixtures/test_<site>.json (paths are relative to the cwd).
  # fields - list of field names to verify. Recognised names: 'title',
  #          'cleaned_text', 'link', 'image', 'description', 'lang',
  #          'keywords', 'favicon', 'tags', 'links', 'videos'. Any other name
  #          produces a failing "Invalid test!" assertion.
  #
  # For every field the eager result is compared with the fixture, and the
  # lazy accessor is compared as well, so both code paths are covered.
  checkFixture = (site, fields) ->
    html = fs.readFileSync("./fixtures/test_#{site}.html").toString()
    orig = JSON.parse(fs.readFileSync("./fixtures/test_#{site}.json"))
    data = extractor(html)
    dataLazy = extractor.lazy(html)

    _.each fields, (field) ->
      switch field
        when 'title'
          eq orig.expected.title, data.title, "#{site}: title didn't match expected value"
          eq data.title, dataLazy.title(), "#{site}: title from partial extraction didn't match expected value"

        when 'cleaned_text'
          # Only the leading origText.length characters of the extracted text
          # are compared; anything extracted beyond that is ignored.
          origText = cleanOrigText(orig.expected.cleaned_text)
          newText = cleanTestingText(data.text, origText.length)
          partialExtractText = cleanTestingText(dataLazy.text(), origText.length)
          ok newText, "#{site}: no text was found"
          ok data.text.length >= orig.expected.cleaned_text.length , "#{site}: cleaned text was too short"
          eq origText, newText, "#{site}: cleaned text didn't match expected value"
          eq origText, partialExtractText, "#{site}: cleaned text from partial extract didn't match expected value"

        when 'link'
          eq orig.expected.final_url, data.canonicalLink, "#{site}: canonical link didn't match expected value"
          eq data.canonicalLink, dataLazy.canonicalLink(), "#{site}: canonical link from partial extraction didn't match expected value"

        when 'image'
          eq orig.expected.image, data.image, "#{site}: image didn't match expected value"
          eq data.image, dataLazy.image(), "#{site}: image from partial extraction didn't match expected value"

        when 'description'
          eq orig.expected.meta_description, data.description, "#{site}: meta description didn't match expected value"
          eq data.description, dataLazy.description(), "#{site}: description from partial extraction didn't match expected value"

        when 'lang'
          eq orig.expected.meta_lang, data.lang, "#{site}: detected langauge didn't match expected value"
          eq data.lang, dataLazy.lang(), "#{site}: langauge from partial extraction didn't match expected value"

        when 'keywords'
          eq orig.expected.meta_keywords, data.keywords, "#{site}: meta keywords didn't match expected value"
          eq data.keywords, dataLazy.keywords(), "#{site}: meta keywords from partial extraction didn't match expected value"

        when 'favicon'
          eq orig.expected.meta_favicon, data.favicon, "#{site}: favicon url didn't match expected value"
          eq data.favicon, dataLazy.favicon(), "#{site}: favicon url from partial extraction didn't match expected value"

        when 'tags'
          # Order is not significant, so both sides are sorted (in place).
          sortedTags = data.tags.sort()
          arrayEq orig.expected.tags.sort(), sortedTags, "#{site}: meta tags didn't match expected value"
          arrayEq sortedTags, dataLazy.tags().sort(), "#{site}: meta tags from partial extraction didn't match expected value"

        when 'links'
          sortedLinks = data.links.sort()
          sortedLazyLinks = dataLazy.links().sort()
          # A fixture without expected links is seeded from the current
          # extraction result and written back to disk, so the eager
          # comparison below passes trivially on that first run.
          if !orig.expected.links
            orig.expected.links = sortedLinks
            fs.writeFileSync("./fixtures/test_#{site}.json", JSON.stringify(orig, null, 4))
          deepEq orig.expected.links.sort(), sortedLinks, "#{site}: links didn't match expected value"
          deepEq orig.expected.links.sort(), sortedLazyLinks, "#{site}: links from partial extraction didn't match expected value"

        when 'videos'
          # The fixture stores the expected videos under the `movies` key.
          sortedVideos = data.videos.sort()
          deepEq orig.expected.movies.sort(), sortedVideos, "#{site}: videos didn't match expected value"
          deepEq sortedVideos, dataLazy.videos().sort(), "#{site}: videos from partial extraction didn't match expected value"

        else
          # Unknown field name: fail loudly instead of silently passing.
          eq true, false, "#{site}: Invalid test!"

      # Explicit empty return: without it CoffeeScript returns the value of the
      # last assertion call, and lodash stops iterating as soon as an iteratee
      # returns `false`, which would silently skip the remaining fields.
      return

  # Smoke tests: the required module and its `lazy` member are both truthy.
  test 'exists', -> ok extractor

  test 'lazy version exists', -> ok extractor.lazy

  # Single-field metadata checks; each verifies one field of one fixture.
  test 'reads favicon', -> checkFixture 'aolNews', ['favicon']

  test 'reads description', -> checkFixture 'allnewlyrics1', ['description']

  test 'reads open graph description', -> checkFixture 'twitter', ['description']

  test 'reads keywords', -> checkFixture 'allnewlyrics1', ['keywords']

  test 'reads lang', -> checkFixture 'allnewlyrics1', ['lang']

  test 'reads canonical link', -> checkFixture 'allnewlyrics1', ['link']

  # Collection-valued fields, each verified against several fixtures in turn.
  test 'reads tags', ->
    checkFixture 'tags_kexp', ['tags']
    checkFixture 'tags_deadline', ['tags']
    checkFixture 'tags_wnyc', ['tags']
    checkFixture 'tags_cnet', ['tags']
    checkFixture 'tags_abcau', ['tags']

  test 'reads videos', ->
    checkFixture 'embed', ['videos']
    checkFixture 'iframe', ['videos']
    checkFixture 'object', ['videos']

  test 'links', ->
    checkFixture 'theverge1', ['links']
    checkFixture 'techcrunch1', ['links']
    checkFixture 'polygon', ['links']

  test 'images', ->
    checkFixture 'aolNews', ['image']
    checkFixture 'polygon', ['image']
    checkFixture 'theverge1', ['image']

  # Article-body checks; some fixtures also verify extra metadata fields.
  test 'gets cleaned text - Polygon', ->
    polygonFields = ['cleaned_text', 'title', 'link', 'description', 'lang', 'favicon']
    checkFixture 'polygon', polygonFields

  test 'gets cleaned text - The Verge', ->
    vergeFields = ['cleaned_text', 'title', 'link', 'description', 'lang', 'favicon']
    checkFixture 'theverge1', vergeFields

  test 'gets cleaned tags - The Verge', -> checkFixture 'theverge2', ['tags']

  test 'gets cleaned text - McSweeneys', ->
    checkFixture 'mcsweeney', ['cleaned_text', 'link', 'lang', 'favicon']

  test 'gets cleaned text - CNN', -> checkFixture 'cnn1', ['cleaned_text']

  test 'gets cleaned text - MSN', -> checkFixture 'msn1', ['cleaned_text']

  # Covers the `time2` fixture. Titled distinctly from the test for the `time`
  # fixture so that reports and --grep can tell the two apart (both were
  # previously named 'gets cleaned text - Time').
  test 'gets cleaned text - Time 2', ->
    checkFixture('time2' , ['cleaned_text'])

  # Remaining article-body checks, one test per publication; publications
  # with several fixtures run them one after another inside the same test.
  test 'gets cleaned text - BI', ->
    checkFixture 'businessinsider1', ['cleaned_text']
    checkFixture 'businessinsider2', ['cleaned_text']
    checkFixture 'businessinsider3', ['cleaned_text']

  test 'gets cleaned text - CNBC', -> checkFixture 'cnbc1', ['cleaned_text']

  test 'gets cleaned text - CBS Local', -> checkFixture 'cbslocal', ['cleaned_text']

  test 'gets cleaned text - Business Week', ->
    checkFixture 'businessWeek1', ['cleaned_text']
    checkFixture 'businessWeek2', ['cleaned_text']
    checkFixture 'businessWeek3', ['cleaned_text']

  test 'gets cleaned text - El Pais', -> checkFixture 'elpais', ['cleaned_text']

  test 'gets cleaned text - Techcrunk', -> checkFixture 'techcrunch1', ['cleaned_text']

  test 'gets cleaned text - Fox "News"', -> checkFixture 'foxNews', ['cleaned_text']

  test 'gets cleaned text - Huff Po', ->
    checkFixture 'huffingtonPost2', ['cleaned_text']
    checkFixture 'testHuffingtonPost', ['cleaned_text', 'description', 'title']

  test 'gets cleaned text - ESPN', -> checkFixture 'espn', ['cleaned_text']

  test 'gets cleaned text - Time', -> checkFixture 'time', ['cleaned_text']

  test 'gets cleaned text - CNet', -> checkFixture 'cnet', ['cleaned_text']

  test 'gets cleaned text - Yahoo', -> checkFixture 'yahoo', ['cleaned_text']

  test 'gets cleaned text - Politico', -> checkFixture 'politico', ['cleaned_text']

  test 'gets cleaned text - Goose Regressions', ->
    checkFixture 'issue4', ['cleaned_text']
    checkFixture 'issue24', ['cleaned_text']
    checkFixture 'issue25', ['cleaned_text']
    checkFixture 'issue28', ['cleaned_text']

  test 'gets cleaned text - Gizmodo', ->
    checkFixture 'gizmodo1', ['cleaned_text', 'description', 'keywords']

  test 'gets cleaned text - Mashable', -> checkFixture 'mashable_issue_74', ['cleaned_text']

  test 'gets cleaned text - USA Today', ->
    checkFixture 'usatoday_issue_74', ['cleaned_text']
    checkFixture 'usatoday1', ['cleaned_text']

  test 'gets cleaned text - dcurt.is', -> checkFixture 'dcurtis', ['cleaned_text']
