# -*- coding: utf-8 -*-
import scrapy
from items import CommentItem
import textblob
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer

class TownhallSpider(scrapy.Spider):
    """Scrape public comments from a Virginia Town Hall forum.

    The spider walks the comment listing table at ``start_urls``, follows
    the link in the first cell of each row, and emits one ``CommentItem``
    per comment page containing the comment text and its TextBlob
    Naive Bayes sentiment classification.
    """

    name = 'townhall'
    allowed_domains = ['townhall.virginia.gov']
    start_urls = ['https://www.townhall.virginia.gov/L/comments.cfm?GDocForumID=452']
    # NOTE(review): newer Scrapy releases prefer the single ``FEEDS`` setting
    # over FEED_URI / FEED_FORMAT; kept as-is so the output location and
    # format do not change. Confirm against the Scrapy version in use.
    custom_settings = {
        'FEED_EXPORTERS': {
            "jsonlines": "scrapy.exporters.JsonLinesItemExporter",
        },
        'FEED_URI': '%(name)s_%(time)s.json',
        'FEED_FORMAT': 'jsonlines',
    }

    # Shared sentiment analyzer, created lazily by ``_get_analyzer``.
    _analyzer = None

    def _get_analyzer(self):
        """Return the spider-wide ``NaiveBayesAnalyzer``, creating it once.

        Each analyzer instance trains its own classifier the first time it
        is used, so building a new one per comment repeats that training
        for every page. Reusing a single instance pays the cost once.
        """
        if self._analyzer is None:
            self._analyzer = NaiveBayesAnalyzer()
        return self._analyzer

    def parse(self, response):
        """Follow the comment link found in each row of the listing table.

        Args:
            response: The listing page response.

        Yields:
            A request for every row that has a comment link, handled by
            ``parse_comment``.
        """
        rows = response.css('#contentwide>table>tr')
        # The first row is the table header.
        for row in rows[1:]:
            cols = row.xpath('.//td')
            if not cols:
                # Rows without data cells carry no comment.
                continue

            link = cols[0].css('a::attr(href)').get()
            if not link:
                # Without an href there is nothing to follow; skip the row
                # instead of letting response.follow() abort the callback.
                continue

            comment_title = cols[0].xpath('a/text()').get()
            commenter = cols[1].xpath('text()').get() if len(cols) > 1 else None
            self.logger.info('%s  |  %s', comment_title, commenter)

            yield response.follow(link, callback=self.parse_comment)

    def parse_comment(self, response):
        """Extract one comment and score its sentiment.

        Args:
            response: The individual comment page response.

        Yields:
            A ``CommentItem`` with ``comment``, ``sentiment``,
            ``sentiment_pos`` and ``sentiment_neg`` populated. Nothing is
            yielded when the page has no comment text.
        """
        # NOTE(review): ``.get()`` returns only the first matching text node;
        # if comments can span several nodes this truncates them -- confirm
        # against the page markup.
        text = response.css('.divComment>p::text').get()
        if text is None:
            self.logger.warning('No comment text found at %s', response.url)
            return

        # Replace non-breaking spaces with plain spaces.
        text = text.replace('\u00a0', ' ')

        blob = TextBlob(text, analyzer=self._get_analyzer())
        sentiment = blob.sentiment

        entry = CommentItem()
        entry['comment'] = text
        entry['sentiment'] = sentiment.classification
        entry['sentiment_pos'] = sentiment.p_pos
        entry['sentiment_neg'] = sentiment.p_neg
        yield entry