'''Count the tagged words used in Smythers' posts and save the frequent ones.

Running this module reads the file named by ``dlr``, strips HTML tags and
URLs from each matching post, tags the words with TextBlob, and writes every
(word, tag) pair seen more than twice to the file named by ``write_to``.
'''
import re
from collections import Counter
from collections.abc import Iterable
from itertools import islice

import jsonlines
from textblob import TextBlob

# Compiled once at import time instead of on every call.
_HTML_TAG_RE = re.compile('<.*?>')
_URL_RE = re.compile(r'http\S+')

# A (word, tag) pair must occur at least this many times to be written out.
MIN_OCCURRENCES = 3


def tprint(obj):
    '''Print the type of ``obj`` followed by the object itself (debug helper).'''
    print(f"{type(obj)} : {obj}")


def sort_file(file, limit=25):
    '''Count positive and negative comments among the first records of a file.

    Each record is expected to already carry a ``sentiment`` label; records
    labelled ``'pos'`` or ``'neg'`` are counted and any other label is
    ignored. No sentiment analysis is performed here.

    Args:
        file: Path of the JSON Lines file to read.
        limit: Maximum number of records to inspect. ``None`` inspects the
            whole file. Files shorter than ``limit`` are read to the end.

    Returns:
        A ``(positive, negative)`` tuple of counts. The same totals are
        also printed.

    Raises:
        KeyError: If an inspected record has no ``sentiment`` field.
    '''
    pos = 0
    neg = 0
    with jsonlines.open(file, mode='r') as reader:
        # Confirm type
        tprint(reader)
        # islice stops quietly at end of file, so short files do not raise
        # StopIteration the way a bare next() loop would.
        for record in islice(reader, limit):
            tprint(record)
            sentiment = record['sentiment']
            if sentiment == 'pos':
                pos += 1
            elif sentiment == 'neg':
                neg += 1
    print(f'{pos} positive and {neg} negative comments')
    return pos, neg


def process_file(file):
    '''Collect the content of every post authored by Smythers.

    A record matches when the first element of its ``author`` field equals
    ``'Smythers'``; the first element of its ``content`` field is collected.
    Records that lack either field, or where the indexed field is empty,
    are skipped.

    Args:
        file: Path of the JSON Lines file to read.

    Returns:
        A list with one entry per matching record, in file order.
    '''
    # NOTE(review): both fields are indexed with [0], so they are presumably
    # lists of strings -- confirm against the producer of the file.
    posts = []
    with jsonlines.open(file, mode='r') as reader:
        for item in reader:
            try:
                if item['author'][0] == 'Smythers':
                    posts.append(item['content'][0])
            except (KeyError, IndexError):
                # Missing or empty field: not a usable post, move on.
                continue
    return posts


def write_file(file, data: Iterable):
    '''Write every item of ``data`` to ``file`` as one JSON line each.

    Any existing file at that path is overwritten. Prints
    ``write successful`` once all items have been written.

    Args:
        file: Path of the JSON Lines file to create.
        data: Iterable of JSON-serialisable items.
    '''
    with jsonlines.open(file, mode='w') as writer:
        for each in data:
            writer.write(each)
    print('write successful')


def clean_text(text: str) -> str:
    '''Return ``text`` with HTML tags removed first, then URLs.'''
    return remove_http(remove_html(text))


def remove_html(text: str) -> str:
    '''Remove html tags from string.

    A tag is the shortest run from ``<`` to the next ``>`` on the same
    line; ``.`` does not match newlines, so tags split across lines stay.
    '''
    return _HTML_TAG_RE.sub('', text)


def remove_http(text: str) -> str:
    '''Remove URLs from string.

    Everything from ``http`` up to the next whitespace character is dropped.
    '''
    return _URL_RE.sub('', text)


def get_nouns(text: str):
    '''Return the part-of-speech tags TextBlob assigns to ``text``.

    NOTE: despite the name, the result is ``blob.tags`` unfiltered, i.e.
    every tagged word rather than nouns only. Whether to restrict it to
    nouns was left undecided by the original author.
    '''
    blob = TextBlob(text)
    return blob.tags


# Input and output locations. Only ``dlr`` and ``write_to`` are used below.
vadoe = '/vadoe/vadoe/vadoe/townhall_2021-01-14T02-05-51.json'
vadoe_p = '/vadoe/vadoe/vadoe/townhall_2021-01-14T05-11-55.json'
dlr = '/vadoe/vadoe/vadoe/dlr.json'
smythers_pc = '/vadoe/vadoe/vadoe/smythers.json'
write_to = '/vadoe/vadoe/vadoe/nouns.json'

smythers_posts = process_file(dlr)
cleaned = [clean_text(each) for each in smythers_posts]

# Flatten the per-post tag lists into one list of (word, tag) pairs.
nouns = [tagged for post in cleaned for tagged in get_nouns(post)]

# NOTE: despite the name this is a Counter and is not sorted.
sortedNouns = Counter(nouns)

# Keep only the pairs that occur often enough, as (pair, count) tuples.
nouns = [(k, v) for k, v in sortedNouns.items() if v >= MIN_OCCURRENCES]
print(nouns)
write_file(write_to, nouns)