Commit ac480a74 authored by dirk.wintergruen

only_year mode added

parent 4f424f2b
......@@ -4,6 +4,7 @@ import sys
from collections import Counter, defaultdict
#from multiprocessing.pool import Pool
from billiard.pool import Pool
#from billiard.dummy import Pool
import dateparser
import matplotlib
import textacy
......@@ -476,7 +477,7 @@ class Analyser(object):
def find_bursts(self,data, s = 0.4,gam = 0.1 ):
def find_bursts(self,data, s = 0.4,gam = 0.1, only_years=False ):
"""
:param s : default 0.4 -resolution of state jumps; higher s --> fewer but stronger bursts
:param gam: gam = 0.1 - difficulty of moving up a state; larger gamma --> harder to move up states, less bursty
......@@ -492,7 +493,14 @@ class Analyser(object):
# find bursts
#unique_words_tmp = filter(filter_func, self.uniqueWords)
try:
d = data.groupby(['_year', '_month'])['_words'].count()
if only_years:
d = data.groupby(['_year'])['_words'].count()
date_list = list(d.index)
d.index = range(0,len(d)) # have to reindex -> burst assumes I can access d with index number
else:
d = data.groupby(['_year', '_month'])['_words'].count() #multiindex can be accesses with a number!
date_list = list(d.index)
except KeyError:
logger.error(f"Key error _words in data {data}")
# create a dataframe to hold results
......@@ -512,10 +520,17 @@ class Analyser(object):
self.createAllPresentWordsPerYearMonth(data)
if only_years:
all_r = self.all_r.groupby("_year").sum()
all_r.index = range(0,len(all_r)) ## see above
else:
all_r = self.all_r
for i in range(0, len(list(unique_words_list)), size):
batch = list(unique_words_list)[i:i + size]
batches.append((d, n, s, gam, batch, self.all_r))
batches.append((d, n, s, gam, batch, all_r))
all_r.sort_index(ascending=True)
#
#Global.all_r = all_r
......@@ -533,7 +548,7 @@ class Analyser(object):
all_bursts = pd.concat(res)
return all_bursts
return all_bursts, date_list
if __name__ == '__main__':
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment