Rev. | 46aba94810162bb69df68017075b4e0c9262c11f |
---|---|
大小 | 12,994 字节 |
时间 | 2010-04-01 19:32:12 |
作者 | lorenzo |
Log Message | Minor modifications (I am also saving data on the real times [made unique]
|
#!/usr/bin/env python
import scipy as s
import pylab as p
import numpy as n
import sys
import string
def unique_rows(A):
    """Return the distinct rows of A together with how often each occurs.

    Parameters
    ----------
    A : iterable of rows
        Typically a 2-D array; every row is turned into a tuple so that it
        can be used as a dictionary key.

    Returns
    -------
    B : array
        The distinct rows of A, one per row.
    C : array
        C[k] is the number of times row B[k] appears in A.

    The order of the rows in B is the iteration order of the underlying
    dict; do not rely on it being sorted.
    """
    d = {}
    for r in A:
        t = tuple(r)
        d[t] = d.get(t, 0) + 1
    # The dict d now has the counts of the unique rows of A.
    # list(...) matters: where keys()/values() are views, n.array(view)
    # would build a 0-d object array instead of the intended table.
    B = n.array(list(d.keys()))  # The unique rows of A
    C = n.array(list(d.values()))  # The counts of the unique rows
    return B, C
def remove_self_loops(clean_data):
    """Discard the rows whose two endpoints are the same individual.

    Parameters
    ----------
    clean_data : 2-D array
        Contact list; columns 1 and 2 hold the two endpoints of a contact.

    Returns
    -------
    array of int64
        The rows of clean_data for which column 1 differs from column 2,
        in their original order.
    """
    # A boolean mask selects the same rows as indexing with where(...)[0].
    is_real_contact = clean_data[:, 1] != clean_data[:, 2]
    return clean_data[is_real_contact, :].astype("int64")
def time_infection(data_no_loops, infected):
    """Propagate an infection through the contact list, frame by frame.

    NB: this function only keeps track of the contacts between an infected
    and a non-infected individual, i.e. the ones giving rise to a new
    infected. If two initially infected individuals are in contact, that
    contact is not recorded, since it does not increase the number of
    infected. Two infected individuals meeting is never a new infection.

    Parameters
    ----------
    data_no_loops : 2-D int array
        Contact list; column 0 is the time of the contact, columns 1 and 2
        are the two individuals involved.
    infected : sequence of int
        The individuals that are infected at the start.

    Returns
    -------
    list of four arrays
        [0] the individuals of every infectious contact, flattened pairwise
            (two consecutive entries per contact);
        [1] the time of every infectious contact, repeated twice so that it
            lines up with [0];
        [2] the size of the infected set (initial individuals included)
            at the end of each time frame;
        [3] the distinct times of the network scanned, sorted.
    """
    # Only the part of the network from the first appearance of an
    # infected individual onwards can be affected.
    net_infected = network_after_infection_introduction(infected,
                                                        data_no_loops)[0]
    set_infected = set(infected)
    time_unique = n.unique(net_infected[:, 0])
    cumulative_new_infected = n.zeros(len(time_unique)).astype("int64")
    # Plain lists are grown here and converted once at the end, instead of
    # re-allocating an array with hstack for every infectious contact.
    infected_list_overall = []
    infected_times_overall = []
    for i, frame_time in enumerate(time_unique):
        # all contacts in a time frame
        contacts_in_frame = net_infected[net_infected[:, 0] == frame_time, :]
        # Individuals that become infected during this frame. They are
        # collected separately and merged only once the frame has been
        # fully scanned: otherwise, within the same time frame, the
        # spreading would depend on which contact gets processed first.
        # The set is accumulated (not overwritten) so that several
        # infections happening in the same frame are all kept, and it is
        # always defined even when the frame has no infectious contact.
        newly_infected = set()
        for contact in contacts_in_frame:
            pair = contact[1:3]
            # Exactly one of the two individuals must already be infected
            # for the contact to produce a genuine new infected.
            if len(set(pair) & set_infected) == 1:
                newly_infected |= set(pair)
                infected_list_overall.extend(pair)
                infected_times_overall.extend((contact[0], contact[0]))
        set_infected = set_infected | newly_infected
        cumulative_new_infected[i] = len(set_infected)
    return [n.array(infected_list_overall, dtype="int64"),
            n.array(infected_times_overall, dtype="int64"),
            cumulative_new_infected, time_unique]
def _infection_output_name(prefix, visitor_iter):
    """Return '<prefix><file_number>_<visitor_iter>_.dat' for one run."""
    return prefix + file_number + "_%01d" % (visitor_iter) + "_.dat"


def _save_infection_array(prefix, visitor_iter, values):
    """Write values as integers to the per-run file named after prefix."""
    n.savetxt(_infection_output_name(prefix, visitor_iter), values, fmt="%d")


def iterate_time_infection(data_no_loops, visitor_list):
    """Run one infection per visitor and dump the results to text files.

    For each entry of visitor_list (used in turn as the only initially
    infected individual) the infection is propagated with time_infection
    and a series of per-run '.dat' files is written in the current
    directory. Two summary files, 'infected_number_distr.dat' and
    'infection_start_times.dat', are written at the end.

    Parameters
    ----------
    data_no_loops : 2-D int array
        Contact list without self-loops, passed on to time_infection.
    visitor_list : sequence of int
        The individuals to be used, one at a time, as infection seed.

    Notes
    -----
    Reads the module-level names file_number (used in every file name) and
    clean_data (the network saved before propagating the infection).
    Returns None.
    """
    ini_infected = n.zeros(1).astype("int64")
    total_infected_distr = n.zeros(len(visitor_list)).astype("int64")
    start_infection = n.arange(len(visitor_list)).astype("int64")
    for visitor_iter, visitor in enumerate(visitor_list):
        print("visitor_iter+1 is,  %s" % (visitor_iter + 1))
        ini_infected[:] = visitor

        # First save the network as it is from the moment the infected
        # individual is introduced.
        network_infected = network_after_infection_introduction(
            ini_infected, clean_data)[0]
        _save_infection_array("network_after_infection_", visitor_iter,
                              network_infected)
        # Sorting each pair makes (a, b) and (b, a) the same undirected edge.
        network_infected_unique = unique_rows(
            n.sort(network_infected[:, 1:3]))[0]
        _save_infection_array("network_after_infection_unique", visitor_iter,
                              network_infected_unique)

        # Now propagate the infection.
        dynamic_infection = time_infection(data_no_loops, ini_infected)
        contact_ids = dynamic_infection[0]
        contact_times = dynamic_infection[1]

        # At this point the infection is done: save the data in several
        # convenient forms.
        _save_infection_array("dynamic_infectious_contacts_", visitor_iter,
                              contact_ids)
        binary_infection = contact_ids.reshape((-1, 2))
        _save_infection_array("dynamic_infectious_contacts_binary_",
                              visitor_iter, binary_infection)
        binary_infection_unique = unique_rows(n.sort(binary_infection))[0]
        _save_infection_array("dynamic_infectious_contacts_binary_unique_",
                              visitor_iter, binary_infection_unique)
        # Each contact time is stored twice, so only the first of the two
        # time columns is kept next to the pair of individuals.
        pairs_and_time = n.hstack((contact_ids.reshape((-1, 2)),
                                   contact_times.reshape((-1, 2))))[:, 0:3]
        _save_infection_array("dynamic_infectious_contacts_binary_and_time_",
                              visitor_iter, pairs_and_time)
        _save_infection_array("dynamic_infected_individuals_", visitor_iter,
                              n.unique(contact_ids))
        # Times measured from the first infectious contact of this run.
        elapsed_times = contact_times - contact_times[0]
        _save_infection_array("dynamic_infectious_times_", visitor_iter,
                              elapsed_times)

        # Now some statistics.
        _save_infection_array("dynamic_infectious_times_unique_",
                              visitor_iter, n.unique(elapsed_times))
        _save_infection_array("dynamic_infectious_real_times_unique_",
                              visitor_iter, n.unique(contact_times))
        start_infection[visitor_iter] = contact_times[0]

        count_infections = dynamic_infection[2]
        _save_infection_array("dynamic_infectious_cumulative_number_",
                              visitor_iter, count_infections)
        _save_infection_array("dynamic_infectious_cumulative_number_unique",
                              visitor_iter, n.unique(count_infections))
        total_infected_distr[visitor_iter] = count_infections[-1]

    n.savetxt("infected_number_distr.dat", total_infected_distr, fmt="%d")
    n.savetxt("infection_start_times.dat", start_infection, fmt="%d")
    return
def read_and_clean_data(file_number):
    """Read the raw contact list and drop every blacklisted individual.

    Two whitespace-separated integer files are read from the current
    directory: 'raw_time_edge_list_<file_number>_.dat' (the contacts) and
    'blacklist_<file_number>_.dat' (the individuals to discard).

    Parameters
    ----------
    file_number : str
        Label inserted in both file names.

    Returns
    -------
    array of int64
        The raw data without the rows in which column 1 or column 2 is a
        blacklisted individual.
    """
    filename = "raw_time_edge_list_" + file_number + "_.dat"
    print("filename is,  %s" % (filename,))
    # "with" closes the file even if a line fails to parse; blank lines are
    # skipped so that they cannot produce a ragged table.
    with open(filename) as f:
        raw_data = [[int(tok) for tok in line.split()]
                    for line in f if line.strip()]
    raw_data = n.array(raw_data, dtype="int64")
    print("s.shape(raw_data) is,  %s" % (n.shape(raw_data),))

    filename = "blacklist_" + file_number + "_.dat"
    with open(filename) as f:
        blacklist = [[int(tok) for tok in line.split()]
                     for line in f if line.strip()]
    blacklist = n.array(blacklist, dtype="int64")
    print("blacklist is,  %s" % (blacklist,))

    if len(blacklist) > 0:
        keep = n.ones(len(raw_data), dtype=bool)
        # ravel() lets every id count, however the ids are laid out on the
        # lines of the blacklist file.
        for banned in blacklist.ravel():
            keep &= (raw_data[:, 1] != banned) & (raw_data[:, 2] != banned)
        raw_data = raw_data[keep, :]
    return raw_data.astype("int64")
def read_dirty_data(file_number):
    """Read the raw contact list without applying any blacklist.

    Parameters
    ----------
    file_number : str
        Label inserted in the file name
        'raw_time_edge_list_<file_number>_.dat', which is read from the
        current directory.

    Returns
    -------
    array of int64
        One row per non-blank line of the file, one column per
        whitespace-separated integer.
    """
    filename = "raw_time_edge_list_" + file_number + "_.dat"
    print("filename is,  %s" % (filename,))
    # "with" closes the file even if a line fails to parse; blank lines are
    # skipped so that they cannot produce a ragged table.
    with open(filename) as f:
        raw_data = [[int(tok) for tok in line.split()]
                    for line in f if line.strip()]
    return n.array(raw_data, dtype="int64")
def network_after_infection_introduction(ini_infected, clean_data):
    """Split the contact list at the first row involving an infected seed.

    Causality: the dataset can be modified only after the first appearance
    of any initially infected individual (no infection going backward in
    time). The split is done on the row position, so the rows are
    presumably expected to be ordered by time -- confirm against the data.

    Parameters
    ----------
    ini_infected : sequence of int
        The initially infected individuals.
    clean_data : 2-D int array
        Contact list; columns 1 and 2 hold the two individuals of a contact.

    Returns
    -------
    list of two int64 arrays
        [0] the rows from the first one involving an initially infected
            individual up to the end;
        [1] the rows before it.
        If none of the individuals appears in clean_data, [0] is empty and
        [1] is the whole contact list.
    """
    involves_infected = n.zeros(len(clean_data), dtype=bool)
    for individual in ini_infected:
        involves_infected |= ((clean_data[:, 1] == individual)
                              | (clean_data[:, 2] == individual))
    hits = n.flatnonzero(involves_infected)
    if len(hits) > 0:
        earliest_infected = hits[0]
    else:
        # Nobody infected ever shows up: nothing can be modified.
        earliest_infected = len(clean_data)
    network_section = clean_data[earliest_infected:, :]
    network_section_before = clean_data[0:earliest_infected, :]
    return [network_section.astype("int64"),
            network_section_before.astype("int64")]
def spread_infection(network_infected, infected):
    """Find the contacts touching an infected individual and who they reach.

    Parameters
    ----------
    network_infected : 2-D int array
        Contact list; columns 1 and 2 hold the two individuals of a contact.
    infected : sequence of int
        The currently infected individuals.

    Returns
    -------
    list of two int64 arrays
        [0] the sorted positions of the rows in which at least one of the
            two individuals is in infected;
        [1] the sorted distinct individuals appearing in those rows.
    """
    touched = n.zeros(len(network_infected), dtype=bool)
    for individual in infected:
        touched |= ((network_infected[:, 1] == individual)
                    | (network_infected[:, 2] == individual))
    # flatnonzero already yields each position once, in increasing order.
    infection_position_all = n.flatnonzero(touched).astype("int64")
    reached = n.unique(network_infected[infection_position_all, 1:3])
    return [infection_position_all, reached.astype("int64")]
################################################################
################################################################
################################################################
################################################################
################################################################
# Driver: read the contact list named on the command line, clean it, and run
# one infection per visitor. It is deliberately kept at module level because
# the functions above read file_number and clean_data as globals.
if len(sys.argv) < 2:
    sys.exit("usage: %s <file_number>" % sys.argv[0])
file_number = sys.argv[1]
data_laundry = 1  # this specifies whether I need to clean the data via
# blacklists or not
if data_laundry == 1:
    clean_data = read_and_clean_data(file_number)
else:
    clean_data = read_dirty_data(file_number)
print("s.shape(clean_data) is,  %s" % (n.shape(clean_data),))
n.savetxt("clean_data.dat", clean_data, fmt="%d")
data_no_loops = remove_self_loops(clean_data)
n.savetxt("data_no_loops.dat", data_no_loops, fmt="%d")
# Taking the visitors from the loop-free contacts ensures that I will not
# iterate on isolated visitors.
visitor_list = n.unique(data_no_loops[:, 1:3])
n.savetxt("visitor_list.dat", visitor_list, fmt="%d")
iterate_time_infection(data_no_loops, visitor_list)
print("So far so good")