• R/O
  • SSH

标签
No Tags

Frequently used words (click to add to your profile)

javac++androidlinuxc#windowsobjective-ccocoa誰得qtpythonphprubygameguibathyscaphec計画中(planning stage)翻訳omegatframeworktwitterdomtestvb.netdirectxゲームエンジンbtronarduinopreviewer

File Info

Rev. 46aba94810162bb69df68017075b4e0c9262c11f
大小 12,994 字节
时间 2010-04-01 19:32:12
作者 lorenzo
Log Message

Minor modifications (I am also saving data on the real times [made unique]
of the infection).

Content

#!/usr/bin/env python
import scipy as s
import pylab as p
import numpy as n
import sys
import string


def unique_rows(A):
    """Return the distinct rows of A and how often each one occurs.

    Parameters:
        A: an iterable of rows (for instance a 2-D array). Every row is
           converted to a tuple so that it can serve as a dictionary key.

    Returns:
        (B, C) where B is an array holding each distinct row once and C is
        an array with the matching counts, i.e. C[k] is the number of times
        row B[k] appears in A. The order of the rows is the iteration order
        of the internal dictionary and should not be relied upon.
    """
    d = {}
    for r in A:
        t = tuple(r)
        d[t] = d.get(t, 0) + 1

    # The dict d now has the counts of the unique rows of A.
    # keys()/values() are wrapped in list() because numpy cannot build a
    # proper array from the view objects that newer Python versions return.
    B = n.array(list(d.keys()))    # The unique rows of A
    C = n.array(list(d.values()))  # The counts of the unique rows

    return B, C




def remove_self_loops(clean_data):
    """Drop every contact in which an individual is paired with itself.

    Parameters:
        clean_data: 2-D array whose columns 1 and 2 hold the two individuals
                    of each contact.

    Returns:
        A new int64 array with the rows where column 1 differs from
        column 2, in their original order.
    """
    # numpy is used directly here: the numpy aliases living in scipy's
    # top-level namespace (scipy.where, ...) are deprecated.
    is_proper_contact = clean_data[:, 1] != clean_data[:, 2]

    return clean_data[is_proper_contact, :].astype("int64")


#NB: the following function only keeps track of contacts in which exactly one
#of the two individuals is already infected, i.e. contacts giving rise to a
#new infected individual.
#If two initially infected individuals are in contact with each other, that
#contact is not recorded, since it does not increase the number of infected.

def time_infection(data_no_loops, infected):
    """Propagate an infection along the time-stamped contact list.

    The contact list is first cut with network_after_infection_introduction
    so that it starts at the first contact involving an initially infected
    individual. The remaining contacts are then scanned one time frame
    (distinct value of column 0) at a time. A contact transmits the infection
    when exactly one of its two individuals (columns 1 and 2) is infected at
    the beginning of the frame; contacts between two infected individuals are
    not counted as new infections.

    Parameters:
        data_no_loops: 2-D integer array with rows (time, individual,
                       individual).
        infected: iterable with the initially infected individuals.

    Returns:
        A list [infected_list_overall, infected_times_overall,
        cumulative_new_infected, time_unique] where
        - infected_list_overall is a flat int64 array with the two
          individuals of every infectious contact (two entries per contact),
        - infected_times_overall is a flat int64 array with the time of every
          infectious contact, repeated twice so that it lines up with
          infected_list_overall,
        - cumulative_new_infected[i] is the size of the infected set once
          frame i has been processed (initially infected included),
        - time_unique holds the sorted distinct times of the scanned contacts.
    """
    net_infected = network_after_infection_introduction(infected,
                                                        data_no_loops)[0]

    set_infected = set(infected)

    # Results are gathered in plain lists and converted once at the end;
    # growing an array with hstack copies it at every step.
    contact_members = []
    contact_times = []

    time_unique = n.unique(net_infected[:, 0])
    cumulative_new_infected = n.zeros(len(time_unique)).astype("int64")

    for i, frame_time in enumerate(time_unique):
        # all contacts in a time frame
        contacts_in_frame = net_infected[net_infected[:, 0] == frame_time, :]

        # Everybody infected during this frame. It starts empty for every
        # frame and is accumulated over ALL infectious contacts of the frame
        # (previously each infectious contact overwrote the temporary set, so
        # only the last one of a frame was remembered, and the temporary set
        # was undefined if the very first frame had no infectious contact).
        newly_infected = set()

        for contact in contacts_in_frame:
            pair = set(contact[1:3])

            # Only one of the individuals involved in the contact may be
            # infected: two infected individuals meeting is NOT a new
            # infection.
            if len(pair & set_infected) == 1:
                newly_infected |= pair
                contact_members.extend(contact[1:3])
                contact_times.extend((contact[0], contact[0]))

        # It is a matter of being consistent here: the set of infected
        # individuals is updated only after the whole time frame has been
        # scanned. Otherwise, within the same time frame, the spreading of
        # the infection may depend on which contact gets processed first.
        set_infected = set_infected | newly_infected

        cumulative_new_infected[i] = len(set_infected)

    infected_list_overall = n.array(contact_members, dtype="int64")
    infected_times_overall = n.array(contact_times, dtype="int64")

    return [infected_list_overall, infected_times_overall,
            cumulative_new_infected, time_unique]


def _visitor_output_name(stem, visitor_iter):
    """Return '<stem><file_number>_<visitor_iter>_.dat'.

    file_number is the module-level label of the data set being processed.
    """
    return stem + file_number + "_%01d" % (visitor_iter) + "_.dat"


def _save_visitor_table(stem, visitor_iter, table):
    """Write table as integers to the per-visitor output file for stem."""
    n.savetxt(_visitor_output_name(stem, visitor_iter), table, fmt="%d")


def iterate_time_infection(data_no_loops, visitor_list):
    """Run one infection per visitor and dump the results to text files.

    Every entry of visitor_list is used in turn as the single initially
    infected individual. For each of them the function saves the contact
    network from the moment the individual first appears, propagates the
    infection with time_infection and writes the infectious contacts, the
    infected individuals, the infection times and the cumulative number of
    infected to files named '<stem><file_number>_<index>_.dat'.
    Two summary files are written at the end: 'infected_number_distr.dat'
    (final number of infected for every visitor) and
    'infection_start_times.dat' (time of the first infectious contact).

    Parameters:
        data_no_loops: contact array (time, individual, individual) used to
                       propagate the infection.
        visitor_list: sequence with the individuals to use as seeds.

    Returns:
        None; all results are written to disk.

    The function reads the module-level names file_number and clean_data.
    """
    ini_infected = n.zeros(1).astype("int64")

    total_infected_distr = n.zeros(len(visitor_list)).astype("int64")

    start_infection = n.arange(len(visitor_list)).astype("int64")

    for visitor_iter in range(len(visitor_list)):

        print("visitor_iter+1 is,  %s" % (visitor_iter + 1))

        ini_infected[:] = visitor_list[visitor_iter]

        # First save the network as it is from the moment the infected
        # individual is introduced.
        # NOTE(review): this uses the module-level clean_data rather than the
        # data_no_loops argument -- confirm that keeping the self-loops in
        # these two files is intended.
        network_infected = network_after_infection_introduction(
            ini_infected, clean_data)[0]

        _save_visitor_table("network_after_infection_", visitor_iter,
                            network_infected)

        # Sorting each pair makes (a, b) and (b, a) the same row before the
        # duplicates are removed.
        network_infected_unique = unique_rows(
            n.sort(network_infected[:, 1:3]))[0]

        _save_visitor_table("network_after_infection_unique", visitor_iter,
                            network_infected_unique)

        # Now propagate the infection.
        dynamic_infection = time_infection(data_no_loops, ini_infected)

        # At this point the infection is over and it is time to save some
        # data in a suitable form.
        contact_members = dynamic_infection[0]
        contact_times = dynamic_infection[1]

        _save_visitor_table("dynamic_infectious_contacts_", visitor_iter,
                            contact_members)

        binary_infection = contact_members.reshape((-1, 2))

        _save_visitor_table("dynamic_infectious_contacts_binary_",
                            visitor_iter, binary_infection)

        binary_infection_unique = unique_rows(n.sort(binary_infection))[0]

        _save_visitor_table("dynamic_infectious_contacts_binary_unique_",
                            visitor_iter, binary_infection_unique)

        # Columns: individual, individual, time (the time is stored twice per
        # contact, hence only the first three columns are kept).
        contacts_and_time = n.hstack(
            (binary_infection, contact_times.reshape((-1, 2))))[:, 0:3]

        _save_visitor_table("dynamic_infectious_contacts_binary_and_time_",
                            visitor_iter, contacts_and_time)

        _save_visitor_table("dynamic_infected_individuals_", visitor_iter,
                            n.unique(contact_members))

        # Times measured from the first infectious contact.
        relative_times = contact_times - contact_times[0]

        _save_visitor_table("dynamic_infectious_times_", visitor_iter,
                            relative_times)

        ####################################
        ####################################
        # Now some statistics.

        _save_visitor_table("dynamic_infectious_times_unique_", visitor_iter,
                            n.unique(relative_times))

        _save_visitor_table("dynamic_infectious_real_times_unique_",
                            visitor_iter, n.unique(contact_times))

        start_infection[visitor_iter] = contact_times[0]

        count_infections = dynamic_infection[2]

        _save_visitor_table("dynamic_infectious_cumulative_number_",
                            visitor_iter, count_infections)

        _save_visitor_table("dynamic_infectious_cumulative_number_unique",
                            visitor_iter, n.unique(count_infections))

        total_infected_distr[visitor_iter] = count_infections[-1]

    n.savetxt("infected_number_distr.dat", total_infected_distr, fmt="%d")
    n.savetxt("infection_start_times.dat", start_infection, fmt="%d")

    return





def _load_int_table(filename):
    """Parse a text file of whitespace-separated integers into an int64 array.

    Every non-blank line becomes one row; blank lines are skipped.
    """
    # The with-block closes the file even if a line fails to parse.
    with open(filename) as f:
        rows = [[int(token) for token in line.split()]
                for line in f if line.strip()]

    return n.array(rows, dtype="int64")


def read_and_clean_data(file_number):
    """Read the raw contact list and remove the blacklisted individuals.

    The contacts are read from 'raw_time_edge_list_<file_number>_.dat' and
    the blacklist from 'blacklist_<file_number>_.dat'. Every contact whose
    column 1 or column 2 holds a blacklisted id is discarded.

    Parameters:
        file_number: string label inserted in the two file names.

    Returns:
        An int64 array with the surviving rows, in their original order.
    """
    filename = "raw_time_edge_list_" + file_number + "_.dat"

    print("filename is,  %s" % filename)

    raw_data = _load_int_table(filename)

    print("s.shape(raw_data) is,  %s" % (raw_data.shape,))

    blacklist = _load_int_table("blacklist_" + file_number + "_.dat")

    print("blacklist is,  %s" % (blacklist,))

    # ravel() visits every id individually, so a blacklist line may hold one
    # or several ids; an empty blacklist simply leaves the data untouched.
    for banned in blacklist.ravel():
        keep = (raw_data[:, 1] != banned) & (raw_data[:, 2] != banned)

        raw_data = raw_data[keep, :]

    return raw_data.astype("int64")


def read_dirty_data(file_number):
    """Read the raw contact list without applying any blacklist.

    The contacts are read from 'raw_time_edge_list_<file_number>_.dat', a
    text file with whitespace-separated integers, one contact per line.

    Parameters:
        file_number: string label inserted in the file name.

    Returns:
        An int64 array with one row per non-blank line of the file.
    """
    filename = "raw_time_edge_list_" + file_number + "_.dat"

    print("filename is,  %s" % filename)

    # The with-block closes the file even if a line fails to parse; blank
    # lines are skipped so that a trailing empty line does not break the
    # conversion to a rectangular array.
    with open(filename) as f:
        rows = [[int(token) for token in line.split()]
                for line in f if line.strip()]

    return n.array(rows, dtype="int64")




#Causality: the dataset can be modified only from the first appearance of any
#initially infected individual onwards.


def network_after_infection_introduction(ini_infected, clean_data):
    """Split the contact list at the first appearance of an infected individual.

    Parameters:
        ini_infected: sequence with the initially infected individuals.
        clean_data: 2-D array whose columns 1 and 2 hold the two individuals
                    of each contact.

    Returns:
        A list [network_section, network_section_before] of int64 arrays:
        network_section holds the rows from the first contact involving any
        initially infected individual up to the end, network_section_before
        holds the rows preceding it.

    Raises:
        ValueError: if none of the initially infected individuals takes part
                    in any contact (this includes an empty ini_infected).
    """
    # Mark every contact involving at least one initially infected individual.
    involves_infected = n.zeros(len(clean_data), dtype=bool)

    for individual in ini_infected:
        involves_infected |= ((clean_data[:, 1] == individual)
                              | (clean_data[:, 2] == individual))

    infected_rows = n.flatnonzero(involves_infected)

    if len(infected_rows) == 0:
        raise ValueError("none of the initially infected individuals "
                         "appears in the contact data")

    # flatnonzero returns increasing row indices, so the first one is the
    # earliest contact of an infected individual.
    earliest_infected = infected_rows[0]

    # The part of the array before that row is not needed for the spreading:
    # it cannot be modified (no infection going backward in time!)
    network_section = clean_data[earliest_infected:, :]

    network_section_before = clean_data[0:earliest_infected, :]

    return [network_section.astype("int64"),
            network_section_before.astype("int64")]



def spread_infection(network_infected, infected):
    """Find the contacts of the infected individuals and everybody they touch.

    Parameters:
        network_infected: 2-D array whose columns 1 and 2 hold the two
                          individuals of each contact.
        infected: sequence with the currently infected individuals.

    Returns:
        A list [infection_position_all, infected] of int64 arrays:
        infection_position_all holds the sorted row indices of the contacts
        involving at least one infected individual, infected holds the sorted
        distinct individuals taking part in those contacts.
    """
    # A boolean mask collects the matching rows; this keeps the indices
    # integer throughout and avoids re-allocating an array for every
    # infected individual.
    involves_infected = n.zeros(len(network_infected), dtype=bool)

    for individual in infected:
        involves_infected |= ((network_infected[:, 1] == individual)
                              | (network_infected[:, 2] == individual))

    # flatnonzero yields each matching row once, in increasing order.
    infection_position_all = n.flatnonzero(involves_infected).astype("int64")

    # unique() flattens the two columns and returns the sorted distinct ids.
    infected = n.unique(network_infected[infection_position_all, 1:3])

    return [infection_position_all, infected.astype("int64")]


################################################################
################################################################
################################################################
################################################################
################################################################

# Driver: read the contact data labelled by the first command-line argument,
# clean it and simulate one infection per visitor.
# file_number and clean_data are module-level names that
# iterate_time_infection reads as globals, so they must stay defined here.
file_number = sys.argv[1]

data_laundry = 1  # this specifies whether I need to clean the data via
# blacklists or not

if data_laundry == 1:
    clean_data = read_and_clean_data(file_number)
else:
    clean_data = read_dirty_data(file_number)

print("s.shape(clean_data) is,  %s" % (clean_data.shape,))
n.savetxt("clean_data.dat", clean_data, fmt="%d")

data_no_loops = remove_self_loops(clean_data)

n.savetxt("data_no_loops.dat", data_no_loops, fmt="%d")

# Taking the visitors from the loop-free contacts ensures that the iteration
# never runs on isolated visitors. n.unique flattens the two columns and
# returns the sorted distinct ids.
visitor_list = n.unique(data_no_loops[:, 1:3])

n.savetxt("visitor_list.dat", visitor_list, fmt="%d")

iterate_time_infection(data_no_loops, visitor_list)

print("So far so good")