Rev. | fd2a10f8a811605534377b659656b1720f554f4e |
---|---|
大小 | 13,407 字节 |
时间 | 2009-12-03 23:42:27 |
作者 | lorenzo |
Log Message | I updated the way in which I save files (p.save gets replaced by n.savetxt). |
#!/usr/bin/env python
import scipy as s
import numpy as n
import pylab as p
import sys
import string
#The following function is really useful to find out which elements
#of a given array can be found in another. For reference look at
# http://mail.scipy.org/pipermail/numpy-discussion/2007-March/026448.html
#The suggestion I got, s.where(s.setmember1d(A,B)), fails unless the
#elements of A and the elements of B are each unique.
def ismember(ar1, ar2):
    """Return a boolean mask telling which elements of ar1 occur in ar2.

    Works even when ar1 and/or ar2 contain repeated values.

    Parameters
    ----------
    ar1 : array_like
        Values to look up.
    ar2 : array_like
        Reference values; flattened and sorted internally.

    Returns
    -------
    numpy.ndarray of bool
        Same shape as ar1; True where the element of ar1 is found in ar2.
    """
    # numpy is called directly: the numpy aliases exposed in the scipy
    # namespace (s.sort and friends) are deprecated in recent SciPy.
    sorted_ref = n.sort(n.asarray(ar2), axis=None)
    queries = n.asarray(ar1)
    left = sorted_ref.searchsorted(queries, side='left')
    right = sorted_ref.searchsorted(queries, side='right')
    # A value is present exactly when its left and right insertion
    # points in the sorted reference differ.
    return right != left
#Function suggested by Ciro to combine tag id and boot_count.
#It has the advantage that it does not require a hash (it does not really
#carry out a calculation).
def combine(tag_id, boot_count):
    """Pack tag_id and boot_count into one integer.

    tag_id is shifted left by 16 bits and OR-ed with boot_count, so the
    two parts can be read back with ``value >> 16`` and ``value & 0xFFFF``.
    """
    upper_part = tag_id << 16
    return upper_part | boot_count
def iter_combine(tag_id_list, boot_count_list):
    """Pack tag ids and boot counts element by element into single integers.

    Each output element is ``(tag_id << 16) | boot_count``, i.e. the same
    packing that combine() applies to a single pair.

    Parameters
    ----------
    tag_id_list : sequence of int
    boot_count_list : sequence of int
        Must be at least as long as tag_id_list; extra entries are ignored.

    Returns
    -------
    numpy.ndarray of int64, one element per entry of tag_id_list.

    Raises
    ------
    IndexError
        If boot_count_list is shorter than tag_id_list.
    """
    # int64 is requested explicitly so that the 16-bit shift cannot
    # overflow where the default numpy integer is only 32 bits wide.
    tag_ids = n.asarray(tag_id_list, dtype="int64")
    boot_counts = n.asarray(boot_count_list, dtype="int64")
    if len(boot_counts) < len(tag_ids):
        raise IndexError("boot_count_list is shorter than tag_id_list")
    # Only the first len(tag_id_list) boot counts are paired with a tag id.
    return (tag_ids << 16) | boot_counts[:len(tag_ids)]
def resize_bootcount_file2(time_edge_list, bootcount_data):
    """Select the bootcount rows recorded at times when a contact exists.

    Parameters
    ----------
    time_edge_list : 2-D numpy array
        Column 0 holds the times at which contacts are established.
    bootcount_data : 2-D numpy array with at least 7 columns
        Column 0 holds the time of each record.

    Returns
    -------
    2-D numpy array made of columns 0, 1 and 6 of the selected rows of
    bootcount_data (presumably time, tag id and boot count -- TODO confirm
    against the layout of the bootcount file).

    Side effects
    ------------
    Writes "time_unique.dat" (the distinct contact times) and
    "time_boot_sel.dat" (the times of the selected bootcount rows) in the
    current working directory.
    """
    # Repeated contact times are removed first, otherwise the reference
    # array would be both redundant and huge.  n.unique replaces the
    # long-removed unique1d alias.
    contact_times = n.unique(time_edge_list[:, 0])
    n.savetxt("time_unique.dat", contact_times, fmt='%d')

    boot_times = bootcount_data[:, 0]
    # s.where(s.setmember1d(boot_times, contact_times)) is not usable here:
    # it only works when boot_times has no repeated values, which is not
    # the case, hence ismember().
    keep = ismember(boot_times, contact_times)
    n.savetxt("time_boot_sel.dat", boot_times[keep], fmt='%d')

    # Boolean indexing copies the rows, so the caller's array is untouched.
    return bootcount_data[keep][:, [0, 1, 6]]
def duplicate_and_mix_array(my_2d_arr):
    """Split every (time, a, b) row into two consecutive (time, x) rows.

    This casts the time-dependent edge list into a shape closer to the
    bootcount list: input row i becomes output row 2*i = (time, a) and
    output row 2*i+1 = (time, b).

    Parameters
    ----------
    my_2d_arr : 2-D array_like with at least 3 columns

    Returns
    -------
    numpy.ndarray of int64, shape (2 * len(my_2d_arr), 2).
    """
    edges = n.asarray(my_2d_arr)
    paired = n.zeros((2 * len(edges), 2), dtype="int64")
    # Both rows generated from one edge carry the same time stamp.
    paired[0::2, 0] = edges[:, 0]
    paired[1::2, 0] = edges[:, 0]
    # Even rows take column 1 of the edge, odd rows take column 2.
    paired[0::2, 1] = edges[:, 1]
    paired[1::2, 1] = edges[:, 2]
    return paired
# def my_hash(arr):
# my_hash=hash((arr[0], arr[1]))
# return my_hash
# def combine(tag_id, boot_count):
# return (tag_id << 16) | boot_count
#To see the original data I combined into a single integer, use the following
# combine(2,3) >> 16
# combine(2,3) & 0xFFFF
def my_hash(arr):
    """Pack the first two entries of arr into one integer.

    arr[0] is shifted left by 16 bits and OR-ed with arr[1]; any further
    entries of arr are ignored.
    """
    high, low = arr[0], arr[1]
    shifted = high << 16
    return shifted | low
def couple_hash_table2(sliced_data):
    """Pack each (first, second) row of a two-column array into one int64.

    Row i yields ``(sliced_data[i, 0] << 16) | sliced_data[i, 1]``, the
    same packing my_hash() applies to a single pair.

    Parameters
    ----------
    sliced_data : 2-D array_like with at least 2 columns

    Returns
    -------
    numpy.ndarray of int64, one element per row of sliced_data.
    """
    # Convert to int64 before shifting, so the shift cannot overflow where
    # the default numpy integer is only 32 bits wide.
    pairs = n.asarray(sliced_data, dtype="int64")
    if pairs.size == 0:
        return n.zeros(0, dtype="int64")
    return (pairs[:, 0] << 16) | pairs[:, 1]
def associate_boot_to_tag_improved(resized_tag_boot_list,time_edge_list_reshaped):
    """Attach a boot count to each contact endpoint by exact (time, tag) match.

    Parameters
    ----------
    resized_tag_boot_list : 2-D numpy array
        Rows are used here as (time, tag id, boot count).
    time_edge_list_reshaped : 2-D numpy array
        Rows are used here as (time, tag id), two consecutive rows per contact.

    Returns
    -------
    numpy.ndarray of int64, shape (number of pairs, 3): the time followed by
    the packed (tag id, boot count) of the two endpoints of each pair.

    Side effects
    ------------
    Prints progress messages and writes "edge_list_and_boot_binary.dat" and
    "edge_list_and_boot_hashed.dat" in the current working directory.

    NOTE(review): the pairing below assumes the selected bootcount rows come
    out as consecutive (even, odd) couples belonging to the same contact;
    nothing in this function enforces it -- confirm on real data.
    """
    # Combine into a single number the time at which a contact is
    # established and the id of the tag establishing the contact ...
    hash_contact = couple_hash_table2(time_edge_list_reshaped)
    # ... and do the same for all tags sending out a sighting report.
    hash_boot = couple_hash_table2(resized_tag_boot_list[:, 0:2])
    boot_sel = ismember(hash_boot, hash_contact)
    time_edge_list_and_boot = resized_tag_boot_list[boot_sel, :]
    # Single-argument print calls give the same output on Python 2 and 3.
    print("s.shape(time_edge_list_and_boot) is,  %s"
          % (n.shape(time_edge_list_and_boot),))

    # An unpaired trailing row cannot form a binary interaction: drop it.
    if len(time_edge_list_and_boot) % 2 == 1:
        time_edge_list_and_boot = time_edge_list_and_boot[:-1, :]

    # Revert to the form of a binary interaction: one output row per couple
    # of consecutive input rows.
    even_rows = time_edge_list_and_boot[0::2]
    print("len(sel_even is, ),  %d" % len(even_rows))
    odd_rows = time_edge_list_and_boot[1::2]
    print("len(sel_odd is, ),  %d" % len(odd_rows))
    # Floor division keeps the row count an integer on Python 3 as well.
    n_pairs = len(time_edge_list_and_boot) // 2

    time_edge_list_and_boot_binary = n.empty((n_pairs, 5), dtype="int64")
    time_edge_list_and_boot_binary[:, 0] = odd_rows[:, 0]
    time_edge_list_and_boot_binary[:, 1] = even_rows[:, 1]
    time_edge_list_and_boot_binary[:, 2] = even_rows[:, 2]
    time_edge_list_and_boot_binary[:, 3] = odd_rows[:, 1]
    time_edge_list_and_boot_binary[:, 4] = odd_rows[:, 2]
    n.savetxt("edge_list_and_boot_binary.dat",
              time_edge_list_and_boot_binary, fmt='%d')

    # Replace each couple (tag_id, bootcount) by its packed integer.
    time_edge_list_and_boot_hashed = n.empty((n_pairs, 3), dtype="int64")
    time_edge_list_and_boot_hashed[:, 0] = time_edge_list_and_boot_binary[:, 0]
    print("hashing again")
    time_edge_list_and_boot_hashed[:, 1] = \
        couple_hash_table2(time_edge_list_and_boot_binary[:, 1:3])
    time_edge_list_and_boot_hashed[:, 2] = \
        couple_hash_table2(time_edge_list_and_boot_binary[:, 3:5])
    print("hashing done again")
    n.savetxt("edge_list_and_boot_hashed.dat",
              time_edge_list_and_boot_hashed, fmt='%d')
    return time_edge_list_and_boot_hashed
############################################################################
############################################################################
def interpolate_boot_count(tag_id_boot_seq,tag_id_boot_times,tag_id_contact_times):
    """Give every contact time the most recently reported boot count.

    For each contact time t the result is tag_id_boot_seq[k], where k is the
    largest index such that tag_id_boot_times[k] <= t.  Contact times that
    precede every boot time get tag_id_boot_seq[0].

    Parameters
    ----------
    tag_id_boot_seq : array_like
        Boot counts reported by one tag.
    tag_id_boot_times : array_like
        Times of those reports, same length as tag_id_boot_seq.
    tag_id_contact_times : array_like
        Times at which a boot count is needed.

    Returns
    -------
    numpy.ndarray of int64, one element per contact time.

    Raises
    ------
    IndexError
        If tag_id_boot_seq is empty while contact times are given.
    """
    boot_seq = n.asarray(tag_id_boot_seq)
    boot_times = n.asarray(tag_id_boot_times)
    contact_times = n.asarray(tag_id_contact_times)

    # suffix_min[i] = min(boot_times[i:]) never decreases with i, and
    # suffix_min[i] <= t holds exactly for the indices up to the last k with
    # boot_times[k] <= t.  A binary search on it therefore finds that last
    # index even when boot_times itself is not sorted, replacing the former
    # scan of all boot times for every contact time.
    suffix_min = n.minimum.accumulate(boot_times[::-1])[::-1]
    last_idx = n.searchsorted(suffix_min, contact_times, side='right') - 1
    # -1 means "no report yet": fall back to the first boot count.
    last_idx = n.maximum(last_idx, 0)
    return boot_seq[last_idx].astype("int64")
def interpolate_boot_count_improved(tag_id_boot_seq,tag_id_boot_times,tag_id_contact_times):
    """Binary-search variant of interpolate_boot_count.

    Follows a mailing-list suggestion:
        A = times with (many) observations to interpolate,
        B = smaller, sorted time set,
        C = observations at times B,
        result = C_padded[B.searchsorted(A, side='right')]
    where C_padded is C with its first element repeated in front.

    Parameters
    ----------
    tag_id_boot_seq : array_like
        Boot counts reported by one tag.
    tag_id_boot_times : array_like
        Times of those reports; must be sorted in ascending order, as
        required by searchsorted.
    tag_id_contact_times : array_like
        Times at which a boot count is needed.

    Returns
    -------
    numpy.ndarray of int64, one element per contact time: the boot count of
    the last report not later than the contact time, or the first boot count
    when the contact precedes every report.
    """
    boot_seq = n.asarray(tag_id_boot_seq)
    boot_times = n.asarray(tag_id_boot_times)
    idx = boot_times.searchsorted(tag_id_contact_times, side='right')
    # The padding must be a real concatenation: "[seq[0]] + seq + [seq[-1]]"
    # adds element-wise when seq is a numpy array, which corrupted every
    # value.  Only the leading pad is ever indexed (idx <= len(seq)).
    padded = n.concatenate((boot_seq[:1], boot_seq))
    return padded[idx].astype("int64")
def associate_boot_to_tag_improved2(resized_tag_boot_list,time_edge_list_reshaped):
    """Attach a boot count to each contact endpoint by interpolation in time.

    Unlike associate_boot_to_tag_improved, which needs a bootcount record at
    exactly the contact time, every contact endpoint gets the boot count
    returned by interpolate_boot_count() for its tag.

    Parameters
    ----------
    resized_tag_boot_list : 2-D numpy array
        Rows are used here as (time, tag id, boot count).
    time_edge_list_reshaped : 2-D numpy array with 2 columns
        Rows are used here as (time, tag id); rows 2*i and 2*i+1 are the two
        endpoints of contact i, so the number of rows must be even.

    Returns
    -------
    numpy.ndarray of int64, shape (number of contacts, 3): the time followed
    by the packed (tag id, boot count) of the two endpoints.

    Raises
    ------
    ValueError
        If time_edge_list_reshaped has an odd number of rows.

    Side effects
    ------------
    Prints progress messages and writes "edge_list_and_boot_binary.dat" and
    "edge_list_and_boot_hashed.dat" in the current working directory.
    """
    n_rows = len(time_edge_list_reshaped)
    if n_rows % 2 == 1:
        # Fail before the expensive loop instead of on a shape mismatch after it.
        raise ValueError("time_edge_list_reshaped must have an even number of rows")

    # Columns: time, tag id, boot count (-1 until filled in by the loop).
    time_edge_list_and_boot = n.empty((n_rows, 3), dtype="int64")
    time_edge_list_and_boot[:, 0:2] = time_edge_list_reshaped
    time_edge_list_and_boot[:, 2] = -1

    # Column views are taken once instead of at every iteration.
    boot_times_all = resized_tag_boot_list[:, 0]
    boot_tags_all = resized_tag_boot_list[:, 1]
    boot_seq_all = resized_tag_boot_list[:, 2]
    contact_times_all = time_edge_list_reshaped[:, 0]
    contact_tags_all = time_edge_list_reshaped[:, 1]

    tag_id_list = n.unique(contact_tags_all)
    # Single-argument print calls give the same output on Python 2 and 3.
    print("before the loop")
    print("len(tag_id_list) is,  %d" % len(tag_id_list))
    for i, tag_id in enumerate(tag_id_list):
        print("i is,  %d" % i)
        tag_sel = boot_tags_all == tag_id
        contact_times_sel = contact_tags_all == tag_id
        time_edge_list_and_boot[contact_times_sel, 2] = interpolate_boot_count(
            boot_seq_all[tag_sel],
            boot_times_all[tag_sel],
            contact_times_all[contact_times_sel])
    print("after the loop")

    # Revert to the form of a binary interaction: one output row per couple
    # of consecutive input rows.
    even_rows = time_edge_list_and_boot[0::2]
    print("len(sel_even is, ),  %d" % len(even_rows))
    odd_rows = time_edge_list_and_boot[1::2]
    print("len(sel_odd is, ),  %d" % len(odd_rows))
    # Floor division keeps the row count an integer on Python 3 as well.
    n_pairs = n_rows // 2

    time_edge_list_and_boot_binary = n.empty((n_pairs, 5), dtype="int64")
    time_edge_list_and_boot_binary[:, 0] = odd_rows[:, 0]
    time_edge_list_and_boot_binary[:, 1] = even_rows[:, 1]
    time_edge_list_and_boot_binary[:, 2] = even_rows[:, 2]
    time_edge_list_and_boot_binary[:, 3] = odd_rows[:, 1]
    time_edge_list_and_boot_binary[:, 4] = odd_rows[:, 2]
    n.savetxt("edge_list_and_boot_binary.dat",
              time_edge_list_and_boot_binary, fmt='%d')

    # Replace each couple (tag_id, bootcount) by its packed integer.
    time_edge_list_and_boot_hashed = n.empty((n_pairs, 3), dtype="int64")
    time_edge_list_and_boot_hashed[:, 0] = time_edge_list_and_boot_binary[:, 0]
    print("hashing again")
    time_edge_list_and_boot_hashed[:, 1] = \
        couple_hash_table2(time_edge_list_and_boot_binary[:, 1:3])
    time_edge_list_and_boot_hashed[:, 2] = \
        couple_hash_table2(time_edge_list_and_boot_binary[:, 3:5])
    print("hashing done again")
    n.savetxt("edge_list_and_boot_hashed.dat",
              time_edge_list_and_boot_hashed, fmt='%d')
    return time_edge_list_and_boot_hashed
# --- Script body -----------------------------------------------------------
# Usage: <script> EDGE_LIST_FILE
# Reads the time-dependent edge list named on the command line and the
# bootcount table "boot_new_out.dat" from the current directory, then writes
# "edge_list_reshaped.dat" plus the files produced by
# associate_boot_to_tag_improved2().

# Edge list: whitespace-separated integers, one row per line.  Blank lines
# are skipped, since an empty row would make the array ragged.
with open(sys.argv[1]) as f:
    time_edge_list = [[int(tok) for tok in line.split()]
                      for line in f if line.strip()]
time_edge_list = n.array(time_edge_list, dtype="int64")
print("done reading the edge list")

# Bootcount table, same text format.
# NOTE(review): an earlier version built this table with
# resize_bootcount_file2() from "tag_and_boot_every_20_sec.dat" and saved it
# as "boot_count_cut.dat"; "boot_new_out.dat" is presumably its precomputed
# replacement -- confirm.
with open("boot_new_out.dat") as f:
    bootcount_cut = [[int(tok) for tok in line.split()]
                     for line in f if line.strip()]
bootcount_cut = n.array(bootcount_cut, dtype="int64")
print("done reading the bootcount list")

# One (time, tag) row per contact endpoint.
time_edge_list_reshaped = duplicate_and_mix_array(time_edge_list)
time_edge_list_reshaped = time_edge_list_reshaped.astype("int64")
n.savetxt("edge_list_reshaped.dat", time_edge_list_reshaped, fmt='%d')

# Alternative kept for reference (exact time matching instead of
# interpolation):
#   associate_boot_to_tag_improved(resized_tag_boot_list,
#                                  time_edge_list_reshaped)
time_and_boot_edge_list = \
    associate_boot_to_tag_improved2(bootcount_cut, time_edge_list_reshaped)
print("So far so good")