Contents
导出各种邮箱联系人
weafriend <[email protected]>; reply-to: [email protected]; to: [email protected]; date: Mon, Sep 15, 2008 at 10:15; subject: [CPyUG:65691] 分享导出邮件联系人的脚本
- hi all
分享导出邮件联系人的脚本.
MailContact1.py
1 #!/usr/bin/env python
2
3 #coding=utf-8
4
5 from BeautifulSoup import BeautifulSoup
6
7 import os,urllib,urllib2,pdb
8
9 import cookielib
10
11 import httplib
12
13 import csv,re
14
15
16
# Request path of the ClientLogin endpoint that GMailContact.login() POSTs to.
GDATA_URL = "/accounts/ClientLogin"
18
19
20
class MailContactError(Exception):
    """Error raised by the contact exporters when a login or page request fails."""
24
25
26
class MailContact:
    """Common interface of the provider-specific contact exporters.

    The methods below are no-op placeholders that return None; concrete
    providers override the ones they support. After a successful
    ``get_contacts()`` a provider exposes its result in ``self.contacts``.
    """

    def __init__(self, username, password):
        """Remember the credentials and start with an empty contact list.

        Storing these here means a subclass that does not define its own
        ``__init__`` still has ``username``, ``password`` and ``contacts``
        attributes instead of raising AttributeError when they are read.
        """
        self.username = username
        self.password = password
        self.contacts = []

    def login(self):
        """Placeholder: authenticate against the provider. Does nothing here."""
        pass

    def get_contacts(self):
        """Placeholder: fill ``self.contacts``. Does nothing here."""
        pass

    def get_contact_page(self):
        """Placeholder: fetch the raw address-book page. Does nothing here."""
        pass
44
45
46
class GMailContact(MailContact):
    """Retrieve a user's contacts from their Google account.

    Dependencies:
    -------------
    * BeautifulSoup (parses the contacts feed).

    Usage:
    ------
    >>> g = GMailContact('someone', 'password')
    >>> g.login()
    (200, 'OK')
    >>> g.get_contacts()
    >>> g.contacts
    [(u'Persons Name', 'someone.else@example.com'), ...]
    """

    def __init__(self, username='[email protected]', password='test', service='cp'):
        """Store the credentials and the ClientLogin parameters.

        ``username`` may be a bare Gmail user name (``@gmail.com`` is
        appended) or a complete address, which is used unchanged so that
        hosted Google domains are not turned into ``user@domain@gmail.com``.
        """
        self.mail_type = "@gmail.com"
        if '@' in username:
            self.username = username
        else:
            self.username = username + self.mail_type
        self.password = password
        self.account_type = 'HOSTED_OR_GOOGLE'  # Allow both Google Domain and Gmail accounts
        self.service = service  # Defaults to cp (contacts)
        self.source = 'google-data-import'  # Our application name
        self.code = ''  # Empty by default, populated by self.login()
        self.contacts = []  # Empty list by default, populated by self.get_contacts()

    def login(self):
        """Log in to Google and store the returned Auth token in ``self.code``.

        Returns:
            The ``(status, reason)`` pair of the HTTP response.

        Raises:
            MailContactError: the response status is not 200, or the body
                contains no ``Auth=`` line.
        """
        data = urllib.urlencode({
            'accountType': self.account_type,
            'Email': self.username,
            'Passwd': self.password,
            'service': self.service,
            'source': self.source
        })
        headers = {
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/plain'
        }

        conn = httplib.HTTPSConnection('google.com')
        try:
            conn.request('POST', GDATA_URL, data, headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't log in. HTTP Code: %s, %s" % (response.status, response.reason))
            body = response.read()
        finally:
            # Release the socket on the error path as well.
            conn.close()

        # Look the token up by its key rather than by line position.
        token = None
        for line in body.splitlines():
            if line.startswith('Auth='):
                token = line[len('Auth='):]
                break
        if token is None:
            raise MailContactError("Couldn't log in. No Auth token in the response.")

        self.code = token
        return response.status, response.reason

    def _request(self, max_results=200):
        """Fetch the raw contacts feed for ``self.username``.

        Args:
            max_results: value of the ``max-results`` query parameter.

        Returns:
            The response body.

        Raises:
            MailContactError: the response status is not 200.
        """
        url = '/m8/feeds/contacts/%s/base/?max-results=%d' % (self.username, max_results)
        headers = {'Authorization': 'GoogleLogin auth=%s' % self.code}

        conn = httplib.HTTPConnection('www.google.com')
        try:
            conn.request('GET', url, headers=headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't get contact page. HTTP Code: %s, %s" % (response.status, response.reason))
            return response.read()
        finally:
            conn.close()

    def get_contacts(self, max_results=200):
        """Parse the feed from ``self._request`` and repopulate ``self.contacts``.

        An entry is kept only when the parent of its ``title`` element holds
        exactly two ``gd:email``/``title`` elements; the stored tuple is
        ``(string of the first, 'address' attribute of the second)``.
        """
        soup = BeautifulSoup(self._request(max_results))
        self.contacts = []
        for entry in soup.findAll('title'):
            fields = entry.parent.findAll(['gd:email', 'title'])
            if len(fields) == 2:
                self.contacts.append((fields[0].string, fields[1].get('address')))
        return
220
221
222
class M126Contact(MailContact):
    """Export the address book of a 126.com mailbox.

    ``login()`` posts the credentials, extracts the ``Coremail`` session
    cookie and prepares ``contact_url`` / ``contact_headers``;
    ``get_contacts()`` then downloads the address page and fills
    ``self.contacts``.
    """

    def __init__(self, username, password):
        """Store the credentials and pre-build the login request."""
        self.mail_type = "@126.com"
        self.username = username
        self.password = password
        self.contacts = []
        self.login_host = 'entry.mail.126.com'
        self.login_url = '/cgi/login?redirTempName=https.htm&hid=10010102&lightweight=1&verifycookie=1&language=0&style=-1'
        self.login_data = urllib.urlencode({
            'domain': '126.com',
            'language': 0,
            'bCookie': '',
            'user': self.username,
            'pass': self.password,
            'style': -1,
            'remUser': '',
            'secure': '',
            'enter.x': '%B5%C7+%C2%BC'
        })
        self.login_headers = {
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/xml,text/plain',
            'Referer': 'http://www.126.com/'
        }
        self.contact_host = 'g2a10.mail.126.com'
        # The template is kept apart from contact_url so that login() can be
        # called more than once; formatting the already-formatted URL would
        # hit the literal "%2f" and raise TypeError.
        self._contact_url_template = '/coremail/fcg/ldvcapp?funcid=prtsearchres&sid=%(sid)s&listnum=200&tempname=address%%2faddress.htm'
        self.contact_url = self._contact_url_template

    def login(self):
        """Log in and derive the session id used by the address-book request.

        Raises:
            MailContactError: the response status is not 200, or the
                response carries no ``Coremail`` cookie (treated as wrong
                user name or password).
        """
        conn = httplib.HTTPSConnection(self.login_host)
        try:
            conn.request('POST', self.login_url, self.login_data, self.login_headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't log in. HTTP Code: %s, %s" % (response.status, response.reason))
            # Example header:
            #   Coremail=aaYgsaQsvSmKa%MBgzxnddkKzjPJUTbMddRUIgVwfeiBUd; path=/; domain=.126.com
            # for which sid is "MBgzxnddkKzjPJUTbMddRUIgVwfeiBUd".
            sc = response.getheader('Set-Cookie')
        finally:
            conn.close()

        found = re.search(r'Coremail=([^;,\s]+)', sc) if sc else None
        if not found:
            # Wrong user name or password. The password is deliberately not
            # echoed into the message.
            raise MailContactError("Email user %s%s: user name or password not correct!" % (self.username, self.mail_type))
        coremail = found.group(1)
        # The session id is everything after the first '%' of the cookie value.
        sid = coremail[coremail.find('%') + 1:]

        self.contact_url = self._contact_url_template % {'sid': sid}
        self.contact_headers = {
            'Cookie': '; '.join([
                'MAIL126_SSN=' + self.username,
                'NETEASE_SSN=' + self.username,
                'nts_mail_user=' + self.username,
                'logType=df',
                'ntes_mail_firstpage=normal',
                'Coremail=' + coremail,
                'mail_host=g2a14.mail.126.com',
                'mail_sid=' + sid,
                'mail_uid=' + self.username + '@126.com',
                'mail_style=dm3',
                'oulink_h=520',
                'ntes_mail_noremember=true',
            ])
        }

    def get_contact_page(self):
        """Download the address-book page prepared by ``login()``.

        Returns:
            The response body.

        Raises:
            MailContactError: the response status is not 200.
        """
        conn = httplib.HTTPConnection(self.contact_host)
        try:
            conn.request('GET', self.contact_url, headers=self.contact_headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't getc contact page. HTTP Code: %s, %s" % (response.status, response.reason))
            return response.read()
        finally:
            conn.close()

    def get_contacts(self):
        """Rebuild ``self.contacts`` from the ``xmp`` tags of the address page.

        Only ``xmp`` tags whose ``id`` starts with ``'t'`` are used; tags
        without an ``id`` are skipped. Each contact is the tuple
        ``(x.contents[0], x.space.string)``.
        """
        page = self.get_contact_page()
        self.contacts = []
        soup = BeautifulSoup(page)
        for x in soup.findAll('xmp'):
            if (x.get('id') or '').startswith('t'):
                self.contacts.append((x.contents[0], x.space.string))
357
358
359
class M163Contact(MailContact):
    """Export the address book of a 163.com mailbox.

    ``login()`` is a two-step exchange: the credentials are posted to
    ``reg.163.com``, then the cookies from that response are presented to
    ``fm163.163.com`` to obtain the mail session cookie.
    ``get_contacts()`` downloads the address page and fills
    ``self.contacts``.
    """

    def __init__(self, username, password):
        """Store the credentials and pre-build the login request."""
        self.mail_type = "@163.com"
        self.username = username
        self.password = password
        self.contacts = []
        self.login_host = 'reg.163.com'
        self.login_url = '/logins.jsp?type=1&url=http://fm163.163.com/coremail/fcg/ntesdoor2?lightweight=1&verifycookie=1&language=-1&style=-1'

        self.login_data = urllib.urlencode({
            'verifycookie': 1,
            'style': -1,
            'product': 'mail163',
            'username': self.username,
            'password': self.password,
            'selType': -1,
            'remUser': '',
            'secure': 'on'
        })
        self.login_headers = {
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/xml,text/plain',
            'Referer': 'http://mail.163.com/'
        }
        self.contact_host = 'g2a10.mail.163.com'

    def login(self):
        """Log in and prepare ``contact_url`` / ``contact_headers``.

        Returns:
            True on success.

        Raises:
            MailContactError: the first response status is not 200, the
                ``NTES_SESS`` / ``NETEASE_ADV`` cookies are missing (treated
                as wrong user name or password), or the second response
                carries no ``Set-Cookie`` header.
        """
        conn = httplib.HTTPSConnection(self.login_host)
        try:
            conn.request('POST', self.login_url, self.login_data, self.login_headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't log in. HTTP Code: %s, %s" % (response.status, response.reason))
            sc1 = response.getheader('Set-Cookie')
        finally:
            conn.close()

        # Expected cookies, for example:
        #   Set-Cookie: NTES_SESS=ohAWkiyj...BcAtKL; domain=.163.com; path=/
        #   Set-Cookie: NETEASE_SSN=weafriend; domain=.163.com; path=/; expires=Mon, 08-Jun-2009 10:42:26 GMT
        #   Set-Cookie: NETEASE_ADV=11&24&1212921746999; domain=.163.com; path=/; expires=Mon, 08-Jun-2009 10:42:26 GMT
        ntes_sess, ntes_adv = None, None
        for token in (sc1 or '').split():
            # Value = text between the first '=' and the first ';' (if any).
            value = token.partition('=')[2].partition(';')[0]
            if token.startswith('NTES_SESS'):
                ntes_sess = value
            elif token.startswith('NETEASE_ADV'):
                ntes_adv = value
        if not ntes_sess or not ntes_adv:
            # Wrong user name or password. The password is deliberately not
            # echoed into the message.
            raise MailContactError("Email user %s%s: user name or password not correct!" % (self.username, self.mail_type))

        # Second step: use the account being logged in, not a fixed user name.
        url = ('/coremail/fcg/ntesdoor2?lightweight=1&verifycookie=1'
               '&language=-1&style=-1&username=' + urllib.quote(self.username))
        conn = httplib.HTTPConnection('fm163.163.com')
        try:
            # A GET carries no body, so none is passed.
            conn.request('GET', url, headers={'cookie': sc1})
            response = conn.getresponse()
            sc2 = response.getheader('Set-Cookie')
        finally:
            conn.close()
        if not sc2:
            raise MailContactError("Couldn't log in. No session cookie returned. HTTP Code: %s, %s" % (response.status, response.reason))

        coremail = sc2.partition('=')[2].partition(';')[0]
        # The session id is everything after the first '%' of the cookie value.
        sid = coremail[coremail.find('%') + 1:]
        self.contact_url = '/coremail/fcg/ldvcapp?funcid=prtsearchres&sid=' + sid + '&listnum=200&tempname=address%2faddress.htm'

        self.contact_headers = {
            'Cookie': '; '.join([
                'MAIL163_SSN=' + self.username,
                'vjlast=1212911118',
                'vjuids=-99d7a91f6.1156a6ea3cd.0.9e6d0e6f029e78',
                '_ntes_nuid=7118c6a1c9d16ee59a045a2e66186af8',
                'NTES_adMenuNum=3',
                '_ntes_nnid=7118c6a1c9d16ee59a045a2e66186af8,0|www|urs|163mail|news|ent|sports|digi|lady|tech|stock|travel|music|2008|',
                'NTES_UFC=9110001100010000000000000000000000100000000000000002331026300000',
                'logType=-1',
                'nts_mail_user=' + self.username + ':-1:1',
                'Province=010',
                '_ntes_nvst=1212911122953,|www|urs|',
                'Coremail=' + coremail,
                'wmsvr_domain=g1a109.mail.163.com',
                'ntes_mail_truename=',
                'ntes_mail_province=',
                'ntes_mail_sex=',
                'mail_style=js3',
                'mail_host=g1a109.mail.163.com',
                'mail_sid=' + sid,
                'USERTRACK=58.31.69.214.1212911333143304',
                'ntes_mail_firstpage=normal',
                'NTES_SESS=' + ntes_sess,
                'NETEASE_SSN=' + self.username,
                'NETEASE_ADV=' + ntes_adv,
            ])
        }
        return True

    def get_contact_page(self):
        """Download the address-book page prepared by ``login()``.

        Returns:
            The response body.

        Raises:
            MailContactError: the response status is not 200.
        """
        conn = httplib.HTTPConnection(self.contact_host)
        try:
            conn.request('GET', self.contact_url, headers=self.contact_headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't getc contact page. HTTP Code: %s, %s" % (response.status, response.reason))
            return response.read()
        finally:
            conn.close()

    def get_contacts(self):
        """Rebuild ``self.contacts`` from the ``xmp`` tags of the address page.

        Only ``xmp`` tags whose ``id`` starts with ``'t'`` are used; tags
        without an ``id`` are skipped. The list is reset first so repeated
        calls do not accumulate duplicates.
        """
        page = self.get_contact_page()
        self.contacts = []
        soup = BeautifulSoup(page)
        for x in soup.findAll('xmp'):
            if (x.get('id') or '').startswith('t'):
                self.contacts.append((x.contents[0], x.space.string))
538
539
540
541
542
543
544
545
546
class SohuContact(MailContact):
    """Export the address book of a sohu.com mailbox.

    ``login()`` posts the credentials through a cookie-aware opener and
    derives ``contact_url`` from the URL the login ends up on;
    ``get_contacts()`` requests that URL and reads the contacts from the
    JSON on the first line of the response.
    """

    def __init__(self, username, password):
        """Store the credentials, pre-build the login form and the opener."""
        self.mail_type = "@sohu.com"
        self.username = username
        self.password = password
        self.contacts = []
        self.login_host = 'passport.sohu.com'
        self.login_url = 'http://passport.sohu.com/login.jsp'
        self.login_data = urllib.urlencode({
            'loginid': self.username + self.mail_type,
            'passwd': self.password,
            'sg': '5175b065623bb194e85903f5e8c43386',
            'eru': 'http://login.mail.sohu.com/login.php',
            'ru': 'http://login.mail.sohu.com/login_comm.php',
            'appid': 1000,
            'fl': '1',
            'ct': 1126084880,
            'vr': '1|1'
        })
        # NOTE(review): these headers are built but login() has never sent
        # them; left unsent here too - confirm whether the server needs them.
        self.login_headers = {
            'User-agent': 'Opera/9.23',
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/xml,text/plain'
        }
        # One cookie-aware opener per instance, so the cookies received by
        # login() are the ones sent by get_contacts() even when several
        # SohuContact objects exist.
        self._opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookielib.CookieJar()))
        # Kept for backward compatibility: the opener is also installed as
        # the process-wide default used by urllib2.urlopen().
        urllib2.install_opener(self._opener)
        self.contact_host = 'www50.mail.sohu.com'
        self.contact_url = '/webapp/contact'

    def login(self):
        """Post the login form and set ``contact_url`` from the final URL."""
        req = urllib2.Request(self.login_url, self.login_data)
        conn = self._opener.open(req)
        try:
            landed_on = conn.geturl()
        finally:
            conn.close()
        # The contact service sits next to the page the login landed on.
        self.contact_url = os.path.dirname(landed_on) + '/contact'

    def get_contacts(self):
        """Rebuild ``self.contacts`` as ``(name, email)`` tuples.

        Raises:
            MailContactError: the response is empty.
        """
        # Prefer the standard-library parser; fall back to simplejson where
        # the json module is not available.
        try:
            import json
        except ImportError:
            import simplejson as json

        conn = self._opener.open(urllib2.Request(self.contact_url))
        try:
            lines = conn.readlines()
        finally:
            conn.close()
        if not lines:
            raise MailContactError("Couldn't get contact page. Empty response from %s" % self.contact_url)

        info = json.loads(lines[0])
        self.contacts = [(i['name'], i['email']) for i in info['listString']]
630
631
632
class HotmailContact(MailContact):
    """Unfinished exporter for hotmail.com mailboxes.

    ``login()`` and ``get_contacts()`` only print the page they receive;
    ``self.contacts`` is never filled. The ``PPFT`` form value and the
    ``Cookie`` header are fixed strings.
    NOTE(review): those look like values captured from one browser session
    and probably have to be obtained per login - confirm.
    """

    def __init__(self, username, password):
        """Store the credentials and pre-build the login request."""
        self.mail_type = "@hotmail.com"
        self.username = username
        self.password = password
        self.contacts = []
        self.login_host = 'login.live.com'
        self.login_url = '/ppsecure/post.srf?id=2'
        # The pad is the filler text shortened by the password length. The
        # length is computed explicitly because a "[0:-len(password)]" slice
        # yields an empty pad for an empty password.
        pad = 'IfYouAreReadingThisYouHaveTooMuchFreeTime'
        pwd_pad = pad[:max(0, len(pad) - len(self.password))]
        self.login_data = urllib.urlencode({
            'login': self.username + self.mail_type,
            'passwd': self.password,
            'PPSX': 'Pass',
            'LoginOption': 2,
            'PwdPad': pwd_pad,
            'PPFT': 'B1S2dWnsGTFLpX9h8fxfE*ym5OABStpt0fjo%21YICXQOy1b%21xP4dRx8F1h1w6tR8ZyLP4h3TYGS8gSZGku3j7CxQ4poqr'
        })
        self.login_headers = {
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/xml,text/plain',
            'Cookie': 'CkTst=G1213457870062; MobileProf=2AV3mTOwJEE8smIfIyq69wbCn08y6UX7910BtLhqTto2MYrNSBW5hhlEuGlMJdMwwGq1WcxtENCAI1JSyTNfrS23ArFLxDjBNk!xtbIj0iglbu8DQVg9TnSTPtHj975deR; MUID=C2DC0F9324AA47DCB05CE14B989D89C2; ANON=A=E81AEA51F927860B07BBA712FFFFFFFF&E=69f&W=2; s_lastvisit=1213455335875; MH=MSFT; wlidperf=throughput=2087.201125175809&latency=1.422; MSPRequ=lt=1213455763&co=1&id=2; MSPOK=uuid-d75c4c53-1b6e-433c-af95-c3c0175a48cd; CkTst=G1213455761093; [email protected]; MSPCID=0f45e10de2ad38c9; NAP=V=1.7&E=6b4&C=bKkGf4IbC96JLFhsoKyccKm1Kf7jjhX5I3C1ofjvyMoY3iI9j0b6gg&W=2; MSPSoftVis=@:@; BrowserSense=Win=1&Downlevel=0&WinIEOnly=0&Firefox=1&FirefoxVersion=2.0; mktstate=U=&E=en-us; mkt1=norm=en-us; s_cc=true; s_sq=%5B%5BB%5D%5D; MSPP3RD=3688532421',
            'Referer': 'http://login.live.com/ppsecure/post.srf?id=2&bk=1213455763'
        }

        self.contact_host = 'by120w.bay120.mail.live.com'
        self.contact_url = '/mail/GetContacts.aspx'

    def getInputValue(self, name, content):
        """Placeholder, not implemented; always returns None."""
        pass

    def login(self):
        """Request the login form, post the credentials and print the reply.

        The login sequence is described at
        http://blog.jiexoo.com/2008/05/21/%e7%94%a8httpclient%e8%8e%b7%e5%8f%96hotmail%e8%81%94%e7%b3%bb%e4%ba%ba%e5%88%97%e8%a1%a8/

        Raises:
            MailContactError: the POST response status is not 200.
        """
        conn = httplib.HTTPSConnection(self.login_host)
        try:
            # The request path must start with '/'.
            conn.request('GET', '/login.srf?id=2')
            conn.getresponse().read()
        finally:
            conn.close()

        conn = httplib.HTTPSConnection(self.login_host)
        try:
            conn.request('POST', self.login_url, self.login_data, self.login_headers)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't getc contact page. HTTP Code: %s, %s" % (response.status, response.reason))
            page = response.read()
        finally:
            conn.close()
        print(page)

    def get_contacts(self):
        """Fetch the contact page and print it; ``self.contacts`` is not filled.

        Raises:
            MailContactError: the response status is not 200.
        """
        conn = httplib.HTTPConnection(self.contact_host)
        try:
            conn.request('GET', self.contact_url)
            response = conn.getresponse()
            if response.status != 200:
                raise MailContactError("Couldn't getc contact page. HTTP Code: %s, %s" % (response.status, response.reason))
            page = response.read()
        finally:
            conn.close()
        print(page)
736
737
738
class SinaContact(MailContact):
    """Placeholder for sina.com; adds nothing to MailContact yet."""
742
743
744
745
746
747
748
class YahooContact(MailContact):
    """Placeholder for Yahoo mail; adds nothing to MailContact yet."""
752
753
754
class MsnContact(MailContact):
    """Placeholder for MSN; adds nothing to MailContact yet."""
758
759
760
def get_mailcontact(user, password, mailtype):
    """Log in to the given provider and return the account's contacts.

    Args:
        user: account name, passed unchanged to the provider class.
        password: account password.
        mailtype: provider domain; one of "126.com", "163.com", "sohu.com",
            "hotmail.com", "sina.com" or "gmail.com".

    Returns:
        The provider object's ``contacts`` list, or ``[]`` when ``mailtype``
        is not one of the domains above or when logging in or fetching the
        contacts raises an exception (best effort).
    """
    if mailtype == "126.com":
        g = M126Contact(user, password)
    elif mailtype == "163.com":
        g = M163Contact(user, password)
    elif mailtype == "sohu.com":
        g = SohuContact(user, password)
    elif mailtype == "hotmail.com":
        g = HotmailContact(user, password)
    elif mailtype == "sina.com":
        g = SinaContact(user, password)
    elif mailtype == "gmail.com":
        g = GMailContact(user, password)
    else:
        # Unknown provider: say so explicitly rather than relying on an
        # unbound-variable error being swallowed below.
        return []

    try:
        g.login()
        g.get_contacts()
        return g.contacts
    except Exception:
        # Best effort by design, but KeyboardInterrupt / SystemExit are no
        # longer swallowed.
        return []
798
799
800
801
802
803
804
805
806
807
808
def get_csvcontact(iter):
    """Extract ``(name, email)`` pairs from CSV lines.

    Each row is scanned left to right. Blank cells are skipped. The first
    cell that contains an e-mail address ends the row and yields a pair
    whose name is the last non-blank cell seen before it in the same row,
    or None when there was none.

    Args:
        iter: any iterable of CSV lines accepted by ``csv.reader``.

    Returns:
        A list of ``(name, email)`` tuples, at most one per row.
    """
    # Raw string; the local part may contain '.', '+' and '-' so addresses
    # such as "first.last@host" are not cut down to "last@host".
    address = re.compile(r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+')
    contact = []
    for row in csv.reader(iter):
        # Reset per row so a name never carries over from an earlier row.
        name = None
        for cell in row:
            if not cell or not cell.strip():
                continue
            found = address.search(cell)
            if found:
                contact.append((name, found.group(0)))
                break
            name = cell
    return contact
838
839
840
def get_imcontact(iter):
    """Collect every e-mail address found in CSV lines.

    Every cell of every row is searched; each cell contributes at most one
    address (the first one found in it).

    Args:
        iter: any iterable of CSV lines accepted by ``csv.reader``.

    Returns:
        A list of address strings in the order they were found.
    """
    # Raw string; the local part may contain '.', '+' and '-'.
    address = re.compile(r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+')
    contact = []
    for row in csv.reader(iter):
        for cell in row:
            found = address.search(cell)
            if found:
                # Store the matched text, not the match object.
                contact.append(found.group(0))
    return contact
860
861
862
if __name__ == '__main__':
    # Manual smoke run: replace the '***' placeholders with real credentials.
    # (The unconditional pdb.set_trace() that used to open this block is
    # gone; it stopped every run in the debugger.)

    # Print the HTTP traffic of every httplib connection.
    httplib.HTTPSConnection.debuglevel = 1
    httplib.HTTPConnection.debuglevel = 1

    g = GMailContact('***', '***')
    g.login()
    g.get_contacts()
    print(g.contacts)

    # The class is M163Contact; "M163ContactContact" is not defined.
    g = M163Contact('***', '***')
    g.login()
    g.get_contacts()
    print(g.contacts)
ERROR: EOF in multi-line statement
反馈
创建 by -- ZoomQuiet [2008-09-15 12:26:12]