挺喜欢用Java写一些小东西的,这次的内容比较简单,是利用正则表达式提取本地文件或者URL中的邮箱信息。
正则表达式:
"[\\w[.-]]+@[\\w[.-]]+\\.[\\w]+"
代码如下:
import java.util.regex.*;
import java.io.*;
import java.net.*;
import java.util.*;

/**
 * Extracts e-mail addresses from a local text file or from the content behind a URL.
 *
 * <p>Every address found is appended to a target file (one address per line), kept in an
 * in-memory list, and finally echoed to the console by {@link #show()}.
 */
public class EmailCatch {

    /** E-mail regular expression; compiled once because it never changes. */
    private static final Pattern EMAIL_PATTERN =
            Pattern.compile("[\\w[.-]]+@[\\w[.-]]+\\.[\\w]+");

    private BufferedReader br;
    private PrintWriter pw;
    private List<String> m;

    /**
     * Creates a catcher that appends its results to the given target file.
     * No input source is set; use {@link #start(String)} to read from a URL.
     *
     * @param tf name of the target file; opened in append mode
     */
    public EmailCatch(String tf) {
        // Initialise the list first so it exists even if the writer cannot be opened.
        this.m = new ArrayList<String>();
        try {
            this.pw = new PrintWriter(new FileWriter(tf, true));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates a catcher that reads from a local source file and appends to a target file.
     *
     * @param sf name of the source file to scan
     * @param tf name of the target file; opened in append mode
     */
    public EmailCatch(String sf, String tf) {
        this(tf);
        try {
            // NOTE(review): FileReader decodes with the platform default charset.
            this.br = new BufferedReader(new FileReader(sf));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Scans the current input source line by line, writes every address found to the target
     * file, prints the collected addresses via {@link #show()}, and closes both streams.
     *
     * <p>The streams are closed even when reading fails part-way through.
     *
     * @throws IllegalStateException if no input source is open or the target file could not
     *         be opened
     */
    public void start() {
        try {
            if (br == null) {
                throw new IllegalStateException(
                        "No input source: construct with a source file or call start(String url)");
            }
            if (pw == null) {
                throw new IllegalStateException("Target file could not be opened for writing");
            }

            String line;
            while ((line = br.readLine()) != null) {
                // A single line may carry several addresses; keep all of them.
                for (String email : getEmails(line)) {
                    m.add(email);
                    pw.println(email);
                }
                pw.flush();
            }

            show(); // echo the results on the console for verification
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeResources();
        }
    }

    /**
     * Opens the given URL as the input source and then runs {@link #start()}.
     *
     * @param urlAdd address of the page to scan
     */
    public void start(String urlAdd) {
        try {
            URL url = new URL(urlAdd);
            URLConnection conn = url.openConnection();

            // NOTE(review): decodes with the platform default charset; the page's real
            // encoding is not inspected here.
            br = new BufferedReader(new InputStreamReader(conn.getInputStream()));
        } catch (MalformedURLException e) {
            e.printStackTrace();
            closeResources(); // do not leak the already-open target file
            return;
        } catch (IOException e) {
            e.printStackTrace();
            closeResources();
            return;
        }

        start();
    }

    /**
     * Returns the first e-mail address found in the given text.
     *
     * @param s text to search; may be {@code null}
     * @return the first match, or {@code null} when there is none
     */
    public String getEmail(String s) {
        if (s == null) {
            return null;
        }
        Matcher matcher = EMAIL_PATTERN.matcher(s);
        return matcher.find() ? matcher.group() : null;
    }

    /**
     * Returns every e-mail address found in the given text, in order of appearance.
     *
     * @param s text to search; may be {@code null}
     * @return the matches; empty when there are none, never {@code null}
     */
    public List<String> getEmails(String s) {
        List<String> found = new ArrayList<String>();
        if (s == null) {
            return found;
        }
        Matcher matcher = EMAIL_PATTERN.matcher(s);
        while (matcher.find()) {
            found.add(matcher.group());
        }
        return found;
    }

    /** Prints every collected address to standard output, prefixed with its index. */
    public void show() {
        for (int i = 0; i < m.size(); ++i) {
            System.out.println(i + ":" + m.get(i));
        }
    }

    /** Closes the writer and the reader if they were opened; close failures are only logged. */
    private void closeResources() {
        if (pw != null) {
            pw.close();
        }
        if (br != null) {
            try {
                br.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    public static void main(String[] args) {

        new EmailCatch("d:/email.txt", "d:/emailCatcher.txt").start(); // read from a local file
        System.out.println("========================");
        new EmailCatch("d:/emailCatcherURL.txt").start("http://tieba.baidu.com/p/941471635"); // read straight from a web page

    }

}
main方法里做了两个例子。
另外想问问大家对于邮箱的正则表达式是否有更好的写法?
-END