Nie jesteś zalogowany.
Jeśli nie posiadasz konta, zarejestruj je już teraz! Pozwoli Ci ono w pełni korzystać z naszego serwisu. Spamerom dziękujemy!
Prosimy o pomoc dla małej Julki — przekaż 1% podatku na Fundacji Dzieciom zdazyć z Pomocą.
Więcej informacji na dug.net.pl/pomagamy/.
Witam, mam problem z program.Od razu zaznaczam że piszę go na potrzeby studiów a nie własnych.Program mam zbierać maile ze strony www.
Przerobilem delikatnie program ktory zbierał linki lecz coś nie chce mi działać . Mógłby ktoś mi coś porawdzić
import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintWriter; import java.net.Socket; import java.net.UnknownHostException; import java.util.ArrayList; /** * * @author Blizzard */ public class niuchacz extends Thread { static int getLengthOfTask() { throw new UnsupportedOperationException("Not yet implemented"); } private String pobierzStrone(String adres, int port, String sciezka) throws UnknownHostException, IOException { PrintWriter netOut = null; BufferedReader netIn = null; try { Socket s = new Socket(adres, port); netIn = new BufferedReader(new InputStreamReader(s.getInputStream())); netOut = new PrintWriter(s.getOutputStream()); netOut.println("GET " + sciezka + " HTTP/1.1"); netOut.println("HOST: " + adres); netOut.println(); netOut.flush(); StringBuilder page = new StringBuilder(); String line; while ((line = netIn.readLine()) != null) { ; page.append(line).append("\n"); } String pageStr = page.toString(); return pageStr; } finally { try { if (netIn != null) { netIn.close(); } if (netOut != null) { netOut.close(); } } catch (IOException ex) { System.out.println(ex); } } } public String[] znajdzLinki(String adres, int port, String sciezka) throws UnknownHostException, IOException { String strona = pobierzStrone(adres, port, sciezka); String[] tab1 = strona.split("[a-zA-Z0-9\\._%+-]+@[A-Z0-9.-]+\\.[a-zA-Z]{2,4}"); ArrayList<String> linki = new ArrayList<String>(); for (int i = 1; i < tab1.length; ++i) { String[] tab2 = tab1[i].split("\""); String lnk = tab2[0]; linki.add(lnk); } String[] linkiTab = linki.toArray(new String[1]); return linkiTab; } @Override public void run() { } }
Offline
Spróbuj może z taką metodą:
public String[] znajdzLinki(String adres, int port, String sciezka) throws UnknownHostException, IOException { String strona = pobierzStrone(adres, port, sciezka); Pattern pattern = Pattern.compile("[a-zA-Z0-9\\._%+-]+@[a-zA-Z0-9\\._%+-]+\\.[a-zA-Z]{2,4}"); Matcher matcher = pattern.matcher(strona); ArrayList<String> linki = new ArrayList<String>(); while (matcher.find()) { linki.add(matcher.group()); } String[] linkiTab = linki.toArray(new String[linki.size()]); return linkiTab; }
Ostatnio edytowany przez 0dd (2011-02-07 16:12:23)
Offline
poprawiłem lecz cos nie dziala:
to jest moj glowny plik :
/* * To change this template, choose Tools | Templates * and open the template in the editor. */ /* * NewJFrame.java * * Created on 2010-11-23, 16:18:52 */ package pl.krakow.up.inf3.g2.seba; import java.io.IOException; import java.net.UnknownHostException; import java.util.logging.Level; import java.util.logging.Logger; import javax.swing.JProgressBar; /** * * @author Blizzard */ public class Seba_Robot extends javax.swing.JFrame { /** Creates new form NewJFrame */ public Seba_Robot() { initComponents(); } /** This method is called from within the constructor to * initialize the form. * WARNING: Do NOT modify this code. The content of this method is * always regenerated by the Form Editor. */ @SuppressWarnings("unchecked") // <editor-fold defaultstate="collapsed" desc="Generated Code"> private void initComponents() { jProgressBar1 = new javax.swing.JProgressBar(); jLabel1 = new javax.swing.JLabel(); jLabel2 = new javax.swing.JLabel(); jLabel3 = new javax.swing.JLabel(); adresTF = new javax.swing.JTextField(); portTF = new javax.swing.JTextField(); ścieżkaTF = new javax.swing.JTextField(); startBT = new javax.swing.JButton(); jScrollPane1 = new javax.swing.JScrollPane(); linkiTA = new javax.swing.JTextArea(); jLabel4 = new javax.swing.JLabel(); koniecBT = new javax.swing.JButton(); JProgressBar = new javax.swing.JProgressBar(); jButton1 = new javax.swing.JButton(); jLabel5 = new javax.swing.JLabel(); setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE); setTitle("SEBA Spider v0.2"); setBackground(new java.awt.Color(0, 255, 255)); setBounds(new java.awt.Rectangle(0, 0, 0, 0)); setResizable(false); jLabel1.setText("SERWER"); jLabel2.setText("PORT"); jLabel3.setText("ŚCIEŻKA"); adresTF.setText("www.onet.pl"); adresTF.addActionListener(new java.awt.event.ActionListener() { public void actionPerformed(java.awt.event.ActionEvent evt) { adresTFActionPerformed(evt); } }); portTF.setText("80"); ścieżkaTF.setText("/"); startBT.setText("START !"); startBT.addActionListener(new java.awt.event.ActionListener() { public void actionPerformed(java.awt.event.ActionEvent evt) { startBTActionPerformed(evt); } }); linkiTA.setColumns(20); linkiTA.setRows(5); jScrollPane1.setViewportView(linkiTA); jLabel4.setFont(new java.awt.Font("Tahoma", 1, 18)); jLabel4.setForeground(new java.awt.Color(255, 0, 51)); jLabel4.setText("SEBA SPIDER v0.2"); koniecBT.setText("Wyjście"); koniecBT.addActionListener(new java.awt.event.ActionListener() { public void actionPerformed(java.awt.event.ActionEvent evt) { koniecBTActionPerformed(evt); } }); jButton1.setText("Anuluj"); jButton1.addActionListener(new java.awt.event.ActionListener() { public void actionPerformed(java.awt.event.ActionEvent evt) { jButton1ActionPerformed(evt); } }); jLabel5.setText("Przechwycone linki:"); javax.swing.GroupLayout layout = new javax.swing.GroupLayout(getContentPane()); getContentPane().setLayout(layout); layout.setHorizontalGroup( layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) .addGroup(layout.createSequentialGroup() .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) .addGroup(layout.createSequentialGroup() .addGap(33, 33, 33) .addComponent(jLabel1) .addGap(18, 18, 18) .addComponent(adresTF, javax.swing.GroupLayout.DEFAULT_SIZE, 349, Short.MAX_VALUE)) .addGroup(javax.swing.GroupLayout.Alignment.TRAILING, layout.createSequentialGroup() .addGap(23, 23, 23) .addComponent(jLabel4) .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED, 165, Short.MAX_VALUE) .addComponent(koniecBT, javax.swing.GroupLayout.PREFERRED_SIZE, 90, javax.swing.GroupLayout.PREFERRED_SIZE)) .addGroup(layout.createSequentialGroup() .addGap(48, 48, 48) .addComponent(jLabel2) .addGap(18, 18, 18) .addComponent(portTF, javax.swing.GroupLayout.PREFERRED_SIZE, 42, javax.swing.GroupLayout.PREFERRED_SIZE) .addGap(18, 18, 18) .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) .addComponent(jButton1, javax.swing.GroupLayout.DEFAULT_SIZE, 71, Short.MAX_VALUE) .addComponent(startBT)) .addGap(18, 18, 18) .addComponent(JProgressBar, javax.swing.GroupLayout.DEFAULT_SIZE, 191, Short.MAX_VALUE) .addGap(9, 9, 9)) .addGroup(layout.createSequentialGroup() .addGap(33, 33, 33) .addComponent(jLabel3) .addGap(18, 18, 18) .addComponent(ścieżkaTF, javax.swing.GroupLayout.DEFAULT_SIZE, 349, Short.MAX_VALUE)) .addGroup(layout.createSequentialGroup() .addContainerGap() .addComponent(jLabel5)) .addGroup(layout.createSequentialGroup() .addContainerGap() .addComponent(jScrollPane1, javax.swing.GroupLayout.DEFAULT_SIZE, 432, Short.MAX_VALUE))) .addContainerGap()) ); layout.setVerticalGroup( layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) .addGroup(layout.createSequentialGroup() .addContainerGap() .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE) .addComponent(koniecBT) .addComponent(jLabel4)) .addGap(26, 26, 26) .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE) .addComponent(jLabel1) .addComponent(adresTF, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE)) .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING, false) .addGroup(layout.createSequentialGroup() .addGap(18, 18, 18) .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING, false) .addComponent(jLabel2) .addComponent(portTF, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE))) .addGroup(layout.createSequentialGroup() .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) .addComponent(startBT, javax.swing.GroupLayout.PREFERRED_SIZE, 20, javax.swing.GroupLayout.PREFERRED_SIZE) .addGap(2, 2, 2) .addComponent(jButton1, javax.swing.GroupLayout.PREFERRED_SIZE, 23, javax.swing.GroupLayout.PREFERRED_SIZE)) .addGroup(javax.swing.GroupLayout.Alignment.TRAILING, layout.createSequentialGroup() .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE) .addComponent(JProgressBar, javax.swing.GroupLayout.PREFERRED_SIZE, 28, javax.swing.GroupLayout.PREFERRED_SIZE) .addGap(11, 11, 11))) .addGap(9, 9, 9) .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE) .addComponent(jLabel3) .addComponent(ścieżkaTF, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE)) .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED) .addComponent(jLabel5) .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) .addComponent(jScrollPane1, javax.swing.GroupLayout.DEFAULT_SIZE, 130, Short.MAX_VALUE) .addContainerGap()) ); pack(); }// </editor-fold> private void startBTActionPerformed(java.awt.event.ActionEvent evt) { niuchacz n = new niuchacz(); JProgressBar.setIndeterminate(true); String adres = adresTF.getText(); String sciezka = ścieżkaTF.getText(); int port = 0; try { port = Integer.parseInt(portTF.getText()); } catch (NumberFormatException ex) { } String[] linki = null; try { linki = n.znajdzLinki(adres, port, sciezka); } catch (UnknownHostException ex) { Logger.getLogger(Seba_Robot.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(Seba_Robot.class.getName()).log(Level.SEVERE, null, ex); } StringBuilder sb = new StringBuilder(); for (int i = 0; i < linki.length; ++i) { int pasek = 1; sb.append(linki[i]).append("\n"); JProgressBar.setStringPainted(true); } JProgressBar.setValue(100); //JProgressBar.setStringPainted(true); linkiTA.setText(sb.toString()); } private void adresTFActionPerformed(java.awt.event.ActionEvent evt) { // TODO add your handling code here: } private void koniecBTActionPerformed(java.awt.event.ActionEvent evt) { System.exit(1); } private void jButton1ActionPerformed(java.awt.event.ActionEvent evt) { // TODO add your handling code here: } /** * @param args the command line arguments */ public static void main(String args[]) { java.awt.EventQueue.invokeLater(new Runnable() { public void run() { new Seba_Robot().setVisible(true); } }); } // Variables declaration - do not modify private javax.swing.JProgressBar JProgressBar; private javax.swing.JTextField adresTF; private javax.swing.JButton jButton1; private javax.swing.JLabel jLabel1; private javax.swing.JLabel jLabel2; private javax.swing.JLabel jLabel3; private javax.swing.JLabel jLabel4; private javax.swing.JLabel jLabel5; private javax.swing.JProgressBar jProgressBar1; private javax.swing.JScrollPane jScrollPane1; private javax.swing.JButton koniecBT; private javax.swing.JTextArea linkiTA; private javax.swing.JTextField portTF; private javax.swing.JButton startBT; private javax.swing.JTextField ścieżkaTF; // End of variables declaration }
to moj który "ma" szukać
import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintWriter; import java.net.Socket; import java.net.UnknownHostException; import java.util.ArrayList; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * * @author Blizzard */ public class niuchacz extends Thread { static int getLengthOfTask() { throw new UnsupportedOperationException("Not yet implemented"); } private String pobierzStrone(String adres, int port, String sciezka) throws UnknownHostException, IOException { PrintWriter netOut = null; BufferedReader netIn = null; try { Socket s = new Socket(adres, port); netIn = new BufferedReader(new InputStreamReader(s.getInputStream())); netOut = new PrintWriter(s.getOutputStream()); netOut.println("GET " + sciezka + " HTTP/1.1"); netOut.println("HOST: " + adres); netOut.println(); netOut.flush(); StringBuilder page = new StringBuilder(); String line; while ((line = netIn.readLine()) != null) { ; page.append(line).append("\n"); } String pageStr = page.toString(); return pageStr; } finally { try { if (netIn != null) { netIn.close(); } if (netOut != null) { netOut.close(); } } catch (IOException ex) { System.out.println(ex); } } } public String[] znajdzLinki(String adres, int port, String sciezka) throws UnknownHostException, IOException { String strona = pobierzStrone(adres, port, sciezka); Pattern pattern = Pattern.compile("[a-zA-Z0-9\\._%+-]+@[a-zA-Z0-9\\._%+-]+\\.[a-zA-Z]{2,4}"); Matcher matcher = pattern.matcher(strona); ArrayList<String> linki = new ArrayList<String>(); while (matcher.find()) { linki.add(matcher.group()); } String[] linkiTab = linki.toArray(new String[linki.size()]); return linkiTab; } } @Override public void run() { } }
Ostatnio edytowany przez ryba_pank (2011-02-07 17:09:24)
Offline
A na jakich stronach testujesz? Na onet.pl chyba nie ma żadnego adresu e-mail.
Na stronie http://dug.net.pl/about/ działa dobrze.
Offline
A wiesz może jak by to zrobić zeby sprawdzało z kliku stron (adresy sa w pliku)?
Offline
No to trzeba zczytać adresy z pliku. Potem kolejno dla każdego wywołać to co już masz w poszukiwaniu adresów. Chyba dosyć oczywiste.
Offline
ok dzieki . Mam jeszcze takie pytanie , mam program który szuka linków na stronie sa jakies funckje które połączyły by te dwa programy zeby np. przeszukwiał strone i potem jej podstrony itd
Ostatnio edytowany przez ryba_pank (2011-02-08 12:58:00)
Offline
Trzeba pewnie napisać, raczej nic gotowego nie ma w JDK. Chyba, że poszukasz jakiegoś zewnętrznego kodu, wpisz "java web spider" w google. Mógłbyś sam coś takiego prostego napisać, np. przeszukiwać strone aktualną szukając adresów email i linków, potem przerabiać strony, do których kierują linki i tak dalej rekurencyjnie. Musiałbyś jeszcze pilnować by nie wyleźć poza stronę sprawdzając adres linka i się nie zapętlić sprawdzający zcy nowo odwiedzany link nie był już przetwarzany.
Offline
mam jeszcze problem w tym miejscu:
. . . String[] linkiTab = linki.toArray(new String[linki.size()]); FileOutputStream f = new FileOutputStream("a.txt"); for (int i=0; i<linkiTab.length; i++) { f.write(linkiTab[i]); f.close(); } return linkiTab;
Przy f.write(linkiTab[i]); daj mi blad : cannot find symbol
symbol: method write(java.lang.String)
location: class java.io.FileOutputStream
Offline
Writer out = new OutputStreamWriter(new FileOutputStream(fFileName), fEncoding); try { out.write(FIXED_TEXT); }
czyli
FileOutputStream f; try { f = new FileOutputStream("a.txt"); try { OutputStreamWriter out = new OutputStreamWriter(f); for (int i=0; i<linkiTab.length; i++) { out.write(linkiTab[i]); } } catch (IOException e) { // TODO gdy poleci z out.write(); } finally { try { f.close(); } catch (IOException e) { // TODO gdy poleci przy zamykaniu. } } } catch (FileNotFoundException e) { // TODO gdy pliku brak. }
Ostatnio edytowany przez paoolo (2011-02-08 19:33:06)
Offline
spoko już nie ma błędów, tworzy plik ale jest on pusty ;/
Offline
A masz coś w linkiTab?
Ostatnio edytowany przez paoolo (2011-02-08 20:32:18)
Offline
mam bo bo wyswietla mi z tej tablicy normalnie
doszedłem do tego:)
try { BufferedWriter out = new BufferedWriter(new FileWriter("/home/ryba/Pulpit/dmaile.txt")); for (int i=0; i<linkiTab.length; i++) { out.write(linkiTab[i]+System.getProperty("line.separator")); } out.close(); } catch (IOException e) { }
Offline
Ma takie wyrazenie , ale na nie wiedzieć czemu nie chce mi znajdowac linków
Pattern pattern = Pattern.compile("/^(http|https|ftp)://([A-Z0-9][A-Z0-9_-]*(?:.[A-Z0-9][A-Z0-9_-]*)+):?(d+)?/?/i"); Matcher matcher = pattern.matcher(strona); ArrayList<String> linki = new ArrayList<String>(); while (matcher.find()) { linki.add(matcher.group());
Offline
Spróbuj może takiego wyrażenia użyć
^(ht|f)tp(s?)://([\\w-]+\\.)+[\\w-]+(/[\\w-./?%&=]*)?$
Offline
coś nie działa próbowałem już różnych
Offline
Dla jakich danych testujesz?
Podane wyrażenie działa jedynie dla adresów bezwzględnych. Możesz je zmodyfikować by działało dla względnych.
Offline
Testuje na różnego typu stronach. właśnie chodzi mi o linki bezwzględne
Offline
Time (s) | Query |
---|---|
0.00016 | SET CHARSET latin2 |
0.00004 | SET NAMES latin2 |
0.00103 | SELECT u.*, g.*, o.logged FROM punbb_users AS u INNER JOIN punbb_groups AS g ON u.group_id=g.g_id LEFT JOIN punbb_online AS o ON o.ident='18.117.103.185' WHERE u.id=1 |
0.00084 | REPLACE INTO punbb_online (user_id, ident, logged) VALUES(1, '18.117.103.185', 1732551680) |
0.00043 | SELECT * FROM punbb_online WHERE logged<1732551380 |
0.00157 | SELECT topic_id FROM punbb_posts WHERE id=164813 |
0.00007 | SELECT id FROM punbb_posts WHERE topic_id=18253 ORDER BY posted |
0.00061 | SELECT t.subject, t.closed, t.num_replies, t.sticky, f.id AS forum_id, f.forum_name, f.moderators, fp.post_replies, 0 FROM punbb_topics AS t INNER JOIN punbb_forums AS f ON f.id=t.forum_id LEFT JOIN punbb_forum_perms AS fp ON (fp.forum_id=f.id AND fp.group_id=3) WHERE (fp.read_forum IS NULL OR fp.read_forum=1) AND t.id=18253 AND t.moved_to IS NULL |
0.00006 | SELECT search_for, replace_with FROM punbb_censoring |
0.00256 | SELECT u.email, u.title, u.url, u.location, u.use_avatar, u.signature, u.email_setting, u.num_posts, u.registered, u.admin_note, p.id, p.poster AS username, p.poster_id, p.poster_ip, p.poster_email, p.message, p.hide_smilies, p.posted, p.edited, p.edited_by, g.g_id, g.g_user_title, o.user_id AS is_online FROM punbb_posts AS p INNER JOIN punbb_users AS u ON u.id=p.poster_id INNER JOIN punbb_groups AS g ON g.g_id=u.group_id LEFT JOIN punbb_online AS o ON (o.user_id=u.id AND o.user_id!=1 AND o.idle=0) WHERE p.topic_id=18253 ORDER BY p.id LIMIT 0,25 |
0.00097 | UPDATE punbb_topics SET num_views=num_views+1 WHERE id=18253 |
Total query time: 0.00834 s |