Internet and Distributed Programming (IS52025A)

 Sebastian Danicic 


Contents

Assignment

This course has two assignments: one is to implement an Internet chat room, and the other is to implement a web crawler which recursively checks for broken links in a web site. Only the first of these (the chat room) is assessed as coursework. Components of both, however, may be assessed in the exam.

What to hand in

You must produce a proper document containing the following:
  1. A full English description of the problem you attempted to solve.
  2. A detailed design of your system and its separate components with an explanation of the Objects (data structures) you used.
  3. All source code fully commented and properly indented.
  4. Screen shots of your working system.

  5. Any extra features that you have implemented should be clearly described.

  6. Conclusion: This should say which features worked, what didn't work, and possible future enhancements.
  7. You should include a contents page and a cover page, and your document should be properly bound using a spiral comb binder (marks will be deducted if you do not do this). The library has a comb binding machine. Please use it.
  8. Upload your source files as a single jar file, in a place to be announced

When to hand it in

Deadline: Tues 1 May 2012 at 4pm.

Marking Scheme

  1. English Description 10%
  2. Design 10%
  3. Source Code 50%
  4. Extra Features (Source Code and Discussion) 20%
  5. Conclusions 10%
  6. Your mark will be halved if you do not submit it in a spiral binder with a title page with your name!
  7. Evidence of plagiarism will be investigated and severe punishments may ensue.

Simple Clients and Servers

A Very Simple Echo Server

import java.io.*;
import java.net.*;

/**
 * Single-client echo server: accepts one connection on port 5000, sends
 * every byte it receives straight back and also prints it on the console.
 * The program ends when the client closes the connection.
 */
class evenSimplerEchoServer
{
  public static void main(String[] argv) throws Exception
  {
    ServerSocket listener = new ServerSocket(5000);
    Socket client = listener.accept(); // blocks until a client connects
    InputStream fromClient = client.getInputStream();
    OutputStream toClient = client.getOutputStream();

    // read() yields -1 once the client has closed its end
    for (int next = fromClient.read(); next != -1; next = fromClient.read())
    {
      toClient.write(next);
      toClient.flush();
      System.out.print((char) next);
    }
  }
}

A Very Simple Client

import java.io.*;
import java.net.*;

/**
 * Console echo client: connects to localhost:5000, sends each character
 * typed on standard input and prints the single byte the server returns
 * for it. Stops when standard input is exhausted.
 */
class evenSimplerEchoClient
{
  public static void main(String[] argv) throws Exception
  {
    Socket connection = new Socket("localhost",5000);
    OutputStream toServer = connection.getOutputStream();
    InputStream fromServer = connection.getInputStream();
    InputStreamReader keyboard = new InputStreamReader(System.in);

    int typed = keyboard.read();
    while (typed != -1)
    {
      toServer.write(typed);
      toServer.flush();
      // one reply byte is read for every character sent
      int echoed = fromServer.read();
      System.out.print((char) echoed);
      typed = keyboard.read();
    }
  }
}

Lab Exercises

  1. Compile and run the server and then the client on your local machine.
  2. Copy the server to igor. Compile the server on igor and point your client at the server on igor. You will have to choose a different port number.
  3. Rewrite your client and server so that the ports etc. can be taken from the command line (using args[0] etc.)
  4. Rewrite the server so it sends back upper case values of the characters it receives.

A Graphical Client and a Multithreaded Echo Server

Concurrency and Threads

Compile and run z below:

/** Two endless printers; neither method ever returns. */
class p
{
  /** Prints "red" on its own line, forever. */
  void f()
  {
    for (;;)
    {
      System.out.println("red");
    }
  }

  /** Prints "green" on its own line, forever. */
  void g()
  {
    for (;;)
    {
      System.out.println("green");
    }
  }
}

/** Thread whose body calls g() on the p object handed to the constructor. */
class t1 extends Thread
{
  p x;

  t1(p y)
  {
    this.x = y;
  }

  @Override
  public void run()
  {
    this.x.g();
  }
}

/** Thread whose body calls f() on the p object handed to the constructor. */
class t2 extends Thread
{
  p x;

  t2(p y)
  {
    this.x = y;
  }

  @Override
  public void run()
  {
    this.x.f();
  }
}

/** Starts a t2 and then a t1 thread that share one p object. */
class z
{
  public static void main(String[] argv)
  {
    p shared = new p();

    Thread first = new t2(shared);
    first.start();

    Thread second = new t1(shared);
    second.start();
  }
}

What happens?

A Graphical Client

import java.awt.*;
import java.awt.event.*;
import javax.swing.*;
import java.io.*;
import java.net.*;


/**
 * Swing client for the echo/broadcast servers: a text field for typing a
 * line and a scrolling text area showing everything received from the
 * server on localhost:5000.
 */
public class evenSimplerGuiClient implements ActionListener {
	private JTextField user = new JTextField("user",20);
	private JTextArea server = new JTextArea("server",5,20);
	private JScrollPane sp =new JScrollPane(server);
	private  Socket s;
	private OutputStreamWriter p;   // stays null when the connection could not be opened
	private InputStream i;
	private JFrame window = new JFrame("client");

	/** Background thread that copies everything the server sends into the text area. */
	class serverReader extends Thread
	{
		public void run()
		{
			StringBuilder received = new StringBuilder();
			int c;
			try
			{
				while ((c=i.read())!=-1)
				{
					received.append((char)c);
					final String soFar = received.toString();
					// Swing components are updated on the event dispatch thread,
					// not on this reader thread.
					SwingUtilities.invokeLater(new Runnable()
					{
						public void run()
						{
							server.setText(soFar);
						}
					});
				}
			}
			catch (IOException e)
			{
				// previously swallowed silently; report why the display stopped updating
				System.err.println("connection to server lost: " + e);
			}
		}
	}

	/**
	 * Connects to the server, starts the reader thread and shows the window.
	 * A failed connection is reported but the window is still shown.
	 */
	public evenSimplerGuiClient() throws Exception
	{
	  try
	  {
	     s = new Socket("localhost",5000);
	     p =new  OutputStreamWriter(s.getOutputStream());
	     i =  s.getInputStream();
	     new serverReader().start();
	  }
	  catch (Exception e)
	  {
	     // best effort, as before: carry on without a connection, but say what went wrong
	     System.err.println("could not connect to localhost:5000: " + e);
	  }

	  window.setSize(300,300);
	  window.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
	  window.setLayout(new FlowLayout());
	  window.add(sp);
	  window.add(user);

	  user.addActionListener(this);

	  window.setVisible(true);
	}

	/** Sends the contents of the text field, followed by a newline, to the server. */
	public void actionPerformed(ActionEvent a)
	{
		if (p == null)
		{
			// the connection was never established; nothing to write to
			System.err.println("not connected: message not sent");
			return;
		}

		String s= user.getText();
		try
		{
			p.write(s+'\n');
			p.flush();
			user.setText("");
		}
		catch (IOException e)
		{
			System.err.println("could not send message: " + e);
		}
	}

	public static void main(String[] args) throws Exception
	{
		new evenSimplerGuiClient();
	}
}




A Simple Multithreaded Echo Server

import java.io.*;
import java.net.*;

/**
 * Echo server for many clients: listens on port 5000 and hands every
 * accepted connection to its own Transaction thread.
 */
class simpleMultiThreadedEchoServer
{
  public static void main(String[] argv) throws Exception
  {
    ServerSocket listener = new ServerSocket(5000);
    for (;;)
    {
      Socket client = listener.accept(); // blocks until the next client connects
      Transaction handler = new Transaction(client);
      handler.start();
    }
  }
}
 
/**
 * Serves one echo client: every byte read from the socket is written
 * straight back and printed on the console. The socket is closed when the
 * client disconnects or an I/O error occurs.
 */
class Transaction extends Thread
{
  InputStream b;
  OutputStream p;
  private final Socket connection; // kept so it can be closed when the client leaves

  /**
   * @param s the accepted client connection
   * @throws Exception if the socket's streams cannot be obtained
   */
  public Transaction(Socket s) throws Exception
  {
    connection = s;
    b = s.getInputStream();
    p = s.getOutputStream();
  }

  public void run()
  {
    int c;
    try
    {
      while ((c = b.read()) != -1)
      {
        p.write(c);
        p.flush();
        System.out.print((char) c);
      }
    }
    catch (IOException e)
    {
      // previously swallowed; only this client is affected, so just report it
      System.err.println("echo client failed: " + e);
    }
    finally
    {
      // without this every finished client would leave its socket open
      try
      {
        connection.close();
      }
      catch (IOException e)
      {
        System.err.println("could not close client socket: " + e);
      }
    }
  }
}

Lab Exercises

  1. Compile and run the server and then the graphical client on your local machine.
  2. Copy the server to igor. Compile the server on igor and point your graphical client at the server on igor.

  3. Check you can run more than one client.

  4. Rewrite your graphical client so that the ports etc. can be taken from the command line (using args[0] etc.)

Multi-threaded BroadCaster Server and Proxy Server

Broadcaster Server

import java.util.*;
import java.io.*;
import java.net.*;


/**
 * List of output streams whose operations each run while holding this
 * object's monitor, so calls from different threads do not interleave.
 */
class SynchList
{
	ArrayList <OutputStream> it;

	SynchList()
	{
		this.it = new ArrayList <OutputStream> ();
	}

	/** Returns the stream stored at position i. */
	OutputStream get(int i)
	{
		synchronized (this)
		{
			return this.it.get(i);
		}
	}

	/** Appends o to the end of the list. */
	void add(OutputStream o)
	{
		synchronized (this)
		{
			this.it.add(o);
		}
	}

	/** Returns the number of streams currently stored. */
	int size()
	{
		synchronized (this)
		{
			return this.it.size();
		}
	}
}

/**
 * Broadcast server: listens on port 5000 and starts one Transaction thread
 * per client, numbering clients 0, 1, 2, ... in order of arrival. All
 * threads share the same list of client output streams.
 */
class broadcasterWithList
{
  static SynchList Outputs = new SynchList();
  static int i = 0;

  public static void main(String[] argv) throws Exception
  {
    ServerSocket listener = new ServerSocket(5000);
    while (true)
    {
      Socket joined = listener.accept(); //wait for client to connect
      Transaction handler = new Transaction(i, joined, Outputs);
      handler.start();
      i++;
      System.out.println("client joined");
    }
  }
}
 
/**
 * Serves one broadcast client: each byte read from this client is written
 * to every other registered output stream (the stream at this client's own
 * index n is skipped).
 */
class Transaction extends Thread
{
  SynchList outputs;
  int n;
  Socket t;
  InputStream b;
  OutputStream p;

  /**
   * Stores the connection and registers its output stream in the shared list.
   *
   * @param i index this client's stream is expected to have in v
   * @param s the accepted client connection
   * @param v list of all clients' output streams
   */
  public Transaction(int i, Socket s, SynchList v) throws Exception
  {
    this.outputs = v;
    this.n = i;
    this.t = s;
    this.b = s.getInputStream();
    this.p = s.getOutputStream();
    v.add(this.p);
  }

  @Override
  public void run()
  {
    try
    {
      int received = b.read();
      while (received != -1)
      {
        for (int slot = 0; slot < outputs.size(); slot++)
        {
          if (slot == n)
          {
            continue; // do not send a client its own input back
          }
          OutputStream peer = outputs.get(slot);
          peer.write(received);
          peer.flush();
        }
        System.out.print((char) received);
        System.out.print("size of ArrayList :" + outputs.size());

        received = b.read();
      }
      System.out.print("left loop");
    }
    catch (Exception e)
    {
      System.out.print(e);
    }
  }

}

The need for Synchronisation with Shared Data and Threads

Read about Thread Interference. Try the following programs and explain the difference in output:

Removing Clients from the List

If you kill one of the clients communicating with the Multithreaded Broadcaster, there is a disaster. Explain why this happens. We solve this by removing the corresponding OutputStream from the list when a client dies. Note we have to be careful with the list indexing. Here is a solution:

import java.util.*;
import java.io.*;
import java.net.*;


/**
 * List of output streams whose operations each run while holding this
 * object's monitor, so calls from different threads do not interleave.
 */
class SynchList
{
	ArrayList <OutputStream> it;

	SynchList()
	{
		this.it = new ArrayList <OutputStream> ();
	}

	/** Returns the stream stored at position i. */
	OutputStream get(int i)
	{
		synchronized (this)
		{
			return this.it.get(i);
		}
	}

	/** Appends o to the end of the list. */
	void add(OutputStream o)
	{
		synchronized (this)
		{
			this.it.add(o);
		}
	}

	/** Returns the number of streams currently stored. */
	int size()
	{
		synchronized (this)
		{
			return this.it.size();
		}
	}

	/** Deletes the stream at position i; later entries move down by one. */
	void remove(int i)
	{
		synchronized (this)
		{
			this.it.remove(i);
		}
	}
}

/**
 * Broadcast server that copes with clients leaving: listens on port 5000
 * and starts one Transaction thread per client, passing the current size of
 * the shared stream list as that client's index.
 */
class broadcasterWithListest1
{
static SynchList Outputs= new SynchList();
static int i=0;
 public static void main(String[] argv) throws Exception
  {
   ServerSocket s = new ServerSocket(5000);
   Transaction k;
   while (true)
   {
     Socket client = s.accept(); //wait for client to connect
     // The index is read only after accept() has returned. Reading it before
     // the blocking call (as the one-line form did) gives a stale value
     // whenever another client leaves while the server is waiting.
     k = new Transaction(Outputs.size(), client, Outputs);
     k.start();
     System.out.println("client joined");
   }
  }
}
 
/**
 * Serves one broadcast client: each byte read from this client is written
 * to every registered output stream (including its own). When the client
 * leaves, or its connection fails, its stream is taken out of the shared
 * list and its socket is closed.
 */
class Transaction extends Thread
{
 SynchList outputs;
 int n;
 Socket t;
 InputStream b;
 OutputStream p;

 /**
  * Stores the connection and registers its output stream in the shared list.
  *
  * @param i number given to this client; used only in the log message
  * @param s the accepted client connection
  * @param v list of all clients' output streams
  */
 public Transaction(int i,Socket s, SynchList v) throws Exception
  {
    outputs=v;
    n=i;t=s; b = t.getInputStream();
    p =t.getOutputStream();
    outputs.add(p);
  }

 public void run()
  {
   int c;
   try
   {
     while((c=b.read())!=-1)
     {
       for (int j=0;j<outputs.size();j++)
       {
         try
         {
           OutputStream peer = outputs.get(j);
           peer.write(c);
           peer.flush();
         }
         catch (IndexOutOfBoundsException shrunk)
         {
           // another client was removed between size() and get(j)
           break;
         }
         catch (IOException deadPeer)
         {
           // that peer's connection is broken; skip it so that this
           // client's own loop keeps running
         }
       }
       System.out.print((char)c);
     }
     System.out.print("client " + n + " left loop");
   }
   catch (Exception e)
   {
     System.out.print(e);
   }
   finally
   {
     // runs on normal exit and on failure, so a dead stream never stays listed
     removeOwnStream();
     try
     {
       t.close();
     }
     catch (IOException e)
     {
       System.out.print(e);
     }
   }
  }

 /**
  * Removes this client's stream by identity rather than by the index n.
  * Indices shift down whenever an earlier client is removed, so n may no
  * longer point at this client's entry.
  */
 private void removeOwnStream()
  {
   // SynchList's methods are synchronized on the list object, so holding the
   // same monitor here makes the search-then-remove a single atomic step.
   synchronized (outputs)
   {
     for (int j=0;j<outputs.size();j++)
     {
       if (outputs.get(j)==p)
       {
         outputs.remove(j);
         return;
       }
     }
   }
  }

}

A Proxy Server

Here is a simple multi-threaded proxy server which listens on port args[2] and then sends this data to and receives data from a server which is listening on port args[1] of host args[0].
import java.io.*;
import java.net.*;

/**
 * Multi-threaded proxy: listens on a local port and, for each client that
 * connects, opens a connection to the remote host and relays bytes in both
 * directions using two fromClientToServer threads.
 *
 * Usage: java SebMultiProxy host remoteport localport
 */
public class SebMultiProxy {

public static void main(String[] args) throws Exception {

      if (args.length < 3)
      {
        // previously this failed with an ArrayIndexOutOfBoundsException
        System.err.println("usage: java SebMultiProxy <host> <remoteport> <localport>");
        return;
      }

      String host = args[0];
      int remoteport = Integer.parseInt(args[1]);
      int localport = Integer.parseInt(args[2]);
      System.out.println("Starting proxy for " + host + ":" + remoteport
          + " on port " + localport);
      ServerSocket s = new ServerSocket(localport);
      while (true)  {
        Socket cl=s.accept();
        Socket se=null;
        try
        {
          se= new Socket(host,remoteport);
          fromClientToServer k = new fromClientToServer(cl,se);k.start();
          fromClientToServer m = new fromClientToServer(se,cl);m.start();
        }
        catch (Exception e)
        {
          // A remote host that cannot be reached must not stop the proxy
          // for everyone else: drop this one client and keep listening.
          System.err.println("could not relay to " + host + ":" + remoteport + ": " + e);
          closeQuietly(se);
          closeQuietly(cl);
        }
      }

  }

  /** Closes a socket, reporting but not propagating any failure; null is ignored. */
  private static void closeQuietly(Socket socket)
  {
      if (socket == null) return;
      try
      {
        socket.close();
      }
      catch (IOException e)
      {
        System.err.println("could not close socket: " + e);
      }
  }
}

/**
 * Relays bytes one way, from the input of one socket to the output of
 * another. The proxy runs two of these per connection, one per direction.
 *
 * When the source reaches end of stream, the end-of-stream is passed on by
 * shutting down the destination's output; once both directions have
 * finished, both sockets are closed. On an I/O error both sockets are
 * closed immediately.
 */
class fromClientToServer extends Thread
{
 InputStream b;
 OutputStream p;
 private final Socket source;
 private final Socket destination;

 /**
  * @param c socket to read from
  * @param s socket to write to
  * @throws Exception if either socket's stream cannot be obtained
  */
 public fromClientToServer(Socket c, Socket s) throws Exception
  {
    source = c;
    destination = s;
    b = c.getInputStream();
    p = s.getOutputStream();
  }

 public void run()
  {
   int c;
   try
   {
     while((c=b.read())!=-1) {p.write(c);p.flush();}

     // The two relay threads of a connection share both sockets, so the
     // shutdown bookkeeping is done under one lock; whichever thread
     // finishes second then sees all four halves down and closes the sockets.
     synchronized (fromClientToServer.class)
     {
       destination.shutdownOutput(); // lets the far side see end of stream
       source.shutdownInput();
       if (bothHalvesDown(source) && bothHalvesDown(destination))
       {
         closeBoth();
       }
     }
   }
   catch (Exception e)
   {
     // previously swallowed; a broken relay makes the connection useless
     System.err.println("proxy relay stopped: " + e);
     closeBoth();
   }
  }

 /** True when both the input and the output side of s have been shut down. */
 private static boolean bothHalvesDown(Socket s)
  {
   return s.isInputShutdown() && s.isOutputShutdown();
  }

 /** Closes both sockets; close failures are ignored because nothing more can be done with them. */
 private void closeBoth()
  {
   try { source.close(); } catch (IOException ignored) { /* already unusable */ }
   try { destination.close(); } catch (IOException ignored) { /* already unusable */ }
  }

}

Lab Exercises

  1. Compile and run the server with many clients.
  2. Rewrite the server so the port is given on the command line.
  3. Try removing the j!=n. Explain what happens.
  4. Find out about Proxy-Servers and their use.
  5. Suppose ma323.gold.ac.uk is behind a firewall but igor.gold.ac.uk has port 5000-10000 open. How can you use the simple proxy server above to connect to a webserver listening on port 80 of ma323.gold.ac.uk. (pick a free port on igor)
  6. Run your broadcaster on a lab machine and then use the proxy server running on igor to allow clients to connect to your server. See if your friends can connect to your server.
  7. One person in the class should set up the broadcaster server. See if everyone in the class can connect and chat together!

Object Serialization: Sending and Receiving Objects

See Notes on Object Serialization.

A Client that sends Objects and Receives Objects

import java.io.*;
/**
 * A student's name and mark, serializable so that it can be sent over an
 * object stream.
 */
public class Student implements Serializable
{

String name;
int mark;

 /**
  * @param n the student's name
  * @param a the student's mark
  */
 public Student (String n, int a)
 {
	mark=a;name=n;
 }

 /** Returns the name and the mark separated by a single space. */
 public String toString()
 {
	// the original referred to an undeclared variable "age" and did not compile
	return name+" "+mark;
 }

}


class objectClient1
{
 public static void main(String[] argv) throws Exception
  {Socket s = new Socket("localhost",5000);
   ObjectOutputStream p =new ObjectOutputStream(s.getOutputStream());
   ObjectInputStream q =new ObjectInputStream(s.getInputStream());
   Scanner b = new Scanner(System.in); 
   int c;
   while(b.hasNext()) {
   			    String name=b.nextLine();	
   			    int mark=Integer.parseInt(b.nextLine());
			    p.writeObject(new Student(name,mark));
			    p.flush();
			    System.out.println(q.readObject());
		      
		      }
                                   
  
  }
}

An Object Echo Server

import java.io.*;
import java.net.*;

/**
 * Single-client object echo server: accepts one connection on port 5000
 * and writes back every object it reads, until the client disconnects or
 * sends null.
 */
class objectEchoServer
{
 public static void main(String[] argv) throws Exception
  {
   ServerSocket s = new ServerSocket(5000);
   Socket t = s.accept();//wait for client to connect
   System.out.println("server connected");
   try
   {
     ObjectInputStream b = new ObjectInputStream(t.getInputStream());
     ObjectOutputStream q = new ObjectOutputStream(t.getOutputStream());
     Object c;
     while((c=b.readObject())!=null)
     {
       q.writeObject(c);
       q.flush(); // push the reply out rather than relying on internal buffering
     }
   }
   catch (EOFException e)
   {
     // readObject() reports end of stream with this exception, not with
     // null, so a client closing the connection is the normal way out.
   }
   finally
   {
     t.close();
     s.close();
   }
  }

}

Lab Exercises

  1. Write a Graphical Version of the objectClient1 above.
  2. Write a multi-threaded Object Echo Server.
  3. Write a client which terminates if it receives a Student Object whose name is `end' and whose mark is 0.
  4. Think about the Objects your Chatroom client (assignment) and server must communicate. For example, perhaps the Server should send the username with the message to the client. Define an Object like Student for this. The client can then choose not to display messages from blocked users.

Communicating with Databases in Java

import java.sql.*;


/**
 * Inserts one row built from the two command-line arguments into table
 * "one" and then prints the first two columns of every row in the table.
 *
 * Usage: java seb5 firstValue secondValue
 */
public class seb5 {

	public static void main(String[] args) throws Exception
	{
	  if (args.length < 2)
	  {
	      System.err.println("usage: java seb5 <first value> <second value>");
	      return;
	  }

	  Class.forName("com.mysql.jdbc.Driver");
	  Connection connect=
	  DriverManager.getConnection("jdbc:mysql://localhost/art55","mas01sd","sebastian");
	  try
	  {
	      // Placeholders keep the arguments out of the SQL text: pasting them
	      // between quotes lets an argument containing ' alter the statement.
	      PreparedStatement insert = connect.prepareStatement("INSERT INTO one VALUES(?, ?)");
	      try
	      {
	          insert.setString(1, args[0]);
	          insert.setString(2, args[1]);
	          insert.executeUpdate();
	      }
	      finally
	      {
	          insert.close();
	      }

	      Statement st = connect.createStatement();
	      try
	      {
	          ResultSet resultSet = st.executeQuery("SELECT *  from one");
	          while (resultSet.next())
	          {
	              for (int i=1;i<3;i++)System.out.print(resultSet.getString(i) + " ");
	              System.out.println();
	          }
	      }
	      finally
	      {
	          st.close(); // also closes the result set it produced
	      }
	  }
	  finally
	  {
	      connect.close();
	  }
	}

}

Lab Exercises

  1. Set up a simple MYSQL or PostgreSQL database with a single table to hold data about students.
  2. Change the program above to work with your database and table.
  3. Download JDBC4 Postgresql Driver, Version 9.1-901 or mysql-connector-java-5.0.8-bin.jar.
  4. To compile, use for example:
    javac -cp .:mysql-connector-java-5.0.8-bin.jar seb5.java
    

    and to run:

    For example:

    java -cp .:mysql-connector-java-5.0.8-bin.jar seb5
    
  5. Change your Multi-threaded Object Server from last week to receive Student Objects and store them in your database. Non-Student objects should be discarded. (Use if (x instanceof Student) ... for this.)

Revision of Data Structures

See here to revise Lists, HashSets, Recursion, Maps.

Web Crawling

Take a copy of jsoup-1.6.1.jar. We are going to use jsoup to parse HTML documents in order to spider through websites.

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
/**
 * Prints the absolute target of every link (a[href]) found in the page
 * whose URL is given as the first command-line argument.
 */
public class SebLinks
{
        public static void main(String[] args) throws IOException
        {
           String startPage = args[0];
           Elements anchors = Jsoup.connect(startPage).get().select("a[href]");
           for (Element anchor : anchors)
           {
              // "abs:href" asks jsoup for the link resolved to an absolute URL
              String target = anchor.attr("abs:href");
              System.out.println(target);
           }
        }
}

Lab Exercises

  1. Compile and run the above program: To compile do
    javac -cp jsoup-1.6.1.jar SebLinks.java
    

    To run do

    java -cp .:jsoup-1.6.1.jar SebLinks http://localhost
    
    (or something else apart from http://localhost.)

A Web-Crawler

import java.util.ArrayList;
import java.util.HashSet;
import java.util.HashSet.*;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
/**
 * Simple web crawler: starting from one page, repeatedly picks a link that
 * has not been visited yet, prints it and collects the links on that page.
 */
public class NewSpider
{

/**
 * Returns the absolute targets of all a[href] links on the page at url.
 * If the page cannot be fetched or parsed the list is returned as it
 * stands at that point.
 */
static ArrayList<String>  listLinks (String url)
{
	ArrayList<String> found = new ArrayList<String>();
	try
	{
		Document page = Jsoup.connect(url).get();
		for (Element anchor : page.select("a[href]"))
		{
			found.add(anchor.attr("abs:href"));
		}
	}
	catch (Exception e)
	{
		// deliberately ignored: an unreadable page simply contributes no links
	}
	return found;
}


/** Copies the list into a new set, dropping duplicates. */
static HashSet <String> listToSet (ArrayList <String> m)
{
	HashSet<String> unique = new HashSet<String>();
	unique.addAll(m);
	return unique;
}


/** Returns the distinct link targets found on the page at url. */
static  HashSet <String> links (String url)
{
	ArrayList<String> all = listLinks(url);
	return listToSet(all);
}


/**
 * Crawls from url, printing each newly visited link, until n links have
 * been visited or there is nothing left to visit.
 */
static  void Spider (String url, int n)
{
	HashSet<String> seen = new HashSet<String>();
	HashSet<String> pending = new HashSet<String>();
	pending.addAll(links(url));
	seen.add(url);

	int crawled = 0;
	while (crawled < n && !pending.isEmpty())
	{
		String candidate = pending.iterator().next();
		if (seen.contains(candidate))
		{
			// already handled: just drop it from the work set
			pending.remove(candidate);
			continue;
		}

		System.out.println(candidate);
		HashSet<String> onPage = links(candidate);
		pending.addAll(onPage);
		seen.add(candidate);
		crawled++;
	}
}

        /** Crawls at most 100 links starting from the URL in args[0]. */
        public static void main(String[] args) throws IOException
	{
           String start = args[0];
	   Spider(start,100);
        }
}

Lab Exercises

  1. Rewrite the program above so it only crawls a domain given as the second command line argument i.e. it only follows links whose name starts with the second command line argument. Solution:
    import java.util.ArrayList;
    import java.util.HashSet;
    import java.util.HashSet.*;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;
    import java.io.IOException;
    /**
     * Web crawler restricted by a filter string: links are only printed and
     * followed when their URL contains the second command-line argument.
     */
    public class NewSpider1
    {

    /**
     * Returns the absolute targets of all a[href] links on the page at url.
     * If the page cannot be fetched or parsed the list is returned as it
     * stands at that point.
     */
    static ArrayList<String>  listLinks (String url)
    {
        ArrayList<String> found = new ArrayList<String>();
        try
        {
            Document page = Jsoup.connect(url).get();
            for (Element anchor : page.select("a[href]"))
            {
                found.add(anchor.attr("abs:href"));
            }
        }
        catch (Exception e)
        {
            // deliberately ignored: an unreadable page simply contributes no links
        }
        return found;
    }


    /** Copies the list into a new set, dropping duplicates. */
    static HashSet <String> listToSet (ArrayList <String> m)
    {
        HashSet<String> unique = new HashSet<String>();
        unique.addAll(m);
        return unique;
    }


    /** Returns the distinct link targets found on the page at url. */
    static  HashSet <String> links (String url)
    {
        ArrayList<String> all = listLinks(url);
        return listToSet(all);
    }


    /**
     * Crawls from url until n links have been examined or nothing is left.
     * A link counts towards n whether or not it passes the filter, but only
     * links whose text contains the filter string are printed and followed.
     */
    static  void Spider (String url, int n, String contains)
    {
        HashSet<String> seen = new HashSet<String>();
        HashSet<String> pending = new HashSet<String>();
        pending.addAll(links(url));
        seen.add(url);

        int examined = 0;
        while (examined < n && !pending.isEmpty())
        {
            String candidate = pending.iterator().next();
            if (seen.contains(candidate))
            {
                // already handled: just drop it from the work set
                pending.remove(candidate);
                continue;
            }

            boolean wanted = candidate.contains(contains);
            if (wanted)
            {
                System.out.println(candidate);
                HashSet<String> onPage = links(candidate);
                pending.addAll(onPage);
            }
            seen.add(candidate);
            examined++;
        }
    }

    /** Crawls at most 100 links from args[0], filtered by args[1]. */
    public static void main(String[] args) throws IOException
    {
        String start = args[0];
        Spider(start,100,args[1]);
    }
    }
    

  2. Experiment with this:
    import java.util.ArrayList;
    import java.util.HashSet;
    import java.util.HashSet.*;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;
    import java.io.IOException;
    public class Brokens 
    {
    
    	static boolean broken(String url)
    	{
    	        try {Jsoup.connect(url).get(); return false;}
    	        catch (java.net.MalformedURLException e) {return true;}	
    		
    		catch (IOException e){if (e.toString().contains("java.io.IOException: 404"))
    		                        { 
    					 return true;
    				    return false; //this means it exists but isn't html
    				     }
    		catch(Exception e) {
    						
    					return false; 
    		//for any other errror assume not broken - this is a guess
    		                   }
    	
    	}
    			
    
    
    
    static ArrayList<String>  listLinks (String url)
    {
    	ArrayList<String> a= new ArrayList<String>();
    	try{org.jsoup.Connection z=Jsoup.connect(url);
               Document doc = z.get();
    	   Elements links = doc.select("a[href]");
               for (Element link : links) a.add(link.attr("abs:href"));
    	   
    	   }
    	   catch (Exception e)
    	   {
    	   
    	   }
    	 return a;  
    }
    
    
    static HashSet <String> listToSet (ArrayList <String> m)
    {
    	HashSet <String>  S=new HashSet <String> ();
            for (String t : m) S.add(t);
    	return S;
    }
    
    
    static  HashSet <String> links (String url)
    {
    	  return listToSet(listLinks(url));
    }
    
    
    
    static  void Spider (String url, int n, String contains)
    {
    	  HashSet<String> alreadyVisited = new HashSet <String> ();
              HashSet<String> toVisit = new HashSet <String> ();
    	  toVisit.addAll(links(url));
    	  alreadyVisited.add(url);  
    	  int i=0;
    	  while (i<n && !toVisit.isEmpty())
    	  {
    	  	
    		String z= toVisit.iterator().next();
    	        boolean already=alreadyVisited.contains(z);
    		if (already) toVisit.remove(z);
    		else 
    		{
    		  if (z.contains(contains))
    		  {
    		    System.out.println(z);
    		    HashSet <String> k= links(z);
    		    toVisit.addAll(k);
    		  }
    		  alreadyVisited.add(z);
    		  i++;
    		}  
    	  }
    	  
    	  for (String k:alreadyVisited)
    	  if (broken(k)) System.out.println("Broken: " +k);
    }
    
            public static void main(String[] args) throws IOException 
    	{
               String url = args[0];
    	   Spider(url,100,args[1]);           
    		   
            }
    }
    





s.danicic@gold.ac.uk
Sebastian Danicic BSc MSc PhD (Reader in Computer Science)
Dept of Computing, Goldsmiths, University of London, London SE14 6NW
Last updated 2012-03-22