// // Programmer: Craig Stuart Sapp // Creation Date: Mon Oct 11 15:25:04 PDT 2010 // Last Modified: Mon Oct 11 16:15:38 PDT 2010 // Filename: webget.c // Syntax: C // // Description: Demonstration of how to download a URL from the internet and // print to standard output. // Usage: webget http://www.domain.com/index.html > index.html // $Smake: gcc -o webget webget.c // #include /* printf */ #include /* socket, connect */ #include /* socket, connect */ #include /* htons */ #include /* gethostbyname */ #include /* read, write */ #include /* memcpy */ void readFromHttpURI (const char* webaddress); int getChunk (int socket_id, char* buffer, int bufsize); int getFixedDataSize (int socket_id, int datalength, char* buffer, int bufsize); int open_network_socket (const char *hostname, unsigned short int port); void prepare_address (struct sockaddr_in *address, const char *hostname, unsigned short int port); void printChunk (const char* buffer, int chunksize); const char* getLine (char* buffer, const char* hptr, int bufsize); /////////////////////////////////////////////////////////////////////////// int main(int argc, char** argv) { if (argc != 2) { printf("Usage: %s web-address > local-file\n", argv[0]); exit(1); } readFromHttpURI(argv[1]); return 1; } /////////////////////////////////////////////////////////////////////////// ////////////////////////////// // // readFromHttpURI -- Read a Humdrum file from an http:// web address // void readFromHttpURI(const char* webaddress) { char hostname[100000] = {0}; char location[100000] = {0}; const char* ptr = webaddress; const char* filename = NULL; if (strncmp(webaddress, "http://", strlen("http://")) == 0) { // remove the "http://" portion of the webaddress ptr += strlen("http://"); } strcpy(hostname, ptr); char* pot; if ((pot = strchr(hostname, '/')) != NULL) { *pot = '\0'; } if ((filename = strchr(ptr, '/')) != NULL) { strcpy(location, filename); } if (location[0] == '\0') { strcpy(location, "/"); } char newline[3] = {0x0d, 
0x0a, 0}; char request[100000] = {0}; strcpy(request, "GET "); strcat(request, location); strcat(request, " HTTP/1.1\n"); strcat(request, "Host: "); strcat(request, hostname); strcat(request, "\n"); strcat(request, "User-Agent: webget 1.0 ("); strcat(request, __DATE__); strcat(request, ") \n"); strcat(request, "Connection: close\n"); // this line is necessary strcat(request, "\n"); // printf("HOSTNAME: %s\n", hostname); // printf("LOCATION: %s\n", location); // printf("%s\n", request); // printf("-------------------------------------------------\n"); int socket_id = open_network_socket(hostname, 80); if (write(socket_id, request, strlen(request)) == -1) { exit(-1); } #define URI_BUFFER_SIZE (10000) char buffer[URI_BUFFER_SIZE]; int message_len; char header[100000] = {0}; int foundcontent = 0; int i; int newlinecounter = 0; // read the response header: while ((message_len = read(socket_id, buffer, 1)) != 0) { buffer[1] = '\0'; strcat(header, buffer); if ((buffer[0] == 0x0a) || (buffer[0] == 0x0d)) { newlinecounter++; } else { newlinecounter = 0; } if (newlinecounter == 4) { foundcontent = 1; break; } } if (foundcontent == 0) { printf("Funny error trying to read server response\n"); exit(1); } // now read the size of the rest of the data which is expected int datalength = -1; // also, check for chunked transfer encoding: int chunked = 0; // printf("%s\n", header); // printf("-------------------------------------------------\n"); const char* hptr = header; while (hptr = getLine(buffer, hptr, URI_BUFFER_SIZE)) { int len = strlen(buffer); for (i=0; i= 0) { // break; // } } // once the length of the remaining data is known (or not), read it: if (datalength > 0) { getFixedDataSize(socket_id, datalength, buffer, URI_BUFFER_SIZE); } else if (chunked) { int chunksize; int totalsize = 0; do { chunksize = getChunk(socket_id, buffer, URI_BUFFER_SIZE); printChunk(buffer, chunksize); totalsize += chunksize; } while (chunksize > 0); if (totalsize == 0) { printf("Error: no data 
found for URI (probably invalid)\n"); exit(1); } } else { // if the size of the rest of the data cannot be found in the // header, then just keep reading until done (but this will // probably cause a 5 second delay at the last read). while ((message_len = read(socket_id, buffer, URI_BUFFER_SIZE)) != 0) { if (foundcontent) { printChunk(buffer, message_len); } else { for (i=0; i datalength) { readsize = datalength - readcount; } message_len = read(socket_id, buffer, readsize); if (message_len == 0) { // shouldn't happen, but who knows... break; } printChunk(buffer, message_len); readcount += message_len; } return readcount; } ////////////////////////////// // // prepare_address -- Store a computer name, such as // www.google.com into a sockaddr_in structure for later use in // open_network_socket. // void prepare_address(struct sockaddr_in *address, const char *hostname, unsigned short int port) { memset(address, 0, sizeof(struct sockaddr_in)); struct hostent *host_entry; host_entry = gethostbyname(hostname); if (host_entry == NULL) { printf("Could not find address for %s\n", hostname); exit(1); } // copy the address to the sockaddr_in struct. memcpy(&address->sin_addr.s_addr, host_entry->h_addr_list[0], host_entry->h_length); // set the family type (PF_INET) address->sin_family = host_entry->h_addrtype; address->sin_port = htons(port); } ////////////////////////////// // // open_network_socket -- Open a connection to a computer on the internet. // Intended for downloading a Humdrum file from a website. 
//
//    hostname: name of the remote computer; resolved by prepare_address().
//    port:     TCP port on the remote computer, in host byte order
//              (prepare_address() converts it with htons()).
//    Returns the connected socket descriptor.  Any failure prints a
//    diagnostic to standard error and terminates the program with exit
//    status 1, so the function only ever returns on success.
//

int open_network_socket(const char *hostname, unsigned short int port) {
   // IP address and port of the remote host.  prepare_address() fills the
   // structure in and exits the program itself if the name lookup fails.
   struct sockaddr_in servaddr;
   prepare_address(&servaddr, hostname, port);

   // socket(domain, type, protocol)
   //    domain   = PF_INET (internet/IPv4 domain)
   //    type     = SOCK_STREAM (tcp)
   //    protocol = 0 (only one SOCK_STREAM type in the PF_INET domain)
   int inet_socket = socket(PF_INET, SOCK_STREAM, 0);
   if (inet_socket < 0) {
      // socket returns -1 on error.  Diagnostics go to stderr because
      // stdout carries the downloaded data and is normally redirected
      // into the output file.
      fprintf(stderr, "Error opening socket to computer %s\n", hostname);
      exit(1);
   }

   // connect(sockfd, serv_addr, addrlen); returns -1 on error
   if (connect(inet_socket, (struct sockaddr *)&servaddr,
         sizeof servaddr) < 0) {
      fprintf(stderr, "Error opening connection to computer: %s\n", hostname);
      exit(1);
   }

   return inet_socket;
}