我正在使用套接字製作代理服務器。當請求的文件不在我的當前目錄(緩存)中時,我對源服務器(這是www)執行一個http get請求,並將其緩存以備後用。在Python套接字緩存HTTP GET請求
我的代碼的問題是,每當我從www中獲取資源時,我都會緩存它,但文件的內容總是「永久移動」。
所以這就是發生了什麼:用戶通過在瀏覽器中輸入「localhost:8080/stackoverflow.com」來請求「stackoverlflow.com」。瀏覽器將正確返回頁面。當用戶在瀏覽器中第二次輸入「localhost:8080/stackoverflow.com」時,瀏覽器將返回一個頁面,說明stackoverflow.com已永久移動。
下面是確實的HTTP GET請求,並緩存方法的代碼:
@staticmethod
def find_on_www(conn, requested_file):
try:
# Create a socket on the proxy server
print 'Creating socket on proxy server'
c = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
host_name = requested_file.replace("www.","",1)
print 'Host Name: ', host_name
# Connect to the socket to port 80
c.connect((host_name, 80))
print 'Socket connected to port 80 of the host'
# Create a temporary file on this socket and ask port 80
# for the file requested by the client
file_object = c.makefile('r', 0)
file_object.write("GET " + "http://" + requested_file + " HTTP/1.0\n\n")
# Read the response into buffer
buff = file_object.readlines()
# Create a new file in the cache for the requested file.
# Also send the response in the buffer to client socket
# and the corresponding file in the cache
temp_file = open("./" + requested_file, "wb")
for i in range(0, len(buff)):
temp_file.write(buff[i])
conn.send(buff[i])
conn.close()
,這裏是我的代碼的其餘部分,如果有人有興趣:
import socket # Socket programming
import signal # To shut down server on ctrl+c
import time # Current time
import os # To get the last-modified
import mimetypes # To guess the type of requested file
import sys # To exit the program
from threading import Thread
def generate_header_lines(code, modified, length, mimetype):
""" Generates the header lines for the response message """
h = ''
if code == 200:
# Append status code
h = 'HTTP/1.1 200 OK\n'
# Append the date
# Append the name of the server
h += 'Server: Proxy-Server-Thomas\n'
# Append the date of the last modification to the file
h += 'Last-Modified: ' + modified + '\n'
elif code == 404:
# Append the status code
h = 'HTTP/1.1 404 Not Found\n'
# Append the date
h += 'Date: ' + time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime()) + '\n'
# Append the name of the web server
h += 'Server: Web-Server-Thomas\n'
# Append the length of the content
h += 'Content-Length: ' + str(length) + '\n'
# Append the type of the content
h += 'Content-Type: ' + mimetype + '\n'
# Append the connection closed - let the client know we close the connection
h += 'Connection: close\n\n'
return h
def get_mime_type(requested_file):
# Get the file's mimetype and encoding
try:
(mimetype, encoding) = mimetypes.guess_type(requested_file, True)
if not mimetype:
print "Mimetype found: text/html"
return 'text/html'
else:
print "Mimetype found: ", mimetype
return mimetype
except TypeError:
print "Mimetype found: text/html"
return 'text/html'
class WebServer:
def __init__(self):
"""
Constructor
:return:
"""
self.host = '' # Host for the server
self.port = 8000 # Port for the server
# Create socket
self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
def start_server(self):
""" Starts the server
:return:
"""
# Bind the socket to the host and port
self.socket.bind((self.host, self.port))
print "Connection started on ", self.port
# Start the main loop of the server - start handling clients
self.main_loop()
@staticmethod
def shutdown():
""" Shuts down the server """
try:
s.socket.close()
except Exception as e:
print "Something went wrong closing the socket: ", e
def main_loop(self):
"""Main loop of the server"""
while True:
# Start listening
self.socket.listen(1)
# Wait for a client to connect
client_socket, client_address = self.socket.accept()
# Wait for a request from the client
data = client_socket.recv(1024)
t = Thread(target=self.handle_request, args=(client_socket, data))
t.start()
# # Handle the request from the client
# self.handle_request(client_socket, data)
def handle_request(self, conn, data):
""" Handles a request from the client """
# Decode the data
string = bytes.decode(data)
# Split the request
requested_file = string.split(' ')
# Get the method that is requested
request_method = requested_file[0]
if request_method == 'GET':
# Get the part of the request that contains the name
requested_file = requested_file[1]
# Get the name of the file from the request
requested_file = requested_file[1:]
print "Searching for: ", requested_file
try:
# Open the file
file_handler = open(requested_file, 'rb')
# Get the content of the file
response_content = file_handler.read()
# Close the handler
file_handler.close()
# Get information about the file from the OS
file_info = os.stat(requested_file)
# Extract the last modified time from the information
time_modified = time.ctime(file_info[8])
# Get the time modified in seconds
modified_seconds = os.path.getctime(requested_file)
print "Current time: ", time.time()
print "Modified: ", time_modified
if (float(time.time()) - float(modified_seconds)) > 120: # more than 2 minutes
print "Time outdated!"
#self.find_on_www(conn, requested_file)
# Get the file's mimetype and encoding
mimetype = get_mime_type(requested_file)
print "Mimetype = ", mimetype
# Create the correct header lines
response_headers = generate_header_lines(200, time_modified, len(response_content), mimetype)
# Create the response to the request
server_response = response_headers.encode() + response_content
# Send the response back to the client
conn.send(server_response)
# Close the connection
conn.close()
except IOError: # Couldn't find the file in the cache - Go find file on www
print "Error: " + requested_file + " not found in cache!"
self.find_on_www(conn, requested_file)
@staticmethod
def find_on_www(conn, requested_file):
try:
# Create a socket on the proxy server
print 'Creating socket on proxy server'
c = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
host_name = requested_file.replace("www.","",1)
print 'Host Name: ', host_name
# Connect to the socket to port 80
c.connect((host_name, 80))
print 'Socket connected to port 80 of the host'
# Create a temporary file on this socket and ask port 80
# for the file requested by the client
file_object = c.makefile('r', 0)
file_object.write("GET " + "http://" + requested_file + " HTTP/1.0\n\n")
# Read the response into buffer
buff = file_object.readlines()
# Create a new file in the cache for the requested file.
# Also send the response in the buffer to client socket
# and the corresponding file in the cache
temp_file = open("./" + requested_file, "wb")
for i in range(0, len(buff)):
temp_file.write(buff[i])
conn.send(buff[i])
conn.close()
except Exception as e:
# Generate a body for the file - so we don't have an empty page
response_content = "<html><body><p>Error 404: File not found</p></body></html>"
# Generate the correct header lines
response_headers = generate_header_lines(404, '', len(response_content), 'text/html')
# Create the response to the request
server_response = response_headers.encode() + response_content
# Send the response back to the client
conn.send(server_response)
# Close the connection
conn.close()
def shutdown_server(sig, dummy):
""" Shuts down the server """
# Shutdown the server
s.shutdown()
# exit the program
sys.exit(1)
# Shut down on ctrl+c
signal.signal(signal.SIGINT, shutdown_server)
# Create a web server
s = WebServer()
# Start the server
s.start_server()
當我嘗試使用Firefox 33時,我無法獲得相同的結果。取而代之的是第二次嘗試連接時,我詢問是否希望下載頁面,因爲它認爲它是Windows可執行文件。 這是因爲您的代碼根據由擴展名確定的文件類型返回了mimetype,「.com」是Windows可執行文件。 – mpursuit 2014-12-07 13:29:01
正如你可以在我的get_mime_type函數中看到的那樣,如果mimetypes庫不能猜測MIME類型,我只會返回'text/html'。所以.com會返回'text/html',它不應該認爲它是可執行文件。但是,也許這不是正確的做法,你有什麼建議嗎? – 2014-12-07 13:45:56
當你向實際的Web服務器發送請求時,我會讀取請求頭部返回的mimetype(在Content-Type中給出),將它存儲在某個地方,然後在你從版本庫中返回版本時重新創建頭部時使用它緩存。 – mpursuit 2014-12-07 14:07:09