Vega Strike Python Modules doc 0.5.1
Documentation of the "Modules" folder of Vega Strike
 All Data Structures Namespaces Files Functions Variables
RobotFileParser Class Reference

Public Member Functions

def __init__
 
def mtime
 
def modified
 
def set_url
 
def read
 
def parse
 
def can_fetch
 
def __str__
 

Data Fields

 entries
 
 disallow_all
 
 allow_all
 
 last_checked
 
 url
 
 path
 
 errcode
 

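Detailed Description

Parses the lines of a robots.txt file into per-user-agent rule entries and reports whether a given user agent may fetch a given URL.

Definition at line 22 of file robotparser.py.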

Constructor & Destructor Documentation

def __init__ (   self,
  url = '' 
)

Definition at line 23 of file robotparser.py.

23 
24  def __init__(self, url=''):
25  self.entries = []
26  self.disallow_all = 0
27  self.allow_all = 0
28  self.set_url(url)
29  self.last_checked = 0

Member Function Documentation

def __str__ (   self)

Definition at line 139 of file robotparser.py.

References RobotFileParser.entries, and locale.str().

140  def __str__(self):
141  ret = ""
142  for entry in self.entries:
143  ret = ret + str(entry) + "\n"
144  return ret
145 
def can_fetch (   self,
  useragent,
  url 
)
Using the parsed robots.txt, decide whether useragent can fetch url.

Definition at line 121 of file robotparser.py.

References RobotFileParser.allow_all, RobotFileParser.disallow_all, RobotFileParser.entries, urllib.quote(), and urlparse.urlparse().

122  def can_fetch(self, useragent, url):
123  """using the parsed robots.txt decide if useragent can fetch url"""
124  _debug("Checking robot.txt allowance for:\n user agent: %s\n url: %s" %
125  (useragent, url))
126  if self.disallow_all:
127  return 0
128  if self.allow_all:
129  return 1
130  # search for given user agent matches
131  # the first match counts
132  url = urllib.quote(urlparse.urlparse(url)[2]) or "/"
133  for entry in self.entries:
134  if entry.applies_to(useragent):
135  return entry.allowance(url)
136  # agent not found ==> access granted
137  return 1
138 
def modified (   self)

Definition at line 33 of file robotparser.py.

References RobotFileParser.last_checked.

33 
34  def modified(self):
35  import time
36  self.last_checked = time.time()
def mtime (   self)

Definition at line 30 of file robotparser.py.

References RobotFileParser.last_checked.

30 
31  def mtime(self):
32  return self.last_checked
def parse (   self,
  lines 
)
Parse the input lines from a robots.txt file.
   A user-agent: line does not have to be preceded by
   one or more blank lines.

Definition at line 56 of file robotparser.py.

References string.lower(), locale.str(), and string.strip().

56 
57  def parse(self, lines):
58  """parse the input lines from a robot.txt file.
59  We allow that a user-agent: line is not preceded by
60  one or more blank lines."""
61  state = 0
62  linenumber = 0
63  entry = Entry()
64 
65  for line in lines:
66  line = line.strip()
67  linenumber = linenumber + 1
68  if not line:
69  if state==1:
70  _debug("line %d: warning: you should insert"
71  " allow: or disallow: directives below any"
72  " user-agent: line" % linenumber)
73  entry = Entry()
74  state = 0
75  elif state==2:
76  self.entries.append(entry)
77  entry = Entry()
78  state = 0
79  # remove optional comment and strip line
80  i = line.find('#')
81  if i>=0:
82  line = line[:i]
83  line = line.strip()
84  if not line:
85  continue
86  line = line.split(':', 1)
87  if len(line) == 2:
88  line[0] = line[0].strip().lower()
89  line[1] = line[1].strip()
90  if line[0] == "user-agent":
91  if state==2:
92  _debug("line %d: warning: you should insert a blank"
93  " line before any user-agent"
94  " directive" % linenumber)
95  self.entries.append(entry)
96  entry = Entry()
97  entry.useragents.append(line[1])
98  state = 1
99  elif line[0] == "disallow":
100  if state==0:
101  _debug("line %d: error: you must insert a user-agent:"
102  " directive before this line" % linenumber)
103  else:
104  entry.rulelines.append(RuleLine(line[1], 0))
105  state = 2
106  elif line[0] == "allow":
107  if state==0:
108  _debug("line %d: error: you must insert a user-agent:"
109  " directive before this line" % linenumber)
110  else:
111  entry.rulelines.append(RuleLine(line[1], 1))
112  else:
113  _debug("line %d: warning: unknown key %s" % (linenumber,
114  line[0]))
115  else:
116  _debug("line %d: error: malformed line %s"%(linenumber, line))
117  if state==2:
118  self.entries.append(entry)
119  _debug("Parsed rules:\n%s" % str(self))
120 
def read (   self)

Definition at line 41 of file robotparser.py.

References RobotFileParser.url.

41 
42  def read(self):
43  opener = URLopener()
44  f = opener.open(self.url)
45  lines = f.readlines()
46  self.errcode = opener.errcode
47  if self.errcode == 401 or self.errcode == 403:
48  self.disallow_all = 1
49  _debug("disallow all")
50  elif self.errcode >= 400:
51  self.allow_all = 1
52  _debug("allow all")
53  elif self.errcode == 200 and lines:
54  _debug("parse lines")
55  self.parse(lines)
def set_url (   self,
  url 
)

Definition at line 37 of file robotparser.py.

37 
38  def set_url(self, url):
39  self.url = url
40  self.host, self.path = urlparse.urlparse(url)[1:3]

Field Documentation

allow_all

Definition at line 26 of file robotparser.py.

disallow_all

Definition at line 25 of file robotparser.py.

entries

Definition at line 24 of file robotparser.py.

errcode

Definition at line 45 of file robotparser.py.

last_checked

Definition at line 28 of file robotparser.py.

path

Definition at line 39 of file robotparser.py.

url

Definition at line 38 of file robotparser.py.


The documentation for this class was generated from the following file: