<?xml version="1.0"?>
<?xml-stylesheet type="text/css" href="http://72.14.177.54/skins/common/feed.css?207"?>
<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
	<channel>
		<title>Python File Slicing - Revision history</title>
		<link>http://72.14.177.54/sfvlug/?title=Python_File_Slicing&amp;action=history</link>
		<description>Revision history for this page on the wiki</description>
		<language>en</language>
		<generator>MediaWiki 1.15.1</generator>
		<lastBuildDate>Mon, 06 Apr 2026 13:11:15 GMT</lastBuildDate>
		<item>
			<title>Dualdflipflop at 06:01, 29 January 2007</title>
			<link>http://72.14.177.54/sfvlug/?title=Python_File_Slicing&amp;diff=1687&amp;oldid=prev</link>
			<description>&lt;p&gt;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;[[pyFileSlice]] is a simple utility that will chop out a section of a file that has common starting and ending tags.  Pulling out the page referrers section in an [http://awstats.sf.net Awstats] data file for further analysis prompted this little bit of research.  After trying 3 methods (one involving regex pattern checking over each element in a list, one involving startswith(), and one that uses startswith() and doesn't read the file all at once) the one presented here works the fastest with the least amount of memory used.  [http://www.cs.ucr.edu/~nsoracco/py/fileSlice.html Source Code]&lt;br /&gt;
&lt;br /&gt;
 #!/usr/bin/env python&lt;br /&gt;
 #&lt;br /&gt;
 # Simple tool to spit out referrer information from an awstats database&lt;br /&gt;
 # for later searching and analysis.  A good example of file slicing!&lt;br /&gt;
 &lt;br /&gt;
 __author__ = &amp;quot;Nick Guy &amp;amp; Brian Guy&amp;quot;&lt;br /&gt;
 __license__ = &amp;quot;GPL&amp;quot;&lt;br /&gt;
 &lt;br /&gt;
 import sys, string;&lt;br /&gt;
 &lt;br /&gt;
 # lolz, no argc it seems.  :P&lt;br /&gt;
 argc = len(sys.argv)&lt;br /&gt;
 &lt;br /&gt;
 if argc &amp;gt; 2 :&lt;br /&gt;
 	print sys.argv[0] + &amp;quot; [filename]&amp;quot;&lt;br /&gt;
 	print &amp;quot;[filename] is optional, leave out to use stdin&amp;quot;&lt;br /&gt;
 	sys.exit(1)&lt;br /&gt;
 &lt;br /&gt;
 # variables instantiated here to keep them in file scope.&lt;br /&gt;
 awsdata = []&lt;br /&gt;
 infile = False&lt;br /&gt;
 &lt;br /&gt;
 if argc == 2:&lt;br /&gt;
 	try:&lt;br /&gt;
 		infile = open( sys.argv[1], 'r' )&lt;br /&gt;
 	except IOError:&lt;br /&gt;
 		print &amp;quot;Can't open &amp;quot; + sys.argv[1] + &amp;quot; for reading.&amp;quot;&lt;br /&gt;
 		sys.exit(2)&lt;br /&gt;
 &lt;br /&gt;
 if argc == 1:&lt;br /&gt;
 	infile = sys.stdin&lt;br /&gt;
 &lt;br /&gt;
 # fastest method.  Note that the strings inside startswith() are&lt;br /&gt;
 # the start and end block tokens we need.  Note also that the strings&lt;br /&gt;
 # used to delimit the block we want are NOT included in the final output.&lt;br /&gt;
 while not infile.readline().startswith(&amp;quot;BEGIN_PAGEREFS&amp;quot;):&lt;br /&gt;
 	pass&lt;br /&gt;
 &lt;br /&gt;
 # This is a syntactic hack to implement do/while loops.&lt;br /&gt;
 line=infile.readline()&lt;br /&gt;
 while not line.startswith(&amp;quot;END_PAGEREFS&amp;quot;):&lt;br /&gt;
 	awsdata.append(line)&lt;br /&gt;
 	line=infile.readline()[:-1]	# remove trailing \n, similar to chomp in perl.&lt;br /&gt;
 infile.close()&lt;br /&gt;
 &lt;br /&gt;
 # send data to stdout.&lt;br /&gt;
 for line in awsdata:&lt;br /&gt;
 	print line&lt;/div&gt;</description>
			<pubDate>Mon, 29 Jan 2007 06:01:54 GMT</pubDate>			<dc:creator>Dualdflipflop</dc:creator>			<comments>http://72.14.177.54/sfvlug/Talk:Python_File_Slicing</comments>		</item>
	</channel>
</rss>