<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/css" href="http://72.14.177.54/skins/common/feed.css?207"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
		<id>http://72.14.177.54/sfvlug/?action=history&amp;feed=atom&amp;title=Python_File_Slicing</id>
		<title>Python File Slicing - Revision history</title>
		<link rel="self" type="application/atom+xml" href="http://72.14.177.54/sfvlug/?action=history&amp;feed=atom&amp;title=Python_File_Slicing"/>
		<link rel="alternate" type="text/html" href="http://72.14.177.54/sfvlug/?title=Python_File_Slicing&amp;action=history"/>
		<updated>2026-04-06T13:11:14Z</updated>
		<subtitle>Revision history for this page on the wiki</subtitle>
		<generator>MediaWiki 1.15.1</generator>

	<entry>
		<id>http://72.14.177.54/sfvlug/?title=Python_File_Slicing&amp;diff=1687&amp;oldid=prev</id>
		<title>Dualdflipflop at 06:01, 29 January 2007</title>
		<link rel="alternate" type="text/html" href="http://72.14.177.54/sfvlug/?title=Python_File_Slicing&amp;diff=1687&amp;oldid=prev"/>
				<updated>2007-01-29T06:01:54Z</updated>
		
		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;[[pyFileSlice]] is a simple utility that will chop out a section of a file that has common starting and ending tags.  Pulling out the page referrers section in an [http://awstats.sf.net Awstats] data file for further analysis prompted this little bit of research.  After trying 3 methods (one involving regex pattern checking over each element in a list, one involving startswith(), and one that uses startswith() and doesn't read the file all at once) the one presented here works the fastest with the least amount of memory used.  [http://www.cs.ucr.edu/~nsoracco/py/fileSlice.html Source Code]&lt;br /&gt;
&lt;br /&gt;
 #!/usr/bin/env python&lt;br /&gt;
 #&lt;br /&gt;
 # Simple tool to spit out referrer information from an awstats database&lt;br /&gt;
 # for later searching and analysis.  A good example of file slicing!&lt;br /&gt;
 &lt;br /&gt;
 __author__ = &amp;quot;Nick Guy &amp;amp; Brian Guy&amp;quot;&lt;br /&gt;
 __license__ = &amp;quot;GPL&amp;quot;&lt;br /&gt;
 &lt;br /&gt;
 import sys, string;&lt;br /&gt;
 &lt;br /&gt;
 # lolz, no argc it seems.  :P&lt;br /&gt;
 argc = len(sys.argv)&lt;br /&gt;
 &lt;br /&gt;
 if argc &amp;gt; 2 :&lt;br /&gt;
 	print sys.argv[0] + &amp;quot; [filename]&amp;quot;&lt;br /&gt;
 	print &amp;quot;[filename] is optional, leave out to use stdin&amp;quot;&lt;br /&gt;
 	sys.exit(1)&lt;br /&gt;
 &lt;br /&gt;
 # variables instantiated here to keep them in file scope.&lt;br /&gt;
 awsdata = []&lt;br /&gt;
 infile = False&lt;br /&gt;
 &lt;br /&gt;
 if argc == 2:&lt;br /&gt;
 	try:&lt;br /&gt;
 		infile = open( sys.argv[1], 'r' )&lt;br /&gt;
 	except IOError:&lt;br /&gt;
 		print &amp;quot;Can't open &amp;quot; + sys.argv[1] + &amp;quot; for reading.&amp;quot;&lt;br /&gt;
 		sys.exit(2)&lt;br /&gt;
 &lt;br /&gt;
 if argc == 1:&lt;br /&gt;
 	infile = sys.stdin&lt;br /&gt;
 &lt;br /&gt;
 # fastest method.  Note that the strings inside startswith() are&lt;br /&gt;
 # the start and end block tokens we need.  Note also that the strings&lt;br /&gt;
 # used to delimit the block we want are NOT included in the final output.&lt;br /&gt;
 while not infile.readline().startswith(&amp;quot;BEGIN_PAGEREFS&amp;quot;):&lt;br /&gt;
 	pass&lt;br /&gt;
 &lt;br /&gt;
 # This is a syntactic hack to implement do/while loops.&lt;br /&gt;
 line=infile.readline()&lt;br /&gt;
 while not line.startswith(&amp;quot;END_PAGEREFS&amp;quot;):&lt;br /&gt;
 	awsdata.append(line)&lt;br /&gt;
 	line=infile.readline()[:-1]	# remove trailing \n, similar to chomp in perl.&lt;br /&gt;
 infile.close()&lt;br /&gt;
 &lt;br /&gt;
 # send data to stdout.&lt;br /&gt;
 for line in awsdata:&lt;br /&gt;
 	print line&lt;/div&gt;</summary>
		<author><name>Dualdflipflop</name></author>	</entry>

	</feed>