# Back to SDS/2 Parametric Scripts
# Count how often each keyword occurs in successive fixed-size sections
# ("chunks") of a text file, then print a per-section report followed by
# the total matched characters, total matched words and their ratio.

# Text file to analyse.
# NOTE(review): this path was reconstructed from a line-wrapped literal;
# confirm there is a single space between 'Anchor' and 'Rod'.
SOURCE_PATH = r'H:\TEMP\temsys\Anchor Rod Plans.txt'
# File listing the keywords to look for, one per line.
KEYWORD_PATH = r'H:\TEMP\temsys\anagrams.txt'
# Number of words in each text section.
CHUNK_SIZE = 100
# Characters stripped from both ends of every word of the source text.
STRIP_CHARS = ",.!?:()[]/\\\n\"\'"


def read_lines(path):
    """Return the lines of the file at *path*, always closing the file."""
    handle = open(path)
    try:
        return handle.readlines()
    finally:
        handle.close()


def normalize_words(lines):
    """Return every word in *lines*, lower-cased and punctuation-stripped.

    Words are split on whitespace and kept in their original order, because
    a word's position decides which text section it is counted in.
    """
    words = []
    for line in lines:
        words.extend([raw.lower().strip(STRIP_CHARS) for raw in line.split()])
    return words


def normalize_keywords(lines):
    """Return the lower-cased, whitespace-stripped keywords in *lines*.

    Blank lines are dropped: an empty keyword is never reported and adds
    nothing to the totals.
    """
    keywords = []
    for line in lines:
        keyword = line.lower().strip()
        if keyword:
            keywords.append(keyword)
    return keywords


def count_keywords(words, keywords, chunk_size=CHUNK_SIZE):
    """Count keyword occurrences per text section.

    Returns a tuple ``(counts, num_chunks)`` where *counts* maps each keyword
    to a list holding one hit count per section and *num_chunks* is the number
    of sections needed to hold *words*.
    """
    # Ceiling division written with // so the result is an int on both
    # Python 2 and Python 3.
    num_chunks = (len(words) + chunk_size - 1) // chunk_size
    counts = {}
    for keyword in keywords:
        counts[keyword] = [0] * num_chunks
    for index, word in enumerate(words):
        # Test membership on the dict itself (hash lookup), not on keys().
        if word in counts:
            counts[word][index // chunk_size] += 1
    return counts, num_chunks


def format_report(counts, num_chunks):
    """Return one report line per (section, keyword) pair with a hit.

    Sections are listed in order and keywords alphabetically within each
    section; every line ends with a newline.
    """
    ordered_keys = sorted(counts)  # sort once, not once per section
    report = []
    for section in range(num_chunks):
        for key in ordered_keys:
            hits = counts[key][section]
            if hits > 0:
                report.append('Text section %d: %-12s %d\n'
                              % (section + 1, key, hits))
    return ''.join(report)


def totals(counts):
    """Return ``(char_count, word_count, average_length)`` of matched words.

    *average_length* is 0.0 when nothing matched, instead of dividing by zero.
    """
    word_count = 0
    char_count = 0
    for key in counts:
        hits = sum(counts[key])
        word_count += hits
        char_count += hits * len(key)
    if word_count:
        average = char_count / float(word_count)
    else:
        average = 0.0
    return char_count, word_count, average


def main():
    """Read both input files and print the report and the totals line."""
    words = normalize_words(read_lines(SOURCE_PATH))
    keywords = normalize_keywords(read_lines(KEYWORD_PATH))
    counts, num_chunks = count_keywords(words, keywords, CHUNK_SIZE)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(format_report(counts, num_chunks))
    print('%d %d %s' % totals(counts))


if __name__ == '__main__':
    main()
# 302 51 5.92156862745
'''
Text section 1: actual       2
Text section 1: are          2
Text section 1: base         1
Text section 1: column       1
Text section 1: count        1
Text section 1: elevation    3
Text section 1: elevations   3
Text section 1: picture      1
Text section 1: plate        1
Text section 1: typical      1
Text section 2: anchor       1
Text section 2: are          1
Text section 2: base         5
Text section 2: column       2
Text section 2: definition   1
Text section 2: elevation    2
Text section 2: equate       1
Text section 2: plate        6
Text section 2: subassembly  1
Text section 3: anchor       1
Text section 3: base         3
Text section 3: column       3
Text section 3: do           1
Text section 3: plate        3
Text section 3: see          1
Text section 3: subassembly  1
Text section 3: typical      1
Text section 4: are          1
'''