# Back to SDS/2 Parametric Scripts

 

# Build the list of normalized words from the text being analyzed.
# Every whitespace-separated token is lower-cased and has leading/trailing
# punctuation stripped; a token made only of those characters becomes ''.
filewordList = []

textfile = open(r'H:\TEMP\temsys\Anchor Rod Plans.txt')
try:
    # Iterate the file directly instead of materializing readlines().
    for line in textfile:
        filewordList.extend(
            [x.lower().strip(",.!?:()[]/\\\n\"\'") for x in line.split()])
finally:
    # Release the handle even if reading fails part-way through.
    textfile.close()

 

# Number of words that make up one reported text section.
chunksize = 100

# Number of sections needed to hold every word of the text.  This is a
# ceiling division written with // so the result is always an int and does
# not depend on how the / operator divides integers.
numofchunks = (len(filewordList) + chunksize - 1) // chunksize

# Keyword dictionary: maps each keyword to a list holding one hit counter
# per text section, all starting at zero.
wordDict = {}

wordfile = open(r'H:\TEMP\temsys\anagrams.txt')
try:
    for word in wordfile:
        # One keyword per line, normalized the same way as the text words
        # (lower case, surrounding whitespace removed).  A blank line
        # yields the key ''.
        word = word.lower().strip()
        wordDict[word] = [0] * numofchunks
finally:
    # Close the keyword file even if building the dictionary fails.
    wordfile.close()

 

# Tally keyword hits per text section.
# Sections are consecutive runs of chunksize words, so the word at position
# word_index belongs to section word_index // chunksize.
for word_index, word in enumerate(filewordList):
    chunk = word_index // chunksize
    # Test membership on the dictionary itself (a hash lookup) rather than
    # on wordDict.keys().
    if word in wordDict:
        wordDict[word][chunk] += 1

 

# Report the keywords found in each text section: sections in order,
# keywords alphabetically within a section, one line per keyword with its
# hit count.

# The key order is the same for every section, so sort once up front.
keyList = list(wordDict.keys())
keyList.sort()

outLines = []
for i in range(numofchunks):
    for key in keyList:
        # Skip keywords with no hits in this section, and the empty-string
        # key (e.g. from a blank line in the keyword file).
        if wordDict[key][i] > 0 and len(key) > 0:
            outLines.append(
                'Text section %d: %-12s %d\n' % (i+1, key, wordDict[key][i]))

# Every entry already ends with a newline; join once instead of growing
# the string with repeated +=.
outStr = ''.join(outLines)

# Parenthesized single-argument form prints the same text whether print is
# a statement or a function.
print(outStr)

 

 

# Summary over all keyword hits in the text:
#   charcnt - total characters in the matched keywords (hits * length)
#   wordcnt - total number of keyword hits
# followed by the average keyword length per hit.
wordcnt = 0
charcnt = 0
for key in wordDict:
    # The empty-string key is left out of the hit count and, having length
    # zero, would add nothing to the character count either.
    if len(key) > 0:
        hits = sum(wordDict[key])
        wordcnt += hits
        charcnt += hits * len(key)

# Guard the average: with no keyword hits the division would raise
# ZeroDivisionError, so report 0.0 instead.
if wordcnt > 0:
    avglen = charcnt / float(wordcnt)
else:
    avglen = 0.0

# One space-separated line: characters, hits, average length.
print('%s %s %s' % (charcnt, wordcnt, avglen))

# Sample output: 302 51 5.92156862745

 

'''

Text section 1: actual       2

Text section 1: are          2

Text section 1: base         1

Text section 1: column       1

Text section 1: count        1

Text section 1: elevation    3

Text section 1: elevations   3

Text section 1: picture      1

Text section 1: plate        1

Text section 1: typical      1

Text section 2: anchor       1

Text section 2: are          1

Text section 2: base         5

Text section 2: column       2

Text section 2: definition   1

Text section 2: elevation    2

Text section 2: equate       1

Text section 2: plate        6

Text section 2: subassembly  1

Text section 3: anchor       1

Text section 3: base         3

Text section 3: column       3

Text section 3: do           1

Text section 3: plate        3

Text section 3: see          1

Text section 3: subassembly  1

Text section 3: typical      1

Text section 4: are          1

'''