# -*- coding: utf-8 -*-
########################################################
# KaOS
# Das Kategorisierungs- & Ordnungs-System zum ZaPF Wiki
# Author: Marvin aus Tuebingen (Telegram: @MarvinTue)
# Version 1.0
########################################################
# Changes:
# 
#
#
# Author:
########################################################
# Changes:
# 
#
#
# Author:
########################################################

# Settings:
outputFile="index.html" # file the generated HTML page is written to
baseUrl="http://zapf.wiki" # base URL used as prefix for all links into the wiki
apiUrl="https://zapf.wiki/api.php" # URL of the MediaWiki API endpoint


# Imports:
import json
import time
import urllib.parse
import urllib.request

# HTML building blocks for the generated page.

# Page header: inline CSS plus a small script. When the page is opened with a
# query string (produced by the search form), the script highlights entries
# whose text matches, unfolds every enclosing <details> element, and scrolls
# the first matching <summary> into view.
htmlHead="""
<!DOCTYPE html>
<html>
<head>
<meta charset='utf-8'>
<style>
    details{
        margin-left:50px;
    }
    summary{
        margin-left:-50px;
    }
    ul{
        padding-left: 15px;
        margin: 0px;
    }
    .highlight{
        background-color: yellow;
    }
    form{
        display:inline-block;
    }
</style>
</head>
<body>
<script>
function comp(searchStr) {
return searchStr.toLowerCase().replace(/[^a-z0-9]/g, "+");
}
if (window.location.href.indexOf("=")>0){
    window.onload = function () {
        var searchStr = decodeURIComponent(window.location.href.substring(window.location.href.indexOf("=")+1));
        searchStr=comp(searchStr);
        if (searchStr == "") return;
        const searchItems = Array.from(document.querySelectorAll(".s"));
        var jumpPoint=searchItems[0];
        for (var i = searchItems.length-1; i>=0; i--) { 
            if (comp(searchItems[i].textContent).indexOf(searchStr)>-1 ){
                var parent = searchItems[i].parentNode;
                searchItems[i].setAttribute("class", "highlight");
                if(searchItems[i].tagName=="SUMMARY"){
                    jumpPoint=searchItems[i];
                }
                while (parent.tagName!="BODY"){
                    if (parent.tagName=="DETAILS"){
                        if (parent.getAttribute("open")==1){
                            break;
                        }
                        parent.setAttribute("open",1);
                    }
                    parent=parent.parentNode;
                }
            }
        }
        jumpPoint.scrollIntoView(true);
    }
}
</script>
<h1>KaOS</h1>
<h2>Das Kategorisierungs- & Ordnungs-System zum <a href='https://zapf.wiki/Hauptseite'>ZaPF Wiki</a></h2>
<hr>
<form><input name="search" type="text"><input type="submit" value="Search here" > <input type="submit" formaction="https://zapf.wiki/" value="Search wiki"></form>
<form><input type="submit" value="Reset"></form> | Siehe auch: <a href='https://zapf.wiki/HowTo_ZaPF-Wiki'>HowTo ZaPF-Wiki</a>
<hr>
<br><br>
"""

# Separator between the category tree and the diagnostics section at the bottom.
htmlDebugSeperator="<br><br><h2>Reduziere Chaos und mache KaOS besser</h2><hr>"

# Closes the tags opened in htmlHead.
htmlFoot="</body></html>"

# Functions:

# Request content from url with a nice header
def getUrl(url):
    """Fetch *url* via HTTP GET and return the response body decoded as UTF-8.

    Sends browser-like headers (User-Agent etc.) instead of urllib's
    defaults. Raises urllib.error.URLError / HTTPError on failure.
    """
    print("get: "+url)
    #time.sleep(1)
    q = urllib.request.Request(url)
    q.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11')
    q.add_header('Connection', 'keep-alive')
    q.add_header('Accept','text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')
    q.add_header('Accept-Charset','ISO-8859-1,utf-8;q=0.7,*;q=0.3')
    q.add_header('Accept-Encoding','none')
    q.add_header('Accept-Language','en-US,en;q=0.8')
    # Close the response handle even if read()/decode() raises; the original
    # never closed it. Also drops the unused `global baseUrl` declaration.
    with urllib.request.urlopen(q) as response:
        return response.read().decode('utf-8')

def urlDecode(url):
    """Percent-decode *url* (e.g. 'a%20b' -> 'a b').

    Note: requires `import urllib.parse` at file level — previously this
    only worked because urllib.request happens to import urllib.parse
    internally (a CPython implementation detail).
    """
    return urllib.parse.unquote(url)

def urlEncode(url):
    """Percent-encode *url* for safe use inside a URL (e.g. 'a b' -> 'a%20b').

    Note: requires `import urllib.parse` at file level — previously this
    only worked because urllib.request happens to import urllib.parse
    internally (a CPython implementation detail).
    """
    return urllib.parse.quote(url)

def removePrefex(s):
    """Strip everything up to and including the first ':' in *s*.

    Used to turn e.g. 'Kategorie:Foo' into 'Foo'. If *s* contains no
    colon, find() returns -1, the slice starts at 0, and *s* is returned
    unchanged.
    """
    cut = s.find(':') + 1
    return s[cut:]

# get all categories
def getCats(dict):
    """Query the wiki for every category name and register each in *dict*.

    Follows API continuation tokens, matching addCategory's pagination —
    the original fetched only the first batch of (at most) 500 categories.
    """
    morePages = True
    continueStr = ""
    while morePages:
        content = json.loads(getUrl(apiUrl + "?action=query&aclimit=500&list=allcategories&format=json&accontinue=" + continueStr))
        for entry in content['query']['allcategories']:
            # '*' carries the category title in MediaWiki's legacy JSON format
            addCategory(entry['*'], dict)
        if "continue" in content:
            # NOTE(review): token is passed on verbatim, same as cmcontinue in
            # addCategory — confirm tokens never need percent-encoding here.
            continueStr = content["continue"]["accontinue"]
        else:
            morePages = False

# add a category plus its members to the dict
def addCategory(cat,dict):
    """Fetch all members of category *cat* from the wiki API and store them
    in *dict* under the key *cat* as
    ``{"subCats": [...], "pages": [...], "files": [...]}``.

    Follows cmcontinue tokens so categories with more than 500 members are
    read completely.

    Bug fix: the original wrote to the module-level ``catsDict`` and
    ignored the ``dict`` parameter entirely, so it only worked when the
    caller happened to pass that exact global.
    """
    dict[cat] = {"subCats": [], "pages": [], "files": []}

    morePages = True
    continueStr = ""
    while morePages:
        content = json.loads(getUrl(apiUrl + "?action=query&cmtitle=category:"+urlEncode(cat)+"&list=categorymembers&cmlimit=500&cmtype=subcat|file|page&format=json&cmcontinue="+continueStr))
        for entry in content['query']['categorymembers']:
            # MediaWiki namespaces: 14 = Category, 0 = main/article, 6 = File
            if entry['ns'] == 14:
                dict[cat]["subCats"].append(removePrefex(entry['title']))
            elif entry['ns'] == 0:
                dict[cat]["pages"].append(entry['title'])
            elif entry['ns'] == 6:
                dict[cat]["files"].append(entry['title'])
        if "continue" in content:
            continueStr = content["continue"]["cmcontinue"]
        else:
            morePages = False

    


# get categories without parents
def getRoots(dict):
    """Return all categories in *dict* that are not a subcategory of any
    other category (in *dict* iteration order).

    Bug fix: the inner lookup previously read the module-level ``catsDict``
    instead of the ``dict`` parameter, so the function silently depended on
    being called with that exact global. Also replaces the O(n^2) list
    membership test with a set.
    """
    nonRoots = set()
    for cat in dict:
        nonRoots.update(dict[cat]["subCats"])
    return [cat for cat in dict if cat not in nonRoots]

# get categories without pages or subcategories
def getEmpty(dict):
    """Return all categories in *dict* that have neither subcategories nor
    pages (files are deliberately not considered)."""
    return [
        cat for cat in dict
        if not (dict[cat]["subCats"] or dict[cat]["pages"])
    ]

# remove category and all links
def purgeCat(dict,catToRemove):
    """Delete *catToRemove* from *dict* and drop every reference to it from
    the other categories' subCats lists (in place)."""
    for meta in dict.values():
        subs = meta["subCats"]
        if catToRemove in subs:
            subs.remove(catToRemove)
    del dict[catToRemove]

# recursive cycle search over the category graph
def zyklenFinden(dict,roots,zyklen):
    """Run a depth-first search from every category in *roots* and append a
    path string to *zyklen* for each cycle found. Categories never reached
    from any root are reported together as one "islands: ..." entry."""
    flag = {}
    for start in roots:
        zyklenFindenR(dict, start, flag, "", zyklen)
    unreached = [cat for cat in dict if cat not in flag]
    if unreached:
        zyklen.append("islands: " + "".join(cat + " --- " for cat in unreached))

def zyklenFindenR(dict,root,flag,path,zyklen):
    """Recursive DFS step of the cycle search.

    *flag* states: key absent = unvisited, 1 = on the current DFS path,
    2 = fully explored. Reaching a node that is already on the current
    path means a cycle; the accumulated *path* string is appended to
    *zyklen*.

    Bug fix: the original executed ``flag[root] = 2`` unconditionally at
    the end of every call, so revisiting a node that was merely on the
    current path (flag 1) cleared an ancestor's in-progress marker —
    hiding further cycles through that node and marking it explored too
    early. The marker is now only finalized after the node's own subtree
    has been fully expanded.
    """
    path += root + " --> "
    if root in flag:
        if flag[root] == 1:
            # back edge onto the active DFS path -> cycle found
            zyklen.append(path)
        return
    flag[root] = 1
    for subCat in dict[root]["subCats"]:
        zyklenFindenR(dict, subCat, flag, path, zyklen)
    flag[root] = 2
   
# Some functions to generate HTML code blocks:
def htmlOutTree(dict,root,depth,pagesDict):
    """Append the HTML for the category subtree rooted at *root* to the
    module-global ``htmlOut`` string.

    dict      -- category metadata (name -> {"subCats", "pages", "files"})
    root      -- category name to render
    depth     -- remaining recursion levels; nothing is emitted at depth 0
    pagesDict -- page title -> list of categories, for the cross-links

    Note: the <details> opened here is closed by the "</details>" that
    htmlOutPageLinkListExtended appends at the end of its list — the two
    functions are deliberately coupled.
    """
    global htmlOut
    if (depth > 0):
        htmlOut+="<details><summary class='s'>"+htmlOutCatLink(dict,root)+"</summary>"
        for subCat in dict[root]["subCats"]:
            htmlOutTree(dict,subCat,depth-1,pagesDict)
        htmlOut+=htmlOutPageLinkListExtended(dict[root]["pages"],pagesDict,root)
        

def htmlOutCatLink(dict,cat):
    """Return an HTML snippet for category *cat*: its name, a link into the
    wiki, and the counts '(subcategories+pages)'."""
    global baseUrl
    nSub = len(dict[cat]["subCats"])
    nPages = len(dict[cat]["pages"])
    return "{0} <a target='_blank' href='{1}/category:{0}'>&#x1f517;</a> ({2}+{3})".format(cat, baseUrl, nSub, nPages)


def htmlOutPageLinkList(pages):
    """Return a <ul> of wiki links for *pages*.

    Note: the trailing "</details>" closes a <details> element opened by
    the caller (see htmlOutTree) — this coupling is intentional.
    """
    global baseUrl
    items = [
        "<li><a target='_blank' href='" + baseUrl + "/" + urlEncode(page) + "'>" + page + "</a></li>"
        for page in pages
    ]
    return "<ul>" + "".join(items) + "</ul></details>"

def htmlOutPageLinkListExtended(pages,pagesDict,rootName):
    """Return a <ul> of wiki links for *pages*; each entry also links every
    other category the page belongs to (all categories in pagesDict[page]
    except *rootName*) as in-page search links.

    Note: the trailing "</details>" closes a <details> element opened by
    the caller (see htmlOutTree) — this coupling is intentional.
    """
    global baseUrl
    parts = ["<ul>"]
    for page in pages:
        parts.append("<li><a target='_blank'  class='s' href='" + baseUrl + "/" + urlEncode(page) + "'>" + page + "</a> ")
        for cat in pagesDict[page]:
            if cat != rootName:
                parts.append("[<a href='?q=" + cat + "'>" + cat + "</a>]")
        parts.append("</li>")
    parts.append("</ul></details>")
    return "".join(parts)

# Main program:

# catsDict: category name -> {"subCats": [...], "pages": [...], "files": [...]}
catsDict={}
# pagesDict: page title -> list of categories the page belongs to
pagesDict={}
# accumulated HTML for the category tree (filled by htmlOutTree via `global`)
htmlOut=""
# accumulated HTML for the diagnostics section at the bottom of the page
htmlDebug=""


print("load categories")
getCats(catsDict)  # fills catsDict from the wiki API



print("build pages list")
# Invert catsDict: collect, per page, every category that lists it.
for cat in catsDict:
    for page in catsDict[cat]["pages"]:
        if (not page in pagesDict):
            pagesDict[page]=[]
        pagesDict[page].append(cat)


print("output statistics")
htmlDebug+="<ul><li>Snapshot from: " + str(time.ctime()) + "</li>"
htmlDebug+="<li>The wiki has " + str(len(pagesDict)) + " categorised pages in "+ str(len(catsDict)) + " categories</li></ul>" 
htmlDebug+="<details><summary>These pages have no category please add some in the wiki</summary>"
htmlDebug+="<a href='https://zapf.wiki/Spezial:Nicht_kategorisierte_Seiten'>link</a></details>"

# Disabled report: pages that belong to exactly one category.
#only1catPages=[]
#for page in pagesDict:
#    if(len(pagesDict[page])==1):
#        only1catPages.append(page)
#htmlDebug+="<details><summary>These pages have only one category please add some more in the wiki</summary>"
#htmlDebug+=htmlOutPageLinkList(only1catPages)+"</details>"


print("remove empty categories")
# Repeatedly strip categories with no pages and no subcategories; removing
# one can empty its parent, so loop until a fixed point is reached.
removedCats=[]
empty=getEmpty(catsDict)
while(len(empty)>0):
    print("found " + str(len(empty)) + " empty")
    for catToRemove in empty:
        removedCats.append(catToRemove)
        purgeCat(catsDict,catToRemove)
    empty=getEmpty(catsDict)
htmlDebug+="<details><summary>These categories are empty and should be removed (admin needed) or edited in the wiki</summary>"
htmlDebug+=str(removedCats)+"</details>"


print("find root categories")
rootCats=getRoots(catsDict)


print("find cycles")
zyklen = []
zyklenFinden(catsDict,rootCats,zyklen)
htmlDebug+="<details><summary>These categories cause cycles in the category structure and should be edited in the wiki</summary>"
htmlDebug+=str(zyklen)+"</details>"


print("output tree structure")
# Render each root's subtree into htmlOut; recursion depth capped at 10.
for root in rootCats:
    htmlOutTree(catsDict,root,10,pagesDict)



print("write file")
# Use a context manager so the handle is closed even if write() raises, and
# pin the encoding to UTF-8: the generated page declares
# <meta charset='utf-8'>, but the original open() used the platform default
# encoding, which can garble or reject non-ASCII titles (e.g. on Windows).
with open(outputFile, "w", encoding="utf-8") as file:
    file.write(htmlHead + htmlOut + htmlDebugSeperator + htmlDebug + htmlFoot)



