# -*- coding: utf-8 -*-
########################################################
# KaOS
# Das Kategorisierungs- & Ordnungs-System zum ZaPF Wiki
# Author: Marvin aus Tuebingen (Telegram: @MarvinTue)
# Version 1.0
########################################################
# Changes:
# 
#
#
# Author:
########################################################
# Changes:
# 
#
#
# Author:
########################################################

# Settings:
outputFile="index.html" # file the generated HTML page is written to
baseUrl="http://zapf.wiki" # base URL used as prefix for all links into the wiki
apiUrl="https://zapf.wiki/api.php" # URL of the MediaWiki API endpoint


# Imports:
import json
import time
import urllib.parse
import urllib.request

# HTML building blocks for the generated page.

# Page header: inline CSS plus a small script. When the page is opened with a
# query string (produced by the search form), the script highlights entries
# whose text matches, unfolds every enclosing <details> element, and scrolls
# the first matching <summary> into view.
htmlHead="""
<!DOCTYPE html>
<html>
<head>
<meta charset='utf-8'>
<style>
    details{
        margin-left:50px;
    }
    summary{
        margin-left:-50px;
    }
    ul{
        padding-left: 15px;
        margin: 0px;
    }
    .highlight{
        background-color: yellow;
    }
    form{
        display:inline-block;
    }
</style>
</head>
<body>
<script>
function comp(searchStr) {
return searchStr.toLowerCase().replace(/[^a-z0-9]/g, "+");
}
if (window.location.href.indexOf("=")>0){
    window.onload = function () {
        var searchStr = decodeURIComponent(window.location.href.substring(window.location.href.indexOf("=")+1));
        searchStr=comp(searchStr);
        if (searchStr == "") return;
        const searchItems = Array.from(document.querySelectorAll(".s"));
        var jumpPoint=searchItems[0];
        for (var i = searchItems.length-1; i>=0; i--) { 
            if (comp(searchItems[i].textContent).indexOf(searchStr)>-1 ){
                var parent = searchItems[i].parentNode;
                searchItems[i].setAttribute("class", "highlight");
                if(searchItems[i].tagName=="SUMMARY"){
                    jumpPoint=searchItems[i];
                }
                while (parent.tagName!="BODY"){
                    if (parent.tagName=="DETAILS"){
                        if (parent.getAttribute("open")==1){
                            break;
                        }
                        parent.setAttribute("open",1);
                    }
                    parent=parent.parentNode;
                }
            }
        }
        jumpPoint.scrollIntoView(true);
    }
}
</script>
<h1>KaOS</h1>
<h2>Das Kategorisierungs- & Ordnungs-System zum <a href='https://zapf.wiki/Hauptseite'>ZaPF Wiki</a></h2>
<hr>
<form><input name="search" type="text"><input type="submit" value="Search here" > <input type="submit" formaction="https://zapf.wiki/" value="Search wiki"></form>
<form><input type="submit" value="Reset"></form> | Siehe auch: <a href='https://zapf.wiki/HowTo_ZaPF-Wiki'>HowTo ZaPF-Wiki</a>
<hr>
<br><br>
"""

# Separator between the category tree and the diagnostics section at the bottom.
htmlDebugSeperator="<br><br><h2>Reduziere Chaos und mache KaOS besser</h2><hr>"

# Closes the tags opened in htmlHead.
htmlFoot="</body></html>"

# Functions:

# Request content from url with a nice header
def getUrl(url):
    """Fetch *url* via HTTP GET and return the response body decoded as UTF-8.

    Sends browser-like headers (User-Agent etc.) instead of urllib's
    defaults. Raises urllib.error.URLError / HTTPError on failure.
    """
    print("get: "+url)
    #time.sleep(1)
    q = urllib.request.Request(url)
    q.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11')
    q.add_header('Connection', 'keep-alive')
    q.add_header('Accept','text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')
    q.add_header('Accept-Charset','ISO-8859-1,utf-8;q=0.7,*;q=0.3')
    q.add_header('Accept-Encoding','none')
    q.add_header('Accept-Language','en-US,en;q=0.8')
    # Close the response handle even if read()/decode() raises; the original
    # never closed it. Also drops the unused `global baseUrl` declaration.
    with urllib.request.urlopen(q) as response:
        return response.read().decode('utf-8')

def urlDecode(url):
    """Percent-decode *url* (e.g. 'a%20b' -> 'a b').

    Note: requires `import urllib.parse` at file level — previously this
    only worked because urllib.request happens to import urllib.parse
    internally (a CPython implementation detail).
    """
    return urllib.parse.unquote(url)

def urlEncode(url):
    """Percent-encode *url* for safe use inside a URL (e.g. 'a b' -> 'a%20b').

    Note: requires `import urllib.parse` at file level — previously this
    only worked because urllib.request happens to import urllib.parse
    internally (a CPython implementation detail).
    """
    return urllib.parse.quote(url)

def removePrefex(s):
    """Strip everything up to and including the first ':' in *s*.

    Used to turn e.g. 'Kategorie:Foo' into 'Foo'. If *s* contains no
    colon, find() returns -1, the slice starts at 0, and *s* is returned
    unchanged.
    """
    cut = s.find(':') + 1
    return s[cut:]

# get all categories
def getCats(dict):
    """Query the wiki for every category name and register each in *dict*.

    Follows API continuation tokens, matching addCategory's pagination —
    the original fetched only the first batch of (at most) 500 categories.
    """
    morePages = True
    continueStr = ""
    while morePages:
        content = json.loads(getUrl(apiUrl + "?action=query&aclimit=500&list=allcategories&format=json&accontinue=" + continueStr))
        for entry in content['query']['allcategories']:
            # '*' carries the category title in MediaWiki's legacy JSON format
            addCategory(entry['*'], dict)
        if "continue" in content:
            # NOTE(review): token is passed on verbatim, same as cmcontinue in
            # addCategory — confirm tokens never need percent-encoding here.
            continueStr = content["continue"]["accontinue"]
        else:
            morePages = False

# add a category plus its members to the dict
def addCategory(cat,dict):
    """Fetch all members of category *cat* from the wiki API and store them
    in *dict* under the key *cat* as
    ``{"subCats": [...], "pages": [...], "files": [...]}``.

    Follows cmcontinue tokens so categories with more than 500 members are
    read completely.

    Bug fix: the original wrote to the module-level ``catsDict`` and
    ignored the ``dict`` parameter entirely, so it only worked when the
    caller happened to pass that exact global.
    """
    dict[cat] = {"subCats": [], "pages": [], "files": []}

    morePages = True
    continueStr = ""
    while morePages:
        content = json.loads(getUrl(apiUrl + "?action=query&cmtitle=category:"+urlEncode(cat)+"&list=categorymembers&cmlimit=500&cmtype=subcat|file|page&format=json&cmcontinue="+continueStr))
        for entry in content['query']['categorymembers']:
            # MediaWiki namespaces: 14 = Category, 0 = main/article, 6 = File
            if entry['ns'] == 14:
                dict[cat]["subCats"].append(removePrefex(entry['title']))
            elif entry['ns'] == 0:
                dict[cat]["pages"].append(entry['title'])
            elif entry['ns'] == 6:
                dict[cat]["files"].append(entry['title'])
        if "continue" in content:
            continueStr = content["continue"]["cmcontinue"]
        else:
            morePages = False

    


# get categories without parents
def getRoots(dict):
    """Return all categories in *dict* that are not a subcategory of any
    other category (in *dict* iteration order).

    Bug fix: the inner lookup previously read the module-level ``catsDict``
    instead of the ``dict`` parameter, so the function silently depended on
    being called with that exact global. Also replaces the O(n^2) list
    membership test with a set.
    """
    nonRoots = set()
    for cat in dict:
        nonRoots.update(dict[cat]["subCats"])
    return [cat for cat in dict if cat not in nonRoots]

# get categories without pages or subcategories
def getEmpty(dict):
    """Return all categories in *dict* that have neither subcategories nor
    pages (files are deliberately not considered)."""
    return [
        cat for cat in dict
        if not (dict[cat]["subCats"] or dict[cat]["pages"])
    ]

# remove category and all links
def purgeCat(dict,catToRemove):
    """Delete *catToRemove* from *dict* and drop every reference to it from
    the other categories' subCats lists (in place)."""
    for meta in dict.values():
        subs = meta["subCats"]
        if catToRemove in subs:
            subs.remove(catToRemove)
    del dict[catToRemove]

# recursive cycle search over the category graph
def zyklenFinden(dict,roots,zyklen):
    """Run a depth-first search from every category in *roots* and append a
    path string to *zyklen* for each cycle found. Categories never reached
    from any root are reported together as one "islands: ..." entry."""
    flag = {}
    for start in roots:
        zyklenFindenR(dict, start, flag, "", zyklen)
    unreached = [cat for cat in dict if cat not in flag]
    if unreached:
        zyklen.append("islands: " + "".join(cat + " --- " for cat in unreached))

def zyklenFindenR(dict,root,flag,path,zyklen):
    """Recursive DFS step of the cycle search.

    *flag* states: key absent = unvisited, 1 = on the current DFS path,
    2 = fully explored. Reaching a node that is already on the current
    path means a cycle; the accumulated *path* string is appended to
    *zyklen*.

    Bug fix: the original executed ``flag[root] = 2`` unconditionally at
    the end of every call, so revisiting a node that was merely on the
    current path (flag 1) cleared an ancestor's in-progress marker —
    hiding further cycles through that node and marking it explored too
    early. The marker is now only finalized after the node's own subtree
    has been fully expanded.
    """
    path += root + " --> "
    if root in flag:
        if flag[root] == 1:
            # back edge onto the active DFS path -> cycle found
            zyklen.append(path)
        return
    flag[root] = 1
    for subCat in dict[root]["subCats"]:
        zyklenFindenR(dict, subCat, flag, path, zyklen)
    flag[root] = 2
   
# Some functions to generate HTML code blocks:
def htmlOutTree(dict,root,depth,pagesDict):
    """Append the HTML for the category subtree rooted at *root* to the
    module-global ``htmlOut`` string.

    dict      -- category metadata (name -> {"subCats", "pages", "files"})
    root      -- category name to render
    depth     -- remaining recursion levels; nothing is emitted at depth 0
    pagesDict -- page title -> list of categories, for the cross-links

    Note: the <details> opened here is closed by the "</details>" that
    htmlOutPageLinkListExtended appends at the end of its list — the two
    functions are deliberately coupled.
    """
    global htmlOut
    if (depth > 0):
        htmlOut+="<details><summary class='s'>"+htmlOutCatLink(dict,root)+"</summary>"
        for subCat in dict[root]["subCats"]:
            htmlOutTree(dict,subCat,depth-1,pagesDict)
        htmlOut+=htmlOutPageLinkListExtended(dict[root]["pages"],pagesDict,root)
        

def htmlOutCatLink(dict,cat):
    """Return an HTML snippet for category *cat*: its name, a link into the
    wiki, and the counts '(subcategories+pages)'."""
    global baseUrl
    nSub = len(dict[cat]["subCats"])
    nPages = len(dict[cat]["pages"])
    return "{0} <a target='_blank' href='{1}/category:{0}'>&#x1f517;</a> ({2}+{3})".format(cat, baseUrl, nSub, nPages)


def htmlOutPageLinkList(pages):
    """Return a <ul> of wiki links for *pages*.

    Note: the trailing "</details>" closes a <details> element opened by
    the caller (see htmlOutTree) — this coupling is intentional.
    """
    global baseUrl
    items = [
        "<li><a target='_blank' href='" + baseUrl + "/" + urlEncode(page) + "'>" + page + "</a></li>"
        for page in pages
    ]
    return "<ul>" + "".join(items) + "</ul></details>"

def htmlOutPageLinkListExtended(pages,pagesDict,rootName):
    """Return a <ul> of wiki links for *pages*; each entry also links every
    other category the page belongs to (all categories in pagesDict[page]
    except *rootName*) as in-page search links.

    Note: the trailing "</details>" closes a <details> element opened by
    the caller (see htmlOutTree) — this coupling is intentional.
    """
    global baseUrl
    parts = ["<ul>"]
    for page in pages:
        parts.append("<li><a target='_blank'  class='s' href='" + baseUrl + "/" + urlEncode(page) + "'>" + page + "</a> ")
        for cat in pagesDict[page]:
            if cat != rootName:
                parts.append("[<a href='?q=" + cat + "'>" + cat + "</a>]")
        parts.append("</li>")
    parts.append("</ul></details>")
    return "".join(parts)

# Main program:

# catsDict: category name -> {"subCats": [...], "pages": [...], "files": [...]}
catsDict={}
# pagesDict: page title -> list of categories the page belongs to
pagesDict={}
# accumulated HTML for the category tree (filled by htmlOutTree via `global`)
htmlOut=""
# accumulated HTML for the diagnostics section at the bottom of the page
htmlDebug=""


print("load categories")
getCats(catsDict)  # fills catsDict from the wiki API



print("build pages list")
# Invert catsDict: collect, per page, every category that lists it.
for cat in catsDict:
    for page in catsDict[cat]["pages"]:
        if (not page in pagesDict):
            pagesDict[page]=[]
        pagesDict[page].append(cat)


print("output statistics")
htmlDebug+="<ul><li>Snapshot from: " + str(time.ctime()) + "</li>"
htmlDebug+="<li>The wiki has " + str(len(pagesDict)) + " categorised pages in "+ str(len(catsDict)) + " categories</li></ul>" 
htmlDebug+="<details><summary>These pages have no category please add some in the wiki</summary>"
htmlDebug+="<a href='https://zapf.wiki/Spezial:Nicht_kategorisierte_Seiten'>link</a></details>"

# Disabled report: pages that belong to exactly one category.
#only1catPages=[]
#for page in pagesDict:
#    if(len(pagesDict[page])==1):
#        only1catPages.append(page)
#htmlDebug+="<details><summary>These pages have only one category please add some more in the wiki</summary>"
#htmlDebug+=htmlOutPageLinkList(only1catPages)+"</details>"


print("remove empty categories")
# Repeatedly strip categories with no pages and no subcategories; removing
# one can empty its parent, so loop until a fixed point is reached.
removedCats=[]
empty=getEmpty(catsDict)
while(len(empty)>0):
    print("found " + str(len(empty)) + " empty")
    for catToRemove in empty:
        removedCats.append(catToRemove)
        purgeCat(catsDict,catToRemove)
    empty=getEmpty(catsDict)
htmlDebug+="<details><summary>These categories are empty and should be removed (admin needed) or edited in the wiki</summary>"
htmlDebug+=str(removedCats)+"</details>"


print("find root categories")
rootCats=getRoots(catsDict)


print("find cycles")
zyklen = []
zyklenFinden(catsDict,rootCats,zyklen)
htmlDebug+="<details><summary>These categories cause cycles in the category structure and should be edited in the wiki</summary>"
htmlDebug+=str(zyklen)+"</details>"


print("output tree structure")
# Render each root's subtree into htmlOut; recursion depth capped at 10.
for root in rootCats:
    htmlOutTree(catsDict,root,10,pagesDict)



print("write file")
# Use a context manager so the handle is closed even if write() raises, and
# pin the encoding to UTF-8: the generated page declares
# <meta charset='utf-8'>, but the original open() used the platform default
# encoding, which can garble or reject non-ASCII titles (e.g. on Windows).
with open(outputFile, "w", encoding="utf-8") as file:
    file.write(htmlHead + htmlOut + htmlDebugSeperator + htmlDebug + htmlFoot)



