Χρήστης:Egmontaz/Interwiki
Εμφάνιση
Κώδικας για την εύρεση σελίδων χωρίς διαγλωσσικούς συνδέσμους από αρχείο XML της βάσης δεδομένων. Βασίζεται στο Wikifind, το οποίο διανέμεται υπό την GNU General Public License (GPL) version 3 or later http://www.gnu.org/licenses/gpl.html.
/* Returns true when `pattern` is non-empty and occurs somewhere in `text`.
   An empty pattern (for example a line missing from a configuration file)
   would match every string, so it is treated as "no match". */
static bool contains_pattern(const std::string& text, const std::string& pattern) {
    return !pattern.empty() && text.find(pattern) != std::string::npos;
}

/* Returns true when `text` contains at least one of the `count` patterns. */
static bool contains_any_pattern(const std::string& text, const std::string* patterns, int count) {
    for (int i = 0; i < count; ++i) {
        if (contains_pattern(text, patterns[i])) {
            return true;
        }
    }
    return false;
}

/* Reads `count` lines from `path` into `out`, optionally discarding one
   leading header line first. Lines that cannot be read are left empty.
   Returns false when the file cannot be opened. */
static bool read_pattern_lines(const char* path, bool skip_header, std::string* out, int count) {
    std::ifstream in(path);
    if (!in) {
        return false;
    }
    std::string header;
    if (skip_header) {
        std::getline(in, header);
    }
    for (int i = 0; i < count; ++i) {
        if (!std::getline(in, out[i])) {
            out[i].clear();
        }
    }
    return true;
}

/* Extracts the text between "<title>" and "</title>" on a dump line.
   When the closing tag is missing, everything after "<title>" is returned;
   when the opening tag is missing, an empty string is returned. */
static std::string extract_page_name(const std::string& title_line) {
    const std::string open_tag = "<title>";
    const std::string close_tag = "</title>";
    const std::string::size_type open_pos = title_line.find(open_tag);
    if (open_pos == std::string::npos) {
        return std::string();
    }
    const std::string::size_type begin = open_pos + open_tag.size();
    const std::string::size_type end = title_line.find(close_tag, begin);
    if (end == std::string::npos) {
        return title_line.substr(begin);
    }
    return title_line.substr(begin, end - begin);
}

/* Appends one wiki-table row ("|-" line followed by "|N|| [[:Page]]") to the
   file at `path`. The file is opened in append mode for each row, so it is
   only created once there is something to write. */
static void append_wiki_table_row(const std::string& path, int row_number, const std::string& page_name) {
    std::ofstream out(path.c_str(), std::ios::app);
    out << "|-\n";
    out << "|" << row_number << "|| [[:" << page_name << "]]\n";
}

/* Scans the XML dump "articles.xml" line by line and appends, as wiki-table
   rows, every page whose text contains neither an interwiki link nor a
   redirect marker.

   Inputs (read from the working directory):
     redirect.xml  - one header line, then 6 redirect marker strings
     namespace.xml - one header line, then 4 namespace strings, used in this
                     order: categories, templates, wikipedia, help
     iw.dat        - 60 interwiki language codes; each is matched as "[[code:"

   Outputs (append mode): interwiki.txt (titles without ':'), catiw.txt,
   temiw.txt, wikiw.txt (titles without '/'), heliw.txt.
   A progress line with the running counters is printed for every title. */
void missing_interwikis (void) {
    const int kRedirectCount = 6;
    const int kNamespaceCount = 4;
    const int kInterwikiCount = 60;

    /* Redirect markers. */
    std::string redirect[kRedirectCount];
    if (!read_pattern_lines("redirect.xml", true, redirect, kRedirectCount)) {
        std::cerr << "Cannot open redirect.xml" << std::endl;
        return;
    }

    /* Namespace strings. */
    std::string nspace[kNamespaceCount];
    if (!read_pattern_lines("namespace.xml", true, nspace, kNamespaceCount)) {
        std::cerr << "Cannot open namespace.xml" << std::endl;
        return;
    }

    /* Interwiki codes, turned into link prefixes such as "[[en:". */
    std::string iw[kInterwikiCount];
    if (!read_pattern_lines("iw.dat", false, iw, kInterwikiCount)) {
        std::cerr << "Cannot open iw.dat" << std::endl;
        return;
    }
    for (int i = 0; i < kInterwikiCount; ++i) {
        /* Leave missing codes empty: "[[:" would match ordinary inline links. */
        if (!iw[i].empty()) {
            iw[i] = "[[" + iw[i] + ":";
        }
    }

    /* XML markers */
    const std::string pageend = "</page>";
    const std::string titlestart = "<title>";
    const std::string textstart = "<text";

    const std::string fin = "articles.xml";     /* dump file */
    const std::string nfout1 = "interwiki.txt"; /* output file articles */
    const std::string nfout3 = "catiw.txt";     /* output file categories */
    const std::string nfout4 = "wikiw.txt";     /* output file wikipedia */
    const std::string nfout5 = "temiw.txt";     /* output file templates */
    const std::string nfout6 = "heliw.txt";     /* output file help */

    /* counters */
    int articles_without = 0;
    int tems_without = 0;
    int cats_without = 0;
    int wiki_without = 0;
    int help_without = 0;
    int tot = 0;

    /* Nothing is written to these streams; they are kept only so that the
       same set of files is created as before. */
    std::ofstream deb("debug.txt", std::ios::app);
    std::ofstream deb2("debug2.txt", std::ios::app);
    std::ofstream sample("sample.txt", std::ios::app);

    std::ifstream FileIn(fin.c_str()); /* Open dump */
    if (!FileIn) { /* if something goes wrong with file opening */
        std::cout << "File not found!" << std::endl;
        return;
    }

    std::string line;
    while (std::getline(FileIn, line)) {
        if (line.find(titlestart) == std::string::npos) {
            continue;
        }
        const std::string title = line;
        ++tot;
        /* std::endl flushes so the progress line shows up immediately. */
        std::cout << tot << "\t" << articles_without << "\t" << cats_without << "\t"
                  << tems_without << "\t" << wiki_without << "\t" << help_without << std::endl;

        /* Skip the revision metadata up to the line that opens the page text.
           Stop early at </page> or end of file so a page without text cannot
           swallow the following page. */
        bool in_text = false;
        while (std::getline(FileIn, line)) {
            if (line.find(textstart) != std::string::npos) {
                in_text = true;
                break;
            }
            if (line.find(pageend) != std::string::npos) {
                break;
            }
        }

        /* Scan from the <text line (inclusive, a redirect marker is usually on
           that very line) through the line holding </page>. A failed read
           (truncated dump) ends the scan instead of looping forever. */
        bool hasinterwiki = false;
        bool isredirect = false;
        if (in_text) {
            do {
                hasinterwiki = hasinterwiki || contains_any_pattern(line, iw, kInterwikiCount);
                isredirect = isredirect || contains_any_pattern(line, redirect, kRedirectCount);
                if (line.find(pageend) != std::string::npos) {
                    break;
                }
            } while (std::getline(FileIn, line));
        }

        if (hasinterwiki || isredirect) {
            continue;
        }

        const std::string page_name = extract_page_name(title);

        /* Main-namespace article: no namespace separator in the name. */
        if (page_name.find(':') == std::string::npos) {
            ++articles_without;
            append_wiki_table_row(nfout1, articles_without, page_name);
        }
        /* Namespace strings are matched anywhere on the title line. */
        if (contains_pattern(title, nspace[0])) {
            ++cats_without;
            append_wiki_table_row(nfout3, cats_without, page_name);
        }
        if (contains_pattern(title, nspace[1])) {
            ++tems_without;
            append_wiki_table_row(nfout5, tems_without, page_name);
        }
        /* The '/' test must look at the page name only: the closing
           "</title>" tag on the raw line always contains a slash. */
        if (contains_pattern(title, nspace[2]) && page_name.find('/') == std::string::npos) {
            ++wiki_without;
            append_wiki_table_row(nfout4, wiki_without, page_name);
        }
        if (contains_pattern(title, nspace[3])) {
            ++help_without;
            append_wiki_table_row(nfout6, help_without, page_name);
        }
    }
}