Commit 25d0858e authored by jlopez's avatar jlopez
Browse files

save old technique to get packages

parent 0cbe8d43
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from os import walk
import os
import re
def to_csv_row(line):
    """Return one line of ./cpan/packages.txt as a ';'-separated row.

    Runs of whitespace are collapsed into a single ';' and surrounding
    whitespace (including the trailing newline) is dropped.  Fields are not
    quoted; a ';' already present inside a field is kept as is.
    """
    return ';'.join(line.split())


if __name__ == '__main__':
    # Convert the whitespace-separated CPAN package index into a CSV file.
    print("Start")
    indexP = 0
    # Both files are managed by the with-statement so they are closed even
    # when a read or write fails part-way through.
    with open("./cpan/packages.csv", "w") as results, \
            open("./cpan/packages.txt", "r") as ins:
        for line in ins:
            # NOTE(review): earlier code also assembled a quoted variant of
            # each row but never wrote it; the unquoted row stays the output.
            results.write(to_csv_row(line) + "\n")
            indexP = indexP + 1
    print("Packages = " + str(indexP))
    print("End")
\ No newline at end of file
getPackagesWithTitle <- function() {
  # Download the CRAN package database (packages.rds) from the RStudio mirror
  # and return its Package, Version, Title and Depends columns.
  # Side effect: the full database is also stored in the global `TOTO`.
  cran_mirror <- "https://cran.rstudio.com/"

  # The value of this call is not used.
  contrib.url(cran_mirror, "source")

  # The mirror URL already ends in "/", so the resulting location contains a
  # double slash, exactly as before.
  rds_location <- paste0(cran_mirror, "/web/packages/packages.rds")

  # Local files are opened with file(), anything else with url().
  is_local <- substr(rds_location, 1L, 7L) == "file://"
  if (is_local) {
    con <- file(rds_location, "rb")
  } else {
    con <- url(rds_location, "rb")
  }
  on.exit(close(con))

  # The .rds payload is gzip-compressed, hence the gzcon() wrapper.
  db <- readRDS(gzcon(con))
  TOTO <<- db

  wanted <- c("Package", "Version", "Title", "Depends")
  db[, wanted]
}
# Fetch the Package / Version / Title / Depends columns of the CRAN database.
packages <- getPackagesWithTitle()
# Drop the column names from the table.
colnames(packages) <- NULL
# Replace missing values with empty strings so that NA is not written out.
packages[is.na(packages)] <- ""
# Write ';'-separated rows with neither a header line nor row names.
write.table(packages, file = "./cran/packages.csv",sep=';', row.names=FALSE, col.names = FALSE)
\ No newline at end of file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import json
# wget https://repo.anaconda.com/pkgs/main/linux-64/repodata.json
from pprint import pprint
import os
import sys
def format_package_row(package):
    """Return the CSV row for one entry of repodata.json's "packages" map.

    Args:
        package: mapping with at least the keys 'name' and 'version'.

    Returns:
        '"<name>";"<version>";"";<url>' followed by a newline, where the third
        field is always empty and the anaconda.org URL is left unquoted.
    """
    name = package['name']
    return ('"' + name + '";"' + package['version'] + '";"";'
            + "https://anaconda.org/anaconda/" + name + "\n")


if __name__ == '__main__':
    # Convert ./anaconda/repodata.json into ./anaconda/packages.csv.
    print("Start")
    with open('./anaconda/repodata.json') as f:
        data = json.load(f)
    # The with-statement closes the CSV even if a malformed entry raises.
    with open("./anaconda/packages.csv", "w") as results:
        for package in data['packages'].values():
            results.write(format_package_row(package))
    print("End")
from xmlrpclib import ServerProxy
import re
from os import walk
import os
import sys
from bs4 import BeautifulSoup
from rpmUtils.miscutils import splitFilename
import urllib, json
# Python 2 only: `reload` is a builtin there, and reloading `sys` makes
# setdefaultencoding() callable again so that implicit str/unicode
# conversions use UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')
# Base URL of the per-package JSON endpoint; run() appends "<name>/json".
URL_INFO = 'https://pypi.python.org/pypi/'
# Base URL of the "simple" index; not referenced in the visible part of this script.
URL_SIMPLE = "https://pypi.org/simple/"
def summarize_python_classifiers(classifiers):
    """Condense trove classifiers into the "depends_python" CSV field.

    Every classifier containing "Programming Language" has the text
    'Programming Language :: Python' and every ' :: ' separator removed; each
    non-empty remainder is appended followed by ' | '.

    Args:
        classifiers: iterable of classifier strings.

    Returns:
        The concatenated remainders (e.g. "2.7 | 3.6 | "), or "all" when
        nothing was collected.
    """
    # NOTE(review): classifiers for other languages (e.g. "... :: C") also
    # pass the substring test and end up in the result; kept as is.
    parts = []
    for classifier in classifiers:
        if "Programming Language" not in classifier:
            continue
        remainder = classifier.replace('Programming Language :: Python', '')
        if remainder != "":
            parts.append(remainder.replace(' :: ', '') + ' | ')
    return "".join(parts) or "all"


def run():
    """Dump name, version, summary, home page and Python versions of every
    PyPI package into ./python/packages.csv.

    The package list comes from the XML-RPC `list_packages` call, the details
    from the JSON endpoint under URL_INFO.  Packages whose JSON endpoint
    answers 404 are reported and skipped.
    """
    try:
        import xmlrpclib
    except ImportError:
        import xmlrpc.client as xmlrpclib
    client = xmlrpclib.ServerProxy('https://pypi.python.org/pypi')
    # get a list of package names
    packages = client.list_packages()
    indexP = 0
    print("Start : python")
    # The with-statement guarantees the CSV is flushed and closed, including
    # when a request or a malformed record raises.
    with open("./python/packages.csv", "w") as results:
        for p in packages:
            print(p)
            response = urllib.urlopen(URL_INFO + p + "/json")
            try:
                if response.getcode() == 404:
                    print(" 404 : " + p)
                    continue
                dataP = json.loads(response.read())
            finally:
                # One connection per package: release it right away.
                response.close()
            info = dataP['info']
            version = info['version']
            if version is None:
                version = "0"
            description = info['summary']
            if description is None:
                description = ""
            doc = info['home_page']
            if doc is None:
                doc = ""
            depends_python = summarize_python_classifiers(info['classifiers'])
            line = '"' + p + '";"' + version + '";"' + description + '";"' + doc + '";"' + depends_python + '"\n'
            line = line.encode('utf-8')
            results.write(line)
            indexP = indexP + 1
            print(str(indexP) + " / " + str(len(packages)))
    print("Packages : " + str(indexP))
    print("End : python")


if __name__ == '__main__':
    run()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import string
import subprocess
import json
SHORT_DOWNLOAD = False
def parse_gem_line(line):
    """Extract (name, version) from one line of `gem search` output.

    A gem line looks like "rails (5.2.0)" or "nokogiri (1.8.4 ruby java)".

    Returns:
        A (name, version) tuple, or None for anything that is not a gem line
        (blank lines, banner lines such as "*** REMOTE GEMS ***").
    """
    values = line.split()
    if len(values) < 2 or not values[1].startswith("("):
        return None
    # Drop the opening parenthesis, then any closing parenthesis or list comma.
    return values[0], values[1][1:].rstrip("),")


def format_gem_row(name, version, info, doc):
    """Return the four-field, ';'-separated, double-quoted CSV row."""
    return '"' + name + '";"' + version + '";"' + info + '";"' + doc + '"\n'


if __name__ == '__main__':
    # List every gem whose name starts with a lowercase letter and write one
    # CSV row per gem.  With SHORT_DOWNLOAD the row is built from the
    # `gem search` output alone; otherwise the rubygems.org API is queried
    # for each gem.
    print("Start ")
    indexP = 0
    if SHORT_DOWNLOAD:
        csv_path = "./ruby/packages.csv"
    else:
        csv_path = "./ruby/packages-full.csv"
    with open(csv_path, "w") as results:
        for i in string.ascii_lowercase:
            cmd = " gem search ^" + i + " > ./ruby/" + i + ".txt"
            os.system(cmd)
            print(i)
            with open("./ruby/" + i + ".txt", "r") as ins:
                for line in ins:
                    parsed = parse_gem_line(line)
                    if parsed is None:
                        # Not a gem entry: nothing to export.
                        continue
                    name, version = parsed
                    if SHORT_DOWNLOAD:
                        doc = "http://www.rubydoc.info/gems/" + name + "/" + version
                        res = format_gem_row(name, version, "", doc)
                    else:
                        url = "https://rubygems.org/api/v1/gems/" + name + ".json"
                        print("curl -s " + url)
                        # Argument list instead of a shell string: the gem
                        # name comes from external output and must not be
                        # interpreted by a shell.
                        output = subprocess.check_output(["curl", "-s", url])
                        parsed_json = json.loads(output)
                        # The API may return null for optional fields.
                        res = format_gem_row(parsed_json['name'],
                                             parsed_json['version'],
                                             parsed_json['info'] or "",
                                             parsed_json['documentation_uri'] or "")
                    print(res)
                    indexP = indexP + 1
                    results.write(res.encode('utf-8'))
    print("Packages : " + str(indexP))
    print("End")
/usr/local/lib/node_modules/all-the-packages
\ No newline at end of file
/usr/local/lib/node_modules/nice-package
\ No newline at end of file
const registry = require('all-the-packages')
var fs = require('fs');
// Export every package of the npm registry snapshot to ./packages.csv as
// "name";"version";"description";"url" rows.
var stream = fs.createWriteStream("./packages.csv");
console.log("Start");
var index = 0;

// Render a missing field as an empty string rather than the text "undefined".
function csvField(value) {
  return value === undefined || value === null ? '' : value;
}

stream.once('open', function () {
  registry
    .on('package', function (pkg) {
      index++;
      var name = pkg.name;
      var doc = "https://www.npmjs.com/package/" + name;
      var line = '"' + name + '";"' + csvField(pkg.version) + '";"' +
        csvField(pkg.description) + '";"' + doc + '"\n';
      stream.write(line);
      console.log(index);
    })
    .on('end', function () {
      // Packages arrive asynchronously, so the stream can only be closed and
      // the final count reported once the registry signals completion.
      stream.end();
      console.log(index);
      console.log("End");
    });
});
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from lxml import etree
from os import walk
import os
from bs4 import BeautifulSoup
from rpmUtils.miscutils import splitFilename
import sys
# Python 2 only: `reload` is a builtin there, and reloading `sys` makes
# setdefaultencoding() callable again so that implicit str/unicode
# conversions use UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')
class Packages:
    """One package entry destined for a distribution's packages.csv.

    Attributes are plain strings: name, version, description, doc (a URL)
    and rpm (the RPM file name, empty unless set by the caller).
    """

    def __init__(self, name, version, description, doc):
        self.name = name
        self.version = version
        self.description = description
        self.doc = doc
        self.rpm = ""

    def to_string(self, distrib_os, distrib_release):
        """Return this package as one ';'-separated, double-quoted CSV row.

        Args:
            distrib_os: distribution name; for "centos" and "fedora" the
                fifth field holds the RPM file name, otherwise it is empty.
            distrib_release: accepted for interface compatibility; not part
                of the row.

        Returns:
            '"name";"version";"description";"doc";"<rpm or empty>"' plus a
            trailing newline.  Double quotes inside the description are
            written as single quotes; the instance itself is left untouched.
        """
        description = self.description.replace('"', '\'')
        if distrib_os == "centos" or distrib_os == "fedora":
            last_field = self.rpm
        else:
            last_field = ""
        fields = (self.name, self.version, description, self.doc, last_field)
        return '"' + '";"'.join(fields) + '"\n'
class Distrib:
    """One distribution release whose package list is scraped into a CSV.

    getPackages() downloads the listing pages with wget into the directory
    "./<name>:<version_num>" and writes "packages.csv" next to them.
    Supported names: alpine, fedora, centos, ubuntu, debian.
    """

    # An Alpine result page of at most this many bytes is treated as the
    # first page past the end of the listing (the original author's
    # self-described quick fix).
    ALPINE_EMPTY_PAGE_SIZE = 18552
    # The Fedora listing is fetched once per leading character.
    FEDORA_SHARDS = "0123456789abcdefghijklmnopqrstuvwxyz"

    def __init__(self, name, version_num, version_name, url_packages):
        self.name = name
        self.version_num = version_num
        self.version_name = version_name
        self.url_packages = url_packages
        self.packages = []

    def getPackages(self):
        """Download the package listing and write <dir>/packages.csv.

        Creates the working directory when needed and removes a stale
        packages.html.  Prints "This OS not yet implemented" for an
        unsupported distribution name.
        """
        name_dir = "./" + self.name + ":" + self.version_num
        if not os.path.isdir(name_dir):
            os.mkdir(name_dir)
        listing = name_dir + "/packages.html"
        if os.path.exists(listing):
            os.remove(listing)

        scrapers = {
            "alpine": self._scrape_alpine,
            "fedora": self._scrape_fedora,
            "centos": self._scrape_centos,
            "ubuntu": self._scrape_debian_like,
            "debian": self._scrape_debian_like,
        }
        scraper = scrapers.get(self.name)
        if scraper is None:
            print("This OS not yet implemented")
            return
        scraper(name_dir)

    # ----------------------------------------------------------- helpers

    def _download(self, name_dir, url, filename):
        """Fetch `url` into `name_dir`/`filename` with `wget -q -c`."""
        import subprocess
        # Argument list plus cwd replaces the former "cd ...; wget ..." shell
        # string, so no part of the URL is interpreted by a shell.
        subprocess.call(["wget", "-q", "-c", url, "-O", filename], cwd=name_dir)

    def _parse_html(self, path_file):
        """Parse one downloaded page, closing the file afterwards."""
        with open(path_file) as handle:
            return BeautifulSoup(handle, "html.parser")

    def _write_csv(self, name_dir, packages):
        """Write every package of the iterable to packages.csv and report
        the number of rows written."""
        print("Start : " + self.name + "-" + self.version_name)
        indexP = 0
        with open(name_dir + "/packages.csv", "w") as results:
            for currentP in packages:
                results.write(currentP.to_string(self.name, self.version_num).encode('utf-8'))
                indexP = indexP + 1
        print("Packages = " + str(indexP))
        print("End : " + self.name + "-" + self.version_name)

    def _package_page_url(self, package_name):
        """Return the package's page on the distribution's own package site."""
        if self.name == "ubuntu":
            site = "https://packages.ubuntu.com/"
        else:
            # Debian has its own site; Ubuntu URLs were previously emitted
            # for Debian packages as well.
            site = "https://packages.debian.org/"
        return site + self.version_name + "/" + package_name + "/"

    # ------------------------------------------------------------ alpine

    def _scrape_alpine(self, name_dir):
        """Download result pages until an empty one shows up, then export."""
        indexPage = 1
        while True:
            url_page = self.url_packages + "?page=" + str(indexPage) + "&branch=v" + self.version_num + "&arch=x86_64"
            self._download(name_dir, url_page, str(indexPage) + ".html")
            path_file = name_dir + "/" + str(indexPage) + ".html"
            if os.stat(path_file).st_size <= self.ALPINE_EMPTY_PAGE_SIZE:
                break
            indexPage = indexPage + 1
        # indexPage is the first empty page, so the last one holding data is
        # indexPage - 1.
        self._write_csv(name_dir, self._alpine_rows(name_dir, indexPage - 1))

    def _alpine_rows(self, name_dir, last_page):
        """Yield the packages of pages 1..last_page (inclusive)."""
        # The upper bound is inclusive: the former range(1, last_page)
        # silently dropped the final page of results.
        for page in range(1, last_page + 1):
            soup = self._parse_html(name_dir + "/" + str(page) + ".html")
            # The first <tr> is skipped (presumably the table header).
            for tr in soup.find_all('tr')[1:]:
                children = tr.findChildren()
                name = children[1].get_text()
                version = children[2].get_text()
                currentP = Packages(name, version, "", "")
                currentP.doc = "https://pkgs.alpinelinux.org/package/v" + self.version_num + "/main/x86_64/" + name
                yield currentP

    # ------------------------------------------------------------ fedora

    def _scrape_fedora(self, name_dir):
        """Download one listing per leading character, then export."""
        for pk in self.FEDORA_SHARDS:
            self._download(name_dir, self.url_packages + pk + "/", pk + ".html")
        self._write_csv(name_dir, self._fedora_rows(name_dir))

    def _fedora_rows(self, name_dir):
        """Yield one package per RPM link found in the downloaded listings."""
        for pk in self.FEDORA_SHARDS:
            path_file = name_dir + "/" + pk + ".html"
            if os.stat(path_file).st_size == 0:
                # Nothing was downloaded for this character.
                continue
            soup = self._parse_html(path_file)
            # The first five links are skipped (presumably the listing's
            # sort/parent links rather than packages).
            for link in soup.find_all('a')[5:]:
                values = link.get_text()
                (n, v, r, e, a) = splitFilename(values)
                currentP = Packages(n, v, "", "")
                currentP.rpm = values
                currentP.doc = "https://dl.fedoraproject.org/pub/fedora/linux/releases/" + self.version_num + "/Everything/source/tree/Packages/" + pk + "/" + values
                yield currentP

    # ------------------------------------------------------------ centos

    def _scrape_centos(self, name_dir):
        """Download the single directory listing, then export."""
        self._download(name_dir, self.url_packages, "packages.html")
        self._write_csv(name_dir, self._centos_rows(name_dir))

    def _centos_rows(self, name_dir):
        """Yield one package per x86_64 RPM row of the directory listing."""
        soup = self._parse_html(name_dir + "/packages.html")
        # The first five rows are skipped (presumably header rows).
        for tr in soup.find_all('tr')[5:]:
            children = tr.findChildren()
            if len(children) <= 2:
                continue
            values = children[2].get_text()
            if "x86_64" not in values:
                continue
            (n, v, r, e, a) = splitFilename(values)
            currentP = Packages(n, v, "", "")
            currentP.rpm = values
            currentP.doc = "http://mirror.centos.org/centos/" + self.version_num + "/os/x86_64/Packages/" + values
            yield currentP

    # --------------------------------------------------- ubuntu / debian

    def _scrape_debian_like(self, name_dir):
        """Download the "allpackages" page, then export."""
        self._download(name_dir, self.url_packages, "packages.html")
        self._write_csv(name_dir, self._debian_like_rows(name_dir))

    def _debian_like_rows(self, name_dir):
        """Yield one package per <dt>/<dd> pair of the "allpackages" page."""
        soup = self._parse_html(name_dir + "/packages.html")
        # A <dt> must split into at least this many space-separated tokens
        # ("name (version)", plus one more token on Ubuntu) to be accepted.
        min_tokens = 3 if self.name == "ubuntu" else 2
        for dl in soup.find_all('dl'):
            # Package announced by the latest <dt>; None until one is seen or
            # when that <dt> was rejected, so a stray <dd> is ignored.
            currentP = None
            for child in dl.findChildren():
                if child.name == "dt":
                    values = child.get_text().split(" ")
                    if len(values) >= min_tokens:
                        # values[1] is "(version)": strip the parentheses.
                        currentP = Packages(values[0], values[1][1:-1], "", "")
                    else:
                        currentP = None
                elif child.name == "dd" and currentP is not None:
                    currentP.description = child.get_text()
                    currentP.doc = self._package_page_url(currentP.name)
                    yield currentP
if __name__ == '__main__':
    # Scrape every enabled distribution release; entries left commented out
    # are available but currently disabled.
    print("Start")
    allS = [
        #Distrib("ubuntu", "16.04", "xenial", "https://packages.ubuntu.com/xenial/allpackages"),
        #Distrib("ubuntu", "18.04", "bionic", "https://packages.ubuntu.com/bionic/allpackages"),
        #Distrib("debian", "8", "jessie", "https://packages.debian.org/jessie/allpackages"),
        #Distrib("debian", "9", "stretch", "https://packages.debian.org/stretch/allpackages"),
        Distrib("centos", "7", "7", "http://mirror.centos.org/centos/7/os/x86_64/Packages/"),
        Distrib("fedora", "28", "28", "https://dl.fedoraproject.org/pub/fedora/linux/releases/28/Everything/source/tree/Packages/"),
        #Distrib("alpine", "3.9", "3.9", "https://pkgs.alpinelinux.org/packages"),
    ]
    for d in allS:
        d.getPackages()
    print("End")
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from os import walk
import os
import re