list 列表相关
list 中最小值、最大值
import operator values = [1, 2, 3, 4, 5] min_index, min_value = min(enumerate(values), key=operator.itemgetter(1))max_index, max_value = max(enumerate(values), key=operator.itemgetter(1)) print('min_index:', min_index, 'min_value:', min_value)print('max_index:', max_index, 'max_value:', max_value)# Outmin_index: 0 min_value: 1max_index: 4 max_value: 5
list 中连续元素之间的差
from itertools import islicels = [1,2,3,5,8]diff = [j-i for i,j in zip(ls, islice(ls, 1, None))]print(diff)# Out[1, 1, 2, 3]
删除列表中的重复元素
下面这种方法不能维持顺序:
x = [1, 8, 4, 5, 5, 5, 8, 1, 8]list(set(x))# Out[8, 1, 4, 5]
下面的方法,可以维持顺序:
from collections import OrderedDictx = [1, 8, 4, 5, 5, 5, 8, 1, 8]list(OrderedDict.fromkeys(x))# Out[1,8,4,5]
并行遍历2个列表
a = [1, 2, 3]b = [4, 5, 6]for (a_val, b_val) in zip(a, b): print(a_val, b_val)# Out1 42 53 6
合并列表值
输入的两个数组,输出一个是数组&值相加或者相乘:
# inputfirst = [1,2,3,4,5]second = [6,7,8,9,10]#outputthree = [7,9,11,13,15]# The zip function is useful here, used with a list comprehension.# add[x + y for x, y in zip(first, second)]# other[x*y for x, y in zip(first, second)][max(x,y) for x, y in zip(first, second)]
参考:
字典处理
字典做交、差、并
a={'name':'michael','age':"27",'sex':'male'}b={'name':'hqh','age':'27'}{k:a[k] for k in a.keys()-b.keys()}out: {'sex': 'male'}dict(a.items()-b.items())out: {'name': 'michael', 'sex': 'male'}
需要注意的是,当字典的值有字典时,a.items()-b.items()
这种方式会报错 TypeError: unhashable type: 'dict'
;
参考:
字典的Key与Value对调
m = {'A': 1, 'B': 2, 'C': 3}invert_map_key_value = lambda m: dict(zip(m.values(), m.keys()))invert_map_key_value(m)# output: {1: 'A', 2: 'B', 3: 'C'}
参考:
合并字典值
>>> from collections import Counter>>> A = Counter({'a':1, 'b':2, 'c':3})>>> B = Counter({'b':3, 'c':4, 'd':5})>>> A + BCounter({'c': 7, 'b': 5, 'd': 5, 'a': 1})
字典的增加
用 update
方法往已有字典中增加键值对:
deploy_info=dict()for idx, row in raw_data.iterrows(): temp=dict() version = row['version'] app_comp_name = row['app_comp_name'] pkg_name = "{}_{}.tar.gz".format(app_comp_name, version) time.sleep(2) data = get_verify_value(api_url,pkg_name) temp = { deploy_history_id:{ 'app_comp_name':app_comp_name, 'version':version, 'pkg_name':pkg_name, 'data':data } } deploy_info.update(temp)
字符串相关
索引
tag='hx/mitaka_compute/12.0.0'[m.start() for m in re.finditer('/',tag)]
参考:
将百分号的百分比字符串转为数字
p="75%"float(p.strip('%'))/100
参考:
剔除分隔符
通常做法:
''.join('A|B|C|D|E|F|G'.split('|'))# output: 'ABCDEFG'
用 itertools.islice
,因为可以节选字符串:
import itertools''.join(itertools.islice('A|B|C|D|E|F|G', 6, None, 2))# output: 'DEFG'''.join(itertools.islice('A|B|C|D|E|F|G', 0, None, 2))# output: ''ABCDEFG'
美观打印
import pprint as ppanimals = [{'animal': 'dog', 'legs': 4, 'breeds': ['Border Collie', 'Pit Bull', 'Huskie']}, {'animal': 'cat', 'legs': 4, 'breeds': ['Siamese', 'Persian', 'Sphynx']}]pp.pprint(animals, width=1)# Out[{'animal': 'dog', 'breeds': ['Border ' 'Collie', 'Pit ' 'Bull', 'Huskie'], 'legs': 4}, {'animal': 'cat', 'breeds': ['Siamese', 'Persian', 'Sphynx'], 'legs': 4}]
width参数指定一行上最大的字符数。设置width为1确保字典打印在单独的行
文件读写
基本文件读 txt
# Note: rb opens file in binary mode to avoid issues with Windows systems# where 'rn' is used instead of 'n' as newline character(s). # A) Reading in Byte chunksreader_a = open("file.txt", "rb")chunks = []data = reader_a.read(64) # reads first 64 byteswhile data != "": chunks.append(data) data = reader_a.read(64)if data: chunks.append(data)print(len(chunks))reader_a.close() # B) Reading whole file at once into a list of lineswith open("file.txt", "rb") as reader_b: # recommended syntax, auto closes data = reader_b.readlines() # data is assigned a list of linesprint(len(data)) # C) Reading whole file at once into a stringwith open("file.txt", "rb") as reader_c: data = reader_c.read() # data is assigned a list of linesprint(len(data)) # D) Reading line by line into a listdata = []with open("file.txt", "rb") as reader_d: for line in reader_d: data.append(line)print(len(data))
json 读写json文件
- json.loads()是将str转化成dict格式,json.dumps()是将dict转化成str格式。
- json.load()和json.dump()也是类似的功能,只是与文件操作结合起来了。
# 解码import jsonwith open('build_info.json','r') as f: array = json.load(f)print(array)
在编码JSON的时候,还有一些选项很有用。 如果你想获得漂亮的格式化字符串后输出,可以使用 json.dumps()
的indent
参数。 它会使得输出和pprint() 函数效果类似:
>>> print(json.dumps(data)){"price": 542.23, "name": "ACME", "shares": 100}>>> print(json.dumps(data, indent=4)){ "price": 542.23, "name": "ACME", "shares": 100}>>>
保存为 json 文件:
# 编码import jsona = {"name":"michael"}with open("demo.json","w") as f: json.dump(a, f, indent=4)
时间日期
基本时间(time)和日期(date)
import time # print time HOURS:MINUTES:SECONDS# e.g., '10:50:58'print(time.strftime("%H:%M:%S")) # print current date DAY:MONTH:YEAR# e.g., '05/01/2019'print(time.strftime("%d/%m/%Y"))# Out15:18:0305/01/2019
字符串和日期的相互转换
strptime 是将字符串转换为 datetime,其实这个方法的全称是 “string parse time”,叫做字符串解析成时间,重点在解析(parse):
from datetime import datetimedate_obj = datetime.strptime('2018-10-15 20:59:29', '%Y-%m-%d %H:%M:%S')print(type(date_obj),date_obj)# Out2018-10-15 20:59:29
strftime 是将 datetime 转换为字符串,全称是 “string format time”,翻译过来就是将字符串的形式来格式化时间,重点在格式化(format),使之以一种可读的字符串形式返回:
from datetime import datetimedate_obj = datetime.now()date_string = datetime.now().strftime("%Y-%m-%d %H:%M:%S")print(type(date_string),date_string)# Out2019-01-05 18:41:04
参考:
编码相关
Python Requests 编码问题
下载
Python下载文件
Python根据url下载目录或者文件
def download_package(self, package_url): print("start download_build_result") if not package_url.endswith("/"): package_url += '/' cmd = "wget -c -r -nd -np -P %s %s" % ("output", package_url) print(cmd) os.system(cmd) print(os.getcwd()) print("finish download_build_result")
数据处理
Python Pandas处理Excel数据
逐行处理数据 iterrows
for idx, row in data.iterrows(): project_name=row['projectName'] tag_name=row['tagName']
Pandas追加模式写入csv文件
data = pd.DataFrame([[1,2,3]])csv_headers=['A','B','C']data.to_csv('./Marvel3_yingpping.csv', header=csv_headers, index=False, mode='a+', encoding='utf-8')data = pd.DataFrame([[4,5,6]])data.to_csv('./Marvel3_yingpping.csv', header=False, index=False, mode='a+', encoding='utf-8')data = pd.DataFrame([[7,8,9]])data.to_csv('./Marvel3_yingpping.csv', header=False, index=False, mode='a+', encoding='utf-8')
Python-CSV-Excel
for idx, row in data.iterrows(): project_name=row['projectName'] tag_name=row['tagName']
to_csv表格中文乱码
ipython中直接打印df,中文没有乱码,但是to_csv
方法存储时,中文有乱码。
df.to_csv('file.csv',encoding='utf-8-sig')
参考:
itero
看题目:
- 答案中有位前辈用这个用的炉火纯青啊!
Shell/Linux 操作相关
Python运行shell命令的函数:
def run(cmd_str, fatal=True): # this is not a good implement log.command(log.term.cmd(cmd_str)) ret = os.system(cmd_str) if ret is not 0: if fatal: log.error('[ERROR] run cmd: %s failed', cmd_str) os._exit(1) else: log.info('[INFO] %s is not fatal' % cmd_str)
调用外部的命令
# import subprocesssubprocess.call(['mkdir', 'empty_folder'])# 运行一条命令并输出得到的结果output = subprocess.check_output(['ls', '-l'])# 上面的调用是阻塞的# 如果运行shell中内置的命令,如cd或者dir,需要指定标记shell=Trueoutput = subprocess.call(['cd', '/'], shell=True)# 对于更高级的用例,可以使用 Popen constructor。
Python 3.5引进了一个新的run函数,它的行为与call和check_output很相似。如果你使用的是3.5版本或更高版本,看一看run的文档,里面有一些有用的例子。否则,如果你使用的是Python 3.5以前的版本或者你想保持向后兼容性,上面的call和check_output代码片段是你最安全和最简单的选择
参考:
计算文件的校验值
可以计算文件的 md5
、sha256
等值
# https://pymotw.com/3/hashlib/index.html#module-hashlibdef get_verify_value(file_path, verify_type): """ 计算指定文件的校验值 :param file_path: 文件路径 :param verify_type: 校验值类型,md5 sha256 等等 :return: """ h = hashlib.new(verify_type) if not file_path: return None with open(file_path, 'rb') as f: for block in iter(lambda: f.read(4096), b""): h.update(block) return h.hexdigest()
性能相关
脚本的运行时间
import time start_time = time.clock() for i in range(10000000): pass elapsed_time = time.clock() - start_timeprint("Time elapsed: {} seconds".format(elapsed_time))# OutTime elapsed: 0.30121700000000007 seconds
import timeitelapsed_time = timeit.timeit('for i in range(10000000): pass', number=1)print("Time elapsed: {} seconds".format(elapsed_time))# OutTime elapsed: 0.2051873060000844 seconds
计算运行时间
class Timer(object): def __enter__(self): self.error = None self.start = time.time() return self def __exit__(self, type, value, tb): self.finish = time.time() if type: self.error = (type, value, tb) def duration(self): return self.finish - self.startwith Timer() as timer: func()timer.duration()# Out0.29994797706604004
参考:
目录、路径相关
基本目录文件操作
import osimport shutilimport glob # working directoryc_dir = os.getcwd() # show current working directoryos.listdir(c_dir) # shows all files in the working directoryos.chdir('~/Data') # change working directory # get all files in a directoryglob.glob('/Users/sebastian/Desktop/*') # e.g., ['/Users/sebastian/Desktop/untitled folder', '/Users/sebastian/Desktop/Untitled.txt'] # walktree = os.walk(c_dir)# moves through sub directories and creates a 'generator' object of tuples# ('dir', [file1, file2, ...] [subdirectory1, subdirectory2, ...]),# (...), ... #check files: returns either True or Falseos.exists('../rel_path')os.exists('/home/abs_path')os.isfile('./file.txt')os.isdir('./subdir') # file permission (True or Falseos.access('./some_file', os.F_OK) # File exists? Python 2.7os.access('./some_file', os.R_OK) # Ok to read? Python 2.7os.access('./some_file', os.W_OK) # Ok to write? Python 2.7os.access('./some_file', os.X_OK) # Ok to execute? Python 2.7os.access('./some_file', os.X_OK | os.W_OK) # Ok to execute or write? Python 2.7 # join (creates operating system dependent paths)os.path.join('a', 'b', 'c')# 'a/b/c' on Unix/Linux# 'a\b\c' on Windowsos.path.normpath('a/b/c') # converts file separators # os.path: direcory and file namesos.path.samefile('./some_file', '/home/some_file') # True if those are the sameos.path.dirname('./some_file') # returns '.' (everythin but last component)os.path.basename('./some_file') # returns 'some_file' (only last componentos.path.split('./some_file') # returns (dirname, basename) or ('.', 'some_file)os.path.splitext('./some_file.txt') # returns ('./some_file', '.txt')os.path.splitdrive('./some_file.txt') # returns ('', './some_file.txt')os.path.isabs('./some_file.txt') # returns False (not an absolute path)os.path.abspath('./some_file.txt') # create and delete files and directoriesos.mkdir('./test') # create a new direcotoryos.rmdir('./test') # removes an empty direcotoryos.removedirs('./test') # removes nested empty directoriesos.remove('file.txt') # removes an individual fileshutil.rmtree('./test') # removes directory (empty or not empty) os.rename('./dir_before', './renamed') # renames directory if destination doesn't existshutil.move('./dir_before', './renamed') # renames directory always shutil.copytree('./orig', './copy') # copies a directory recursivelyshutil.copyfile('file', 'copy') # copies a file # Getting files of particular type from directoryfiles = [f for f in os.listdir(s_pdb_dir) if f.endswith(".txt")] # Copy and moveshutil.copyfile("/path/to/file", "/path/to/new/file")shutil.copy("/path/to/file", "/path/to/directory")shutil.move("/path/to/file","/path/to/directory") # Check if file or directory existsos.path.exists("file or directory")os.path.isfile("file")os.path.isdir("directory") # Working directory and absolute path to filesos.getcwd()os.path.abspath("file")
参考:
## Python 删除文件夹
def onerror(func, path, exc_info): """ Error handler for ``shutil.rmtree``. If the error is due to an access error (read only file) it attempts to add write permission and then retries. If the error is for another reason it re-raises the error. Usage : ``shutil.rmtree(path, onerror=onerror)`` """ import stat if not os.access(path, os.W_OK): # Is the error an access error ? os.chmod(path, stat.S_IWUSR) func(path) else: raise
参考:
## Python 切换目录
执行完,返回之前目录
import contextlib@contextlib.contextmanagerdef cdir(path): prev_cwd = os.getcwd() os.chdir(path) try: yield finally: os.chdir(prev_cwd)
用法:
with cdir(path): func()
搜索指定目录下的文件
将指定目录及其子目录下的文件搜索出来:
def find_file(start_path, name): """ search the files of name from the dir start_path,存放的是搜索文件的路径 :param start_path: the search scope of dir :param name: the name of search file :return: set of files path """ files_path = set() for rel_path, dirs, files in os.walk(start_path): # if name in files: for f in files: if name in f: full_path = os.path.join(start_path, rel_path, f) path = os.path.normpath(os.path.abspath(full_path)) files_path.add(path) return files_path
只列出文件夹下的文件夹
[ name for name in os.listdir(thedir) if os.path.isdir(os.path.join(thedir, name)) ]filter(os.path.isdir, os.listdir(os.getcwd()))
Python Path相关问题
os.path.split(r"C:\foo\bar\file_name.txt")
# 数据库
## MySQL 数据库
db = MySQLdb.connect("localhost","your_username","your_password","your_dbname")cursor = db.cursor()sql = "select Column1,Column2 from Table1"cursor.execute(sql)results = cursor.fetchall() for row in results: print row[0]+row[1] db.close()
参考:
MongoDB
uri="mongodb://admin:admin@xxx.xxx.xxx.xxx:27017,xxx.xxx.xxx.xxx:27018,xxx.xxx.xxx.xxx:27019/test"client=pymongo.MongoClient(uri,replicaSet='noah-cluster',readPreference='primaryPreferred')db=client.get_default_database() decouple_history=db.rpm_decouple_release_history_info pprint(decouple_history.find_one({'service_name':'test'}))