U
    ɀ^8                     @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZmZ d dlmZmZ zd dlmZmZ W n   Y nX zd dlmZ W n   d dlmZ Y nX dd	lmZ d d
lT d d
lT dd Zedddd Zdd Zdd Zdd ZdEddZedddd Z edddd Z!eddd d! Z"d"d# Z#dFd$d%Z$d&d' Z%eddd(d) Z&eddd*d+ Z'eddd,d- Z(dGd/d0Z)d1Z*ee*Z+e,d2e+ d3 e+ d4 Z-eddd5d6 Z.edddHd8d9Z/ed:dd;d< Z0dId=d>Z1d?d@ Z2dAdB Z3dCdD Z4dS )J    N)fnmatch)escape)urljoinurlsplit
urlunsplit
quote_plus)make_responseResponse)UnionTuple)	lru_cache   )CONSTS)*c                 C   s   t jtj| S )N)ospathjoinr   ZMIRROR_ROOT)filename r   !/var/www/zmirror/zmirror/utils.pyzmirror_root   s    r      )maxsizec                 C   s   |  ddS )zM
    equivalent to s.replace("/",r"\/")
    :type s: str
    :rtype: str
    /\/)replace)sr   r   r   s_esc   s    r   c                 C   s   |  d dd} | d}t|d dko:|d dk}t|dksXt|dkr`|r`td	fS |rd|d
d d|dd
 fS d|dd d|dd fS dS )u   
    提取出一个域名的根域名
    支持二级顶级域名, 允许包含端口(端口会被舍去)

    :param domain: eg: dwn.cdn.google.co.jp[:233]
    :type domain: str
    :return: root_domain, sub_domain
    :rtype: Tuple[str, str]
    
0123456789:.   )comnetorgcoedumilgovac    N)rstripstripsplitlentarget_domainr   )domaintempZis_level2_tldr   r   r   extract_root_domain)   s    

$r7   c                 C   s  t d|  d|  d|  d|  d|  d|  ddd|  ddtd|  td|  td|  d	|  d
|  d|  ddd|  ddd|  ddd|  ddd|  ddd|  ddtd|  td|  td|  td|  td|  ttd|  ttd|  ttd|  dS )uN   生成各种形式的scheme变体
    :type _domain: str
    :rtype: bool
    z//zhttp://zhttps://z"%s"z'%s'r   z\x2fz\x2Fz\"%s\"z\'%s\'z\\/z\\\/)slashhttphttpsZdouble_quotedsingle_quotedZ	hex_lowerZ	hex_upperZ	slash_escZhttp_escZ	https_escZdouble_quoted_escZsingle_quoted_escZslash_double_escZhttp_double_escZhttps_double_escZslash_triple_escZhttp_triple_escZhttps_triple_escZslash_ueZhttp_ueZhttps_ueZdouble_quoted_ueZsingle_quoted_ueZslash_esc_ueZhttp_esc_ueZhttps_esc_ue)dictr   r   r   )_domainr   r   r   calc_domain_replace_prefixC   s8    







r>   c                  C   s   ddl } |  jjS )zeReturns the current line number in our program.
    :return: current line number
    :rtype: int
    r   N)inspectcurrentframef_backf_lineno)r?   r   r   r   current_line_numberm   s    rC      We Got An Unknown Error  c                 C   s
   t | |S )zO

    :type errormsg: bytes
    :type error_code: int
    :rtype: Response
    )r   )errormsg
error_coder   r   r   generate_simple_resp_pagev   s    rH      c                 C   s$   |   }tD ]}||kr dS qdS )z
    Determine whether an mime is text (eg: text/html: True, image/png: False)
    :param input_mime: str
    :return: bool
    TF)lowertext_like_mime_keywords)Z
input_mimeZinput_mime_lZ	text_wordr   r   r   is_mime_represents_text   s
    rL   c                 C   s.   |  d}|dkr|  S | d|  S dS )ue   从content-type中提取出mime, 如 'text/html; encoding=utf-8' --> 'text/html'
    :rtype: str
    ;r"   N)findrJ   )_content_typecr   r   r   extract_mime_from_content_type   s    
rQ   c                 C   s   t | }|tkr|S dS dS )u0   根据content-type确定该资源是否使用CDNFN)rQ   mime_to_use_cdn)rO   _mimer   r   r   is_content_type_using_cdn   s    rT   c                 C   s2   z| | }W n   Y dS X |dk	r*|S dS dS )z0return a blank string if the match group is Noner.   N)group)name	match_objobjr   r   r   	get_group   s    rY   c                 C   s   dS )uC   旧版本遗留函数, 已经不再需要, 永远返回空字符串r.   r   )Z
ext_domainZforce_httpsr   r   r   "get_ext_domain_inurl_scheme_prefix   s    rZ   c                  G   s,   d}| D ]}|t |d 7 }q|d |S )z
    :return: str
    r.    )strr0   )argsoutputargr   r   r   strx   s
    
r`   c                 C   s,   | dkst sdS |  } t | kr$dS dS dS )u(   该user-agent是否满足全局白名单NFT)global_ua_white_namerJ   )ua_strr   r   r   check_global_ua_pass   s    rc   c                 C   s   t D ]}t| |r dS qdS )u   
    域名是否匹配 `domains_whitelist_auto_add_glob_list` 中设置的通配符
    :type domain: str
    :rtype: bool
    TF)$domains_whitelist_auto_add_glob_listr   )r5   Zdomain_globr   r   r   is_domain_match_glob_whitelist   s    
re   c                 C   s   t D ]}|| kr dS qdS )u#  
    根据content-type判断是否应该用stream模式传输(服务器下载的同时发送给用户)
     视频/音频/图片等二进制内容默认用stream模式传输
     :param mime: mime or content-type, eg: "plain/text; encoding=utf-8"
     :type mime: str
     :rtype: bool
    TF)steamed_mime_keywords)mimeZstreamed_keywordr   r   r   is_mime_streamed   s    	rh   r.   c              	   C   sB   d|t | t | |d t |t | t | f }|d}t|dS )u   生成一个HTML重定向页面
    某些浏览器在301/302页面不接受cookies, 所以需要用html重定向页面来传cookie
    :type target_url: str
    :type msg: str
    :type delay_sec: int
    :rtype: Response
    u  <!doctype html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<title>重定向 (Page Redirect)</title>
<meta http-equiv="refresh" content="%d; url=%s">
<script>setTimeout(function(){location.href="%s"} , %d000);</script>
</head>
<body>
<pre>%s</pre>
<hr />
You are now redirecting to <a href="%s">%s</a>, if it didn't redirect automatically, please click that link.
</body>
</html>r   utf-8)response)html_escapeencoder	   )Z
target_urlmsgZ	delay_secZresp_contentr   r   r   generate_html_redirect_page   s         
rn   Zzm26_z+(?P<gzip>z?)_\.(?P<b64>[a-zA-Z0-9-_]+=*)\._z_\.[a-zA-Z\d]+\bc                 C   s   dt  d | dd krdS t| }td|}| d| d  }t|}td|}|rft|}|j	dd	}t
|d
| }|S )u4  
    将 embed_real_url_to_embedded_url() 编码后的url转换为原来的带有参数的url
    `cdn_redirect_encode_query_str_into_url`设置依赖于本函数, 详细说明请看配置文件中这个参数的部分

    eg: https://cdn.domain.com/a.php_zm24_.cT1zb21ldGhpbmc=._zm24_.css
        ---> https://foo.com/a.php?q=something (assume it returns an css) (base64 only)
    eg2: https://cdn.domain.com/a/b/_zm24_.bG92ZT1saXZl._zm24_.jpg
        ---> https://foo.com/a/b/?love=live (assume it returns an jpg) (base64 only)
    eg3: https://cdn.domain.com/a/b/_zm24z_.[some long long base64 encoded string]._zm24_.jpg
        ---> https://foo.com/a/b/?love=live[and a long long query string] (assume it returns an jpg) (gzip + base64)
    eg4:https://cdn.domain.com/a  (no change)
        ---> (no query string): https://foo.com/a (assume it returns an png) (no change)
    :param embedded_url: 可能被编码的URL
    :return: 如果传入的是编码后的URL, 则返回解码后的URL, 否则返回None
    :type embedded_url: str
    :rtype: Union[str, None]
    .__.iNb64r   gzipri   )encoding?)cdn_url_query_encode_salt&regex_extract_base64_from_embedded_urlsearchrY   spanbase64urlsafe_b64decodezlib
decompressdecoder   )Zembedded_urlmrr   Zreal_request_url_no_queryZquery_string_byteZ
is_gzippedquery_stringresultr   r   r   "extract_real_url_from_embedded_url  s    




r   Fc           
      C   s   |r|  dd}n| }t|}|js(| S |j }t|dkrNd}t|}nd}t|	 }|j
d t | d | d t d t|  }t|j|j|ddf}	|rt|	}	|	S )	u  
    将url的参数(?q=some&foo=bar)编码到url路径中, 并在url末添加一个文件扩展名
    在某些对url参数支持不好的CDN中, 可以减少错误
    `cdn_redirect_encode_query_str_into_url`设置依赖于本函数, 详细说明可以看配置文件中的对应部分
    解码由 extract_real_url_from_embedded_url() 函数进行, 对应的例子也请看这个函数
    :rtype: str
    r   r   rI   zr.   ro   rq   rp   )r   r   queryrl   r3   r|   compressrz   urlsafe_b64encoder~   r   	_url_saltrR   r   schemenetlocr   )
Zreal_url_rawurl_mimeescape_slashreal_urlZurl_spZ
byte_queryZ
gzip_labelZ	b64_queryZ
mixed_pathr   r   r   r   embed_real_url_to_embedded_url7  s6    

r   @   c                 C   s:   d| krdS d| kr&d| kr dS dS nd| kr2dS dS d	S )
uL   根据 slash(/) 的格式, 猜测最有可能与之搭配的 colon(:) 格式%r    z%25Fz%253Az%253az%3Az%3aNr   )r8   r   r   r   guess_colon_from_slash]  s    r   c                 C   s   |ri nd}t | D ]x}|d dkr|dd  dkrtt| |}|rlt|}||krl|d | d| }|rz|||< q|t|d|d7 }q|S )	Nr.   r   ro   r$   __z....(total:{})r    
)dirr\   getattrr3   formatr`   )varto_dictZmax_lenr^   rV   valuelengthr   r   r   
attributesp  s    
r   c                 C   s6   | dkrt ||S | dkr$t||S td| dS )uT  
    将文本内容注入到html中
    详见 default_config.py 的 `Custom Content Injection` 部分
    :param position: 插入位置
    :type position: str
    :param html: 原始html
    :type html: str
    :param content: 等待插入的自定义文本内容
    :type content: str
    :return: 处理后的html
    :rtype: str
    
head_first	head_lastzUnknown Injection Position: {}N)inject_content_head_firstinject_content_head_last
ValueErrorr   )positionhtmlcontentr   r   r   inject_content  s
    

r   c                 C   sl   |  d}|  d}|dkr | S |dkrL||k rL| d| | | |d  S | d| | | |d  S dS )u   
    将文本内容插入到head中第一个现有<script>之前
    如果head中不存在<script>, 则加在</head>标签之前

    :type html: str
    :type content: str
    :rtype: str
    </headz<scriptr"   NrN   )r   r   head_end_posZscript_begin_posr   r   r   r     s    	

r   c                 C   s2   |  d}|dkr| S | d| | | |d  S )uk   
    将文本内容插入到head的尾部

    :type html: str
    :type content: str
    :rtype: str
    r   r"   Nr   )r   r   r   r   r   r   r     s    
r   )rD   rE   )N)r.   r   )F)Fr   )5r   rer|   rz   r   r   r   rk   urllib.parser   r   r   r   flaskr   r	   typingr
   r   	fastcacher   	functoolsr.   r   config_defaultconfigr   r   r7   r>   rC   rH   rL   rQ   rT   rY   rZ   r`   rc   re   rh   rn   rv   r   compilerw   r   r   r   r   r   r   r   r   r   r   r   <module>   sp   
	*	











&%

