
ɀ^8                 @   s  d  d l  Z  d  d l Z d  d l Z d  d l Z d  d l m Z d  d l m Z d  d l m	 Z	 m
 Z
 m Z m Z d  d l m Z m Z y d  d l m Z m Z Wn Yn Xy d  d l m Z Wn d  d l m Z Yn Xd d	 l m Z d  d
 l Td  d
 l Td d   Z e d d  d d    Z d d   Z d d   Z d d   Z d d d d  Z e d d  d d    Z  e d d  d d    Z! e d d  d  d!    Z" d" d#   Z# d d$ d%  Z$ d& d'   Z% e d d  d( d)    Z& e d d  d* d+    Z' e d d  d, d-    Z( d. d d/ d0  Z) d1 Z* e j e*  Z+ e j, d2 e+ d3 e+ d4  Z- e d d  d5 d6    Z. e d d  d7 d8 d9   Z/ e d d:  d; d<    Z0 d7 d d= d>  Z1 d? d@   Z2 dA dB   Z3 dC dD   Z4 d S)E    N)fnmatch)escape)urljoinurlsplit
urlunsplit
quote_plus)make_responseResponse)UnionTuple)	lru_cache   )CONSTS)*c             C   s   t  j j t j |   S)N)ospathjoinr   ZMIRROR_ROOT)filename r   !/var/www/zmirror/zmirror/utils.pyzmirror_root   s    r   maxsizei   c             C   s   |  j  d d  S)zM
    equivalent to s.replace("/",r"\/")
    :type s: str
    :rtype: str
    /z\/)replace)sr   r   r   s_esc   s    r   c          	   C   s   |  j  d  j  d  j d  }  |  j d  } t | d  d k oS | d d k } t |  d k s t |  d k r | r t d f S| r d j | d d   d j | d d   f Sd j | d d   d j | d d   f Sd S)u   
    提取出一个域名的根域名
    支持二级顶级域名, 允许包含端口(端口会被舍去)

    :param domain: eg: dwn.cdn.google.co.jp[:233]
    :type domain: str
    :return: root_domain, sub_domain
    :rtype: Tuple[str, str]
    
0123456789:.r         comnetorgcoedumilgovac N)zcomznetzorgzcozeduzmilzgovzacr-   r,   r,   )rstripstripsplitlentarget_domainr   )domaintempZis_level2_tldr   r   r   extract_root_domain)   s    
!&*
0r5   c          8   C   s  t  d d |  d d |  d d |  d d |  d	 d
 |  d d |  j d d  d d |  j d d  d t d |   d t d |   d t d |   d d |  d d |  d d |  j d d  d d |  j d d  d d |  j d d  d d |  j d d  d d |  j d d  d d |  j d d  d t d |   d  t d |   d! t d |   d" t d |   d# t d
 |   d$ t t d |    d% t t d |    d& t t d |     S)'uN   生成各种形式的scheme变体
    :type _domain: str
    :rtype: bool
    slashz//httpzhttp://httpszhttps://Zdouble_quotedz"%s"single_quotedz'%s'Z	hex_lowerr   z\x2fZ	hex_upperz\x2FZ	slash_escZhttp_escZ	https_escZdouble_quoted_escz\"%s\"Zsingle_quoted_escz\'%s\'Zslash_double_escz\\/Zhttp_double_escZhttps_double_escZslash_triple_escz\\\/Zhttp_triple_escZhttps_triple_escZslash_ueZhttp_ueZhttps_ueZdouble_quoted_ueZsingle_quoted_ueZslash_esc_ueZhttp_esc_ueZhttps_esc_ue)dictr   r   r   )_domainr   r   r   calc_domain_replace_prefixC   s6    






r<   c              C   s   d d l  }  |  j   j j S)zeReturns the current line number in our program.
    :return: current line number
    :rtype: int
    r   N)inspectcurrentframef_backf_lineno)r=   r   r   r   current_line_numberm   s    rA   s   We Got An Unknown Errori  c             C   s   t  |  |  S)zO

    :type errormsg: bytes
    :type error_code: int
    :rtype: Response
    )r   )errormsg
error_coder   r   r   generate_simple_resp_pagev   s    rD      c             C   s1   |  j    } x t D] } | | k r d Sq Wd S)z
    Determine whether an mime is text (eg: text/html: True, image/png: False)
    :param input_mime: str
    :return: bool
    TF)lowertext_like_mime_keywords)Z
input_mimeZinput_mime_lZ	text_wordr   r   r   is_mime_represents_text   s
    rH   c             C   s=   |  j  d  } | d k r% |  j   S|  d |  j   Sd S)ue   从content-type中提取出mime, 如 'text/html; encoding=utf-8' --> 'text/html'
    :rtype: str
    ;r   Nr+   )findrF   )_content_typecr   r   r   extract_mime_from_content_type   s    
rM   c             C   s$   t  |   } | t k r | Sd Sd S)u0   根据content-type确定该资源是否使用CDNFN)rM   mime_to_use_cdn)rK   _mimer   r   r   is_content_type_using_cdn   s    rP   c          	   C   s:   y | j  |   } Wn d SYn X| d k	 r2 | Sd Sd S)z0return a blank string if the match group is Noner*   N)group)name	match_objobjr   r   r   	get_group   s    	rU   c             C   s   d S)uC   旧版本遗留函数, 已经不再需要, 永远返回空字符串r*   r   )Z
ext_domainZforce_httpsr   r   r   "get_ext_domain_inurl_scheme_prefix   s    rV   c              G   s<   d } x" |  D] } | t  |  d 7} q W| j d  | S)z
    :return: str
    r*    )strr.   )argsoutputargr   r   r   strx   s
    r\   c             C   s;   |  d k s t  r d S|  j   }  t  |  k r3 d Sd Sd S)u(   该user-agent是否满足全局白名单NFT)global_ua_white_namerF   )ua_strr   r   r   check_global_ua_pass   s    r_   c             C   s(   x! t  D] } t |  |  r d Sq Wd S)u   
    域名是否匹配 `domains_whitelist_auto_add_glob_list` 中设置的通配符
    :type domain: str
    :rtype: bool
    TF)$domains_whitelist_auto_add_glob_listr   )r3   Zdomain_globr   r   r   is_domain_match_glob_whitelist   s    ra   c             C   s%   x t  D] } | |  k r d Sq Wd S)u#  
    根据content-type判断是否应该用stream模式传输(服务器下载的同时发送给用户)
     视频/音频/图片等二进制内容默认用stream模式传输
     :param mime: mime or content-type, eg: "plain/text; encoding=utf-8"
     :type mime: str
     :rtype: bool
    TF)steamed_mime_keywords)mimeZstreamed_keywordr   r   r   is_mime_streamed   s    	rd   r*   c          	   C   s]   d | t  |   t  |   | d t  |  t  |   t  |   f } | j d  } t d |  S)u   生成一个HTML重定向页面
    某些浏览器在301/302页面不接受cookies, 所以需要用html重定向页面来传cookie
    :type target_url: str
    :type msg: str
    :type delay_sec: int
    :rtype: Response
    u  <!doctype html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<title>重定向 (Page Redirect)</title>
<meta http-equiv="refresh" content="%d; url=%s">
<script>setTimeout(function(){location.href="%s"} , %d000);</script>
</head>
<body>
<pre>%s</pre>
<hr />
You are now redirecting to <a href="%s">%s</a>, if it didn't redirect automatically, please click that link.
</body>
</html>r   zutf-8response)html_escapeencoder	   )Z
target_urlmsgZ	delay_secZresp_contentr   r   r   generate_html_redirect_page   s
    "ri   Zzm26_z+(?P<gzip>z?)_\.(?P<b64>[a-zA-Z0-9-_]+=*)\._z_\.[a-zA-Z\d]+\bc             C   s   d t  d |  d d  k r" d St j |   } t d |  } |  d | j   d  } t j |  } t d |  } | r t j |  } | j	 d d	  } t
 | d
 |  } | S)u4  
    将 embed_real_url_to_embedded_url() 编码后的url转换为原来的带有参数的url
    `cdn_redirect_encode_query_str_into_url`设置依赖于本函数, 详细说明请看配置文件中这个参数的部分

    eg: https://cdn.domain.com/a.php_zm24_.cT1zb21ldGhpbmc=._zm24_.css
        ---> https://foo.com/a.php?q=something (assume it returns an css) (base64 only)
    eg2: https://cdn.domain.com/a/b/_zm24_.bG92ZT1saXZl._zm24_.jpg
        ---> https://foo.com/a/b/?love=live (assume it returns an jpg) (base64 only)
    eg3: https://cdn.domain.com/a/b/_zm24z_.[some long long base64 encoded string]._zm24_.jpg
        ---> https://foo.com/a/b/?love=live[and a long long query string] (assume it returns an jpg) (gzip + base64)
    eg4:https://cdn.domain.com/a  (no change)
        ---> (no query string): https://foo.com/a (assume it returns an png) (no change)
    :param embedded_url: 可能被编码的URL
    :return: 如果传入的是编码后的URL, 则返回解码后的URL, 否则返回None
    :type embedded_url: str
    :rtype: Union[str, None]
    z._z_.   Nb64r   gzipencodingzutf-8?i)cdn_url_query_encode_salt&regex_extract_base64_from_embedded_urlsearchrU   spanbase64urlsafe_b64decodezlib
decompressdecoder   )Zembedded_urlmrl   Zreal_request_url_no_queryZquery_string_byteZ
is_gzippedquery_stringresultr   r   r   "extract_real_url_from_embedded_url  s    r|   Fc       
      C   s   | r |  j  d d  } n |  } t |  } | j s: |  S| j j   } t |  d k rs d } t j |  } n d } t j |  j	   } | j
 d t | d | d t d t | } t | j | j | d d f  }	 | r t |	  }	 |	 S)	u  
    将url的参数(?q=some&foo=bar)编码到url路径中, 并在url末添加一个文件扩展名
    在某些对url参数支持不好的CDN中, 可以减少错误
    `cdn_redirect_encode_query_str_into_url`设置依赖于本函数, 详细说明可以看配置文件中的对应部分
    解码由 extract_real_url_from_embedded_url() 函数进行, 对应的例子也请看这个函数
    :rtype: str
    z\/r   rE   zr*   rj   z_.z._)r   r   queryrg   r1   rv   compressrt   urlsafe_b64encoderx   r   	_url_saltrN   r   schemenetlocr   )
Zreal_url_rawurl_mimeescape_slashreal_urlZurl_spZ
byte_queryZ
gzip_labelZ	b64_queryZ
mixed_pathr{   r   r   r   embed_real_url_to_embedded_url7  s"    
	1!r   @   c             C   sK   d |  k r d Sd |  k r3 d |  k r, d Sd Sn d |  k rC d Sd Sd	 S)
uL   根据 slash(/) 的格式, 猜测最有可能与之搭配的 colon(:) 格式%r   z%25Fz%253Az%253az%3Az%3aNr   )r6   r   r   r   guess_colon_from_slash]  s    r   c             C   s   | r i  n d } x t  |   D] } | d d k r | d	 d   d k r t t |  |   } | r t |  } | | k r | d  |  d j |  } | r | | | <q | t | d | d  7} q W| S)
Nr*   r   rj   r!   __z....(total:{})r   
r,   )dirrX   getattrr1   formatr\   )varto_dictZmax_lenrZ   rR   valuelengthr   r   r   
attributesp  s    &r   c             C   sK   |  d k r t  | |  S|  d k r2 t | |  St d j |     d S)uT  
    将文本内容注入到html中
    详见 default_config.py 的 `Custom Content Injection` 部分
    :param position: 插入位置
    :type position: str
    :param html: 原始html
    :type html: str
    :param content: 等待插入的自定义文本内容
    :type content: str
    :return: 处理后的html
    :rtype: str
    
head_firstZ	head_lastzUnknown Injection Position: {}N)inject_content_head_firstinject_content_head_last
ValueErrorr   )positionhtmlcontentr   r   r   inject_content  s
    r   c             C   s   |  j  d  } |  j  d  } | d k r. |  S| d k rf | | k  rf |  d |  | |  | d  S|  d |  | |  | d  Sd S)u   
    将文本内容插入到head中第一个现有<script>之前
    如果head中不存在<script>, 则加在</head>标签之前

    :type html: str
    :type content: str
    :rtype: str
    z</headz<scriptr   Nr+   r+   )rJ   )r   r   head_end_posZscript_begin_posr   r   r   r     s    	 r   c             C   s?   |  j  d  } | d k r |  S|  d |  | |  | d  S)uk   
    将文本内容插入到head的尾部

    :type html: str
    :type content: str
    :rtype: str
    z</headr   Nr+   )rJ   )r   r   r   r   r   r   r     s    r   )5r   rerv   rt   r   r   r   rf   urllib.parser   r   r   r   flaskr   r	   typingr
   r   	fastcacher   	functoolsr*   r   config_defaultconfigr   r   r5   r<   rA   rD   rH   rM   rP   rU   rV   r\   r_   ra   rd   ri   rp   r   compilerq   r|   r   r   r   r   r   r   r   r   r   r   <module>   s\   "


*	
'%